{
  "name": "QFO Rank Tracking - EmbeddingGemma Semantic Dedup & Daily Tracking",
  "nodes": [
    {
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "hours",
              "hoursInterval": 24
            }
          ]
        }
      },
      "id": "daily-trigger",
      "name": "Daily at 2AM UTC",
      "type": "n8n-nodes-base.scheduleTrigger",
      "typeVersion": 1.2,
      "position": [
        0,
        0
      ]
    },
    {
      "parameters": {
        "operation": "read",
        "documentId": {
          "__rl": true,
          "mode": "url",
          "value": "={{ $json.sheetUrl }}"
        },
        "sheetName": {
          "__rl": true,
          "mode": "byName",
          "value": "QFO Queries"
        },
        "options": {}
      },
      "id": "get-qfo-queries",
      "name": "Get QFO Queries from Sheet",
      "type": "n8n-nodes-base.googleSheets",
      "typeVersion": 4.5,
      "position": [
        220,
        0
      ],
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "jsCode": "// Prepare queries and batch for EmbeddingGemma\n// Gemini batchEmbedContents supports up to 100 requests per call\nconst items = $input.all();\nconst queries = items.map((item, idx) => ({\n  text: (item.json.query || item.json.Query || item.json.keyword || item.json.Keyword || '').trim(),\n  original_index: idx,\n  topic: item.json.topic || item.json.Topic || item.json.parent_query || '',\n  seed_query: item.json.seed_query || item.json.SeedQuery || ''\n})).filter(q => q.text.length > 0);\n\n// Batch into groups of 100 (Gemini batch limit)\nconst batchSize = 100;\nconst batches = [];\nfor (let i = 0; i < queries.length; i += batchSize) {\n  batches.push({\n    batch_id: Math.floor(i / batchSize),\n    queries: queries.slice(i, i + batchSize)\n  });\n}\n\nconsole.log(`Total queries: ${queries.length}, Batches: ${batches.length}`);\nreturn batches.map(b => ({ json: b }));"
      },
      "id": "prepare-embedding-batches",
      "name": "Prepare Embedding Batches (100/batch)",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        440,
        0
      ]
    },
    {
      "parameters": {
        "batchSize": 1,
        "options": {
          "reset": false
        }
      },
      "id": "loop-embed-batches",
      "name": "Loop Over Embedding Batches",
      "type": "n8n-nodes-base.splitInBatches",
      "typeVersion": 3,
      "position": [
        660,
        0
      ]
    },
    {
      "parameters": {
        "amount": 500,
        "unit": "milliseconds"
      },
      "id": "embed-rate-limit",
      "name": "Rate Limit (500ms)",
      "type": "n8n-nodes-base.wait",
      "typeVersion": 1.1,
      "position": [
        880,
        0
      ]
    },
    {
      "parameters": {
        "method": "POST",
        "url": "=https://generativelanguage.googleapis.com/v1beta/models/gemma-embedding-exp-03-07:batchEmbedContents?key={{ $env.GOOGLE_AI_API_KEY }}",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Content-Type",
              "value": "application/json"
            }
          ]
        },
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={{ JSON.stringify({ requests: $json.queries.map(q => ({ model: 'models/gemma-embedding-exp-03-07', content: { parts: [{ text: q.text }] }, taskType: 'SEMANTIC_SIMILARITY' })) }) }}",
        "options": {
          "timeout": 60000
        }
      },
      "id": "embed-gemma",
      "name": "Embed with EmbeddingGemma (batchEmbedContents)",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        1100,
        0
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "jsCode": "// Attach embeddings back to query objects for this batch\nconst item = $input.first();\nconst embeddingsResponse = item.json.embeddings || [];\nconst batchQueries = $('Loop Over Embedding Batches').first().json.queries;\n\nconst enriched = batchQueries.map((q, idx) => ({\n  text: q.text,\n  original_index: q.original_index,\n  topic: q.topic,\n  seed_query: q.seed_query,\n  embedding: embeddingsResponse[idx]?.values || []\n}));\n\nreturn [{ json: { enriched_queries: enriched } }];"
      },
      "id": "attach-embeddings",
      "name": "Attach Embeddings to Queries",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1320,
        0
      ]
    },
    {
      "parameters": {
        "jsCode": "// Collect all embedded queries from all batches\n// This node aggregates across the loop\nconst items = $input.all();\n\nlet allEmbedded = [];\nfor (const item of items) {\n  const batch = item.json.enriched_queries || [];\n  allEmbedded = allEmbedded.concat(batch);\n}\n\n// Store total for reduction ratio\nconst totalInput = allEmbedded.length;\n\n// =============================================\n// SEMANTIC DEDUPLICATION via greedy cosine clustering\n// Threshold: 0.92 \u2014 queries above this similarity\n// are considered semantically equivalent\n// =============================================\nfunction cosineSim(a, b) {\n  if (!a || !b || a.length === 0 || b.length === 0 || a.length !== b.length) return 0;\n  let dot = 0, nA = 0, nB = 0;\n  for (let i = 0; i < a.length; i++) {\n    dot += a[i] * b[i];\n    nA += a[i] * a[i];\n    nB += b[i] * b[i];\n  }\n  return dot / (Math.sqrt(nA) * Math.sqrt(nB));\n}\n\nconst THRESHOLD = 0.92;\nconst clusters = [];\nconst assigned = new Set();\n\nfor (let i = 0; i < allEmbedded.length; i++) {\n  if (assigned.has(i)) continue;\n  if (allEmbedded[i].embedding.length === 0) continue;\n\n  const cluster = {\n    centroid_idx: i,\n    members: [allEmbedded[i]]\n  };\n  assigned.add(i);\n\n  for (let j = i + 1; j < allEmbedded.length; j++) {\n    if (assigned.has(j)) continue;\n    if (allEmbedded[j].embedding.length === 0) continue;\n\n    const sim = cosineSim(allEmbedded[i].embedding, allEmbedded[j].embedding);\n    if (sim >= THRESHOLD) {\n      cluster.members.push(allEmbedded[j]);\n      assigned.add(j);\n    }\n  }\n\n  // Representative selection strategy:\n  // Pick the shortest query (most general/head-like)\n  // unless a member is clearly a seed query\n  let representative = cluster.members.reduce((best, curr) => {\n    // Prefer seed queries\n    if (curr.seed_query && curr.text === curr.seed_query) return curr;\n    if (best.seed_query && best.text === best.seed_query) return best;\n    // Otherwise pick shortest\n    return curr.text.length <= best.text.length ? curr : best;\n  });\n\n  clusters.push({\n    representative,\n    members: cluster.members\n  });\n}\n\n// Also capture any queries that had empty embeddings (API failures)\nconst unembedded = allEmbedded.filter((q, idx) => q.embedding.length === 0 && !assigned.has(idx));\nfor (const q of unembedded) {\n  clusters.push({\n    representative: q,\n    members: [q]\n  });\n}\n\nconst reductionPct = totalInput > 0 ? Math.round((1 - clusters.length / totalInput) * 100) : 0;\n\nconsole.log(`Semantic dedup: ${totalInput} \u2192 ${clusters.length} (${reductionPct}% reduction)`);\n\nreturn clusters.map(c => ({\n  json: {\n    representative_query: c.representative.text,\n    topic: c.representative.topic,\n    seed_query: c.representative.seed_query,\n    cluster_size: c.members.length,\n    cluster_members: c.members.map(m => m.text).join(' | '),\n    total_input_queries: totalInput,\n    total_representative_queries: clusters.length,\n    reduction_pct: reductionPct\n  }\n}));"
      },
      "id": "semantic-dedup",
      "name": "Semantic Dedup (Cosine 0.92 Threshold)",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1540,
        0
      ]
    },
    {
      "parameters": {
        "operation": "append",
        "documentId": {
          "__rl": true,
          "mode": "url",
          "value": "={{ $json.sheetUrl }}"
        },
        "sheetName": {
          "__rl": true,
          "mode": "byName",
          "value": "Reduction Log"
        },
        "columns": {
          "mappingMode": "defineBelow",
          "values": {
            "date": "={{ new Date().toISOString().split('T')[0] }}",
            "total_input": "={{ $json.total_input_queries }}",
            "total_representative": "={{ $json.total_representative_queries }}",
            "reduction_pct": "={{ $json.reduction_pct }}%",
            "model": "gemma-embedding-exp-03-07",
            "threshold": "0.92"
          }
        },
        "options": {}
      },
      "id": "log-reduction-sheet",
      "name": "Log Reduction Stats to Sheet",
      "type": "n8n-nodes-base.googleSheets",
      "typeVersion": 4.5,
      "position": [
        1540,
        220
      ],
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "batchSize": 1,
        "options": {
          "reset": false
        }
      },
      "id": "loop-serp-queries",
      "name": "Loop Over Representative Queries",
      "type": "n8n-nodes-base.splitInBatches",
      "typeVersion": 3,
      "position": [
        1760,
        0
      ]
    },
    {
      "parameters": {
        "amount": 2,
        "unit": "seconds"
      },
      "id": "serp-delay",
      "name": "Polite Delay (2s)",
      "type": "n8n-nodes-base.wait",
      "typeVersion": 1.1,
      "position": [
        1980,
        0
      ]
    },
    {
      "parameters": {
        "url": "https://serpapi.com/search",
        "sendQuery": true,
        "queryParameters": {
          "parameters": [
            {
              "name": "q",
              "value": "={{ $json.representative_query }}"
            },
            {
              "name": "api_key",
              "value": "={{ $env.SERPAPI_KEY }}"
            },
            {
              "name": "engine",
              "value": "google"
            },
            {
              "name": "gl",
              "value": "us"
            },
            {
              "name": "hl",
              "value": "en"
            },
            {
              "name": "num",
              "value": "20"
            }
          ]
        },
        "options": {
          "timeout": 30000
        }
      },
      "id": "pull-serp",
      "name": "Pull SERP via SerpAPI",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        2200,
        0
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "jsCode": "const serpData = $input.first().json;\nconst queryData = $('Loop Over Representative Queries').first().json;\nconst query = queryData.representative_query;\nconst topic = queryData.topic;\nconst seed_query = queryData.seed_query;\nconst cluster_size = queryData.cluster_size;\nconst cluster_members = queryData.cluster_members;\nconst timestamp = new Date().toISOString();\nconst date = timestamp.split('T')[0];\n\nconst results = [];\n\n// ---- ORGANIC RESULTS ----\nconst organic = serpData.organic_results || [];\nfor (const r of organic) {\n  results.push({\n    date,\n    timestamp,\n    query,\n    topic,\n    seed_query,\n    cluster_size,\n    position: r.position || null,\n    url: r.link || '',\n    title: (r.title || '').substring(0, 200),\n    snippet: (r.snippet || '').substring(0, 500),\n    domain: r.displayed_link || r.source || '',\n    result_type: 'organic',\n    serp_feature: null\n  });\n}\n\n// ---- AI OVERVIEW ----\nif (serpData.ai_overview) {\n  const aio = serpData.ai_overview;\n  const sources = aio.references || aio.sources || aio.cited_sources || [];\n  results.push({\n    date, timestamp, query, topic, seed_query, cluster_size,\n    position: 0,\n    url: '',\n    title: 'AI Overview',\n    snippet: (typeof aio === 'string' ? aio : (aio.text || aio.snippet || JSON.stringify(aio))).substring(0, 500),\n    domain: '',\n    result_type: 'ai_overview',\n    serp_feature: 'ai_overview',\n    aio_source_count: sources.length,\n    aio_sources_json: JSON.stringify(sources.slice(0, 10)).substring(0, 2000)\n  });\n  // Also log individual AIO sources\n  for (let si = 0; si < sources.length && si < 10; si++) {\n    const src = sources[si];\n    results.push({\n      date, timestamp, query, topic, seed_query, cluster_size,\n      position: si + 1,\n      url: src.link || src.url || '',\n      title: (src.title || src.name || '').substring(0, 200),\n      snippet: (src.snippet || '').substring(0, 500),\n      domain: src.displayed_link || src.source || '',\n      result_type: 'ai_overview_source',\n      serp_feature: 'ai_overview'\n    });\n  }\n}\n\n// ---- FEATURED SNIPPET ----\nconst fs = serpData.answer_box || serpData.featured_snippet;\nif (fs) {\n  results.push({\n    date, timestamp, query, topic, seed_query, cluster_size,\n    position: 0,\n    url: fs.link || '',\n    title: (fs.title || 'Featured Snippet').substring(0, 200),\n    snippet: (fs.snippet || fs.answer || fs.contents?.map(c => c.text || '').join(' ') || '').substring(0, 500),\n    domain: fs.displayed_link || '',\n    result_type: 'featured_snippet',\n    serp_feature: 'featured_snippet'\n  });\n}\n\n// ---- PEOPLE ALSO ASK ----\nconst paa = serpData.related_questions || [];\nfor (const q of paa.slice(0, 8)) {\n  results.push({\n    date, timestamp, query, topic, seed_query, cluster_size,\n    position: null,\n    url: q.link || '',\n    title: (q.question || '').substring(0, 200),\n    snippet: (q.snippet || '').substring(0, 500),\n    domain: q.displayed_link || q.source || '',\n    result_type: 'people_also_ask',\n    serp_feature: 'paa'\n  });\n}\n\n// ---- KNOWLEDGE PANEL ----\nif (serpData.knowledge_graph) {\n  const kg = serpData.knowledge_graph;\n  results.push({\n    date, timestamp, query, topic, seed_query, cluster_size,\n    position: null,\n    url: kg.website || kg.source?.link || '',\n    title: (kg.title || '').substring(0, 200),\n    snippet: (kg.description || '').substring(0, 500),\n    domain: kg.source?.name || '',\n    result_type: 'knowledge_graph',\n    serp_feature: 'knowledge_graph'\n  });\n}\n\n// ---- LOCAL PACK ----\nconst local = serpData.local_results?.places || [];\nfor (const l of local.slice(0, 3)) {\n  results.push({\n    date, timestamp, query, topic, seed_query, cluster_size,\n    position: l.position || null,\n    url: l.link || '',\n    title: (l.title || '').substring(0, 200),\n    snippet: (l.address || '').substring(0, 500),\n    domain: '',\n    result_type: 'local_pack',\n    serp_feature: 'local_pack'\n  });\n}\n\n// ---- VIDEO RESULTS ----\nconst videos = serpData.inline_videos || [];\nfor (const v of videos.slice(0, 3)) {\n  results.push({\n    date, timestamp, query, topic, seed_query, cluster_size,\n    position: null,\n    url: v.link || '',\n    title: (v.title || '').substring(0, 200),\n    snippet: (v.source || '').substring(0, 500),\n    domain: v.platform || v.source || '',\n    result_type: 'video',\n    serp_feature: 'video_carousel'\n  });\n}\n\n// ---- RELATED SEARCHES ----\nconst related = serpData.related_searches || [];\nfor (const r of related.slice(0, 8)) {\n  results.push({\n    date, timestamp, query, topic, seed_query, cluster_size,\n    position: null,\n    url: '',\n    title: (r.query || '').substring(0, 200),\n    snippet: '',\n    domain: '',\n    result_type: 'related_search',\n    serp_feature: 'related_searches'\n  });\n}\n\n// Fallback if nothing came back\nif (results.length === 0) {\n  results.push({\n    date, timestamp, query, topic, seed_query, cluster_size,\n    position: null, url: '', title: '', snippet: 'No SERP data returned',\n    domain: '', result_type: 'error', serp_feature: null\n  });\n}\n\nreturn results.map(r => ({ json: r }));"
      },
      "id": "parse-serp-features",
      "name": "Parse All SERP Features",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        2420,
        0
      ]
    },
    {
      "parameters": {
        "projectId": {
          "__rl": true,
          "mode": "list",
          "value": "YOUR_GCP_PROJECT_ID"
        },
        "datasetId": {
          "__rl": true,
          "mode": "list",
          "value": "seo_monitoring"
        },
        "tableId": "qfo_rank_tracking",
        "options": {
          "createTableIfNotExists": true,
          "createDisposition": "CREATE_IF_NEEDED",
          "writeDisposition": "WRITE_APPEND"
        }
      },
      "id": "insert-rankings-bq",
      "name": "Insert to BigQuery",
      "type": "n8n-nodes-base.googleBigQuery",
      "typeVersion": 2,
      "position": [
        2640,
        0
      ],
      "credentials": {
        "googleBigQueryOAuth2Api": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "jsCode": "// Filter for only AIO source rows to create a dedicated AIO tracking view\nconst items = $input.all();\nconst aioItems = items.filter(i => \n  i.json.result_type === 'ai_overview' || \n  i.json.result_type === 'ai_overview_source'\n);\nif (aioItems.length === 0) return [];\nreturn aioItems;"
      },
      "id": "filter-aio",
      "name": "Filter AIO Results",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        2640,
        220
      ]
    },
    {
      "parameters": {
        "projectId": {
          "__rl": true,
          "mode": "list",
          "value": "YOUR_GCP_PROJECT_ID"
        },
        "datasetId": {
          "__rl": true,
          "mode": "list",
          "value": "seo_monitoring"
        },
        "tableId": "qfo_aio_tracking",
        "options": {
          "createTableIfNotExists": true,
          "createDisposition": "CREATE_IF_NEEDED",
          "writeDisposition": "WRITE_APPEND"
        }
      },
      "id": "insert-aio-bq",
      "name": "Insert AIO Data to BigQuery",
      "type": "n8n-nodes-base.googleBigQuery",
      "typeVersion": 2,
      "position": [
        2860,
        220
      ],
      "credentials": {
        "googleBigQueryOAuth2Api": {
          "name": "<your credential>"
        }
      }
    }
  ],
  "connections": {
    "Daily at 2AM UTC": {
      "main": [
        [
          {
            "node": "Get QFO Queries from Sheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get QFO Queries from Sheet": {
      "main": [
        [
          {
            "node": "Prepare Embedding Batches (100/batch)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Prepare Embedding Batches (100/batch)": {
      "main": [
        [
          {
            "node": "Loop Over Embedding Batches",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Embedding Batches": {
      "main": [
        [
          {
            "node": "Rate Limit (500ms)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Rate Limit (500ms)": {
      "main": [
        [
          {
            "node": "Embed with EmbeddingGemma (batchEmbedContents)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Embed with EmbeddingGemma (batchEmbedContents)": {
      "main": [
        [
          {
            "node": "Attach Embeddings to Queries",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Attach Embeddings to Queries": {
      "main": [
        [
          {
            "node": "Semantic Dedup (Cosine 0.92 Threshold)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Semantic Dedup (Cosine 0.92 Threshold)": {
      "main": [
        [
          {
            "node": "Loop Over Representative Queries",
            "type": "main",
            "index": 0
          },
          {
            "node": "Log Reduction Stats to Sheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Representative Queries": {
      "main": [
        [
          {
            "node": "Polite Delay (2s)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Polite Delay (2s)": {
      "main": [
        [
          {
            "node": "Pull SERP via SerpAPI",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Pull SERP via SerpAPI": {
      "main": [
        [
          {
            "node": "Parse All SERP Features",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse All SERP Features": {
      "main": [
        [
          {
            "node": "Insert to BigQuery",
            "type": "main",
            "index": 0
          },
          {
            "node": "Filter AIO Results",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter AIO Results": {
      "main": [
        [
          {
            "node": "Insert AIO Data to BigQuery",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1"
  }
}