{
  "id": "A0iNIsK4Vl5Tl1dA",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Data Enrichment",
  "tags": [],
  "nodes": [
    {
      "id": "f93ffe2d-93c2-4bba-ab46-7d2827c0e42f",
      "name": "Click to Start",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        1232,
        1392
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "3f4af357-425a-4a01-9f3e-db181183d83c",
      "name": "Combine with Request IDs",
      "type": "n8n-nodes-base.code",
      "position": [
        2832,
        1392
      ],
      "parameters": {
        "jsCode": "// Combine original data with request_id and add poll counter\nconst originals = $('Store Original Columns').all();\nconst researchResults = $input.all();\n\nreturn originals.map((orig, i) => ({\n  json: {\n    ...orig.json,\n    request_id: researchResults[i]?.json?.request_id || null,\n    poll_count: 0\n  }\n}));"
      },
      "typeVersion": 2
    },
    {
      "id": "807a8384-7a3a-4ec6-9462-b034da47b6fb",
      "name": "Wait 30s",
      "type": "n8n-nodes-base.wait",
      "position": [
        3568,
        1392
      ],
      "parameters": {
        "amount": 30
      },
      "typeVersion": 1.1
    },
    {
      "id": "3e0caa72-f8bd-4a21-ade5-a0ed321f734d",
      "name": "Check Research Status",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        3888,
        1392
      ],
      "parameters": {
        "url": "=https://api.tavily.com/research/{{ $json.request_id }}",
        "options": {},
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth"
      },
      "credentials": {
        "httpHeaderAuth": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "b17d1581-b486-42a3-bf0e-0b52eb8d3e0f",
      "name": "Combine Status",
      "type": "n8n-nodes-base.code",
      "position": [
        4288,
        1392
      ],
      "parameters": {
        "jsCode": "// Combine original data with research status\nconst waitData = $('Wait 30s').all();\nconst statusResults = $input.all();\n\nreturn waitData.map((orig, i) => {\n  const status = statusResults[i]?.json || {};\n  return {\n    json: {\n      ...orig.json,\n      poll_count: (orig.json.poll_count || 0) + 1,\n      research_status: status.status || 'unknown',\n      research_result: status\n    }\n  };\n});"
      },
      "typeVersion": 2
    },
    {
      "id": "83fd49e4-3e3b-42f8-a6ba-59b8c8c9e55e",
      "name": "Research Done?",
      "type": "n8n-nodes-base.if",
      "position": [
        4512,
        1280
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "or",
          "conditions": [
            {
              "id": "condition-completed",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.research_status }}",
              "rightValue": "completed"
            }
          ]
        }
      },
      "typeVersion": 2
    },
    {
      "id": "6ed17468-ed5d-4f1c-8276-66b63930c78d",
      "name": "Under 5 min?",
      "type": "n8n-nodes-base.if",
      "position": [
        4736,
        1456
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "condition-timeout",
              "operator": {
                "type": "number",
                "operation": "lt"
              },
              "leftValue": "={{ $json.poll_count }}",
              "rightValue": 10
            }
          ]
        }
      },
      "typeVersion": 2
    },
    {
      "id": "9c411b50-1969-4902-a563-30555ad6de8e",
      "name": "Merge All Data (Existing Columns Only)",
      "type": "n8n-nodes-base.code",
      "position": [
        5728,
        1392
      ],
      "parameters": {
        "jsCode": "// Merge original data with research results - ONLY for existing columns\nconst items = $input.all();\n\nconst isEmpty = (val) => val === undefined || val === null || val === '';\n\n// Helper function to normalize strings for matching\nconst normalize = (str) => {\n  if (!str) return '';\n  return str.toLowerCase().replace(/[^a-z0-9]/g, '');\n};\n\n// Helper function to find matching field in extracted data\nconst findMatchingField = (colName, extracted) => {\n  const normalizedCol = normalize(colName);\n  \n  // 1. Exact match (case-insensitive)\n  for (const key in extracted) {\n    if (normalize(key) === normalizedCol) {\n      return extracted[key];\n    }\n  }\n  \n  // 2. Partial match (column name contains research field or vice versa)\n  for (const key in extracted) {\n    const normalizedKey = normalize(key);\n    if (normalizedCol.includes(normalizedKey) || normalizedKey.includes(normalizedCol)) {\n      return extracted[key];\n    }\n  }\n  \n  // 3. Common field name variations\n  const variations = {\n    'cto': ['cto', 'chieftechnologyofficer', 'chieftech', 'technologyofficer'],\n    'ceo': ['ceo', 'chiefexecutiveofficer', 'chiefexec', 'executiveofficer'],\n    'cfo': ['cfo', 'chieffinancialofficer', 'financialofficer'],\n    'revenue': ['revenue', 'annualrevenue', 'revenues', 'totalrevenue'],\n    'headquarters': ['headquarters', 'hq', 'hqbased', 'location', 'headquarter'],\n    'employees': ['employees', 'noofemployees', 'employeecount', 'staff', 'workforce'],\n    'founded': ['founded', 'foundedyear', 'yearfounded', 'established', 'foundingyear'],\n    'funding': ['funding', 'latestfunding', 'recentfunding', 'investment', 'capital'],\n    'industry': ['industry', 'sector', 'businesssector'],\n    'website': ['website', 'url', 'web', 'site'],\n    'domain': ['domain', 'domainname', 'webdomain']\n  };\n  \n  for (const [variationKey, variationList] of Object.entries(variations)) {\n    if (variationList.some(v => normalize(v) === normalizedCol)) {\n      // Try to find matching research field\n      for (const key in extracted) {\n        const normalizedKey = normalize(key);\n        if (variationList.some(v => normalize(v) === normalizedKey)) {\n          return extracted[key];\n        }\n      }\n    }\n  }\n  \n  return null;\n};\n\nreturn items.map((item) => {\n  const original = item.json;\n  const research = original.research_result || {};\n  const originalColumns = original._original_columns || [];\n  \n  // Try multiple possible locations for structured output\n  let extracted = {};\n  \n  // 1. Check structured_output (primary expected location)\n  if (research.structured_output && typeof research.structured_output === 'object') {\n    extracted = research.structured_output;\n  }\n  // 2. Check if content is a JSON object (structured output)\n  else if (research.content && typeof research.content === 'object') {\n    extracted = research.content;\n  }\n  // 3. Try parsing content if it's a JSON string\n  else if (research.content && typeof research.content === 'string') {\n    try {\n      const parsed = JSON.parse(research.content);\n      if (typeof parsed === 'object') extracted = parsed;\n    } catch (e) { /* Not JSON, skip */ }\n  }\n  // 4. Check output field\n  else if (research.output && typeof research.output === 'object') {\n    extracted = research.output;\n  }\n  // 5. Check data field\n  else if (research.data && typeof research.data === 'object') {\n    extracted = research.data;\n  }\n  // 6. Check result field\n  else if (research.result && typeof research.result === 'object') {\n    extracted = research.result;\n  }\n  \n  // Build merged object - ONLY include columns that exist in original\n  const merged = {};\n  \n  // For each original column, try to fill it from research\n  originalColumns.forEach((colName) => {\n    // Skip internal fields\n    if (colName.startsWith('_')) {\n      return;\n    }\n    \n    // If original has a value, keep it\n    if (!isEmpty(original[colName])) {\n      merged[colName] = original[colName];\n      return;\n    }\n    \n    // Try to find matching research field using intelligent matching\n    const foundValue = findMatchingField(colName, extracted);\n    \n    // Set value (empty string if not found)\n    merged[colName] = foundValue !== null && foundValue !== undefined ? foundValue : '';\n  });\n  \n  return { json: merged };\n});"
      },
      "typeVersion": 2
    },
    {
      "id": "5a00c4d6-7021-475e-91bb-67c264731f97",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        144,
        976
      ],
      "parameters": {
        "width": 992,
        "height": 560,
        "content": "## Data Enrichment with Tavily Research API\nFill in missing company data in your Google Sheet using Tavily\u2019s web research. This is mainly for existing columns (You must manually add column names that you want to enrich, if they do not exist).\n\n## How it works\n- Reads rows from a Google Sheet\n- For missing values, finds information from the web using Tavily Research\n- Maps results to existing columns\n- Writes the enriched data to a new sheet\n- Retries automatically until results are ready (up to ~5 minutes)\n\n## Setup steps\n1) Prepare your sheet\n2) Create the columns you want filled (e.g. CEO, revenue, HQ - if the data is related to company information)\n3) Create an empty output sheet\n4) Add credentials\n5) Google Sheets OAuth for both sheet nodes\n6) Tavily API key for the research nodes\n7) Configure and run\n8) Enter the Google Sheet ID and sheet names (For both the CSV nodes)\n9) Click Start\n\nRequirements: Google account (For sheets) and Tavily API key\n\n"
      },
      "typeVersion": 1
    },
    {
      "id": "d94f518f-f0f5-445d-933a-445e54891739",
      "name": "Start Tavily Research",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        2608,
        1392
      ],
      "parameters": {
        "url": "https://api.tavily.com/research",
        "method": "POST",
        "options": {},
        "jsonBody": "={{ JSON.stringify($json.research_body) }}",
        "sendBody": true,
        "sendHeaders": true,
        "specifyBody": "json",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "headerParameters": {
          "parameters": [
            {
              "name": "Content-Type",
              "value": "application/json"
            }
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "b57b78d3-8fb0-4cbe-ad88-3d4c8942aaf8",
      "name": "Enrich CSV file",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        5984,
        1392
      ],
      "parameters": {
        "columns": {
          "value": {
            "CTO": "={{ $json.CTO }}",
            "domain": "={{ $json.domain }}",
            "Revenue": "={{ $json.Revenue }}",
            "website": "={{ $json.website }}",
            "hq_based": "={{ $json.hq_based }}",
            "industry": "={{ $json.industry }}",
            "company_name": "={{ $json.company_name }}",
            "no_of_employees": "={{ $json.no_of_employees }}"
          },
          "schema": [
            {
              "id": "company_name",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "company_name",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "no_of_employees",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "no_of_employees",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "hq_based",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "hq_based",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "industry",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "industry",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "website",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "website",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "domain",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "domain",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "CTO",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "CTO",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Revenue",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Revenue",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "autoMapInputData",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "name",
          "value": "<__PLACEHOLDER_VALUE__Destination Sheet Name (e.g., enriched_companies)__>"
        },
        "documentId": {
          "__rl": true,
          "mode": "id",
          "value": "<__PLACEHOLDER_VALUE__Your Google Sheets Document ID__>"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.5
    },
    {
      "id": "f7cb0bb3-d055-4edd-90a1-7ecfdeaa1b4a",
      "name": "Read CSV file",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        1696,
        1392
      ],
      "parameters": {
        "options": {},
        "sheetName": {
          "__rl": true,
          "mode": "name",
          "value": "<__PLACEHOLDER_VALUE__Source Sheet Name (e.g., sample_companies)__>"
        },
        "documentId": {
          "__rl": true,
          "mode": "id",
          "value": "<__PLACEHOLDER_VALUE__Your Google Sheets Document ID__>"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.5
    },
    {
      "id": "a6d2296d-3b78-41be-9f06-4b1b98db3935",
      "name": "Store Original Columns",
      "type": "n8n-nodes-base.code",
      "position": [
        2384,
        1392
      ],
      "parameters": {
        "jsCode": "// Store original column names and build dynamic output schema\nconst items = $input.all();\n\n// Get column names from first item (all items should have same structure)\nconst firstItem = items[0]?.json || {};\nconst originalColumns = Object.keys(firstItem).filter(col => !col.startsWith('_'));\n\n// Build dynamic output schema based on original columns\n// Map column names to research-friendly descriptions\nconst columnDescriptions = {\n  'company_name': 'Official company name',\n  'website': 'Company website URL',\n  'domain': 'Company domain name',\n  'hq_based': 'Headquarters location',\n  'headquarters': 'Headquarters location',\n  'no_of_employees': 'Number of employees',\n  'employee_count': 'Number of employees',\n  'employees': 'Number of employees',\n  'industry': 'Industry sector',\n  'founded_year': 'Year company was founded',\n  'founded': 'Year company was founded',\n  'ceo': 'Current CEO name',\n  'cto': 'Current CTO (Chief Technology Officer) name',\n  'cfo': 'Current CFO (Chief Financial Officer) name',\n  'revenue': 'Annual revenue or latest revenue figure',\n  'latest_funding': 'Latest funding round details',\n  'funding': 'Latest funding information',\n  'description': 'Brief company description',\n  'about': 'Company description or about information'\n};\n\nconst outputSchemaProperties = {};\noriginalColumns.forEach(col => {\n  // Use custom description if available, otherwise generate one\n  const description = columnDescriptions[col.toLowerCase()] || \n    `Information about ${col.replace(/_/g, ' ')}`;\n  outputSchemaProperties[col] = {\n    type: 'string',\n    description: description\n  };\n});\n\nconst outputSchema = { properties: outputSchemaProperties };\n\nreturn items.map((item, index) => {\n  const company = item.json.company_name || item.json.domain || item.json.website || 'unknown';\n  const researchInput = 'Research company ' + company + '. Find information about: ' + originalColumns.join(', ') + '.';\n  \n  return {\n    json: {\n      ...item.json,\n      _row_number: index + 2,\n      _original_columns: originalColumns,\n      research_body: {\n        input: researchInput,\n        output_schema: outputSchema\n      }\n    }\n  };\n});"
      },
      "typeVersion": 2
    },
    {
      "id": "ff8f57f7-dd9b-47c7-a5f1-a46c710962d9",
      "name": "Section: Data Input",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1168,
        976
      ],
      "parameters": {
        "color": 7,
        "width": 868,
        "height": 572,
        "content": "## Data Input\n\nReads company data from Google Sheets"
      },
      "typeVersion": 1
    },
    {
      "id": "964fe434-f1c8-4ce9-8fdf-779285c991ce",
      "name": "Section: Research Initiation",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2336,
        1232
      ],
      "parameters": {
        "color": 7,
        "width": 644,
        "height": 316,
        "content": "## Research Initiation\n\nPrepares data and starts Tavily research requests"
      },
      "typeVersion": 1
    },
    {
      "id": "7adeb5f6-3798-4340-ac2f-706b48897089",
      "name": "Section: Polling Loop",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        3424,
        1088
      ],
      "parameters": {
        "color": 7,
        "width": 1604,
        "height": 588,
        "content": "## Polling Loop\n\nWaits and checks research status until complete (max 5 min)"
      },
      "typeVersion": 1
    },
    {
      "id": "78f1e4d9-260c-4fb4-910b-79b521b23a09",
      "name": "Section: Data Output",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        5488,
        1232
      ],
      "parameters": {
        "color": 7,
        "width": 836,
        "height": 364,
        "content": "## Data Output\n\nMerges enriched data and writes to Google Sheets"
      },
      "typeVersion": 1
    },
    {
      "id": "4dc5d6ba-10b8-4018-802e-7ad51232eca3",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1520,
        1088
      ],
      "parameters": {
        "color": 3,
        "width": 480,
        "height": 448,
        "content": "## \u26a0\ufe0f Important Setup Step\n\n**Before running:**\n\n1. Create an empty output sheet in the same Google Sheets document as your input data\n\n2. The output sheet will receive the enriched data\n\nExample:\n- Input: \"companies\"\n- Output: \"enriched_companies\""
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "callerPolicy": "workflowsFromSameOwner",
    "errorWorkflow": "A0iNIsK4Vl5Tl1dA",
    "timeSavedMode": "fixed",
    "availableInMCP": false,
    "executionOrder": "v1"
  },
  "versionId": "90486e97-38dd-4227-973b-741cd377c155",
  "connections": {
    "Wait 30s": {
      "main": [
        [
          {
            "node": "Check Research Status",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Under 5 min?": {
      "main": [
        [
          {
            "node": "Wait 30s",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Merge All Data (Existing Columns Only)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Read CSV file": {
      "main": [
        [
          {
            "node": "Store Original Columns",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Click to Start": {
      "main": [
        [
          {
            "node": "Read CSV file",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Combine Status": {
      "main": [
        [
          {
            "node": "Research Done?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Research Done?": {
      "main": [
        [
          {
            "node": "Merge All Data (Existing Columns Only)",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Under 5 min?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check Research Status": {
      "main": [
        [
          {
            "node": "Combine Status",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Start Tavily Research": {
      "main": [
        [
          {
            "node": "Combine with Request IDs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Store Original Columns": {
      "main": [
        [
          {
            "node": "Start Tavily Research",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Combine with Request IDs": {
      "main": [
        [
          {
            "node": "Wait 30s",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge All Data (Existing Columns Only)": {
      "main": [
        [
          {
            "node": "Enrich CSV file",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}