{
  "id": "u60IjchV5jLiPzYO",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Apify Scraper",
  "tags": [],
  "nodes": [
    {
      "id": "f5c3688c-1186-4562-be0e-1b61f2079728",
      "name": "When clicking \u2018Execute workflow\u2019",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        4816,
        464
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "2437f4b4-981d-4978-98e0-56ce7da53d05",
      "name": "HTTP Request",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        5040,
        464
      ],
      "parameters": {
        "url": "https://api.apify.com/v2/acts/apify~google-search-scraper/run-sync-get-dataset-items",
        "method": "POST",
        "options": {},
        "jsonBody": "={\n    \"countryCode\": \"us\",\n    \"focusOnPaidAds\": false,\n    \"forceExactMatch\": false,\n    \"includeIcons\": false,\n    \"includeUnfilteredResults\": true,\n    \"languageCode\": \"en\",\n    \"maxPagesPerQuery\": 3,\n    \"mobileResults\": true,\n    \"queries\": \"private equity firm US leadership team page\",\n    \"resultsPerPage\": 10,\n    \"saveHtml\": false,\n    \"saveHtmlToKeyValueStore\": false\n}",
        "sendBody": true,
        "sendQuery": true,
        "sendHeaders": true,
        "specifyBody": "json",
        "queryParameters": {
          "parameters": [
            {
              "name": "format",
              "value": "json"
            },
            {
              "name": "clean",
              "value": "true"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "Bearer (apify-api)"
            },
            {
              "name": "Content-Type",
              "value": "application/json"
            }
          ]
        }
      },
      "typeVersion": 4.3
    },
    {
      "id": "38e36a4c-752b-4607-a26e-6e1013bf8dab",
      "name": "Split Out Organic Results",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        5264,
        464
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "organicResults"
      },
      "typeVersion": 1
    },
    {
      "id": "c23d705b-76a2-4eaf-aa00-b125779d969e",
      "name": "AI Extract Owner Emails",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        5488,
        464
      ],
      "parameters": {
        "text": "=Company: {{ $('Split Out Organic Results').item.json.title }}\nURL: {{ $('Split Out Organic Results').item.json.url }}\nDomain: {{ $('Split Out Organic Results').item.json.displayedUrl }}\n\nWebsite Text Content:\n{{ $json.description }}\n\nIf no decision maker's name is present, use firstname@website, owner@website, office@website and founder@website",
        "options": {
          "systemMessage": "You are an AI assistant specialized in extracting owner and managing partner information from private equity firm websites.\n\nYour task:\n1. Analyze the provided HTML content from the website\n2. Identify the owner, managing partner, CEO, or founder names\n3. Extract the company domain from the URL\n4. Generate exactly 4 possible email addresses for the identified owner/partner using common email patterns\n\nEmail generation patterns to use:\n- firstname@domain.com\n- firstname.lastname@domain.com\n- flastname@domain.com (first initial + last name)\n- lastname@domain.com\n\nIMPORTANT RULES:\n- Return EXACTLY 4 email addresses, no more, no less\n- Exclude generic emails like info@, contact@, support@, admin@, sales@\n- If you cannot identify a specific person name from the HTML, generate common executive patterns like: ceo@, founder@, managing.partner@, owner@\n- Use the actual domain from the displayedUrl field\n- Return the result in the specified JSON format with company, domain, and owner_emails array"
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 3
    },
    {
      "id": "46c933b4-bcf7-4838-bf35-5f77f4fc5243",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        5504,
        688
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4.1-mini"
        },
        "options": {},
        "builtInTools": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "345ffbea-4bfc-4158-ab7c-1025247c70e3",
      "name": "Structured Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        5632,
        688
      ],
      "parameters": {
        "schemaType": "manual",
        "inputSchema": "{\n\t\"type\": \"object\",\n\t\"properties\": {\n\t\t\"company\": {\n\t\t\t\"type\": \"string\"\n\t\t},\n\t\t\"domain\": {\n\t\t\t\"type\": \"string\"\n\t\t},\n\t\t\"owner_emails\": {\n\t\t\t\"type\": \"array\",\n\t\t\t\"items\": {\n\t\t\t\t\"type\": \"string\"\n\t\t\t},\n\t\t\t\"minItems\": 4,\n\t\t\t\"maxItems\": 4\n\t\t}\n\t}\n}"
      },
      "typeVersion": 1.3
    },
    {
      "id": "f8fa9471-49bf-4871-959c-145bf71c10eb",
      "name": "Code in JavaScript",
      "type": "n8n-nodes-base.code",
      "position": [
        5840,
        464
      ],
      "parameters": {
        "jsCode": "const fillers = [\n    \"firstname\",\"lastname\",\"owner\",\"founder\",\"ceo\",\"admin\",\"info\",\n    \"office\",\"contact\",\"support\",\"help\",\"our\",\"managing.partner\",\"team\",\"blastname\"\n];\n\nlet output = [];\nlet seen = {}; // track per domain to dedupe\n\nfor (const item of items) {\n    // Correct path to emails\n    const company = item.json.output?.company || \"Unknown Company\";\n    const domain = item.json.output?.domain || \"unknown.com\";\n    const emails = item.json.output?.owner_emails || [];\n\n    if (!emails.length) continue; // skip if no emails\n\n    if (!seen[domain]) seen[domain] = new Set();\n\n    for (const email of emails) {\n        if (!email) continue; // skip undefined/null\n\n        const local = email.split(\"@\")[0].toLowerCase();\n\n        // Skip any email that contains a filler anywhere\n        if (fillers.some(f => local.includes(f))) continue;\n\n        // Skip duplicates for same first-name per company\n        const firstNamePart = local.split(/[.\\-_]/)[0]; \n        if (seen[domain].has(firstNamePart)) continue;\n        seen[domain].add(firstNamePart);\n\n        output.push({\n            json: {\n                company,\n                domain,\n                email\n            }\n        });\n    }\n}\n\nreturn output;\n"
      },
      "typeVersion": 2
    },
    {
      "id": "23ca241a-38a5-494c-bd23-1702e29d39a5",
      "name": "Verify Email",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        6064,
        464
      ],
      "parameters": {
        "url": "https://api.validkit.com/api/v1/verify",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "sendHeaders": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "email",
              "value": "={{ $json.email }}"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "X-API-Key",
              "value": "(api key)"
            }
          ]
        }
      },
      "typeVersion": 4.3
    },
    {
      "id": "b89e948e-1cf6-43c6-bbed-4862e58ba34d",
      "name": "Format ValidKit Output",
      "type": "n8n-nodes-base.set",
      "position": [
        6288,
        464
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "id-1",
              "name": "company",
              "type": "string",
              "value": "={{ $('Code in JavaScript').item.json.company }}"
            },
            {
              "id": "id-2",
              "name": "domain",
              "type": "string",
              "value": "={{ $('Code in JavaScript').item.json.domain }}"
            },
            {
              "id": "id-3",
              "name": "email",
              "type": "string",
              "value": "={{ $('Code in JavaScript').item.json.email }}"
            },
            {
              "id": "id-4",
              "name": "status",
              "type": "string",
              "value": "={{ $json.status }}"
            },
            {
              "id": "id-5",
              "name": "syntax_valid",
              "type": "boolean",
              "value": "={{ $json.syntax_valid }}"
            },
            {
              "id": "id-6",
              "name": "mx_found",
              "type": "boolean",
              "value": "={{ $json.mx_found }}"
            },
            {
              "id": "id-7",
              "name": "smtp_check",
              "type": "boolean",
              "value": "={{ $json.smtp_check }}"
            },
            {
              "id": "id-8",
              "name": "disposable",
              "type": "boolean",
              "value": "={{ $json.disposable }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "adcacaeb-b348-44ab-b352-c276f8911b17",
      "name": "Log Verified Emails",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        6512,
        464
      ],
      "parameters": {
        "columns": {
          "value": {},
          "schema": [
            {
              "id": "company",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "company",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "domain",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "domain",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "email",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "email",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "status",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "status",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "syntax_valid",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "syntax_valid",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "mx_found",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "mx_found",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "smtp_check",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "smtp_check",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "disposable",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "disposable",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "autoMapInputData",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "appendOrUpdate",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1_Wfe7SNs97UEN2zSc_IIEbHww_0V2hqh_Nni2o__dsI/edit#gid=0",
          "cachedResultName": "Decision Makers"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "1_Wfe7SNs97UEN2zSc_IIEbHww_0V2hqh_Nni2o__dsI",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1_Wfe7SNs97UEN2zSc_IIEbHww_0V2hqh_Nni2o__dsI/edit?usp=drivesdk",
          "cachedResultName": "Leads"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "34ca6d41-3325-4752-a8a6-695543fb5b39",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        5984,
        304
      ],
      "parameters": {
        "color": 7,
        "height": 560,
        "content": "## Verify Email (ValidKit API)"
      },
      "typeVersion": 1
    },
    {
      "id": "a0708d4d-39ad-4b9e-a9bd-7b9059b4ea5c",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        6240,
        304
      ],
      "parameters": {
        "color": 7,
        "width": 448,
        "height": 560,
        "content": "## Log Emails"
      },
      "typeVersion": 1
    },
    {
      "id": "057c1aa9-c4ec-49b6-a6e6-5e422e824898",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        5776,
        304
      ],
      "parameters": {
        "color": 7,
        "width": 192,
        "height": 560,
        "content": "## Remove Low-Quality Emails"
      },
      "typeVersion": 1
    },
    {
      "id": "2e51f774-a3d2-42ca-b7e5-092a1c24c7a3",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        5456,
        336
      ],
      "parameters": {
        "color": 7,
        "width": 304,
        "height": 560,
        "content": "## AI Emails"
      },
      "typeVersion": 1
    },
    {
      "id": "27b27633-bf4a-42bb-a2a3-650f7ef19470",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        4992,
        288
      ],
      "parameters": {
        "color": 7,
        "width": 448,
        "height": 560,
        "content": "## Apify Scraper (API) "
      },
      "typeVersion": 1
    },
    {
      "id": "49dce131-030f-4a29-bc7a-7ee5a7214619",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        4800,
        288
      ],
      "parameters": {
        "color": 7,
        "width": 176,
        "height": 560,
        "content": "## Trigger"
      },
      "typeVersion": 1
    },
    {
      "id": "c3fb0938-630a-4439-96b3-267d0103957d",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        4464,
        256
      ],
      "parameters": {
        "height": 624,
        "content": "## Main\n\nThis workflow finds and verifies email addresses from Google search results using AI.  \nIt processes organic search results, extracts company domains, generates likely email patterns, validates them, and saves verified emails to Google Sheets.\n\n\n\n## Setup\n\n1. Add your Apify API key  \n2. Add your OpenAI API key  \n3. Add your email verification API key  \n4. Connect Google Sheets  \n5. Update the search query  \n6. Run the workflow\n"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "b7bedc23-052d-415e-aa64-8364c01cd487",
  "connections": {
    "HTTP Request": {
      "main": [
        [
          {
            "node": "Split Out Organic Results",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Verify Email": {
      "main": [
        [
          {
            "node": "Format ValidKit Output",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "AI Extract Owner Emails",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Code in JavaScript": {
      "main": [
        [
          {
            "node": "Verify Email",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Format ValidKit Output": {
      "main": [
        [
          {
            "node": "Log Verified Emails",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "AI Extract Owner Emails": {
      "main": [
        [
          {
            "node": "Code in JavaScript",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "AI Extract Owner Emails",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Split Out Organic Results": {
      "main": [
        [
          {
            "node": "AI Extract Owner Emails",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking \u2018Execute workflow\u2019": {
      "main": [
        [
          {
            "node": "HTTP Request",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}