{
  "name": "Astra Email Extractor v2 FIXED",
  "nodes": [
    {
      "parameters": {},
      "id": "1",
      "name": "Manual Trigger",
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        240,
        300
      ]
    },
    {
      "parameters": {
        "authentication": "oAuth2",
        "resource": "sheet",
        "operation": "read",
        "documentId": {
          "mode": "id",
          "value": "1RDa3Ui_N4wXK5tzeeyNoXjUknjFoT7S62D_j38MyEjk"
        },
        "sheetName": {
          "mode": "name",
          "value": "Sheet1"
        },
        "options": {
          "headerRow": 1,
          "includeRowNumber": true
        }
      },
      "id": "2",
      "name": "Get Row(s)",
      "type": "n8n-nodes-base.googleSheets",
      "typeVersion": 4,
      "position": [
        460,
        300
      ]
    },
    {
      "parameters": {
        "batchSize": 1
      },
      "id": "3",
      "name": "Loop Over Items",
      "type": "n8n-nodes-base.splitInBatches",
      "typeVersion": 3,
      "position": [
        680,
        300
      ]
    },
    {
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{ ($json[\"E-mail\"] || $json[\"E-mail 1\"] || $json[\"Email\"] || \"\").toString().trim() }}",
              "operation": "isEmpty",
              "value2": ""
            },
            {
              "value1": "={{ ($json[\"E-mail 2\"] || $json[\"Email 2\"] || \"\").toString().trim() }}",
              "operation": "isEmpty",
              "value2": ""
            },
            {
              "value1": "={{ ($json[\"E-mail 3\"] || $json[\" E-mail 3\"] || $json[\"Email 3\"] || \"\").toString().trim() }}",
              "operation": "isEmpty",
              "value2": ""
            }
          ]
        },
        "combineOperation": "any"
      },
      "id": "4",
      "name": "Has Empty Email Slot?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 1,
      "position": [
        900,
        300
      ]
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// Select URL v4 \u2014 Pure regex URL parsing (no URL constructor, works in all n8n sandboxes)\r\n// ROOT CAUSE FIX: n8n Code node runs in vm.createContext({}) where 'URL' is undefined.\r\n// new URL() inside fixUrl was silently catching ReferenceError -> returning '' -> no seeds found.\r\nconst inputItems = $input.all();\r\n\r\nconst URL_FIELD_PRIORITY = [\r\n  '\u0412\u0435\u0431-\u0441\u0430\u0439\u0442', '\u0412\u0435\u0431-\u0441\u0430\u0439\u0442 1', '\u0412\u0435\u0431-\u0441\u0430\u0439\u0442 2', '\u0412\u0435\u0431-\u0441\u0430\u0439\u0442 3',\r\n  '\u0421\u0430\u0439\u0442', '\u0421\u0430\u0439\u0442 1', '\u0421\u0430\u0439\u0442 2', '\u0421\u0430\u0439\u0442 3',\r\n  'Website', 'Website 1', 'Website 2', 'Website 3',\r\n  'Site', 'Site 1', 'Site 2', 'Site 3',\r\n  'URL', 'Url', 'url', '\u0414\u043e\u043c\u0435\u043d', 'Domain', '\u0413\u043b\u0430\u0432\u043d\u0430\u044f \u0441\u0442\u0440\u0430\u043d\u0438\u0446\u0430', 'Homepage',\r\n  '\u0412\u041a\u043e\u043d\u0442\u0430\u043a\u0442\u0435', '\u0412\u043a\u043e\u043d\u0442\u0430\u043a\u0442\u0435', 'VK', 'vk', 'VK URL',\r\n  'Telegram 1', 'Telegram 2', 'Telegram 3', 'Telegram', 'telegram', '\u0422\u0435\u043b\u0435\u0433\u0440\u0430\u043c',\r\n  '2GIS URL', '2\u0413\u0418\u0421',\r\n];\r\n\r\nconst BLOCKED_SEED = /^(www\\.|m\\.)?(vk\\.com|vk\\.ru|t\\.me|telegram\\.me|telegram\\.org|2gis\\.ru|2gis\\.com|dgis\\.ru|wa\\.me|whatsapp\\.com|api\\.whatsapp\\.com|chat\\.whatsapp\\.com|viber\\.com|viber\\.click|max\\.ru|max\\.me|youtu\\.be|youtube\\.com|twitter\\.com|x\\.com|tiktok\\.com|pinterest\\.com|threads\\.net)$/i;\r\nconst VK_HOST = /^(www\\.|m\\.)?vk\\.(com|ru)$/i;\r\nconst TG_HOST = /^(www\\.)?(t\\.me|telegram\\.me|telegram\\.org)$/i;\r\nconst SOCIAL_HOST = /^(www\\.|m\\.)?(vk\\.com|vk\\.ru|t\\.me|telegram\\.me|telegram\\.org|facebook\\.com|instagram\\.com|linkedin\\.com|twitter\\.com|x\\.com|youtube\\.com|tiktok\\.com|ok\\.ru)$/i;\r\n\r\nconst DEEP_PATHS = [\n  // 
\u041a\u043e\u043d\u0442\u0430\u043a\u0442\u044b \u2014 \u0441\u0430\u043c\u044b\u0435 \u0447\u0430\u0441\u0442\u044b\u0435 \u043f\u0443\u0442\u0438 (en + ru \u0442\u0440\u0430\u043d\u0441\u043b\u0438\u0442)\n  '/contacts',        // \u043d\u0430\u0438\u0431\u043e\u043b\u0435\u0435 \u043f\u043e\u043f\u0443\u043b\u044f\u0440\u043d\u044b\u0439 EN\n  '/kontakty',        // RU \u0442\u0440\u0430\u043d\u0441\u043b\u0438\u0442 \"\u043a\u043e\u043d\u0442\u0430\u043a\u0442\u044b\"\n  '/contact',         // EN short\n  '/kontakt',         // RU/DE short\n  '/contacts.html',   // \u0441\u0442\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0435 \u0441\u0430\u0439\u0442\u044b\n  '/kontakty.html',\n  '/contact.html',\n  '/contact-us',\n  '/svyaz-s-nami',    // \"\u0441\u0432\u044f\u0437\u044c \u0441 \u043d\u0430\u043c\u0438\"\n  '/svyaz',           // \"\u0441\u0432\u044f\u0437\u044c\"\n  '/napisat',         // \"\u043d\u0430\u043f\u0438\u0441\u0430\u0442\u044c\"\n  '/feedback',        // \u043e\u0431\u0440\u0430\u0442\u043d\u0430\u044f \u0441\u0432\u044f\u0437\u044c\n  '/obratnaya-svyaz', // \"\u043e\u0431\u0440\u0430\u0442\u043d\u0430\u044f \u0441\u0432\u044f\u0437\u044c\"\n  '/obratnasvyaz',\n  // \u041e \u043a\u043e\u043c\u043f\u0430\u043d\u0438\u0438 \u2014 \u0447\u0430\u0441\u0442\u043e \u0441\u043e\u0434\u0435\u0440\u0436\u0438\u0442 email\n  '/about',\n  '/o-nas',           // \"\u043e \u043d\u0430\u0441\"\n  '/o-kompanii',      // \"\u043e \u043a\u043e\u043c\u043f\u0430\u043d\u0438\u0438\"\n  '/about.html',\n  '/about-us',\n  '/company',\n  '/ru/contacts',     // \u043c\u0443\u043b\u044c\u0442\u0438\u044f\u0437\u044b\u0447\u043d\u044b\u0435 \u0441\u0430\u0439\u0442\u044b\n]; // MAX_CRAWL=5 \u2192 homepage + \u0434\u043e 4 \u0433\u043b\u0443\u0431\u043e\u043a\u0438\u0445 \u0441\u0442\u0440\u0430\u043d\u0438\u0446\n\r\nconst MAX_CRAWL = 5;\r\n\r\n// Get hostname from URL using regex (no URL constructor)\r\nfunction getHost(url) {\r\n  const m = String(url || 
'').match(/^https?:\\/\\/([^\\/:?#\\s]+)/i);\r\n  if (!m) return '';\r\n  return m[1].toLowerCase().replace(/^www\\./, '');\r\n}\r\n\r\n// Get origin (scheme+host) from URL using regex\r\nfunction getOrigin(url) {\r\n  const m = String(url || '').match(/^(https?:\\/\\/[^\\/:?#\\s]+)/i);\r\n  return m ? m[1] : '';\r\n}\r\n\r\n// Canonical deduplication key\r\nfunction canonKey(url) {\r\n  return String(url || '').trim().toLowerCase().replace(/#.*$/, '').replace(/\\/+$/, '') || String(url || '').toLowerCase();\r\n}\r\n\r\n// Normalize and validate URL \u2014 pure regex, no URL constructor\r\nfunction fixUrl(raw) {\r\n  const s = String(raw == null ? '' : raw).replace(/[\\u0000-\\u001f]/g, '').trim();\r\n  if (!s) return '';\r\n  if (/^[+\\d\\s().\\-]{5,}$/.test(s)) return '';\r\n  if (/^[a-z0-9._%+\\-]+@/i.test(s)) return '';\r\n  const withScheme = /^https?:\\/\\//i.test(s) ? s : 'http://' + s.replace(/^\\/+/, '');\r\n  const hm = withScheme.match(/^https?:\\/\\/([^\\/:?#\\s@]+)/i);\r\n  if (!hm) return '';\r\n  const h = hm[1].toLowerCase();\r\n  if (!h || !h.includes('.') || h.length < 4) return '';\r\n  if (!/[a-z\u0430-\u044f\u0451]/iu.test(h)) return '';\r\n  if (BLOCKED_SEED.test(h)) return '';\r\n  return withScheme.replace(/#.*$/, '');\r\n}\r\n\r\nfunction extractFromValue(val) {\r\n  const s = String(val == null ? 
'' : val).trim();\r\n  if (!s) return [];\r\n  const found = [];\r\n  const seen = new Set();\r\n  const add = (u) => {\r\n    const fixed = fixUrl(u);\r\n    if (fixed) { const k = canonKey(fixed); if (!seen.has(k)) { seen.add(k); found.push(fixed); } }\r\n  };\r\n  const direct = s.match(/https?:\\/\\/[^\\s\"'<>]+/gi) || [];\r\n  direct.forEach(add);\r\n  const domains = s.match(/(?:[a-z0-9\\-]+\\.)+[a-z]{2,}(?:\\/[^\\s\"'<>]*)?/gi) || [];\r\n  domains.forEach(add);\r\n  s.split(/[,;|\\n]+/).forEach(part => add(part.trim()));\r\n  return found;\r\n}\r\n\r\nconst outputItems = [];\r\n\r\nfor (let sourceIndex = 0; sourceIndex < inputItems.length; sourceIndex++) {\r\n  try {\r\n    const item = inputItems[sourceIndex];\r\n    let row;\r\n    try {\r\n      const serialized = JSON.stringify(item.json || {});\r\n      row = JSON.parse(serialized);\r\n    } catch {\r\n      row = {};\r\n    }\r\n\r\n    if (sourceIndex === 0) {\r\n      const allK = Object.keys(row);\r\n      console.log('[SelectURL v4] batch=' + inputItems.length + ' keys=' + JSON.stringify(allK.slice(0, 20)));\r\n      console.log('[SelectURL v4] \u0412\u0435\u0431-\u0441\u0430\u0439\u0442_1=' + JSON.stringify(row['\\u0412\\u0435\\u0431-\\u0441\\u0430\\u0439\\u0442 1']) + ' 2GIS=' + JSON.stringify(row['2GIS URL']));\r\n    }\r\n\r\n    const rowNumber = row.row_number;\r\n    const stableKey = (rowNumber !== null && rowNumber !== undefined && String(rowNumber).trim() !== '')\r\n      ? 
String(rowNumber).trim()\r\n      : String(sourceIndex);\r\n\r\n    const seedCandidates = [];\r\n    const seenSeeds = new Set();\r\n\r\n    const addSeed = (url, tag) => {\r\n      if (seedCandidates.length >= 3) return;\r\n      const k = canonKey(url);\r\n      if (!k || seenSeeds.has(k)) return;\r\n      seenSeeds.add(k);\r\n      seedCandidates.push({ url, tag });\r\n    };\r\n\r\n    // Pass 1: check known URL fields in priority order\r\n    for (let fi = 0; fi < URL_FIELD_PRIORITY.length; fi++) {\r\n      if (seedCandidates.length >= 3) break;\r\n      const fieldName = URL_FIELD_PRIORITY[fi];\r\n      const fieldVal = row[fieldName];\r\n      if (fieldVal == null || fieldVal === '') continue;\r\n      const urls = extractFromValue(String(fieldVal));\r\n      for (let ui = 0; ui < urls.length && seedCandidates.length < 3; ui++) {\r\n        addSeed(urls[ui], 'field:' + fieldName);\r\n      }\r\n    }\r\n\r\n    // Pass 2: scan all fields for any URL-like content\r\n    if (seedCandidates.length < 3) {\r\n      const allKeys = Object.keys(row);\r\n      for (let ki = 0; ki < allKeys.length && seedCandidates.length < 3; ki++) {\r\n        const v = row[allKeys[ki]];\r\n        if (v == null || v === '') continue;\r\n        const sv = String(v);\r\n        if (!/https?:\\/\\//i.test(sv) && !sv.includes('.')) continue;\r\n        const urls = extractFromValue(sv);\r\n        for (let ui = 0; ui < urls.length && seedCandidates.length < 3; ui++) {\r\n          addSeed(urls[ui], 'scan:' + allKeys[ki]);\r\n        }\r\n      }\r\n    }\r\n\r\n    console.log('[SelectURL v4] row=' + stableKey + ' seeds=' + seedCandidates.length + (seedCandidates.length ? 
' urls=' + seedCandidates.map(x => x.url).join(' | ') : ''));\r\n\r\n    if (seedCandidates.length === 0) {\r\n      outputItems.push({\r\n        json: {\r\n          ...row,\r\n          targetUrl: '', targetHost: '', targetIsSocial: false,\r\n          seed_url: '', seed_source: 'none', crawl_depth: 0, url_source: 'none',\r\n          url_detected: false, source_row_index: sourceIndex,\r\n          source_row_stable_key: stableKey, source_row_number: rowNumber ?? '',\r\n          source_url_rank: 1, source_urls_count: 0,\r\n        },\r\n        pairedItem: { item: sourceIndex },\r\n      });\r\n      continue;\r\n    }\r\n\r\n    const crawlList = [];\r\n    const seenCrawl = new Set();\r\n\r\n    const addCrawl = (url, depth, source, seedUrl, seedTag) => {\r\n      if (crawlList.length >= MAX_CRAWL) return;\r\n      const k = canonKey(url);\r\n      if (!k || seenCrawl.has(k)) return;\r\n      seenCrawl.add(k);\r\n      const host = getHost(url);\r\n      crawlList.push({ url, depth, source, seedUrl, seedTag, targetHost: host, targetIsSocial: SOCIAL_HOST.test(host) });\r\n    };\r\n\r\n    for (let si = 0; si < seedCandidates.length; si++) {\r\n      const seed = seedCandidates[si];\r\n      addCrawl(seed.url, 0, seed.tag, seed.url, seed.tag);\r\n      const host = getHost(seed.url);\r\n      {\r\n        const origin = getOrigin(seed.url);\r\n        if (origin) {\r\n          for (let pi = 0; pi < DEEP_PATHS.length && crawlList.length < MAX_CRAWL; pi++) {\r\n            const deep = fixUrl(origin + DEEP_PATHS[pi]);\r\n            if (deep) addCrawl(deep, 1, 'deep:' + DEEP_PATHS[pi], seed.url, seed.tag);\r\n          }\r\n        }\r\n      }\r\n    }\r\n\r\n    for (let ci = 0; ci < crawlList.length; ci++) {\r\n      const entry = crawlList[ci];\r\n      outputItems.push({\r\n        json: {\r\n          ...row,\r\n          targetUrl: entry.url,\r\n          targetHost: entry.targetHost,\r\n          targetIsSocial: entry.targetIsSocial,\r\n          
seed_url: entry.seedUrl,\r\n          seed_source: entry.seedTag,\r\n          crawl_depth: entry.depth,\r\n          url_source: entry.source,\r\n          url_detected: true,\r\n          source_row_index: sourceIndex,\r\n          source_row_stable_key: stableKey,\r\n          source_row_number: rowNumber ?? '',\r\n          source_url_rank: ci + 1,\r\n          source_urls_count: crawlList.length,\r\n        },\r\n        pairedItem: { item: sourceIndex },\r\n      });\r\n    }\r\n  } catch (err) {\r\n    const row = (inputItems[sourceIndex] && inputItems[sourceIndex].json) || {};\r\n    const stableKey = (row.row_number != null && String(row.row_number).trim() !== '')\r\n      ? String(row.row_number).trim() : String(sourceIndex);\r\n    outputItems.push({\r\n      json: {\r\n        ...row,\r\n        targetUrl: '', targetHost: '', targetIsSocial: false,\r\n        seed_url: '', seed_source: 'select-url-error', crawl_depth: 0, url_source: 'error',\r\n        url_detected: false, source_row_index: sourceIndex,\r\n        source_row_stable_key: stableKey, source_row_number: (row.row_number ?? ''),\r\n        source_url_rank: 1, source_urls_count: 0,\r\n        select_url_error: String((err && err.message) || err),\r\n      },\r\n      pairedItem: { item: sourceIndex },\r\n    });\r\n  }\r\n}\r\n\r\nreturn outputItems;\r\n"
      },
      "id": "5",
      "name": "Select URL",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1120,
        220
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{ ($json[\"targetUrl\"] || \"\").toString().trim() }}",
              "operation": "isNotEmpty",
              "value2": ""
            }
          ]
        },
        "combineOperation": "all"
      },
      "id": "6",
      "name": "Has URL?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 1,
      "position": [
        1320,
        220
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "method": "GET",
        "url": "={{ $json[\"targetUrl\"] }}",
        "authentication": "none",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "User-Agent",
              "value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
            },
            {
              "name": "Accept",
              "value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
            },
            {
              "name": "Accept-Language",
              "value": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"
            },
            {
              "name": "Accept-Encoding",
              "value": "gzip, deflate"
            },
            {
              "name": "Cache-Control",
              "value": "no-cache"
            },
            {
              "name": "Pragma",
              "value": "no-cache"
            },
            {
              "name": "Connection",
              "value": "close"
            }
          ]
        },
        "options": {
          "timeout": 3000,
          "allowUnauthorizedCerts": true,
          "redirect": {
            "redirect": {
              "followRedirects": true,
              "maxRedirects": 3
            }
          },
          "response": {
            "response": {
              "responseFormat": "text",
              "outputPropertyName": "html",
              "fullResponse": false
            }
          }
        }
      },
      "id": "7",
      "name": "Fetch Website",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4,
      "position": [
        1520,
        220
      ],
      "continueOnFail": true,
      "onError": "continueRegularOutput",
      "retryOnFail": false
    },
    {
      "parameters": {
        "mode": "runOnceForEachItem",
        "language": "javaScript",
        "jsCode": "// Normalize Response v8\n// HTTP Request with outputPropertyName:'html' REPLACES item.json entirely.\n// We recover original data (row_number, E-mail, targetUrl, etc.) from Has URL? node.\nconst httpJson = $input.item.json || {};\nconst originalJson = $('Select URL').item.json || {};\n\nlet html = httpJson.html || httpJson.body || '';\nif (html && typeof html === 'object') {\n  try { html = JSON.stringify(html); } catch(e) { html = String(html); }\n}\nif (typeof html !== 'string') html = html == null ? '' : String(html);\n\nconst statusCode = html.length > 0 ? 200 : 0;\n\nreturn {\n  json: {\n    ...originalJson,\n    html,\n    statusCode,\n  },\n};"
      },
      "id": "14",
      "name": "Normalize Response",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1700,
        220
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "const inputItems = $input.all();\n\nconst emailRegex = /[a-z0-9._%+-]+@[a-z0-9\\-._]*[a-z0-9\u0430-\u044f\u0451-]+\\.(?:xn--[a-z0-9-]{2,58}|[a-z\u0430-\u044f\u0451]{2,24})/giu;\nconst emailValidationRegex = /^[a-z0-9._%+-]+@[a-z0-9\\-._]*[a-z0-9\u0430-\u044f\u0451-]+\\.(?:xn--[a-z0-9-]{2,58}|[a-z\u0430-\u044f\u0451]{2,24})$/iu;\nconst mailtoRegex = /mailto:([^\\s\"'<>]+)/gi;\n\nconst obfuscatedEmailRegexEn =\n  /([a-z0-9._%+-]+)\\s*(?:\\[at\\]|\\(at\\)|\\{at\\}|\\sat\\s|\\{@\\}|\uff20)\\s*([a-z0-9\u0430-\u044f\u0451.-]+)\\s*(?:\\[dot\\]|\\(dot\\)|\\{dot\\}|\\sdot\\s|\\{\\.\\}|\uff0e)\\s*([a-z\u0430-\u044f\u0451]{2,24}|xn--[a-z0-9-]{2,58})/giu;\n\nconst obfuscatedEmailRegexRu =\n  /([a-z0-9._%+-]+)\\s*(?:\\[\u0441\u043e\u0431\u0430\u043a\u0430\\]|\\(\u0441\u043e\u0431\u0430\u043a\u0430\\)|\\{\u0441\u043e\u0431\u0430\u043a\u0430\\}|\\s\u0441\u043e\u0431\u0430\u043a\u0430\\s|\\[\u0441\u043e\u0431\u0430\u0447\u043a\u0430\\]|\\(\u0441\u043e\u0431\u0430\u0447\u043a\u0430\\)|\\s\u0441\u043e\u0431\u0430\u0447\u043a\u0430\\s|\\[at\\]|\\(at\\)|\\sat\\s)\\s*([a-z0-9\u0430-\u044f\u0451.-]+)\\s*(?:\\[\u0442\u043e\u0447\u043a\u0430\\]|\\(\u0442\u043e\u0447\u043a\u0430\\)|\\{\u0442\u043e\u0447\u043a\u0430\\}|\\s\u0442\u043e\u0447\u043a\u0430\\s|\\[dot\\]|\\(dot\\)|\\sdot\\s)\\s*([a-z\u0430-\u044f\u0451]{2,24}|xn--[a-z0-9-]{2,58})/giu;\n\nconst ruLabelRegex = /(?:\u043f\u043e\u0447\u0442\u0430|\u044d\u043b\u0435\u043a\u0442\u0440\u043e\u043d\u043d\u0430\u044f\\s*\u043f\u043e\u0447\u0442\u0430|e-?mail|\u043f\u0438\u0448\u0438\u0442\u0435\\s+\u043d\u0430)\\s*[:\\-\u2013]?\\s*([a-z0-9._%+-]+@[a-z0-9\\-._]*[a-z0-9\u0430-\u044f\u0451-]+\\.(?:xn--[a-z0-9-]{2,58}|[a-z\u0430-\u044f\u0451]{2,24}))/giu;\nconst dataAttrRegex = /\\bdata-(?:e-?mail|mail)\\s*=\\s*[\"']([^\"']+)[\"']/giu;\nconst itempropEmailRegex = 
/<[^>]*\\bitemprop\\s*=\\s*[\"']email[\"'][^>]*>([\\s\\S]{0,300}?)<\\/[^>]+>|\\bitemprop\\s*=\\s*[\"']email[\"'][^>]*\\bcontent\\s*=\\s*[\"']([^\"']+)[\"']/giu;\nconst cfEncodedRegex = /data-cfemail\\s*=\\s*[\"']([a-f0-9]{4,})[\"']/gi;\nconst cfemailHrefRegex = /\\/cdn-cgi\\/l\\/email-protection#([a-f0-9]{4,})/gi;\n\nconst blockedLocalPatterns = [\n  /^example/i, /^test/i, /^noreply/i, /^no-reply/i, /^donotreply/i,\n  /^mailer-daemon$/i, /^postmaster$/i, /^abuse$/i, /^webmaster$/i,\n  /^[a-f0-9]{16,}$/i, /^[a-z0-9]{20,}$/i,\n];\n\nconst blockedDomains = [\n  'wixpress.com', 'wix.com', 'parastorage.com', 'sentry.io', 'sentry-cdn.com',\n  'cloudflare.com', 'cloudfront.net', 'google.com', 'google.ru', 'googleapis.com',\n  'googleusercontent.com', 'gstatic.com', 'googletagmanager.com', 'google-analytics.com',\n  'doubleclick.net', 'googlesyndication.com', 'yandex.net', 'duckduckgo.com', 'bing.com',\n  'example.com', 'example.org', 'example.net', 'example.ru', 'domain.com', 'domain.ru',\n  'site.ru', 'mail.com', 'jsdelivr.net', 'unpkg.com', 'jquery.com', 'cdnjs.cloudflare.com',\n  'w3.org', 'schema.org', 'ogp.me', 'fbcdn.net', 'ytimg.com', 'vkuser.net',\n  'segment.io', 'segment.com', 'amplitude.com', 'mixpanel.com', 'hotjar.com', 'newrelic.com',\n  'tildacdn.com', 'tildacdn.info',\n];\n\nconst decodeHtmlEntitiesLite = (text) =>\n  String(text || '')\n    .replace(/&amp;/gi, '&').replace(/&quot;/gi, '\"').replace(/&#39;/gi, \"'\")\n    .replace(/&lt;/gi, '<').replace(/&gt;/gi, '>')\n    .replace(/&#x([0-9a-f]+);/gi, (_, hex) => String.fromCharCode(parseInt(hex, 16)))\n    .replace(/&#(\\d+);/g, (_, dec) => String.fromCharCode(parseInt(dec, 10)));\n\nconst decodeCfEmail = (hexString) => {\n  try {\n    if (!hexString || hexString.length < 4 || hexString.length % 2 !== 0) return '';\n    const key = parseInt(hexString.slice(0, 2), 16);\n    let decoded = '';\n    for (let i = 2; i < hexString.length; i += 2) {\n      decoded += 
String.fromCharCode(parseInt(hexString.slice(i, i + 2), 16) ^ key);\n    }\n    return decoded;\n  } catch { return ''; }\n};\n\nconst normalizeEmail = (value) => {\n  if (!value) return '';\n  let email = String(value).trim();\n  email = decodeHtmlEntitiesLite(email);\n  email = email\n    .replace(/^mailto:/i, '')\n    .replace(/^['\"\\s<>()\\[\\]{}]+|['\"\\s<>()\\[\\]{}.,;:!]+$/g, '')\n    .replace(/\\u200b|\\u200c|\\u200d|\\ufeff/g, '')\n    .toLowerCase();\n  if (!/@/.test(email) || /\\s/.test(email) || email.length > 254) return '';\n  if (!emailValidationRegex.test(email)) return '';\n  return email;\n};\n\nconst isBlocked = (email) => {\n  if (!email) return true;\n  const parts = email.split('@');\n  if (parts.length !== 2) return true;\n  const local = parts[0] || '';\n  const domain = (parts[1] || '').toLowerCase();\n  if (blockedLocalPatterns.some((rx) => rx.test(local))) return true;\n  if (blockedDomains.some((d) => domain === d || domain.endsWith('.' + d))) return true;\n  return false;\n};\n\nconst collectByRegex = (text, regex, mapFn) => {\n  const found = [];\n  for (const m of String(text || '').matchAll(regex)) {\n    const candidate = mapFn(m);\n    if (!candidate) continue;\n    const email = normalizeEmail(candidate);\n    if (email && !isBlocked(email)) found.push(email);\n  }\n  return found;\n};\n\nconst extractFromJsonLikeScripts = (html) => {\n  const found = new Set();\n  const scriptRegex = /<script\\b([^>]*)>([\\s\\S]*?)<\\/script>/gi;\n  const stateMarkerRegex = /__NEXT_DATA__|__INITIAL_STATE__|__NUXT__|__APOLLO_STATE__|__REDUX_STATE__|__PRELOADED_STATE__|__SERVER_STATE__/i;\n  const walk = (node) => {\n    if (!node) return;\n    if (typeof node === 'string') {\n      const emails = node.match(emailRegex) || [];\n      for (const e of emails) { const n = normalizeEmail(e); if (n && !isBlocked(n)) found.add(n); }\n      emailRegex.lastIndex = 0;\n      return;\n    }\n    if (Array.isArray(node)) return node.forEach(walk);\n    if 
(typeof node === 'object') return Object.values(node).forEach(walk);\n  };\n  for (const m of String(html || '').matchAll(scriptRegex)) {\n    const attrs = String(m[1] || '');\n    const scriptBody = String(m[2] || '').trim();\n    if (!scriptBody) continue;\n    const hasMarker = stateMarkerRegex.test(attrs) || stateMarkerRegex.test(scriptBody);\n    if (!hasMarker) continue;\n    const candidates = [scriptBody];\n    const assignMatch = scriptBody.match(/(?:window\\.)?__(?:NUXT__|INITIAL_STATE__|APOLLO_STATE__|REDUX_STATE__|PRELOADED_STATE__|SERVER_STATE__)\\s*=\\s*([\\s\\S]*?);?\\s*$/i);\n    if (assignMatch && assignMatch[1]) candidates.push(assignMatch[1].trim());\n    let parsed = false;\n    for (const candidate of candidates) {\n      try { walk(JSON.parse(candidate)); parsed = true; break; } catch {}\n    }\n    if (parsed) continue;\n    const inline = scriptBody.match(emailRegex) || [];\n    for (const e of inline) { const n = normalizeEmail(e); if (n && !isBlocked(n)) found.add(n); }\n    emailRegex.lastIndex = 0;\n  }\n  return Array.from(found);\n};\n\nconst extractEmailsFromText = (text) => {\n  const found = new Set();\n  const raw = String(text || '');\n  const decodedEntity = decodeHtmlEntitiesLite(raw);\n  let decodedUri = decodedEntity;\n  try { decodedUri = decodeURIComponent(decodedEntity); } catch {}\n  const variants = [raw, decodedEntity, decodedUri];\n  for (const variant of variants) {\n    for (const m of variant.match(emailRegex) || []) {\n      const n = normalizeEmail(m);\n      if (n && !isBlocked(n)) found.add(n);\n    }\n    emailRegex.lastIndex = 0;\n    for (const m of variant.matchAll(mailtoRegex)) {\n      const n = normalizeEmail((m[1] || '').split('?')[0]);\n      if (n && !isBlocked(n)) found.add(n);\n    }\n    for (const m of variant.matchAll(obfuscatedEmailRegexEn)) {\n      const n = normalizeEmail(`${m[1] || ''}@${m[2] || ''}.${m[3] || ''}`);\n      if (n && !isBlocked(n)) found.add(n);\n    }\n    for (const m of 
variant.matchAll(obfuscatedEmailRegexRu)) {\n      const n = normalizeEmail(`${m[1] || ''}@${m[2] || ''}.${m[3] || ''}`);\n      if (n && !isBlocked(n)) found.add(n);\n    }\n    for (const m of variant.matchAll(ruLabelRegex)) {\n      const n = normalizeEmail(m[1] || '');\n      if (n && !isBlocked(n)) found.add(n);\n    }\n    for (const e of collectByRegex(variant, dataAttrRegex, (m) => m[1])) found.add(e);\n    for (const e of collectByRegex(variant, itempropEmailRegex, (m) => m[1] || m[2])) found.add(e);\n    for (const m of variant.matchAll(cfEncodedRegex)) {\n      const d = decodeCfEmail(m[1] || ''); const n = normalizeEmail(d);\n      if (n && !isBlocked(n)) found.add(n);\n    }\n    for (const m of variant.matchAll(cfemailHrefRegex)) {\n      const d = decodeCfEmail(m[1] || ''); const n = normalizeEmail(d);\n      if (n && !isBlocked(n)) found.add(n);\n    }\n  }\n  return Array.from(found);\n};\n\nconst extractFromJsonLd = (html) => {\n  const found = new Set();\n  const blockRegex = /<script\\b[^>]*type=[\"']application\\/ld\\+json[\"'][^>]*>([\\s\\S]*?)<\\/script>/gi;\n  const walk = (node) => {\n    if (!node) return;\n    if (typeof node === 'string') {\n      const matches = node.match(emailRegex) || [];\n      for (const m of matches) { const n = normalizeEmail(m); if (n && !isBlocked(n)) found.add(n); }\n      emailRegex.lastIndex = 0; return;\n    }\n    if (Array.isArray(node)) return node.forEach(walk);\n    if (typeof node === 'object') return Object.values(node).forEach(walk);\n  };\n  for (const match of String(html || '').matchAll(blockRegex)) {\n    const jsonText = String(match[1] || '').trim();\n    if (!jsonText) continue;\n    try { walk(JSON.parse(jsonText)); } catch {\n      const inner = jsonText.match(emailRegex) || [];\n      for (const e of inner) { const n = normalizeEmail(e); if (n && !isBlocked(n)) found.add(n); }\n      emailRegex.lastIndex = 0;\n    }\n  }\n  return Array.from(found);\n};\n\nconst rankEmails = (emails, 
domainsHint) => {\n  const hints = new Set((domainsHint || []).map((d) => String(d || '').toLowerCase().replace(/^www\\./, '')).filter(Boolean));\n  const preferredLocals = /^(info|contact|hello|support|sales|office|mail|admin)$/i;\n  const noisyLocals = /^(noreply|no-reply|donotreply|mailer-daemon|example|test)$/i;\n  const score = (email) => {\n    const [local, domainRaw] = String(email || '').toLowerCase().split('@');\n    const domain = (domainRaw || '').replace(/^www\\./, '');\n    let s = 0;\n    if (hints.has(domain)) s += 120;\n    for (const hint of hints) {\n      if (domain.endsWith('.' + hint) || hint.endsWith('.' + domain)) { s += 100; break; }\n    }\n    if (preferredLocals.test(local || '')) s += 40;\n    if ((local || '').length <= 4) s += 5;\n    if (/\\d{3,}/.test(local || '')) s -= 12;\n    if (noisyLocals.test(local || '')) s -= 80;\n    return s;\n  };\n  return Array.from(new Set(emails || []))\n    .map((email) => ({ email, score: score(email) }))\n    .sort((a, b) => b.score - a.score || a.email.localeCompare(b.email))\n    .map((x) => x.email);\n};\n\nconst getDomainHints = (bucket) => {\n  const hints = new Set();\n  for (const url of bucket.fetchedUrls) {\n    // Regex-based hostname extraction (no URL constructor \u2014 not available in n8n sandbox)\n    const m = String(url || '').match(/^https?:\\/\\/([^\\/:?#\\s]+)/i);\n    if (!m) continue;\n    const host = m[1].toLowerCase().replace(/^www\\./, '');\n    if (!host || /catalog\\.api\\.2gis\\./i.test(host)) continue;\n    hints.add(host);\n  }\n  const targetHost = String(bucket.mergedJson.targetHost || '').toLowerCase().replace(/^www\\./, '');\n  if (targetHost) hints.add(targetHost);\n  return Array.from(hints);\n};\n\nconst maxHtmlLength = 1500000;\n// FIX: Key buckets by source_row_stable_key (row_number-based) to prevent\n// cross-row email collisions when two rows share the same batch sourceIndex.\nconst buckets = new Map();\n\nfor (let idx = 0; idx < inputItems.length; idx++) 
{\n  try {\n    const item = inputItems[idx];\n    const currentJson = item.json || {};\n    // Prefer stable key (row_number string) set by Select URL node\n    const stableKey = String(currentJson.source_row_stable_key ?? currentJson.source_row_index ?? idx);\n\n    if (!buckets.has(stableKey)) {\n      buckets.set(stableKey, {\n        mergedJson: { ...currentJson },\n        emailsSet: new Set(),\n        fetchedUrls: new Set(),\n        successUrls: new Set(),\n        hasFetchError: false,\n        maxStatusCode: 0,\n        sortKey: Number(currentJson.source_row_number) || idx,\n      });\n    }\n\n    const bucket = buckets.get(stableKey);\n    bucket.mergedJson = { ...bucket.mergedJson, ...currentJson };\n\n    let htmlRaw = String(currentJson.html ?? currentJson.body ?? currentJson.data ?? currentJson.response ?? '');\n    if (htmlRaw.length > maxHtmlLength) htmlRaw = htmlRaw.slice(0, maxHtmlLength);\n\n    const statusCode = Number(currentJson.statusCode || currentJson.status || 0);\n    if (statusCode && statusCode > bucket.maxStatusCode) bucket.maxStatusCode = statusCode;\n\n    const prepared = htmlRaw\n      .replace(/<script\\b[^>]*>[\\s\\S]*?<\\/script>/gi, ' ')\n      .replace(/<style\\b[^>]*>[\\s\\S]*?<\\/style>/gi, ' ')\n      .replace(/<!--([\\s\\S]*?)-->/g, ' ');\n\n    for (const e of extractEmailsFromText(prepared)) bucket.emailsSet.add(e);\n    for (const e of extractFromJsonLd(htmlRaw)) bucket.emailsSet.add(e);\n    for (const e of extractFromJsonLikeScripts(htmlRaw)) bucket.emailsSet.add(e);\n\n    const fetchedUrl = String(currentJson.targetUrl || '').trim();\n    if (fetchedUrl) {\n      bucket.fetchedUrls.add(fetchedUrl);\n      const isSuccess = htmlRaw.length > 100;  // Has meaningful content\n      if (isSuccess) bucket.successUrls.add(fetchedUrl);\n    }\n\n    if (currentJson.error || (statusCode && statusCode >= 400) || (!htmlRaw && fetchedUrl)) {\n      bucket.hasFetchError = true;\n    }\n  } catch (err) {\n    const 
fallbackJson = (inputItems[idx] && inputItems[idx].json) || {};\n    const fallbackKey = String(fallbackJson.source_row_stable_key ?? fallbackJson.source_row_index ?? idx);\n    if (!buckets.has(fallbackKey)) {\n      buckets.set(fallbackKey, {\n        mergedJson: { ...fallbackJson },\n        emailsSet: new Set(),\n        fetchedUrls: new Set(),\n        successUrls: new Set(),\n        hasFetchError: true,\n        maxStatusCode: 0,\n        sortKey: Number(fallbackJson.source_row_number) || idx,\n      });\n    }\n    const bucket = buckets.get(fallbackKey);\n    bucket.hasFetchError = true;\n    bucket.mergedJson.extract_error = String((err && err.message) || err);\n  }\n}\n\nconst stableKeys = Array.from(buckets.keys()).sort((a, b) => {\n  return (buckets.get(a).sortKey || 0) - (buckets.get(b).sortKey || 0);\n});\n\nreturn stableKeys.map((stableKey, outputIdx) => {\n  const bucket = buckets.get(stableKey);\n  const mergedJson = { ...bucket.mergedJson };\n  const newEmails = Array.from(bucket.emailsSet);\n\n  const existing1 = String(mergedJson['E-mail'] ?? mergedJson['E-mail 1'] ?? mergedJson['Email'] ?? '').trim().toLowerCase();\n  const existing2 = String(mergedJson['E-mail 2'] ?? mergedJson['Email 2'] ?? '').trim().toLowerCase();\n  const existing3 = String(mergedJson['E-mail 3'] ?? mergedJson[' E-mail 3'] ?? mergedJson['Email 3'] ?? 
'').trim().toLowerCase();\n\n  const existingList = [existing1, existing2, existing3].filter(Boolean);\n  const existingSet = new Set(existingList);\n\n  const ranked = rankEmails([...existingList, ...newEmails], getDomainHints(bucket));\n  const final1 = ranked[0] || '';\n  const final2 = ranked[1] || '';\n  const final3 = ranked[2] || '';\n\n  const trulyNewEmails = ranked.filter((e) => !existingSet.has(e));\n\n  mergedJson.source_row_stable_key = stableKey;\n  mergedJson.parsed_urls = Array.from(bucket.fetchedUrls);\n  mergedJson.parsed_urls_count = mergedJson.parsed_urls.length;\n  mergedJson.success_urls_count = bucket.successUrls.size;\n  mergedJson.max_status_code = bucket.maxStatusCode;\n  mergedJson.emails_array = ranked;\n  mergedJson.emails_string = ranked.join(', ');\n  mergedJson.new_emails = trulyNewEmails;\n  mergedJson.new_emails_count = trulyNewEmails.length;\n  mergedJson.email_1 = final1;\n  mergedJson.email_2 = final2;\n  mergedJson.email_3 = final3;\n  // Write to exact sheet column names so autoMapInputData maps them correctly\n  mergedJson['E-mail'] = final1;\n  mergedJson['E-mail 2'] = final2;\n  mergedJson['E-mail 3'] = final3;\n  mergedJson.ID = mergedJson.ID ?? '';\n\n  if (newEmails.length > 0) {\n    mergedJson.parse_status = trulyNewEmails.length > 0 ? 'emails_found' : 'emails_unchanged';\n  } else if (bucket.hasFetchError && bucket.successUrls.size === 0) {\n    mergedJson.parse_status = 'fetch_error';\n  } else if (bucket.fetchedUrls.size === 0) {\n    mergedJson.parse_status = 'no_url';\n  } else {\n    mergedJson.parse_status = 'no_email_found';\n  }\n\n  delete mergedJson.html;\n  delete mergedJson.body;\n  delete mergedJson.data;\n  delete mergedJson.response;\n  delete mergedJson.headers;\n\n  return { json: mergedJson, pairedItem: { item: outputIdx } };\n});\n"
      },
      "id": "8",
      "name": "Extract Emails",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1880,
        220
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// Mark No URL \u2014 tags every incoming item with parse_status 'no_url' and empty extraction fields.\n// email_1..email_3 are copied from the first truthy value among the listed sheet header aliases.\nconst firstFilled = (row, headers) => {\n  for (const header of headers) {\n    if (row[header]) return row[header].toString();\n  }\n  return '';\n};\nconst linkFor = (entry, pos) => entry.pairedItem ?? { item: pos };\n\nconst marked = [];\n$input.all().forEach((entry, pos) => {\n  let result;\n  try {\n    const row = entry.json || {};\n    result = {\n      json: {\n        ...row,\n        email_1: firstFilled(row, ['E-mail', 'E-mail 1', 'Email']),\n        email_2: firstFilled(row, ['E-mail 2', 'Email 2']),\n        email_3: firstFilled(row, ['E-mail 3', ' E-mail 3', 'Email 3']),\n        emails_array: [],\n        emails_string: '',\n        new_emails: [],\n        new_emails_count: 0,\n        parse_status: 'no_url',\n        parsed_urls: [],\n        parsed_urls_count: 0,\n        success_urls_count: 0,\n        max_status_code: 0,\n      },\n      pairedItem: linkFor(entry, pos),\n    };\n  } catch (err) {\n    // Keep the item flowing even if building the full record failed; record why.\n    result = {\n      json: { ...(entry.json || {}), parse_status: 'no_url', mark_no_url_error: String((err && err.message) || err) },\n      pairedItem: linkFor(entry, pos),\n    };\n  }\n  marked.push(result);\n});\nreturn marked;"
      },
      "id": "15",
      "name": "Mark No URL",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1520,
        380
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// Progress Log \u2014 counts items per parse_status, logs the tallies plus up to 5 rows that gained new e-mails,\n// and returns the input items unchanged.\nconst items = $input.all();\nconst STATUS_KEYS = ['emails_found', 'no_email_found', 'no_url', 'fetch_error', 'emails_unchanged', 'other'];\nconst stats = { total: items.length };\nfor (const key of STATUS_KEYS) stats[key] = 0;\nconst sample = [];\nfor (const item of items) {\n  const j = item.json || {};\n  // Only whitelisted statuses get their own counter. A stray value such as 'total' or an inherited\n  // property name like 'constructor' is counted as 'other' instead of corrupting the stats object.\n  const status = STATUS_KEYS.includes(j.parse_status) ? j.parse_status : 'other';\n  stats[status] += 1;\n  if (sample.length < 5 && j.new_emails_count > 0) {\n    sample.push({ row: j.row_number ?? j.source_row_number ?? '?', name: j['\u041d\u0430\u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u0438\u0435'] || j.name || '?', emails: j.new_emails || [] });\n  }\n}\nconsole.log('[Astra] Batch progress:', JSON.stringify(stats));\nif (sample.length) console.log('[Astra] Sample new emails:', JSON.stringify(sample));\nreturn items;"
      },
      "id": "16",
      "name": "Progress Log",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        2060,
        220
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{ ($json[\"row_number\"] || $json[\"source_row_number\"] || \"\").toString() }}",
              "operation": "isNotEmpty",
              "value2": ""
            }
          ]
        },
        "combineOperation": "all"
      },
      "id": "9",
      "name": "Has row_number?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 1,
      "position": [
        2240,
        220
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// Log Skipped \u2014 writes one console line per incoming item (name, parse_status, parsed_urls_count)\n// and hands the items back untouched.\nconst skipped = $input.all();\nskipped.forEach((entry) => {\n  const row = entry.json || {};\n  const summary = {\n    name: row['\u041d\u0430\u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u0438\u0435'] || row.name,\n    parse_status: row.parse_status,\n    parsed_urls_count: row.parsed_urls_count,\n  };\n  console.log('[Astra] No row_number for row', JSON.stringify(summary));\n});\nreturn skipped;"
      },
      "id": "10",
      "name": "Log Skipped",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        2240,
        420
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{ ($json[\"email_1\"] || \"\").toString().trim() }}",
              "operation": "isNotEmpty",
              "value2": ""
            }
          ]
        },
        "combineOperation": "all"
      },
      "id": "18",
      "name": "Has new emails?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 1,
      "position": [
        2440,
        220
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// Prepare Update v3 \u2014 Single-path architecture\n// Emits exactly one item per input (nothing is filtered out):\n//   * no usable row number: sentinel { row_number: null, __skip: true }\n//   * no e-mails: marker row ('E-mail' = '\u2014') so Filter Rows skips it on the next run\n//   * e-mails present: full update of 'E-mail', 'E-mail 2', 'E-mail 3'\nconst items = $input.all();\n// Strip control characters, collapse whitespace, and cap the length of a cell value.\nconst san = v => String(v == null ? '' : v).replace(/[\\u0000-\\u001f\\u007f]/g, '').replace(/\\s+/g, ' ').trim().slice(0, 320);\n\nconst out = items.map((item, idx) => {\n  const j = item.json || {};\n  // Only genuine sheet row numbers may address a row. source_row_index is deliberately NOT a fallback:\n  // it appears to be a positional item index rather than a sheet row (NOTE(review): confirm against Select URL),\n  // and using it here could write e-mails into the wrong row.\n  const row = Number(j.row_number ?? j.source_row_number ?? 0);\n  const e1 = san(j.email_1 ?? j['E-mail'] ?? '');\n  const e2 = san(j.email_2 ?? j['E-mail 2'] ?? '');\n  const e3 = san(j.email_3 ?? j['E-mail 3'] ?? '');\n  const paired = item.pairedItem ?? { item: idx };\n\n  // No valid row number (missing, non-numeric, fractional, or < 1) \u2014 emit a sentinel so the\n  // item count is preserved and the loop keeps going; Update Row is configured to continue on failure.\n  if (!Number.isInteger(row) || row < 1) {\n    return { json: { row_number: null, __skip: true }, pairedItem: paired };\n  }\n\n  // No emails found \u2014 write marker so Filter Rows skips this row on next run\n  if (!e1 && !e2 && !e3) {\n    console.log('[PrepareUpdate] row=' + row + ' no_email \u2192 marker');\n    return { json: { row_number: row, 'E-mail': '\u2014', 'E-mail 2': '', 'E-mail 3': '' }, pairedItem: paired };\n  }\n\n  // Has emails \u2014 prepare full update\n  const result = { json: { row_number: row, 'E-mail': e1, 'E-mail 2': e2, 'E-mail 3': e3 }, pairedItem: paired };\n  console.log('[PrepareUpdate] row=' + row + ' email_1=' + e1);\n  return result;\n});\n\nconsole.log('[PrepareUpdate] in=' + items.length + ' out=' + out.length);\nreturn out;"
      },
      "id": "17",
      "name": "Prepare Update",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        2620,
        220
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "authentication": "oAuth2",
        "resource": "sheet",
        "operation": "update",
        "documentId": {
          "mode": "id",
          "value": "1RDa3Ui_N4wXK5tzeeyNoXjUknjFoT7S62D_j38MyEjk"
        },
        "sheetName": {
          "mode": "name",
          "value": "Sheet1"
        },
        "dataMode": "autoMapInputData",
        "columnToMatchOn": "row_number",
        "options": {
          "valueInputMode": "RAW"
        }
      },
      "id": "update-row-sheets",
      "name": "Update Row",
      "type": "n8n-nodes-base.googleSheets",
      "typeVersion": 4,
      "position": [
        2820,
        220
      ],
      "continueOnFail": true,
      "onError": "continueRegularOutput"
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// Cleanup \u2014 swaps every incoming item for an empty one: same item count, no payload.\nconst blanks = [];\nfor (const _entry of $input.all()) {\n  blanks.push({ json: {} });\n}\nreturn blanks;"
      },
      "id": "19",
      "name": "Cleanup",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        3020,
        300
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// Throttle v2 \u2014 intentionally adds no delay; emits one empty item per input item.\n// Why no Wait node: its webhook is consumed after the first loop iteration,\n// which makes the workflow hang on subsequent batches.\n// The HTTP requests already act as natural rate-limiting, so no artificial pause is added.\nconst received = $input.all();\nreturn Array.from({ length: received.length }, () => ({ json: {} }));"
      },
      "id": "20",
      "name": "Throttle",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        3220,
        300
      ],
      "continueOnFail": true
    },
    {
      "id": "filter-rows-v14",
      "name": "Filter Rows",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        570,
        300
      ],
      "parameters": {
        "mode": "runOnceForAllItems",
        "jsCode": "// Filter Rows v3 \u2014 skip rows that already have email, cap at ROWS_LIMIT per run\n// The first e-mail column is looked up under the same header aliases the other nodes in this\n// workflow use ('E-mail', 'E-mail 1', 'Email'), so a row filled under any alias is not re-fetched.\nconst ROWS_LIMIT = 100;\nconst PRIMARY_EMAIL_KEYS = ['E-mail', 'E-mail 1', 'Email'];\nconst items = $input.all();\nconst hasPrimaryEmail = (row) => PRIMARY_EMAIL_KEYS.some((key) => (row[key] || '').toString().trim() !== '');\n// item.json is guarded so a malformed item counts as needing an e-mail instead of throwing.\nconst needsEmail = items.filter((item) => !hasPrimaryEmail(item.json || {}));\nconst batch = needsEmail.slice(0, ROWS_LIMIT);\nconsole.log('[FilterRows] total=' + items.length + ' needs_email=' + needsEmail.length + ' this_run=' + batch.length);\nreturn batch;"
      }
    }
  ],
  "connections": {
    "Manual Trigger": {
      "main": [
        [
          {
            "node": "Get Row(s)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Row(s)": {
      "main": [
        [
          {
            "node": "Filter Rows",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "Select URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Select URL": {
      "main": [
        [
          {
            "node": "Fetch Website",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Fetch Website": {
      "main": [
        [
          {
            "node": "Normalize Response",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Normalize Response": {
      "main": [
        [
          {
            "node": "Extract Emails",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Emails": {
      "main": [
        [
          {
            "node": "Prepare Update",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Prepare Update": {
      "main": [
        [
          {
            "node": "Update Row",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Update Row": {
      "main": [
        [
          {
            "node": "Cleanup",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Cleanup": {
      "main": [
        [
          {
            "node": "Throttle",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Throttle": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter Rows": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {
    "executionOrder": "v1",
    "saveDataErrorExecution": "all",
    "saveDataSuccessExecution": "none",
    "saveExecutionProgress": false,
    "saveManualExecutions": false,
    "executionTimeout": -1
  },
  "meta": {
    "templateCredsSetupCompleted": false
  },
  "id": "astra-email-extractor-v2-fixed"
}