This workflow corresponds to n8n.io template #11637 — we link there as the canonical source.
This workflow follows the Form → Form Trigger recipe pattern — see all workflows that pair these two integrations.
The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and activate it. Full import guide →
{
"name": "Zyte AI Web Scraper",
"nodes": [
{
"id": "b55f2e35-326e-45c1-bc53-a812daa5db21",
"name": "Main form submission",
"type": "n8n-nodes-base.formTrigger",
"position": [
-3728,
1472
],
"parameters": {
"options": {},
"formTitle": "AI Web Scraper",
"formFields": {
"values": [
{
"fieldLabel": "Target URL",
"placeholder": "e.g. https://books.toscrape.com/",
"requiredField": true
},
{
"fieldType": "dropdown",
"fieldLabel": "Select Site Category",
"fieldOptions": {
"values": [
{
"option": "Online Store / Product (E-commerce)"
},
{
"option": "News, Blog, Forum or Article Site"
},
{
"option": "Job Board / Career Site"
},
{
"option": "Search Engine Results (SERP)"
},
{
"option": "General / Other Website"
}
]
},
"requiredField": true
},
{
"fieldLabel": "Zyte API Key",
"placeholder": "Your API Key Goes Here",
"requiredField": true
},
{
"html": "<div style=\"margin-top: -10px; font-size: 0.85em; color: #6b7280;\">\n Don't have one? \n <a href=\"https://www.zyte.com/?utm_campaign=Discord_n8n_tpl&utm_activity=Community&utm_medium=social&utm_source=Discord\" target=\"_blank\" rel=\"noopener noreferrer\" style=\"color: #ea580c; text-decoration: underline;\">\n Get your free API key here \u2192\n </a>.\n</div>",
"fieldType": "html"
}
]
},
"formDescription": "Enter a URL and select your goal.\nThe workflow will automatically route your request to the best AI schema, or let you extract raw data manually."
},
"typeVersion": 2.3
},
{
"id": "07b730ae-d4cb-4901-93d8-499e6b2d19e7",
"name": "Product Extraction Goal",
"type": "n8n-nodes-base.switch",
"position": [
-2624,
1104
],
"parameters": {
"rules": {
"values": [
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "beb8e0bf-3d06-4511-bfa0-9e4847766441",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.extraction_goal }}",
"rightValue": "single"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "d1684ffb-aeb8-4b45-9c4b-729fea29bbcc",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.extraction_goal }}",
"rightValue": "list"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "19209cdf-6fc8-45bf-b6c3-949954b75a2c",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.extraction_goal }}",
"rightValue": "details_current"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "abe62040-53bc-43bf-8ed8-a641760fb024",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.extraction_goal }}",
"rightValue": "crawl_list"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "5af9a3f6-adbf-4fec-89ea-19c19879620d",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.extraction_goal }}",
"rightValue": "crawl_all"
}
]
}
}
]
},
"options": {}
},
"typeVersion": 3.3
},
{
  "id": "a05269ed-6a54-4546-8d7a-aa096eb13ee3",
  "name": "Format Output [ Single || List ]",
  "type": "n8n-nodes-base.set",
  "position": [
    1312,
    -1280
  ],
  "parameters": {
    "options": {},
    "assignments": {
      "assignments": [
        {
          "id": "b0af1640-c286-4053-aaf1-99b9aa194dc2",
          "name": "data",
          "type": "object",
          "value": "={{ $json.productNavigation || $json.product || $json.productList || $json.browserHtml || $json.articleList?.articles || $json.article || $json.jobPosting || $json.jobPostingNavigation }}"
        }
      ]
    }
  },
  "typeVersion": 3.4
},
{
"id": "ceefac86-87ec-425c-8486-e7aa4d8f7dd1",
"name": "HTTP Node: [List] Get Current Page",
"type": "n8n-nodes-base.httpRequest",
"position": [
-192,
-992
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "={{ $json.target_schema }}",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "a8df8c65-28c5-4270-b284-267518d560f0",
"name": "HTTP Node: [Current Page] Get Item URLs",
"type": "n8n-nodes-base.httpRequest",
"position": [
-192,
-640
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "={{ $json.navigation_schema }}",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json['Zyte API Key'] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "0cb9fc7d-c65a-4cc7-9b4a-6746457d4e6b",
"name": "[Current Page] Split Items",
"type": "n8n-nodes-base.splitOut",
"position": [
128,
-640
],
"parameters": {
"options": {},
"fieldToSplitOut": "={{ \n$json.productNavigation ? 'productNavigation.items' : \n$json.articleNavigation ? 'articleNavigation.items' : \n$json.jobPostingNavigation ? 'jobPostingNavigation.items' : \n'items' \n}}"
},
"typeVersion": 1
},
{
"id": "f91a375e-08b8-4051-a964-e6b35acd18b6",
"name": "[Current Page] Item Loop",
"type": "n8n-nodes-base.splitInBatches",
"position": [
1104,
-640
],
"parameters": {
"options": {
"reset": false
},
"batchSize": 5
},
"typeVersion": 3
},
{
"id": "3734b089-e080-4d10-8bb3-680d1c213037",
"name": "HTTP Node: [Current Page] Get Item Details",
"type": "n8n-nodes-base.httpRequest",
"position": [
1312,
-624
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "={{ $('Product Extraction Goal').item.json.target_schema }}",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "2ea166b1-3b85-4ebf-a99b-df27ffa4ca9b",
"name": "[Details-All] Init State",
"type": "n8n-nodes-base.set",
"position": [
-464,
496
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "ca1aad2b-7dbc-4a00-aad4-d3e28259523b",
"name": "target_url",
"type": "string",
"value": "={{ $json.url }}"
},
{
"id": "5542d947-4475-4d5c-9543-4434d30ae8e6",
"name": "target_schema",
"type": "string",
"value": "={{ $json.target_schema }}"
},
{
"id": "12d0829d-1e1c-4174-a566-b5b834fa88ac",
"name": "navigation_schema",
"type": "string",
"value": "={{ $json.navigation_schema }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "3b0d0ff6-856a-4bee-9f46-daa5cd8af809",
"name": "[Details-All] Merge Pages",
"type": "n8n-nodes-base.merge",
"position": [
-64,
512
],
"parameters": {},
"typeVersion": 3.2
},
{
"id": "a31b526e-f7f0-4180-b55a-a18ebbc70cb3",
"name": "HTTP Node: [Details-All] Crawler (Phase 1)",
"type": "n8n-nodes-base.httpRequest",
"position": [
144,
512
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.target_url }}"
},
{
"name": "={{ $('Zyte Config Generator').item.json.navigation_schema }}",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "11a8dcc9-c053-48a9-b341-1c07d3845a99",
"name": "[Details-All] URL Collector",
"type": "n8n-nodes-base.code",
"position": [
336,
512
],
"parameters": {
"jsCode": "// [Details-All] URL COLLECTOR\n// Collects URLs from Products, Articles, or Jobs\n\nconst staticData = $getWorkflowStaticData('global');\n\n// 1. Initialize Memory\nif (!staticData.allItemUrls) {\n staticData.allItemUrls = [];\n staticData.visitedPages = [];\n}\n\n// 2. Get Data from HTTP Node\nconst response = $input.first().json;\n\n// --- DYNAMIC DETECTION START ---\n// Detect which navigation type was returned by Zyte\nlet navObject = null;\nif (response.productNavigation) navObject = response.productNavigation;\nelse if (response.articleNavigation) navObject = response.articleNavigation;\nelse if (response.jobPostingNavigation) navObject = response.jobPostingNavigation;\nelse if (response.forumThreadNavigation) navObject = response.forumThreadNavigation;\n\nconst items = navObject ? navObject.items : [];\nconst nextPage = navObject ? navObject.nextPage : null;\n// --- DYNAMIC DETECTION END ---\n\n// 3. Extract JUST the URLs\nif (items && items.length > 0) {\n const urls = items.map(item => ({ url: item.url }));\n staticData.allItemUrls.push(...urls);\n}\n\n// 4. Navigation Logic\nlet nextUrl = null;\nlet stop = false;\n\nif (nextPage && nextPage.url) {\n if (staticData.visitedPages.includes(nextPage.url)) {\n stop = true;\n } else {\n // Mark current page as visited\n staticData.visitedPages.push(response.url);\n \n // --- PRODUCTION CODE ---\n nextUrl = nextPage.url; \n \n // --- TEST CODE (Uncomment to limit pages) ---\n // if (staticData.visitedPages.length >= 2) { stop = true; } \n // else { nextUrl = nextPage.url; }\n }\n} else {\n stop = true;\n}\n\nreturn {\n json: {\n stop: stop,\n nextPageUrl: nextUrl,\n totalUrlsCollected: staticData.allItemUrls.length\n }\n};"
},
"typeVersion": 2
},
{
"id": "adc53f96-a207-4092-b205-a5d91e4eccf9",
"name": "[Details-All] More Pages?",
"type": "n8n-nodes-base.if",
"position": [
544,
512
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "2d5e6515-7b6d-4b6f-bfdc-644cd324fadc",
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
},
"leftValue": "={{ $json.stop }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "10e22675-55c3-4835-b3bc-641afb942fb7",
"name": "[Details-All] Set Next URL",
"type": "n8n-nodes-base.set",
"position": [
752,
608
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "223e56ca-4ed7-402c-9e28-972f7820b841",
"name": "target_url",
"type": "string",
"value": "={{ $json.nextPageUrl }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "f8022ddb-0aed-4138-b387-fdd068416721",
"name": "[Details-All] Unpack List (Phase 2)",
"type": "n8n-nodes-base.code",
"position": [
752,
416
],
"parameters": {
"jsCode": "const staticData = $getWorkflowStaticData('global');\nconst allUrls = staticData.allItemUrls || [];\n\n// Reset the static data so next run starts fresh\nstaticData.allItemUrls = [];\nstaticData.visitedPages = [];\n\n// Output the items so the next Loop node sees them!\nreturn allUrls.map(u => ({ json: { url: u.url } }));"
},
"typeVersion": 2
},
{
"id": "a511ae0b-245b-4963-a8cd-fc0717ed1d76",
"name": "[Details-All] Batch Processor",
"type": "n8n-nodes-base.splitInBatches",
"onError": "continueRegularOutput",
"position": [
1088,
416
],
"parameters": {
"options": {
"reset": "={{ $prevNode.name === 'Split Out4' }}"
},
"batchSize": 100
},
"typeVersion": 3,
"alwaysOutputData": false
},
{
"id": "32eb4485-a392-4317-857d-400cd9586fdb",
"name": "HTTP Node: [Details-All] Get Details",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueRegularOutput",
"position": [
1376,
432
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "={{ $('[Details-All] Init State').item.json.target_schema }}",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "4a4adade-4953-44e5-9121-1b7720e30e98",
"name": "[Details-All] Accumulator",
"type": "n8n-nodes-base.code",
"position": [
1600,
432
],
"parameters": {
"jsCode": "// [Details-All] ACCUMULATOR\n// Saves detailed data (Product, Article, Job, etc.) inside the loop\n\nconst staticData = $getWorkflowStaticData('global');\n\n// 1. Initialize memory\nif (!staticData.finalResults) {\n staticData.finalResults = [];\n}\n\n// 2. Get Input Items\nconst items = $input.all();\nconst validBatch = [];\n\n// 3. Process Items\nfor (const item of items) {\n \n // Option A: Success - Check for ANY valid Zyte schema\n if (item.json.product) {\n validBatch.push(item.json.product);\n }\n else if (item.json.article) {\n validBatch.push(item.json.article);\n }\n else if (item.json.jobPosting) {\n validBatch.push(item.json.jobPosting);\n }\n \n // Option B: Success (Fallback - General browserHtml or Flat JSON)\n // We check that it has NO error message\n else if (item.json && !item.json.error && !item.json.message) {\n validBatch.push(item.json);\n }\n \n // Option C: FAILURE (Save an Error Record)\n else {\n validBatch.push({\n name: \"SKIPPED - ERROR\",\n url: \"See error details\",\n error_message: item.json.message || item.json.error || \"Unknown API Error\",\n timestamp: new Date().toISOString()\n });\n }\n}\n\n// 4. Push to memory\nif (validBatch.length > 0) {\n staticData.finalResults.push(...validBatch);\n}\n\n// 5. Pass data through\nreturn items;"
},
"typeVersion": 2
},
{
"id": "e21d3a15-271c-4d97-924a-47a602d09f70",
"name": "[Details-All] Final Output",
"type": "n8n-nodes-base.code",
"position": [
1600,
240
],
"parameters": {
"jsCode": "// COMBINED RETRIEVER & FORMATTER\nconst staticData = $getWorkflowStaticData('global');\n\n// 1. Retrieve everything we saved in the Accumulator\n// If nothing is there, default to an empty array\nconst allItems = staticData.finalResults || [];\n\n// 2. Reset memory for the next run (Cleanup)\n// This ensures your next execution starts with a fresh bag\nstaticData.finalResults = [];\n\n// 3. Filter the data to remove nulls or bad values\n// (Safety check from your second code block)\nconst validItems = allItems.filter(item => item && typeof item === 'object');\n\n// 4. Map the valid items directly to n8n format for the CSV node\nreturn validItems.map(item => {\n return {\n json: item\n };\n});"
},
"typeVersion": 2
},
{
"id": "01a48744-c1d6-4ec2-bdea-4ae62a0c2948",
"name": "HTTP Node: [Single Item] Get Details",
"type": "n8n-nodes-base.httpRequest",
"position": [
-192,
-1296
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "={{ $json.target_schema }}",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "222abbf5-d347-44aa-ac1e-da07a63b97fe",
"name": "Route by Category",
"type": "n8n-nodes-base.switch",
"position": [
-3472,
1424
],
"parameters": {
"rules": {
"values": [
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "ff228f24-db2c-4a4c-8537-736a1549b61b",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"Select Site Category\"] }}",
"rightValue": "=Online Store / Product (E-commerce)"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "fa859055-7d10-4a4d-824a-f9f2c6a527f1",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"Select Site Category\"] }}",
"rightValue": "News, Blog, Forum or Article Site"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "48ffd4fc-3541-40f0-940e-beb5812a9207",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"Select Site Category\"] }}",
"rightValue": "Job Board / Career Site"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "1e6c9af3-906c-432c-9bdd-808efad19c75",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"Select Site Category\"] }}",
"rightValue": "Search Engine Results (SERP)"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "40faed08-3441-4738-b96d-0e2d7e880617",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"Select Site Category\"] }}",
"rightValue": "General / Other Website"
}
]
}
}
]
},
"options": {}
},
"typeVersion": 3.3
},
{
  "id": "9dc68abd-7ab3-493a-9f3b-a95a81482b53",
  "name": "Zyte Config Generator",
  "type": "n8n-nodes-base.code",
  "position": [
    -2928,
    1152
  ],
  "parameters": {
    "jsCode": "// ZYTE CONFIG GENERATOR\n// Translates Form Inputs into clean Variables\n\n// --- MAPPINGS ---\nconst categoryMap = {\n \"Online Store / Product (E-commerce)\": \"product\",\n \"News, Blog, Forum or Article Site\": \"article\",\n \"Job Board / Career Site\": \"job\"\n};\n\nconst goalMap = {\n // 1. Single Item\n \"Scrape details of a SINGLE item\": \"single\",\n \"Scrape details of a SINGLE job post\": \"single\",\n \"Scrape content of a SINGLE article\": \"single\",\n\n // 2. List (Current Page only)\n \"Get List of items from THIS PAGE only\": \"list\",\n\n // 3. Crawl List (URLs from ALL pages)\n \"Get List of items from all pages\": \"crawl_list\",\n \"Get List of Job URLs from ALL pages\": \"crawl_list\",\n \"Get List of Article URLs from ALL pages\": \"crawl_list\",\n\n // 4. Details (Current Page - Items on this page)\n \"Scrape details of all items on THIS page\": \"details_current\",\n \"Scrape content of articles on THIS page\": \"details_current\",\n\n // 5. Crawl Details (Full Scrape - ALL pages)\n \"Scrape details of all items on ALL pages\": \"crawl_all\",\n \"Scrape content of articles on ALL pages\": \"crawl_all\",\n \"Scrape job details from ALL pages\": \"crawl_all\"\n};\n\n// 1. Get Inputs\nconst rawCategory = $input.first().json[\"Site Type\"];\nconst rawGoal = $input.first().json[\"What is your extraction goal?\"];\nconst url = $input.first().json.URL;\n\n// 2. Map to Short Keys\nconst category = categoryMap[rawCategory] || 'product';\nconst goal = goalMap[rawGoal] || 'single';\n\n// 3. Determine Base Key\nlet baseKey = 'product'; \nif (category === 'article') baseKey = 'article';\nif (category === 'job') baseKey = 'jobPosting';\n\n// 4. LOGIC ENGINE: Set Target and Navigation Schemas\nlet targetSchema = baseKey; // What data do we want?\nlet navigationSchema = null; // How do we find the next page?\n\n// --- SCENARIO 1: SINGLE ITEM ---\nif (goal === 'single') {\n targetSchema = baseKey; // e.g. 'product'\n navigationSchema = null;\n}\n\n// --- SCENARIO 2: LIST (THIS PAGE ONLY) ---\nelse if (goal === 'list') {\n // Products/Articles use 'List'. Jobs use 'Navigation'.\n if (baseKey === 'jobPosting') {\n targetSchema = 'jobPostingNavigation';\n } else {\n targetSchema = baseKey + 'List'; // e.g. 'productList'\n }\n navigationSchema = null;\n}\n\n// --- SCENARIO 3: CRAWL LIST (ALL PAGES) ---\n// CORRECTION APPLIED HERE\nelse if (goal === 'crawl_list') {\n // TARGET: We want rich list data (same as Scenario 2)\n if (baseKey === 'jobPosting') {\n targetSchema = 'jobPostingNavigation';\n } else {\n targetSchema = baseKey + 'List'; // e.g. 'productList'\n }\n \n // NAVIGATION: We need to traverse pages to find the next list\n navigationSchema = baseKey + 'Navigation'; \n}\n\n// --- SCENARIO 4 & 5: DETAILS (THIS PAGE OR ALL PAGES) ---\nelse if (goal === 'details_current' || goal === 'crawl_all') {\n // TARGET: We want full details per item (e.g. 'product')\n targetSchema = baseKey; \n \n // NAVIGATION: We need to find the item links first\n navigationSchema = baseKey + 'Navigation'; \n}\n\n// 5. Output\nreturn {\n json: {\n url: url,\n extraction_goal: goal, // For Switch Node\n target_schema: targetSchema, // For Data Extraction (e.g. productList)\n navigation_schema: navigationSchema, // For Loop Control (e.g. productNavigation)\n // Debugging\n raw_goal: rawGoal \n }\n};"
  },
  "typeVersion": 2
},
{
"id": "b3be260c-4031-4080-bf17-9bad08b320d5",
"name": "[List-All] Get Item List",
"type": "n8n-nodes-base.httpRequest",
"position": [
1072,
-48
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.currentScrapeUrl }}"
},
{
"name": "={{ $('[List-All] Init State').item.json.target_schema }}",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "ab7821f2-9ce5-4620-9c6f-aed326def6d4",
"name": "[List-All] List Accumulator",
"type": "n8n-nodes-base.code",
"position": [
1280,
-48
],
"parameters": {
"jsCode": "// [List-All] ACCUMULATOR (Universal)\n// Saves the batch of items found on the current page to memory\n// Works for Products, Articles, and Jobs\n\nconst staticData = $getWorkflowStaticData('global');\n\n// 1. Initialize Backpack\nif (!staticData.backpack) {\n staticData.backpack = [];\n}\n\n// 2. Get Data from the HTTP List Node\nconst response = $input.first().json;\n\n// 3. DYNAMIC EXTRACTION\n// We check for all possible list types Zyte might return for our supported categories\nlet newItems = [];\n\nif (response.productList && response.productList.products) {\n newItems = response.productList.products;\n} \nelse if (response.articleList && response.articleList.articles) {\n newItems = response.articleList.articles;\n}\nelse if (response.jobPostingNavigation && response.jobPostingNavigation.items) {\n // Jobs usually return 'items' inside navigation schema\n newItems = response.jobPostingNavigation.items;\n}\n// Fallbacks for variations in Zyte API responses\nelse if (response.items) {\n newItems = response.items;\n}\nelse if (response.products) {\n newItems = response.products;\n}\n\n// 4. Save to Memory (Safety Check included)\nif (newItems.length > 0) {\n staticData.backpack.push(...newItems);\n}\n\n// 5. Pass through (Output Status)\nreturn {\n json: {\n status: \"Saved\",\n type_detected: Object.keys(response).find(k => k.includes('List') || k.includes('Navigation')) || \"unknown\",\n items_found_on_page: newItems.length,\n total_items_collected: staticData.backpack.length\n }\n};"
},
"typeVersion": 2
},
{
"id": "f69aed70-5f72-40a1-a05d-a04c7e435aac",
"name": "[List-All] Init State",
"type": "n8n-nodes-base.set",
"position": [
-464,
-80
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "8e00d19a-bb7c-460e-b2b7-2bd4320287bc",
"name": "url",
"type": "string",
"value": "={{ $json.url }}"
},
{
"id": "517201f4-1765-4c36-a6ae-3b82091416a2",
"name": "target_schema",
"type": "string",
"value": "={{ $json.target_schema }}"
},
{
"id": "efd367fb-fe6f-4c5e-b49d-773c9e611901",
"name": "navigation_schema",
"type": "string",
"value": "={{ $json.navigation_schema }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "72f2b348-cf8a-46f5-9b45-6d6758eaa647",
"name": "[List-All] Merge Pages",
"type": "n8n-nodes-base.merge",
"position": [
-64,
-64
],
"parameters": {},
"typeVersion": 3.2
},
{
  "id": "136fd770-8036-4f5d-815f-ae5a2bc56416",
  "name": "HTTP Node: [List-All] Get Page URLs",
  "type": "n8n-nodes-base.httpRequest",
  "position": [
    144,
    -64
  ],
  "parameters": {
    "url": "https://api.zyte.com/v1/extract",
    "method": "POST",
    "options": {},
    "sendBody": true,
    "sendHeaders": true,
    "bodyParameters": {
      "parameters": [
        {
          "name": "url",
          "value": "={{ $json.url }}"
        },
        {
          "name": "={{ $('Zyte Config Generator').item.json.navigation_schema }}",
          "value": "={{true}}"
        }
      ]
    },
    "headerParameters": {
      "parameters": [
        {
          "name": "authorization",
          "value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
        }
      ]
    }
  },
  "retryOnFail": true,
  "typeVersion": 4.2
},
{
"id": "d1fb4130-e846-4e25-9117-21e8abc39773",
"name": "[List-All] Page Controller",
"type": "n8n-nodes-base.code",
"position": [
352,
-64
],
"parameters": {
"jsCode": "// [List-All] PAGE CONTROLLER (The Brain)\n// Handles loop logic for Products, Articles, and Jobs automatically\n\nconst staticData = $getWorkflowStaticData('global');\n\n// 1. Initialize Memory\nif (!staticData.visited) staticData.visited = [];\n\n// 2. Get Navigation Data from the previous HTTP Node\n// Ensure the node name inside $() matches your actual HTTP node name!\nconst navData = $('HTTP Node: [List-All] Get Page URLs').first().json;\nconst currentUrl = navData.url; \n\n// --- DYNAMIC NAVIGATION DETECTION ---\n// We check which navigation object exists in the response\nlet navObject = null;\n\nif (navData.productNavigation) {\n navObject = navData.productNavigation;\n} else if (navData.articleNavigation) {\n navObject = navData.articleNavigation;\n} else if (navData.jobPostingNavigation) {\n navObject = navData.jobPostingNavigation;\n} else if (navData.forumThreadNavigation) { // Rare, but possible\n navObject = navData.forumThreadNavigation; \n}\n\n// Safely get the nextPage object (if it exists)\nconst nextPageObj = navObject ? navObject.nextPage : null;\n// ------------------------------------\n\n// 3. Mark Current Page as Visited\nif (currentUrl) {\n staticData.visited.push(currentUrl);\n}\n\n// 4. Determine Loop Logic\nlet nextUrl = null;\nlet stop = false;\n\nif (nextPageObj && nextPageObj.url) {\n // CRITICAL FIX: The \"Infinite Loop\" Preventer\n if (staticData.visited.includes(nextPageObj.url)) {\n stop = true; // We have been here before! Stop.\n } else {\n nextUrl = nextPageObj.url; // New page found. Continue.\n }\n} else {\n stop = true; // No next page (Null). Stop.\n}\n\n// 5. Output Configuration\nreturn {\n json: {\n // Tells the NEXT node (List Data) what to scrape right now\n currentScrapeUrl: currentUrl, \n \n // Tells the LOOP (Edit Fields) where to go next\n nextLoopUrl: nextUrl,\n \n // Tells the IF node whether to continue or finish\n stopLoop: stop\n }\n};"
},
"typeVersion": 2
},
{
"id": "270f34af-1dd2-45fe-a169-a87a5e2cd7ec",
"name": "[List-All] Check Next Page",
"type": "n8n-nodes-base.if",
"position": [
560,
-64
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "ab1dbeae-e363-4ac0-a99d-07b79c7b17e7",
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
},
"leftValue": "={{ $json.stopLoop }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "6b2317a4-2952-4ed1-9a37-ab3e21101376",
"name": "[List-All] Final Output",
"type": "n8n-nodes-base.code",
"position": [
1280,
-240
],
"parameters": {
"jsCode": "// [List-All] FINAL OUTPUT\nconst staticData = $getWorkflowStaticData('global');\n\n// 1. Retrieve Data\nconst allProducts = staticData.backpack || [];\n\n// 2. CLEANUP\nstaticData.backpack = [];\nstaticData.visited = [];\n\n// 3. SAFETY FILTER (New Optimization)\n// Removes nulls or bad data before sending to CSV\nconst validProducts = allProducts.filter(item => item && typeof item === 'object');\n\n// 4. Format\nreturn validProducts.map(item => {\n return {\n json: item\n };\n});"
},
"typeVersion": 2
},
{
"id": "00401370-462c-442b-b2d2-2aea9a5b5888",
"name": "[List-All] Set Next URL",
"type": "n8n-nodes-base.set",
"position": [
1488,
-48
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "082744c5-a824-4f98-9ae4-6620a5c07af3",
"name": "url",
"type": "string",
"value": "={{ $('[List-All] Page Controller').item.json.nextLoopUrl }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 3.4
},
{
"id": "d2d70190-d650-4c56-9705-a6198017a20c",
"name": "HTTP BrowserHtml",
"type": "n8n-nodes-base.httpRequest",
"position": [
-1328,
2192
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.URL }}"
},
{
"name": "browserHtml",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "fc53a493-e109-48b6-a990-6cb5693d7265",
"name": "HTTP Node: SERP Extraction",
"type": "n8n-nodes-base.httpRequest",
"position": [
-1248,
1504
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json[\"Target URL\"] }}"
},
{
"name": "serp",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "ed5afe35-e97f-4443-87d3-73bc43a708e2",
"name": "HTTP Response Body",
"type": "n8n-nodes-base.httpRequest",
"position": [
-1328,
2496
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.URL }}"
},
{
"name": "httpResponseBody",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "cffe8ad3-f432-44e6-84f6-924e39cd8e91",
"name": "HTTP Node: Capture Page Screenshot",
"type": "n8n-nodes-base.httpRequest",
"position": [
-1328,
3456
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.URL }}"
},
{
"name": "screenshot",
"value": "={{true}}"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
  "id": "7aa21852-ea5b-41a0-8030-7a97652284f2",
  "name": "HTTP Node: Capture Network API",
  "type": "n8n-nodes-base.httpRequest",
  "position": [
    -1328,
    2816
  ],
  "parameters": {
    "url": "https://api.zyte.com/v1/extract",
    "method": "POST",
    "options": {},
    "jsonBody": "={\n\"url\": {{ JSON.stringify($json.URL) }},\n \"browserHtml\": true,\n \"networkCapture\": [\n {\n \"filterType\": \"url\",\n \"httpResponseBody\": true,\n \"value\": \"/api/\",\n \"matchType\": \"contains\"\n }\n ]\n}",
    "sendBody": true,
    "sendHeaders": true,
    "specifyBody": "json",
    "headerParameters": {
      "parameters": [
        {
          "name": "authorization",
          "value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
        }
      ]
    }
  },
  "retryOnFail": true,
  "typeVersion": 4.2
},
{
"id": "87de0087-27e2-4ca6-a816-0868d03c2ba8",
"name": "Convert to File ( Image )",
"type": "n8n-nodes-base.convertToFile",
"position": [
3280,
3440
],
"parameters": {
"options": {
"fileName": "Sandbox Screenshot"
},
"operation": "toBinary",
"sourceProperty": "screenshot"
},
"typeVersion": 1.1
},
{
"id": "d6e5a7d3-64d3-4370-946f-527e00a5c6a2",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-4560,
1168
],
"parameters": {
"color": 6,
"width": 384,
"height": 688,
"content": "## AI Web Scraper\nThis workflow turns n8n into a universal scraping machine using the [**Zyte API**](https://www.zyte.com/?utm_campaign=Discord_n8n_tpl&utm_activity=Community&utm_medium=social&utm_source=Discord). It can crawl and extract structured data from almost any website (E-commerce, News, Jobs) without custom selectors.\n\n\n## How it works\n1. **User Input:** Takes a URL, Category, and API Key via the Form.\n2. **Phase 1 (Crawler):** Automatically loops through pagination to find all item URLs.\n3. **Phase 2 (Scraper):** Visits every item and uses AI to extract structured data (Price, Image, Availability, etc.).\n4. **Manual Mode:** If AI isn't needed, you can select the \"General\" path to fetch raw HTML or Screenshots and handle the parsing yourself.\n\n## How to use\n1. Open the **\"Main form submission\"** node and click **\"Open Form\"**.\n2. Enter your Target URL and select the Goal (e.g. \"Crawl ALL pages\").\n3. Enter your **Zyte API Key**.\n4. Run the workflow and download the CSV.\n\n## Requirements\n- A free or paid [**Zyte API Key**](https://www.zyte.com/?utm_campaign=Discord_n8n_tpl&utm_activity=Community&utm_medium=social&utm_source=Discord)."
},
"typeVersion": 1
},
{
"id": "964b6a23-8767-4fd3-a66c-fa8985db9145",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
3072,
-512
],
"parameters": {
"color": 4,
"width": 448,
"height": 352,
"content": "## AI Output: Aggregation & CSV Export"
},
"typeVersion": 1
},
{
"id": "a6d77074-b1f2-41ba-9fee-edc6c50276d8",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
3104,
2560
],
"parameters": {
"color": 2,
"width": 432,
"height": 368,
"content": "## General Output: Raw Export\n## ( Custom Parsing Needed ) \u26a0\ufe0f"
},
"typeVersion": 1
},
{
"id": "5636a5fc-99b9-4cef-b90d-61eb5f32a32a",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
3120,
3312
],
"parameters": {
"color": 4,
"width": 432,
"height": 368,
"content": "## Image Output: Save as JPEG or PNG"
},
"typeVersion": 1
},
{
"id": "45be9452-a6f4-4143-9ee9-fb40b4ce3b7c",
"name": "Custom Output",
"type": "n8n-nodes-base.set",
"position": [
3264,
2720
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "b0af1640-c286-4053-aaf1-99b9aa194dc2",
"name": "data",
"type": "string",
"value": "={{ $json.browserHtml || $json.httpResponseBody || $json.networkCapture }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "b4956fd0-df61-4f43-90ca-b1bf7ba70c3e",
"name": "Sticky Note4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1424,
1280
],
"parameters": {
"color": 7,
"width": 464,
"height": 448,
"content": "## \ud83d\udd0d Non-AI Extraction :: SERP\n\nThis gets search results using the 'serp' schema.\n\n**Note:** You can modify the Domain (in the HTTP node) to get details from different search engines or regions."
},
"typeVersion": 1
},
{
"id": "ed28eee9-0f90-4b23-a5bf-75e0d930a733",
"name": "Sticky Note6",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1536,
1856
],
"parameters": {
"color": 7,
"width": 736,
"height": 1888,
"content": "## \ud83d\udee0\ufe0f Manual Mode (Raw Data)\n\n\n\n\nAggregates raw outputs (HTML, JSON, Base64) from the manual flows.\n\n**\u26a0\ufe0f NOTE:** No AI parsing happens here. You must add your own 'HTML Extract' or 'Code' nodes after this point to parse the data.\n\n\n\n**Capabilities:**\n\n\n- **Browser HTML:** Returns the full rendered DOM.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n- **HTTP Response:** Sends HTTP request & returns Base64-encoded response body.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n- **Infinite Scroll:** Auto-scrolls to bottom before capturing HTML.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n- **Network Capture:** intercepts API calls (JSON) from the page.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n- **Screenshot:** Returns a visual snapshot of the page."
},
"typeVersion": 1
},
{
"id": "a7c57f73-0ef8-4cf6-9836-2df17526269b",
"name": "HTTP Node: Infinite Scroll",
"type": "n8n-nodes-base.httpRequest",
"position": [
-1328,
3136
],
"parameters": {
"url": "https://api.zyte.com/v1/extract",
"method": "POST",
"options": {},
"jsonBody": "={\n \"url\": \"{{ $json.URL }}\",\n \"browserHtml\": true,\n \"actions\": [\n {\n \"action\": \"scrollBottom\"\n }\n ]\n}",
"sendBody": true,
"sendHeaders": true,
"specifyBody": "json",
"headerParameters": {
"parameters": [
{
"name": "authorization",
"value": "=Basic {{ ($('Main form submission').item.json[\"Zyte API Key\"] + \":\").base64Encode() }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2
},
{
"id": "48cbf57b-ed1d-45df-ab51-28495d5243bd",
"name": "Sticky Note7",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3808,
944
],
"parameters": {
"color": 7,
"width": 1424,
"height": 1072,
"content": "## \ud83d\udd79\ufe0f Control Center\n\n**1. Form:** Accepts URL & API Key.\n**2. Logic Engine:** Maps your \"Goal\" (e.g., \"Crawl all pages\") to the correct AI Schema (Product, Article, Job).\n**3. Routing:** Directs traffic to one of 3 main pipelines:\n - \ud83e\udd16 **AI Extraction:** Smart parsing (Products/News/Jobs).\n - \ud83d\udd0d **SERP:** Search Engine Results.\n - \ud83d\udee0\ufe0f **Manual/General:** Raw HTML & Network dumps."
},
"typeVersion": 1
},
{
"id": "34bd22a1-03dc-42c5-a9a3-76765f2450ce",
"name": "Sticky Note8",
"type": "n8n-nodes-base.stickyNote",
"position": [
-608,
-1552
],
"parameters": {
"color": 7,
"width": 2592,
"height": 2560,
"content": "## \ud83e\udd16 AI Extraction Pipeline (5 Scenarios)\n\nThis section handles the smart extraction based on your goal:\n\n\n\n1. **Simple Scrapes (No Pagination):**\n\n\n\ni. **Single Item:** Scrapes one specific product/article.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nii. **List (Current Page):** Gets all items from the provided URL only.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\niii. **Details (Current Page):** Finds items on this page & visits them one-by-one.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n2. **Advanced Crawlers (Pagination Active):**\n\n\n\n\n\n\n\niv. **Crawl List:** Loops through ALL pages to build a massive master list.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nv. **Crawl Details:** The \"Ultimate\" Mode. Loops through ALL pages + visits EVERY item to extract full details.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
},
"typeVersion": 1
},
{
"id": "ef1ea378-9a1d-4fa4-9cd9-3da590389602",
"name": "serp response",
"type": "n8n-nodes-base.set",
"position": [
3248,
1504
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "b0af1640-c286-4053-aaf1-99b9aa194dc2",
"name": "data",
"type": "object",
"value": "={{ $json.serp }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "1da628d0-c21d-457b-9dd0-85fbfb4c302a",
"name": "Extracted AI Output",
"type": "n8n-nodes-base.convertToFile",
"position": [
3248,
-368
],
"parameters": {
"options": {}
},
"typeVersion": 1.1
},
{
"id": "eb314b24-2931-4171-893c-e3b94ad04b98",
"name": "Sticky Note5",
"type": "n8n-nodes-base.stickyNote",
"position": [
3072,
1360
],
"parameters": {
"color": 4,
"width": 464,
"height": 368,
"content": "## Output: SERP Response"
},
"typeVersion": 1
},
{
"id": "1c10ab5d-2ce2-4d5e-8ce5-adc54ddf2ee0",
"name": "General Extract Goal",
"type": "n8n-nodes-base.switch",
"position": [
-2720,
1712
],
"parameters": {
"rules": {
"values": [
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "ce59b8de-cffc-4f8e-973f-c2fa20b13ba5",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"What is your extraction goal?\"] }}",
"rightValue": "BrowserHtml"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "b44c142d-4039-4f8b-83dd-64b30c3a7042",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"What is your extraction goal?\"] }}",
"rightValue": "httpResponseBody"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "5932f0e5-c195-447b-bbae-ca1d0a548810",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"What is your extraction goal?\"] }}",
"rightValue": "Capture Network API"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "8047909a-ffe0-4031-a566-98ed971b1a0c",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"What is your extraction goal?\"] }}",
"rightValue": "Infinite Scroll"
}
]
}
},
{
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "927fcd5a-eecc-4dc3-990a-c9eebca50835",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json[\"What is your extraction goal?\"] }}",
"rightValue": "Screenshot"
}
]
}
}
]
},
"options": {}
},
"typeVersion": 3.3
},
{
"id": "b312118e-1feb-4998-b7e4-0699dfefccd8",
"name": "General Extraction Goal Form",
"type": "n8n-nodes-base.form",
"position": [
-3168,
1760
],
"parameters": {
"options": {},
"formFields": {
"values": [
{
"fieldName": "URL",
"fieldType": "hiddenField",
"fieldValue": "={{ $json[\"Target URL\"] }}"
},
{
"fieldName": "Site Type",
"fieldType": "hiddenField",
"fieldValue": "={{ $json[\"Select Site Category\"] }}"
},
{
"fieldType": "dropdown",
"fieldLabel": "What is your extraction goal?",
"fieldOptions": {
"values": [
{
"option": "BrowserHtml"
},
{
"option": "httpResponseBody"
For the full experience, including quality scoring and batch install features for each workflow, upgrade to Pro.
About this workflow
This workflow uses the Zyte API to automatically detect and extract structured data from E-commerce sites, Articles, Job Boards, and Search Engine Results (SERP) - no custom CSS selectors required.
Source: https://n8n.io/workflows/11637/ — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
This template helps you extract the top-performing pages from your website using Google Analytics, scrape the content of the pages using Firecrawl API (HTTP node provided), and build a knowledge graph for
Travel agencies, freelance travel planners, or anyone who wants to automate personalized trip planning by combining real-time hotel and flight data with AI-generated recommendations. Collects travel d
Automate LinkedIn lead generation by scraping comments from targeted posts and enriching profiles with detailed data
This n8n workflow collects leads from Google Maps, scrapes their websites via direct HTTP requests, and extracts valid email addresses — all while mimicking real user behavior to improve scraping reli
This workflow automates lead generation by scraping business data from Google Maps using Apify, enriching it with verified email addresses via Anymailfinder, and storing the results in a NocoDB databa