The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and run it manually — the workflow starts from a Manual Trigger, so it does not need to be activated. Full import guide →
{
"name": "Configurable Multi-Page Web Scraper Template",
"nodes": [
{
"parameters": {},
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
176,
240
],
"id": "d9c48247-9b7d-4ef7-87b3-2a0109d12e77",
"name": "Start"
},
{
"parameters": {
"mode": "raw",
"jsonOutput": "{\n \"startUrl\": \"https://quotes.toscrape.com/tag/humor/\",\n \"nextPageSelector\": \"li.next a[href]\",\n \"fields\": [\n {\n \"name\": \"author\",\n \"selector\": \"span > small.author\",\n \"value\": \"text\"\n },\n {\n \"name\": \"text\",\n \"selector\": \"span.text\",\n \"value\": \"text\"\n }\n ]\n}\n",
"options": {}
},
"type": "n8n-nodes-base.set",
"typeVersion": 3.4,
"position": [
416,
240
],
"id": "4f24aada-20c6-4ee4-b785-512d35e8e540",
"name": "Input"
},
{
"parameters": {
"url": "={{ $json.startUrl }}",
"options": {
"response": {
"response": {
"responseFormat": "text",
"outputPropertyName": "content"
}
}
}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
656,
240
],
"id": "84f17c31-7bfb-4cc3-b3a2-9483f239a885",
"name": "Get Start URL"
},
{
"parameters": {
"assignments": {
"assignments": [
{
"id": "e8879b7e-1bda-451f-b83b-68b9d3ed1e2a",
"name": "startUrl",
"value": "=https://{{ $('Input').item.json.startUrl.extractDomain() }}{{ $json.nextPage }}",
"type": "string"
},
{
"id": "d2c403d4-fabb-4961-a202-4690c9f8e990",
"name": "nextPageSelector",
"value": "={{ $('Input').item.json.nextPageSelector }}",
"type": "string"
},
{
"id": "2b2e5ccc-c467-47cb-83b1-f401bb2812f9",
"name": "fields",
"value": "={{ $('Input').item.json.fields }}",
"type": "array"
}
]
},
"options": {}
},
"type": "n8n-nodes-base.set",
"typeVersion": 3.4,
"position": [
1376,
240
],
"id": "2f8d0c78-7d85-4a39-b941-2dcc1a36ba9e",
"name": "Next Page Input"
},
{
"parameters": {
"fieldToSplitOut": "fields",
"options": {}
},
"type": "n8n-nodes-base.splitOut",
"typeVersion": 1,
"position": [
656,
48
],
"id": "732965f7-fdff-421c-8c41-daeb0ec4ffc0",
"name": "Split Out Fields",
"notesInFlow": false
},
{
"parameters": {
"mode": "combine",
"combineBy": "combineAll",
"options": {}
},
"type": "n8n-nodes-base.merge",
"typeVersion": 3.2,
"position": [
896,
48
],
"id": "e94991c0-8dec-468b-993a-45426fe737b4",
"name": "Merge HTML and Fields"
},
{
"parameters": {
"operation": "extractHtmlContent",
"dataPropertyName": "content",
"extractionValues": {
"values": [
{
"key": "={{ $json.name }}",
"cssSelector": "={{ $json.selector }}",
"returnValue": "={{ $json.value }}",
"returnArray": true
}
]
},
"options": {}
},
"type": "n8n-nodes-base.html",
"typeVersion": 1.2,
"position": [
1136,
48
],
"id": "7d4b957e-daa7-4017-9235-d107a5ff112d",
"name": "Scrape Fields"
},
{
"parameters": {
"operation": "extractHtmlContent",
"dataPropertyName": "content",
"extractionValues": {
"values": [
{
"key": "=nextPage",
"cssSelector": "={{ $('Input').item.json.nextPageSelector }}",
"returnValue": "attribute",
"attribute": "href"
}
]
},
"options": {}
},
"type": "n8n-nodes-base.html",
"typeVersion": 1.2,
"position": [
896,
240
],
"id": "804fd9f9-167e-41c2-a023-b95b227d221a",
"name": "Scrape Next Page Link"
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"id": "a1f84a0d-26a8-417c-99a0-329060ca258b",
"leftValue": "={{ $json.nextPage }}",
"rightValue": "",
"operator": {
"type": "string",
"operation": "exists",
"singleValue": true
}
},
{
"id": "89dd5fa6-0e12-43bc-a7ed-37844e16d627",
"leftValue": "={{ $json.nextPage }}",
"rightValue": "",
"operator": {
"type": "string",
"operation": "notEmpty",
"singleValue": true
}
}
],
"combinator": "and"
},
"options": {}
},
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
1136,
240
],
"id": "b7ddc82c-c87b-455b-b629-355daecdd9bb",
"name": "If Next Page Link"
},
{
"parameters": {
"aggregate": "aggregateAllItemData",
"destinationFieldName": "fields",
"options": {}
},
"type": "n8n-nodes-base.aggregate",
"typeVersion": 1,
"position": [
1376,
48
],
"id": "98c2a9aa-3673-4852-8ee8-c9cea73c9c99",
"name": "Aggregate Fields"
},
{
"parameters": {
"fieldToSplitOut": "={{ $json.fields.map((item, index) => 'fields[' + index + '].' + item.keys()[0]).join() }}",
"options": {
"destinationFieldName": "={{ $json.fields.map(item => item.keys()[0]).join() }}"
}
},
"type": "n8n-nodes-base.splitOut",
"typeVersion": 1,
"position": [
1616,
48
],
"id": "a65c6d07-1d6f-4ca0-be8a-2ca5cfa7044e",
"name": "Split Out Items"
},
{
"parameters": {
"content": "## Configurable Multi-Page Web Scraper\n### How it Works\nThis workflow is a dynamic, recursive web scraping template. It uses a single JSON object in the Input Node to define the target `startUrl`, the `nextPageSelector` (for pagination), and all data `fields` to extract.\n\nThe flow operates in two parallel branches after the initial HTTP Request:\n1. **Data Branch:** Cross-joins the HTML content with field configurations (Split Out/Merge), extracts the data using the **HTML Node**, aggregates it, splits it into rows, and appends them to Google Sheets.\n2. **Loop Branch:** Extracts the next page link. If a link is found, the **Set Node** updates the original configuration's `startUrl` and sends the flow back to the **HTTP Request Node**, creating a recursive loop that continues until the final page is reached.\n### Setup Steps\n1. **Input Node:** Update the JSON structure with the correct `startUrl`, the `nextPageSelector` (CSS selector for the next page link), and the `fields` array (CSS selectors for the data points you need).\n2. **Store Scraped Data Node:** Select your own Google credential and choose the spreadsheet and sheet that should receive the results.\n3. **Execution:** Run the workflow. It will automatically handle multi-page traversal and append the results of each page to the sheet.\n\n\nFor a full explanation of the internal logic and the recursive loop structure, view the original blog post: [Flexible Web Scraping with n8n: A Configurable, Multi-Page Template](https://n8nplaybook.com/post/2025/10/flexible-n8n-scraper-template/)",
"height": 528,
"width": 592
},
"type": "n8n-nodes-base.stickyNote",
"position": [
-16,
-336
],
"typeVersion": 1,
"id": "26ae3f56-cd65-479a-9b51-91f38cf9766b",
"name": "Sticky Note"
},
{
"parameters": {
"content": "The **Split Out** node separates the configured data fields (e.g., author, text). The **Merge** node then efficiently combines the fetched HTML content with every single field definition, preparing the data for the extractor.",
"height": 96,
"width": 464,
"color": 7
},
"type": "n8n-nodes-base.stickyNote",
"position": [
592,
-96
],
"typeVersion": 1,
"id": "4587fe2f-e1f0-4663-a7c9-451aa613c536",
"name": "Sticky Note1"
},
{
"parameters": {
"content": "The **HTML Node** uses the specific CSS selectors from the configuration to pull the required content. The **Aggregate Node** collects all extracted data items from the current page before the workflow decides whether to proceed to the next page.",
"height": 96,
"width": 464,
"color": 7
},
"type": "n8n-nodes-base.stickyNote",
"position": [
1072,
-96
],
"typeVersion": 1,
"id": "60b22f3b-0c34-4fc2-98a7-3edd48d68378",
"name": "Sticky Note2"
},
{
"parameters": {
"content": "This section defines the entire job via a single JSON config and performs the first action: fetching the HTML content from the current `startUrl` using the **HTTP Request** node.",
"height": 80,
"width": 464,
"color": 7
},
"type": "n8n-nodes-base.stickyNote",
"position": [
352,
464
],
"typeVersion": 1,
"id": "50a652f4-dbbb-42d0-979f-ef1d1bf787f1",
"name": "Sticky Note3"
},
{
"parameters": {
"content": "This branch checks for the next page link. If found (by the **If Node**), the **Set Node** overwrites the `startUrl` with the new link, routing the flow back to the HTTP Request node to start the next iteration.",
"height": 80,
"width": 704,
"color": 7
},
"type": "n8n-nodes-base.stickyNote",
"position": [
832,
464
],
"typeVersion": 1,
"id": "61b20e9f-fd02-4e08-acbc-89a334869147",
"name": "Sticky Note4"
},
{
"parameters": {
"authentication": "serviceAccount",
"operation": "append",
"documentId": {
"__rl": true,
"value": "1_qgp7BRRHAoEMHjEo5tZ2oddpUVoh5aaGpA5otmT6aQ",
"mode": "list",
"cachedResultName": "Web Scraper Results",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1_qgp7BRRHAoEMHjEo5tZ2oddpUVoh5aaGpA5otmT6aQ/edit?usp=drivesdk"
},
"sheetName": {
"__rl": true,
"value": "gid=0",
"mode": "list",
"cachedResultName": "Sheet1",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1_qgp7BRRHAoEMHjEo5tZ2oddpUVoh5aaGpA5otmT6aQ/edit#gid=0"
},
"columns": {
"mappingMode": "autoMapInputData",
"value": {},
"matchingColumns": [],
"schema": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {
"useAppend": true
}
},
"type": "n8n-nodes-base.googleSheets",
"typeVersion": 4.7,
"position": [
1856,
48
],
"id": "c9ba233f-4d6e-4771-9dbc-3af8df4f2594",
"name": "Store Scraped Data",
"credentials": {
"googleApi": {
"name": "<your credential>"
}
}
}
],
"connections": {
"Start": {
"main": [
[
{
"node": "Input",
"type": "main",
"index": 0
}
]
]
},
"Input": {
"main": [
[
{
"node": "Get Start URL",
"type": "main",
"index": 0
},
{
"node": "Split Out Fields",
"type": "main",
"index": 0
}
]
]
},
"Get Start URL": {
"main": [
[
{
"node": "Merge HTML and Fields",
"type": "main",
"index": 1
},
{
"node": "Scrape Next Page Link",
"type": "main",
"index": 0
}
]
]
},
"Next Page Input": {
"main": [
[
{
"node": "Get Start URL",
"type": "main",
"index": 0
},
{
"node": "Split Out Fields",
"type": "main",
"index": 0
}
]
]
},
"Split Out Fields": {
"main": [
[
{
"node": "Merge HTML and Fields",
"type": "main",
"index": 0
}
]
]
},
"Merge HTML and Fields": {
"main": [
[
{
"node": "Scrape Fields",
"type": "main",
"index": 0
}
]
]
},
"Scrape Fields": {
"main": [
[
{
"node": "Aggregate Fields",
"type": "main",
"index": 0
}
]
]
},
"Scrape Next Page Link": {
"main": [
[
{
"node": "If Next Page Link",
"type": "main",
"index": 0
}
]
]
},
"If Next Page Link": {
"main": [
[
{
"node": "Next Page Input",
"type": "main",
"index": 0
}
],
[]
]
},
"Aggregate Fields": {
"main": [
[
{
"node": "Split Out Items",
"type": "main",
"index": 0
}
]
]
},
"Split Out Items": {
"main": [
[
{
"node": "Store Scraped Data",
"type": "main",
"index": 0
}
]
]
}
},
"active": false,
"settings": {
"executionOrder": "v1"
},
"versionId": "07e3c6e2-662f-45e5-aa8c-713d5e5790b6",
"meta": {
"templateCredsSetupCompleted": true
},
"id": "x8PC9K3CQCTMxKCl",
"tags": []
}
Credentials you'll need
Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.
googleApi
About this workflow
Configurable Multi-Page Web Scraper Template. Uses manualTrigger, set, httpRequest, splitOut, merge, html, if, aggregate, googleSheets. Manual trigger; 17 nodes (including 5 sticky notes).
Source: https://github.com/vklepikovskiy/n8nplaybook-public/blob/main/workflows/web_scraper.json — original creator credit. Request a take-down →