{
  "name": "Configurable Multi-Page Web Scraper Template",
  "nodes": [
    {
      "parameters": {},
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        176,
        240
      ],
      "id": "d9c48247-9b7d-4ef7-87b3-2a0109d12e77",
      "name": "Start"
    },
    {
      "parameters": {
        "mode": "raw",
        "jsonOutput": "{\n  \"startUrl\": \"https://quotes.toscrape.com/tag/humor/\",\n  \"nextPageSelector\": \"li.next a[href]\",\n  \"fields\": [\n    {\n      \"name\": \"author\",\n      \"selector\": \"span > small.author\",\n      \"value\": \"text\"\n    },\n    {\n      \"name\": \"text\",\n      \"selector\": \"span.text\",\n      \"value\": \"text\"\n    }\n  ]\n}\n",
        "options": {}
      },
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        416,
        240
      ],
      "id": "4f24aada-20c6-4ee4-b785-512d35e8e540",
      "name": "Input"
    },
    {
      "parameters": {
        "url": "={{ $json.startUrl }}",
        "options": {
          "response": {
            "response": {
              "responseFormat": "text",
              "outputPropertyName": "content"
            }
          }
        }
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        656,
        240
      ],
      "id": "84f17c31-7bfb-4cc3-b3a2-9483f239a885",
      "name": "Get Start URL"
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "e8879b7e-1bda-451f-b83b-68b9d3ed1e2a",
              "name": "startUrl",
              "value": "=https://{{ $('Input').item.json.startUrl.extractDomain() }}{{ $json.nextPage }}",
              "type": "string"
            },
            {
              "id": "d2c403d4-fabb-4961-a202-4690c9f8e990",
              "name": "nextPageSelector",
              "value": "={{ $('Input').item.json.nextPageSelector }}",
              "type": "string"
            },
            {
              "id": "2b2e5ccc-c467-47cb-83b1-f401bb2812f9",
              "name": "fields",
              "value": "={{ $('Input').item.json.fields }}",
              "type": "array"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        1376,
        240
      ],
      "id": "2f8d0c78-7d85-4a39-b941-2dcc1a36ba9e",
      "name": "Next Page Input"
    },
    {
      "parameters": {
        "fieldToSplitOut": "fields",
        "options": {}
      },
      "type": "n8n-nodes-base.splitOut",
      "typeVersion": 1,
      "position": [
        656,
        48
      ],
      "id": "732965f7-fdff-421c-8c41-daeb0ec4ffc0",
      "name": "Split Out Fields",
      "notesInFlow": false
    },
    {
      "parameters": {
        "mode": "combine",
        "combineBy": "combineAll",
        "options": {}
      },
      "type": "n8n-nodes-base.merge",
      "typeVersion": 3.2,
      "position": [
        896,
        48
      ],
      "id": "e94991c0-8dec-468b-993a-45426fe737b4",
      "name": "Merge HTML and Fields"
    },
    {
      "parameters": {
        "operation": "extractHtmlContent",
        "dataPropertyName": "content",
        "extractionValues": {
          "values": [
            {
              "key": "={{ $json.name }}",
              "cssSelector": "={{ $json.selector }}",
              "returnValue": "={{ $json.value }}",
              "returnArray": true
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.html",
      "typeVersion": 1.2,
      "position": [
        1136,
        48
      ],
      "id": "7d4b957e-daa7-4017-9235-d107a5ff112d",
      "name": "Scrape Fields"
    },
    {
      "parameters": {
        "operation": "extractHtmlContent",
        "dataPropertyName": "content",
        "extractionValues": {
          "values": [
            {
              "key": "=nextPage",
              "cssSelector": "={{ $('Input').item.json.nextPageSelector }}",
              "returnValue": "attribute",
              "attribute": "href"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.html",
      "typeVersion": 1.2,
      "position": [
        896,
        240
      ],
      "id": "804fd9f9-167e-41c2-a023-b95b227d221a",
      "name": "Scrape Next Page Link"
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict",
            "version": 2
          },
          "conditions": [
            {
              "id": "a1f84a0d-26a8-417c-99a0-329060ca258b",
              "leftValue": "={{ $json.nextPage }}",
              "rightValue": "",
              "operator": {
                "type": "string",
                "operation": "exists",
                "singleValue": true
              }
            },
            {
              "id": "89dd5fa6-0e12-43bc-a7ed-37844e16d627",
              "leftValue": "={{ $json.nextPage }}",
              "rightValue": "",
              "operator": {
                "type": "string",
                "operation": "notEmpty",
                "singleValue": true
              }
            }
          ],
          "combinator": "and"
        },
        "options": {}
      },
      "type": "n8n-nodes-base.if",
      "typeVersion": 2.2,
      "position": [
        1136,
        240
      ],
      "id": "b7ddc82c-c87b-455b-b629-355daecdd9bb",
      "name": "If Next Page Link"
    },
    {
      "parameters": {
        "aggregate": "aggregateAllItemData",
        "destinationFieldName": "fields",
        "options": {}
      },
      "type": "n8n-nodes-base.aggregate",
      "typeVersion": 1,
      "position": [
        1376,
        48
      ],
      "id": "98c2a9aa-3673-4852-8ee8-c9cea73c9c99",
      "name": "Aggregate Fields"
    },
    {
      "parameters": {
        "fieldToSplitOut": "={{ $json.fields.map((item, index) => 'fields[' + index + '].' + item.keys()[0]).join() }}",
        "options": {
          "destinationFieldName": "={{ $json.fields.map(item => item.keys()[0]).join() }}"
        }
      },
      "type": "n8n-nodes-base.splitOut",
      "typeVersion": 1,
      "position": [
        1616,
        48
      ],
      "id": "a65c6d07-1d6f-4ca0-be8a-2ca5cfa7044e",
      "name": "Split Out Items"
    },
    {
      "parameters": {
        "content": "## Configurable Multi-Page Web Scraper\n### How it Works\nThis workflow is a dynamic, recursive web scraping template. It uses a single JSON object in the Input Node to define the target `startUrl`, the `nextPageSelector` (for pagination), and all data `fields` to extract.\n\nThe flow operates in two parallel branches after the initial HTTP Request:\n1. **Data Branch:** Cross-joins the HTML content with field configurations (Split Out/Merge), extracts the data using the **HTML Node**, and aggregates it.\n2. **Loop Branch**: Extracts the next page link. If a link is found, the **Set Node** updates the original configuration's `startUrl` and sends the flow back to the **HTTP Request Node**, creating a recursive loop that continues until the final page is reached.\n### Setup Steps\n1. **Input Node:** Update the JSON structure with the correct `startUrl`, the `nextPageSelector` (CSS selector for the next page link), and the `fields` array (CSS selectors for the data points you need).\n2. **Execution:** Run the workflow. It will automatically handle multi-page traversal and aggregate the final output.\n\n\nFor a full explanation of the internal logic and the recursive loop structure, view the original blog post: [Flexible Web Scraping with n8n: A Configurable, Multi-Page Template](https://n8nplaybook.com/post/2025/10/flexible-n8n-scraper-template/)",
        "height": 528,
        "width": 592
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -16,
        -336
      ],
      "typeVersion": 1,
      "id": "26ae3f56-cd65-479a-9b51-91f38cf9766b",
      "name": "Sticky Note"
    },
    {
      "parameters": {
        "content": "The **Split Out** node separates the configured data fields (e.g., author, text). The **Merge** node then efficiently combines the fetched HTML content with every single field definition, preparing the data for the extractor.",
        "height": 96,
        "width": 464,
        "color": 7
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        592,
        -96
      ],
      "typeVersion": 1,
      "id": "4587fe2f-e1f0-4663-a7c9-451aa613c536",
      "name": "Sticky Note1"
    },
    {
      "parameters": {
        "content": "The **HTML Node** uses the specific CSS selectors from the configuration to pull the required content. The **Aggregate Node** collects all extracted data items from the current page before the workflow decides whether to proceed to the next page.",
        "height": 96,
        "width": 464,
        "color": 7
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1072,
        -96
      ],
      "typeVersion": 1,
      "id": "60b22f3b-0c34-4fc2-98a7-3edd48d68378",
      "name": "Sticky Note2"
    },
    {
      "parameters": {
        "content": "This section defines the entire job via a single JSON config and performs the first action: fetching the HTML content from the current `startUrl` using the **HTTP Request** node.",
        "height": 80,
        "width": 464,
        "color": 7
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        352,
        464
      ],
      "typeVersion": 1,
      "id": "50a652f4-dbbb-42d0-979f-ef1d1bf787f1",
      "name": "Sticky Note3"
    },
    {
      "parameters": {
        "content": "This branch checks for the next page link. If found (by the **If Node**), the **Set Node** overwrites the `startUrl` with the new link, routing the flow back to the HTTP Request node to start the next iteration.",
        "height": 80,
        "width": 704,
        "color": 7
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        832,
        464
      ],
      "typeVersion": 1,
      "id": "61b20e9f-fd02-4e08-acbc-89a334869147",
      "name": "Sticky Note4"
    },
    {
      "parameters": {
        "authentication": "serviceAccount",
        "operation": "append",
        "documentId": {
          "__rl": true,
          "value": "1_qgp7BRRHAoEMHjEo5tZ2oddpUVoh5aaGpA5otmT6aQ",
          "mode": "list",
          "cachedResultName": "Web Scraper Results",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1_qgp7BRRHAoEMHjEo5tZ2oddpUVoh5aaGpA5otmT6aQ/edit?usp=drivesdk"
        },
        "sheetName": {
          "__rl": true,
          "value": "gid=0",
          "mode": "list",
          "cachedResultName": "Sheet1",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1_qgp7BRRHAoEMHjEo5tZ2oddpUVoh5aaGpA5otmT6aQ/edit#gid=0"
        },
        "columns": {
          "mappingMode": "autoMapInputData",
          "value": {},
          "matchingColumns": [],
          "schema": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {
          "useAppend": true
        }
      },
      "type": "n8n-nodes-base.googleSheets",
      "typeVersion": 4.7,
      "position": [
        1856,
        48
      ],
      "id": "c9ba233f-4d6e-4771-9dbc-3af8df4f2594",
      "name": "Store Scraped Data",
      "credentials": {
        "googleApi": {
          "name": "<your credential>"
        }
      }
    }
  ],
  "connections": {
    "Start": {
      "main": [
        [
          {
            "node": "Input",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Input": {
      "main": [
        [
          {
            "node": "Get Start URL",
            "type": "main",
            "index": 0
          },
          {
            "node": "Split Out Fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Start URL": {
      "main": [
        [
          {
            "node": "Merge HTML and Fields",
            "type": "main",
            "index": 1
          },
          {
            "node": "Scrape Next Page Link",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Next Page Input": {
      "main": [
        [
          {
            "node": "Get Start URL",
            "type": "main",
            "index": 0
          },
          {
            "node": "Split Out Fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out Fields": {
      "main": [
        [
          {
            "node": "Merge HTML and Fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge HTML and Fields": {
      "main": [
        [
          {
            "node": "Scrape Fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrape Fields": {
      "main": [
        [
          {
            "node": "Aggregate Fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrape Next Page Link": {
      "main": [
        [
          {
            "node": "If Next Page Link",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If Next Page Link": {
      "main": [
        [
          {
            "node": "Next Page Input",
            "type": "main",
            "index": 0
          }
        ],
        []
      ]
    },
    "Aggregate Fields": {
      "main": [
        [
          {
            "node": "Split Out Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out Items": {
      "main": [
        [
          {
            "node": "Store Scraped Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "07e3c6e2-662f-45e5-aa8c-713d5e5790b6",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "id": "x8PC9K3CQCTMxKCl",
  "tags": []
}