{
  "id": "IN1YGvwzaDENKsGw",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Real Estate Listing Scraper",
  "tags": [],
  "nodes": [
    {
      "id": "99554a23-7a31-499f-aa9b-73b33b4675c6",
      "name": "When clicking \u2018Execute workflow\u2019",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -416,
        -112
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "be3522b5-012f-4cf9-b3ea-0e671e3cad96",
      "name": "Loop Over Items",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        480,
        -112
      ],
      "parameters": {
        "options": {
          "reset": false
        }
      },
      "typeVersion": 3
    },
    {
      "id": "1374ac98-2bc3-4975-9636-b9a3174c1668",
      "name": "Split Out",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        256,
        -112
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "generated_urls"
      },
      "typeVersion": 1
    },
    {
      "id": "e98a9d0e-810a-4b26-bc52-74c0c180e739",
      "name": "Aggregate",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        736,
        -480
      ],
      "parameters": {
        "options": {},
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "output"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "8dbe762f-8ad4-484b-b43e-97ed6f802057",
      "name": "Split Out1",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        1216,
        -480
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "unified_output"
      },
      "typeVersion": 1
    },
    {
      "id": "d13ea647-f536-42bb-883d-c4b7fbd2e070",
      "name": "Limit",
      "type": "n8n-nodes-base.limit",
      "disabled": true,
      "position": [
        1440,
        -480
      ],
      "parameters": {
        "maxItems": 3
      },
      "typeVersion": 1
    },
    {
      "id": "1ebd384c-14fd-43cd-a319-755e05e2134c",
      "name": "Loop Over Items1",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        1744,
        -480
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "3f0c23dd-64d2-4018-b18f-0fc39d0fddef",
      "name": "Google Gemini Chat Model1",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        976,
        96
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "googlePalmApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "c8988b87-be21-4b6c-859c-fe6ed7cf9c7c",
      "name": "Wait",
      "type": "n8n-nodes-base.wait",
      "position": [
        1440,
        -96
      ],
      "parameters": {
        "amount": 10
      },
      "typeVersion": 1.1
    },
    {
      "id": "8e5019f7-56a8-4051-8472-e993ed689118",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1664,
        -640
      ],
      "parameters": {
        "color": 7,
        "width": 864,
        "height": 384,
        "content": "## STEP 4 - Extract detailed property data\nClone [this Sheet](https://docs.google.com/spreadsheets/d/1jtMyMglBbekD9Z407q8-0vn-cDDXhM81Uj1oAZIJGX8/edit?usp=sharing). Each listing URL is then processed by another **ScrapegraphAI** node, which extracts detailed property data (title, description, price, area, bedrooms, bathrooms, floor, rooms, balcony, terrace, cellar, heating, air conditioning, image URLs) based on a JSON schema. "
      },
      "typeVersion": 1
    },
    {
      "id": "9bf77a4f-0f8d-4eca-83f0-d77ae9bdad45",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -288,
        -304
      ],
      "parameters": {
        "color": 7,
        "width": 496,
        "height": 464,
        "content": "## STEP 1 - Config Params\nEnter the GET pagination parameter. For example, for immobiliare_it, if the paginated URL is:\n\n`https://www.immobiliare.it/vendita-case/verona/?pag=2`\n\nthen the `page_format_value` field should be set to `pag`.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "6c34eee1-bf6f-4e38-a103-c1a081a6106e",
      "name": "Set params",
      "type": "n8n-nodes-base.set",
      "position": [
        -192,
        -112
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "d8af8b6b-d121-4887-a739-f6dbcd871802",
              "name": "url",
              "type": "string",
              "value": "https://www.immobiliare.it/vendita-case/verona/"
            },
            {
              "id": "1890a580-64ce-4530-96a1-72c5a7142672",
              "name": "max_pages",
              "type": "string",
              "value": "2"
            },
            {
              "id": "f3abf556-33e1-4a68-b503-0ccad96c6fba",
              "name": "page_format_value",
              "type": "string",
              "value": "pag"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "f0eb8938-4799-430a-879f-09c052c8f90d",
      "name": "Generate Urls",
      "type": "n8n-nodes-base.code",
      "position": [
        32,
        -112
      ],
      "parameters": {
        "jsCode": "// Build one paginated URL per page (1..max_pages) for every incoming item\n// and store the list on the item as `generated_urls`.\nconst items = $input.all();\n\nfor (const item of items) {\n  const baseUrl = String(item.json.url || '');\n  const pageFormatValue = item.json.page_format_value;\n  // max_pages is stored as a string by the Set node; a non-numeric value yields no URLs.\n  const maxPages = parseInt(item.json.max_pages, 10);\n\n  // The first query parameter must be introduced with `?`; `&` is only valid\n  // when the base URL already carries a query string. If the base URL already\n  // ends with `?` or `&`, no extra separator is needed.\n  let separator = '?';\n  if (baseUrl.includes('?')) {\n    separator = /[?&]$/.test(baseUrl) ? '' : '&';\n  }\n\n  const urls = [];\n  for (let page = 1; page <= maxPages; page++) {\n    urls.push(`${baseUrl}${separator}${pageFormatValue}=${page}`);\n  }\n\n  item.json.generated_urls = urls;\n}\n\nreturn items;\n"
      },
      "typeVersion": 2
    },
    {
      "id": "95f6bc78-c36c-4d9e-a1f9-95827b919349",
      "name": "Scrape listings",
      "type": "n8n-nodes-scrapegraphai.scrapegraphAi",
      "position": [
        736,
        -96
      ],
      "parameters": {
        "userPrompt": "=You are an expert extraction algorithm.\nYou must extract the URLs of individual listings.\nThe listings have the following structure:\n`https://www.XXX.it/annunci/xxxx`\n\nIf the page does not contain URLs of this type, return an empty array.",
        "websiteUrl": "={{ $json.generated_urls }}"
      },
      "credentials": {
        "scrapegraphAIApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "91bef7eb-4e07-4413-960b-d87c73045aee",
      "name": "Extract individual URL",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        1024,
        -96
      ],
      "parameters": {
        "text": "={{ JSON.stringify($json) }}",
        "options": {
          "systemPromptTemplate": "You are an expert extraction algorithm.\nYou must extract the URLs of individual listings.\nThe listings have the following structure:\n`https://www.immobiliare.it/annunci/xxxx`\n\nIf the page does not contain URLs of this type, return an empty array.\n"
        },
        "schemaType": "manual",
        "inputSchema": "{\n\t\"type\": \"array\",\n\t\"properties\": {\n\t\t\"url\": {\n\t\t\t\"type\": \"string\"\n\t\t}\n\t}\n}"
      },
      "typeVersion": 1.2
    },
    {
      "id": "4b369167-05a1-43ce-9b0a-0a997f58a119",
      "name": "Unified",
      "type": "n8n-nodes-base.code",
      "position": [
        976,
        -480
      ],
      "parameters": {
        "jsCode": "const items = $input.all();\n\nconst unified = items\n  .flatMap(item => item.json.output || [])\n  .flat();\n\nreturn [\n  {\n    json: {\n      unified_output: unified\n    }\n  }\n];\n"
      },
      "typeVersion": 2
    },
    {
      "id": "79f0f797-6561-46fb-b114-1a108e64092a",
      "name": "Extract data",
      "type": "n8n-nodes-scrapegraphai.scrapegraphAi",
      "position": [
        2000,
        -464
      ],
      "parameters": {
        "userPrompt": "=Extract all useful property data, including the title, the full description, the listing reference, and all property image URLs.",
        "websiteUrl": "={{ $json.unified_output }}",
        "outputSchema": "{\n  \"type\": \"array\",\n  \"title\": \"RealEstateSchema\",\n  \"properties\": {\n    \"title\": {\n      \"type\": \"string\",\n      \"description\": \"Title of the real estate listing\"\n    },\n    \"description\": {\n      \"type\": \"string\",\n      \"description\": \"Full description of the property\"\n    },\n    \"reference\": {\n      \"type\": \"string\",\n      \"description\": \"Listing identification code\"\n    },\n    \"price\": {\n      \"type\": \"number\",\n      \"description\": \"Property price in numeric format\"\n    },\n    \"area\": {\n      \"type\": \"number\",\n      \"description\": \"Property size in square meters\"\n    },\n    \"bedrooms\": {\n      \"type\": \"integer\",\n      \"description\": \"Number of bedrooms\"\n    },\n    \"bathrooms\": {\n      \"type\": \"integer\",\n      \"description\": \"Number of bathrooms\"\n    },\n    \"floor\": {\n      \"type\": \"string\",\n      \"description\": \"Property floor level(s)\"\n    },\n    \"rooms\": {\n      \"type\": \"integer\",\n      \"description\": \"Total number of rooms\"\n    },\n    \"balcony\": {\n      \"type\": \"string\",\n      \"description\": \"Balcony availability (e.g. Yes/No)\"\n    },\n    \"terrace\": {\n      \"type\": \"string\",\n      \"description\": \"Terrace availability (e.g. Yes/No)\"\n    },\n    \"cellar\": {\n      \"type\": \"string\",\n      \"description\": \"Cellar availability (e.g. 
Yes/No)\"\n    },\n    \"heating\": {\n      \"type\": \"string\",\n      \"description\": \"Type of heating system\"\n    },\n    \"air_conditioning\": {\n      \"type\": \"string\",\n      \"description\": \"Type of air conditioning system\"\n    },\n    \"image_urls\": {\n      \"type\": \"array\",\n      \"description\": \"List of property image URLs\",\n      \"items\": {\n        \"type\": \"string\",\n        \"format\": \"uri\"\n      }\n    }\n  },\n  \"required\": [\n    \"title\",\n    \"description\",\n    \"price\",\n    \"area\"\n  ]\n}",
        "renderHeavyJs": true,
        "useOutputSchema": true
      },
      "credentials": {
        "scrapegraphAIApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "3b63d958-03c0-41f3-b0ad-98ade0d1803c",
      "name": "Update real estate listings",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        2256,
        -464
      ],
      "parameters": {
        "columns": {
          "value": {
            "URL": "={{ $json.website_url }}",
            "AREA": "={{ $json.result.items[0].area }}",
            "FLOOR": "={{ $json.result.items[0].floor }}",
            "PROCE": "={{ $json.result.items[0].price }}",
            "ROOMS": "={{ $json.result.items[0].rooms }}",
            "TITLE": "={{ $json.result.items[0].title }}",
            "CELLAR": "={{ $json.result.items[0].cellar }}",
            "BALCONY": "={{ $json.result.items[0].balcony }}",
            "HEATING": "={{ $json.result.items[0].heating }}",
            "BEDROOMS": "={{ $json.result.items[0].bedrooms }}",
            "TERRANCE": "={{ $json.result.items[0].terrace }}",
            "BATHROOMS": "={{ $json.result.items[0].bathrooms }}",
            "REFERENCE": "={{ $json.result.items[0].reference }}",
            "IMAGE URLS": "={{ JSON.stringify($json.result.items[0].image_urls) }}",
            "DESCRIPTION": "={{ $json.result.items[0].description }}",
            "AIR_CONDITIONING": "={{ $json.result.items[0].air_conditioning }}"
          },
          "schema": [
            {
              "id": "URL",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "URL",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "TITLE",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "TITLE",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "DESCRIPTION",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "DESCRIPTION",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "REFERENCE",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "REFERENCE",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "PROCE",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "PROCE",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "AREA",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "AREA",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "BEDROOMS",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "BEDROOMS",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "BATHROOMS",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "BATHROOMS",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "FLOOR",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "FLOOR",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "ROOMS",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "ROOMS",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "BALCONY",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "BALCONY",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "TERRANCE",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "TERRANCE",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "CELLAR",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "CELLAR",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "HEATING",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "HEATING",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "AIR_CONDITIONING",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "AIR_CONDITIONING",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "IMAGE URLS",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "IMAGE URLS",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "URL"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "appendOrUpdate",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1jtMyMglBbekD9Z407q8-0vn-cDDXhM81Uj1oAZIJGX8/edit#gid=0",
          "cachedResultName": "Foglio1"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "1jtMyMglBbekD9Z407q8-0vn-cDDXhM81Uj1oAZIJGX8",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1jtMyMglBbekD9Z407q8-0vn-cDDXhM81Uj1oAZIJGX8/edit?usp=drivesdk",
          "cachedResultName": "Real Estate listing"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "e2c1f45b-41a4-437f-9271-861e09480ac2",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        688,
        -1456
      ],
      "parameters": {
        "width": 944,
        "height": 784,
        "content": "# Automate Real Estate Listing Scraper with ScrapeGraph AI and Google Sheets\nThis workflow automates the process of **scraping real estate property listings** from websites using **ScrapeGraph AI**, extracting structured data, and saving it to a **Google Sheet**. It is designed to handle paginated listing pages and can be adapted to any real estate site that uses URL parameters for pagination.\n\nNOTE:\nThis workflow has been tested with Immobiliare_it, the #1 real estate website in Italy. However, it is designed to be adaptable: by modifying the pagination parameter and the listing URL pattern, you can use it with **any real estate website** that structures its listings with URL-based pagination.\n\n### **How it works:**\n\nThe workflow operates in two structured phases: **listing URL discovery** and **data extraction & storage**. First, a Code node generates paginated listing URLs using a base URL, maximum page count, and pagination parameter. Each page is processed by **ScrapeGraphAI** to extract individual property URLs, which are validated and structured using a Google Gemini-powered Information Extractor. A Wait node controls request pacing, and looping ensures all pages are processed safely.\n\nIn the second phase, collected listing URLs are aggregated and iterated individually. ScrapeGraphAI extracts structured property data (price, area, rooms, features, images, etc.) according to a defined JSON schema. The results are written to **Google Sheets**, where records are deduplicated based on listing URL. The modular design enables scalability, schema customization, and storage replacement.\n\n### **Setup steps:**\n\nStart by importing the workflow into n8n and configuring the required credentials: **ScrapeGraphAI API**, **Google Gemini API**, and **Google Sheets OAuth2**. Prepare a Google Sheet (or clone the template), then note the **Document ID** and **Sheet Name** for configuration.\n\nIn the **Set params** node, define the base listing URL, pagination parameter (e.g., `pag`), and number of pages to scrape. If needed, update the listing URL pattern in the **Extract individual URL** node and adjust the JSON schema in the **Extract data** node to match your target fields. Finally, configure the Google Sheets node with correct column mappings, activate the workflow, and execute it to begin automated scraping and structured data collection.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "e4ec8d66-b283-473f-8b90-94650cf1afac",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        688,
        -224
      ],
      "parameters": {
        "color": 7,
        "width": 944,
        "height": 384,
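        "content": "## STEP 2 - Extract URLs\n\nEach paginated page is scraped with **ScrapeGraphAI**, and the individual listing URLs are extracted by the Google Gemini-powered Information Extractor. A Wait node paces the requests between pages.\n"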
      },
      "typeVersion": 1
    },
    {
      "id": "724ed092-e7d8-4dc0-9a5e-0a60e40a6038",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        688,
        -640
      ],
      "parameters": {
        "color": 7,
        "width": 944,
        "height": 384,
        "content": "## STEP 3 - Aggregate URLs\n\nAll collected listing URLs are aggregated and split into individual items.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "6c4562c2-b5da-4ce2-9275-53fd1aed994a",
      "name": "Sticky Note9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1680,
        -1408
      ],
      "parameters": {
        "color": 7,
        "width": 736,
        "height": 736,
        "content": "## MY NEW YOUTUBE CHANNEL\n\ud83d\udc49 [Subscribe to my new **YouTube channel**](https://youtube.com/@n3witalia). Here I\u2019ll share videos and Shorts with practical tutorials and **FREE templates for n8n**.\n\n[![image](https://n3wstorage.b-cdn.net/n3witalia/youtube-n8n-cover.jpg)](https://youtube.com/@n3witalia)"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "binaryMode": "separate",
    "availableInMCP": false,
    "executionOrder": "v1"
  },
  "versionId": "c1b1f39f-2481-44a9-ab3c-a750cede2d35",
  "connections": {
    "Wait": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Limit": {
      "main": [
        [
          {
            "node": "Loop Over Items1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Unified": {
      "main": [
        [
          {
            "node": "Split Out1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Aggregate": {
      "main": [
        [
          {
            "node": "Unified",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set params": {
      "main": [
        [
          {
            "node": "Generate Urls",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out1": {
      "main": [
        [
          {
            "node": "Limit",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract data": {
      "main": [
        [
          {
            "node": "Update real estate listings",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate Urls": {
      "main": [
        [
          {
            "node": "Split Out",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [
          {
            "node": "Aggregate",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Scrape listings",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrape listings": {
      "main": [
        [
          {
            "node": "Extract individual URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items1": {
      "main": [
        [],
        [
          {
            "node": "Extract data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract individual URL": {
      "main": [
        [
          {
            "node": "Wait",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model1": {
      "ai_languageModel": [
        [
          {
            "node": "Extract individual URL",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Update real estate listings": {
      "main": [
        [
          {
            "node": "Loop Over Items1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking \u2018Execute workflow\u2019": {
      "main": [
        [
          {
            "node": "Set params",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}