The workflow JSON

Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and activate it. Full import guide →

Download .json

{
  "name": "Web Scraping Im\u00f3veis - Corrigido",
  "nodes": [
    {
      "parameters": {},
      "name": "In\u00edcio",
      "type": "n8n-nodes-base.start",
      "typeVersion": 1,
      "position": [
        250,
        300
      ]
    },
    {
      "parameters": {
        "functionCode": "return [{\n  json: {\n    sites: [\n      {\n        name: 'Imobi Report',\n        url: 'https://imobireport.com.br',\n        type: 'http',\n        selectors: {\n          posts: '.post-title a',\n          date: 'time.entry-date'\n        }\n      },\n      {\n        name: 'UOL Economia - Im\u00f3veis',\n        url: 'https://economia.uol.com.br/imoveis/',\n        type: 'playwright',\n        selectors: {\n          posts: '.thumb-title a',\n          date: '.thumb-date'\n        }\n      }\n    ]\n  }\n}];",
        "jsSandbox": false
      },
      "name": "Lista de Sites",
      "type": "n8n-nodes-base.function",
      "typeVersion": 1,
      "position": [
        450,
        300
      ]
    },
    {
      "parameters": {
        "functionCode": "const sites = $node[\"Lista de Sites\"].json.sites;\n\nconst items = sites.map((site, index) => ({\n  json: {\n    site,\n    index,\n    total: sites.length\n  }\n}));\n\nreturn items;",
        "jsSandbox": false
      },
      "name": "Preparar Itera\u00e7\u00e3o",
      "type": "n8n-nodes-base.function",
      "typeVersion": 1,
      "position": [
        650,
        300
      ]
    },
    {
      "parameters": {
        "functionCode": "const site = $input.item.json.site;\n\nif (site.type === 'http') {\n  return $input.all();\n}\n\nreturn [];",
        "jsSandbox": false
      },
      "name": "Filtrar HTTP",
      "type": "n8n-nodes-base.function",
      "typeVersion": 1,
      "position": [
        850,
        200
      ]
    },
    {
      "parameters": {
        "functionCode": "const site = $input.item.json.site;\n\nif (site.type === 'playwright') {\n  return $input.all();\n}\n\nreturn [];",
        "jsSandbox": false
      },
      "name": "Filtrar Playwright",
      "type": "n8n-nodes-base.function",
      "typeVersion": 1,
      "position": [
        850,
        400
      ]
    },
    {
      "parameters": {
        "options": {},
        "url": "={{$node[\"Filtrar HTTP\"].json[\"0\"].site.url}}",
        "responseFormat": "html"
      },
      "name": "Requisi\u00e7\u00e3o HTTP",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 1,
      "position": [
        1050,
        200
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "operation": "page.goto",
        "url": "={{$node[\"Filtrar Playwright\"].json[\"0\"].site.url}}",
        "interaction": [
          {
            "interaction": "waitForSelector",
            "options": {
              "selector": "={{$node[\"Filtrar Playwright\"].json[\"0\"].site.selectors.posts}}",
              "timeout": 10000
            }
          }
        ]
      },
      "name": "Playwright",
      "type": "n8n-nodes-base.playwright",
      "typeVersion": 1,
      "position": [
        1050,
        400
      ],
      "continueOnFail": true
    },
    {
      "parameters": {
        "extractionValues": [
          {
            "key": "titulo",
            "cssSelector": "={{$input.item.json.site.selectors.posts}}",
            "returnValue": "text"
          },
          {
            "key": "url",
            "cssSelector": "={{$input.item.json.site.selectors.posts}}",
            "returnValue": "href"
          },
          {
            "key": "data",
            "cssSelector": "={{$input.item.json.site.selectors.date}}",
            "returnValue": "text"
          }
        ]
      },
      "name": "Extrair HTML",
      "type": "n8n-nodes-base.htmlExtract",
      "typeVersion": 1,
      "position": [
        1250,
        300
      ]
    },
    {
      "parameters": {
        "functionCode": "const inputData = $input.item;\nreturn [{\n  json: {\n    Site: inputData.json.site.name,\n    T\u00edtulo: inputData.json.titulo,\n    URL: inputData.json.url,\n    Data: inputData.json.data,\n    \u00cdndice: inputData.json.index,\n    Total: inputData.json.total\n  }\n}];",
        "jsSandbox": false
      },
      "name": "Formatar Sa\u00edda",
      "type": "n8n-nodes-base.function",
      "typeVersion": 1,
      "position": [
        1450,
        300
      ]
    },
    {
      "parameters": {
        "operation": "append",
        "fileName": "resultados_imoveis.csv",
        "format": "csv",
        "fields": "=['Site', 'T\u00edtulo', 'URL', 'Data']",
        "append": true
      },
      "name": "Salvar CSV",
      "type": "n8n-nodes-base.writeFile",
      "typeVersion": 1,
      "position": [
        1650,
        300
      ]
    }
  ],
  "connections": {
    "In\u00edcio": {
      "main": [
        [
          {
            "node": "Lista de Sites",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Lista de Sites": {
      "main": [
        [
          {
            "node": "Preparar Itera\u00e7\u00e3o",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Preparar Itera\u00e7\u00e3o": {
      "main": [
        [
          {
            "node": "Filtrar HTTP",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Filtrar Playwright",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filtrar HTTP": {
      "main": [
        [
          {
            "node": "Requisi\u00e7\u00e3o HTTP",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filtrar Playwright": {
      "main": [
        [
          {
            "node": "Playwright",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Requisi\u00e7\u00e3o HTTP": {
      "main": [
        [
          {
            "node": "Extrair HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Playwright": {
      "main": [
        [
          {
            "node": "Extrair HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extrair HTML": {
      "main": [
        [
          {
            "node": "Formatar Sa\u00edda",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Formatar Sa\u00edda": {
      "main": [
        [
          {
            "node": "Salvar CSV",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1",
    "timezone": "America/Sao_Paulo"
  },
  "version": 1,
  "active": false
}

Pro

For the full experience, including quality scoring and batch install features for each workflow, upgrade to Pro.

About this workflow

Web Scraping Imóveis - Corrigido. Uses start, httpRequest, playwright, htmlExtract. Manual trigger; 10 nodes.

Source: https://gist.github.com/KleberUziel/2ec441095cb99bab45746394d3aecc1f — original creator credit. Request a take-down →

More Web Scraping workflows → · Browse all categories →

Related workflows

Workflows that share integrations, category, or trigger type with this one. All free to copy and import.

Web Scraping

Scrape and Store Data From Multiple Website Pages

Scrape And Store Data From Multiple Website Pages. Uses manualTrigger, httpRequest, htmlExtract, splitInBatches. Event-driven trigger; 23 nodes.

HTTP Request, Html Extract, MongoDB +5

Web Scraping

Scrape Linkedin Profiles & Save to Google Sheets with Apify

This n8n workflow automates the process of scraping LinkedIn profiles using the Apify platform and organizing the extracted data into Google Sheets for easy analysis and follow-up. Lead Generation: Ex…

Google Sheets, HTTP Request, Gmail

Web Scraping

FQA_GoogleSheet_Website103

FQA_GoogleSheet_Website103. Uses start, your-custom-node, googleSheets, httpRequest. Manual trigger; 14 nodes.

Start, Your Custom Node, Google Sheets +2

Web Scraping

Gist:Ed Parsadanyan

Gist:Ed Parsadanyan. Uses start, httpRequest, htmlExtract, itemLists. Manual trigger; 7 nodes.

Start, HTTP Request, Html Extract +1

Web Scraping

Rfp/470 Form Scraper

2. RFP/470 Form Scraper. Uses httpRequest, writeFile. Webhook trigger; 7 nodes.