{
  "name": "RECOMELB \u2014 Daily Suburb Scraper v2",
  "_readme": {
    "description": "Scrapes Domain suburb profiles via fetch() in a Code node, writes to Google Sheet, sends Telegram confirmation.",
    "architecture": "Schedule \u2192 Code (fetch scrape) \u2192 Google Sheets update \u2192 Code (aggregate results) \u2192 Telegram",
    "note": "The Code node uses native fetch() \u2014 no Playwright required. Runs entirely inside n8n.",
    "setup": [
      "1. Google Sheets credential: n8n Credentials \u2192 Google Sheets OAuth2",
      "   Set GOOGLE_SHEET_ID variable to your sheet ID (from the URL)",
      "   Sheet must have a 'suburb_data' tab with columns: suburb, median_price, dom, clearance_rate, rental_yield, quarterly_change",
      "",
      "2. Telegram credential: n8n Credentials \u2192 Telegram",
      "   Create a bot via @BotFather, get the token",
      "   Get your chat ID (message @userinfobot)",
      "   Set TELEGRAM_CHAT_ID variable to your chat ID",
      "",
      "3. n8n Variables (Settings \u2192 Variables):",
      "   GOOGLE_SHEET_ID   = your-sheet-id",
      "   TELEGRAM_CHAT_ID  = your-chat-id",
      "",
      "4. Import: n8n \u2192 Workflows \u2192 Import from file \u2192 select this file",
      "5. Connect credentials to the Google Sheets and Telegram nodes",
      "6. Activate the workflow"
    ]
  },
  "nodes": [
    {
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "days",
              "daysInterval": 1,
              "triggerAtHour": 7,
              "triggerAtMinute": 0
            }
          ]
        }
      },
      "id": "node-01-schedule",
      "name": "Daily 7am",
      "type": "n8n-nodes-base.scheduleTrigger",
      "typeVersion": 1.1,
      "position": [
        180,
        300
      ]
    },
    {
      "parameters": {
        "jsCode": "// \u2500\u2500\u2500 RECOMELB Domain Suburb Scraper \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n// Runs inside n8n Code node using native fetch().\n// Scrapes Domain suburb-profile pages and returns structured suburb data.\n\nconst SUBURBS = [\n  { name: 'Fitzroy',       slug: 'fitzroy-vic-3065' },\n  { name: 'Collingwood',   slug: 'collingwood-vic-3066' },\n  { name: 'Richmond',      slug: 'richmond-vic-3121' },\n  { name: 'Northcote',     slug: 'northcote-vic-3070' },\n  { name: 'Brunswick',     slug: 'brunswick-vic-3056' },\n  { name: 'Abbotsford',    slug: 'abbotsford-vic-3067' },\n  { name: 'Fitzroy North', slug: 'fitzroy-north-vic-3068' },\n  { name: 'Carlton',       slug: 'carlton-vic-3053' },\n  { name: 'Prahran',       slug: 'prahran-vic-3181' },\n  { name: 'South Yarra',   slug: 'south-yarra-vic-3141' },\n];\n\nconst HEADERS = {\n  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',\n  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',\n  'Accept-Language': 'en-AU,en;q=0.9',\n  'Cache-Control': 'no-cache',\n};\n\n// Converts a price label ('$1.2m', '$850k', '$1,250,000') to whole dollars.\n// Returns null when the label is empty, non-numeric, or works out to 0.\nfunction parsePriceString(str) {\n  if (!str) return null;\n  const clean = str.replace(/[$,\\s]/g, '');\n  const amount = parseFloat(clean);\n  // Guard against labels like 'n/a' so NaN never reaches the sheet.\n  if (Number.isNaN(amount)) return null;\n  const factor = /m$/i.test(clean) ? 1_000_000 : /k$/i.test(clean) ? 1_000 : 1;\n  return Math.round(amount * factor) || null;\n}\n\n// Domain renders a stats table in HTML (server-side rendered).\n// Row format: beds | type | median | days on market | clearance% | count\n// We parse table rows from raw HTML using regex.\nfunction parseSuburbHtml(html, suburbName) {\n  const result = { suburb: suburbName, median_price: null, dom: null, clearance_rate: null, rental_yield: null, quarterly_change: null };\n\n  // Extract all <tbody> rows \u2014 each is: beds\\ttype\\tmedian\\tdays\\tclearance%\\tcount\n  const tbodyPattern = /<tbody[^>]*>([\\s\\S]*?)<\\/tbody>/gi;\n  const tdPattern = /<td[^>]*>([\\s\\S]*?)<\\/td>/gi;\n\n  const tbodies = [...html.matchAll(tbodyPattern)];\n  const rows = [];\n\n  for (const tbody of tbodies) {\n    const cells = [...tbody[1].matchAll(tdPattern)].map(m =>\n      m[1].replace(/<[^>]+>/g, '').replace(/\\s+/g, ' ').trim()\n    );\n    if (cells.length >= 4) rows.push(cells);\n  }\n\n  // Find best house row: prefer 2-bed, then 3-bed, then any house\n  const houseRows = rows.filter(c => c[1]?.toLowerCase() === 'house');\n  const row = houseRows.find(r => r[0] === '2')\n    || houseRows.find(r => r[0] === '3')\n    || houseRows[0];\n\n  if (row) {\n    result.median_price = parsePriceString(row[2]);\n    const dm = row[3]?.match(/(\\d+)/);\n    result.dom = dm ? parseInt(dm[1], 10) : null;\n    const cm = row[4]?.match(/([\\d.]+)/);\n    result.clearance_rate = cm ? parseFloat(cm[1]) : null;\n  }\n\n  // Days-on-market fallback: JSON embedded in page source\n  if (!result.dom) {\n    const m = html.match(/\"daysOnMarket\"\\s*:\\s*(\\d+)/);\n    if (m) result.dom = parseInt(m[1], 10);\n  }\n\n  return result;\n}\n\n// Fetches one suburb profile page. On any failure (HTTP error, timeout, network)\n// the error is logged and an all-null row is returned so the run continues.\nasync function scrapeSuburb(suburb) {\n  const url = `https://www.domain.com.au/suburb-profile/${suburb.slug}/`;\n  try {\n    const res = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });\n    if (!res.ok) throw new Error(`HTTP ${res.status}`);\n    const html = await res.text();\n    return parseSuburbHtml(html, suburb.name);\n  } catch (err) {\n    console.error(`[scraper] ${suburb.name} failed: ${err.message}`);\n    return { suburb: suburb.name, median_price: null, dom: null, clearance_rate: null, rental_yield: null, quarterly_change: null };\n  }\n}\n\n// Scrape all suburbs with 1.5s delay between requests\nconst results = [];\nfor (const [index, suburb] of SUBURBS.entries()) {\n  const data = await scrapeSuburb(suburb);\n  results.push(data);\n  console.log(`[scraper] ${suburb.name}: median_price=${data.median_price} dom=${data.dom} clearance_rate=${data.clearance_rate}%`);\n  // No delay after the last suburb\n  if (index < SUBURBS.length - 1) {\n    await new Promise(r => setTimeout(r, 1500));\n  }\n}\n\nconst populated = results.filter(r => r.median_price || r.dom || r.clearance_rate).length;\nconsole.log(`[scraper] Done: ${populated}/${SUBURBS.length} suburbs with data`);\n\n// Return one item per suburb for the Google Sheets update node\nreturn results.map(r => ({ json: r }));"
      },
      "id": "node-02-scrape",
      "name": "Scrape Domain Suburbs",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        420,
        300
      ]
    },
    {
      "parameters": {
        "operation": "update",
        "documentId": {
          "__rl": true,
          "value": "={{ $vars.GOOGLE_SHEET_ID }}",
          "mode": "id"
        },
        "sheetName": {
          "__rl": true,
          "value": "suburb_data",
          "mode": "name"
        },
        "columns": {
          "mappingMode": "defineBelow",
          "value": {
            "suburb": "={{ $json.suburb }}",
            "median_price": "={{ $json.median_price }}",
            "dom": "={{ $json.dom }}",
            "clearance_rate": "={{ $json.clearance_rate }}",
            "rental_yield": "={{ $json.rental_yield }}",
            "quarterly_change": "={{ $json.quarterly_change }}"
          },
          "matchingColumns": [
            "suburb"
          ]
        },
        "options": {}
      },
      "id": "node-03-sheets",
      "name": "Update Google Sheet",
      "type": "n8n-nodes-base.googleSheets",
      "typeVersion": 4.4,
      "position": [
        660,
        300
      ],
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "onError": "continueRegularOutput"
    },
    {
      "parameters": {
        "jsCode": "// Aggregate results from all suburb updates\nconst items = $input.all();\nconst succeeded = items.filter(i => !i.json.error).length;\nconst failed = items.length - succeeded;\nconst suburbs = items.map(i => i.json.suburb || '?').join(', ');\n\nconsole.log(`[RECOMELB] Sheet update: ${succeeded} rows updated, ${failed} failed`);\n\nreturn [{\n  json: {\n    succeeded,\n    failed,\n    total: items.length,\n    suburbs,\n    timestamp: new Date().toISOString(),\n  }\n}];"
      },
      "id": "node-04-aggregate",
      "name": "Aggregate Results",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        900,
        300
      ]
    },
    {
      "parameters": {
        "chatId": "={{ $vars.TELEGRAM_CHAT_ID }}",
        "text": "=\ud83d\udcca *RECOMELB Daily Update*\n\n\u2705 {{ $json.succeeded }}/{{ $json.total }} suburbs updated\n{{ $json.failed > 0 ? '\u26a0\ufe0f ' + $json.failed + ' failed' : '\ud83d\udfe2 No errors' }}\n\n\ud83d\udd50 {{ $json.timestamp }}\n\nSuburbs: {{ $json.suburbs }}",
        "additionalFields": {
          "parse_mode": "Markdown"
        }
      },
      "id": "node-05-telegram",
      "name": "Telegram Confirmation",
      "type": "n8n-nodes-base.telegram",
      "typeVersion": 1.2,
      "position": [
        1140,
        300
      ],
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      }
    }
  ],
  "connections": {
    "Daily 7am": {
      "main": [
        [
          {
            "node": "Scrape Domain Suburbs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrape Domain Suburbs": {
      "main": [
        [
          {
            "node": "Update Google Sheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Update Google Sheet": {
      "main": [
        [
          {
            "node": "Aggregate Results",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Aggregate Results": {
      "main": [
        [
          {
            "node": "Telegram Confirmation",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {
    "executionOrder": "v1",
    "saveManualExecutions": true,
    "saveExecutionProgress": true
  },
  "meta": {
    "templateCredsSetupCompleted": false
  }
}