The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and activate it. Full import guide →
{
"name": "\u667a\u6167\u623f\u5c4b A17-A19 \u623f\u5c4b\u722c\u87f2",
"nodes": [
{
"parameters": {},
"id": "start-node",
"name": "\u624b\u52d5\u89f8\u767c",
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
240,
300
]
},
{
"parameters": {
"values": {
"string": [
{
"name": "url_A17",
"value": "https://is.ycut.com.tw/is/development/webcase/search/list?s=U2FsdGVkX19N8KtgVk8a7J4fVrJj4lTBFt57kd%2Bn8XvO%2FG%2FCCImjy%2Fy70ZSwAB6kvk8WnOyFgxP%2F1vhEtT0zeTP4TVcx1lmSa8H23t7nUbSehYMHEe2QMU1qQEofZW5i6cLTL8c4lsUjbGhIw837VCK0IdgtvwiFNjJ8lzX5PCIYV887OJBBjia2Bi3teV3BI9X4wjsosBvcPfn%2BRS0rijZa7u5Kiw5y3KfVIhyUYvBlkYz%2BW2rZ6%2F6caw4z3z%2B2"
},
{
"name": "url_A18",
"value": "https://is.ycut.com.tw/is/development/webcase/search/list?s=U2FsdGVkX19jqkcsEnWAds%2F%2BG55BAHN7aXSabjfr95tDC8PjmCtowq9hvhs7M6Qwuo9m3fUgDuPVZUbRfVqInDlLn8xdft7%2FXPE0q1Qj0qMfoFm0lFM%2F4AGooSEmp%2BbcVWDg42Lf7MqCYyKqUF9ib%2BoW13GY6r5EpAMmhZTIxGcKn%2FfwTc4R7eK0KBWudopbWEscj4EsNfwW25F9XVNeIB1FNzaVs%2Ful0juKSHM6T0w6NSZxdDK71kMnJce7usES"
},
{
"name": "url_A19",
"value": "https://is.ycut.com.tw/is/development/webcase/search/list?s=U2FsdGVkX18BnPV0oqodGKNchuZLZDcLeYWrxoljM1Ij6yyEtJpk%2BdM0ZDd%2Bg5si8Q3jU182CyWuOKBxmWy2i126ha6MoEVjgSO0hM9JF3PzAOyScJzt59EqeVgVL3Vjxb5NaAumwIAVoaMXniYgLu5KeLSLfNFn1pjX%2FqNbh4ruC5fiTLn9msUWtQgzg3NT%2B%2B39OkVHnefQi9dV3DuC%2F%2B%2Bte7pGiId55rJnoaRbJzIyi8Jjo1H6PVV15oIP9ScI"
}
]
},
"options": {}
},
"id": "set-urls",
"name": "\u8a2d\u5b9a URL \u6e05\u55ae",
"type": "n8n-nodes-base.set",
"typeVersion": 3.4,
"position": [
460,
300
]
},
{
"parameters": {
"jsCode": "// \u5c07 URL \u62c6\u5206\u6210\u591a\u500b\u9805\u76ee\nconst urls = [\n { url: $input.first().json.url_A17, station: 'A17' },\n { url: $input.first().json.url_A18, station: 'A18' },\n { url: $input.first().json.url_A19, station: 'A19' }\n];\n\nreturn urls.map(item => ({ json: item }));"
},
"id": "split-urls",
"name": "\u62c6\u5206 URL",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
680,
300
]
},
{
"parameters": {
"method": "GET",
"url": "={{ $json.url }}",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
},
{
"name": "Accept-Language",
"value": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7"
}
]
},
"options": {
"response": {
"response": {
"fullResponse": false,
"responseFormat": "text"
}
}
}
},
"id": "http-request",
"name": "HTTP \u8acb\u6c42",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
900,
300
]
},
{
"parameters": {
"jsCode": "// n8n Function Node - \u667a\u6167\u623f\u5c4b\u7db2 HTML \u89e3\u6790\u5668\uff08\u5b89\u5168 Regex \u7248\uff09\n// \u89e3\u6790 A17~A19 \u5927\u5712\u5340\u623f\u5c4b\u7269\u4ef6\u8cc7\u6599\n\n// \u5b9a\u7fa9\u6240\u6709\u6b63\u5247\u8868\u9054\u5f0f\uff08\u4f7f\u7528 new RegExp \u683c\u5f0f\uff09\nconst stripTagPattern = new RegExp('<[^>]+>', 'g');\n\n// \u8f14\u52a9\u51fd\u6578\uff1a\u6e05\u9664 HTML \u6a19\u7c64\nfunction stripHtml(text) {\n if (!text) return '';\n return text.replace(stripTagPattern, '').trim();\n}\n\n// \u8f14\u52a9\u51fd\u6578\uff1a\u63d0\u53d6\u55ae\u4e00\u5339\u914d\nfunction extractFirst(pattern, text) {\n if (!text) return '';\n const regex = new RegExp(pattern.source, pattern.flags);\n const match = regex.exec(text);\n return match ? stripHtml(match[1] || match[0]) : '';\n}\n\n// \u8f14\u52a9\u51fd\u6578\uff1a\u63d0\u53d6\u6240\u6709\u5339\u914d\nfunction extractAll(pattern, text) {\n if (!text) return [];\n const results = [];\n const regex = new RegExp(pattern.source, pattern.flags);\n let match;\n while ((match = regex.exec(text)) !== null) {\n results.push(match[1] || match[0]);\n }\n return results;\n}\n\n// \u4e3b\u7a0b\u5f0f\nconst items = $input.all();\nconst results = [];\n\nfor (const item of items) {\n const station = item.json.station || '';\n const sourceUrl = item.json.url || '';\n \n // \u81ea\u52d5\u5075\u6e2c HTML \u4f86\u6e90\n let html = '';\n if (item.json && item.json.data) {\n html = item.json.data;\n } else if (item.json && item.json.html) {\n html = item.json.html;\n } else if (item.json && typeof item.json === 'string') {\n html = item.json;\n }\n \n if (!html) continue;\n \n // \u5c0b\u627e\u7269\u4ef6\u5340\u584a - \u6839\u64da\u667a\u6167\u623f\u5c4b\u7db2\u7d50\u69cb\n // \u6bcf\u500b\u7269\u4ef6\u901a\u5e38\u7531 <div class=\"...\"> \u5305\u88f9\n const propertyPattern = new 
RegExp('<div[^>]*class=\"[^\"]*today-item[^\"]*\"[^>]*>([\\\\s\\\\S]*?)(?=<div[^>]*class=\"[^\"]*today-item|<div[^>]*class=\"[^\"]*pagination|$)', 'gi');\n \n // \u5099\u7528\uff1a\u5c0b\u627e\u5305\u542b\u50f9\u683c\u7684\u5340\u584a\n const priceBlockPattern = new RegExp('([\\\\s\\\\S]{200,2000}?)(\\\\d{1,2}[,\uff0c]?\\\\d{3}|\\\\d{3,4})\\\\s*\u842c', 'gi');\n \n let propertyMatches = [];\n let match;\n \n // \u5617\u8a66\u65b9\u6cd51\n while ((match = propertyPattern.exec(html)) !== null) {\n propertyMatches.push(match[1]);\n }\n \n // \u5982\u679c\u65b9\u6cd51\u5931\u6557\uff0c\u5617\u8a66\u65b9\u6cd52\n if (propertyMatches.length === 0) {\n const altPattern = new RegExp('<div[^>]*>([\\\\s\\\\S]*?(?:\u5efa\u7269|\u4e3b\u5efa)[\\\\s\\\\S]*?\u842c[\\\\s\\\\S]*?)</div>', 'gi');\n while ((match = altPattern.exec(html)) !== null) {\n propertyMatches.push(match[1]);\n }\n }\n \n // \u5982\u679c\u4ecd\u7136\u6c92\u6709\u7d50\u679c\uff0c\u5c07\u6574\u500b\u9801\u9762\u4f5c\u70ba\u55ae\u4e00\u5340\u584a\u8655\u7406\n if (propertyMatches.length === 0) {\n propertyMatches = [html];\n }\n \n for (const block of propertyMatches) {\n // \u63d0\u53d6\u793e\u5340\u540d\u7a31 - \u901a\u5e38\u5728\u6a19\u984c\u6216\u9023\u7d50\u4e2d\n const titlePatterns = [\n new RegExp('<a[^>]*title=\"([^\"]+)\"', 'i'),\n new RegExp('<h[1-6][^>]*>([\\\\s\\\\S]*?)</h[1-6]>', 'i'),\n new RegExp('class=\"[^\"]*title[^\"]*\"[^>]*>([^<]+)', 'i'),\n new RegExp('(?:\u9752\u57d4|A17|A18|A19)[^<]{0,50}(?:\u793e\u5340|\u5927\u6a13|\u82b1\u5712|\u57ce|\u82d1|\u95a3|\u90b8|\u5712)[^<]{0,20}', 'i')\n ];\n \n let communityName = '';\n for (const pattern of titlePatterns) {\n const match = pattern.exec(block);\n if (match) {\n communityName = stripHtml(match[1] || match[0]);\n if (communityName.length > 2) break;\n }\n }\n \n // \u63d0\u53d6\u5730\u5740\n const addressPatterns = [\n new RegExp('5\u2605[\\\\s]*([^<]+(?:\u5e02|\u5340|\u8def|\u8857|\u5df7|\u5f04|\u865f)[^<]*)', 'i'),\n new 
RegExp('\u6843\u5712\u5e02[^<]*(?:\u5927\u5712\u5340|\u4e2d\u58e2\u5340|\u8606\u7af9\u5340)[^<]+', 'i'),\n new RegExp('(?:\u5730\u5740|\u4f4d\u7f6e)[\uff1a:\\\\s]*([^<]+)', 'i')\n ];\n \n let address = '';\n for (const pattern of addressPatterns) {\n const match = pattern.exec(block);\n if (match) {\n address = stripHtml(match[1] || match[0]);\n if (address.length > 5) break;\n }\n }\n \n // \u63d0\u53d6\u6a13\u5c64\n const floorPatterns = [\n new RegExp('(\\\\d+)\u6a13[/\uff0f](\u5171|\u7e3d)?(\\\\d+)\u6a13', 'i'),\n new RegExp('\u6a13\u5c64[\uff1a:\\\\s]*(\\\\d+[^<\\\\n]{0,20})', 'i'),\n new RegExp('(\\\\d{1,2})\u6a13', 'i')\n ];\n \n let floor = '';\n for (const pattern of floorPatterns) {\n const match = pattern.exec(block);\n if (match) {\n floor = stripHtml(match[0]);\n break;\n }\n }\n \n // \u63d0\u53d6\u683c\u5c40\n const layoutPatterns = [\n new RegExp('(\\\\d)\u623f[\uff08(]?(\u5ef3)?(\\\\d)?\u5ef3?(\u885b)?(\\\\d)?\u885b?[\uff09)]?', 'i'),\n new RegExp('\u683c\u5c40[\uff1a:\\\\s]*([^<\\\\n]+)', 'i')\n ];\n \n let layout = '';\n for (const pattern of layoutPatterns) {\n const match = pattern.exec(block);\n if (match) {\n layout = stripHtml(match[0]);\n if (layout.includes('\u623f')) break;\n }\n }\n \n // \u63d0\u53d6\u5efa\u7269\u576a\u6578\n const buildingAreaPatterns = [\n new RegExp('\u5efa\u7269[\uff1a:\\\\s]*(\\\\d+\\\\.?\\\\d*)\\\\s*\u576a?', 'i'),\n new RegExp('\u6b0a\u72c0[\uff1a:\\\\s]*(\\\\d+\\\\.?\\\\d*)\\\\s*\u576a', 'i'),\n new RegExp('\u576a\u6578[\uff1a:\\\\s]*(\\\\d+\\\\.?\\\\d*)\\\\s*\u576a?', 'i')\n ];\n \n let buildingArea = '';\n for (const pattern of buildingAreaPatterns) {\n const match = pattern.exec(block);\n if (match) {\n buildingArea = match[1] || '';\n if (buildingArea) break;\n }\n }\n \n // \u63d0\u53d6\u4e3b\u5efa\u576a\u6578\n const mainAreaPatterns = [\n new RegExp('\u4e3b\u5efa[\uff1a:\\\\s]*(\\\\d+\\\\.?\\\\d*)\\\\s*\u576a?', 'i'),\n new RegExp('\u5ba4\u5167[\uff1a:\\\\s]*(\\\\d+\\\\.?\\\\d*)\\\\s*\u576a', 'i')\n ];\n 
\n let mainArea = '';\n for (const pattern of mainAreaPatterns) {\n const match = pattern.exec(block);\n if (match) {\n mainArea = match[1] || '';\n if (mainArea) break;\n }\n }\n \n // \u63d0\u53d6\u50f9\u683c\n const pricePattern = new RegExp('(\\\\d{1,2}[,\uff0c]?\\\\d{3}|\\\\d{3,4})\\\\s*\u842c', 'gi');\n let price = '';\n const priceMatch = pricePattern.exec(block);\n if (priceMatch) {\n price = priceMatch[1].replace(/[,\uff0c]/g, '') + '\u842c';\n }\n \n // \u63d0\u53d6\u4f86\u6e90\u9023\u7d50\n const linkPattern = new RegExp('href=\"(https?://[^\"]+)\"', 'gi');\n let sourceLink = '';\n const linkMatch = linkPattern.exec(block);\n if (linkMatch) {\n sourceLink = linkMatch[1];\n }\n \n // \u53ea\u6709\u5305\u542b\u6709\u6548\u8cc7\u6599\u7684\u5340\u584a\u624d\u8f38\u51fa\n if (communityName || address || buildingArea || price) {\n results.push({\n json: {\n \u6377\u904b\u7ad9: station,\n \u793e\u5340\u540d\u7a31: communityName,\n \u5730\u5740: address,\n \u6a13\u5c64: floor,\n \u683c\u5c40: layout,\n \u5efa\u7269\u576a\u6578: buildingArea,\n \u4e3b\u5efa\u576a\u6578: mainArea,\n \u7e3d\u50f9: price,\n \u4f86\u6e90\u7db2\u7ad9\u9023\u7d50: sourceLink || sourceUrl\n }\n });\n }\n }\n}\n\n// \u5982\u679c\u6c92\u6709\u7d50\u679c\uff0c\u8fd4\u56de\u63d0\u793a\nif (results.length === 0) {\n return [{\n json: {\n message: '\u672a\u627e\u5230\u7b26\u5408\u689d\u4ef6\u7684\u623f\u5c4b\u7269\u4ef6',\n hint: '\u8acb\u78ba\u8a8d HTML \u5167\u5bb9\u662f\u5426\u6b63\u78ba\u50b3\u5165\uff0c\u6216\u7db2\u7ad9\u7d50\u69cb\u53ef\u80fd\u5df2\u8b8a\u66f4',\n rawDataPreview: items[0]?.json?.data?.substring(0, 500) || '\u7121\u8cc7\u6599'\n }\n }];\n}\n\nreturn results;"
},
"id": "parse-html",
"name": "\u89e3\u6790 HTML",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1120,
300
]
},
{
"parameters": {
"options": {}
},
"id": "output",
"name": "\u8f38\u51fa\u7d50\u679c",
"type": "n8n-nodes-base.noOp",
"typeVersion": 1,
"position": [
1340,
300
]
}
],
"connections": {
"\u624b\u52d5\u89f8\u767c": {
"main": [
[
{
"node": "\u8a2d\u5b9a URL \u6e05\u55ae",
"type": "main",
"index": 0
}
]
]
},
"\u8a2d\u5b9a URL \u6e05\u55ae": {
"main": [
[
{
"node": "\u62c6\u5206 URL",
"type": "main",
"index": 0
}
]
]
},
"\u62c6\u5206 URL": {
"main": [
[
{
"node": "HTTP \u8acb\u6c42",
"type": "main",
"index": 0
}
]
]
},
"HTTP \u8acb\u6c42": {
"main": [
[
{
"node": "\u89e3\u6790 HTML",
"type": "main",
"index": 0
}
]
]
},
"\u89e3\u6790 HTML": {
"main": [
[
{
"node": "\u8f38\u51fa\u7d50\u679c",
"type": "main",
"index": 0
}
]
]
}
},
"settings": {
"executionOrder": "v1"
},
"staticData": null,
"tags": [],
"triggerCount": 0,
"updatedAt": "2026-01-16T07:00:00.000Z",
"versionId": "1"
}
For the full experience, including quality scoring and batch install features for each workflow, upgrade to Pro.
About this workflow
智慧房屋 A17-A19 房屋爬蟲. Uses httpRequest to fetch three listing pages (A17, A18, A19) and a Code node to parse the returned HTML. Manual trigger; 6 nodes.
Source: https://github.com/Harris30732/yungching-realty/blob/cc417bbace3fff423a70e369fb1c3e125c14d473/n8n-workflows/ycut_property_scraper.json — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
This workflow uses the Zyte API to automatically detect and extract structured data from E-commerce sites, Articles, Job Boards, and Search Engine Results (SERP) - no custom CSS selectors required.
Automate LinkedIn lead generation by scraping comments from targeted posts and enriching profiles with detailed data
This workflow contains community nodes that are only compatible with the self-hosted version of n8n.
This workflow runs a spider job in the background via Scrapyd, using a YAML config that defines selectors and parsing rules. When triggered, it schedules the spider with parameters (query, project ID, …
This n8n workflow collects leads from Google Maps, scrapes their websites via direct HTTP requests, and extracts valid email addresses — all while mimicking real user behavior to improve scraping reliability…