{
  "nodes": [
    {
      "id": "b7a61410-9c3f-402c-ba3f-734d9be292dd",
      "name": "When clicking \u2018Execute workflow\u2019",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -2592,
        688
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "7f36718e-5d63-46dc-be78-e2cf53ed01ce",
      "name": "job_list",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -2144,
        688
      ],
      "parameters": {
        "url": "http://localhost:6800/listjobs.json",
        "options": {},
        "sendQuery": true,
        "queryParameters": {
          "parameters": [
            {
              "name": "project",
              "value": "null"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "b3ae02b2-0b8d-4b40-a41d-313ee77e9f93",
      "name": "filter_job",
      "type": "n8n-nodes-base.code",
      "position": [
        -1920,
        592
      ],
      "parameters": {
        "jsCode": "// get target job id (adjust source as needed)\nconst jobId = $('run_job').first().json.jobid || $flow.get('jobId');\n\n// take the first incoming item\nconst first = $input.first().json;\n\n// your HTTP node returns an array with one object; unwrap it\nconst root = Array.isArray(first) ? first[0] : first;\n\n// search ONLY in finished[]\nconst hit = (root.finished || []).find(j => j.id === jobId);\n\n// always return an object in `json`\nif (hit) {\n  return [{ json: hit }];\n} else {\n  return [{ json: { match: false, lookedFor: jobId } }];\n}"
      },
      "typeVersion": 2
    },
    {
      "id": "a19d378e-c52c-4a93-b01e-28fa4ea24c69",
      "name": "check_job_status",
      "type": "n8n-nodes-base.if",
      "position": [
        -1696,
        592
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "77b67847-b678-47a0-bb61-fc03b0d4c908",
              "operator": {
                "type": "string",
                "operation": "notEquals"
              },
              "leftValue": "={{ $json.match }}",
              "rightValue": "=false"
            }
          ]
        },
        "looseTypeValidation": true
      },
      "typeVersion": 2.2
    },
    {
      "id": "5768bb7e-4edb-4543-910c-9db8e8bb8928",
      "name": "Wait3",
      "type": "n8n-nodes-base.wait",
      "position": [
        -1472,
        768
      ],
      "parameters": {},
      "typeVersion": 1.1
    },
    {
      "id": "054d4dfc-e73f-415c-903e-78ed815f22dd",
      "name": "check_items",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -1168,
        608
      ],
      "parameters": {
        "url": "=http://localhost:6800{{ $json.items_url }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "8e777885-3dfd-40ec-b6ce-aacd949458b4",
      "name": "run_job",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -2368,
        688
      ],
      "parameters": {
        "url": "http://localhost:6800/schedule.json",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "contentType": "form-urlencoded",
        "bodyParameters": {
          "parameters": [
            {
              "name": "project",
              "value": "null"
            },
            {
              "name": "spider",
              "value": "generic_list"
            },
            {
              "name": "config_path",
              "value": "/data/sites/project001/config.yaml"
            },
            {
              "name": "project_id",
              "value": "project001"
            },
            {
              "name": "q",
              "value": "null"
            },
            {
              "name": "sci",
              "value": "123"
            },
            {
              "name": "prs",
              "value": "2"
            },
            {
              "name": "pages",
              "value": "2"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "3b3150a8-a7b2-4978-a943-5899a540f1e3",
      "name": "screenshots",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -720,
        336
      ],
      "parameters": {
        "url": "=http://localhost:8080/files/list?subpath=debug/{{ $('run_job').item.json.jobid }}/screenshots",
        "options": {},
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "Bearer $LOADER_API_TOKEN"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "abdc0fa6-7978-4ab0-a876-b5d589ed5c3c",
      "name": "job_log",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -1168,
        144
      ],
      "parameters": {
        "url": "=http://localhost:6800{{ $json.log_url }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "a6530c1e-11ec-48ce-9457-623c19073b4e",
      "name": "DL-html",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        -48,
        768
      ],
      "parameters": {
        "url": "=http://localhost:8080/files/get",
        "options": {},
        "sendQuery": true,
        "sendHeaders": true,
        "queryParameters": {
          "parameters": [
            {
              "name": "path",
              "value": "={{ $json.path }}"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "Bearer $LOADER_API_TOKEN"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "a1a1ffd4-cfd2-43fb-a650-5cbd9af0381b",
      "name": "HTML",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -720,
        720
      ],
      "parameters": {
        "url": "=http://localhost:8080/files/list?subpath=debug/{{ $('run_job').item.json.jobid }}/html",
        "options": {},
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "Bearer $LOADER_API_TOKEN"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "501fdb4c-590f-4f51-9c3b-9ddf7faab7a1",
      "name": "loop-html",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -272,
        720
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "411ed5cf-53a4-439e-9514-22c0096f7336",
      "name": "DL-screenshots",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        -48,
        384
      ],
      "parameters": {
        "url": "=http://localhost:8080/files/get",
        "options": {},
        "sendQuery": true,
        "sendHeaders": true,
        "queryParameters": {
          "parameters": [
            {
              "name": "path",
              "value": "={{ $json.path }}"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "Bearer $LOADER_API_TOKEN"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "6216bb1e-9c74-4cbf-a606-72a9c1e914e4",
      "name": "loop-screenshots",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -272,
        336
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "2774b284-44fd-4504-ab32-f2b2e68cd889",
      "name": "combine-screenshots",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        -32,
        224
      ],
      "parameters": {
        "options": {
          "includeBinaries": true
        },
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "data"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "68da093f-03b0-45a8-806d-d91a73ca3175",
      "name": "combine-html",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        -32,
        624
      ],
      "parameters": {
        "options": {
          "includeBinaries": true
        },
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "data"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "c2b2304b-e6c9-4147-908c-766bb23a08d1",
      "name": "Split HTML",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -496,
        720
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "files"
      },
      "typeVersion": 1
    },
    {
      "id": "1d64a5b5-b643-41f7-96fa-fbfc9887718a",
      "name": "Split-screenshot",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -496,
        336
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "files"
      },
      "typeVersion": 1
    },
    {
      "id": "840e6609-a4e9-4e1a-9ed7-4a9860f57a1c",
      "name": "Respond to Webhook",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        -48,
        1088
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1.4
    },
    {
      "id": "ef9b8b49-5dfb-494d-b1ec-2d75d6529a81",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -3776,
        368
      ],
      "parameters": {
        "width": 1120,
        "height": 880,
        "content": "## \ud83d\udfe2 Job Orchestration\n\nThis workflow automates the **full lifecycle** of running a Scrapy spider via Scrapyd: launching the job, monitoring until completion, collecting results, and outputting clean structured JSON.\n\n---\n\n### \ud83d\udd39 Step Breakdown\n\n1. **Manual Trigger** \u2192 Start via \u201cExecute Workflow\u201d  \n2. **Run Job (run_job)** \u2192 Submit spider run with parameters: project, spider, config path + search params (q, sci, prs, pages)  \n3. **Job Monitor** \u2192 Poll Scrapyd (job_list + filter_job) until status = *finished*  \n4. **Check Status (check_job_status)** \u2192 Branch logic (waiting vs ready)  \n5. **Collect Items (check_items)** \u2192 Retrieve items.jl for processing  \n6. **Enrich & Normalize (Filter-result)** \u2192  \n   - Parse JSONL  \n   - Deduplicate by URL (keep cheapest)  \n   - Extract structured fields: id, partNo, make, model, partName  \n   - Add metadata: domain, source, timestamp  \n   - Sort by price ascending  \n7. **Optional Debug Outputs:**  \n   - Logs (job_log)  \n   - Screenshots (batch loop + aggregate)  \n   - HTML dumps (HTML + loop + combine)  \n8. **Webhook Response** \u2192 Return enriched structured JSON to caller\n\n---\n\n### \ud83c\udfaf Purpose\n\nCreates an end-to-end pipeline: launch crawl jobs with correct config, monitor until done, transform & dedupe results, make debug artifacts available.  \n\nIn short: **a bridge between Scrapyd crawlers and downstream automation.**\n\nFull backend code and setup is in my private repo https://github.com/PersianGru?tab=repositories"
      },
      "typeVersion": 1
    },
    {
      "id": "332ea353-caba-4c62-a55a-deab67fd75a9",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -128,
        1008
      ],
      "parameters": {
        "color": 5,
        "width": 304,
        "height": 240,
        "content": "Add your webhook here to get the respond in JSON"
      },
      "typeVersion": 1
    },
    {
      "id": "96315598-79c7-41e3-bf6f-741b316e0eb9",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -80,
        560
      ],
      "parameters": {
        "color": 6,
        "width": 400,
        "height": 192,
        "content": "This will combine the html files and you can send the result via email or save locally"
      },
      "typeVersion": 1
    },
    {
      "id": "34566cfa-4de6-4e9c-b600-26e4b99dd463",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -80,
        144
      ],
      "parameters": {
        "color": 6,
        "width": 416,
        "height": 208,
        "content": "This will combine the page screenshots files and you can send the result via email or save locally"
      },
      "typeVersion": 1
    },
    {
      "id": "2f83de14-ed00-4f84-8f63-58b5fdd3c84d",
      "name": "If-job-check",
      "type": "n8n-nodes-base.if",
      "position": [
        -944,
        512
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "276b47dd-7c3b-44ea-b77a-3a2936a51db2",
              "operator": {
                "type": "string",
                "operation": "exists",
                "singleValue": true
              },
              "leftValue": "={{ $json.data }}",
              "rightValue": "on"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "9c950636-e87c-4e45-9bba-75cf81fdef48",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -976,
        480
      ],
      "parameters": {
        "color": 3,
        "width": 432,
        "height": 176,
        "content": "You can set a warning here if job result were empty"
      },
      "typeVersion": 1
    },
    {
      "id": "adad882e-3de8-4357-8758-dbb9d8da629a",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1216,
        32
      ],
      "parameters": {
        "color": 6,
        "width": 214,
        "height": 240,
        "content": "You job log and you can send via email or save it locally"
      },
      "typeVersion": 1
    },
    {
      "id": "70aa6b78-b56a-4c4c-91d3-82401e386812",
      "name": "Filter-result",
      "type": "n8n-nodes-base.code",
      "position": [
        -496,
        1088
      ],
      "parameters": {
        "jsCode": "// Input can be from HTTP Request node (string body), or a JSON wrapper with \"data\",\n// or even binary base64. This normalizes all of those into a JL string.\nfunction getJLString(firstItem) {\n  if (!firstItem) return '';\n  if (typeof firstItem.json === 'string') return firstItem.json;\n\n  if (firstItem.json) {\n    if (typeof firstItem.json.data === 'string') return firstItem.json.data;\n    if (typeof firstItem.json.body === 'string') return firstItem.json.body;\n  }\n\n  if (firstItem.binary?.data) {\n    return Buffer.from(firstItem.binary.data, 'base64').toString('utf8');\n  }\n\n  // Sometimes people pass the raw JL in a field named \"text\" or similar\n  for (const k of Object.keys(firstItem.json || {})) {\n    const v = firstItem.json[k];\n    if (typeof v === 'string' && v.includes('{\"name\"') && v.includes('\"url\"')) {\n      return v;\n    }\n  }\n  return '';\n}\n\nfunction safeParse(line) {\n  try { return JSON.parse(line); } catch { return null; }\n}\n\nfunction parsePrice(p) {\n  if (typeof p === 'number') return p;\n  if (typeof p === 'string') {\n    const num = p.replace(/[^\\d,.\\-]/g, '').replace(',', '.');\n    const f = parseFloat(num);\n    return Number.isFinite(f) ? f : null;\n  }\n  return null;\n}\n\nfunction domainFromUrl(u) {\n  try { return new URL(u).hostname; } catch { return null; }\n}\n\nfunction enrich(record) {\n  const url = record.url || '';\n  const title = record.name || '';\n  const price = parsePrice(record.price);\n  const domain = domainFromUrl(url);\n\n  let path = '';\n  try { path = new URL(url).pathname; } catch {}\n  const parts = path.split('/').filter(Boolean); // ['en','used-part','fau19088-3834801110-hyundai-ix35-light-switch']\n  const idx = parts.findIndex(s => s === 'used-part');\n  let slug = [];\n  if (idx >= 0 && parts[idx + 1]) slug = parts[idx + 1].split('-');\n\n  const id = slug[0] || null;       // e.g. fau19088\n  const partNo = slug[1] || null;   // e.g. 3834801110\n  const make = slug[2] || null;     // e.g. hyundai\n  const model = slug[3] || null;    // e.g. ix35\n  const partName = slug.slice(4).join('-') || null; // e.g. light-switch\n\n  return {\n    title,\n    url,\n    price,\n    currency: 'EUR',\n    domain,\n    source: domain,\n    id,\n    partNo,\n    make,\n    model,\n    partName,\n    ts: new Date().toISOString(),\n  };\n}\n\n// ---- main ----\nconst jl = getJLString(items[0]);\nconst lines = jl.split(/\\r?\\n/).map(l => l.trim()).filter(Boolean);\n\nconst parsed = [];\nfor (const ln of lines) {\n  let obj = safeParse(ln);\n  if (!obj) {\n    // Try to recover bare JSON when the line has prefixes/suffixes\n    const m = ln.match(/\\{.*\\}$/);\n    if (m) obj = safeParse(m[0]);\n  }\n  if (obj && obj.url) parsed.push(obj);\n}\n\n// Dedup by URL (keep cheapest)\nconst byUrl = new Map();\nfor (const r of parsed) {\n  const e = enrich(r);\n  if (!e.url) continue;\n  const cur = byUrl.get(e.url);\n  if (!cur || (e.price != null && (cur.price == null || e.price < cur.price))) {\n    byUrl.set(e.url, e);\n  }\n}\n\n// Sort by price asc (nulls last)\nconst out = Array.from(byUrl.values()).sort((a, b) => {\n  const ap = a.price == null ? Infinity : a.price;\n  const bp = b.price == null ? Infinity : b.price;\n  return ap - bp;\n});\n\n// Return n8n items\nreturn out.map(row => ({ json: row }));"
      },
      "typeVersion": 2
    },
    {
      "id": "96c596b3-fbdf-471e-afe4-bc645a4d510c",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2240,
        384
      ],
      "parameters": {
        "color": 7,
        "width": 960,
        "height": 736,
        "content": "## Monitoring job"
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "HTML": {
      "main": [
        [
          {
            "node": "Split HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wait3": {
      "main": [
        [
          {
            "node": "job_list",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "DL-html": {
      "main": [
        [
          {
            "node": "loop-html",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "run_job": {
      "main": [
        [
          {
            "node": "job_list",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "job_list": {
      "main": [
        [
          {
            "node": "filter_job",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "loop-html": {
      "main": [
        [
          {
            "node": "combine-html",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "DL-html",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split HTML": {
      "main": [
        [
          {
            "node": "loop-html",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "filter_job": {
      "main": [
        [
          {
            "node": "check_job_status",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "check_items": {
      "main": [
        [
          {
            "node": "Filter-result",
            "type": "main",
            "index": 0
          },
          {
            "node": "If-job-check",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "screenshots": {
      "main": [
        [
          {
            "node": "Split-screenshot",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If-job-check": {
      "main": [
        [
          {
            "node": "screenshots",
            "type": "main",
            "index": 0
          },
          {
            "node": "HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter-result": {
      "main": [
        [
          {
            "node": "Respond to Webhook",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "DL-screenshots": {
      "main": [
        [
          {
            "node": "loop-screenshots",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split-screenshot": {
      "main": [
        [
          {
            "node": "loop-screenshots",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "check_job_status": {
      "main": [
        [
          {
            "node": "check_items",
            "type": "main",
            "index": 0
          },
          {
            "node": "job_log",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Wait3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "loop-screenshots": {
      "main": [
        [
          {
            "node": "combine-screenshots",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "DL-screenshots",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking \u2018Execute workflow\u2019": {
      "main": [
        [
          {
            "node": "run_job",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}