{
  "name": "Xiaobot Article Harvester v2",
  "nodes": [
    {
      "parameters": {},
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        320,
        240
      ],
      "id": "7400be76-d60f-4bba-89e2-d23a819f52c2",
      "name": "When clicking \u2018Execute workflow\u2019"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "={{ $node['\u914d\u7f6e\u4e13\u680f\u53c2\u6570'].json.crawl4ai_server }}",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Content-Type",
              "value": "application/json"
            },
            {
              "name": "Accept",
              "value": "application/json"
            }
          ]
        },
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={\n  \"urls\": [\"{{ $('\u914d\u7f6e\u4e13\u680f\u53c2\u6570').item.json.target_url }}\"],\n  \"browser_config\": {\n    \"type\": \"BrowserConfig\", \n    \"params\": {\n      \"headless\": true,\n      \"viewport\": {\"type\": \"dict\", \"value\": {\"width\": 1200, \"height\": 800}},\n      \"headers\": {\n        \"authorization\": \"Bearer {{ $('\u914d\u7f6e\u4e13\u680f\u53c2\u6570').item.json.xiaobot_authorization }}\",\n        \"user-agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36\"\n      }\n    }\n  },\n  \"crawler_config\": {\n    \"type\": \"CrawlerRunConfig\",\n    \"params\": {\n      \"page_timeout\": 180000,\n      \"wait_until\": \"networkidle\",\n      \"delay_before_return_html\": 5.0,\n      \"js_code\": [\n        \"window.apiData = []; const originalXHR = window.XMLHttpRequest; window.XMLHttpRequest = function() { const xhr = new originalXHR(); const originalOpen = xhr.open; const originalSend = xhr.send; xhr.open = function(method, url) { this._url = url; return originalOpen.apply(this, arguments); }; xhr.send = function(data) { if (this._url && this._url.includes('api.xiaobot.net')) { const originalOnReadyStateChange = this.onreadystatechange; this.onreadystatechange = function() { if (this.readyState === 4 && this.status === 200) { try { const data = JSON.parse(this.responseText); window.apiData.push(data); } catch(e) { } } if (originalOnReadyStateChange) originalOnReadyStateChange.apply(this, arguments); }; } return originalSend.apply(this, arguments); }; return xhr; };\",\n        \"document.querySelector('div.active')?.click();\",\n        \"await new Promise(resolve => setTimeout(resolve, 5000));\",\n        \"let lastApiCount = 0; let noNewDataCount = 0; for(let i=0; i<50; i++) { window.scrollTo(0, document.body.scrollHeight); await new Promise(resolve => setTimeout(resolve, 5000)); if(window.apiData.length === lastApiCount) { noNewDataCount++; if(noNewDataCount >= 3) break; } else { noNewDataCount = 0; lastApiCount = window.apiData.length; } }\",\n        \"await new Promise(resolve => setTimeout(resolve, 5000)); window.n8nResult = { articles: [], totalCount: 0, metadata: { timestamp: new Date().toISOString(), source: 'xiaobot', totalApiCalls: window.apiData.length } }; window.apiData.forEach(apiResponse => { if (apiResponse.data && Array.isArray(apiResponse.data)) { apiResponse.data.forEach(article => { window.n8nResult.articles.push({ id: article.uuid, title: article.title, content: article.content, published_at: article.created_at, tags: article.tag_names || [] }); }); } }); window.n8nResult.totalCount = window.n8nResult.articles.length; return window.n8nResult;\"\n      ],\n      \"css_selector\": \"#posts\",\n      \"cache_mode\": \"bypass\"\n    }\n  }\n}",
        "options": {
          "timeout": 1000000
        }
      },
      "id": "e6aebd8b-767c-4e19-928d-59a1c70cb064",
      "name": "Crawl4AI\u83b7\u53d6\u6570\u636e\u5c0f\u62a5\u7ae5",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        1040,
        40
      ],
      "credentials": {
        "httpHeaderAuth": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "options": {}
      },
      "type": "n8n-nodes-base.splitInBatches",
      "typeVersion": 3,
      "position": [
        620,
        440
      ],
      "id": "bc8ece36-6c3f-411b-8784-66edba37a601",
      "name": "Loop Over Items"
    },
    {
      "parameters": {
        "content": "## \u5c0f\u62a5\u7ae5\u6587\u7ae0\u81ea\u52a8\u4e0b\u8f7d\n\n### \ud83d\udccb \u57fa\u672c\u4fe1\u606f\n- \u5de5\u4f5c\u6d41\u540d\u79f0: Xiaobot Article Harvester\n- \u7248\u672c: v1.0.2\n- \u521b\u5efa\u8005: LQ\n- \u5fae\u4fe1\uff1acloud-native-101\n- \u516c\u4f17\u53f7: \u6797\u6708\u534a\u5b50\u7684AI\u7b14\u8bb0\n\n### \ud83c\udfaf \u529f\u80fd\u6982\u8ff0\n\u8fd9\u4e2a\u5de5\u4f5c\u6d41\u5b9e\u73b0\u4e86\u4ece\u5c0f\u62a5\u7ae5(Xiaobot)\u5e73\u53f0\u81ea\u52a8\u6279\u91cf\u4e0b\u8f7d\u6587\u7ae0\u5185\u5bb9\u5e76\u8f6c\u6362\u4e3a Markdown \u683c\u5f0f\u6587\u4ef6\u7684\u5b8c\u6574\u6d41\u7a0b\u3002\u901a\u8fc7\u6a21\u62df\u6d4f\u89c8\u5668\u884c\u4e3a**\u7ed5\u8fc7\u52a8\u6001 API \u7b7e\u540d\u9650\u5236**\uff0c\u5b9e\u73b0\u5168\u81ea\u52a8\u5316\u7684\u6587\u7ae0\u91c7\u96c6\u4e0e\u683c\u5f0f\u8f6c\u6362\u3002\n\n\n### \ud83c\udfc6 \u5de5\u4f5c\u6d41\u4f18\u52bf\n  1. \u81ea\u52a8\u5316\u7a0b\u5ea6\u9ad8: \u4e00\u952e\u5b8c\u6210\u4ece\u8bbf\u95ee\u5230\u4fdd\u5b58\u7684\u5168\u6d41\u7a0b\n  2. \u7ed5\u8fc7\u6280\u672f\u9650\u5236: \u5de7\u5999\u89e3\u51b3\u52a8\u6001 API \u7b7e\u540d\u95ee\u9898\n  3. \u6570\u636e\u5b8c\u6574\u6027: \u901a\u8fc7\u65e0\u9650\u6eda\u52a8\u83b7\u53d6\n\n### \u2699\ufe0f \u914d\u7f6e\u8bf4\u660e\n1. \u914d\u7f6e\u5c0f\u62a5\u7ae5\u4e13\u680f\u5730\u5740\uff1a\u5c06 `YOUR_COLUMN_ID` \u66ff\u6362\u4e3a\u91c7\u96c6\u4e13\u680fID\n2. \u914d\u7f6e\u5c0f\u62a5\u7ae5 authorization\uff1a\u5c06 `YOUR_XIAOBOT_TOKEN_HERE` \u66ff\u6362\u4e3a\u4f60\u81ea\u5df1\u7684 Authorization",
        "height": 780,
        "width": 580
      },
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        -40,
        -80
      ],
      "id": "f8f1bdda-6dcc-43ac-a9f5-d3073962ea95",
      "name": "Sticky Note"
    },
    {
      "parameters": {
        "content": "## \u914d\u7f6e\u4e13\u680f\u53c2\u6570 \n\u914d\u7f6e\u5c0f\u62a5\u7ae5\u4e13\u680f URL \u548c \u8ba4\u8bc1\u4fe1\u606f",
        "height": 300,
        "width": 360,
        "color": 4
      },
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        560,
        -80
      ],
      "id": "4ab64bc7-5fa6-425c-b711-14126b543fd0",
      "name": "Sticky Note1"
    },
    {
      "parameters": {
        "content": "## \u6293\u53d6\u5c0f\u62a5\u7ae5\u6587\u7ae0\n- \u4f7f\u7528 Crawl4AI \u6a21\u62df\u6d4f\u89c8\u5668\u8bbf\u95ee\u5c0f\u62a5\u7ae5\u5e76\u62e6\u622a API \u54cd\u5e94\n- \ud83d\udce2 \u5305\u542b\u6240\u6709\u6587\u7ae0\u4fe1\u606f\u7684\u7ed3\u6784\u5316 JSON \u6570\u636e(**\u6240\u4ee5\u6709\u70b9\u6162**)\n- \u5c06\u6587\u7ae0\u6570\u7ec4\u62c6\u5206\u4e3a\u5355\u4e2a\u6587\u7ae0\u9879",
        "height": 300,
        "width": 720,
        "color": 5
      },
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        940,
        -80
      ],
      "id": "7b77fe2f-8956-4502-94e8-4431fbffab9e",
      "name": "Sticky Note2"
    },
    {
      "parameters": {
        "fieldToSplitOut": "results[0].js_execution_result.results[4].articles",
        "options": {}
      },
      "type": "n8n-nodes-base.splitOut",
      "typeVersion": 1,
      "position": [
        1520,
        80
      ],
      "id": "de9e066a-0ec3-45fc-b8e2-b965436bf7bc",
      "name": "\u62c6\u5206\u6587\u7ae0\u5217\u8868"
    },
    {
      "parameters": {
        "jsCode": "// \u83b7\u53d6 raw_markdown \u5185\u5bb9\nconst rawMarkdown = $input.first().json.data;\n\n// \u8f6c\u6362\u4e3a\u4e8c\u8fdb\u5236\u6570\u636e\nconst binaryData = Buffer.from(rawMarkdown, 'utf8');\n\nconst item = $input.first().json;\n\n// \u5b89\u5168\u7684\u6587\u4ef6\u540d\u6e05\u7406\u51fd\u6570\nfunction createSafeFilename(title) {\n  if (!title) return 'untitled';\n  \n  return title\n    .replace(/[\\/\\\\:*?\"<>|]/g, '-')    // \u66ff\u6362\u975e\u6cd5\u5b57\u7b26\u4e3a\u6a2a\u7ebf\n    .replace(/\\s+/g, '_')              // \u7a7a\u683c\u66ff\u6362\u4e3a\u4e0b\u5212\u7ebf\n    .replace(/-{2,}/g, '-')            // \u591a\u4e2a\u6a2a\u7ebf\u5408\u5e76\n    .replace(/_{2,}/g, '_')            // \u591a\u4e2a\u4e0b\u5212\u7ebf\u5408\u5e76\n    .replace(/^[-_]+|[-_]+$/g, '')     // \u79fb\u9664\u9996\u5c3e\u7684\u6a2a\u7ebf\u4e0b\u5212\u7ebf\n    .substring(0, 80);                 // \u9650\u5236\u957f\u5ea6\n}\n\n// \u4ece target_url \u63d0\u53d6\u4e13\u680f\u6807\u8bc6\u7b26\nfunction extractColumnId(targetUrl) {\n  // \u5904\u7406\u4e24\u79cd\u60c5\u51b5: /p/30AI \u548c /p/30AI/\n  const match = targetUrl.match(/\\/p\\/([^\\/]+)\\/?$/);\n  return match ? match[1] : 'unknown';\n}\n\n// \u83b7\u53d6\u4e13\u680f\u6807\u8bc6\u7b26 (\u4ece\u524d\u9762\u7684\u8282\u70b9\u83b7\u53d6 target_url)\nconst targetUrl = $('\u914d\u7f6e\u4e13\u680f\u53c2\u6570').first().json.target_url;\nconst outputBasePath = $('\u914d\u7f6e\u4e13\u680f\u53c2\u6570').first().json.output_base_path;\nconst columnId = extractColumnId(targetUrl);\n\nconst safeTitle = createSafeFilename(item.title);\nconst publishDate = $input.first().json.published_at ? $input.first().json.published_at.split('T')[0] : 'unknown';\n\n// \u8fd4\u56de\u4e8c\u8fdb\u5236\u6570\u636e\nreturn [{\n  json: {\n    columnId: columnId,\n    fullPath: `${outputBasePath}/${columnId}/${publishDate}_${safeTitle}.md`\n  },\n  binary: {\n    data: {\n      data: binaryData.toString('base64'),\n      mimeType: 'text/markdown',\n      fileName: `${publishDate}_${safeTitle}`\n    }\n  }\n}];"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1080,
        440
      ],
      "id": "be606b27-cc5f-4930-b308-c74a8cb9b47c",
      "name": "\u5904\u7406\u6587\u4ef6\u4fe1\u606f"
    },
    {
      "parameters": {
        "amount": 1
      },
      "type": "n8n-nodes-base.wait",
      "typeVersion": 1.1,
      "position": [
        1500,
        440
      ],
      "id": "026f7f20-e08e-4c1a-a6b6-a2cbef5d8c6c",
      "name": "\u5199\u5165\u5ef6\u8fdf"
    },
    {
      "parameters": {
        "content": "## \u6587\u7ae0\u5904\u7406\u4e0e\u4fdd\u5b58\n  - \u5faa\u73af\u5904\u7406\u6bcf\u7bc7\u6587\u7ae0\uff0c\u8f6c\u6362\u683c\u5f0f\u5e76\u4fdd\u5b58\u4e3a\u672c\u5730\u6587\u4ef6\n  - \u5bf9\u6bcf\u4e2a\u6587\u7ae0\u9879\u8fdb\u884cHTML\u5230Markdown\u8f6c\u6362\u548c\u6587\u4ef6\u5199\u5165\u64cd\u4f5c",
        "height": 440,
        "width": 1100,
        "color": 6
      },
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        560,
        260
      ],
      "id": "9cf21964-3ada-4892-aef9-848dad73b5db",
      "name": "Sticky Note3"
    },
    {
      "parameters": {
        "html": "={{ $json.content }}",
        "options": {}
      },
      "type": "n8n-nodes-base.markdown",
      "typeVersion": 1,
      "position": [
        880,
        440
      ],
      "id": "0a7061b5-80bc-4a6b-a9c5-b0ebbbeda13f",
      "name": "HTML \u8f6c Markdown"
    },
    {
      "parameters": {
        "operation": "write",
        "fileName": "={{ $('\u5904\u7406\u6587\u4ef6\u4fe1\u606f').first().json.fullPath }}",
        "options": {}
      },
      "type": "n8n-nodes-base.readWriteFile",
      "typeVersion": 1,
      "position": [
        1300,
        440
      ],
      "id": "51d8fe46-8b7c-4182-b461-053c4bf3163a",
      "name": "\u4fdd\u5b58 Markdown \u6587\u4ef6"
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "973d2cdb-d4dc-4533-bbb7-6445beb13a8d",
              "name": "crawl4ai_server",
              "value": "http://host.docker.internal:11235/crawl",
              "type": "string"
            },
            {
              "id": "7a42dfa4-b43a-44eb-a1ae-39f5eb37ee3f",
              "name": "target_url",
              "value": "https://www.xiaobot.net/p/<YOUR_COLUMN_ID>",
              "type": "string"
            },
            {
              "id": "f2208a2d-847c-4134-8d8c-02b7c710ffe2",
              "name": "xiaobot_authorization",
              "value": "<YOUR_XIAOBOT_TOKEN_HERE>",
              "type": "string"
            },
            {
              "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
              "name": "output_base_path",
              "value": "/tmp",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "66f5d392-c14c-4d0d-a700-df6d586c36b8",
      "name": "\u914d\u7f6e\u4e13\u680f\u53c2\u6570",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        600,
        40
      ]
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict",
            "version": 2
          },
          "conditions": [
            {
              "id": "2f58ac4f-517e-404c-b7d8-9da8cc8930b6",
              "leftValue": "={{ $('Crawl4AI\u83b7\u53d6\u6570\u636e\u5c0f\u62a5\u7ae5').item.json.results[0].js_execution_result.results[4].totalCount }}",
              "rightValue": 0,
              "operator": {
                "type": "number",
                "operation": "gt"
              }
            }
          ],
          "combinator": "and"
        },
        "options": {}
      },
      "type": "n8n-nodes-base.if",
      "typeVersion": 2.2,
      "position": [
        1260,
        40
      ],
      "id": "b0add70d-114c-4e57-b644-53fd56f66b6b",
      "name": "If"
    },
    {
      "parameters": {},
      "type": "n8n-nodes-base.noOp",
      "typeVersion": 1,
      "position": [
        1520,
        -60
      ],
      "id": "b9757130-850a-4766-953c-cbb5bab90123",
      "name": "No Operation, do nothing"
    },
    {
      "parameters": {
        "command": "=mkdir -p {{ $('\u914d\u7f6e\u4e13\u680f\u53c2\u6570').first().json.output_base_path }}/{{ $('\u914d\u7f6e\u4e13\u680f\u53c2\u6570').first().json.target_url.split('/p/')[1].replace('/', '') }}"
      },
      "type": "n8n-nodes-base.executeCommand",
      "typeVersion": 1,
      "position": [
        780,
        40
      ],
      "id": "841bad96-aa4d-4715-a891-252d7356cf05",
      "name": "\u521b\u5efa\u76ee\u5f55",
      "alwaysOutputData": false
    }
  ],
  "connections": {
    "When clicking \u2018Execute workflow\u2019": {
      "main": [
        [
          {
            "node": "\u914d\u7f6e\u4e13\u680f\u53c2\u6570",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Crawl4AI\u83b7\u53d6\u6570\u636e\u5c0f\u62a5\u7ae5": {
      "main": [
        [
          {
            "node": "If",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "HTML \u8f6c Markdown",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\u62c6\u5206\u6587\u7ae0\u5217\u8868": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\u5904\u7406\u6587\u4ef6\u4fe1\u606f": {
      "main": [
        [
          {
            "node": "\u4fdd\u5b58 Markdown \u6587\u4ef6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\u5199\u5165\u5ef6\u8fdf": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTML \u8f6c Markdown": {
      "main": [
        [
          {
            "node": "\u5904\u7406\u6587\u4ef6\u4fe1\u606f",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\u4fdd\u5b58 Markdown \u6587\u4ef6": {
      "main": [
        [
          {
            "node": "\u5199\u5165\u5ef6\u8fdf",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\u914d\u7f6e\u4e13\u680f\u53c2\u6570": {
      "main": [
        [
          {
            "node": "\u521b\u5efa\u76ee\u5f55",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If": {
      "main": [
        [
          {
            "node": "\u62c6\u5206\u6587\u7ae0\u5217\u8868",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "No Operation, do nothing",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\u521b\u5efa\u76ee\u5f55": {
      "main": [
        [
          {
            "node": "Crawl4AI\u83b7\u53d6\u6570\u636e\u5c0f\u62a5\u7ae5",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "bb0ec758-8a2f-4e43-8f6a-fa6cec127e53",
  "id": "DDmaYRKBjO5i6Fs2",
  "tags": []
}