AutomationFlows › Web Scraping › WF-Extraction - XHS内容提取

WF-Extraction - XHS内容提取

WF-Extraction - XHS内容提取. Uses httpRequest. Scheduled trigger plus a manual webhook trigger; 15 nodes.

Cron / scheduled trigger · ★★★★☆ complexity · 15 nodes · HTTP Request
Web Scraping Trigger: Cron / scheduled Nodes: 15 Complexity: ★★★★☆ Added:

The workflow JSON

Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, activate. Full import guide →

Download .json
{
  "name": "WF-Extraction - XHS\u5185\u5bb9\u63d0\u53d6",
  "nodes": [
    {
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "minutes",
              "minutesInterval": 15
            }
          ]
        }
      },
      "id": "schedule-trigger",
      "name": "Schedule Trigger",
      "type": "n8n-nodes-base.scheduleTrigger",
      "typeVersion": 1.2,
      "position": [
        250,
        300
      ],
      "notes": "\u6bcf15\u5206\u949f\u81ea\u52a8\u6267\u884c"
    },
    {
      "parameters": {
        "httpMethod": "POST",
        "path": "xhs-extraction",
        "responseMode": "responseNode"
      },
      "id": "webhook-trigger",
      "name": "Webhook Trigger",
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2,
      "position": [
        250,
        500
      ],
      "notes": "\u624b\u52a8\u89e6\u53d1\u5165\u53e3"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "app_id",
              "value": "={{ $env.LARK_APP_ID }}"
            },
            {
              "name": "app_secret",
              "value": "={{ $env.LARK_APP_SECRET }}"
            }
          ]
        },
        "options": {}
      },
      "id": "get-lark-token",
      "name": "Get Lark Token",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        500,
        400
      ],
      "notes": "\u83b7\u53d6\u98de\u4e66\u8bbf\u95ee\u4ee4\u724c"
    },
    {
      "parameters": {
        "method": "GET",
        "url": "={{ 'https://open.feishu.cn/open-apis/bitable/v1/apps/' + $env.LARK_APP_TOKEN + '/tables/' + $env.LARK_TOPICS_TABLE_ID + '/records' }}",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "=Bearer {{ $node['Get Lark Token'].json.tenant_access_token }}"
            }
          ]
        },
        "sendQuery": true,
        "queryParameters": {
          "parameters": [
            {
              "name": "filter",
              "value": "CurrentValue.[status]=\"\u5f85\u63d0\u53d6\""
            },
            {
              "name": "page_size",
              "value": "10"
            }
          ]
        },
        "options": {}
      },
      "id": "query-topics",
      "name": "Query Topics",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        750,
        400
      ],
      "notes": "\u67e5\u8be2\u5f85\u63d0\u53d6\u7684\u9009\u9898"
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict"
          },
          "conditions": [
            {
              "id": "check-records",
              "leftValue": "={{ ($json.data?.items || []).length }}",
              "rightValue": "0",
              "operator": {
                "type": "number",
                "operation": "gt"
              }
            }
          ],
          "combinator": "and"
        },
        "options": {}
      },
      "id": "check-has-topics",
      "name": "Has Topics?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2,
      "position": [
        1000,
        400
      ],
      "notes": "\u68c0\u67e5\u662f\u5426\u6709\u5f85\u63d0\u53d6\u9009\u9898"
    },
    {
      "parameters": {
        "jsCode": "// \u63d0\u53d6\u9009\u9898\u5217\u8868\nconst items = $input.first().json.data.items || [];\n\nconst topics = items.map(item => ({\n  record_id: item.record_id,\n  note_id: item.fields.note_id,\n  title: item.fields.title,\n  author: item.fields.author,\n  keyword_id: item.fields.keyword_id\n}));\n\nreturn topics.map(t => ({ json: t }));"
      },
      "id": "extract-topics",
      "name": "Extract Topics",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1250,
        300
      ],
      "notes": "\u63d0\u53d6\u9009\u9898\u6570\u636e"
    },
    {
      "parameters": {
        "method": "PATCH",
        "url": "={{ 'https://open.feishu.cn/open-apis/bitable/v1/apps/' + $env.LARK_APP_TOKEN + '/tables/' + $env.LARK_TOPICS_TABLE_ID + '/records/' + $json.record_id }}",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "=Bearer {{ $('Get Lark Token').first().json.tenant_access_token }}"
            }
          ]
        },
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={\n  \"fields\": {\n    \"status\": \"\u63d0\u53d6\u4e2d\",\n    \"locked_at\": \"{{ $now.toISO() }}\"\n  }\n}",
        "options": {}
      },
      "id": "lock-topic",
      "name": "Lock Topic",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        1500,
        300
      ],
      "notes": "\u9501\u5b9a\u9009\u9898\u72b6\u6001\u4e3a\u63d0\u53d6\u4e2d"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "={{ $env.CRAWLER_API_URL + '/api/note/detail' }}",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "X-API-Key",
              "value": "={{ $env.CRAWLER_API_KEY }}"
            }
          ]
        },
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={\n  \"note_ids\": [\"{{ $node['Extract Topics'].json.note_id }}\"]\n}",
        "options": {
          "timeout": 60000
        }
      },
      "id": "get-note-detail",
      "name": "Get Note Detail",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        1750,
        300
      ],
      "notes": "\u8c03\u7528\u722c\u866bAPI\u83b7\u53d6\u7b14\u8bb0\u8be6\u60c5"
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict"
          },
          "conditions": [
            {
              "id": "check-success",
              "leftValue": "={{ $json.success }}",
              "rightValue": true,
              "operator": {
                "type": "boolean",
                "operation": "equals"
              }
            },
            {
              "id": "check-items",
              "leftValue": "={{ $json.data?.items?.length }}",
              "rightValue": "0",
              "operator": {
                "type": "number",
                "operation": "gt"
              }
            }
          ],
          "combinator": "and"
        },
        "options": {}
      },
      "id": "check-detail-result",
      "name": "Detail Success?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2,
      "position": [
        2000,
        300
      ],
      "notes": "\u68c0\u67e5\u8be6\u60c5\u83b7\u53d6\u662f\u5426\u6210\u529f"
    },
    {
      "parameters": {
        "jsCode": "// Convert note-detail responses into Source-table records.\n// The node runs once for all items, so each input item is handled here; reading only\n// the first item would leave the remaining locked topics unprocessed.\nconst records = [];\n\n$input.all().forEach((item, index) => {\n  // Item linking resolves the topic this particular response belongs to.\n  const topicData = $('Extract Topics').itemMatching(index).json;\n  const noteDetail = item.json.data?.items?.[0] || {};\n\n  const sourceRecord = {\n    fields: {\n      topic_id: topicData.record_id,\n      note_id: topicData.note_id,\n      original_title: noteDetail.title || topicData.title,\n      original_content: noteDetail.desc || noteDetail.content || '',\n      original_images: JSON.stringify(noteDetail.images || noteDetail.image_list || []),\n      author_name: noteDetail.user?.nickname || topicData.author,\n      author_id: noteDetail.user?.user_id || '',\n      likes_count: noteDetail.liked_count || noteDetail.interact_info?.liked_count || 0,\n      comments_count: noteDetail.comments_count || noteDetail.interact_info?.comment_count || 0,\n      collects_count: noteDetail.collected_count || noteDetail.interact_info?.collected_count || 0,\n      status: '\u5f85\u751f\u6210',\n      extracted_at: new Date().toISOString()\n    }\n  };\n\n  records.push({\n    json: { record: sourceRecord, topic_record_id: topicData.record_id },\n    pairedItem: { item: index }\n  });\n});\n\nreturn records;"
      },
      "id": "transform-detail",
      "name": "Transform Detail",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        2250,
        200
      ],
      "notes": "\u8f6c\u6362\u7b14\u8bb0\u8be6\u60c5\u683c\u5f0f"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "={{ 'https://open.feishu.cn/open-apis/bitable/v1/apps/' + $env.LARK_APP_TOKEN + '/tables/' + $env.LARK_SOURCE_TABLE_ID + '/records' }}",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "=Bearer {{ $('Get Lark Token').first().json.tenant_access_token }}"
            }
          ]
        },
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={{ JSON.stringify($json.record) }}",
        "options": {}
      },
      "id": "save-to-source",
      "name": "Save to Source",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        2500,
        200
      ],
      "notes": "\u5199\u5165Source\u8868"
    },
    {
      "parameters": {
        "method": "PATCH",
        "url": "={{ 'https://open.feishu.cn/open-apis/bitable/v1/apps/' + $env.LARK_APP_TOKEN + '/tables/' + $env.LARK_TOPICS_TABLE_ID + '/records/' + $('Transform Detail').item.json.topic_record_id }}",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "=Bearer {{ $('Get Lark Token').first().json.tenant_access_token }}"
            }
          ]
        },
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={\n  \"fields\": {\n    \"status\": \"\u5df2\u63d0\u53d6\",\n    \"locked_at\": null\n  }\n}",
        "options": {}
      },
      "id": "update-topic-success",
      "name": "Update Topic Success",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        2750,
        200
      ],
      "notes": "\u66f4\u65b0\u9009\u9898\u72b6\u6001\u4e3a\u5df2\u63d0\u53d6"
    },
    {
      "parameters": {
        "method": "PATCH",
        "url": "={{ 'https://open.feishu.cn/open-apis/bitable/v1/apps/' + $env.LARK_APP_TOKEN + '/tables/' + $env.LARK_TOPICS_TABLE_ID + '/records/' + $('Extract Topics').item.json.record_id }}",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "=Bearer {{ $('Get Lark Token').first().json.tenant_access_token }}"
            }
          ]
        },
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={\n  \"fields\": {\n    \"status\": \"\u63d0\u53d6\u5931\u8d25\",\n    \"error_message\": {{ JSON.stringify($json.error?.message || 'Note detail not found') }},\n    \"locked_at\": null\n  }\n}",
        "options": {}
      },
      "id": "update-topic-failed",
      "name": "Update Topic Failed",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        2250,
        400
      ],
      "notes": "\u66f4\u65b0\u9009\u9898\u72b6\u6001\u4e3a\u63d0\u53d6\u5931\u8d25"
    },
    {
      "parameters": {
        "respondWith": "json",
        "responseBody": "={{ { success: true, message: 'Extraction completed' } }}"
      },
      "id": "respond-success",
      "name": "Respond Success",
      "type": "n8n-nodes-base.respondToWebhook",
      "typeVersion": 1.1,
      "position": [
        3000,
        200
      ],
      "notes": "\u8fd4\u56de\u6210\u529f\u54cd\u5e94"
    },
    {
      "parameters": {
        "respondWith": "json",
        "responseBody": "={{ { success: false, message: 'No topics to process' } }}"
      },
      "id": "respond-no-topics",
      "name": "Respond No Topics",
      "type": "n8n-nodes-base.respondToWebhook",
      "typeVersion": 1.1,
      "position": [
        1250,
        500
      ],
      "notes": "\u65e0\u9009\u9898\u65f6\u8fd4\u56de"
    }
  ],
  "connections": {
    "Schedule Trigger": {
      "main": [
        [
          {
            "node": "Get Lark Token",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Webhook Trigger": {
      "main": [
        [
          {
            "node": "Get Lark Token",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Lark Token": {
      "main": [
        [
          {
            "node": "Query Topics",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Query Topics": {
      "main": [
        [
          {
            "node": "Has Topics?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Has Topics?": {
      "main": [
        [
          {
            "node": "Extract Topics",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Respond No Topics",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Topics": {
      "main": [
        [
          {
            "node": "Lock Topic",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Lock Topic": {
      "main": [
        [
          {
            "node": "Get Note Detail",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Note Detail": {
      "main": [
        [
          {
            "node": "Detail Success?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Detail Success?": {
      "main": [
        [
          {
            "node": "Transform Detail",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Update Topic Failed",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Transform Detail": {
      "main": [
        [
          {
            "node": "Save to Source",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Save to Source": {
      "main": [
        [
          {
            "node": "Update Topic Success",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Update Topic Success": {
      "main": [
        [
          {
            "node": "Respond Success",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1"
  },
  "staticData": null,
  "tags": [
    {
      "name": "XHS-Pipeline"
    }
  ],
  "triggerCount": 2
}
Pro

For the full experience, including quality scoring and batch install features for each workflow, upgrade to Pro.

About this workflow

WF-Extraction - XHS内容提取. Uses httpRequest. Scheduled trigger plus a manual webhook trigger; 15 nodes.

Source: https://github.com/wade56754/XHS/blob/b5b999f35721bcfe8ffc6b518245e1d8dbaab5c5/workflows/WF-Extraction.json — original creator credit. Request a take-down →

More Web Scraping workflows → · Browse all categories →

Related workflows

Workflows that share integrations, category, or trigger type with this one. All free to copy and import.

Web Scraping

As n8n instances scale, teams often lose track of sub-workflows—who uses them, where they are referenced, and whether they can be safely updated. This leads to inefficiencies like unnecessary copies o

HTTP Request, n8n, N8N Trigger +1
Web Scraping

This workflow is an improvement of this workflow by Greg Brzezinka.

HTTP Request, Email Send, XML +1
Web Scraping

N8N-Workflow-Github-Manager. Uses github, httpRequest, n8n. Scheduled trigger; 38 nodes.

GitHub, HTTP Request, n8n
Web Scraping

This workflow uses KlickTipp community nodes, available for self-hosted n8n instances only.

N8N Nodes Klicktipp, Salesforce, Salesforce Trigger +1
Web Scraping

This workflow acts as an automated engagement bot. It sends a Direct Message (DM) with a link or resource to any follower who replies to your post with a specific target keyword.

HTTP Request