This workflow corresponds to n8n.io template #12338 — we link there as the canonical source.
This workflow follows the Execute Workflow Trigger → HTTP Request recipe pattern — see all workflows that pair these two integrations.
The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and activate it. Full import guide →
{
"nodes": [
{
"id": "a22c634c-eb9b-4de7-be1e-40d9378f704b",
"name": "Aggregate",
"type": "n8n-nodes-base.aggregate",
"position": [
2128,
672
],
"parameters": {
"options": {},
"aggregate": "aggregateAllItemData"
},
"typeVersion": 1
},
{
"id": "a711dee2-5337-4a4e-a653-3575ef5e72fc",
"name": "setOffset",
"type": "n8n-nodes-base.set",
"position": [
2352,
672
],
"parameters": {
"options": {
"dotNotation": true
},
"assignments": {
"assignments": [
{
"id": "5e7e828f-07ee-4071-9088-88a09cb19d12",
"name": "rows_count",
"type": "number",
"value": "={{$items(\"Row_Splitter\").length}}"
},
{
"id": "cde76c8b-969d-4286-8545-f0f33147ca78",
"name": "=offset",
"type": "number",
"value": "={{ $('SubTrigger').item.json.offset + $('SubTrigger').item.json.length }}"
},
{
"id": "129e5a82-4382-48be-934a-51694830e6b5",
"name": "length",
"type": "number",
"value": "={{ $('SubTrigger').item.json.length }}"
},
{
"id": "40b54f42-a079-4404-ac75-0872f3544c7b",
"name": "dataset",
"type": "string",
"value": "={{ $('SubTrigger').item.json.dataset }}"
},
{
"id": "efde740c-69f0-4e42-a3ee-945ffdeaca79",
"name": "config",
"type": "string",
"value": "={{ $('SubTrigger').item.json.config }}"
},
{
"id": "04c9572b-d363-43e8-8b61-00e88e58cd7e",
"name": "split",
"type": "string",
"value": "={{ $('SubTrigger').item.json.split }}"
},
{
"id": "bd612c4f-4ca5-42b6-8fbd-7d28f34e6ac1",
"name": "collection_name",
"type": "string",
"value": "={{ $('SubTrigger').item.json.collection_name }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "c8f6511a-b2e6-4a19-a5a8-639d2102466c",
"name": "SubTrigger",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"position": [
560,
768
],
"parameters": {
"workflowInputs": {
"values": [
{
"name": "offset",
"type": "number"
},
{
"name": "length",
"type": "number"
},
{
"name": "dataset"
},
{
"name": "config"
},
{
"name": "split"
},
{
"name": "collection_name"
}
]
}
},
"typeVersion": 1.1
},
{
"id": "030000b4-48f3-490f-a2f8-ab86c077e451",
"name": "HF_FetchRows",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueRegularOutput",
"position": [
784,
768
],
"parameters": {
"url": "https://datasets-server.huggingface.co/rows",
"options": {},
"sendQuery": true,
"queryParameters": {
"parameters": [
{
"name": "dataset",
"value": "={{ $json.dataset }}"
},
{
"name": "config",
"value": "={{ $json.config }}"
},
{
"name": "split",
"value": "={{ $json.split }}"
},
{
"name": "offset",
"value": "={{ $('SubTrigger').item.json.offset }}"
},
{
"name": "length",
"value": "={{ $('SubTrigger').item.json.length }}"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.3
},
{
"id": "857f2797-c61b-425c-b7a6-2dbad92b7337",
"name": "Extract_Rows",
"type": "n8n-nodes-base.set",
"position": [
1008,
768
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "aa61b92e-f947-4681-a051-373988ebd69e",
"name": "rows",
"type": "array",
"value": "={{ $json.rows }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "514b4db6-79ae-4bcd-9b04-7be4a320f66b",
"name": "HasRows?",
"type": "n8n-nodes-base.if",
"position": [
1232,
768
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 3,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "d73a6dea-1564-4f20-8230-67b555f854bd",
"operator": {
"type": "array",
"operation": "notEmpty",
"singleValue": true
},
"leftValue": "={{ $json.rows }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.3
},
{
"id": "80cb9377-07cd-441b-b61f-a137ae1a03ea",
"name": "Row_Splitter",
"type": "n8n-nodes-base.splitOut",
"position": [
1456,
672
],
"parameters": {
"options": {},
"fieldToSplitOut": "rows"
},
"typeVersion": 1
},
{
"id": "2a7a5365-85c4-4ade-a76b-21bb5746060e",
"name": "Transform_RemoveId_AddMeta",
"type": "n8n-nodes-base.code",
"position": [
1680,
672
],
"parameters": {
"jsCode": "/**\n * Remove Hugging Face _id so MongoDB can generate ObjectId\n */\n\nreturn items.map(item => {\n const row = item.json.row;\n\n if (!row || typeof row !== 'object') {\n return null;\n }\n\n // Destructure to remove _id\n const { _id, ...doc } = row;\n\n return {\n json: doc\n };\n}).filter(Boolean);\n"
},
"typeVersion": 2
},
{
"id": "e84f68de-6d2d-4f73-956d-58b42a6b8413",
"name": "Mongo_InsertOrUpsert",
"type": "n8n-nodes-base.mongoDb",
"position": [
1904,
672
],
"parameters": {
"fields": "={{ Object.keys($json).join(',') }}",
"options": {
"useDotNotation": false
},
"operation": "insert",
"collection": "airbnb"
},
"typeVersion": 1.2
},
{
"id": "8bf2959b-4d9f-4a09-aefc-1234ab2d1c02",
"name": "Config_Start",
"type": "n8n-nodes-base.set",
"position": [
1632,
208
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "f70d3e9e-d6be-419f-ab93-c8d74d34b142",
"name": "offset",
"type": "number",
"value": 0
},
{
"id": "0102a45c-dc20-4d40-b4b7-7a63332228b8",
"name": "length",
"type": "number",
"value": 100
},
{
"id": "5fd666a8-6238-47a2-8c39-ee831d3b4165",
"name": "dataset",
"type": "string",
"value": "MongoDB/airbnb_embeddings"
},
{
"id": "0348c706-fbcd-45aa-bc4b-4682c888877c",
"name": "config",
"type": "string",
"value": "default"
},
{
"id": "8160e1e7-de4f-4846-bef4-90e5b61bde6d",
"name": "split",
"type": "string",
"value": "train"
},
{
"id": "58807bd2-d7d6-426d-bd28-affd82b0c564",
"name": "collection_name",
"type": "string",
"value": "airbnb"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "05dc3b69-7c33-40d7-ab39-2ccb20928ee6",
"name": "Trigger_Manual",
"type": "n8n-nodes-base.manualTrigger",
"position": [
1344,
224
],
"parameters": {},
"typeVersion": 1
},
{
"id": "a19f7baf-6f71-4902-9f02-f6f522f4877c",
"name": "ContinueLoop?",
"type": "n8n-nodes-base.if",
"position": [
2208,
208
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 3,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "5b4b56d7-7277-414e-a5af-0fe62ccc9e38",
"operator": {
"type": "number",
"operation": "notEquals"
},
"leftValue": "={{ $json.rows_count }}",
"rightValue": 0
}
]
}
},
"typeVersion": 2.3
},
{
"id": "2f6dcaf1-76d7-48aa-8656-c814bd05ac9f",
"name": "Stop",
"type": "n8n-nodes-base.noOp",
"position": [
2464,
224
],
"parameters": {},
"typeVersion": 1
},
{
"id": "57cdce0e-3f50-41bc-b191-f56f40601432",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
544,
-32
],
"parameters": {
"width": 720,
"height": 464,
"content": "## How it works:\n1- This workflow automates the migration of large datasets by fetching data in batches from the Hugging Face API. \n2- It begins with a configuration node where you define the dataset, split, and batch length. \n3- The main workflow triggers a subworkflow that fetches specific rows, extracts the data, and splits the array into individual items. \n4- A Code node transforms the data by removing the Hugging Face _id to allow MongoDB to generate its own unique ObjectIDs. \n5- After inserting the batch into MongoDB, the workflow calculates a new offset and loops until all rows have been processed.\n\n## Setup steps\n1. Hugging Face API: In the HF_FetchRows node, ensure the URL points to the correct Hugging Face dataset server.\n2. MongoDB Credentials: Connect your MongoDB account in the Mongo_InsertOrUpsert node and specify your target collection.\n3. Subworkflow ID: Update the InsertBatch node with the specific ID of your local subworkflow to ensure the loop functions correctly.\n4. Configuration: Adjust the Config_Start node to set your desired dataset name and batch length (default is 100).\n"
},
"typeVersion": 1
},
{
"id": "a6d24010-70d8-4f33-a16d-654d691fa233",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
736,
672
],
"parameters": {
"color": 7,
"width": 624,
"height": 304,
"content": "Fetch & extract\nFetch rows from HF API, extract array, and split into items."
},
"typeVersion": 1
},
{
"id": "9ff85351-1953-45b9-a3da-63fee464dca1",
"name": "Sticky Note4",
"type": "n8n-nodes-base.stickyNote",
"position": [
1440,
592
],
"parameters": {
"color": 7,
"width": 1104,
"height": 304,
"content": "Transform & insert\nRemove HF _id, then insert documents into MongoDB."
},
"typeVersion": 1
},
{
"id": "772be563-1834-4884-9338-fbcc7f6c3804",
"name": "InsertBatch",
"type": "n8n-nodes-base.executeWorkflow",
"position": [
1904,
176
],
"parameters": {
"mode": "each",
"options": {
"waitForSubWorkflow": true
},
"workflowId": {
"__rl": true,
"mode": "list",
"value": "P4SdjAY71rPIh9OB",
"cachedResultUrl": "/workflow/P4SdjAY71rPIh9OB",
"cachedResultName": "Hg_subworkflow"
},
"workflowInputs": {
"value": {
"split": "={{ $json.split }}",
"config": "={{ $json.config }}",
"length": "={{ $json.length }}",
"offset": "={{ $json.offset }}",
"dataset": "={{ $json.dataset }}",
"collection_name": "={{ $json.collection_name }}"
},
"schema": [
{
"id": "offset",
"type": "number",
"display": true,
"removed": false,
"required": false,
"displayName": "offset",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "length",
"type": "number",
"display": true,
"removed": false,
"required": false,
"displayName": "length",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "dataset",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "dataset",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "config",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "config",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "split",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "split",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "collection_name",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "collection_name",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": true
}
},
"typeVersion": 1.3
},
{
"id": "73449288-7730-4f6f-9695-765750124336",
"name": "Sticky Note5",
"type": "n8n-nodes-base.stickyNote",
"position": [
1520,
112
],
"parameters": {
"color": 7,
"width": 864,
"height": 272,
"content": "Loop & orchestration\nControl offset/length, call subworkflow per batch, repeat until no rows remain.\n"
},
"typeVersion": 1
},
{
"id": "c1740cac-c070-4c54-a3eb-ef7a56a2a65c",
"name": "NoRows_Offset",
"type": "n8n-nodes-base.set",
"position": [
1456,
992
],
"parameters": {
"options": {
"dotNotation": true
},
"assignments": {
"assignments": [
{
"id": "5e7e828f-07ee-4071-9088-88a09cb19d12",
"name": "rows_count",
"type": "number",
"value": 0
}
]
}
},
"typeVersion": 3.4
},
{
"id": "47809a64-adef-44ff-a561-fb0f87b534c6",
"name": "Sticky Note9",
"type": "n8n-nodes-base.stickyNote",
"position": [
1440,
944
],
"parameters": {
"color": 7,
"width": 224,
"height": 304,
"content": "### Set rows_count to zero"
},
"typeVersion": 1
}
],
"connections": {
"HasRows?": {
"main": [
[
{
"node": "Row_Splitter",
"type": "main",
"index": 0
}
],
[
{
"node": "NoRows_Offset",
"type": "main",
"index": 0
}
]
]
},
"Aggregate": {
"main": [
[
{
"node": "setOffset",
"type": "main",
"index": 0
}
]
]
},
"SubTrigger": {
"main": [
[
{
"node": "HF_FetchRows",
"type": "main",
"index": 0
}
]
]
},
"InsertBatch": {
"main": [
[
{
"node": "ContinueLoop?",
"type": "main",
"index": 0
}
]
]
},
"Config_Start": {
"main": [
[
{
"node": "InsertBatch",
"type": "main",
"index": 0
}
]
]
},
"Extract_Rows": {
"main": [
[
{
"node": "HasRows?",
"type": "main",
"index": 0
}
]
]
},
"HF_FetchRows": {
"main": [
[
{
"node": "Extract_Rows",
"type": "main",
"index": 0
}
]
]
},
"Row_Splitter": {
"main": [
[
{
"node": "Transform_RemoveId_AddMeta",
"type": "main",
"index": 0
}
]
]
},
"ContinueLoop?": {
"main": [
[
{
"node": "InsertBatch",
"type": "main",
"index": 0
}
],
[
{
"node": "Stop",
"type": "main",
"index": 0
}
]
]
},
"Trigger_Manual": {
"main": [
[
{
"node": "Config_Start",
"type": "main",
"index": 0
}
]
]
},
"Mongo_InsertOrUpsert": {
"main": [
[
{
"node": "Aggregate",
"type": "main",
"index": 0
}
]
]
},
"Transform_RemoveId_AddMeta": {
"main": [
[
{
"node": "Mongo_InsertOrUpsert",
"type": "main",
"index": 0
}
]
]
}
}
}
For the full experience, including quality scoring and batch install features for each workflow, upgrade to Pro.
About this workflow
This n8n template provides a production-ready, memory-safe pipeline for ingesting large Hugging Face datasets into MongoDB using batch pagination. It is designed as a reusable data ingestion layer for RAG systems, recommendation engines, analytics pipelines, and ML workflows.
Source: https://n8n.io/workflows/12338/ — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
Agendamiento_v2. Uses n8n-nodes-evolution-api, redis, httpRequest, executeWorkflowTrigger. Event-driven trigger; 59 nodes.
Cancelacion_v2. Uses executeWorkflowTrigger, redis, httpRequest, n8n-nodes-evolution-api. Event-driven trigger; 46 nodes.
This enables webhooks for near-real-time updates (every 5 seconds) from Notion databases.
Youtube Searcher. Uses splitInBatches, httpRequest, manualTrigger, executeWorkflowTrigger. Event-driven trigger; 21 nodes.
bronze. Uses httpRequest, postgres, executeWorkflowTrigger. Event-driven trigger; 10 nodes.