The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, activate. Full import guide →
{
"name": "xlsx-ingest",
"settings": {
"executionOrder": "v1",
"saveExecutionProgress": true,
"saveDataErrorExecution": "all",
"saveDataSuccessExecution": "all"
},
"nodes": [
{
"id": "manual",
"name": "Manual run",
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
240,
220
],
"parameters": {}
},
{
"id": "schedule",
"name": "Every minute",
"type": "n8n-nodes-base.scheduleTrigger",
"typeVersion": 1.2,
"position": [
240,
420
],
"parameters": {
"rule": {
"interval": [
{
"field": "minutes",
"minutesInterval": 1
}
]
}
}
},
{
"id": "list",
"name": "List incoming spreadsheets",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
480,
320
],
"parameters": {
"language": "javaScript",
"jsCode": "// Walk /data/spreadsheets/incoming/<assignment>/*.xlsx and atomically claim each\n// file by renaming it into a .working/ subfolder. fs.renameSync is atomic on\n// the same filesystem, so two concurrent scheduler ticks can never claim the\n// same file -> no duplicate processing, no duplicate points.\nconst fs = require('fs');\nconst path = require('path');\nconst root = '/data/spreadsheets/incoming';\nconst out = [];\nlet entries = [];\ntry { entries = fs.readdirSync(root, { withFileTypes: true }); } catch (e) { return out; }\nfor (const e of entries) {\n if (!e.isDirectory()) continue;\n const assignment = e.name;\n const sub = path.join(root, assignment);\n const working = path.join(sub, '.working');\n let files = [];\n try { files = fs.readdirSync(sub); } catch (_) { continue; }\n for (const f of files) {\n if (f.startsWith('.')) continue; // skip dotfiles incl. .working\n const lo = f.toLowerCase();\n if (!lo.endsWith('.xlsx') && !lo.endsWith('.xlsm')) continue;\n const src = path.join(sub, f);\n const dst = path.join(working, f);\n try { fs.mkdirSync(working, { recursive: true }); } catch (_) {}\n try {\n fs.renameSync(src, dst); // atomic claim\n } catch (_) {\n continue; // someone else (or a previous tick) already grabbed it\n }\n out.push({ json: { assignment, filename: f, path: dst } });\n }\n}\nreturn out;\n"
}
},
{
"id": "process",
"name": "Ingest spreadsheet",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
720,
320
],
"parameters": {
"mode": "runOnceForEachItem",
"language": "javaScript",
"jsCode": "/**\n * Per-file pipeline:\n * 1. POST raw bytes to spreadsheet-mcp parser -> {sheets, profile, sha256}\n * 2. Ask Qwen to summarize the file (used for retrieval and SQL hinting)\n * 3. UPSERT into postgres.spreadsheets\n * 4. Embed the summary with bge-m3 and upsert into qdrant 'spreadsheets-meta'\n * 5. Move source file to processed/<assignment>/ (or failed/<assignment>/)\n */\nconst fs = require('fs');\nconst path = require('path');\nconst http = require('http');\nconst https = require('https');\nconst { URL } = require('url');\n\nconst PARSER_URL = ($env.PARSER_URL || 'http://spreadsheet-mcp:8090').replace(/\\/$/, '');\nconst LLM_BASE = ($env.LLM_BASE_URL || 'https://chat.example.com/v1').replace(/\\/$/, '');\nconst LLM_MODEL = $env.LLM_MODEL || 'Qwen3.6-35B-A3B-Q5_K_M';\nconst EMBED_BASE = ($env.EMBEDDINGS_BASE_URL || 'https://embed.example.com/v1').replace(/\\/$/, '');\nconst EMBED_MODEL = $env.EMBEDDINGS_MODEL || 'bge-m3';\nconst QDRANT_URL = ($env.QDRANT_URL || 'http://qdrant:6333').replace(/\\/$/, '');\nconst QDRANT_COL = 'spreadsheets-meta';\nconst PG_HOST = $env.POSTGRES_HOST || 'postgres';\nconst PG_PORT = Number($env.POSTGRES_PORT || 5432);\nconst PG_DB = $env.POSTGRES_DB || 'boxai';\nconst PG_USER = $env.POSTGRES_USER || 'boxai';\nconst PG_PASS = $env.POSTGRES_PASSWORD || '';\n\n// --- minimal http(s) JSON helper -------------------------------------------\nfunction httpJson(method, urlStr, body, headers) {\n return new Promise((resolve, reject) => {\n const u = new URL(urlStr);\n const lib = u.protocol === 'https:' ? https : http;\n const h = Object.assign({ 'Accept': 'application/json' }, headers || {});\n let payload = null;\n if (body != null) {\n payload = Buffer.isBuffer(body) ? body : Buffer.from(JSON.stringify(body));\n if (!h['Content-Type']) h['Content-Type'] = 'application/json';\n h['Content-Length'] = payload.length;\n }\n const req = lib.request({\n method,\n hostname: u.hostname,\n port: u.port || (u.protocol === 'https:' ? 443 : 80),\n path: u.pathname + u.search,\n headers: h,\n }, (res) => {\n const chunks = [];\n res.on('data', (c) => chunks.push(c));\n res.on('end', () => {\n const text = Buffer.concat(chunks).toString('utf8');\n if (res.statusCode >= 200 && res.statusCode < 300) {\n try { resolve(JSON.parse(text)); } catch (_) { resolve(text); }\n } else {\n reject(new Error(`${method} ${urlStr} -> ${res.statusCode}: ${text.slice(0, 400)}`));\n }\n });\n });\n req.on('error', reject);\n if (payload) req.write(payload);\n req.end();\n });\n}\n\nfunction safeMove(src, destDir) {\n fs.mkdirSync(destDir, { recursive: true });\n const dest = path.join(destDir, path.basename(src));\n try { fs.renameSync(src, dest); }\n catch (_) { fs.copyFileSync(src, dest); fs.unlinkSync(src); }\n return dest;\n}\n\n// --- LLM summarizer --------------------------------------------------------\nasync function summarize(parsed, assignment) {\n // Send the model a compact view: per-sheet column names, types, samples, and\n // first 5 rows. Never the full data.\n const compact = parsed.profile.sheets.map((s) => ({\n sheet: s.name,\n row_count: s.row_count,\n columns: s.columns.map((c) => ({\n name: c.name, type: c.type,\n samples: (c.samples || []).slice(0, 5),\n distinct_estimate: c.distinct_estimate,\n null_count: c.null_count,\n })),\n head: (parsed.sheets[s.name] || []).slice(0, 5),\n }));\n\n const prompt = `You are profiling an uploaded spreadsheet so another LLM can later answer questions about it via SQL.\n\nFile: ${parsed.filename}\nAssignment: ${assignment}\nSheets and schemas (samples included):\n${JSON.stringify(compact, null, 2)}\n\nRespond ONLY with a single JSON object, no prose, no code fences. Schema:\n{\n \"summary\": string, // 2-4 sentences: what this file contains, time range if any, units, language\n \"topics\": string[], // 3-8 short tags, lowercase\n \"suggested_questions\": string[], // 3-6 questions a user might ask\n \"column_descriptions\": { \"<sheet>.<column>\": string } // one short line per non-obvious column\n}\nWrite the summary in the same language as the data when possible.`;\n\n const j = await httpJson('POST', `${LLM_BASE}/chat/completions`, {\n model: LLM_MODEL,\n temperature: 0.2,\n messages: [\n { role: 'system', content: 'You output strictly valid JSON.' },\n { role: 'user', content: prompt },\n ],\n });\n const txt = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || '';\n // Tolerate ```json fences\n const cleaned = txt.replace(/^```(?:json)?\\s*/i, '').replace(/```\\s*$/i, '').trim();\n try { return JSON.parse(cleaned); }\n catch (_) { return { summary: txt.slice(0, 800), topics: [], suggested_questions: [], column_descriptions: {} }; }\n}\n\n// --- Postgres via tiny TCP wire (use n8n Postgres node? we'd rather avoid an\n// extra node + credential setup, so use the pg module via a side-car HTTP).\n// Simpler: do the upsert through a generic SQL endpoint... we don't have one.\n// We DO have spreadsheet-mcp which has pg in it, but it's read-only. The\n// cleanest, smallest-surface approach is to add an internal HTTP route on the\n// parser sidecar to perform the UPSERT. But to keep this single-node-flow we\n// instead require n8n's Postgres node. That node lives outside the Code node.\n//\n// To stay in pure-JS Code, we go through Qdrant + a postgres-write-via-MCP\n// internal endpoint. To avoid adding more endpoints, we use the n8n Postgres\n// node downstream. This Code node just emits the prepared row; the next node\n// does the UPSERT.\n\nconst pdfPath = $json.path;\nconst assignment = $json.assignment;\nconst filename = $json.filename;\n\ntry {\n // 1. parse\n const buf = fs.readFileSync(pdfPath);\n const parsed = await httpJson('POST', `${PARSER_URL}/parse`,\n { filename, base64: buf.toString('base64') });\n\n // 2. summarize via Qwen\n const meta = await summarize(parsed, assignment);\n\n // 3. emit a row for the next (Postgres) node to UPSERT\n const downloadPath = `processed/${assignment}/${filename}`;\n\n return {\n status: 'parsed',\n assignment,\n filename,\n sha256: parsed.sha256,\n download_path: downloadPath,\n sheets: parsed.sheets,\n profile: parsed.profile,\n metadata: meta,\n src_path: pdfPath,\n // text used for the qdrant point\n embed_text:\n `Filename: ${filename}\\nAssignment: ${assignment}\\n` +\n `Sheets: ${parsed.profile.sheets.map(s => s.name + ' (' + s.row_count + ' rows, cols: ' + s.columns.map(c => c.name).join(', ') + ')').join(' | ')}\\n` +\n `Summary: ${meta.summary || ''}\\n` +\n `Topics: ${(meta.topics || []).join(', ')}\\n` +\n `Suggested questions: ${(meta.suggested_questions || []).join(' | ')}`\n };\n} catch (err) {\n let dest = null;\n try { dest = safeMove(pdfPath, `/data/spreadsheets/failed/${assignment}`); } catch (_) {}\n return {\n status: 'error',\n assignment, filename,\n error: String(err && err.message || err),\n moved_to: dest,\n };\n}\n"
}
},
{
"id": "skip-errors",
"name": "Continue if parsed",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
960,
320
],
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"leftValue": "={{ $json.status }}",
"rightValue": "parsed",
"operator": {
"type": "string",
"operation": "equals"
}
}
]
}
}
},
{
"id": "upsert-pg",
"name": "UPSERT spreadsheet",
"type": "n8n-nodes-base.postgres",
"typeVersion": 2.5,
"position": [
1200,
220
],
"parameters": {
"operation": "executeQuery",
"query": "INSERT INTO spreadsheets (assignment, filename, sha256, download_path, sheets, profile, metadata)\nVALUES ($1, $2, $3, $4, $5::jsonb, $6::jsonb, $7::jsonb)\nON CONFLICT (assignment, filename) DO UPDATE\n SET sha256 = EXCLUDED.sha256,\n download_path = EXCLUDED.download_path,\n sheets = EXCLUDED.sheets,\n profile = EXCLUDED.profile,\n metadata = EXCLUDED.metadata,\n uploaded_at = now()\nRETURNING id::text AS id;",
"options": {
"queryReplacement": "={{ $json.assignment }},{{ $json.filename }},{{ $json.sha256 }},{{ $json.download_path }},{{ JSON.stringify($json.sheets) }},{{ JSON.stringify($json.profile) }},{{ JSON.stringify($json.metadata) }}"
}
},
"credentials": {
"postgres": {
"name": "<your credential>"
}
}
},
{
"id": "qdrant-upsert",
"name": "Embed + upsert qdrant + move",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1440,
220
],
"parameters": {
"mode": "runOnceForEachItem",
"language": "javaScript",
"jsCode": "const fs = require('fs');\nconst path = require('path');\nconst http = require('http');\nconst https = require('https');\nconst crypto = require('crypto');\nconst { URL } = require('url');\n\nconst EMBED_BASE = ($env.EMBEDDINGS_BASE_URL || 'https://embed.example.com/v1').replace(/\\/$/, '');\nconst EMBED_MODEL = $env.EMBEDDINGS_MODEL || 'bge-m3';\nconst QDRANT_URL = ($env.QDRANT_URL || 'http://qdrant:6333').replace(/\\/$/, '');\nconst QDRANT_COL = 'spreadsheets-meta';\n\n// Deterministic UUIDv5 from a string. Using a fixed namespace UUID so that\n// (assignment, filename) -> always the same qdrant point id. Re-ingesting the\n// same file overwrites in place; no duplicates can ever exist in qdrant.\nconst NS = '6ba7b810-9dad-11d1-80b4-00c04fd430c8'; // RFC 4122 DNS namespace\nfunction uuidv5(name, nsHex) {\n const ns = Buffer.from(nsHex.replace(/-/g, ''), 'hex');\n const h = crypto.createHash('sha1').update(ns).update(Buffer.from(name, 'utf8')).digest();\n const b = Buffer.from(h.subarray(0, 16));\n b[6] = (b[6] & 0x0f) | 0x50; // version 5\n b[8] = (b[8] & 0x3f) | 0x80; // RFC 4122 variant\n const hex = b.toString('hex');\n return `${hex.slice(0,8)}-${hex.slice(8,12)}-${hex.slice(12,16)}-${hex.slice(16,20)}-${hex.slice(20,32)}`;\n}\n\nfunction httpJson(method, urlStr, body) {\n return new Promise((resolve, reject) => {\n const u = new URL(urlStr);\n const lib = u.protocol === 'https:' ? https : http;\n const h = { 'Content-Type': 'application/json', 'Accept': 'application/json' };\n const payload = body == null ? null : Buffer.from(JSON.stringify(body));\n if (payload) h['Content-Length'] = payload.length;\n const req = lib.request({\n method, hostname: u.hostname,\n port: u.port || (u.protocol === 'https:' ? 443 : 80),\n path: u.pathname + u.search, headers: h,\n }, (res) => {\n const chunks = [];\n res.on('data', (c) => chunks.push(c));\n res.on('end', () => {\n const text = Buffer.concat(chunks).toString('utf8');\n if (res.statusCode >= 200 && res.statusCode < 300) {\n try { resolve(JSON.parse(text)); } catch (_) { resolve(text); }\n } else reject(new Error(`${method} ${urlStr} -> ${res.statusCode}: ${text.slice(0, 400)}`));\n });\n });\n req.on('error', reject);\n if (payload) req.write(payload);\n req.end();\n });\n}\n\nfunction safeMove(src, destDir) {\n fs.mkdirSync(destDir, { recursive: true });\n const dest = path.join(destDir, path.basename(src));\n try { fs.renameSync(src, dest); } catch (_) { fs.copyFileSync(src, dest); fs.unlinkSync(src); }\n return dest;\n}\n\n// Pull the upserted id from the previous (Postgres) node, and the metadata\n// from the Code node before it.\nconst pgItem = $input.item.json;\nconst id = pgItem.id;\nconst meta = $('Ingest spreadsheet').item.json;\nconst assignment = meta.assignment;\nconst filename = meta.filename;\nconst srcPath = meta.src_path;\n\n// Stable point id keyed on (assignment, filename) -- guarantees uniqueness.\nconst pointId = uuidv5(`${assignment}/${filename}`, NS);\n\ntry {\n // ensure collection exists (idempotent)\n try { await httpJson('GET', `${QDRANT_URL}/collections/${QDRANT_COL}`); }\n catch (_) {\n await httpJson('PUT', `${QDRANT_URL}/collections/${QDRANT_COL}`,\n { vectors: { size: 1024, distance: 'Cosine' } });\n try { await httpJson('PUT', `${QDRANT_URL}/collections/${QDRANT_COL}/index`,\n { field_name: 'assignment', field_schema: 'keyword' }); } catch (_) {}\n }\n\n // embed the summary text\n const e = await httpJson('POST', `${EMBED_BASE}/embeddings`,\n { model: EMBED_MODEL, input: meta.embed_text });\n const vector = e.data[0].embedding;\n\n // upsert by deterministic id -- if the file is re-ingested the same point\n // is overwritten in place. No delete step needed; no duplicates possible.\n await httpJson('PUT', `${QDRANT_URL}/collections/${QDRANT_COL}/points?wait=true`, {\n points: [{\n id: pointId,\n vector,\n payload: {\n spreadsheet_id: id,\n assignment,\n filename,\n download_path: meta.download_path,\n summary: (meta.metadata && meta.metadata.summary) || '',\n },\n }],\n });\n\n const dest = safeMove(srcPath, `/data/spreadsheets/processed/${assignment}`);\n return {\n status: 'ok', assignment, filename, spreadsheet_id: id, point_id: pointId, moved_to: dest,\n };\n} catch (err) {\n let dest = null;\n try { dest = safeMove(srcPath, `/data/spreadsheets/failed/${assignment}`); } catch (_) {}\n return {\n status: 'error', assignment, filename, spreadsheet_id: id, point_id: pointId,\n error: String(err && err.message || err), moved_to: dest,\n };\n}\n"
}
}
],
"connections": {
"Manual run": {
"main": [
[
{
"node": "List incoming spreadsheets",
"type": "main",
"index": 0
}
]
]
},
"Every minute": {
"main": [
[
{
"node": "List incoming spreadsheets",
"type": "main",
"index": 0
}
]
]
},
"List incoming spreadsheets": {
"main": [
[
{
"node": "Ingest spreadsheet",
"type": "main",
"index": 0
}
]
]
},
"Ingest spreadsheet": {
"main": [
[
{
"node": "Continue if parsed",
"type": "main",
"index": 0
}
]
]
},
"Continue if parsed": {
"main": [
[
{
"node": "UPSERT spreadsheet",
"type": "main",
"index": 0
}
],
[]
]
},
"UPSERT spreadsheet": {
"main": [
[
{
"node": "Embed + upsert qdrant + move",
"type": "main",
"index": 0
}
]
]
}
}
}
Credentials you'll need
Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.
postgres
For the full experience including quality scoring and batch install features for each workflow upgrade to Pro
About this workflow
xlsx-ingest. Uses postgres. Event-driven trigger; 7 nodes.
Source: https://github.com/emilcardell/contained-ai-toolbox/blob/07463c7bbec946996b8b3ca08f7e24054c8209f6/flows/xlsx-ingest.json — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
This workflow acts as a junior finance research analyst for a UK boutique M&A or corporate finance team. It listens for Slack messages, classifies the request, gathers company or market data, and prod
Agendamiento_v2. Uses n8n-nodes-evolution-api, redis, httpRequest, executeWorkflowTrigger. Event-driven trigger; 59 nodes.
Cancelacion_v2. Uses executeWorkflowTrigger, redis, httpRequest, n8n-nodes-evolution-api. Event-driven trigger; 46 nodes.
Components. Uses postgres, readWriteFile. Event-driven trigger; 42 nodes.
This N8N workflow is designed to enrich seller data stored in a Postgres database by performing automated Google search lookups. It uses Bright Data's Web Unlocker to bypass search result restrictions