The workflow JSON

Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, activate. Full import guide →

Download .json

{
  "name": "xlsx-ingest",
  "settings": {
    "executionOrder": "v1",
    "saveExecutionProgress": true,
    "saveDataErrorExecution": "all",
    "saveDataSuccessExecution": "all"
  },
  "nodes": [
    {
      "id": "manual",
      "name": "Manual run",
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        240,
        220
      ],
      "parameters": {}
    },
    {
      "id": "schedule",
      "name": "Every minute",
      "type": "n8n-nodes-base.scheduleTrigger",
      "typeVersion": 1.2,
      "position": [
        240,
        420
      ],
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "minutes",
              "minutesInterval": 1
            }
          ]
        }
      }
    },
    {
      "id": "list",
      "name": "List incoming spreadsheets",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        480,
        320
      ],
      "parameters": {
        "language": "javaScript",
        "jsCode": "// Walk /data/spreadsheets/incoming/<assignment>/*.xlsx and atomically claim each\n// file by renaming it into a .working/ subfolder. fs.renameSync is atomic on\n// the same filesystem, so two concurrent scheduler ticks can never claim the\n// same file -> no duplicate processing, no duplicate points.\nconst fs = require('fs');\nconst path = require('path');\nconst root = '/data/spreadsheets/incoming';\nconst out = [];\nlet entries = [];\ntry { entries = fs.readdirSync(root, { withFileTypes: true }); } catch (e) { return out; }\nfor (const e of entries) {\n  if (!e.isDirectory()) continue;\n  const assignment = e.name;\n  const sub = path.join(root, assignment);\n  const working = path.join(sub, '.working');\n  let files = [];\n  try { files = fs.readdirSync(sub); } catch (_) { continue; }\n  for (const f of files) {\n    if (f.startsWith('.')) continue; // skip dotfiles incl. .working\n    const lo = f.toLowerCase();\n    if (!lo.endsWith('.xlsx') && !lo.endsWith('.xlsm')) continue;\n    const src = path.join(sub, f);\n    const dst = path.join(working, f);\n    try { fs.mkdirSync(working, { recursive: true }); } catch (_) {}\n    try {\n      fs.renameSync(src, dst); // atomic claim\n    } catch (_) {\n      continue; // someone else (or a previous tick) already grabbed it\n    }\n    out.push({ json: { assignment, filename: f, path: dst } });\n  }\n}\nreturn out;\n"
      }
    },
    {
      "id": "process",
      "name": "Ingest spreadsheet",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        720,
        320
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "language": "javaScript",
        "jsCode": "/**\n * Per-file pipeline:\n *   1. POST raw bytes to spreadsheet-mcp parser  -> {sheets, profile, sha256}\n *   2. Ask Qwen to summarize the file (used for retrieval and SQL hinting)\n *   3. UPSERT into postgres.spreadsheets\n *   4. Embed the summary with bge-m3 and upsert into qdrant 'spreadsheets-meta'\n *   5. Move source file to processed/<assignment>/  (or failed/<assignment>/)\n */\nconst fs   = require('fs');\nconst path = require('path');\nconst http = require('http');\nconst https = require('https');\nconst { URL } = require('url');\n\nconst PARSER_URL  = ($env.PARSER_URL  || 'http://spreadsheet-mcp:8090').replace(/\\/$/, '');\nconst LLM_BASE    = ($env.LLM_BASE_URL || 'https://chat.example.com/v1').replace(/\\/$/, '');\nconst LLM_MODEL   = $env.LLM_MODEL || 'Qwen3.6-35B-A3B-Q5_K_M';\nconst EMBED_BASE  = ($env.EMBEDDINGS_BASE_URL || 'https://embed.example.com/v1').replace(/\\/$/, '');\nconst EMBED_MODEL = $env.EMBEDDINGS_MODEL || 'bge-m3';\nconst QDRANT_URL  = ($env.QDRANT_URL || 'http://qdrant:6333').replace(/\\/$/, '');\nconst QDRANT_COL  = 'spreadsheets-meta';\nconst PG_HOST     = $env.POSTGRES_HOST || 'postgres';\nconst PG_PORT     = Number($env.POSTGRES_PORT || 5432);\nconst PG_DB       = $env.POSTGRES_DB   || 'boxai';\nconst PG_USER     = $env.POSTGRES_USER || 'boxai';\nconst PG_PASS     = $env.POSTGRES_PASSWORD || '';\n\n// --- minimal http(s) JSON helper -------------------------------------------\nfunction httpJson(method, urlStr, body, headers) {\n  return new Promise((resolve, reject) => {\n    const u = new URL(urlStr);\n    const lib = u.protocol === 'https:' ? https : http;\n    const h = Object.assign({ 'Accept': 'application/json' }, headers || {});\n    let payload = null;\n    if (body != null) {\n      payload = Buffer.isBuffer(body) ? body : Buffer.from(JSON.stringify(body));\n      if (!h['Content-Type']) h['Content-Type'] = 'application/json';\n      h['Content-Length'] = payload.length;\n    }\n    const req = lib.request({\n      method,\n      hostname: u.hostname,\n      port: u.port || (u.protocol === 'https:' ? 443 : 80),\n      path: u.pathname + u.search,\n      headers: h,\n    }, (res) => {\n      const chunks = [];\n      res.on('data', (c) => chunks.push(c));\n      res.on('end', () => {\n        const text = Buffer.concat(chunks).toString('utf8');\n        if (res.statusCode >= 200 && res.statusCode < 300) {\n          try { resolve(JSON.parse(text)); } catch (_) { resolve(text); }\n        } else {\n          reject(new Error(`${method} ${urlStr} -> ${res.statusCode}: ${text.slice(0, 400)}`));\n        }\n      });\n    });\n    req.on('error', reject);\n    if (payload) req.write(payload);\n    req.end();\n  });\n}\n\nfunction safeMove(src, destDir) {\n  fs.mkdirSync(destDir, { recursive: true });\n  const dest = path.join(destDir, path.basename(src));\n  try { fs.renameSync(src, dest); }\n  catch (_) { fs.copyFileSync(src, dest); fs.unlinkSync(src); }\n  return dest;\n}\n\n// --- LLM summarizer --------------------------------------------------------\nasync function summarize(parsed, assignment) {\n  // Send the model a compact view: per-sheet column names, types, samples, and\n  // first 5 rows. Never the full data.\n  const compact = parsed.profile.sheets.map((s) => ({\n    sheet: s.name,\n    row_count: s.row_count,\n    columns: s.columns.map((c) => ({\n      name: c.name, type: c.type,\n      samples: (c.samples || []).slice(0, 5),\n      distinct_estimate: c.distinct_estimate,\n      null_count: c.null_count,\n    })),\n    head: (parsed.sheets[s.name] || []).slice(0, 5),\n  }));\n\n  const prompt = `You are profiling an uploaded spreadsheet so another LLM can later answer questions about it via SQL.\n\nFile: ${parsed.filename}\nAssignment: ${assignment}\nSheets and schemas (samples included):\n${JSON.stringify(compact, null, 2)}\n\nRespond ONLY with a single JSON object, no prose, no code fences. Schema:\n{\n  \"summary\": string,                 // 2-4 sentences: what this file contains, time range if any, units, language\n  \"topics\": string[],                // 3-8 short tags, lowercase\n  \"suggested_questions\": string[],   // 3-6 questions a user might ask\n  \"column_descriptions\": { \"<sheet>.<column>\": string }   // one short line per non-obvious column\n}\nWrite the summary in the same language as the data when possible.`;\n\n  const j = await httpJson('POST', `${LLM_BASE}/chat/completions`, {\n    model: LLM_MODEL,\n    temperature: 0.2,\n    messages: [\n      { role: 'system', content: 'You output strictly valid JSON.' },\n      { role: 'user',   content: prompt },\n    ],\n  });\n  const txt = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || '';\n  // Tolerate ```json fences\n  const cleaned = txt.replace(/^```(?:json)?\\s*/i, '').replace(/```\\s*$/i, '').trim();\n  try { return JSON.parse(cleaned); }\n  catch (_) { return { summary: txt.slice(0, 800), topics: [], suggested_questions: [], column_descriptions: {} }; }\n}\n\n// --- Postgres via tiny TCP wire (use n8n Postgres node? we'd rather avoid an\n// extra node + credential setup, so use the pg module via a side-car HTTP).\n// Simpler: do the upsert through a generic SQL endpoint... we don't have one.\n// We DO have spreadsheet-mcp which has pg in it, but it's read-only. The\n// cleanest, smallest-surface approach is to add an internal HTTP route on the\n// parser sidecar to perform the UPSERT. But to keep this single-node-flow we\n// instead require n8n's Postgres node. That node lives outside the Code node.\n//\n// To stay in pure-JS Code, we go through Qdrant + a postgres-write-via-MCP\n// internal endpoint. To avoid adding more endpoints, we use the n8n Postgres\n// node downstream. This Code node just emits the prepared row; the next node\n// does the UPSERT.\n\nconst pdfPath  = $json.path;\nconst assignment = $json.assignment;\nconst filename = $json.filename;\n\ntry {\n  // 1. parse\n  const buf = fs.readFileSync(pdfPath);\n  const parsed = await httpJson('POST', `${PARSER_URL}/parse`,\n    { filename, base64: buf.toString('base64') });\n\n  // 2. summarize via Qwen\n  const meta = await summarize(parsed, assignment);\n\n  // 3. emit a row for the next (Postgres) node to UPSERT\n  const downloadPath = `processed/${assignment}/${filename}`;\n\n  return {\n    status: 'parsed',\n    assignment,\n    filename,\n    sha256: parsed.sha256,\n    download_path: downloadPath,\n    sheets:   parsed.sheets,\n    profile:  parsed.profile,\n    metadata: meta,\n    src_path: pdfPath,\n    // text used for the qdrant point\n    embed_text:\n      `Filename: ${filename}\\nAssignment: ${assignment}\\n` +\n      `Sheets: ${parsed.profile.sheets.map(s => s.name + ' (' + s.row_count + ' rows, cols: ' + s.columns.map(c => c.name).join(', ') + ')').join(' | ')}\\n` +\n      `Summary: ${meta.summary || ''}\\n` +\n      `Topics: ${(meta.topics || []).join(', ')}\\n` +\n      `Suggested questions: ${(meta.suggested_questions || []).join(' | ')}`\n  };\n} catch (err) {\n  let dest = null;\n  try { dest = safeMove(pdfPath, `/data/spreadsheets/failed/${assignment}`); } catch (_) {}\n  return {\n    status: 'error',\n    assignment, filename,\n    error: String(err && err.message || err),\n    moved_to: dest,\n  };\n}\n"
      }
    },
    {
      "id": "skip-errors",
      "name": "Continue if parsed",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2.2,
      "position": [
        960,
        320
      ],
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "leftValue": "={{ $json.status }}",
              "rightValue": "parsed",
              "operator": {
                "type": "string",
                "operation": "equals"
              }
            }
          ]
        }
      }
    },
    {
      "id": "upsert-pg",
      "name": "UPSERT spreadsheet",
      "type": "n8n-nodes-base.postgres",
      "typeVersion": 2.5,
      "position": [
        1200,
        220
      ],
      "parameters": {
        "operation": "executeQuery",
        "query": "INSERT INTO spreadsheets (assignment, filename, sha256, download_path, sheets, profile, metadata)\nVALUES ($1, $2, $3, $4, $5::jsonb, $6::jsonb, $7::jsonb)\nON CONFLICT (assignment, filename) DO UPDATE\n  SET sha256        = EXCLUDED.sha256,\n      download_path = EXCLUDED.download_path,\n      sheets        = EXCLUDED.sheets,\n      profile       = EXCLUDED.profile,\n      metadata      = EXCLUDED.metadata,\n      uploaded_at   = now()\nRETURNING id::text AS id;",
        "options": {
          "queryReplacement": "={{ $json.assignment }},{{ $json.filename }},{{ $json.sha256 }},{{ $json.download_path }},{{ JSON.stringify($json.sheets) }},{{ JSON.stringify($json.profile) }},{{ JSON.stringify($json.metadata) }}"
        }
      },
      "credentials": {
        "postgres": {
          "name": "<your credential>"
        }
      }
    },
    {
      "id": "qdrant-upsert",
      "name": "Embed + upsert qdrant + move",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1440,
        220
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "language": "javaScript",
        "jsCode": "const fs   = require('fs');\nconst path = require('path');\nconst http = require('http');\nconst https = require('https');\nconst crypto = require('crypto');\nconst { URL } = require('url');\n\nconst EMBED_BASE  = ($env.EMBEDDINGS_BASE_URL || 'https://embed.example.com/v1').replace(/\\/$/, '');\nconst EMBED_MODEL = $env.EMBEDDINGS_MODEL || 'bge-m3';\nconst QDRANT_URL  = ($env.QDRANT_URL || 'http://qdrant:6333').replace(/\\/$/, '');\nconst QDRANT_COL  = 'spreadsheets-meta';\n\n// Deterministic UUIDv5 from a string. Using a fixed namespace UUID so that\n// (assignment, filename) -> always the same qdrant point id. Re-ingesting the\n// same file overwrites in place; no duplicates can ever exist in qdrant.\nconst NS = '6ba7b810-9dad-11d1-80b4-00c04fd430c8'; // RFC 4122 DNS namespace\nfunction uuidv5(name, nsHex) {\n  const ns = Buffer.from(nsHex.replace(/-/g, ''), 'hex');\n  const h = crypto.createHash('sha1').update(ns).update(Buffer.from(name, 'utf8')).digest();\n  const b = Buffer.from(h.subarray(0, 16));\n  b[6] = (b[6] & 0x0f) | 0x50; // version 5\n  b[8] = (b[8] & 0x3f) | 0x80; // RFC 4122 variant\n  const hex = b.toString('hex');\n  return `${hex.slice(0,8)}-${hex.slice(8,12)}-${hex.slice(12,16)}-${hex.slice(16,20)}-${hex.slice(20,32)}`;\n}\n\nfunction httpJson(method, urlStr, body) {\n  return new Promise((resolve, reject) => {\n    const u = new URL(urlStr);\n    const lib = u.protocol === 'https:' ? https : http;\n    const h = { 'Content-Type': 'application/json', 'Accept': 'application/json' };\n    const payload = body == null ? null : Buffer.from(JSON.stringify(body));\n    if (payload) h['Content-Length'] = payload.length;\n    const req = lib.request({\n      method, hostname: u.hostname,\n      port: u.port || (u.protocol === 'https:' ? 443 : 80),\n      path: u.pathname + u.search, headers: h,\n    }, (res) => {\n      const chunks = [];\n      res.on('data', (c) => chunks.push(c));\n      res.on('end', () => {\n        const text = Buffer.concat(chunks).toString('utf8');\n        if (res.statusCode >= 200 && res.statusCode < 300) {\n          try { resolve(JSON.parse(text)); } catch (_) { resolve(text); }\n        } else reject(new Error(`${method} ${urlStr} -> ${res.statusCode}: ${text.slice(0, 400)}`));\n      });\n    });\n    req.on('error', reject);\n    if (payload) req.write(payload);\n    req.end();\n  });\n}\n\nfunction safeMove(src, destDir) {\n  fs.mkdirSync(destDir, { recursive: true });\n  const dest = path.join(destDir, path.basename(src));\n  try { fs.renameSync(src, dest); } catch (_) { fs.copyFileSync(src, dest); fs.unlinkSync(src); }\n  return dest;\n}\n\n// Pull the upserted id from the previous (Postgres) node, and the metadata\n// from the Code node before it.\nconst pgItem = $input.item.json;\nconst id = pgItem.id;\nconst meta = $('Ingest spreadsheet').item.json;\nconst assignment = meta.assignment;\nconst filename   = meta.filename;\nconst srcPath    = meta.src_path;\n\n// Stable point id keyed on (assignment, filename) -- guarantees uniqueness.\nconst pointId = uuidv5(`${assignment}/${filename}`, NS);\n\ntry {\n  // ensure collection exists (idempotent)\n  try { await httpJson('GET', `${QDRANT_URL}/collections/${QDRANT_COL}`); }\n  catch (_) {\n    await httpJson('PUT', `${QDRANT_URL}/collections/${QDRANT_COL}`,\n      { vectors: { size: 1024, distance: 'Cosine' } });\n    try { await httpJson('PUT', `${QDRANT_URL}/collections/${QDRANT_COL}/index`,\n      { field_name: 'assignment', field_schema: 'keyword' }); } catch (_) {}\n  }\n\n  // embed the summary text\n  const e = await httpJson('POST', `${EMBED_BASE}/embeddings`,\n    { model: EMBED_MODEL, input: meta.embed_text });\n  const vector = e.data[0].embedding;\n\n  // upsert by deterministic id -- if the file is re-ingested the same point\n  // is overwritten in place. No delete step needed; no duplicates possible.\n  await httpJson('PUT', `${QDRANT_URL}/collections/${QDRANT_COL}/points?wait=true`, {\n    points: [{\n      id: pointId,\n      vector,\n      payload: {\n        spreadsheet_id: id,\n        assignment,\n        filename,\n        download_path: meta.download_path,\n        summary: (meta.metadata && meta.metadata.summary) || '',\n      },\n    }],\n  });\n\n  const dest = safeMove(srcPath, `/data/spreadsheets/processed/${assignment}`);\n  return {\n    status: 'ok', assignment, filename, spreadsheet_id: id, point_id: pointId, moved_to: dest,\n  };\n} catch (err) {\n  let dest = null;\n  try { dest = safeMove(srcPath, `/data/spreadsheets/failed/${assignment}`); } catch (_) {}\n  return {\n    status: 'error', assignment, filename, spreadsheet_id: id, point_id: pointId,\n    error: String(err && err.message || err), moved_to: dest,\n  };\n}\n"
      }
    }
  ],
  "connections": {
    "Manual run": {
      "main": [
        [
          {
            "node": "List incoming spreadsheets",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Every minute": {
      "main": [
        [
          {
            "node": "List incoming spreadsheets",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "List incoming spreadsheets": {
      "main": [
        [
          {
            "node": "Ingest spreadsheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Ingest spreadsheet": {
      "main": [
        [
          {
            "node": "Continue if parsed",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Continue if parsed": {
      "main": [
        [
          {
            "node": "UPSERT spreadsheet",
            "type": "main",
            "index": 0
          }
        ],
        []
      ]
    },
    "UPSERT spreadsheet": {
      "main": [
        [
          {
            "node": "Embed + upsert qdrant + move",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

Credentials you'll need

Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.

postgres

Pro

For the full experience including quality scoring and batch install features for each workflow upgrade to Pro

About this workflow

xlsx-ingest. Uses postgres. Event-driven trigger; 7 nodes.

Source: https://github.com/emilcardell/contained-ai-toolbox/blob/07463c7bbec946996b8b3ca08f7e24054c8209f6/flows/xlsx-ingest.json — original creator credit. Request a take-down →

More Data & Sheets workflows → · Browse all categories →

Related workflows

Workflows that share integrations, category, or trigger type with this one. All free to copy and import.

Data & Sheets

Generate Uk M&a Research, Pitch Decks and Briefs From Slack Using Anthropic and Google Docs/slides

This workflow acts as a junior finance research analyst for a UK boutique M&A or corporate finance team. It listens for Slack messages, classifies the request, gathers company or market data, and prod

HTTP Request, Google Drive, Google Docs +5

Data & Sheets

Agendamiento_v2

Agendamiento_v2. Uses n8n-nodes-evolution-api, redis, httpRequest, executeWorkflowTrigger. Event-driven trigger; 59 nodes.

N8N Nodes Evolution Api, Redis, HTTP Request +3

Data & Sheets

Cancelacion_v2

Cancelacion_v2. Uses executeWorkflowTrigger, redis, httpRequest, n8n-nodes-evolution-api. Event-driven trigger; 46 nodes.

Execute Workflow Trigger, Redis, HTTP Request +3

Data & Sheets

Components

Components. Uses postgres, readWriteFile. Event-driven trigger; 42 nodes.

Postgres, Read Write File

Data & Sheets

Enrich Seller Data with Email & Domain Lookup Using Bright Data & Google Search

This N8N workflow is designed to enrich seller data stored in a Postgres database by performing automated Google search lookups. It uses Bright Data's Web Unlocker to bypass search result restrictions

Postgres, N8N Nodes Brightdata

Xlsx Ingest

The workflow JSON

Credentials you'll need

About this workflow

Related workflows