The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and activate it. Full import guide →
{
"name": "pdf-ingest",
"settings": {
"executionOrder": "v1",
"saveExecutionProgress": true,
"saveDataErrorExecution": "all",
"saveDataSuccessExecution": "all"
},
"nodes": [
{
"id": "manual",
"name": "Manual run",
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
240,
220
],
"parameters": {}
},
{
"id": "schedule",
"name": "Every minute",
"type": "n8n-nodes-base.scheduleTrigger",
"typeVersion": 1.2,
"position": [
240,
420
],
"parameters": {
"rule": {
"interval": [
{
"field": "minutes",
"minutesInterval": 1
}
]
}
}
},
{
"id": "list",
"name": "List incoming PDFs",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
480,
320
],
"parameters": {
"language": "javaScript",
"jsCode": "// Walk /data/pdfs/incoming/<assignment>/*.pdf\n// Each immediate subfolder = one Open WebUI Knowledge collection.\nconst fs = require('fs');\nconst path = require('path');\nconst root = '/data/pdfs/incoming';\nconst out = [];\nlet entries = [];\ntry { entries = fs.readdirSync(root, { withFileTypes: true }); } catch (e) { return out; }\nfor (const e of entries) {\n if (!e.isDirectory()) continue;\n const assignment = e.name;\n const sub = path.join(root, assignment);\n let files = [];\n try { files = fs.readdirSync(sub); } catch (_) { continue; }\n for (const f of files) {\n if (!f.toLowerCase().endsWith('.pdf')) continue;\n out.push({ json: { assignment, filename: f, path: path.join(sub, f) } });\n }\n}\nreturn out;\n"
}
},
{
"id": "upload",
"name": "Upload to Open WebUI",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
720,
320
],
"parameters": {
"language": "javaScript",
"jsCode": "/**\n * Upload one PDF to Open WebUI:\n * 1. ensure Knowledge collection named <assignment> exists\n * 2. POST file to /api/v1/files/\n * 3. attach file to knowledge via /api/v1/knowledge/{kid}/file/add\n * 4. move source PDF to processed/<assignment>/ (or failed/<assignment>/)\n *\n * Open WebUI handles Tika extraction, chunking, bge-m3 embedding, and Qdrant upsert.\n */\nconst fs = require('fs');\nconst path = require('path');\nconst crypto = require('crypto');\nconst http = require('http');\nconst https = require('https');\nconst { URL } = require('url');\n\nconst OWUI_URL = ($env.OPENWEBUI_URL || 'http://openwebui:8080').replace(/\\/$/, '');\nconst OWUI_KEY = $env.OPENWEBUI_API_KEY || '';\nif (!OWUI_KEY) throw new Error('OPENWEBUI_API_KEY env var is missing on the n8n container');\n\nfunction httpJson(method, urlStr, body, extraHeaders) {\n return new Promise((resolve, reject) => {\n const u = new URL(urlStr);\n const lib = u.protocol === 'https:' ? https : http;\n const headers = Object.assign(\n { 'Authorization': 'Bearer ' + OWUI_KEY, 'Accept': 'application/json' },\n extraHeaders || {},\n );\n let payload = null;\n if (body != null && !Buffer.isBuffer(body)) {\n payload = Buffer.from(JSON.stringify(body));\n headers['Content-Type'] = headers['Content-Type'] || 'application/json';\n } else if (Buffer.isBuffer(body)) {\n payload = body;\n }\n if (payload) headers['Content-Length'] = payload.length;\n const req = lib.request({\n method,\n hostname: u.hostname,\n port: u.port || (u.protocol === 'https:' ? 443 : 80),\n path: u.pathname + u.search,\n headers,\n }, (res) => {\n const chunks = [];\n res.on('data', (c) => chunks.push(c));\n res.on('end', () => {\n const text = Buffer.concat(chunks).toString('utf8');\n if (res.statusCode >= 200 && res.statusCode < 300) {\n try { resolve(JSON.parse(text)); } catch (_) { resolve(text); }\n } else {\n reject(new Error(`${method} ${urlStr} -> ${res.statusCode}: ${text.slice(0, 400)}`));\n }\n });\n });\n req.on('error', reject);\n if (payload) req.write(payload);\n req.end();\n });\n}\n\nfunction multipartBody(filePath, filename) {\n const boundary = '----n8nboxai' + crypto.randomBytes(12).toString('hex');\n const fileBuf = fs.readFileSync(filePath);\n const head = Buffer.from(\n '--' + boundary + '\\r\\n' +\n 'Content-Disposition: form-data; name=\"file\"; filename=\"' + filename.replace(/\"/g, '') + '\"\\r\\n' +\n 'Content-Type: application/pdf\\r\\n\\r\\n',\n 'utf8',\n );\n const tail = Buffer.from('\\r\\n--' + boundary + '--\\r\\n', 'utf8');\n return { boundary, body: Buffer.concat([head, fileBuf, tail]) };\n}\n\nasync function ensureKnowledge(name) {\n const list = await httpJson('GET', `${OWUI_URL}/api/v1/knowledge/`);\n const arr = Array.isArray(list) ? list : (list && list.data) || [];\n const found = arr.find(k => k && k.name === name);\n if (found) return found.id;\n const created = await httpJson('POST', `${OWUI_URL}/api/v1/knowledge/create`, {\n name,\n description: `boxai assignment '${name}' \u2014 PDFs auto-ingested from data/pdfs/incoming/${name}/.`,\n });\n return created.id;\n}\n\nasync function uploadFile(filePath, filename) {\n const { boundary, body } = multipartBody(filePath, filename);\n return httpJson('POST', `${OWUI_URL}/api/v1/files/`, body, {\n 'Content-Type': 'multipart/form-data; boundary=' + boundary,\n });\n}\n\nasync function attachFileToKnowledge(kid, fileId) {\n return httpJson('POST', `${OWUI_URL}/api/v1/knowledge/${kid}/file/add`, { file_id: fileId });\n}\n\nfunction safeMove(src, destDir) {\n fs.mkdirSync(destDir, { recursive: true });\n const dest = path.join(destDir, path.basename(src));\n try { fs.renameSync(src, dest); }\n catch (_) {\n fs.copyFileSync(src, dest);\n fs.unlinkSync(src);\n }\n return dest;\n}\n\nconst assignment = $json.assignment;\nconst filename = $json.filename;\nconst pdfPath = $json.path;\n\ntry {\n const kid = await ensureKnowledge(assignment);\n const file = await uploadFile(pdfPath, filename);\n await attachFileToKnowledge(kid, file.id);\n const dest = safeMove(pdfPath, `/data/pdfs/processed/${assignment}`);\n return [{ json: {\n status: 'ok',\n assignment,\n knowledge_id: kid,\n file_id: file.id,\n filename,\n moved_to: dest,\n } }];\n} catch (err) {\n let dest = null;\n try { dest = safeMove(pdfPath, `/data/pdfs/failed/${assignment}`); } catch (_) {}\n return [{ json: {\n status: 'error',\n assignment,\n filename,\n error: String(err && err.message || err),\n moved_to: dest,\n } }];\n}\n"
}
}
],
"connections": {
"Manual run": {
"main": [
[
{
"node": "List incoming PDFs",
"type": "main",
"index": 0
}
]
]
},
"Every minute": {
"main": [
[
{
"node": "List incoming PDFs",
"type": "main",
"index": 0
}
]
]
},
"List incoming PDFs": {
"main": [
[
{
"node": "Upload to Open WebUI",
"type": "main",
"index": 0
}
]
]
}
}
}
For the full experience, including quality scoring and batch-install features for each workflow, upgrade to Pro.
About this workflow
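pdf-ingest. Manual and scheduled (every minute) triggers; 4 nodes. Requires the OPENWEBUI_API_KEY environment variable on the n8n container; OPENWEBUI_URL is optional and defaults to http://openwebui:8080.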
Source: https://github.com/emilcardell/contained-ai-toolbox/blob/07463c7bbec946996b8b3ca08f7e24054c8209f6/flows/pdf-ingest.json — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
Blotato. Uses googleSheets, @blotato/n8n-nodes-blotato. Event-driven trigger; 65 nodes.
This template is a hands-on, practical exam designed to help you master n8n Expressions—the key to accessing and manipulating data in your workflows.
This template is a hands-on, practical exam designed to test your understanding of the fundamental JSON data types. It's the perfect way to solidify your knowledge after learning the basics.
Agendamiento. Uses n8n-nodes-evolution-api, redis, dataTable, executeWorkflowTrigger. Event-driven trigger; 60 nodes.
Kv Cloudflare Key Value Database Full Api Integration Workflow. Uses stickyNote, httpRequest, manualTrigger. Event-driven trigger; 47 nodes.