AutomationFlows › General › PDF Ingest

PDF Ingest

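pdf-ingest. Lists PDFs under /data/pdfs/incoming/<assignment>/ and uploads them to the Open WebUI Knowledge collection named after their folder. Manual and schedule (every minute) triggers; 4 nodes.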

Event trigger · ★★★★☆ complexity · 4 nodes
Category: General · Trigger: Event · Nodes: 4 · Complexity: ★★★★☆ · Added: (not specified)

The workflow JSON

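Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and activate it. Note that this workflow does not use stored credentials: its upload step reads OPENWEBUI_API_KEY (required) and OPENWEBUI_URL (optional, defaults to http://openwebui:8080) from the n8n environment. Full import guide →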

Download .json
{
  "name": "pdf-ingest",
  "settings": {
    "executionOrder": "v1",
    "saveExecutionProgress": true,
    "saveDataErrorExecution": "all",
    "saveDataSuccessExecution": "all"
  },
  "nodes": [
    {
      "id": "manual",
      "name": "Manual run",
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        240,
        220
      ],
      "parameters": {}
    },
    {
      "id": "schedule",
      "name": "Every minute",
      "type": "n8n-nodes-base.scheduleTrigger",
      "typeVersion": 1.2,
      "position": [
        240,
        420
      ],
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "minutes",
              "minutesInterval": 1
            }
          ]
        }
      }
    },
    {
      "id": "list",
      "name": "List incoming PDFs",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        480,
        320
      ],
      "parameters": {
        "language": "javaScript",
        "jsCode": "// Walk /data/pdfs/incoming/<assignment>/*.pdf\n// Each immediate subfolder = one Open WebUI Knowledge collection.\n// Emits one item per PDF: { assignment, filename, path }.\nconst fs = require('fs');\nconst path = require('path');\nconst root = '/data/pdfs/incoming';\nconst out = [];\nlet entries = [];\n// A missing or unreadable root just means there is nothing to ingest yet.\ntry { entries = fs.readdirSync(root, { withFileTypes: true }); } catch (e) { return out; }\nfor (const e of entries) {\n  if (!e.isDirectory()) continue;\n  const assignment = e.name;\n  const sub = path.join(root, assignment);\n  let files = [];\n  try { files = fs.readdirSync(sub); } catch (_) { continue; }\n  for (const f of files) {\n    if (!f.toLowerCase().endsWith('.pdf')) continue;\n    const full = path.join(sub, f);\n    // Queue regular files only: a directory named 'x.pdf', or an entry that vanished\n    // after readdir, cannot be uploaded and would only end up as a failed item.\n    let isFile = false;\n    try { isFile = fs.statSync(full).isFile(); } catch (_) { continue; }\n    if (!isFile) continue;\n    out.push({ json: { assignment, filename: f, path: full } });\n  }\n}\nreturn out;\n"
      }
    },
    {
      "id": "upload",
      "name": "Upload to Open WebUI",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        720,
        320
      ],
      "parameters": {
        "language": "javaScript",
        "jsCode": "/**\n * Upload every incoming PDF to Open WebUI. For each input item ({ assignment, filename, path }):\n *   1. ensure Knowledge collection named <assignment> exists\n *   2. POST file to /api/v1/files/\n *   3. attach file to knowledge via /api/v1/knowledge/{kid}/file/add\n *   4. move source PDF to processed/<assignment>/  (or failed/<assignment>/)\n *\n * Open WebUI handles Tika extraction, chunking, bge-m3 embedding, and Qdrant upsert.\n *\n * Returns one output item per input item with status 'ok', 'error' or 'skipped'.\n */\nconst fs = require('fs');\nconst path = require('path');\nconst crypto = require('crypto');\nconst http = require('http');\nconst https = require('https');\nconst { URL } = require('url');\n\nconst OWUI_URL = ($env.OPENWEBUI_URL || 'http://openwebui:8080').replace(/\\/$/, '');\nconst OWUI_KEY = $env.OPENWEBUI_API_KEY || '';\nif (!OWUI_KEY) throw new Error('OPENWEBUI_API_KEY env var is missing on the n8n container');\n\n// Minimal JSON-over-HTTP helper: resolves with the parsed body (or raw text) on 2xx, rejects otherwise.\nfunction httpJson(method, urlStr, body, extraHeaders) {\n  return new Promise((resolve, reject) => {\n    const u = new URL(urlStr);\n    const lib = u.protocol === 'https:' ? https : http;\n    const headers = Object.assign(\n      { 'Authorization': 'Bearer ' + OWUI_KEY, 'Accept': 'application/json' },\n      extraHeaders || {},\n    );\n    let payload = null;\n    if (body != null && !Buffer.isBuffer(body)) {\n      payload = Buffer.from(JSON.stringify(body));\n      headers['Content-Type'] = headers['Content-Type'] || 'application/json';\n    } else if (Buffer.isBuffer(body)) {\n      payload = body;\n    }\n    if (payload) headers['Content-Length'] = payload.length;\n    const req = lib.request({\n      method,\n      hostname: u.hostname,\n      port: u.port || (u.protocol === 'https:' ? 443 : 80),\n      path: u.pathname + u.search,\n      headers,\n    }, (res) => {\n      const chunks = [];\n      res.on('data', (c) => chunks.push(c));\n      res.on('end', () => {\n        const text = Buffer.concat(chunks).toString('utf8');\n        if (res.statusCode >= 200 && res.statusCode < 300) {\n          try { resolve(JSON.parse(text)); } catch (_) { resolve(text); }\n        } else {\n          reject(new Error(`${method} ${urlStr} -> ${res.statusCode}: ${text.slice(0, 400)}`));\n        }\n      });\n    });\n    req.on('error', reject);\n    if (payload) req.write(payload);\n    req.end();\n  });\n}\n\n// Build a single-part multipart/form-data body carrying the PDF as field 'file'.\nfunction multipartBody(filePath, filename) {\n  const boundary = '----n8nboxai' + crypto.randomBytes(12).toString('hex');\n  const fileBuf = fs.readFileSync(filePath);\n  // Strip quotes and CR/LF so a hostile filename cannot break out of the header line.\n  const head = Buffer.from(\n    '--' + boundary + '\\r\\n' +\n    'Content-Disposition: form-data; name=\"file\"; filename=\"' + filename.replace(/[\"\\r\\n]/g, '') + '\"\\r\\n' +\n    'Content-Type: application/pdf\\r\\n\\r\\n',\n    'utf8',\n  );\n  const tail = Buffer.from('\\r\\n--' + boundary + '--\\r\\n', 'utf8');\n  return { boundary, body: Buffer.concat([head, fileBuf, tail]) };\n}\n\n// Return the id of the Knowledge collection called <name>, creating it when absent.\nasync function ensureKnowledge(name) {\n  const list = await httpJson('GET', `${OWUI_URL}/api/v1/knowledge/`);\n  const arr = Array.isArray(list) ? list : (list && list.data) || [];\n  const found = arr.find(k => k && k.name === name);\n  if (found) return found.id;\n  const created = await httpJson('POST', `${OWUI_URL}/api/v1/knowledge/create`, {\n    name,\n    description: `boxai assignment '${name}' \u2014 PDFs auto-ingested from data/pdfs/incoming/${name}/.`,\n  });\n  return created.id;\n}\n\nasync function uploadFile(filePath, filename) {\n  const { boundary, body } = multipartBody(filePath, filename);\n  return httpJson('POST', `${OWUI_URL}/api/v1/files/`, body, {\n    'Content-Type': 'multipart/form-data; boundary=' + boundary,\n  });\n}\n\nasync function attachFileToKnowledge(kid, fileId) {\n  return httpJson('POST', `${OWUI_URL}/api/v1/knowledge/${kid}/file/add`, { file_id: fileId });\n}\n\n// Move src into destDir; falls back to copy + unlink when rename fails (e.g. across filesystems).\nfunction safeMove(src, destDir) {\n  fs.mkdirSync(destDir, { recursive: true });\n  const dest = path.join(destDir, path.basename(src));\n  try { fs.renameSync(src, dest); }\n  catch (_) {\n    fs.copyFileSync(src, dest);\n    fs.unlinkSync(src);\n  }\n  return dest;\n}\n\n// This node has no explicit mode, so it runs once for all items; in that mode $json is\n// not evaluated per item, so walk every input item explicitly.\nconst knowledgeIds = new Map(); // assignment -> knowledge id, resolved once per run\nconst results = [];\n\nfor (const item of $input.all()) {\n  const assignment = item.json.assignment;\n  const filename   = item.json.filename;\n  const pdfPath    = item.json.path;\n\n  // An overlapping run may already have moved this file; do not report that as a failure.\n  if (!fs.existsSync(pdfPath)) {\n    results.push({ json: {\n      status: 'skipped',\n      assignment,\n      filename,\n      error: 'source file no longer exists',\n      moved_to: null,\n    } });\n    continue;\n  }\n\n  try {\n    let kid = knowledgeIds.get(assignment);\n    if (kid === undefined) {\n      kid = await ensureKnowledge(assignment);\n      knowledgeIds.set(assignment, kid);\n    }\n    const file = await uploadFile(pdfPath, filename);\n    await attachFileToKnowledge(kid, file.id);\n    const dest = safeMove(pdfPath, `/data/pdfs/processed/${assignment}`);\n    results.push({ json: {\n      status: 'ok',\n      assignment,\n      knowledge_id: kid,\n      file_id: file.id,\n      filename,\n      moved_to: dest,\n    } });\n  } catch (err) {\n    // Best effort: park the PDF under failed/ so it is not picked up again on the next run.\n    let dest = null;\n    try { dest = safeMove(pdfPath, `/data/pdfs/failed/${assignment}`); } catch (_) {}\n    results.push({ json: {\n      status: 'error',\n      assignment,\n      filename,\n      error: String(err && err.message || err),\n      moved_to: dest,\n    } });\n  }\n}\n\nreturn results;\n"
      }
    }
  ],
  "connections": {
    "Manual run": {
      "main": [
        [
          {
            "node": "List incoming PDFs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Every minute": {
      "main": [
        [
          {
            "node": "List incoming PDFs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "List incoming PDFs": {
      "main": [
        [
          {
            "node": "Upload to Open WebUI",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
Pro

For the full experience, including quality scoring and batch-install features for each workflow, upgrade to Pro.

About this workflow

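pdf-ingest. Lists PDFs under /data/pdfs/incoming/<assignment>/ and uploads them to the Open WebUI Knowledge collection named after their folder. Manual and schedule (every minute) triggers; 4 nodes.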

Source: https://github.com/emilcardell/contained-ai-toolbox/blob/07463c7bbec946996b8b3ca08f7e24054c8209f6/flows/pdf-ingest.json — original creator credit. Request a take-down →

More General workflows → · Browse all categories →

Related workflows

Workflows that share integrations, category, or trigger type with this one. All free to copy and import.

General

Blotato. Uses googleSheets, @blotato/n8n-nodes-blotato. Event-driven trigger; 65 nodes.

Google Sheets, @Blotato/N8N Nodes Blotato
General

This template is a hands-on, practical exam designed to help you master n8n Expressions—the key to accessing and manipulating data in your workflows.

Stop And Error
General

This template is a hands-on, practical exam designed to test your understanding of the fundamental JSON data types. It's the perfect way to solidify your knowledge after learning the basics.

Stop And Error
General

Agendamiento. Uses n8n-nodes-evolution-api, redis, dataTable, executeWorkflowTrigger. Event-driven trigger; 60 nodes.

N8N Nodes Evolution Api, Redis, Data Table +2
General

Kv Cloudflare Key Value Database Full Api Integration Workflow. Uses stickyNote, httpRequest, manualTrigger. Event-driven trigger; 47 nodes.

HTTP Request