This workflow follows the Execute Command → HTTP Request recipe pattern — see all workflows that pair these two integrations.
The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, activate. Full import guide →
{
"name": "Document Ingestion Pipeline",
"nodes": [
{
"parameters": {
"content": "## Document Ingestion Pipeline\n\nThis workflow processes PDF documents and stores them as vector embeddings in PostgreSQL.\n\n**Steps:**\n1. Trigger manually or detect new files\n2. Read and extract text from PDFs\n3. Split text into chunks\n4. Generate embeddings via Ollama\n5. Store in pgvector database",
"height": 280,
"width": 320
},
"id": "sticky-note",
"name": "Instructions",
"type": "n8n-nodes-base.stickyNote",
"typeVersion": 1,
"position": [
-200,
-100
]
},
{
"parameters": {},
"id": "manual-trigger",
"name": "Manual Trigger",
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
0,
200
]
},
{
"parameters": {
"command": "ls -1 /home/node/.n8n-files/*.pdf /home/node/.n8n-files/*.txt /home/node/.n8n-files/*.md 2>/dev/null || echo ''"
},
"id": "list-files",
"name": "List PDF Files",
"type": "n8n-nodes-base.executeCommand",
"typeVersion": 1,
"position": [
220,
200
]
},
{
"parameters": {
"jsCode": "const stdout = $input.first().json.stdout || '';\nconst supportedExtensions = ['.pdf', '.txt', '.md'];\nconst files = stdout.trim().split('\\n').filter(f => {\n if (!f) return false;\n return supportedExtensions.some(ext => f.toLowerCase().endsWith(ext));\n});\n\nif (files.length === 0) {\n throw new Error('No supported files found in /home/node/.n8n-files/ (supported: PDF, TXT, MD)');\n}\n\nreturn files.map(filePath => {\n const fileName = filePath.split('/').pop();\n const ext = fileName.split('.').pop().toLowerCase();\n return {\n json: {\n filePath,\n fileName,\n fileType: ext\n }\n };\n});"
},
"id": "parse-file-list",
"name": "Parse File List",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
440,
200
]
},
{
"parameters": {
"filePath": "={{ $json.filePath }}"
},
"id": "read-binary",
"name": "Read Binary File",
"type": "n8n-nodes-base.readBinaryFile",
"typeVersion": 1,
"position": [
660,
200
]
},
{
"parameters": {},
"id": "read-pdf",
"name": "Extract PDF Text",
"type": "n8n-nodes-base.readPDF",
"typeVersion": 1,
"position": [
880,
200
]
},
{
"parameters": {
"jsCode": "const CHUNK_SIZE = 1000;\nconst CHUNK_OVERLAP = 200;\n\n// Split one document into overlapping chunks, preferring to end on a sentence or line break.\nfunction splitText(text, fileName) {\n  const chunks = [];\n  let start = 0;\n\n  while (start < text.length) {\n    const end = Math.min(start + CHUNK_SIZE, text.length);\n    let chunk = text.slice(start, end);\n\n    // Try to break at sentence boundary\n    if (end < text.length) {\n      const breakPoint = Math.max(chunk.lastIndexOf('.'), chunk.lastIndexOf('\\n'));\n      if (breakPoint > CHUNK_SIZE * 0.5) {\n        chunk = chunk.slice(0, breakPoint + 1);\n      }\n    }\n\n    const charEnd = start + chunk.length;\n    const content = chunk.trim();\n\n    // Whitespace-only chunks would yield meaningless embeddings, so skip them\n    if (content) {\n      const chunkIndex = chunks.length;\n      chunks.push({\n        json: {\n          documentName: fileName,\n          chunkIndex,\n          content,\n          metadata: { fileName, chunkIndex, charStart: start, charEnd }\n        }\n      });\n    }\n\n    // The tail is already covered; stepping back by the overlap would only re-emit a suffix of it\n    if (charEnd >= text.length) break;\n\n    const next = charEnd - CHUNK_OVERLAP;\n    start = next > start ? next : charEnd;\n  }\n\n  return chunks;\n}\n\n// Process every incoming document, not only the first one.\n// File names are matched by position; this assumes the nodes between 'Parse File List' and this one emit one item per file, in order.\nconst sources = $('Parse File List').all();\nconst output = [];\n\n$input.all().forEach((item, index) => {\n  const fileName = sources[index]?.json?.fileName;\n  if (!fileName) {\n    throw new Error(`Cannot determine the file name for input item ${index}`);\n  }\n\n  const text = item.json.text || '';\n  if (!text.trim()) {\n    throw new Error(`No text extracted from ${fileName}`);\n  }\n\n  for (const chunk of splitText(text, fileName)) {\n    output.push(chunk);\n  }\n});\n\nreturn output;"
},
"id": "text-splitter",
"name": "Split into Chunks",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1100,
200
]
},
{
"parameters": {
"method": "POST",
"url": "http://192.168.50.49:11434/api/embeddings",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={{ JSON.stringify({ model: 'mxbai-embed-large:latest', prompt: $json.content }) }}",
"options": {
"timeout": 60000
}
},
"id": "generate-embedding",
"name": "Generate Embedding",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
1320,
200
]
},
{
"parameters": {
"jsCode": "// Pair every embedding response with the chunk that produced it, instead of handling only the first item.\n// Matching is positional; this assumes 'Generate Embedding' returns one item per chunk, in order.\nconst responses = $input.all();\nconst chunks = $('Split into Chunks').all();\n\nif (responses.length !== chunks.length) {\n  throw new Error(`Expected ${chunks.length} embedding responses but received ${responses.length}`);\n}\n\nreturn responses.map((response, index) => {\n  const chunk = chunks[index];\n  const embedding = response.json.embedding;\n\n  // An empty array is rejected as well: it cannot be stored as a usable vector\n  if (!Array.isArray(embedding) || embedding.length === 0) {\n    throw new Error('Invalid embedding response from Ollama');\n  }\n\n  return {\n    json: {\n      documentName: chunk.json.documentName,\n      chunkIndex: chunk.json.chunkIndex,\n      content: chunk.json.content,\n      // Format embedding as PostgreSQL vector string\n      embedding: `[${embedding.join(',')}]`,\n      metadata: JSON.stringify(chunk.json.metadata)\n    }\n  };\n});"
},
"id": "format-for-db",
"name": "Format for Database",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1540,
200
]
},
{
"parameters": {
"operation": "executeQuery",
"query": "SELECT ingest_document_chunk($1, $2, $3, $4::vector, $5::jsonb) as id;",
"options": {
"queryReplacement": "={{ [$json.documentName, $json.chunkIndex, $json.content, $json.embedding, $json.metadata] }}"
}
},
"id": "store-in-pgvector",
"name": "Store in PGVector",
"type": "n8n-nodes-base.postgres",
"typeVersion": 2.5,
"position": [
1760,
200
],
"credentials": {
"postgres": {
"name": "<your credential>"
}
}
},
{
"parameters": {
"aggregate": "aggregateAllItemData",
"destinationFieldName": "results",
"options": {}
},
"id": "aggregate-results",
"name": "Aggregate Results",
"type": "n8n-nodes-base.aggregate",
"typeVersion": 1,
"position": [
1980,
200
]
},
{
"parameters": {
"jsCode": "// 'results' holds the rows returned by 'Store in PGVector', one per stored chunk; the query selects only the id\nconst results = $input.first().json.results || [];\nconst totalChunks = results.length;\n\n// Document names are not part of the query output, so read them from the items that were sent to the database\nconst storedChunks = $('Format for Database').all();\nconst uniqueDocs = [...new Set(storedChunks.map(item => item.json.documentName))];\n\nreturn [{\n  json: {\n    success: true,\n    message: `Successfully processed ${uniqueDocs.length} document(s) with ${totalChunks} chunks`,\n    documents: uniqueDocs,\n    totalChunks\n  }\n}];"
},
"id": "summary",
"name": "Generate Summary",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2200,
200
]
}
],
"connections": {
"Manual Trigger": {
"main": [
[
{
"node": "List PDF Files",
"type": "main",
"index": 0
}
]
]
},
"List PDF Files": {
"main": [
[
{
"node": "Parse File List",
"type": "main",
"index": 0
}
]
]
},
"Parse File List": {
"main": [
[
{
"node": "Read Binary File",
"type": "main",
"index": 0
}
]
]
},
"Read Binary File": {
"main": [
[
{
"node": "Extract PDF Text",
"type": "main",
"index": 0
}
]
]
},
"Extract PDF Text": {
"main": [
[
{
"node": "Split into Chunks",
"type": "main",
"index": 0
}
]
]
},
"Split into Chunks": {
"main": [
[
{
"node": "Generate Embedding",
"type": "main",
"index": 0
}
]
]
},
"Generate Embedding": {
"main": [
[
{
"node": "Format for Database",
"type": "main",
"index": 0
}
]
]
},
"Format for Database": {
"main": [
[
{
"node": "Store in PGVector",
"type": "main",
"index": 0
}
]
]
},
"Store in PGVector": {
"main": [
[
{
"node": "Aggregate Results",
"type": "main",
"index": 0
}
]
]
},
"Aggregate Results": {
"main": [
[
{
"node": "Generate Summary",
"type": "main",
"index": 0
}
]
]
}
},
"settings": {
"executionOrder": "v1"
},
"staticData": null,
"tags": [
{
"name": "RAG"
},
{
"name": "Document Processing"
}
],
"triggerCount": 1
}
Credentials you'll need
Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.
postgres
For the full experience, including quality scoring and batch install features for each workflow, upgrade to Pro.
About this workflow
Document Ingestion Pipeline. Uses executeCommand, readBinaryFile, readPDF, httpRequest. Event-driven trigger; 12 nodes.
Source: https://github.com/mohsp-99/n8n-ollama/blob/0ba7d1fd1a606f212cfdf346f85fb70f905f5696/n8n-workflows/document-ingestion.json — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
Turn existing PostgreSQL databases into AI-searchable Pinecone vector knowledge bases without manually defining every table and column.
HeyDinastia. Uses executeCommand, httpRequest, youTube, postgres. Webhook trigger; 66 nodes.
This simple philosophy changes the way we think about automated sales agents. Context changes everything. In this 4-part workflow, we start by creating a knowledge base that will act as context across
RAG AI Agent Template V5. Uses lmChatOpenAi, documentDefaultDataLoader, embeddingsOpenAi, googleDrive. Event-driven trigger; 56 nodes.
My workflow 2529. Uses lmChatOpenAi, documentDefaultDataLoader, embeddingsOpenAi, googleDrive. Event-driven trigger; 54 nodes.