This workflow corresponds to n8n.io template #12981 — we link there as the canonical source.

This workflow follows the Agent → Chat Trigger recipe pattern — see all workflows that pair these two integrations.

The workflow JSON

Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, activate. Full import guide →

Download .json

{
  "id": "78rVOQa5gbWxRnJN",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Turn any website into an AI customer support chatbot (OpenAI + Pinecone)",
  "tags": [],
  "nodes": [
    {
      "id": "8ecff878-cb1f-4e9c-98a0-3727269aee97",
      "name": "When clicking \u2018Execute workflow\u2019",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1904,
        208
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "23adecea-f54b-49c7-8ac6-75df99bcc469",
      "name": "Embeddings OpenAI",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        592,
        384
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "673fa27a-9f5d-4a3c-8807-64c26aaaa576",
      "name": "Map Website URLs",
      "type": "@mendable/n8n-nodes-firecrawl.firecrawl",
      "position": [
        -1632,
        208
      ],
      "parameters": {
        "url": "<your-website-url>",
        "sitemap": "only",
        "operation": "map",
        "requestOptions": {},
        "includeSubdomains": true
      },
      "credentials": {
        "firecrawlApi": {
          "name": "<your credential>"
        }
      },
      "retryOnFail": true,
      "typeVersion": 1
    },
    {
      "id": "d17427ae-ce8c-4015-af2d-c20a44acc11c",
      "name": "Filter and Normalize URLs",
      "type": "n8n-nodes-base.code",
      "position": [
        -1296,
        208
      ],
      "parameters": {
        "jsCode": "// input: { success: true, links: [ { url: \"...\" } ] }\n\nconst links = $json.links || [];\nconst seen = new Set();\n\nconst blockedKeywords = [\n  \"/press-blog\",\n  \"/newsletter\",\n  \"/assets-vault\",\n  \"press\",\n  \"gallery\",\n  \"project\",\n  \"newsletter\"\n];\n\nreturn links\n  .filter(link => {\n    let url = link.url;\n    if (!url) return false;\n\n    // \u274c Remove blocked keywords\n    for (const keyword of blockedKeywords) {\n      if (url.includes(keyword)) return false;\n    }\n\n    // \u274c Remove XML files\n    if (url.includes(\".xml\")) return false;\n\n    // \ud83d\udd04 Normalize trailing slash\n    url = url.endsWith(\"/\") ? url.slice(0, -1) : url;\n\n    // \u274c Remove duplicates\n    if (seen.has(url)) return false;\n    seen.add(url);\n\n    // Save normalized URL back\n    link.url = url;\n    return true;\n  })\n  .map(link => {\n    return {\n      url: link.url\n    };\n  });\n"
      },
      "typeVersion": 2
    },
    {
      "id": "bd51e7d4-485c-4759-b4f1-63b2d1219c35",
      "name": "Process URLs One by One",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -960,
        208
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "37897ec6-0cb2-481a-a7ad-15046192720d",
      "name": "Fetch Page Content",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -624,
        128
      ],
      "parameters": {
        "url": "={{ $json.url }}",
        "options": {
          "redirect": {
            "redirect": {
              "followRedirects": true
            }
          }
        }
      },
      "retryOnFail": true,
      "typeVersion": 3,
      "continueOnFail": true
    },
    {
      "id": "ddca10bf-eaca-47a3-8a8a-64ad2e247bfc",
      "name": "Extract HTML and URL",
      "type": "n8n-nodes-base.set",
      "position": [
        -368,
        128
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "f84fa14c-2e29-46ef-a914-2267eb52b815",
              "name": "html",
              "type": "string",
              "value": "={{ $json.data ?? $json.html ?? \"\" }}"
            },
            {
              "id": "096f51e1-e470-4fde-8d62-3338ffcc4f6b",
              "name": "url",
              "type": "string",
              "value": "={{ $json.url ?? $('Process URLs One by One').item.json.url }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "e5da4265-7bb0-4b01-b6dd-97ede548b7c3",
      "name": "Clean HTML to Text",
      "type": "n8n-nodes-base.code",
      "position": [
        -176,
        128
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "const html = $json.html;\n\n// VERY simple cleaner (replace later with readability)\nconst text = html\n  .replace(/<script[\\s\\S]*?<\\/script>/gi, '')\n  .replace(/<style[\\s\\S]*?<\\/style>/gi, '')\n  .replace(/<[^>]+>/g, ' ')\n  .replace(/\\s+/g, ' ')\n  .trim();\n\nreturn {\n  url: $json.url,\n  project_id: $json.project_id,\n  text\n};\n"
      },
      "typeVersion": 2
    },
    {
      "id": "f5c3ea61-c855-47e2-9110-3242ca6c0ce3",
      "name": "Check Text Length",
      "type": "n8n-nodes-base.if",
      "position": [
        48,
        128
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 3,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "aa3e5022-2d6d-43f6-bf87-9de112a3345f",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{$json.text.length > 50}}",
              "rightValue": ""
            }
          ]
        },
        "looseTypeValidation": true
      },
      "typeVersion": 2.3
    },
    {
      "id": "ba89389e-2dcc-4aa1-8f2a-7746641df62c",
      "name": "Split Text into Chunks",
      "type": "n8n-nodes-base.code",
      "position": [
        272,
        112
      ],
      "parameters": {
        "jsCode": "const raw = $json.text || \"\";\nconst text = raw.replace(/\\s+/g, \" \").trim(); // clean whitespace\nconst chunkSize = 1000;\nconst overlap = 200;\n\nconst chunks = [];\nlet i = 0;\n\nwhile (i < text.length) {\n  const chunk = text.slice(i, i + chunkSize).trim();\n\n  // Only save useful chunks\n  if (chunk.length > 50) {\n    chunks.push({\n      content: chunk,\n      url: $json.url,\n      project_id: $json.project_id\n    });\n  }\n\n  i += chunkSize - overlap;\n}\n\nreturn chunks;\n"
      },
      "typeVersion": 2
    },
    {
      "id": "a9947307-f9f7-4280-860f-a9f18c81e530",
      "name": "Store in Vector Database",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "position": [
        624,
        192
      ],
      "parameters": {
        "mode": "insert",
        "options": {
          "pineconeNamespace": "<your-pinecone-namespace-name>"
        },
        "pineconeIndex": {
          "__rl": true,
          "mode": "list",
          "value": ""
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "faeb8ce7-2e62-4782-840c-fde4f70d1be0",
      "name": "Document Loader",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        784,
        400
      ],
      "parameters": {
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "url",
                "value": "={{ $json.url }}"
              },
              {
                "name": "content",
                "value": "={{ $json.content }}"
              }
            ]
          }
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "f13244de-8266-41a1-a918-474ab0021376",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2720,
        0
      ],
      "parameters": {
        "width": 592,
        "height": 688,
        "content": "## Turn any website into an AI\u2011powered customer\u2011support chatbot (Open\u202fAI\u202f+\u202fPinecone)\n\n### How it works\n1\ufe0f\u20e3 **Discover URLs** \u2013 Firecrawl reads the sitemap (including sub\u2011domains) and returns every page URL.  \n2\ufe0f\u20e3 **Clean & de\u2011duplicate** \u2013 JavaScript removes unwanted paths (press\u2011blog, newsletter, assets, .xml), trailing slashes and duplicate links.  \n3\ufe0f\u20e3 **Controlled crawling** \u2013 the remaining URLs are processed one\u2011by\u2011one, fetching the raw HTML.  \n4\ufe0f\u20e3 **HTML \u2192 plain text** \u2013 script/style tags are stripped and the rest of the markup is turned into readable text.  \n5\ufe0f\u20e3 **Validate & chunk** \u2013 pages shorter than 50\u202fchars are ignored; the rest is split into 1\u202f000\u2011char chunks with a 200\u2011char overlap.  \n6\ufe0f\u20e3 **Embed & store** \u2013 each chunk is turned into an OpenAI embedding and up\u2011serted into a Pinecone index (namespace = your\u2011site\u2011name).  \n7\ufe0f\u20e3 **Chat endpoint** \u2013 a LangChain agent receives a user message, decides (via the system prompt) whether to call the Pinecone retrieval tool, and replies **only** with information that actually exists in the vector store.\n\n### Setup steps\n1\ufe0f\u20e3 Add your **Firecrawl** API key (Credentials\u202f\u2192\u202fFirecrawl).  \n2\ufe0f\u20e3 Add your **OpenAI** API key (Credentials\u202f\u2192\u202fOpenAI).  \n3\ufe0f\u20e3 Create a **Pinecone** index (1536 dimensions for `text\u2011embedding\u2011ada\u2011002`).  \n4\ufe0f\u20e3 Put the index name and a namespace into the \u201cStore in Vector Database\u201d node.  \n5\ufe0f\u20e3 Replace `<your-website-url>` in the *Map Website URLs* node.  \n6\ufe0f\u20e3 Execute the workflow \u2013 it will fill the vector store.  \n7\ufe0f\u20e3 Deploy the **Chat Trigger** webhook (copy the URL) and embed it in your front\u2011end.\n\nWhen the index is populated the bot will answer any product, policy or FAQ question without hallucinations.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "18ac8782-ba5b-41e6-b45c-161b329c1b7f",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1696,
        80
      ],
      "parameters": {
        "color": 2,
        "height": 304,
        "content": "URL Discovery \u2013 Firecrawl reads the sitemap (including sub\u2011domains) and returns every page URL.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "026d9947-3d50-40f3-a6fd-369101cde755",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1376,
        80
      ],
      "parameters": {
        "color": 2,
        "width": 560,
        "height": 304,
        "content": "## URL Cleaning & Batching\nJavaScript removes press\u2011blog, newsletter, assets\u2011vault, .xml files, trailing slashes and duplicate URLs. \n\nThen SplitInBatches processes the list one\u2011by\u2011one.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "27cd19ce-584d-4f2a-9cf3-af3e6c7644d8",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -704,
        48
      ],
      "parameters": {
        "color": 3,
        "width": 672,
        "height": 240,
        "content": "Page Fetch & Text Extraction \u2013 HTTP request gets the HTML, a Set node normalises fields, and a small JS step strips script/style tags and converts the markup to plain readable text.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "0d1813dd-26c7-4db4-98b0-07bece863767",
      "name": "When chat message received",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "position": [
        -1856,
        752
      ],
      "parameters": {
        "mode": "webhook",
        "public": true,
        "options": {
          "allowedOrigins": "*"
        },
        "availableInChat": true
      },
      "typeVersion": 1.1
    },
    {
      "id": "c8283ada-3684-4a1e-b26b-f7c341730c83",
      "name": "Chat Memory",
      "type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
      "position": [
        -1504,
        976
      ],
      "parameters": {
        "contextWindowLength": 10
      },
      "typeVersion": 1.3
    },
    {
      "id": "953a61ab-99c4-437d-b180-9656dfc9ccc6",
      "name": "Customer Support Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        -1600,
        752
      ],
      "parameters": {
        "options": {
          "maxIterations": 10,
          "systemMessage": "=You are **Alicia**, an AI customer support assistant for our company.\n\nToday's date is {{ $now.format('yyyy-MM-dd') }}.\nUse this date as the only reference for relative time.\n\n========================\nPRIMARY RULE\n========================\n\nAll business-related information MUST come from tools.\n\nThis includes:\n- Products\n- Services\n- Company information\n- Website content\n- Policies and FAQs\n- Pricing and availability\n- Contact details\n\nDo NOT answer these from memory or assumptions.\nIf a relevant tool exists, you MUST use it.\n\n========================\nPINECONE USAGE (CRITICAL)\n========================\n\nUse the Pinecone vector store whenever the user asks about:\n- Products or services\n- What the company offers\n- Policies, pricing, FAQs\n- Company or website information\n\nDo NOT use Pinecone for:\n- General greetings\n- Small talk\n- Personal details\n- Appointment scheduling\n\nIf Pinecone returns:\n- No results\n- Empty data\n- Errors\n\nReply:\n\"I could not find that information. Would you like me to create a support ticket for you?\"\n\n========================\nMEMORY USAGE\n========================\n\nMemory may only be used for:\n- Name\n- Email\n- Phone number\n\nAnd only after:\n- A successful booking or confirmed interaction.\n\nNever use memory for:\n- Products\n- Services\n- Company info\n- Availability\n\n========================\nGREETING RULE\n========================\n\nIf the user only says:\n\"Hi\", \"Hello\", or \"Hey\"\n\nReply exactly:\n\"Hi, how can I help you today?\"\n\nNo tools.\nNo extra text.\n\n========================\nRESPONSE RULES\n========================\n\n- Be friendly and conversational.\n- Use short, clear sentences.\n- Never mention internal tools.\n- Never invent information.\n- Never explain your reasoning.\n- Never say \"as an AI\".\n\n========================\nLINK RULE\n========================\n\nIf a link is included:\n- Use Markdown format: [Link text](URL)\n- Never show raw URLs.\n- Links must come from tool output only.\n\n========================\nFAILURE RULE\n========================\n\nIf any required tool fails:\n\nReply exactly:\n\"I could not find that information. Would you like me to create a support ticket for you?\"\n\nNo variations.\nNo extra text.\n\n========================\nEND\n========================\n",
          "returnIntermediateSteps": false
        }
      },
      "typeVersion": 3
    },
    {
      "id": "a7128a69-d4f9-43e2-af49-3c6b903a0719",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        -1600,
        1168
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4.1-mini",
          "cachedResultName": "gpt-4.1-mini"
        },
        "options": {},
        "builtInTools": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "f1b6383c-14c4-4562-b89f-3c50715e5fca",
      "name": "OpenAI Embeddings",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        -1328,
        1152
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "829e903c-08f8-433f-9650-456e3f267f14",
      "name": "Pinecone Vector Storage",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "position": [
        -1376,
        976
      ],
      "parameters": {
        "mode": "retrieve-as-tool",
        "topK": 10,
        "options": {
          "pineconeNamespace": "<your-pinecone-namespace-name>"
        },
        "pineconeIndex": {
          "__rl": true,
          "mode": "list",
          "value": ""
        },
        "toolDescription": "=Use this tool whenever the user asks anything about Blushu, our services, company info, website content, product/service details, policies, FAQs, contact information, pricing, or questions that require knowledge from the company knowledge base. \n"
      },
      "typeVersion": 1.3
    },
    {
      "id": "4a195c24-4dbe-47d4-8f0d-9bd64630f55a",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        224,
        -16
      ],
      "parameters": {
        "color": 4,
        "width": 688,
        "height": 528,
        "content": "## Chunking & Embedding\n\nIf the cleaned text >\u202f50\u202fchars it is split into 1\u202f000\u2011char chunks (200\u2011char overlap). Each chunk is sent to OpenAI embeddings and up\u2011serted into Pinecone.\n"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "availableInMCP": false,
    "executionOrder": "v1"
  },
  "versionId": "f558e55f-3dbe-4138-8327-6abcb635924c",
  "connections": {
    "Chat Memory": {
      "ai_memory": [
        [
          {
            "node": "Customer Support Agent",
            "type": "ai_memory",
            "index": 0
          }
        ]
      ]
    },
    "Document Loader": {
      "ai_document": [
        [
          {
            "node": "Store in Vector Database",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Map Website URLs": {
      "main": [
        [
          {
            "node": "Filter and Normalize URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check Text Length": {
      "main": [
        [
          {
            "node": "Split Text into Chunks",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Embeddings OpenAI": {
      "ai_embedding": [
        [
          {
            "node": "Store in Vector Database",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Customer Support Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Embeddings": {
      "ai_embedding": [
        [
          {
            "node": "Pinecone Vector Storage",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Clean HTML to Text": {
      "main": [
        [
          {
            "node": "Check Text Length",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Fetch Page Content": {
      "main": [
        [
          {
            "node": "Extract HTML and URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract HTML and URL": {
      "main": [
        [
          {
            "node": "Clean HTML to Text",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Text into Chunks": {
      "main": [
        [
          {
            "node": "Store in Vector Database",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Pinecone Vector Storage": {
      "ai_tool": [
        [
          {
            "node": "Customer Support Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Process URLs One by One": {
      "main": [
        [],
        [
          {
            "node": "Fetch Page Content",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Store in Vector Database": {
      "main": [
        [
          {
            "node": "Process URLs One by One",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter and Normalize URLs": {
      "main": [
        [
          {
            "node": "Process URLs One by One",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When chat message received": {
      "main": [
        [
          {
            "node": "Customer Support Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking \u2018Execute workflow\u2019": {
      "main": [
        [
          {
            "node": "Map Website URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

Credentials you'll need

Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.

firecrawlApi
openAiApi

Pro

For the full experience including quality scoring and batch install features for each workflow upgrade to Pro

About this workflow

This template provides a full end-to-end Retrieval-Augmented Generation (RAG) system using n8n. It includes two connected workflows: A data ingestion pipeline that crawls a website and stores its content in a vector database. A customer support chatbot that retrieves this…

Source: https://n8n.io/workflows/12981/ — original creator credit. Request a take-down →

More AI & RAG workflows → · Browse all categories →

Related workflows

Workflows that share integrations, category, or trigger type with this one. All free to copy and import.

AI & RAG

Alfred (funcional)

Alfred (funcional). Uses gmailTool, googleCalendarTool, gmail, embeddingsOpenAi. Event-driven trigger; 83 nodes.

Gmail Tool, Google Calendar Tool, Gmail +24

AI & RAG

End-to-end B2b Real Estate Lead Enrichment & Outreach with Apify, Relevance, Openai & Sheets

This advanced n8n workflow automates the full lead enrichment, qualification, and personalized outreach process tailored specifically for the B2B real estate sector. Integrating top platforms like Api

N8N Nodes Fillout, OpenAI Chat, Pinecone Vector Store +11

AI & RAG

Ai-powered Email Triage & Auto-response System with Openai Agents & Gmail

This n8n template automatically classifies incoming emails (Sales, Support, Internal, Finance, Promotions) and routes them to a dedicated OpenAI LLM Agent for processing. Depending on the category, th

OpenAI, Gmail, Text Classifier +16

AI & RAG

Automate Business Partner Outreach with Google Maps, Gpt-4 & Whatsapp

Automate Outreach Prospect automates finding, enriching, and messaging potential partners (like restaurants, malls, and bars) using Apify Google Maps scraping, Perplexity enrichment, OpenAI LLMs, Goog

@Devlikeapro/N8N Nodes Waha, Google Drive Trigger, @Apify/N8N Nodes Apify +14

AI & RAG

Chat with Docs - 5minai New Version

Chat with docs - 5minAI New version. Uses httpRequest, documentDefaultDataLoader, textSplitterRecursiveCharacterTextSplitter, embeddingsOpenAi. Event-driven trigger; 62 nodes.

HTTP Request, Document Default Data Loader, Text Splitter Recursive Character Text Splitter +10

Website to AI Support Chatbot with OpenAI and Pinecone

The workflow JSON

Credentials you'll need

About this workflow

Related workflows