{
  "id": "78rVOQa5gbWxRnJN",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Turn any website into an AI customer support chatbot (OpenAI + Pinecone)",
  "tags": [],
  "nodes": [
    {
      "id": "8ecff878-cb1f-4e9c-98a0-3727269aee97",
      "name": "When clicking \u2018Execute workflow\u2019",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1904,
        208
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "23adecea-f54b-49c7-8ac6-75df99bcc469",
      "name": "Embeddings OpenAI",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        592,
        384
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "673fa27a-9f5d-4a3c-8807-64c26aaaa576",
      "name": "Map Website URLs",
      "type": "@mendable/n8n-nodes-firecrawl.firecrawl",
      "position": [
        -1632,
        208
      ],
      "parameters": {
        "url": "<your-website-url>",
        "sitemap": "only",
        "operation": "map",
        "requestOptions": {},
        "includeSubdomains": true
      },
      "credentials": {
        "firecrawlApi": {
          "name": "<your credential>"
        }
      },
      "retryOnFail": true,
      "typeVersion": 1
    },
    {
      "id": "d17427ae-ce8c-4015-af2d-c20a44acc11c",
      "name": "Filter and Normalize URLs",
      "type": "n8n-nodes-base.code",
      "position": [
        -1296,
        208
      ],
      "parameters": {
        "jsCode": "// input: { success: true, links: [ { url: \"...\" } ] }\n\nconst links = $json.links || [];\nconst seen = new Set();\n\nconst blockedKeywords = [\n  \"/press-blog\",\n  \"/newsletter\",\n  \"/assets-vault\",\n  \"press\",\n  \"gallery\",\n  \"project\",\n  \"newsletter\"\n];\n\nreturn links\n  .filter(link => {\n    let url = link.url;\n    if (!url) return false;\n\n    // \u274c Remove blocked keywords\n    for (const keyword of blockedKeywords) {\n      if (url.includes(keyword)) return false;\n    }\n\n    // \u274c Remove XML files\n    if (url.includes(\".xml\")) return false;\n\n    // \ud83d\udd04 Normalize trailing slash\n    url = url.endsWith(\"/\") ? url.slice(0, -1) : url;\n\n    // \u274c Remove duplicates\n    if (seen.has(url)) return false;\n    seen.add(url);\n\n    // Save normalized URL back\n    link.url = url;\n    return true;\n  })\n  .map(link => {\n    return {\n      url: link.url\n    };\n  });\n"
      },
      "typeVersion": 2
    },
    {
      "id": "bd51e7d4-485c-4759-b4f1-63b2d1219c35",
      "name": "Process URLs One by One",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -960,
        208
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "37897ec6-0cb2-481a-a7ad-15046192720d",
      "name": "Fetch Page Content",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -624,
        128
      ],
      "parameters": {
        "url": "={{ $json.url }}",
        "options": {
          "redirect": {
            "redirect": {
              "followRedirects": true
            }
          }
        }
      },
      "retryOnFail": true,
      "typeVersion": 3,
      "continueOnFail": true
    },
    {
      "id": "ddca10bf-eaca-47a3-8a8a-64ad2e247bfc",
      "name": "Extract HTML and URL",
      "type": "n8n-nodes-base.set",
      "position": [
        -368,
        128
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "f84fa14c-2e29-46ef-a914-2267eb52b815",
              "name": "html",
              "type": "string",
              "value": "={{ $json.data ?? $json.html ?? \"\" }}"
            },
            {
              "id": "096f51e1-e470-4fde-8d62-3338ffcc4f6b",
              "name": "url",
              "type": "string",
              "value": "={{ $json.url ?? $('Process URLs One by One').item.json.url }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "e5da4265-7bb0-4b01-b6dd-97ede548b7c3",
      "name": "Clean HTML to Text",
      "type": "n8n-nodes-base.code",
      "position": [
        -176,
        128
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "const html = $json.html;\n\n// VERY simple cleaner (replace later with readability)\nconst text = html\n  .replace(/<script[\\s\\S]*?<\\/script>/gi, '')\n  .replace(/<style[\\s\\S]*?<\\/style>/gi, '')\n  .replace(/<[^>]+>/g, ' ')\n  .replace(/\\s+/g, ' ')\n  .trim();\n\nreturn {\n  url: $json.url,\n  project_id: $json.project_id,\n  text\n};\n"
      },
      "typeVersion": 2
    },
    {
      "id": "f5c3ea61-c855-47e2-9110-3242ca6c0ce3",
      "name": "Check Text Length",
      "type": "n8n-nodes-base.if",
      "position": [
        48,
        128
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 3,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "aa3e5022-2d6d-43f6-bf87-9de112a3345f",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{$json.text.length > 50}}",
              "rightValue": ""
            }
          ]
        },
        "looseTypeValidation": true
      },
      "typeVersion": 2.3
    },
    {
      "id": "ba89389e-2dcc-4aa1-8f2a-7746641df62c",
      "name": "Split Text into Chunks",
      "type": "n8n-nodes-base.code",
      "position": [
        272,
        112
      ],
      "parameters": {
        "jsCode": "const raw = $json.text || \"\";\nconst text = raw.replace(/\\s+/g, \" \").trim(); // clean whitespace\nconst chunkSize = 1000;\nconst overlap = 200;\n\nconst chunks = [];\nlet i = 0;\n\nwhile (i < text.length) {\n  const chunk = text.slice(i, i + chunkSize).trim();\n\n  // Only save useful chunks\n  if (chunk.length > 50) {\n    chunks.push({\n      content: chunk,\n      url: $json.url,\n      project_id: $json.project_id\n    });\n  }\n\n  i += chunkSize - overlap;\n}\n\nreturn chunks;\n"
      },
      "typeVersion": 2
    },
    {
      "id": "a9947307-f9f7-4280-860f-a9f18c81e530",
      "name": "Store in Vector Database",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "position": [
        624,
        192
      ],
      "parameters": {
        "mode": "insert",
        "options": {
          "pineconeNamespace": "<your-pinecone-namespace-name>"
        },
        "pineconeIndex": {
          "__rl": true,
          "mode": "list",
          "value": ""
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "faeb8ce7-2e62-4782-840c-fde4f70d1be0",
      "name": "Document Loader",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        784,
        400
      ],
      "parameters": {
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "url",
                "value": "={{ $json.url }}"
              },
              {
                "name": "content",
                "value": "={{ $json.content }}"
              }
            ]
          }
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "f13244de-8266-41a1-a918-474ab0021376",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2720,
        0
      ],
      "parameters": {
        "width": 592,
        "height": 688,
        "content": "## Turn any website into an AI\u2011powered customer\u2011support chatbot (Open\u202fAI\u202f+\u202fPinecone)\n\n### How it works\n1\ufe0f\u20e3 **Discover URLs** \u2013 Firecrawl reads the sitemap (including sub\u2011domains) and returns every page URL.  \n2\ufe0f\u20e3 **Clean & de\u2011duplicate** \u2013 JavaScript removes unwanted paths (press\u2011blog, newsletter, assets, .xml), trailing slashes and duplicate links.  \n3\ufe0f\u20e3 **Controlled crawling** \u2013 the remaining URLs are processed one\u2011by\u2011one, fetching the raw HTML.  \n4\ufe0f\u20e3 **HTML \u2192 plain text** \u2013 script/style tags are stripped and the rest of the markup is turned into readable text.  \n5\ufe0f\u20e3 **Validate & chunk** \u2013 pages shorter than 50\u202fchars are ignored; the rest is split into 1\u202f000\u2011char chunks with a 200\u2011char overlap.  \n6\ufe0f\u20e3 **Embed & store** \u2013 each chunk is turned into an OpenAI embedding and up\u2011serted into a Pinecone index (namespace = your\u2011site\u2011name).  \n7\ufe0f\u20e3 **Chat endpoint** \u2013 a LangChain agent receives a user message, decides (via the system prompt) whether to call the Pinecone retrieval tool, and replies **only** with information that actually exists in the vector store.\n\n### Setup steps\n1\ufe0f\u20e3 Add your **Firecrawl** API key (Credentials\u202f\u2192\u202fFirecrawl).  \n2\ufe0f\u20e3 Add your **OpenAI** API key (Credentials\u202f\u2192\u202fOpenAI).  \n3\ufe0f\u20e3 Create a **Pinecone** index (1536 dimensions for `text\u2011embedding\u2011ada\u2011002`).  \n4\ufe0f\u20e3 Put the index name and a namespace into the \u201cStore in Vector Database\u201d node.  \n5\ufe0f\u20e3 Replace `<your-website-url>` in the *Map Website URLs* node.  \n6\ufe0f\u20e3 Execute the workflow \u2013 it will fill the vector store.  \n7\ufe0f\u20e3 Deploy the **Chat Trigger** webhook (copy the URL) and embed it in your front\u2011end.\n\nWhen the index is populated the bot will answer any product, policy or FAQ question without hallucinations.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "18ac8782-ba5b-41e6-b45c-161b329c1b7f",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1696,
        80
      ],
      "parameters": {
        "color": 2,
        "height": 304,
        "content": "URL Discovery \u2013 Firecrawl reads the sitemap (including sub\u2011domains) and returns every page URL.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "026d9947-3d50-40f3-a6fd-369101cde755",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1376,
        80
      ],
      "parameters": {
        "color": 2,
        "width": 560,
        "height": 304,
        "content": "## URL Cleaning & Batching\nJavaScript removes press\u2011blog, newsletter, assets\u2011vault, .xml files, trailing slashes and duplicate URLs. \n\nThen SplitInBatches processes the list one\u2011by\u2011one.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "27cd19ce-584d-4f2a-9cf3-af3e6c7644d8",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -704,
        48
      ],
      "parameters": {
        "color": 3,
        "width": 672,
        "height": 240,
        "content": "Page Fetch & Text Extraction \u2013 HTTP request gets the HTML, a Set node normalises fields, and a small JS step strips script/style tags and converts the markup to plain readable text.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "0d1813dd-26c7-4db4-98b0-07bece863767",
      "name": "When chat message received",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "position": [
        -1856,
        752
      ],
      "parameters": {
        "mode": "webhook",
        "public": true,
        "options": {
          "allowedOrigins": "*"
        },
        "availableInChat": true
      },
      "typeVersion": 1.1
    },
    {
      "id": "c8283ada-3684-4a1e-b26b-f7c341730c83",
      "name": "Chat Memory",
      "type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
      "position": [
        -1504,
        976
      ],
      "parameters": {
        "contextWindowLength": 10
      },
      "typeVersion": 1.3
    },
    {
      "id": "953a61ab-99c4-437d-b180-9656dfc9ccc6",
      "name": "Customer Support Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        -1600,
        752
      ],
      "parameters": {
        "options": {
          "maxIterations": 10,
          "systemMessage": "=You are **Alicia**, an AI customer support assistant for our company.\n\nToday's date is {{ $now.format('yyyy-MM-dd') }}.\nUse this date as the only reference for relative time.\n\n========================\nPRIMARY RULE\n========================\n\nAll business-related information MUST come from tools.\n\nThis includes:\n- Products\n- Services\n- Company information\n- Website content\n- Policies and FAQs\n- Pricing and availability\n- Contact details\n\nDo NOT answer these from memory or assumptions.\nIf a relevant tool exists, you MUST use it.\n\n========================\nPINECONE USAGE (CRITICAL)\n========================\n\nUse the Pinecone vector store whenever the user asks about:\n- Products or services\n- What the company offers\n- Policies, pricing, FAQs\n- Company or website information\n\nDo NOT use Pinecone for:\n- General greetings\n- Small talk\n- Personal details\n- Appointment scheduling\n\nIf Pinecone returns:\n- No results\n- Empty data\n- Errors\n\nReply:\n\"I could not find that information. Would you like me to create a support ticket for you?\"\n\n========================\nMEMORY USAGE\n========================\n\nMemory may only be used for:\n- Name\n- Email\n- Phone number\n\nAnd only after:\n- A successful booking or confirmed interaction.\n\nNever use memory for:\n- Products\n- Services\n- Company info\n- Availability\n\n========================\nGREETING RULE\n========================\n\nIf the user only says:\n\"Hi\", \"Hello\", or \"Hey\"\n\nReply exactly:\n\"Hi, how can I help you today?\"\n\nNo tools.\nNo extra text.\n\n========================\nRESPONSE RULES\n========================\n\n- Be friendly and conversational.\n- Use short, clear sentences.\n- Never mention internal tools.\n- Never invent information.\n- Never explain your reasoning.\n- Never say \"as an AI\".\n\n========================\nLINK RULE\n========================\n\nIf a link is included:\n- Use Markdown format: [Link text](URL)\n- Never show raw URLs.\n- Links must come from tool output only.\n\n========================\nFAILURE RULE\n========================\n\nIf any required tool fails:\n\nReply exactly:\n\"I could not find that information. Would you like me to create a support ticket for you?\"\n\nNo variations.\nNo extra text.\n\n========================\nEND\n========================\n",
          "returnIntermediateSteps": false
        }
      },
      "typeVersion": 3
    },
    {
      "id": "a7128a69-d4f9-43e2-af49-3c6b903a0719",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        -1600,
        1168
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4.1-mini",
          "cachedResultName": "gpt-4.1-mini"
        },
        "options": {},
        "builtInTools": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "f1b6383c-14c4-4562-b89f-3c50715e5fca",
      "name": "OpenAI Embeddings",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        -1328,
        1152
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "829e903c-08f8-433f-9650-456e3f267f14",
      "name": "Pinecone Vector Storage",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "position": [
        -1376,
        976
      ],
      "parameters": {
        "mode": "retrieve-as-tool",
        "topK": 10,
        "options": {
          "pineconeNamespace": "<your-pinecone-namespace-name>"
        },
        "pineconeIndex": {
          "__rl": true,
          "mode": "list",
          "value": ""
        },
        "toolDescription": "=Use this tool whenever the user asks anything about Blushu, our services, company info, website content, product/service details, policies, FAQs, contact information, pricing, or questions that require knowledge from the company knowledge base. \n"
      },
      "typeVersion": 1.3
    },
    {
      "id": "4a195c24-4dbe-47d4-8f0d-9bd64630f55a",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        224,
        -16
      ],
      "parameters": {
        "color": 4,
        "width": 688,
        "height": 528,
        "content": "## Chunking & Embedding\n\nIf the cleaned text >\u202f50\u202fchars it is split into 1\u202f000\u2011char chunks (200\u2011char overlap). Each chunk is sent to OpenAI embeddings and up\u2011serted into Pinecone.\n"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "availableInMCP": false,
    "executionOrder": "v1"
  },
  "versionId": "f558e55f-3dbe-4138-8327-6abcb635924c",
  "connections": {
    "Chat Memory": {
      "ai_memory": [
        [
          {
            "node": "Customer Support Agent",
            "type": "ai_memory",
            "index": 0
          }
        ]
      ]
    },
    "Document Loader": {
      "ai_document": [
        [
          {
            "node": "Store in Vector Database",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Map Website URLs": {
      "main": [
        [
          {
            "node": "Filter and Normalize URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check Text Length": {
      "main": [
        [
          {
            "node": "Split Text into Chunks",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Embeddings OpenAI": {
      "ai_embedding": [
        [
          {
            "node": "Store in Vector Database",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Customer Support Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Embeddings": {
      "ai_embedding": [
        [
          {
            "node": "Pinecone Vector Storage",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Clean HTML to Text": {
      "main": [
        [
          {
            "node": "Check Text Length",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Fetch Page Content": {
      "main": [
        [
          {
            "node": "Extract HTML and URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract HTML and URL": {
      "main": [
        [
          {
            "node": "Clean HTML to Text",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Text into Chunks": {
      "main": [
        [
          {
            "node": "Store in Vector Database",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Pinecone Vector Storage": {
      "ai_tool": [
        [
          {
            "node": "Customer Support Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Process URLs One by One": {
      "main": [
        [],
        [
          {
            "node": "Fetch Page Content",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Store in Vector Database": {
      "main": [
        [
          {
            "node": "Process URLs One by One",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter and Normalize URLs": {
      "main": [
        [
          {
            "node": "Process URLs One by One",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When chat message received": {
      "main": [
        [
          {
            "node": "Customer Support Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking \u2018Execute workflow\u2019": {
      "main": [
        [
          {
            "node": "Map Website URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}