{
  "id": "pvB5795JWJtf6D0H",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Scrape and ingest web content into Pinecone with Firecrawl",
  "tags": [],
  "nodes": [
    {
      "id": "979bb858-9afc-4fbf-a2ed-c158f3b0a440",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1136,
        -384
      ],
      "parameters": {
        "width": 432,
        "height": 496,
        "content": "> ## How it works\n> 1. A webhook receives a URL in the JSON body of a POST request\n> 2. The URL is validated and normalized to its root domain (`https://<domain>`; any path is dropped), returning a 422 error if invalid\n> 3. Firecrawl scrapes that page and converts it to clean markdown\n> 4. OpenAI generates 1536-dimensional vector embeddings from the content\n> 5. The content and embeddings are stored in Pinecone\n> 6. A built-in RAG chat agent lets you query the knowledge base using natural language, with Cohere reranking for better retrieval\n>\n> ## Setup steps\n> 1. Create a Pinecone index named `firecrawl` with the settings from the \"Pinecone setup\" sticky note\n> 2. Add your Firecrawl API key\n> 3. Add your OpenAI API key (for embeddings)\n> 4. Add your OpenRouter API key (for the chat agent)\n> 5. Add your Cohere API key (for reranking)\n> 6. Activate the workflow and send a POST request with `{\"url\": \"https://example.com\"}` to the webhook\n"
      },
      "typeVersion": 1
    },
    {
      "id": "c1cbef33-52b3-44dd-aa62-58f3fdecfc30",
      "name": "Receive URL",
      "type": "n8n-nodes-base.webhook",
      "position": [
        -1056,
        144
      ],
      "parameters": {
        "path": "dedaa64a-3dc9-43ea-82ac-7fac034af0b2",
        "options": {},
        "httpMethod": "POST",
        "responseMode": "responseNode"
      },
      "typeVersion": 2.1
    },
    {
      "id": "79cfc235-c533-4595-999f-09f3b7fb48a2",
      "name": "Validate and normalize URL",
      "type": "n8n-nodes-base.code",
      "onError": "continueErrorOutput",
      "position": [
        -832,
        144
      ],
      "parameters": {
        "jsCode": "// Validate the webhook body and normalize the submitted URL to https://<domain>.\n// Every rejection is thrown: this node's onError is \"continueErrorOutput\", so a\n// thrown error sends the item to the error output instead of on to the scraper.\nconst body = $input.first().json.body;\nconst raw = typeof body?.url === \"string\" ? body.url.trim() : \"\";\n\nif (!raw) {\n  throw new Error(\"Missing or non-string 'url' field in request body.\");\n}\n\n// Strip protocol and path to get clean domain\nconst domain = raw.replace(/^https?:\\/\\//i, \"\").replace(/\\/.*$/, \"\");\n\n// Validate domain format: one or more dot-separated labels (letters, digits,\n// inner hyphens, at most 63 characters each) followed by an alphabetic TLD,\n// so multi-label hosts such as \"docs.n8n.io\" are accepted.\nconst isValid = /^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\\.)+[a-zA-Z]{2,}$/.test(domain);\n\nif (!isValid) {\n  throw new Error(`Invalid URL: \"${raw}\" is not a valid domain or URL.`);\n}\n\nreturn [{\n  json: {\n    status: 200,\n    domain: domain,\n    url: `https://${domain}`\n  }\n}];"
      },
      "typeVersion": 2
    },
    {
      "id": "fe4f2f6c-d6e6-4d68-9133-8cd90fed5a39",
      "name": "Scrape page with Firecrawl",
      "type": "@mendable/n8n-nodes-firecrawl.firecrawl",
      "position": [
        -464,
        -32
      ],
      "parameters": {
        "url": "={{ $('Validate and normalize URL').item.json.url }}",
        "operation": "scrape",
        "scrapeOptions": {
          "options": {
            "formats": {
              "format": [
                {}
              ]
            },
            "headers": {}
          }
        },
        "requestOptions": {}
      },
      "credentials": {
        "firecrawlApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "4f1cbecc-a01a-4c22-abcd-8263cc633c97",
      "name": "Return URL validation error",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        -608,
        240
      ],
      "parameters": {
        "options": {
          "responseCode": 422
        },
        "respondWith": "json",
        "responseBody": "={{ JSON.stringify({ status: 422, message: $json.error || \"Invalid or missing URL.\" }) }}"
      },
      "typeVersion": 1.5
    },
    {
      "id": "1e0853a9-651e-4fa0-a60d-65d22cf8df9d",
      "name": "Store embeddings in Pinecone",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "position": [
        -256,
        -32
      ],
      "parameters": {
        "mode": "insert",
        "options": {},
        "pineconeIndex": {
          "__rl": true,
          "mode": "list",
          "value": "firecrawl",
          "cachedResultName": "firecrawl"
        }
      },
      "credentials": {
        "pineconeApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "d9e2f16a-ef0d-4879-adce-2ef72b758f7d",
      "name": "Return ingestion result",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        112,
        -32
      ],
      "parameters": {
        "options": {
          "responseCode": 200
        },
        "respondWith": "json",
        "responseBody": "={\n  \"message\": \"Added {{$input.all().length}} items to Pinecone\"\n}"
      },
      "executeOnce": true,
      "typeVersion": 1.5
    },
    {
      "id": "0bc6e04f-062a-4fde-9787-5d98ee68a67c",
      "name": "Generate OpenAI embeddings",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        -288,
        160
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "f5ba03be-4e36-4868-a119-5f15a8180ede",
      "name": "Load scraped content",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        -112,
        160
      ],
      "parameters": {
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "url",
                "value": "={{ $('Validate and normalize URL').item.json.url }}"
              }
            ]
          }
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "4beb288d-8c95-4fe2-8462-40d138c1771e",
      "name": "Receive chat message",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "position": [
        -1056,
        512
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1.4
    },
    {
      "id": "af6c4d23-10ff-4911-98fb-6f1c3d4defe3",
      "name": "Answer query from knowledge base",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        -832,
        512
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3.1
    },
    {
      "id": "b29b726a-c5f6-4b4b-b9b3-800915ab0a86",
      "name": "OpenRouter LLM",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
      "position": [
        -928,
        704
      ],
      "parameters": {
        "model": "anthropic/claude-sonnet-4.6",
        "options": {}
      },
      "credentials": {
        "openRouterApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "aeb8b2cf-d054-45f6-8416-c7ce3cd13564",
      "name": "Chat memory",
      "type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
      "position": [
        -768,
        704
      ],
      "parameters": {},
      "typeVersion": 1.3
    },
    {
      "id": "23ca0f36-bf16-44d6-b7a3-b1d0ec5b984c",
      "name": "Retrieve documents from Pinecone",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "position": [
        -576,
        688
      ],
      "parameters": {
        "mode": "retrieve-as-tool",
        "options": {},
        "useReranker": true,
        "pineconeIndex": {
          "__rl": true,
          "mode": "list",
          "value": "firecrawl",
          "cachedResultName": "firecrawl"
        },
        "toolDescription": "Retrieve data for the AI Agent."
      },
      "credentials": {
        "pineconeApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "f8a8894d-0c58-4702-b57b-633e9803974c",
      "name": "Generate OpenAI embeddings1",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        -608,
        864
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "5c098782-83ad-4fd4-bbbb-d1d85b3df6e7",
      "name": "Rerank results with Cohere",
      "type": "@n8n/n8n-nodes-langchain.rerankerCohere",
      "position": [
        -448,
        880
      ],
      "parameters": {},
      "credentials": {
        "cohereApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "8acd099c-3f48-454b-bcf3-e75a239178a9",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -688,
        -384
      ],
      "parameters": {
        "color": 7,
        "width": 272,
        "height": 336,
        "content": "> ## Pinecone setup\n> Your Pinecone index must use 1536 dimensions to match the `text-embedding-3-small` OpenAI model.\n>\n> 1. In your Pinecone console, create an index named `firecrawl` (the index name both Pinecone nodes in this workflow point to)\n> 2. Select `text-embedding-3-small` as the embedding model\n> 3. Confirm these settings:\n>\n> | Setting | Value |\n> |---|---|\n> | Modality | Text |\n> | Vector type | Dense |\n> | Dimension | 1536 |\n> | Metric | cosine |"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "binaryMode": "separate",
    "availableInMCP": false,
    "executionOrder": "v1"
  },
  "versionId": "e7c154c2-1552-4ffb-91d2-a35ab1e7d1ee",
  "connections": {
    "Chat memory": {
      "ai_memory": [
        [
          {
            "node": "Answer query from knowledge base",
            "type": "ai_memory",
            "index": 0
          }
        ]
      ]
    },
    "Receive URL": {
      "main": [
        [
          {
            "node": "Validate and normalize URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenRouter LLM": {
      "ai_languageModel": [
        [
          {
            "node": "Answer query from knowledge base",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Load scraped content": {
      "ai_document": [
        [
          {
            "node": "Store embeddings in Pinecone",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Receive chat message": {
      "main": [
        [
          {
            "node": "Answer query from knowledge base",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate OpenAI embeddings": {
      "ai_embedding": [
        [
          {
            "node": "Store embeddings in Pinecone",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Rerank results with Cohere": {
      "ai_reranker": [
        [
          {
            "node": "Retrieve documents from Pinecone",
            "type": "ai_reranker",
            "index": 0
          }
        ]
      ]
    },
    "Scrape page with Firecrawl": {
      "main": [
        [
          {
            "node": "Store embeddings in Pinecone",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Validate and normalize URL": {
      "main": [
        [
          {
            "node": "Scrape page with Firecrawl",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Return URL validation error",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate OpenAI embeddings1": {
      "ai_embedding": [
        [
          {
            "node": "Retrieve documents from Pinecone",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Store embeddings in Pinecone": {
      "main": [
        [
          {
            "node": "Return ingestion result",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Retrieve documents from Pinecone": {
      "ai_tool": [
        [
          {
            "node": "Answer query from knowledge base",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    }
  }
}