This workflow corresponds to n8n.io template #11682 — we link there as the canonical source.
This workflow follows the Agent → Chat Trigger recipe pattern — see all workflows that pair these two integrations.
The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and activate it. Full import guide →
{
"id": "UY2rhMDjLVtGntWm",
"meta": {
"templateCredsSetupCompleted": true
},
"name": "AI-Driven Company Data Extraction and Localization",
"tags": [],
"nodes": [
{
"id": "0d59c94f-1e64-41f2-b6b5-da88dfb2d1b1",
"name": "When chat message received",
"type": "@n8n/n8n-nodes-langchain.chatTrigger",
"position": [
-752,
0
],
"parameters": {
"public": true,
"options": {
"responseMode": "responseNodes"
},
"initialMessages": "Hi! I can help you extract company information. Please enter the URL to get started."
},
"typeVersion": 1.4
},
{
"id": "5b143ed0-e661-4ab1-8695-cf8c7ee50237",
"name": "Google Gemini Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
-288,
208
],
"parameters": {
"options": {}
},
"credentials": {
"googlePalmApi": {
"name": "<your credential>"
}
},
"typeVersion": 1
},
{
"id": "ca977928-4980-4ffb-94d1-5f16911c6ce7",
"name": "ExtractedChatInput",
"type": "n8n-nodes-base.code",
"position": [
64,
0
],
"parameters": {
"jsCode": "const rawText = $input.first().json.output || ''\nlet jsonString = rawText\n\nconst codeBlockRegex = /```(?:json)?\\s*([\\s\\S]*?)\\s*```/i\nconst match = rawText.match(codeBlockRegex)\n\nif (match && match[1]) {\n jsonString = match[1]\n}\n\nlet resultObject = { url: null }\n\ntry {\n resultObject = JSON.parse(jsonString)\n} catch (e) {\n}\n\nreturn [\n {\n json: resultObject\n }\n]"
},
"typeVersion": 2
},
{
"id": "8132e3b0-9fe4-4372-8bf0-8aa35b7de734",
"name": "Request User Input",
"type": "@n8n/n8n-nodes-langchain.chat",
"position": [
480,
16
],
"parameters": {
"message": "Please enter the company website URL you want to search.",
"options": {}
},
"typeVersion": 1
},
{
"id": "a69cf0cc-3cd1-46f7-978b-0e4c5e3053a0",
"name": "Google Gemini Chat Model1",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
688,
176
],
"parameters": {
"options": {}
},
"credentials": {
"googlePalmApi": {
"name": "<your credential>"
}
},
"typeVersion": 1
},
{
"id": "78c24fcb-46b5-481a-af76-79e4f058e19c",
"name": "Config",
"type": "n8n-nodes-base.set",
"position": [
-528,
0
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "3a6a2908-f5cd-4b8d-98d1-c3260b5ddfa5",
"name": "chatInput",
"type": "string",
"value": "={{ $json.chatInput }}"
},
{
"id": "12378c7e-817a-4d34-9c6f-e1d4c16579f1",
"name": "targetCompanyFields",
"type": "string",
"value": "CEO,Address,Summary"
},
{
"id": "90186d6e-7cf3-425e-94e5-b409a5a56161",
"name": "language",
"type": "string",
"value": "English"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "057b79d3-9363-4623-832f-a03e69aa929b",
"name": "HttpRequestTool",
"type": "n8n-nodes-base.httpRequestTool",
"position": [
832,
176
],
"parameters": {
"url": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('URL', `URL to Get HTML`, 'string') }}",
"options": {
"timeout": 60000,
"redirect": {
"redirect": {
"maxRedirects": 10
}
},
"response": {
"response": {
"neverError": true,
"fullResponse": true
}
},
"allowUnauthorizedCerts": true
},
"sendHeaders": true,
"toolDescription": "Get Page Content. Retrieves the content from the specified URL.",
"headerParameters": {
"parameters": [
{
"name": "=User-Agent",
"value": "=Mozilla/5.0 (Macintosh; Intel Mac OS X 14_5_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.6668.71 Safari/537.36"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "866b2c6f-3fe5-4506-8ee4-eb56d7d4a048",
"name": "ParsedCsv",
"type": "n8n-nodes-base.code",
"position": [
1008,
-16
],
"parameters": {
"jsCode": "const data = $input.first().json\n\nlet csvText = ''\nconst match = data.output.match(/```csv([\\s\\S]*?)```/)\n\nif (match) {\n csvText = match[1].trim()\n}\n\nif (!csvText) {\n throw new Error('CSV text not found')\n}\n\nconst lines = csvText.trim().split('\\n').map(line => line.replace(/\\r$/, ''))\n\nfunction parseCSVLine(line) {\n const cells = []\n let current = ''\n let insideQuotes = false\n\n for (let i = 0; i < line.length; i++) {\n const char = line[i]\n if (char === '\"') {\n if (insideQuotes && line[i + 1] === '\"') {\n current += '\"'\n i++\n } else {\n insideQuotes = !insideQuotes\n }\n } else if (char === ',' && !insideQuotes) {\n cells.push(current)\n current = ''\n } else {\n current += char\n }\n }\n cells.push(current)\n return cells\n}\n\nconst headers = parseCSVLine(lines[0]).map(h => h.trim())\n\nconst records = lines.slice(1).map(line => {\n if (!line.trim()) {\n return null\n }\n\n const cells = parseCSVLine(line)\n const obj = {}\n\n headers.forEach((key, idx) => {\n obj[key] = (cells[idx] || '').trim()\n })\n return obj\n}).filter(r => r !== null)\n\nreturn records"
},
"typeVersion": 2
},
{
"id": "31f9c590-632e-4bfa-bee8-50d4673d36f1",
"name": "Respond to Chat",
"type": "@n8n/n8n-nodes-langchain.chat",
"position": [
1424,
-16
],
"parameters": {
"message": "={{ $json.text }}",
"options": {},
"waitUserReply": false
},
"typeVersion": 1
},
{
"id": "528fcf55-207b-482d-b766-9801a8e66513",
"name": "ChatResponse",
"type": "n8n-nodes-base.code",
"position": [
1216,
-16
],
"parameters": {
"jsCode": "const IGNORED_KEYS = ['autoFetchStatus', 'autoFetchReason']\nconst data = $input.first().json\nconst reason = data.autoFetchReason || '(No reason provided)'\n\nif (data.autoFetchStatus !== 'ok') {\n return {\n json: {\n text: \"\u26a0\ufe0f **Extraction finished with issues**\" + '\\n\\n' + reason\n }\n }\n}\n\nconst header = \"\u2705 **Successfully extracted company information**\"\nconst fields = Object.keys(data)\n .filter(key => !IGNORED_KEYS.includes(key))\n .map(key => {\n const label = key\n const value = data[key] || 'N/A'\n return `\u2022 **${label}**\\n\\n> ${value}`\n })\n .join('\\n\\n')\n\nreturn {\n json: {\n text: header + '\\n\\n' +\n reason + '\\n\\n' +\n '\\n\\n' +\n '---' + '\\n\\n' +\n '\\n\\n' +\n fields\n }\n}"
},
"typeVersion": 2
},
{
"id": "819c8bbe-fcbc-4a76-a15d-ccffe93d5a62",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1472,
-128
],
"parameters": {
"width": 672,
"height": 512,
"content": "## How it works\nPlease send a corporate website URL via chat.\n\nThe AI will investigate the company website on your behalf and return the extracted company information. \nSince this is set up as a conversational workflow, retrying or trying another URL is simple.\n\n## How to use\n* To get started, please set up the Credential in the `Gemini` node attached to the `AI Agent` node.\n * You can obtain an API key from [Google AI Studio](https://aistudio.google.com/api-keys).\n\nOnce configured, the workflow will run when you send a corporate website URL (e.g., https://example.com/) via chat.\n\n## Customizing this workflow\nYou can change the settings in the `Config` node.\n\n* You can modify `targetCompanyFields` to customize which company data fields are extracted.\n* You can modify `language` to receive the results in a language other than English."
},
"typeVersion": 1
},
{
"id": "519ff98c-29fd-4b01-9081-d31b9adc706d",
"name": "Request Next URL",
"type": "@n8n/n8n-nodes-langchain.chat",
"position": [
1632,
-16
],
"parameters": {
"message": "Would you like to search for another company? Please enter the URL.",
"options": {}
},
"typeVersion": 1
},
{
"id": "aa787064-7d59-4361-98be-05b606666184",
"name": "AI Agent (Extract URL)",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
-288,
0
],
"parameters": {
"text": "=Extract the homepage URL from the User Input and return it in the specified Output Format inside a code block.\n\nImportant:\nAlways return a valid JSON object.\nIf no URL is found, you MUST return the object with null: { \"url\": null }\n\n[Output Format]\n{ \"url\": \"extracted_url_or_null\" }\n\n[User Input]\n{{ $json.chatInput }}",
"options": {},
"promptType": "define"
},
"typeVersion": 3
},
{
"id": "dfab6b48-f012-4b10-966e-a518f45bc050",
"name": "Check URL",
"type": "n8n-nodes-base.if",
"position": [
272,
0
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "07702149-ff99-438e-86d9-17dec54e21c3",
"operator": {
"type": "string",
"operation": "notEmpty",
"singleValue": true
},
"leftValue": "={{ $json.url }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "b99fb313-e065-4a60-850e-b2e7707d3d69",
"name": "AI Agent (Access URL)",
"type": "@n8n/n8n-nodes-langchain.agent",
"onError": "continueRegularOutput",
"position": [
688,
-16
],
"parameters": {
"text": "=Please extract the following information based on the System Instructions.\n\n[Target URL]\n{{ $json.url }}\n\n[Target Fields to Extract]\n{{ $('Config').first().json.targetCompanyFields }}\n\n[Translation Language]\n{{ $('Config').first().json.language }}\n\nGenerate the CSV response now.",
"options": {
"maxIterations": "=15",
"systemMessage": "=You are an expert web scraper. Extract company info into strict CSV format.\n\n### Core Directive\n**Think Step-by-Step.**\nYour goal is to extract information **strictly from the Official Corporate Website**.\nSimulate a human researcher who validates the site's legitimacy before searching for data.\n\n### Tool Usage\n- Use `HttpRequestTool` to fetch pages.\n- **Iterative Workflow**: You MUST use the tool multiple times to follow links.\n\n### Execution Process\n1. **Analyze & Validate (CRITICAL)**:\n - **Primary Rule**: The site MUST be the Official Corporate Website. **Anything else MUST result in an 'error' status.**\n - **Mandatory Corporate Data Check**: The site MUST clearly confirm the **Official Company Name (Legal Entity)**. If the legal entity name cannot be confirmed on the landing page, Validation fails.\n - **IMMEDIATE REJECTION CRITERIA**:\n - **Non-Corporate Signal**: Any site that displays **\"Operator Information\"** instead of \"Company Profile\". This definitively means it is NOT a corporate website.\n - **Media/Blogs**: Comparison sites, Rankings, Reviews, News articles.\n - **Directories**: Yelp, Job boards, SNS.\n - **Action upon Validation Failure**: If the site meets ANY rejection criteria, proceed directly to Step 4 (Status Logic) and set 'autoFetchStatus' to 'error'. **DO NOT continue to navigation (Step 2).**\n\n2. **Navigate to Company Profile (MANDATORY)**:\n - **Prerequisite**: This step can only begin IF validation in Step 1 passed ('ok').\n - **Exclusion**: You MUST NOT use Service pages (e.g., links labeled 'Products', 'Services', 'Pricing') as a source for core corporate data.\n - **Assumption**: Detailed info (CEO, Address) is rarely on the homepage.\n - **Action**: You MUST search the menu or footer for links dedicated to corporate structure: \"Company Profile\", \"About Us (Corporate)\", \"Access\", or \"Contact\".\n - **Constraint**: Do NOT report \"N/A\" for core fields (like CEO, Address) without checking the specific \"Company Profile\" page.\n\n3. **Extraction**:\n - Extract strictly from the domain. No hallucinations.\n - Translate all extracted values into the language specified in the user request. Do NOT translate headers.\n\n4. **Status Logic and Output (CRITICAL)**:\n - **Status Definition**: 'ok' if Validation passed AND core data (Legal Name, CEO, Address) was SUCCESSFULLY extracted from the Official Corporate Page (Step 2). 'error' otherwise.\n - **Requirement**: `autoFetchReason` is MANDATORY. State clearly why the site was rejected (e.g., \"Found 'Operator Information', therefore not a corporate site\") or your successful navigation path.\n - **Format**: Single Markdown code block (```csv ... ```). Row 1 MUST include requested fields plus \"autoFetchStatus\" and \"autoFetchReason\".\n - **CSV SYNTAX (ABSOLUTE)**: All values in the data row MUST be enclosed in double quotes."
},
"promptType": "define"
},
"retryOnFail": true,
"typeVersion": 3,
"alwaysOutputData": true,
"waitBetweenTries": 5000
},
{
"id": "3ec14a4c-eb08-46dc-9cc2-41bdd0153aca",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-784,
-128
],
"parameters": {
"color": 7,
"width": 1424,
"height": 512,
"content": "## Extract URLs using AI\nThis step uses AI to extract URLs from the chat message. If no URL is detected, it prompts the user to enter a URL again."
},
"typeVersion": 1
},
{
"id": "9187913d-a41b-46ab-a207-d77ffcc30c6e",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
656,
-128
],
"parameters": {
"color": 7,
"width": 1184,
"height": 512,
"content": "## AI Search\nThe AI accesses the URL to extract company information and returns the results."
},
"typeVersion": 1
}
],
"active": false,
"settings": {
"executionOrder": "v1"
},
"versionId": "30227fcf-d248-4a8f-a428-e49a483b2a0f",
"connections": {
"Config": {
"main": [
[
{
"node": "AI Agent (Extract URL)",
"type": "main",
"index": 0
}
]
]
},
"Check URL": {
"main": [
[
{
"node": "AI Agent (Access URL)",
"type": "main",
"index": 0
}
],
[
{
"node": "Request User Input",
"type": "main",
"index": 0
}
]
]
},
"ParsedCsv": {
"main": [
[
{
"node": "ChatResponse",
"type": "main",
"index": 0
}
]
]
},
"ChatResponse": {
"main": [
[
{
"node": "Respond to Chat",
"type": "main",
"index": 0
}
]
]
},
"HttpRequestTool": {
"ai_tool": [
[
{
"node": "AI Agent (Access URL)",
"type": "ai_tool",
"index": 0
}
]
]
},
"Respond to Chat": {
"main": [
[
{
"node": "Request Next URL",
"type": "main",
"index": 0
}
]
]
},
"Request Next URL": {
"main": [
[
{
"node": "AI Agent (Extract URL)",
"type": "main",
"index": 0
}
]
]
},
"ExtractedChatInput": {
"main": [
[
{
"node": "Check URL",
"type": "main",
"index": 0
}
]
]
},
"Request User Input": {
"main": [
[
{
"node": "AI Agent (Extract URL)",
"type": "main",
"index": 0
}
]
]
},
"AI Agent (Access URL)": {
"main": [
[
{
"node": "ParsedCsv",
"type": "main",
"index": 0
}
]
]
},
"AI Agent (Extract URL)": {
"main": [
[
{
"node": "ExtractedChatInput",
"type": "main",
"index": 0
}
]
]
},
"Google Gemini Chat Model": {
"ai_languageModel": [
[
{
"node": "AI Agent (Extract URL)",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Google Gemini Chat Model1": {
"ai_languageModel": [
[
{
"node": "AI Agent (Access URL)",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"When chat message received": {
"main": [
[
{
"node": "Config",
"type": "main",
"index": 0
}
]
]
}
}
}
Credentials you'll need
Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.
googlePalmApi
For the full experience, including quality scoring and batch install features for each workflow, upgrade to Pro.
About this workflow
Please send a corporate website URL via chat.
Source: https://n8n.io/workflows/11682/ — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
Automate Google Classroom via the Google Classroom API to efficiently manage courses, topics, teachers, students, announcements, and coursework.
Who is this workflow for? This workflow is designed for SEO analysts, content creators, marketing agencies, and developers who need to index a website and then interact with its content as if it were…
This workflow contains community nodes that are only compatible with the self-hosted version of n8n.
This workflow implements a WhatsApp-based virtual restaurant assistant that automates customer interaction from the first message to post-dining follow-up.
This Chatbot automates the process of discovering job openings and generating tailored job application emails.