This workflow corresponds to n8n.io template #3901 — we link there as the canonical source.
This workflow follows the Chainllm → Google Sheets recipe pattern — see all workflows that pair these two integrations.
The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, activate. Full import guide →
{
"nodes": [
{
"id": "f1b36f4b-6558-4e83-a999-e6f2d24e196c",
"name": "OpenRouter Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
"position": [
620,
240
],
"parameters": {
"model": "openai/gpt-4.1",
"options": {}
},
"typeVersion": 1
},
{
"id": "89ca0a07-286f-4e68-9e85-0327a4859cc0",
"name": "Structured Output Parser",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
900,
240
],
"parameters": {
"schemaType": "manual",
"inputSchema": "{\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"description\": { \"type\": \"string\" },\n \"rating\": { \"type\": \"number\" },\n \"reviews\": { \"type\": \"integer\" },\n \"price\": { \"type\": \"string\" }\n },\n \"required\": [\"name\", \"description\", \"rating\", \"reviews\", \"price\"]\n }\n}"
},
"typeVersion": 1.2
},
{
"id": "e4800c1d-c0d8-4093-81ec-fc19ad0034cd",
"name": "scrap url",
"type": "n8n-nodes-base.httpRequest",
"position": [
240,
60
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "web_unlocker1"
},
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "format",
"value": "raw"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Authorization",
"value": "{{BRIGHTDATA_TOKEN}}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "1a1f768f-615d-4035-81b0-63b860f8e6ac",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
160,
-140
],
"parameters": {
"content": "## Web Scraper API\n\n[Inscription - Free Trial](https://get.brightdata.com/website-scraper)"
},
"typeVersion": 1
},
{
"id": "2f260d96-4fff-4a4f-af29-1e43f465d54c",
"name": "When clicking \u2018Test workflow\u2019",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-440,
200
],
"parameters": {},
"typeVersion": 1
},
{
"id": "4be9033f-0b9f-466d-916e-88fbb2a80417",
"name": "url",
"type": "n8n-nodes-base.splitInBatches",
"position": [
20,
200
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "21b6d21c-b977-4175-9068-e0e2e19fa472",
"name": "get urls to scrape",
"type": "n8n-nodes-base.googleSheets",
"position": [
-200,
200
],
"parameters": {
"options": {},
"sheetName": "{{TRACK_SHEET_GID}}",
"documentId": "{{WEB_SHEET_ID}}"
},
"credentials": {
"googleSheetsOAuth2Api": {
"name": "<your credential>"
}
},
"typeVersion": 4.5
},
{
"id": "25ef76ec-cf0d-422e-b060-68c49192a008",
"name": "clean html",
"type": "n8n-nodes-base.code",
"position": [
460,
60
],
"parameters": {
"jsCode": "// CleanHtmlFunction.js\n// Purpose: n8n Function node to clean HTML: remove doctype, scripts, styles, head, comments, classes, extra blank lines, and non-whitelisted tags\n\nreturn items.map(item => {\n const rawHtml = item.json.data;\n\n // 1) remove doctype, scripts, styles, comments and head section, and strip class attributes\n let cleaned = rawHtml\n .replace(/<!doctype html>/gi, '')\n .replace(/<script[\\s\\S]*?<\\/script>/gi, '')\n .replace(/<style[\\s\\S]*?<\\/style>/gi, '')\n .replace(/<!--[\\s\\S]*?-->/g, '')\n .replace(/<head[\\s\\S]*?<\\/head>/gi, '')\n .replace(/\\sclass=\"[^\"]*\"/gi, '');\n\n // 2) define whitelist of tags to keep\n const allowedTags = [\n 'h1','h2','h3','h4','h5','h6',\n 'p','ul','ol','li',\n 'strong','em','a','blockquote',\n 'code','pre'\n ];\n\n // 3) strip out all tags not in the whitelist, reconstruct allowed tags cleanly\n cleaned = cleaned.replace(\n /<\\/?([a-z][a-z0-9]*)\\b[^>]*>/gi,\n (match, tagName) => {\n const name = tagName.toLowerCase();\n if (allowedTags.includes(name)) {\n return match.startsWith('</') ? `</${name}>` : `<${name}>`;\n }\n return '';\n }\n );\n\n // 4) collapse multiple blank or whitespace-only lines into a single newline\n cleaned = cleaned.replace(/(\\s*\\r?\\n\\s*){2,}/g, '\\n');\n\n // 5) trim leading/trailing whitespace\n cleaned = cleaned.trim();\n\n return {\n json: { cleanedHtml: cleaned }\n };\n});"
},
"typeVersion": 2
},
{
"id": "f72660d5-8427-4655-acbe-10365273c27b",
"name": "extract data",
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"position": [
680,
60
],
"parameters": {
"text": "={{ $json.cleanedHtml }}",
"messages": {
"messageValues": [
{
"message": "=You are an expert in web page scraping. Provide a structured response in JSON format. Only the response, without commentary.\n\nExtract the product information for {{ $(\u2018url\u2019).item.json.url.split(\u2019/s?k=\u2019)[1].split(\u2019&\u2019)[0] }} present on the page.\n\nname\ndescription\nrating\nreviews\nprice"
}
]
},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 1.6
},
{
"id": "8b4af1bb-d7f8-456e-b630-ecd9b6e4bcdc",
"name": "add results",
"type": "n8n-nodes-base.googleSheets",
"position": [
1280,
200
],
"parameters": {
"columns": {
"value": {
"name": "={{ $json.output.name }}",
"price": "={{ $json.output.price }}",
"rating": "={{ $json.output.rating }}",
"reviews": "={{ $json.output.reviews }}",
"description": "={{ $json.output.description }}"
},
"schema": [
{
"id": "name",
"type": "string"
},
{
"id": "description",
"type": "string"
},
{
"id": "rating",
"type": "string"
},
{
"id": "reviews",
"type": "string"
},
{
"id": "price",
"type": "string"
}
],
"mappingMode": "defineBelow"
},
"options": {},
"operation": "append",
"sheetName": "{{RESULTS_SHEET_GID}}",
"documentId": "{{WEB_SHEET_ID}}"
},
"credentials": {
"googleSheetsOAuth2Api": {
"name": "<your credential>"
}
},
"typeVersion": 4.5
},
{
"id": "7a5ba438-2ede-4d6c-b8fa-9a958ba1ef3e",
"name": "Split items",
"type": "n8n-nodes-base.splitOut",
"position": [
1060,
60
],
"parameters": {
"include": "allOtherFields",
"options": {},
"fieldToSplitOut": "output"
},
"typeVersion": 1
}
],
"connections": {
"url": {
"main": [
[],
[
{
"node": "scrap url",
"type": "main",
"index": 0
}
]
]
},
"scrap url": {
"main": [
[
{
"node": "clean html",
"type": "main",
"index": 0
}
]
]
},
"clean html": {
"main": [
[
{
"node": "extract data",
"type": "main",
"index": 0
}
]
]
},
"Split items": {
"main": [
[
{
"node": "add results",
"type": "main",
"index": 0
}
]
]
},
"add results": {
"main": [
[
{
"node": "url",
"type": "main",
"index": 0
}
]
]
},
"extract data": {
"main": [
[
{
"node": "Split items",
"type": "main",
"index": 0
}
]
]
},
"get urls to scrape": {
"main": [
[
{
"node": "url",
"type": "main",
"index": 0
}
]
]
},
"OpenRouter Chat Model": {
"ai_languageModel": [
[
{
"node": "extract data",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Structured Output Parser": {
"ai_outputParser": [
[
{
"node": "extract data",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"When clicking \u2018Test workflow\u2019": {
"main": [
[
{
"node": "get urls to scrape",
"type": "main",
"index": 0
}
]
]
}
}
}
Credentials you'll need
Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.
googleSheetsOAuth2Api
For the full experience including quality scoring and batch install features for each workflow upgrade to Pro
About this workflow
This workflow automates web scraping of Amazon search result pages by retrieving raw HTML, cleaning it to retain only the relevant product elements, and then using an LLM to extract structured product data (name, description, rating, reviews, and price), before saving the…
Source: https://n8n.io/workflows/3901/ — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
This workflow demonstrates a simple way to run evals on a set of test cases stored in a Google Sheet.
Splitout Code. Uses lmChatOpenRouter, outputParserStructured, httpRequest, stickyNote. Event-driven trigger; 11 nodes.
This workflow demonstrates a simple way to run evals on a set of test cases stored in a Google Sheet.
The workflow loads a list of test cases from a Google Sheet (previous results stored from an LLM) For each test case, we execute a call to an LLM judge in parallel (using HTTP Request + Webhook nodes)
This workflow is designed to automate the generation and updating of SEO meta titles and descriptions for WooCommerce products using n8n. It leverages Google Sheets for data input, a FREE language mod