{
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "c499851d-09d6-4a25-812e-c1d3efa3f0a8",
      "name": "When clicking Test workflow",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1648,
        272
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "80562cea-7422-44ec-9886-1928bb8f81f1",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        -624,
        336
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o-mini"
        },
        "options": {
          "maxTokens": 500,
          "temperature": 0,
          "responseFormat": "json_object"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "da77ba7c-a40c-4d79-91f1-fd485d101f76",
      "name": "Structured Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        -288,
        304
      ],
      "parameters": {
        "schemaType": "manual",
        "inputSchema": "{\n  \"type\": \"object\",\n  \"properties\": {\n    \"name\": { \n      \"type\": \"string\", \n      \"description\": \"Product name/title\" \n    },\n    \"description\": { \n      \"type\": \"string\", \n      \"description\": \"Product description or key features\" \n    },\n    \"rating\": { \n      \"type\": [\"number\", \"null\"], \n      \"description\": \"Average rating (e.g., 4.5)\" \n    },\n    \"reviews\": { \n      \"type\": [\"integer\", \"null\"], \n      \"description\": \"Number of reviews\" \n    },\n    \"price\": { \n      \"type\": [\"string\", \"null\"], \n      \"description\": \"Product price with currency\" \n    }\n  },\n  \"required\": [\"name\"]\n}"
      },
      "typeVersion": 1.3
    },
    {
      "id": "daf15a88-7d2f-4542-b3f0-c3658960cb22",
      "name": "1. Get Product URLs from Google Sheets",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        -1392,
        272
      ],
      "parameters": {
        "options": {},
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=0",
          "cachedResultName": "Sheet1"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
          "cachedResultName": "Amazon Product List"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
      "name": "2. Loop Through Each URL",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -1168,
        272
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "c588ede7-1689-492d-a863-949ade5ffe33",
      "name": "3. Scrape Product Page HTML",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -960,
        128
      ],
      "parameters": {
        "url": "=https://api.scrape.do/?token={{$vars.SCRAPEDO_TOKEN}}&url={{ encodeURIComponent($json.url) }}&geoCode=us&render=false",
        "options": {
          "timeout": 60000,
          "response": {
            "response": {}
          }
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "818b6ea9-b259-4d67-bfb9-f02366da89c1",
      "name": "4. Extract Raw Data Elements",
      "type": "n8n-nodes-base.html",
      "position": [
        -752,
        128
      ],
      "parameters": {
        "options": {},
        "operation": "extractHtmlContent",
        "extractionValues": {
          "values": [
            {
              "key": "productTitle",
              "cssSelector": "#productTitle, h1[data-automation-id=\"product-title\"], .product-title"
            },
            {
              "key": "price",
              "cssSelector": ".a-price .a-offscreen, .a-price-whole, .a-price-fraction, .priceToPay .a-price .a-offscreen"
            },
            {
              "key": "rating",
              "cssSelector": ".a-icon-alt, [data-hook=\"average-star-rating\"], .a-star-medium .a-icon-alt"
            },
            {
              "key": "reviewCount",
              "cssSelector": "[data-hook=\"total-review-count\"], .a-link-normal[href*=\"customerReviews\"], #acrCustomerReviewText"
            },
            {
              "key": "featureBullets",
              "cssSelector": "#feature-bullets ul, .a-unordered-list.a-nostyle.a-vertical.feature"
            },
            {
              "key": "productDescription",
              "cssSelector": "#productDescription, [data-feature-name=\"productDescription\"], .product-description"
            }
          ]
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "2c491fda-9510-46f9-973a-754587601b7c",
      "name": "5. Clean & Structure Data with AI",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        -512,
        128
      ],
      "parameters": {
        "text": "={{ JSON.stringify($json, null, 2) }}",
        "batching": {},
        "messages": {
          "messageValues": [
            {
              "message": "Extract Amazon product data and return ONLY valid JSON.\n\nInput: {{ $json }}\n\nExtract:\n- name: product title from productTitle\n- description: create from featureBullets OR productDescription (max 150 chars, if empty use \"No description\")\n- rating: extract number from rating (e.g. \"4.5 out of 5\" \u2192 4.5, if no rating use null)\n- reviews: extract number from reviewCount (e.g. \"1,234 ratings\" \u2192 1234, if none use null)\n- price: format price from price field (add $ if missing, if no price use null)\n\nReturn exact JSON:\n{\n  \"name\": \"product title here\",\n  \"description\": \"description here or No description\",\n  \"rating\": 4.5,\n  \"reviews\": 1234,\n  \"price\": \"$29.99\"\n}"
            }
          ]
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.7
    },
    {
      "id": "7796a70c-99a4-4e6e-b18a-5c63adc90871",
      "name": "6. Format Final JSON Output",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -128,
        128
      ],
      "parameters": {
        "include": "selectedOtherFields",
        "options": {},
        "fieldToSplitOut": "output",
        "fieldsToInclude": "output.name, output.description, output.rating, output.reviews, output.price"
      },
      "typeVersion": 1
    },
    {
      "id": "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9",
      "name": "7. Save Product Data to Google Sheets",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        80,
        272
      ],
      "parameters": {
        "columns": {
          "value": {},
          "schema": [],
          "mappingMode": "autoMapInputData",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {
          "useAppend": true
        },
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": 838351250,
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=838351250",
          "cachedResultName": "Sheet2"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
          "cachedResultName": "Amazon Product List"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "1d3b653a-e5d8-4e88-a210-15224c6282c1",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2272,
        -144
      ],
      "parameters": {
        "width": 528,
        "height": 896,
        "content": "## Amazon Scraper with Scrape.do API\n\n### Setup Instructions:\n\n1. **Get Scrape.do API Token:**\n   - Sign up at https://scrape.do\n   - Get your API token from the dashboard\n\n2. **Set up Workflow Variables:**\n   - SCRAPEDO_TOKEN: Your Scrape.do API token\n   - WEB_SHEET_ID: Google Sheet document ID\n   - TRACK_SHEET_GID: Sheet name/ID with URLs to scrape\n   - RESULTS_SHEET_GID: Sheet name/ID for results\n\n3. **Google Sheets Setup:**\n   - Create a Google Sheet with two tabs\n   - First tab: Add Amazon product URLs in a column named 'url'\n   - Second tab: Will store results (name, description, rating, reviews, price)\n   - Share the sheet with your service account email\n\n4. **Credentials:**\n   - Add Google Sheets OAuth2 credentials\n   - Add OpenRouter API credentials (for GPT-4)\n\n### Features:\n- Uses Scrape.do to bypass Amazon's anti-bot protection\n- Extracts product data using pattern matching and AI\n- Handles pagination with Split In Batches\n- Saves structured data to Google Sheets\n\n### Scrape.do Advantages:\n- No need for complex proxy rotation\n- Automatic CAPTCHA handling\n- Better success rate than BrightData\n- Simple API integration"
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "5. Clean & Structure Data with AI",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "2. Loop Through Each URL": {
      "main": [
        [],
        [
          {
            "node": "3. Scrape Product Page HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "5. Clean & Structure Data with AI",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "3. Scrape Product Page HTML": {
      "main": [
        [
          {
            "node": "4. Extract Raw Data Elements",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "6. Format Final JSON Output": {
      "main": [
        [
          {
            "node": "7. Save Product Data to Google Sheets",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking Test workflow": {
      "main": [
        [
          {
            "node": "1. Get Product URLs from Google Sheets",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "4. Extract Raw Data Elements": {
      "main": [
        [
          {
            "node": "5. Clean & Structure Data with AI",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "5. Clean & Structure Data with AI": {
      "main": [
        [
          {
            "node": "6. Format Final JSON Output",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "7. Save Product Data to Google Sheets": {
      "main": [
        [
          {
            "node": "2. Loop Through Each URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "1. Get Product URLs from Google Sheets": {
      "main": [
        [
          {
            "node": "2. Loop Through Each URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}