{
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "a221efb8-cc52-4307-a283-a5feb82e16ed",
      "name": "Schedule Trigger",
      "type": "n8n-nodes-base.scheduleTrigger",
      "position": [
        960,
        384
      ],
      "parameters": {
        "rule": {
          "interval": [
            {}
          ]
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "32df9c8e-e19e-4ed5-a32c-7a72e2138531",
      "name": "Split Forums",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        1408,
        384
      ],
      "parameters": {
        "include": "selectedOtherFields",
        "options": {
          "destinationFieldName": "url"
        },
        "fieldToSplitOut": "forums",
        "fieldsToInclude": "geo"
      },
      "typeVersion": 1
    },
    {
      "id": "50452a8d-e10e-4d8e-bdf3-e31718084c6f",
      "name": "Iterate Forums",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        1632,
        384
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "16f66f13-eeec-43c3-930e-e1e2c45293ab",
      "name": "Google Gemini Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        2096,
        480
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "61e2f466-dbd5-4e0b-84a4-811bf4ac80d1",
      "name": "Extract Structured News Data",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        2080,
        256
      ],
      "parameters": {
        "text": "=",
        "batching": {},
        "messages": {
          "messageValues": [
            {
              "message": "=You are an intelligent data extraction model specialized in reading unstructured forum or news data and converting it into structured JSON format."
            },
            {
              "type": "HumanMessagePromptTemplate",
              "message": "=Your task is to extract a list of news posts from the provided scraped text. Each post should include:\n- title\n- url\n- source (domain name)\n- points (integer, can be null)\n- comments (integer, can be null)\n- author (string, can be null)\n- posted_at (string, e.g. or \"2025-10-18\")\n\nReturn the result strictly as a JSON array following this schema:\n[\n  {\n    \"title\": \"\",\n    \"url\": \"\",\n    \"source\": \"\",\n    \"points\": 0,\n    \"comments\": 0,\n    \"author\": \"\",\n    \"posted_at\": \"\"\n  }\n]\n\nRules:\n- Output JSON only \u2014 no explanations, comments, or markdown formatting.\n- Never include text outside the JSON array.\n- Ensure all keys exist even if null.\n- Determine the posted dates using {{ $now.format('yyyy-MM-dd HH:mm:ss') }} as reference."
            },
            {
              "type": "HumanMessagePromptTemplate",
              "message": "=Example scraped text:\n\"1. Claude Haiku 4.5 (anthropic.com/news/claude-haiku-4-5) - 617 points by adocomplete 2 hours ago | 228 comments\n2. Claude Haiku 4.5 System Card (anthropic.com/system-card.pdf) - 51 points by vinhnx 1 day ago | 1 comment\""
            },
            {
              "type": "AIMessagePromptTemplate",
              "message": "=[\n  {\n    \"title\": \"Claude Haiku 4.5\",\n    \"url\": \"https://www.anthropic.com/news/claude-haiku-4-5\",\n    \"source\": \"anthropic.com\",\n    \"points\": 617,\n    \"comments\": 228,\n    \"author\": \"adocomplete\",\n    \"posted_at\": \"2025-10-18\"\n  },\n  {\n    \"title\": \"Claude Haiku 4.5 System Card\",\n    \"url\": \"https://www.anthropic.com/system-card.pdf\",\n    \"source\": \"anthropic.com\",\n    \"points\": 51,\n    \"comments\": 1,\n    \"author\": \"vinhnx\",\n    \"posted_at\": \"2025-10-17\"\n  }\n]"
            },
            {
              "type": "HumanMessagePromptTemplate",
              "message": "=={{ $json.data.results.first().content }}"
            }
          ]
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.7
    },
    {
      "id": "f895a2a7-ead3-4fea-b5de-a65abd5d747f",
      "name": "Parse JSON Output",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        2224,
        480
      ],
      "parameters": {
        "jsonSchemaExample": "[\n  {\n    \"title\": \"Claude Haiku 4.5\",\n    \"url\": \"https://www.anthropic.com/news/claude-haiku-4-5\",\n    \"source\": \"anthropic.com\",\n    \"points\": 617,\n    \"comments\": 228,\n    \"author\": \"adocomplete\",\n    \"posted_at\": \"2025-10-18\"\n  },\n  {\n    \"title\": \"Claude Haiku 4.5 System Card\",\n    \"url\": \"https://www.anthropic.com/system-card.pdf\",\n    \"source\": \"anthropic.com\",\n    \"points\": 51,\n    \"comments\": 1,\n    \"author\": \"vinhnx\",\n    \"posted_at\": \"2025-10-17\"\n  }\n]"
      },
      "typeVersion": 1.3
    },
    {
      "id": "2729e432-f311-4987-803e-195c87beda3e",
      "name": "Split News Items",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        2432,
        112
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "output"
      },
      "typeVersion": 1
    },
    {
      "id": "2f5c6ed4-0a4f-47d7-8c99-5380251165cf",
      "name": "Generate Unique Key",
      "type": "n8n-nodes-base.crypto",
      "position": [
        2656,
        112
      ],
      "parameters": {
        "value": "={{ `${$json.url}+${$json.author}` }}",
        "dataPropertyName": "key"
      },
      "executeOnce": false,
      "typeVersion": 1
    },
    {
      "id": "3db0bc16-f718-4086-a310-0c386b792ae2",
      "name": "Wait Between Scrapes",
      "type": "n8n-nodes-base.wait",
      "position": [
        2656,
        480
      ],
      "parameters": {
        "unit": "minutes",
        "amount": 1
      },
      "executeOnce": false,
      "typeVersion": 1.1
    },
    {
      "id": "426b16cc-72fb-47be-91a2-76059472d81d",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        0,
        0
      ],
      "parameters": {
        "width": 864,
        "height": 848,
        "content": "![Waha Johan](https://drive.google.com/thumbnail?id=1SHtHQ7h1pflq_L_obGfBK29wGUY16-Vg&sz=w2000)\n## AI-Powered News Scraper using Decodo and Gemini AI\n\nSign up for Decodo [HERE](https://visit.decodo.com/discount) for Discount\n\nAutomatically scrape, structure, and log forum or news content using Decodo and Google Gemini AI. This workflow extracts key details like titles, URLs, authors, and engagement stats, then appends them to a Google Sheet for tracking and analysis.\n\n## Who\u2019s it for?\nIdeal for data journalists, market researchers, or AI enthusiasts who want to monitor trending topics across specific domains.\n\n## How it works\n1. **Trigger:** Workflow runs on schedule.  \n2. **Data Setup:** Defines forum URLs and geolocation.  \n3. **Scraping:** Extracts raw text data using the Decodo API.  \n4. **AI Extraction:** Gemini parses and structures the scraped text into clean JSON.  \n5. **Data Storage:** Each news item is appended or updated in Google Sheets.  \n6. **Logging:** Records scraping results for monitoring and debugging.\n\n## How to set up\n- Add your **Decodo**, **Google Gemini**, and **Google Sheets** credentials in n8n.  \n- Adjust the **forum URLs**, **geolocation**, and **Google Sheet ID** in the `Workflow Config` node.  \n- Set your preferred trigger interval in `Schedule Trigger`.  \n- Activate and monitor from the n8n dashboard."
      },
      "typeVersion": 1
    },
    {
      "id": "d7080fb7-a9c5-488b-96b7-6d0212840f14",
      "name": "Update Google Sheet (News)",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        2880,
        112
      ],
      "parameters": {
        "columns": {
          "value": {
            "key": "={{ $json.key }}",
            "url": "={{ $json.url }}",
            "title": "={{ $json.title }}",
            "author": "={{ $json.author }}",
            "points": "={{ $json.points }}",
            "source": "={{ $json.source }}",
            "comments": "={{ $json.comments }}",
            "posted_at": "={{ $json.posted_at }}",
            "last_updated": "={{ $now }}"
          },
          "schema": [
            {
              "id": "key",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "key",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "title",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "title",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "url",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "url",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "source",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "source",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "points",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "points",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "comments",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "comments",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "author",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "author",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "posted_at",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "posted_at",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "last_updated",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "last_updated",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "key"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "appendOrUpdate",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": 800242193,
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ZT7TGHRidqMlQpx-mCcqzUMdg_dzidPCZxNrabKJrhM/edit#gid=800242193",
          "cachedResultName": "News"
        },
        "documentId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $('Workflow Config').item.json.sheet_id }}"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "bb8bd038-ae46-41c2-b825-7c57062f840e",
      "name": "Workflow Config",
      "type": "n8n-nodes-base.set",
      "position": [
        1184,
        384
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "3896038b-41c9-496f-af43-dee58ca1ee4f",
              "name": "forums",
              "type": "array",
              "value": "={{[\n  \"https://news.ycombinator.com/from?site=openai.com\",\n  \"https://news.ycombinator.com/from?site=anthropic.com\",\n]}}"
            },
            {
              "id": "371bf5a1-4377-4f15-9521-f240efd7db47",
              "name": "geo",
              "type": "string",
              "value": "United States"
            },
            {
              "id": "232246ee-aebd-4b4c-abc3-15818c2830d7",
              "name": "sheet_id",
              "type": "string",
              "value": "={{ YOUR_SHEET_ID }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "647461db-0ac5-4e40-9cf4-e4efa25ef91e",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1136,
        288
      ],
      "parameters": {
        "color": 5,
        "width": 192,
        "height": 240,
        "content": "### Specify forum URLs, geolocation, and Sheet ID"
      },
      "typeVersion": 1
    },
    {
      "id": "ce1e6d6f-6b4a-4ea4-8739-05851eed79dc",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2832,
        16
      ],
      "parameters": {
        "color": 5,
        "width": 192,
        "height": 272,
        "content": "### Ensure your sheet tab matches the column schema"
      },
      "typeVersion": 1
    },
    {
      "id": "ec03d0bb-cad4-40cc-a9a9-a127e67daf71",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        912,
        288
      ],
      "parameters": {
        "color": 5,
        "width": 192,
        "height": 240,
        "content": "### Adjust schedule (e.g., every day at midnight)"
      },
      "typeVersion": 1
    },
    {
      "id": "8e495053-ed4d-408a-92a9-49a0e26c6f17",
      "name": "Scrape Forum Data",
      "type": "@decodo/n8n-nodes-decodo.decodo",
      "position": [
        1856,
        256
      ],
      "parameters": {
        "geo": "={{ $json.geo }}",
        "url": "={{ $json.url }}"
      },
      "typeVersion": 1
    },
    {
      "id": "bff896ee-261c-4c98-91d9-805b2b94bf5c",
      "name": "Sticky Note8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2384,
        256
      ],
      "parameters": {
        "color": 5,
        "width": 192,
        "height": 272,
        "content": "### Ensure your sheet tab matches the column schema"
      },
      "typeVersion": 1
    },
    {
      "id": "58c57d13-dc9d-43e3-a5a6-816dbcb864e4",
      "name": "Log Scrape Results",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        2432,
        352
      ],
      "parameters": {
        "columns": {
          "value": {
            "geo": "={{ $('Iterate Forums').item.json.geo }}",
            "forum": "={{ $('Iterate Forums').item.json.url }}",
            "news_count": "={{ $json.output.length }}",
            "scraped_at": "={{ $now }}"
          },
          "schema": [
            {
              "id": "forum",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "forum",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "geo",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "geo",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "news_count",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "news_count",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "scraped_at",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "scraped_at",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ZT7TGHRidqMlQpx-mCcqzUMdg_dzidPCZxNrabKJrhM/edit#gid=0",
          "cachedResultName": "Logs"
        },
        "documentId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $('Workflow Config').item.json.sheet_id }}"
        }
      },
      "executeOnce": false,
      "typeVersion": 4.7
    },
    {
      "id": "f4675599-f622-4d87-96a6-17e0e755853b",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1216,
        -544
      ],
      "parameters": {
        "width": 1008,
        "height": 592,
        "content": "@[youtube](mJJt2dDDOgw)\n"
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "Split Forums": {
      "main": [
        [
          {
            "node": "Iterate Forums",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Iterate Forums": {
      "main": [
        [],
        [
          {
            "node": "Scrape Forum Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Workflow Config": {
      "main": [
        [
          {
            "node": "Split Forums",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Schedule Trigger": {
      "main": [
        [
          {
            "node": "Workflow Config",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split News Items": {
      "main": [
        [
          {
            "node": "Generate Unique Key",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse JSON Output": {
      "ai_outputParser": [
        [
          {
            "node": "Extract Structured News Data",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Scrape Forum Data": {
      "main": [
        [
          {
            "node": "Extract Structured News Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Log Scrape Results": {
      "main": [
        [
          {
            "node": "Wait Between Scrapes",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate Unique Key": {
      "main": [
        [
          {
            "node": "Update Google Sheet (News)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Model": {
      "ai_languageModel": [
        [
          {
            "node": "Extract Structured News Data",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Wait Between Scrapes": {
      "main": [
        [
          {
            "node": "Iterate Forums",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Structured News Data": {
      "main": [
        [
          {
            "node": "Split News Items",
            "type": "main",
            "index": 0
          },
          {
            "node": "Log Scrape Results",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}