AutomationFlows › Web Scraping › News Aggregation with Deduplication and Ranking

News Aggregation with Deduplication and Ranking

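News Aggregation with Deduplication and Ranking. Fetches articles from NewsAPI (HTTP Request node) and the n8n blog (RSS Feed Read node), normalizes both sources into one schema, merges them, removes duplicate URLs, and keeps the ten highest-scoring items. Started by a Manual Trigger node; 8 nodes.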

Event trigger · ★★★★☆ complexity · 8 nodes · HTTP Request · RSS Feed Read
Category: Web Scraping · Trigger: Event · Nodes: 8 · Complexity: ★★★★☆ · Added: —

This workflow follows the HTTP Request → RSS Feed Read recipe pattern — see all workflows that pair these two integrations.

The workflow JSON

Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and run it. Full import guide →

Download .json
{
  "name": "News Aggregation with Deduplication and Ranking",
  "nodes": [
    {
      "parameters": {},
      "name": "Manual Trigger",
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        240,
        400
      ]
    },
    {
      "parameters": {
        "url": "https://newsapi.org/v2/everything?q=AI&language=en&pageSize=20",
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "httpHeaderAuth",
        "options": {}
      },
      "name": "Fetch NewsAPI",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        460,
        300
      ]
    },
    {
      "parameters": {
        "url": "https://blog.n8n.io/rss/",
        "options": {}
      },
      "name": "Fetch RSS Feed",
      "type": "n8n-nodes-base.rssFeedRead",
      "typeVersion": 1,
      "position": [
        460,
        500
      ]
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// NewsAPI answers with ONE object: { status, totalResults, articles: [...] }.\n// Emit one normalized item per article so downstream nodes receive the same\n// schema as the RSS branch: title, url, source, timestamp, summary, score.\nconst normalized = [];\n\nfor (const response of $input.all()) {\n  // Tolerate error payloads or empty responses that carry no articles array.\n  const articles = Array.isArray(response.json.articles) ? response.json.articles : [];\n\n  for (const article of articles) {\n    normalized.push({\n      json: {\n        title: article.title || '',\n        url: article.url || '',\n        source: (article.source && article.source.name) || '',\n        timestamp: article.publishedAt || '',\n        summary: article.description || '',\n        score: 0\n      }\n    });\n  }\n}\n\nreturn normalized;"
      },
      "name": "Normalize NewsAPI",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        680,
        300
      ],
      "notes": "Splits the NewsAPI response's articles array into one item per article and maps each to the shared schema (title, url, source, timestamp, summary, score)."
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "title",
              "name": "title",
              "value": "={{ $json.title }}",
              "type": "string"
            },
            {
              "id": "url",
              "name": "url",
              "value": "={{ $json.link }}",
              "type": "string"
            },
            {
              "id": "source",
              "name": "source",
              "value": "n8n Blog",
              "type": "string"
            },
            {
              "id": "timestamp",
              "name": "timestamp",
              "value": "={{ $json.pubDate }}",
              "type": "string"
            },
            {
              "id": "summary",
              "name": "summary",
              "value": "={{ $json.contentSnippet }}",
              "type": "string"
            },
            {
              "id": "score",
              "name": "score",
              "value": 0,
              "type": "number"
            }
          ]
        }
      },
      "name": "Normalize RSS",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.3,
      "position": [
        680,
        500
      ]
    },
    {
      "parameters": {
        "mode": "append"
      },
      "name": "Merge Sources",
      "type": "n8n-nodes-base.merge",
      "typeVersion": 2.1,
      "position": [
        900,
        400
      ]
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// Keep the first item seen for each URL; later items with the same URL are dropped.\n// URLs are compared case-insensitively, ignoring the scheme, a leading 'www.'\n// and one trailing slash. Items without a URL are discarded.\nconst seen = new Set();\nconst unique = [];\n\nfor (const item of $input.all()) {\n  const url = (item.json.url || '')\n    .toLowerCase()\n    .replace(/^https?:\\/\\/(www\\.)?/, '')\n    .replace(/\\/$/, '');\n\n  if (url && !seen.has(url)) {\n    seen.add(url);\n    unique.push({ json: item.json });\n  }\n}\n\nreturn unique;"
      },
      "name": "Deduplicate",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1120,
        400
      ],
      "notes": "Removes items whose normalized URL (no scheme, no www., no trailing slash, lower-cased) was already seen. Items with an empty URL are dropped."
    },
    {
      "parameters": {
        "mode": "runOnceForAllItems",
        "language": "javaScript",
        "jsCode": "// Score every item by keyword hits plus recency, then keep the ten best.\n// 'ai' is matched as a whole word so it does not hit 'said' or 'email';\n// the other keywords stay substring matches (so 'workflows' still counts).\nconst keywordPatterns = [/\\bai\\b/, /automation/, /workflow/, /n8n/];\n\nconst scored = $input.all().map(({ json: item }) => {\n  // Skip missing fields instead of concatenating the literal string 'undefined'.\n  const text = [item.title, item.summary].filter(Boolean).join(' ').toLowerCase();\n  const keywordScore = keywordPatterns.filter((pattern) => pattern.test(text)).length;\n\n  // An unparseable timestamp yields NaN, which fails both comparisons and scores 0.\n  const ageHours = (Date.now() - new Date(item.timestamp).getTime()) / (1000 * 60 * 60);\n  const recencyScore = ageHours < 24 ? 2 : ageHours < 48 ? 1 : 0;\n\n  return {\n    json: {\n      ...item,\n      keywordScore,\n      recencyScore,\n      finalScore: keywordScore + recencyScore\n    }\n  };\n});\n\nreturn scored.sort((a, b) => b.json.finalScore - a.json.finalScore).slice(0, 10);"
      },
      "name": "Rank and Filter Top 10",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1340,
        400
      ],
      "notes": "Adds keywordScore (0-4), recencyScore (2 if under 24 h old, 1 if under 48 h, else 0) and finalScore to each item, sorts by finalScore descending and returns at most 10 items."
    }
  ],
  "connections": {
    "Manual Trigger": {
      "main": [
        [
          {
            "node": "Fetch NewsAPI",
            "type": "main",
            "index": 0
          },
          {
            "node": "Fetch RSS Feed",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Fetch NewsAPI": {
      "main": [
        [
          {
            "node": "Normalize NewsAPI",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Fetch RSS Feed": {
      "main": [
        [
          {
            "node": "Normalize RSS",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Normalize NewsAPI": {
      "main": [
        [
          {
            "node": "Merge Sources",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Normalize RSS": {
      "main": [
        [
          {
            "node": "Merge Sources",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Merge Sources": {
      "main": [
        [
          {
            "node": "Deduplicate",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Deduplicate": {
      "main": [
        [
          {
            "node": "Rank and Filter Top 10",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1"
  }
}
Pro

For the full experience, including quality scoring and batch-install features for each workflow, upgrade to Pro.

About this workflow

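News Aggregation with Deduplication and Ranking. Fetches articles from NewsAPI (HTTP Request node) and the n8n blog (RSS Feed Read node), normalizes both sources into one schema, merges them, removes duplicate URLs, and keeps the ten highest-scoring items. Started by a Manual Trigger node; 8 nodes.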

Source: https://github.com/splinesreticulating/n8n-v2-workflow-skill/blob/main/assets/examples/news-aggregation-workflow.json — original creator credit. Request a take-down →

More Web Scraping workflows → · Browse all categories →

Related workflows

Workflows that share integrations, category, or trigger type with this one. All free to copy and import.

Web Scraping

Extract And Decode Google News RSS URLs to Clean Article Links. Uses manualTrigger, limit, rssFeedRead, httpRequest. Event-driven trigger; 20 nodes.

RSS Feed Read, HTTP Request
Web Scraping

The workflow performs tasks that would normally require human intervention on Google News links, transforming the RSS feeds into data that can be used by an automated system like n8n, thus creating a… (description truncated)

RSS Feed Read, HTTP Request
Web Scraping

AI Digest Daily Podcast. Uses rssFeedRead, httpRequest. Event-driven trigger; 11 nodes.

RSS Feed Read, HTTP Request
Web Scraping

Multi-Source Deduplication Example. Uses httpRequest, rssFeedRead. Event-driven trigger; 6 nodes.

HTTP Request, RSS Feed Read
Web Scraping

This n8n workflow automates the generation of short news videos using the HeyGen video API and RSS feeds from a Bangla news source, Prothom Alo. It is ideal for content creators, media publishers, or… (description truncated)

HTTP Request, RSS Feed Read