{
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "450b1757-d8d1-4eda-9ba8-8c3aedf9e1b3",
      "name": "Send a document",
      "type": "n8n-nodes-base.telegram",
      "position": [
        1120,
        1120
      ],
      "parameters": {
        "chatId": "123456789",
        "operation": "sendDocument",
        "binaryData": true,
        "additionalFields": {}
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "67914cf7-ac64-4da6-ba43-f899539721be",
      "name": "Structured Output Parser1",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        480,
        1152
      ],
      "parameters": {
        "autoFix": true,
        "jsonSchemaExample": "{\n  \"news_summary\": {\n    \"headline\": \"string\",\n    \"source_url\": \"string\",\n    \"published_date\": \"string\",\n    \"key_points\": \"string\",\n    \"summary\": \"string\",\n    \"extracted_images_url\": \"string\"\n  }\n}\n"
      },
      "typeVersion": 1.3
    },
    {
      "id": "8f10cdbc-761b-47c4-a9ca-e9c2453f4abb",
      "name": "VLM Agent2",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        368,
        1056
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "id",
          "value": "vlm-agent-1"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "aec1ecd0-f22e-4e2a-a083-c33bc9533e99",
      "name": "VLM Agent3",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        768,
        1088
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "id",
          "value": "vlm-agent-1"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "95cd43fc-86ff-47bc-acce-910236b6da97",
      "name": "Split Out",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        1152,
        752
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "output.news_summary.extracted_images_url"
      },
      "typeVersion": 1
    },
    {
      "id": "fdc0c429-edd0-4457-8fac-c9bc0fb3b467",
      "name": "HTTP Request1",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1168,
        928
      ],
      "parameters": {
        "url": "={{ $json['output.news_summary.extracted_images_url'] }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "1e005ab8-0e64-4e6c-ad90-2da728df3529",
      "name": "No Operation, do nothing1",
      "type": "n8n-nodes-base.noOp",
      "position": [
        944,
        1008
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "e58f4e54-5dca-4d2f-8589-87212c422825",
      "name": "Embeddings OpenAI",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        576,
        1568
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "1caedd4b-5aa1-419d-b87d-604b912e8ab2",
      "name": "Default Data Loader",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        1120,
        1568
      ],
      "parameters": {
        "options": {},
        "dataType": "binary"
      },
      "typeVersion": 1.1
    },
    {
      "id": "f812c98d-5fc0-48ff-9f48-5206e7ac3b32",
      "name": "Insert Data to Store",
      "type": "@n8n/n8n-nodes-langchain.vectorStoreInMemory",
      "position": [
        880,
        1280
      ],
      "parameters": {
        "mode": "insert",
        "memoryKey": {
          "__rl": true,
          "mode": "list",
          "value": "vector_store_key",
          "cachedResultName": "vector_store_key"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "13d1d33d-ccad-4ca3-be6a-d3e8e6772f29",
      "name": "Query Data Tool",
      "type": "@n8n/n8n-nodes-langchain.vectorStoreInMemory",
      "position": [
        288,
        1632
      ],
      "parameters": {
        "mode": "retrieve-as-tool",
        "toolName": "knowledge_base",
        "memoryKey": {
          "__rl": true,
          "mode": "list",
          "value": "vector_store_key"
        },
        "toolDescription": "Use this knowledge base to answer questions from the user"
      },
      "typeVersion": 1.2
    },
    {
      "id": "e8174bcc-08be-447e-96ce-72c1fcee74a9",
      "name": "AI Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        192,
        1408
      ],
      "parameters": {
        "text": "={{ $json.message.text }}",
        "options": {},
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 2
    },
    {
      "id": "a6ec2ab2-2478-4103-a615-aee03deacaa9",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -80,
        1248
      ],
      "parameters": {
        "color": 7,
        "width": 584,
        "height": 508,
        "content": "\n\n\n\n## Chat with LLM\n\nAsk anything about the provided \nnewspaper link and get the answer in Telegram"
      },
      "typeVersion": 1
    },
    {
      "id": "fabb3694-03c3-4a0e-96fb-e31d71b05928",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        720,
        1520
      ],
      "parameters": {
        "color": 4,
        "width": 320,
        "height": 240,
        "content": "### Embeddings\n\nThe Insert and Retrieve operations use the same embedding node.\n\nThis is to ensure that they are using the **exact same embeddings and settings**.\n\nDifferent embeddings might not work at all, or have unintended consequences.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "dd6bf692-7902-4691-9832-65e3bb9354c6",
      "name": "Code",
      "type": "n8n-nodes-base.code",
      "position": [
        -48,
        1040
      ],
      "parameters": {
        "jsCode": "const results = [];\n\nfor (const item of $input.all()) {\n  const url = item.json.URL_PARSE; // Input URL field\n\n  try {\n    // Fetch the raw HTML content\n    const html = await this.helpers.httpRequest({\n      method: 'GET',\n      url,\n      headers: {\n        'User-Agent': 'Mozilla/5.0',\n      },\n    });\n\n    // --- Extract title ---\n    const titleMatch = html.match(/<title>(.*?)<\\/title>/i);\n    const title = titleMatch ? titleMatch[1].trim() : null;\n\n    // --- Extract meta description ---\n    const descMatch = html.match(/<meta\\s+name=[\"']description[\"']\\s+content=[\"']([^\"']+)[\"']/i);\n    const description = descMatch ? descMatch[1].trim() : null;\n\n    // --- Extract all paragraphs (<p>...</p>) ---\n    const paragraphs = [];\n    const pRegex = /<p[^>]*>(.*?)<\\/p>/gi;\n    let match;\n    while ((match = pRegex.exec(html)) !== null) {\n      // Remove all HTML tags from paragraph text\n      const cleanText = match[1].replace(/<[^>]+>/g, '').trim();\n      if (cleanText) paragraphs.push(cleanText);\n    }\n    const text = paragraphs.join('\\n\\n');\n\n    // --- Extract all <img> URLs ---\n    const images = [];\n    const imgRegex = /<img[^>]+src=[\"']([^\"']+)[\"']/gi;\n    let imgMatch;\n    while ((imgMatch = imgRegex.exec(html)) !== null) {\n      images.push(imgMatch[1]);\n    }\n\n    // --- Extract Open Graph metadata ---\n    const ogTitleMatch = html.match(/<meta property=[\"']og:title[\"'] content=[\"']([^\"']+)[\"']/i);\n    const ogImageMatch = html.match(/<meta property=[\"']og:image[\"'] content=[\"']([^\"']+)[\"']/i);\n    const ogTitle = ogTitleMatch ? ogTitleMatch[1] : null;\n    const ogImage = ogImageMatch ? ogImageMatch[1] : null;\n\n    // --- Short summary (first 3 paragraphs) ---\n    const summary = paragraphs.slice(0, 3).join('\\n\\n');\n\n    // --- Add all extracted data to original item ---\n    item.json.source_url = url;\n    item.json.title = title;\n    item.json.description = description;\n    item.json.text = text;\n    item.json.images = images;\n    item.json.og_title = ogTitle;\n    item.json.og_image = ogImage;\n    item.json.summary = summary;\n    item.json.error = null;\n\n  } catch (error) {\n    // --- Handle errors: record the message, keep the other fields empty ---\n    item.json.source_url = url;\n    item.json.title = null;\n    item.json.description = null;\n    item.json.text = null;\n    item.json.images = [];\n    item.json.og_title = null;\n    item.json.og_image = null;\n    item.json.summary = null;\n    item.json.error = error && error.message ? error.message : String(error);\n  }\n\n  results.push(item);\n}\n\n// Return updated items for downstream nodes\nreturn results;\n"
      },
      "typeVersion": 2
    },
    {
      "id": "b2eacd7c-ff24-4daa-8bf7-f51270914b0e",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -656,
        1248
      ],
      "parameters": {
        "width": 576,
        "height": 640,
        "content": "\n1. **Telegram Trigger Node**  \n   - Listens for incoming messages from a Telegram bot.\n   - Captures the message text.\n\n2. **Link Detection**  \n   - The workflow checks the message for any valid HTTP/HTTPS URLs.\n\n3. **Scraping Node (Function Node)**  \n   - When a link is detected, the URL is passed to the scraping node.\n   - The node fetches the webpage, extracts:\n     - Page title\n     - Meta description\n     - All paragraph text\n     - Images\n     - Open Graph metadata\n     - Short summary (first 3 paragraphs)\n\n4. **Output**  \n   - The extracted data is stored in JSON format.\n   - Can be sent back to Telegram, stored in a database, or used in other workflows.\n\n"
      },
      "typeVersion": 1
    },
    {
      "id": "20955189-44a2-45e6-bf8d-638ff043a849",
      "name": "Check Whether URL",
      "type": "n8n-nodes-base.if",
      "position": [
        -416,
        1712
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "405c9d7c-b745-4a57-9123-3d19af7def77",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{ $json.message.link_preview_options.url }}",
              "rightValue": "https"
            }
          ]
        },
        "looseTypeValidation": true
      },
      "typeVersion": 2.2
    },
    {
      "id": "4b29201d-3dd3-4539-9d77-8fad7bc2df30",
      "name": "Listen to Telegram for Link",
      "type": "n8n-nodes-base.telegramTrigger",
      "position": [
        -880,
        1712
      ],
      "parameters": {
        "updates": [
          "message"
        ],
        "additionalFields": {
          "chatIds": "123456789"
        }
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "78878d14-d80d-4cc1-946b-0e2f5d2e25d4",
      "name": "Rename Link Field",
      "type": "n8n-nodes-base.set",
      "position": [
        -208,
        1744
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "5a6f8e47-e620-44cf-a004-66310ad8f158",
              "name": "URL_PARSE",
              "type": "string",
              "value": "={{ $json.message.link_preview_options.url }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "88dda24f-69ad-425f-ba93-b7df42312bf9",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -160,
        608
      ],
      "parameters": {
        "color": 4,
        "width": 512,
        "height": 640,
        "content": "# Web Page Scraper\n\n## Input\n- Expects `URL_PARSE` containing a valid HTTP/HTTPS URL.\n\n## Processing\n1. Fetches the page HTML using n8n\u2019s `httpRequest`.\n2. Extracts:\n   - Page `<title>` and meta description\n   - All paragraph text\n   - `<img>` URLs\n   - Open Graph title & image\n3. Generates a short summary (first 3 paragraphs).\n4. Handles errors by recording messages and keeping other fields empty.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "7ec330a3-8a34-44d7-8d3a-1e6f2ef57b22",
      "name": "VLM Run Highlighter",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "onError": "continueErrorOutput",
      "position": [
        544,
        848
      ],
      "parameters": {
        "text": "=Generate and give explanation of important parts of the following newspaper daily- {{$json.source_url + $json.title + $json.description + $json.text + $json.images + $json.summary}}. Highlight important news, and provide correct complete urls for news image downloading. If correct complete urls not found leave blank- \"\". Give result according to JSON schema.",
        "options": {},
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.7
    },
    {
      "id": "b0330b9d-bddc-4a41-8ee4-9ae31c008fe4",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        352,
        624
      ],
      "parameters": {
        "color": 5,
        "width": 512,
        "height": 624,
        "content": "# Daily Newspaper Summarizer\n\n## Processing\n1. Analyze text \u2192 generate summary and highlight important news.\n2. Extract images \u2192 return complete image URLs (left blank if none are found).\n3. Output JSON schema (`news_summary`):\n   - `headline`\n   - `source_url`\n   - `published_date`\n   - `key_points`\n   - `summary`\n   - `extracted_images_url`"
      },
      "typeVersion": 1
    },
    {
      "id": "81ea8e52-b29e-49e3-8ac6-3cd9fb3594cb",
      "name": "Check URLs Validity",
      "type": "n8n-nodes-base.if",
      "position": [
        928,
        768
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "5a38f01f-075d-4d2f-a1b4-94503a993ad5",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{ $json.output.news_summary.extracted_images_url }}",
              "rightValue": "=\"\""
            }
          ]
        },
        "looseTypeValidation": true
      },
      "typeVersion": 2.2
    },
    {
      "id": "00425b38-d2fd-4c5b-aed7-3df8b1e02d1e",
      "name": "Covert to Text File",
      "type": "n8n-nodes-base.convertToFile",
      "position": [
        1296,
        1312
      ],
      "parameters": {
        "options": {},
        "operation": "toText",
        "sourceProperty": "output"
      },
      "typeVersion": 1.1
    },
    {
      "id": "14d5ab95-3f79-4eaa-8cc0-5c924a6a79ec",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        864,
        512
      ],
      "parameters": {
        "color": 3,
        "width": 544,
        "height": 736,
        "content": "# Telegram Notifier\n\n\n## How it Works\n1. Extract URLs and split them.\n2. Download images, make document containing news highlights and send as Telegram files.\n\n"
      },
      "typeVersion": 1
    },
    {
      "id": "3b0760e0-c63f-4f40-baa3-9aa1dae71382",
      "name": "Sticky Note9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        512,
        1248
      ],
      "parameters": {
        "color": 6,
        "width": 896,
        "height": 224,
        "content": "## Store Highlight Report \n## Embedding\n\nConvert the highlight summary file \ninto embeddings to store in the vector database"
      },
      "typeVersion": 1
    },
    {
      "id": "0e063212-ab90-4a78-9eb7-aed52a7e6910",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        144,
        1632
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4.1-nano",
          "cachedResultName": "gpt-4.1-nano"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "6bdcf79b-0d02-49db-88fc-049cb4ebbeab",
      "name": "Send a Reply",
      "type": "n8n-nodes-base.telegram",
      "position": [
        -32,
        1616
      ],
      "parameters": {
        "text": "={{ $json.output }}",
        "chatId": "123456789",
        "additionalFields": {}
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "277945f2-897e-4b0a-8972-0f3654c57fed",
      "name": "Start Asking",
      "type": "n8n-nodes-base.telegram",
      "position": [
        1280,
        1120
      ],
      "parameters": {
        "text": "=Start asking about provided link now",
        "chatId": "123456789",
        "forceReply": {},
        "replyMarkup": "forceReply",
        "additionalFields": {}
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "3657f4e8-3134-4c34-ba3c-9e6ac01f2fa4",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -656,
        400
      ],
      "parameters": {
        "color": 6,
        "width": 496,
        "height": 848,
        "content": "# \ud83d\udcf0 Newspaper Agent\n\n## Overview\n- The Newspaper Agent workflow automates the process of analyzing and summarizing online news articles directly within **n8n**.  \n- It listens for shared links from Telegram, scrapes the article content, summarizes it using a **Vision-Language Model (VLM)**, sends structured highlights and related images back to Telegram, and stores the summarized data for later conversational queries. Users can then chat with the bot to explore information from the newspaper.\n\n## How It Works\n- The workflow starts when a user sends a newspaper link through **Telegram**.  \n- It then:\n  1. **Validates the URL** and scrapes article text, metadata, and images.  \n  2. **Sends content to VLM Run**, which generates structured highlights (headline, key points, summary, and extracted images).  \n  3. **Downloads and validates** the extracted image URLs.  \n  4. **Sends the summarized report** back to the user on Telegram.  \n  5. **Embeds the processed text** into a vector store for semantic Q&A using OpenAI embeddings.\n\n## Requirements\n- **Telegram Bot Credentials** \u2014 For listening to incoming messages and sending replies.  \n- **VLM Run API Credentials** \u2014 Used for generating structured vision-language summaries.  \n- **OpenAI API Key** \u2014 For creating text embeddings and interactive Q&A.  \n- **Google Drive OAuth2 (Optional)** \u2014 If you want to store or share extracted summaries or images externally.  \n- **HTTP Request Permissions** \u2014 To scrape content and download images from provided URLs.  \n"
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "Code": {
      "main": [
        [
          {
            "node": "VLM Run Highlighter",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "AI Agent": {
      "main": [
        [
          {
            "node": "Send a Reply",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out": {
      "main": [
        [
          {
            "node": "HTTP Request1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "VLM Agent2": {
      "ai_languageModel": [
        [
          {
            "node": "VLM Run Highlighter",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "VLM Agent3": {
      "ai_languageModel": [
        [
          {
            "node": "Structured Output Parser1",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Start Asking": {
      "main": [
        []
      ]
    },
    "HTTP Request1": {
      "main": [
        [
          {
            "node": "Send a document",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Query Data Tool": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Send a document": {
      "main": [
        [
          {
            "node": "Start Asking",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check Whether URL": {
      "main": [
        [
          {
            "node": "Rename Link Field",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Embeddings OpenAI": {
      "ai_embedding": [
        [
          {
            "node": "Insert Data to Store",
            "type": "ai_embedding",
            "index": 0
          },
          {
            "node": "Query Data Tool",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Rename Link Field": {
      "main": [
        [
          {
            "node": "Code",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check URLs Validity": {
      "main": [
        [
          {
            "node": "Split Out",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "No Operation, do nothing1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Covert to Text File": {
      "main": [
        [
          {
            "node": "Send a document",
            "type": "main",
            "index": 0
          },
          {
            "node": "Insert Data to Store",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Default Data Loader": {
      "ai_document": [
        [
          {
            "node": "Insert Data to Store",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "VLM Run Highlighter": {
      "main": [
        [
          {
            "node": "Covert to Text File",
            "type": "main",
            "index": 0
          },
          {
            "node": "Check URLs Validity",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "No Operation, do nothing1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser1": {
      "ai_outputParser": [
        [
          {
            "node": "VLM Run Highlighter",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Listen to Telegram for Link": {
      "main": [
        [
          {
            "node": "Check Whether URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}