This workflow follows the Gmail Trigger → HTTP Request recipe pattern — see all workflows that pair these two integrations.
The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and activate it. Full import guide →
{
"name": "veilleur \u2014 Gmail \u2192 Scraping",
"nodes": [
{
"parameters": {
"pollTimes": {
"item": [
{
"mode": "everyMinute"
}
]
},
"simple": false,
"filters": {},
"options": {}
},
"id": "186aaf37-4af9-4281-bc70-8d12cff2d84f",
"name": "Gmail Trigger",
"type": "n8n-nodes-base.gmailTrigger",
"typeVersion": 1.2,
"position": [
2128,
-464
],
"credentials": {
"gmailOAuth2": {
"name": "<your credential>"
}
}
},
{
"parameters": {
"jsCode": "// Extract links from email HTML body\n// Works with both simplified and raw Gmail output\nconst items = $input.all();\nconst results = [];\n\nfor (const item of items) {\n const data = item.json;\n\n // Try multiple locations where HTML body might be\n let html = '';\n\n // 1. n8n Gmail Trigger with Simplify OFF \u2014 html is at top level\n if (data.html) {\n html = data.html;\n }\n // 2. Simplified format (simple: true)\n else if (data.textHtml) {\n html = data.textHtml;\n }\n // 3. Raw format - multipart (simple: false, older versions)\n else if (data.payload?.parts) {\n for (const part of data.payload.parts) {\n if (part.mimeType === 'text/html' && part.body?.data) {\n html = Buffer.from(part.body.data, 'base64url').toString('utf-8');\n break;\n }\n if (part.parts) {\n for (const subpart of part.parts) {\n if (subpart.mimeType === 'text/html' && subpart.body?.data) {\n html = Buffer.from(subpart.body.data, 'base64url').toString('utf-8');\n break;\n }\n }\n }\n }\n }\n // 4. Raw format - single body\n else if (data.payload?.body?.data) {\n html = Buffer.from(data.payload.body.data, 'base64url').toString('utf-8');\n }\n\n const subject = data.subject || data.Subject || 'Sans sujet';\n const from = data.from?.value?.[0]?.address || data.From || data.from || 'unknown';\n\n if (!html) {\n results.push({\n json: {\n newsletter_name: subject,\n from: from,\n received_at: new Date().toISOString(),\n links: [],\n link_count: 0,\n _debug: 'No HTML body found. 
Available keys: ' + Object.keys(data).join(', ')\n }\n });\n continue;\n }\n\n const linkRegex = /href=[\"'](https?:\\/\\/[^\"'\\s>]+)[\"']/gi;\n const seen = new Set();\n const links = [];\n let match;\n\n while ((match = linkRegex.exec(html)) !== null) {\n let url = match[1];\n url = url.replace(/&/g, '&');\n\n const tldnMatch = url.match(/tldrnewsletter\\.com\\/CL0\\/(https?[^/]+)/);\n if (tldnMatch) {\n try { url = decodeURIComponent(tldnMatch[1]); } catch (e) {}\n }\n\n if (seen.has(url)) continue;\n seen.add(url);\n\n const skip = [\n 'unsubscribe', 'list-manage', 'mailchimp', 'tracking.tldrnewsletter',\n 'tldrnewsletter.com/CL0',\n 'click.', 'mailto:', 'tel:', '.png', '.jpg', '.gif', '.svg',\n 'facebook.com', 'twitter.com', 'x.com/share', 'linkedin.com/share',\n 'linkedin.com/feed', 'linkedin.com/in/',\n 'instagram.com', 'youtube.com/channel', 'open.spotify.com',\n 'play.google.com', 'apps.apple.com', 'beacon', 'pixel',\n 'doubleclick', 'analytics',\n 'campaign-archive', 'manage-preferences', 'view-in-browser',\n 'email.mg.', 'sendgrid.net', 'substack.com/subscribe',\n 'convertkit', 'beehiiv.com/subscribe',\n 'refer.tldr.tech', 'hub.sparklp.co', 'a.tldrnewsletter.com',\n 'tldr.tech/ai?utm', 'advertise.tldr.tech', 'jobs.ashbyhq.com',\n 'tldr.tech/ai/manage', 'go.clerk.com',\n 'webinars.atlassian.com'\n ];\n\n const urlLower = url.toLowerCase();\n if (skip.some(s => urlLower.includes(s))) continue;\n if (url.length < 20) continue;\n\n let title = '';\n try {\n const escapedUrl = url.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n const titleRegex = new RegExp('href=[\"\\'][^\"\\']* ' + escapedUrl.slice(0, 40) + '[^\"\\']*[\"\\'][^>]*>([^<]+)<', 'i');\n const titleMatch = html.match(titleRegex);\n if (titleMatch && titleMatch[1]) {\n title = titleMatch[1].trim();\n }\n } catch (e) {}\n\n links.push({ url, title });\n }\n\n results.push({\n json: {\n newsletter_name: subject,\n from: from,\n received_at: new Date().toISOString(),\n links: links,\n link_count: 
links.length\n }\n });\n}\n\nreturn results;"
},
"id": "de7e6d81-2ffb-4509-bc65-f2413f31135a",
"name": "Extract Links",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2352,
-464
]
},
{
"parameters": {
"jsCode": "// Flatten links for individual scraping\nconst items = $input.all();\nconst allLinks = [];\n\nfor (const item of items) {\n const newsletter = item.json.newsletter_name;\n const from = item.json.from;\n const receivedAt = item.json.received_at;\n const links = item.json.links || [];\n\n for (const link of links) {\n allLinks.push({\n json: {\n url: link.url,\n title: link.title,\n newsletter_name: newsletter,\n from: from,\n received_at: receivedAt\n }\n });\n }\n}\n\nreturn allLinks;"
},
"id": "4889904c-0ea7-46ac-895a-8521d44e7c35",
"name": "Flatten Links",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2576,
-464
]
},
{
"parameters": {
"options": {}
},
"id": "92a21139-7a5b-4b37-bc87-1f7a5bbeedf9",
"name": "Loop Over Links",
"type": "n8n-nodes-base.splitInBatches",
"typeVersion": 3,
"position": [
2800,
-464
]
},
{
"parameters": {
"amount": 1.5
},
"id": "8cc436b6-e735-4397-b235-edc8d8d8d025",
"name": "Wait",
"type": "n8n-nodes-base.wait",
"typeVersion": 1.1,
"position": [
3008,
-368
]
},
{
"parameters": {
"url": "=https://r.jina.ai/{{ $json.url }}",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Accept",
"value": "text/markdown"
}
]
},
"options": {
"response": {
"response": {
"responseFormat": "text"
}
},
"timeout": 15000
}
},
"id": "0b3fc763-b1c1-4b54-b109-53691cc23326",
"name": "Scrape via Jina",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
3232,
-368
],
"continueOnFail": true,
"onError": "continueRegularOutput"
},
{
"parameters": {
"jsCode": "// Merge scraped content back with metadata\nconst item = $input.first();\nconst batchItem = $('Loop Over Links').first();\n\nreturn [{\n json: {\n url: batchItem.json.url,\n title: batchItem.json.title,\n newsletter_name: batchItem.json.newsletter_name,\n from: batchItem.json.from,\n received_at: batchItem.json.received_at,\n content: item.json.data || item.json.body || '',\n scraped: !item.json.error\n }\n}];"
},
"id": "d125dc53-d3fe-4117-b0d6-2fab91276522",
"name": "Merge Content",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
3456,
-368
]
},
{
"parameters": {
"jsCode": "// Aggregate all scraped articles and prepare as binary for file writing\n// Files are written to /data/veilleur/raw/ with date-prefixed names\n// Filename uses sender domain + HHmmss timestamp to avoid collisions\n// when multiple newsletters arrive on the same day.\nconst items = $input.all();\n\nfunction senderDomain(email) {\n const match = (email || '').match(/@([^>\\s]+)/);\n if (!match) return 'unknown';\n return match[1]\n .toLowerCase()\n .replace(/^(mail|email|newsletter|news|hello|info|noreply|no-reply)\\./, '')\n .replace(/\\.[a-z]{2,4}$/, '')\n .replace(/[^a-z0-9]+/g, '-')\n .replace(/^-+|-+$/g, '');\n}\n\nfunction timeSlug(isoDate) {\n const d = new Date(isoDate);\n return String(d.getUTCHours()).padStart(2, '0') + String(d.getUTCMinutes()).padStart(2, '0') + String(d.getUTCSeconds()).padStart(2, '0');\n}\n\nconst byNewsletter = {};\nfor (const item of items) {\n const key = item.json.newsletter_name || 'unknown';\n if (!byNewsletter[key]) {\n byNewsletter[key] = {\n newsletter: key,\n from: item.json.from,\n received_at: item.json.received_at,\n links: []\n };\n }\n byNewsletter[key].links.push({\n url: item.json.url,\n title: item.json.title,\n content: item.json.content || '',\n scraped: item.json.scraped\n });\n}\n\nconst results = [];\nfor (const [name, data] of Object.entries(byNewsletter)) {\n const received = data.received_at || new Date().toISOString();\n const date = received.split('T')[0];\n const domain = senderDomain(data.from);\n const time = timeSlug(received);\n const filename = `${date}-newsletter-${domain}-${time}.json`;\n const content = JSON.stringify(data, null, 2);\n const binaryData = await this.helpers.prepareBinaryData(\n Buffer.from(content, 'utf-8'),\n filename,\n 'application/json'\n );\n\n results.push({\n json: {\n date: date,\n filename: filename,\n filepath: `/data/veilleur/raw/${filename}`,\n newsletter: name,\n link_count: data.links.length\n },\n binary: {\n data: binaryData\n }\n });\n}\n\nreturn results;"
},
"id": "b9d83b73-82ce-40e4-96ae-cd6fb3760f02",
"name": "Prepare Output",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
3680,
-464
]
},
{
"parameters": {
"operation": "write",
"fileName": "={{ $json.filepath }}",
"options": {}
},
"id": "bd088439-3f65-43bd-a9c8-47f711cb7066",
"name": "Write File",
"type": "n8n-nodes-base.readWriteFile",
"typeVersion": 1,
"position": [
4112,
-464
]
}
],
"connections": {
"Gmail Trigger": {
"main": [
[
{
"node": "Extract Links",
"type": "main",
"index": 0
}
]
]
},
"Extract Links": {
"main": [
[
{
"node": "Flatten Links",
"type": "main",
"index": 0
}
]
]
},
"Flatten Links": {
"main": [
[
{
"node": "Loop Over Links",
"type": "main",
"index": 0
}
]
]
},
"Loop Over Links": {
"main": [
[
{
"node": "Prepare Output",
"type": "main",
"index": 0
}
],
[
{
"node": "Wait",
"type": "main",
"index": 0
}
]
]
},
"Wait": {
"main": [
[
{
"node": "Scrape via Jina",
"type": "main",
"index": 0
}
]
]
},
"Scrape via Jina": {
"main": [
[
{
"node": "Merge Content",
"type": "main",
"index": 0
}
]
]
},
"Merge Content": {
"main": [
[
{
"node": "Loop Over Links",
"type": "main",
"index": 0
}
]
]
},
"Prepare Output": {
"main": [
[
{
"node": "Write File",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"availableInMCP": false,
"timeSavedMode": "fixed",
"callerPolicy": "workflowsFromSameOwner"
},
"versionId": "d4f09ad5-9db9-457b-8fcb-3e8b11ee8516",
"meta": {
"templateCredsSetupCompleted": true
},
"id": "8jN0qjAJTp00dHND",
"tags": []
}
Credentials you'll need
Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.
gmailOAuth2
For the full experience, including quality scoring and batch-install features for each workflow, upgrade to Pro.
About this workflow
veilleur — Gmail → Scraping. Uses gmailTrigger, httpRequest, readWriteFile. Event-driven trigger; 9 nodes.
Source: https://github.com/allienna/veilleur/blob/e66d90e21bce206c2bbc30699a51d691335c26e0/n8n/workflow-veilleur.json — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
Limit. Uses gmailTrigger, httpRequest, limit, respondToWebhook. Event-driven trigger; 40 nodes.
This workflow is ideal for IT professionals, security analysts, and organizations looking to enhance their email security practices. It is particularly useful for those who need to analyze Gmail email…
AICARE Email Blast System. Uses googleDrive, httpRequest, googleSheets, gmail. Event-driven trigger; 39 nodes.
Client Form → Draft → Approve → Sign → Deliver, fully automated
ResultAnalyser. Uses gmailTrigger, executeCommand, httpRequest, gmail. Event-driven trigger; 23 nodes.