This workflow corresponds to n8n.io template #15252 — we link there as the canonical source.
This workflow follows the Google Drive → Google Sheets recipe pattern — see all workflows that pair these two integrations.
The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, and activate it. Full import guide →
{
"meta": {
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "ef9a086f-9fe1-4a9c-81ad-735d4fedb43c",
"name": "Wait",
"type": "n8n-nodes-base.wait",
"position": [
1088,
4656
],
"parameters": {
"unit": "minutes",
"amount": 2
},
"typeVersion": 1.1
},
{
"id": "45942447-fbef-46a1-9438-3b3cd0c25480",
"name": "Wait1",
"type": "n8n-nodes-base.wait",
"position": [
416,
4560
],
"parameters": {},
"typeVersion": 1.1
},
{
"id": "305220ed-6098-4df7-8f41-4c65a7b3bafc",
"name": "Compression",
"type": "n8n-nodes-base.compression",
"position": [
1728,
4368
],
"parameters": {},
"typeVersion": 1.1
},
{
"id": "e355f459-1bf2-476f-a000-7718740b53b5",
"name": "PSI Parser",
"type": "n8n-nodes-base.code",
"position": [
2480,
4256
],
"parameters": {
"jsCode": "// ============================================================\n// PSI EXTRACTOR & MERGER \u2014 Production v1.0\n// Node name in n8n: \"PSI Parser\"\n// Place AFTER \"Page Speed Insights\" HTTP node\n// BEFORE \"Report Builder2\"\n//\n// This node:\n// 1. Reads the PSI API response from the HTTP node (current input)\n// 2. Reads the audit JSON from \"SEO Audit Parser\" node by reference\n// 3. Extracts all key PSI metrics, scores, and opportunities\n// 4. Merges PSI data INTO the audit JSON\n// 5. Outputs a single complete audit+psi object for the Report Builder\n//\n// CWV Thresholds (Google Official):\n// LCP: Good < 2500ms | Needs Improvement 2500-4000ms | Poor > 4000ms\n// CLS: Good < 0.1 | Needs Improvement 0.1-0.25 | Poor > 0.25\n// TBT: Good < 200ms | Needs Improvement 200-600ms | Poor > 600ms\n// FCP: Good < 1800ms | Needs Improvement 1800-3000ms | Poor > 3000ms\n// TTFB: Good < 800ms | Needs Improvement 800-1800ms | Poor > 1800ms\n// SI: Good < 3400ms | Needs Improvement 3400-5800ms | Poor > 5800ms\n// ============================================================\n\n// Get PSI response from HTTP node (current node input)\nconst httpResponse = $input.first().json;\n\n// Handle both n8n wrapper format {body, headers, statusCode}\n// and direct response format\nconst psiBody = httpResponse.body || httpResponse;\nconst lr = psiBody.lighthouseResult || {};\nconst audits = lr.audits || {};\nconst cats = lr.categories || {};\nconst loadExp = psiBody.loadingExperience || {};\nconst cfgSettings = lr.configSettings || {};\n\n// Get audit JSON from parser node (by node reference)\n// IMPORTANT: change \"SEO Audit Parser\" if your node has a different name\nlet audit;\ntry {\n audit = $('SEO Audit Parser').first().json;\n} catch (e) {\n throw new Error('Cannot find SEO Audit Parser output. 
Make sure the parser node is named exactly \"SEO Audit Parser\" and has run successfully.');\n}\n\n// \u2500\u2500 HELPER FUNCTIONS \u2500\u2500\nconst safeScore = (score) => score !== null && score !== undefined ? Math.round(score * 100) : null;\nconst safeMs = (val) => val ? Math.round(val) : 0;\n\n// CWV threshold classifier\nfunction cwvStatus(metric, value) {\n const thresholds = {\n lcp: [2500, 4000],\n cls: [0.1, 0.25],\n tbt: [200, 600],\n fcp: [1800, 3000],\n ttfb: [800, 1800],\n si: [3400, 5800],\n tti: [3800, 7300],\n fid: [100, 300],\n };\n const t = thresholds[metric];\n if (!t || value === null || value === undefined) return 'unknown';\n if (value <= t[0]) return 'good';\n if (value <= t[1]) return 'needs_improvement';\n return 'poor';\n}\n\n// \u2500\u2500 EXTRACT METRICS \u2500\u2500\nconst metricsItems = audits.metrics?.details?.items || [{}];\nconst m = metricsItems[0] || {};\n\n// Core Web Vitals (Lab data from Lighthouse)\nconst lcp_ms = safeMs(m.largestContentfulPaint);\nconst fcp_ms = safeMs(m.firstContentfulPaint);\nconst tbt_ms = safeMs(m.totalBlockingTime);\nconst cls_val = parseFloat((m.cumulativeLayoutShift || 0).toFixed(4));\nconst si_ms = safeMs(m.speedIndex);\nconst tti_ms = safeMs(m.interactive);\nconst ttfb_ms = safeMs(m.timeToFirstByte);\n\n// \u2500\u2500 EXTRACT FIELD DATA (CrUX) IF AVAILABLE \u2500\u2500\nconst fieldMetrics = loadExp.metrics || {};\nconst fieldLCP = fieldMetrics.LARGEST_CONTENTFUL_PAINT_MS;\nconst fieldFID = fieldMetrics.FIRST_INPUT_DELAY_MS;\nconst fieldCLS = fieldMetrics.CUMULATIVE_LAYOUT_SHIFT_SCORE;\nconst fieldINP = fieldMetrics.INTERACTION_TO_NEXT_PAINT;\nconst fieldFCP = fieldMetrics.FIRST_CONTENTFUL_PAINT_MS;\nconst fieldTTFB = fieldMetrics.EXPERIMENTAL_TIME_TO_FIRST_BYTE || fieldMetrics.TIME_TO_FIRST_BYTE_MS;\n\n// \u2500\u2500 EXTRACT OPPORTUNITIES \u2500\u2500\n// Unused JS\nconst unusedJsAudit = audits['unused-javascript'] || {};\nconst unusedCssAudit = audits['unused-css-rules'] || {};\nconst 
renderBlkAudit = audits['render-blocking-insight'] || audits['render-blocking-resources'] || {};\nconst cacheAudit = audits['cache-insight'] || audits['uses-long-cache-ttl'] || {};\nconst largeImages = audits['uses-responsive-images'] || audits['uses-optimized-images'] || {};\nconst unusedImages = audits['offscreen-images'] || {};\nconst textCompress = audits['uses-text-compression'] || {};\nconst webpImages = audits['uses-webp-images'] || {};\n\n// Opportunity savings\nconst totalOpportunitySavingsMs = [\n unusedJsAudit.metricSavings?.LCP || 0,\n unusedCssAudit.metricSavings?.LCP || 0,\n renderBlkAudit.numericValue || 0,\n].reduce((a, b) => a + b, 0);\n\n// \u2500\u2500 RESOURCE SUMMARY \u2500\u2500\nconst rsItems = audits['resource-summary']?.details?.items || [];\nconst totalItem = rsItems.find(i => i.resourceType === 'total') || {};\nconst resourceBreakdown = rsItems\n .filter(i => i.resourceType !== 'total' && i.requestCount > 0)\n .map(i => ({ type: i.label, requests: i.requestCount, size_kb: Math.round(i.transferSize / 1024) }));\n\n// \u2500\u2500 DIAGNOSTICS \u2500\u2500\nconst longTasksDisplay = audits['long-tasks']?.displayValue || '0';\nconst layoutShiftsDisplay = audits['layout-shifts']?.displayValue || '0';\nconst mainThreadMs = Math.round(parseFloat(audits['mainthread-work-breakdown']?.displayValue || '0') * 1000);\nconst jsBootupMs = safeMs(audits['bootup-time']?.numericValue);\nconst domSize = safeMs(audits['dom-size']?.numericValue);\nconst totalWeight = safeMs(audits['total-byte-weight']?.numericValue);\n\n// \u2500\u2500 UNSIZED IMAGES \u2500\u2500\nconst unsizedImages = (audits['unsized-images']?.details?.items || []).slice(0, 5).map(i => ({\n url: i.url || '', selector: i.node?.selector || ''\n}));\n\n// \u2500\u2500 LCP ELEMENT \u2500\u2500\nlet lcpElement = '';\nlet lcpIssue = '';\nconst lcpDiscovery = audits['lcp-discovery-insight'] || {};\nconst lcpItems = lcpDiscovery.details?.items || [];\nif (lcpItems.length > 0) {\n // First item has 
properties, second has the element details\n const props = lcpItems[0]?.items;\n if (props?.priorityHinted?.value === false) lcpIssue += 'Missing fetchpriority=high. ';\n if (lcpItems[1]?.snippet) lcpElement = lcpItems[1].snippet.substring(0, 100);\n else if (lcpItems[1]?.selector) lcpElement = lcpItems[1].selector;\n}\n\n// LCP breakdown timing\nconst lcpBreakdown = [];\nconst lbItems = audits['lcp-breakdown-insight']?.details?.items || [];\nif (lbItems[0]?.items) {\n lbItems[0].items.forEach(part => {\n lcpBreakdown.push({ label: part.label, duration_ms: Math.round(part.duration) });\n });\n}\n\n// \u2500\u2500 BUILD PSI OBJECT \u2500\u2500\nconst psi = {\n // Meta\n tested_url: lr.requestedUrl || audit.meta.website,\n fetch_time: lr.fetchTime || '',\n strategy: cfgSettings.formFactor || cfgSettings.emulatedFormFactor || 'mobile',\n lighthouse_version: lr.lighthouseVersion || '',\n \n // Overall performance score\n performance_score: safeScore(cats.performance?.score),\n \n // Core Web Vitals \u2014 Lab (Lighthouse)\n lcp: {\n ms: lcp_ms,\n display: audits['largest-contentful-paint']?.displayValue || `${(lcp_ms/1000).toFixed(1)} s`,\n score: safeScore(audits['largest-contentful-paint']?.score),\n status: cwvStatus('lcp', lcp_ms),\n element: lcpElement,\n issue: lcpIssue,\n breakdown: lcpBreakdown,\n },\n fcp: {\n ms: fcp_ms,\n display: audits['first-contentful-paint']?.displayValue || `${(fcp_ms/1000).toFixed(1)} s`,\n score: safeScore(audits['first-contentful-paint']?.score),\n status: cwvStatus('fcp', fcp_ms),\n },\n tbt: {\n ms: tbt_ms,\n display: audits['total-blocking-time']?.displayValue || `${tbt_ms} ms`,\n score: safeScore(audits['total-blocking-time']?.score),\n status: cwvStatus('tbt', tbt_ms),\n },\n cls: {\n value: cls_val,\n display: audits['cumulative-layout-shift']?.displayValue || cls_val.toFixed(3),\n score: safeScore(audits['cumulative-layout-shift']?.score),\n status: cwvStatus('cls', cls_val),\n shifts: parseInt(layoutShiftsDisplay) || 0,\n 
},\n si: {\n ms: si_ms,\n display: audits['speed-index']?.displayValue || `${(si_ms/1000).toFixed(1)} s`,\n score: safeScore(audits['speed-index']?.score),\n status: cwvStatus('si', si_ms),\n },\n tti: {\n ms: tti_ms,\n display: audits['interactive']?.displayValue || `${(tti_ms/1000).toFixed(1)} s`,\n score: safeScore(audits['interactive']?.score),\n status: cwvStatus('tti', tti_ms),\n },\n ttfb: {\n ms: ttfb_ms,\n display: `${ttfb_ms} ms`,\n status: cwvStatus('ttfb', ttfb_ms),\n raw_display: audits['server-response-time']?.displayValue || `${ttfb_ms} ms`,\n },\n\n // Field data (CrUX \u2014 real users, may not be available for small sites)\n field_data_available: loadExp.overall_category ? true : false,\n field_overall: loadExp.overall_category || null,\n field_lcp: fieldLCP ? { percentile: fieldLCP.percentile, category: fieldLCP.category } : null,\n field_fid: fieldFID ? { percentile: fieldFID.percentile, category: fieldFID.category } : null,\n field_cls: fieldCLS ? { percentile: fieldCLS.percentile, category: fieldCLS.category } : null,\n field_inp: fieldINP ? 
{ percentile: fieldINP.percentile, category: fieldINP.category } : null,\n\n // Opportunities\n opportunities: {\n unused_javascript: {\n savings_kb: Math.round((unusedJsAudit.numericValue || 0) / 1024),\n savings_lcp_ms: unusedJsAudit.metricSavings?.LCP || 0,\n display: unusedJsAudit.displayValue || '',\n score: unusedJsAudit.score,\n items: (unusedJsAudit.details?.items || []).slice(0, 5).map(i => ({\n url: i.url || '', savings_kb: Math.round((i.wastedBytes || 0) / 1024)\n }))\n },\n unused_css: {\n savings_kb: Math.round((unusedCssAudit.numericValue || 0) / 1024),\n display: unusedCssAudit.displayValue || '',\n score: unusedCssAudit.score,\n items: (unusedCssAudit.details?.items || []).slice(0, 5).map(i => ({\n url: i.url || '', savings_kb: Math.round((i.wastedBytes || 0) / 1024)\n }))\n },\n render_blocking: {\n savings_ms: Math.round(renderBlkAudit.numericValue || 0),\n display: renderBlkAudit.displayValue || '',\n score: renderBlkAudit.score,\n items: (renderBlkAudit.details?.items || []).slice(0, 8).map(i => ({\n url: i.url || '', wasted_ms: Math.round(i.wastedMs || 0),\n size_kb: Math.round((i.totalBytes || 0) / 1024)\n }))\n },\n cache_policy: {\n savings_kb: Math.round((cacheAudit.numericValue || 0) / 1024),\n display: cacheAudit.displayValue || '',\n score: cacheAudit.score,\n },\n total_potential_lcp_savings_ms: Math.round(totalOpportunitySavingsMs),\n },\n\n // Diagnostics\n diagnostics: {\n long_tasks: parseInt(longTasksDisplay) || 0,\n layout_shifts: parseInt(layoutShiftsDisplay) || 0,\n main_thread_ms: mainThreadMs,\n js_execution_ms: jsBootupMs,\n total_requests: totalItem.requestCount || 0,\n total_size_kb: Math.round((totalItem.transferSize || totalWeight || 0) / 1024),\n unsized_images: unsizedImages,\n },\n\n // Page weight breakdown\n resource_breakdown: resourceBreakdown,\n};\n\n// \u2500\u2500 MERGE PSI INTO AUDIT \u2500\u2500\nconst fullAudit = {\n ...audit,\n psi,\n};\n\nreturn [{ json: fullAudit }];"
},
"typeVersion": 2
},
{
"id": "0286ef80-3d05-4d88-b0ff-f70470cc83fb",
"name": "Page Speed Insights",
"type": "n8n-nodes-base.httpRequest",
"position": [
2224,
4256
],
"parameters": {
"url": "https://pagespeedonline.googleapis.com/pagespeedonline/v5/runPagespeed",
"options": {},
"sendQuery": true,
"sendHeaders": true,
"queryParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $('Input Cleaner').item.json.URL }}"
},
{
"name": "category",
"value": "BEST_PRACTICES"
},
{
"name": "strategy",
"value": "MOBILE"
},
{
"name": "key",
"value": "Your-psi-api-key"
},
{
"name": "category",
"value": "PERFORMANCE"
},
{
"name": "category",
"value": "ACCESSIBILITY"
},
{
"name": "category",
"value": "SEO"
},
{
"name": "category",
"value": "BEST_PRACTICES"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Accept",
"value": "application/json"
}
]
}
},
"typeVersion": 4.4
},
{
"id": "d627609e-4d8f-4820-af09-a44e8383f02f",
"name": "Input Cleaner",
"type": "n8n-nodes-base.set",
"position": [
-32,
4560
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "25042583-ced8-468e-972a-b0ded3b3f7f3",
"name": "URL",
"type": "string",
"value": "={{ $json.extracted_url }}"
},
{
"id": "38f01e54-2d4a-4f34-b758-e359423037b3",
"name": "watermark",
"type": "string",
"value": "AuditCore"
},
{
"id": "b3838b53-cb52-40ee-97ad-6f1689e58d91",
"name": "message_id",
"type": "string",
"value": "={{ $json.event_ts }}"
},
{
"id": "a6b25987-609a-45cc-b4ef-9b85b2f717e2",
"name": "channel_id",
"type": "string",
"value": "={{ $json.channel }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "28010a06-1edf-4125-a061-d23371cef8c7",
"name": "Switch",
"type": "n8n-nodes-base.switch",
"position": [
864,
4432
],
"parameters": {
"rules": {
"values": [
{
"conditions": {
"options": {
"version": 3,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "4e6aa3b9-9339-4287-92c3-7285facd9290",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.status }}",
"rightValue": "done"
}
]
}
},
{
"conditions": {
"options": {
"version": 3,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "f7c4019b-5ef2-4e8f-ad59-0d21385470ba",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.status }}",
"rightValue": "timeout"
}
]
}
},
{
"conditions": {
"options": {
"version": 3,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "dffee38b-04e4-474b-96b0-096cbf6489e5",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.status }}",
"rightValue": "failed"
}
]
}
},
{
"conditions": {
"options": {
"version": 3,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "9df4f4d4-d031-4f61-9498-5fd4ac488511",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.status }}",
"rightValue": "running"
}
]
}
}
]
},
"options": {}
},
"typeVersion": 3.4
},
{
"id": "73b8ab9f-223a-4711-9568-a133e91dc281",
"name": "Start Crawl",
"type": "n8n-nodes-base.httpRequest",
"maxTries": null,
"position": [
192,
4560
],
"parameters": {
"url": "https://frog-api.salmanpro.me/v1/crawl/start",
"method": "POST",
"options": {},
"jsonBody": "={\n \"url\": \"{{ $json.URL }}\",\n \"exportTabs\": [\n \"Internal:HTML\",\n \"Response Codes:Client Error (4xx)\",\n \"Response Codes:Server Error (5xx)\",\n \"Response Codes:Redirection (3xx)\",\n \"Response Codes:Blocked by Robots.txt\",\n \"Response Codes:Internal Redirect Chain\",\n \"Page Titles:Missing\",\n \"Page Titles:Duplicate\",\n \"Page Titles:Over X Characters\",\n \"Meta Description:Missing\",\n \"Meta Description:Duplicate\",\n \"Meta Description:Over X Characters\",\n \"Meta Description:Below X Characters\",\n \"H1:Missing\",\n \"H1:Duplicate\",\n \"H1:Multiple\",\n \"H2:Missing\",\n \"Images:Missing Alt Text\",\n \"Images:Over X KB\",\n \"Canonicals:Missing\",\n \"Canonicals:Non-Indexable Canonical\",\n \"Canonicals:Multiple Conflicting\",\n \"Directives:Noindex\",\n \"Content:Exact Duplicates\",\n \"Content:Near Duplicates\",\n \"Content:Low Content Pages\",\n \"Security:HTTP URLs\",\n \"Security:Mixed Content\"\n ],\n \"bulkExports\": [\n \"Response Codes:Internal & External:Client Error (4xx) Inlinks\"\n ],\n \"reports\": [\n \"Crawl Overview\",\n \"Issues Overview\",\n \"Redirects:Redirect Chains\",\n \"Redirects:Redirect & Canonical Chains\",\n \"Canonicals:Non-Indexable Canonicals\"\n ],\n \"overwrite\": true\n}",
"sendBody": true,
"sendHeaders": true,
"specifyBody": "json",
"headerParameters": {
"parameters": [
{
"name": "Content-Type",
"value": "application/json"
},
{
"name": "Authorization",
"value": "Bearer YOUR_TOKEN_HERE"
}
]
}
},
"retryOnFail": false,
"typeVersion": 4.2,
"waitBetweenTries": 5000
},
{
"id": "89140d15-c9aa-4058-aead-3ffeeaec1894",
"name": "Check Crawl",
"type": "n8n-nodes-base.httpRequest",
"maxTries": 2,
"position": [
640,
4560
],
"parameters": {
"url": "=https://frog-api.salmanpro.me/v1/crawl/status/{{ $json.task_id }}",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Content-Type",
"value": "application/json"
},
{
"name": "Authorization",
"value": "Bearer YOUR_TOKEN_HERE"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2,
"waitBetweenTries": 2000
},
{
"id": "4bac123f-0f66-4bb7-b9d5-95b9e4b05763",
"name": "Fetch",
"type": "n8n-nodes-base.httpRequest",
"maxTries": null,
"position": [
1536,
4368
],
"parameters": {
"url": "=https://frog-api.salmanpro.me/v1/crawl/download/{{ $('Arrange Data').item.json.task_id }}",
"options": {
"response": {
"response": {
"responseFormat": "file"
}
}
},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Content-Type",
"value": "application/json"
},
{
"name": "Authorization",
"value": "Bearer YOUR_TOKEN_HERE"
}
]
}
},
"retryOnFail": false,
"typeVersion": 4.2,
"waitBetweenTries": 5000
},
{
"id": "4560d1f4-c544-4940-8f5a-c0ee7e05960e",
"name": "SEO Audit Parser",
"type": "n8n-nodes-base.code",
"position": [
1968,
4256
],
"parameters": {
"jsCode": "// ================================================================\n// SEO AUDIT PARSER \u2014 Production v5.3\n// FIXES FROM REPORT OVERVIEW REVIEW:\n//\n// FIX 1: real4xxCount \u2014 was broken_link_sources.length (capped at 25)\n// now = getCount('response_codes_client_error_(4xx).csv')\n// \u2192 Cover and health score now show 111, not 25\n//\n// FIX 2: broken_link_sources \u2014 now includes ALL 4xx (internal + external)\n// separated by issue_type field: 'Internal 404' or 'External 404'\n// Table shows top 25 by inlinks across both categories\n//\n// FIX 3: Thin content threshold standardised to 300 words everywhere\n// (was 200 in issue card label, 300 in parser logic \u2014 now consistent)\n//\n// FIX 4: Depth buckets \u2014 crawlOverview.depth used as primary source,\n// falls back to iStats.depthBuckets if crawl_overview has 0s\n//\n// FIX 5: Health score 4xx \u2014 uses raw SF 4xx count (all 4xx, not just 25)\n//\n// UNCHANGED: Everything else from v5.1 \u2014 all fields preserved\n// ================================================================\n\nconst item = $input.first();\nconst binary = item.binary;\nif (!binary) throw new Error('No binary data. 
Check Compression node is connected.');\n\n// \u2500\u2500 Line parser \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nfunction parseLine(line) {\n const result = [];\n let cur = '', inQ = false;\n for (let i = 0; i < line.length; i++) {\n const c = line[i];\n if (c === '\"') {\n if (inQ && line[i + 1] === '\"') { cur += '\"'; i++; }\n else inQ = !inQ;\n } else if (c === ',' && !inQ) { result.push(cur.trim()); cur = ''; }\n else { cur += c; }\n }\n result.push(cur.trim());\n return result;\n}\n\n// \u2500\u2500 Chunk-based CSV parser \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nfunction parseCSVFromBuffer(buffer, maxRows = 99999, earlyExit = false) {\n const CHUNK = 65536;\n const headers = [], rows = [];\n let totalRows = 0, hParsed = false, rem = '';\n let start = (buffer.length >= 3 && buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF) ? 
3 : 0;\n\n for (let off = start; off < buffer.length; off += CHUNK) {\n const chunk = buffer.slice(off, Math.min(off + CHUNK, buffer.length)).toString('utf-8');\n const combined = rem + chunk;\n const nli = combined.lastIndexOf('\\n');\n if (nli === -1) { rem = combined; continue; }\n const lines = combined.substring(0, nli + 1).split('\\n');\n rem = combined.substring(nli + 1);\n for (const raw of lines) {\n const line = raw.replace(/\\r$/, '').trim();\n if (!line) continue;\n if (!hParsed) { parseLine(line).forEach(h => headers.push(h.replace(/^\\uFEFF/, ''))); hParsed = true; }\n else {\n totalRows++;\n if (rows.length < maxRows) {\n const vals = parseLine(line);\n const obj = {};\n headers.forEach((h, i) => { obj[h] = (vals[i] ?? '').trim(); });\n rows.push(obj);\n } else if (earlyExit) { return { headers, rows, totalRows: rows.length }; }\n }\n }\n }\n if (rem.trim() && hParsed) {\n totalRows++;\n if (rows.length < maxRows) {\n const vals = parseLine(rem.replace(/\\r$/, '').trim());\n const obj = {};\n headers.forEach((h, i) => { obj[h] = (vals[i] ?? '').trim(); });\n rows.push(obj);\n }\n }\n return { headers, rows, totalRows };\n}\n\n// \u2500\u2500 Streaming internal_html aggregator \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nfunction processInternalHTML(buffer) {\n const CHUNK = 65536;\n let headers = null, ci = {}, hParsed = false;\n let totalRows = 0, sumRT = 0, sumWC = 0;\n let indexableCount = 0, nonIndexableCount = 0, h1 = 0, h2 = 0;\n const speedBuckets = { excellent:0, good:0, needs_work:0, slow:0, critical:0 };\n const wcBuckets = { empty:0, thin:0, medium:0, good:0, rich:0 };\n const depthBuckets = { d1:0, d2:0, d3:0, d4:0, d5plus:0 };\n const slowestArr = [], orphanArr = [], thinArr = [];\n\n const sf = v => { const n = parseFloat(v || '0'); return isNaN(n) ? 0 : n; };\n const si = v => { const n = parseInt(v || '0'); return isNaN(n) ? 
0 : n; };\n\n function processRow(vals) {\n if (!vals || vals.length < 2) return;\n const rt = sf(vals[ci.rt]);\n const wc = si(vals[ci.wc]);\n const depth = si(vals[ci.depth]);\n const inlinks = si(ci.inlinks >= 0 ? vals[ci.inlinks] : (ci.uInlinks >= 0 ? vals[ci.uInlinks] : '0'));\n const hv = (ci.hv >= 0 ? vals[ci.hv] : '') || '';\n const url = (ci.url >= 0 ? vals[ci.url] : '') || '';\n const idx = (ci.idx >= 0 ? vals[ci.idx] : '') || '';\n const status = si(ci.status >= 0 ? vals[ci.status] : '0');\n const content = (ci.content >= 0 ? vals[ci.content] : '') || '';\n const sizeB = si(ci.size >= 0 ? vals[ci.size] : '0');\n\n totalRows++; sumRT += rt; sumWC += wc;\n if (idx === 'Indexable') indexableCount++;\n else if (idx === 'Non-Indexable') nonIndexableCount++;\n\n if (rt < 0.2) speedBuckets.excellent++;\n else if (rt < 0.5) speedBuckets.good++;\n else if (rt < 1.0) speedBuckets.needs_work++;\n else if (rt < 2.0) speedBuckets.slow++;\n else speedBuckets.critical++;\n\n if (rt >= 1.5 && slowestArr.length < 500) slowestArr.push({ url, rt: rt.toFixed(3), size_kb: Math.round(sizeB / 1024) });\n\n const isHtml = !content || content.toLowerCase().includes('html');\n if (isHtml) {\n if (wc === 0) wcBuckets.empty++;\n else if (wc <= 200) wcBuckets.thin++;\n else if (wc <= 600) wcBuckets.medium++;\n else if (wc <= 1500) wcBuckets.good++;\n else wcBuckets.rich++;\n // FIX: threshold = 300 words (consistent everywhere)\n if (wc > 0 && wc < 300 && status === 200 && idx === 'Indexable' && thinArr.length < 500)\n thinArr.push({ url, word_count: wc });\n }\n\n if (depth <= 1) depthBuckets.d1++;\n else if (depth === 2) depthBuckets.d2++;\n else if (depth === 3) depthBuckets.d3++;\n else if (depth === 4) depthBuckets.d4++;\n else depthBuckets.d5plus++;\n\n if (inlinks === 0 && idx === 'Indexable' && status === 200 && url && isHtml && orphanArr.length < 500)\n orphanArr.push({ url, word_count: wc, depth });\n\n if (hv.includes('2')) h2++; else h1++;\n }\n\n let rem = '', start = 
(buffer.length >= 3 && buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF) ? 3 : 0;\n\n for (let off = start; off < buffer.length; off += CHUNK) {\n const chunk = buffer.slice(off, Math.min(off + CHUNK, buffer.length)).toString('utf-8');\n const combined = rem + chunk;\n const nli = combined.lastIndexOf('\\n');\n if (nli === -1) { rem = combined; continue; }\n const lines = combined.substring(0, nli + 1).split('\\n');\n rem = combined.substring(nli + 1);\n for (const raw of lines) {\n const line = raw.replace(/\\r$/, '').trim();\n if (!line) continue;\n if (!hParsed) {\n headers = parseLine(line);\n const fi = (...names) => { for (const n of names) { const i = headers.indexOf(n); if (i !== -1) return i; } return -1; };\n ci = {\n url: fi('Address'),\n rt: fi('Response Time'),\n wc: fi('Word Count'),\n depth: fi('Crawl Depth'),\n inlinks: fi('Inlinks'),\n uInlinks: fi('Unique Inlinks'),\n hv: fi('HTTP Version'),\n idx: fi('Indexability'),\n status: fi('Status Code'),\n content: fi('Content'),\n size: fi('Size (bytes)'),\n };\n hParsed = true;\n } else { processRow(parseLine(line)); }\n }\n }\n if (rem.replace(/\\r$/, '').trim() && hParsed) processRow(parseLine(rem.replace(/\\r$/, '').trim()));\n\n slowestArr.sort((a, b) => parseFloat(b.rt) - parseFloat(a.rt));\n thinArr.sort((a, b) => a.word_count - b.word_count);\n\n return {\n totalRows, sumRT, sumWC,\n indexableCount, nonIndexableCount,\n speedBuckets, wcBuckets, depthBuckets,\n slowestPages: slowestArr.slice(0, 10),\n orphanPages: orphanArr.slice(0, 25),\n thinPages: thinArr.slice(0, 15),\n http1Count: h1, http2Count: h2,\n };\n}\n\n// \u2500\u2500 Process internal_html.csv \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nconst ihKey = Object.keys(binary).find(k => binary[k]?.fileName === 'internal_html.csv');\nlet iStats = {\n totalRows:0, sumRT:0, 
sumWC:0, indexableCount:0, nonIndexableCount:0,\n speedBuckets:{excellent:0,good:0,needs_work:0,slow:0,critical:0},\n wcBuckets:{empty:0,thin:0,medium:0,good:0,rich:0},\n depthBuckets:{d1:0,d2:0,d3:0,d4:0,d5plus:0},\n slowestPages:[], orphanPages:[], thinPages:[], http1Count:0, http2Count:0,\n};\nif (ihKey) {\n const buf = await this.helpers.getBinaryDataBuffer(0, ihKey);\n iStats = processInternalHTML(buf);\n}\n\nconst totalInternal = iStats.totalRows || 1;\nconst indexableCount = iStats.indexableCount;\nconst nonIndexableCount = iStats.nonIndexableCount;\nconst speedBuckets = iStats.speedBuckets;\nconst wcBuckets = iStats.wcBuckets;\nconst depthBuckets = iStats.depthBuckets;\nconst slowestPages = iStats.slowestPages;\nconst thinContentPages = iStats.thinPages;\nconst http1Count = iStats.http1Count;\nconst http2Count = iStats.http2Count;\nconst avgResponseTime = totalInternal > 0 ? (iStats.sumRT / totalInternal).toFixed(3) : '0.000';\nconst avgWordCount = totalInternal > 0 ? Math.round(iStats.sumWC / totalInternal) : 0;\n\n// \u2500\u2500 File loading config \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nconst SKIP_FILES = new Set(['all_inlinks.csv', 'all_anchor_text.csv']);\nconst CORE_FILES = new Set([\n 'internal_html.csv', 'issues_overview_report.csv', 'crawl_overview.csv',\n 'client_error_(4xx)_inlinks.csv', 'response_codes_redirection_(3xx).csv',\n 'redirect_chains.csv', 'hreflang_all_issues.csv', 'page_titles_missing.csv',\n 'page_titles_duplicate.csv', 'page_titles_over_60_characters.csv',\n 'meta_description_missing.csv', 'meta_description_over_155_characters.csv',\n 'h1_missing.csv', 'h1_duplicate.csv', 'h1_multiple.csv', 'h2_missing.csv',\n 'images_missing_alt_text.csv', 'images_over_100_kb.csv', 'directives_noindex.csv',\n 'canonicals_missing.csv', 
'canonicals_multiple_conflicting.csv',\n 'canonicals_nonindexable_canonical.csv', 'content_exact_duplicates.csv',\n 'content_near_duplicates.csv', 'content_low_content_pages.csv',\n 'security_http_urls.csv', 'security_mixed_content.csv',\n 'response_codes_client_error_(4xx).csv', 'response_codes_server_error_(5xx).csv',\n 'response_codes_blocked_by_robots_txt.csv', 'redirect_and_canonical_chains.csv',\n 'page_titles_below_30_characters.csv', 'meta_description_duplicate.csv',\n 'meta_description_below_70_characters.csv',\n]);\nconst SIZE_SKIP_THRESHOLD = 30 * 1024 * 1024;\nconst SAMPLE_LIMITS = {\n 'client_error_(4xx)_inlinks.csv': 25000,\n 'response_codes_redirection_(3xx).csv': 25000,\n};\n\nconst files = {};\nfor (const key of Object.keys(binary)) {\n const file = binary[key];\n if (!file?.fileName || file.fileName === 'internal_html.csv') continue;\n if (SKIP_FILES.has(file.fileName)) { files[file.fileName] = { headers:[], rows:[], totalRows:0, skipped:true }; continue; }\n const fileBytes = file.fileSize || 0;\n if (fileBytes > SIZE_SKIP_THRESHOLD && !CORE_FILES.has(file.fileName)) {\n files[file.fileName] = { headers:[], rows:[], totalRows:0, skipped:true, reason:`auto_skipped_${Math.round(fileBytes/1024/1024)}MB` };\n continue;\n }\n let buf;\n try { buf = await this.helpers.getBinaryDataBuffer(0, key); }\n catch (e) { files[file.fileName] = { headers:[], rows:[], totalRows:0, error:e.message }; continue; }\n files[file.fileName] = parseCSVFromBuffer(buf, SAMPLE_LIMITS[file.fileName] ?? 
99999, false);\n}\n\n// \u2500\u2500 Accessors \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nconst getFile = n => files[n] || { headers:[], rows:[], totalRows:0 };\nconst getRows = n => getFile(n).rows || [];\nconst getCount = n => { if (n === 'internal_html.csv') return iStats.totalRows; return getFile(n).totalRows || 0; };\nconst getSample = (n, limit = 10) => getRows(n).slice(0, limit);\nconst safeInt = v => { const n = parseInt(v || '0'); return isNaN(n) ? 0 : n; };\nconst safeFloat = v => { const n = parseFloat(v || '0'); return isNaN(n) ? 0 : n; };\n\n// \u2500\u2500 Crawl overview parsing \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nconst ovFile = getFile('crawl_overview.csv');\nconst ovMeta = {};\nlet websiteUrl = '';\n\nif (ovFile.headers?.length >= 2) {\n const col0 = ovFile.headers[0], col1 = ovFile.headers[1];\n if (col1?.startsWith('http')) websiteUrl = col1;\n let currentSection = '';\n ovFile.rows.forEach(r => {\n const k = (r[col0] || '').trim(), v = (r[col1] || '').trim();\n if (k && v === '') { currentSection = k; }\n else if (k && v !== '') {\n ovMeta[k] = v;\n if (currentSection) ovMeta[`${currentSection}::${k}`] = v;\n }\n if (k === 'Site Crawled' && v.startsWith('http')) websiteUrl = v;\n });\n}\n\nfunction getDomain(url) {\n try { return new URL(url).hostname.toLowerCase().replace(/^www\\./, ''); }\n catch { return (url || '').replace(/https?:\\/\\//, '').split('/')[0].toLowerCase().replace(/^www\\./, ''); }\n}\nconst siteDomain = websiteUrl ? 
getDomain(websiteUrl) : '';\n\nconst ov = (section, key) => {\n if (section && ovMeta[`${section}::${key}`] !== undefined) return safeInt(ovMeta[`${section}::${key}`]);\n return safeInt(ovMeta[key] || '0');\n};\n\n// Build structured crawl overview object\nconst crawlOverview = {\n site_crawled: websiteUrl,\n crawl_date: ovMeta['Date'] || '',\n total_urls_encountered: ov('Summary','Total URLs Encountered') || ov('','Total URLs Encountered'),\n total_urls_crawled: ov('Summary','Total URLs Crawled') || ov('','Total URLs Crawled'),\n internal: {\n all: ov('Internal','All') || ov('Summary','Total Internal URLs'),\n html: ov('Internal','HTML'),\n javascript: ov('Internal','JavaScript'),\n css: ov('Internal','CSS'),\n images: ov('Internal','Images'),\n pdf: ov('Internal','PDF'),\n other: ov('Internal','Other'),\n indexable: ov('Summary','Total Internal Indexable URLs'),\n non_indexable: ov('Summary','Total Internal Non-Indexable URLs'),\n },\n external: {\n all: ov('External','All') || ov('Summary','Total External URLs'),\n html: ov('External','HTML'),\n },\n // FIX 3: Use crawlOverview response codes for the 4-card breakdown\n response_codes: {\n internal_2xx: ov('Response Codes','Internal Success (2xx)'),\n internal_3xx: ov('Response Codes','Internal Redirection (3xx)'),\n internal_4xx: ov('Response Codes','Internal Client Error (4xx)'),\n internal_5xx: ov('Response Codes','Internal Server Error (5xx)'),\n external_2xx: ov('Response Codes','External Success (2xx)'),\n external_3xx: ov('Response Codes','External Redirection (3xx)'),\n external_4xx: ov('Response Codes','External Client Error (4xx)'),\n no_response: ov('Response Codes','No Response') || ov('Response Codes','Internal No Response'),\n success_2xx: ov('Response Codes','Success (2xx)'),\n redirect_3xx: ov('Response Codes','Redirection (3xx)'),\n error_4xx: ov('Response Codes','Client Error (4xx)'),\n error_5xx: ov('Response Codes','Server Error (5xx)'),\n },\n security: {\n http_urls: ov('Security','HTTP 
URLs'),\n https_urls: ov('Security','HTTPS URLs'),\n mixed_content: ov('Security','Mixed Content'),\n },\n page_titles: {\n all: ov('Page Titles','All'),\n missing: ov('Page Titles','Missing'),\n duplicate:ov('Page Titles','Duplicate'),\n over_60: ov('Page Titles','Over 60 Characters'),\n below_30: ov('Page Titles','Below 30 Characters'),\n },\n meta_description: {\n missing: ov('Meta Description','Missing'),\n duplicate:ov('Meta Description','Duplicate'),\n over_155: ov('Meta Description','Over 155 Characters'),\n below_70: ov('Meta Description','Below 70 Characters'),\n },\n h1: { missing:ov('H1','Missing'), duplicate:ov('H1','Duplicate'), multiple:ov('H1','Multiple') },\n h2: { missing:ov('H2','Missing'), multiple:ov('H2','Multiple') },\n images_overview: {\n all: ov('Images','All'),\n over_100kb: ov('Images','Over 100 KB'),\n missing_alt: ov('Images','Missing Alt Text'),\n },\n canonicals: {\n all: ov('Canonicals','All'),\n missing: ov('Canonicals','Missing'),\n multiple_conflicting:ov('Canonicals','Multiple Conflicting'),\n non_indexable: ov('Canonicals','Non-Indexable Canonical'),\n },\n content: {\n low_content:ov('Content','Low Content Pages'),\n exact_dup: ov('Content','Exact Duplicates'),\n near_dup: ov('Content','Near Duplicates'),\n },\n // FIX 4: Depth from crawl overview (primary source)\n depth: {\n d0: ov('Depth (Clicks from Start URL)','0'),\n d1: ov('Depth (Clicks from Start URL)','1'),\n d2: ov('Depth (Clicks from Start URL)','2'),\n d3: ov('Depth (Clicks from Start URL)','3'),\n d4: ov('Depth (Clicks from Start URL)','4'),\n d5: ov('Depth (Clicks from Start URL)','5'),\n d6: ov('Depth (Clicks from Start URL)','6'),\n d7: ov('Depth (Clicks from Start URL)','7'),\n d8: ov('Depth (Clicks from Start URL)','8'),\n d9: ov('Depth (Clicks from Start URL)','9'),\n d10plus:ov('Depth (Clicks from Start URL)','10+'),\n },\n};\n\n// \u2500\u2500 Issues overview 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nconst SECURITY_HEADER_NOISE = new Set([\n 'Security: Missing Secure Referrer-Policy Header',\n 'Security: Missing X-Frame-Options Header',\n 'Security: Missing HSTS Header',\n 'Security: Missing X-Content-Type-Options Header',\n 'Security: Missing Content-Security-Policy Header',\n]);\nconst allIssuesRaw = getRows('issues_overview_report.csv').map(r => ({\n name: r['Issue Name'] || '',\n type: r['Issue Type'] || '',\n priority: r['Issue Priority'] || '',\n count: safeInt(r['URLs']),\n percent: r['% of Total'] || '0',\n is_noise: SECURITY_HEADER_NOISE.has(r['Issue Name'] || '')\n})).filter(i => i.count > 0);\n\nconst issuesOverview = allIssuesRaw.filter(i => !i.is_noise);\nconst securityHeaderNoise = allIssuesRaw.filter(i => i.is_noise);\nconst securityHeaderNoiseCount = securityHeaderNoise.reduce((s, i) => s + i.count, 0);\n\n// \u2500\u2500 Broken links \u2014 FIX 1 + FIX 2 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n// FIX 2: Build ALL 4xx broken links (internal AND external)\n// They are all the client's responsibility per user instruction\nconst brokenMapAll = {};\ngetRows('client_error_(4xx)_inlinks.csv').forEach(r => {\n const dest = r['Destination'] || '';\n const src = r['Source'] || '';\n const anchor = r['Anchor'] || '';\n const code = r['Status Code'] || '';\n if (!dest || dest.includes('cdn-cgi')) return;\n if (!brokenMapAll[dest]) {\n const destDomain = getDomain(dest);\n const isInternal = siteDomain && (destDomain === siteDomain || destDomain.endsWith('.' 
+ siteDomain));\n brokenMapAll[dest] = {\n broken_url: dest, status_code: code, anchor_text: anchor,\n source_pages: [], total_inlinks: 0,\n issue_type: isInternal ? 'Internal 404' : 'External 404'\n };\n }\n brokenMapAll[dest].total_inlinks++;\n if (src && !brokenMapAll[dest].source_pages.includes(src)) brokenMapAll[dest].source_pages.push(src);\n});\n\nconst REAL_BROKEN = new Set(['404', '410', '400']);\nconst BOT_CODES = new Set(['403', '429', '401', '999']);\n\n// All 404-type broken sorted by inlinks \u2014 both internal and external\nconst allBroken404 = Object.values(brokenMapAll)\n .filter(b => REAL_BROKEN.has(b.status_code))\n .sort((a, b) => b.total_inlinks - a.total_inlinks);\n\n// broken_link_sources = ALL real 404s (internal + external), top 25 by inlinks\n// issue_type field distinguishes them: 'Internal 404' or 'External 404'\nconst broken_link_sources = allBroken404.slice(0, 25).map(b => ({\n broken_url: b.broken_url,\n status_code: b.status_code,\n anchor_text: b.anchor_text,\n total_inlinks: b.total_inlinks,\n pages_affected: b.source_pages.length,\n source_pages: b.source_pages.slice(0, 5),\n issue_type: b.issue_type, // 'Internal 404' or 'External 404'\n}));\n\n// COUNT HIERARCHY (each serves a different purpose):\n//\n// 1. broken404Count = unique URLs returning 404/410/400 from client_error_(4xx)_inlinks.csv\n// = shown in \"Genuinely Broken Pages\" heading + priority action + cover\n// = internal_404 + external_404 unique URLs\n//\n// 2. internalOnly4xxCount = Internal 4xx from crawl_overview (for HEALTH SCORE only)\n// = 13 for spacem12 \u2014 pages YOUR server returns 4xx for\n//\n// 3. 
real4xxCount = Total ALL 4xx from crawl_overview (for the 4xx response card)\n// = 213 for spacem12 = internal(13) + external(200)\n// = includes 403, 404, 410, 429 etc.\n\nconst broken404Count = allBroken404.length; // unique 404/410/400 URLs \u2014 used in headings + priority\nconst internalOnly4xxCount = ov('Response Codes','Internal Client Error (4xx)') || allBroken404.filter(b => b.issue_type === 'Internal 404').length;\nconst real4xxCount = ov('Response Codes','Client Error (4xx)') || getCount('response_codes_client_error_(4xx).csv'); // ALL 4xx for card display\n\n// 403 links \u2014 THREE categories:\n// 1. internal_403_links = your OWN site pages blocking crawler (most serious \u2014 WAF/Cloudflare)\n// 2. bot_blocked_links = famous external domains that block all crawlers (not actionable)\n// 3. manual_check_links = unknown external domains \u2014 need manual check if really broken\n\n// Expanded famous domains list \u2014 sites that actively block crawlers\nconst FAMOUS_DOMAINS = [\n // Social\n 'twitter.com','x.com','t.co','facebook.com','instagram.com','tiktok.com',\n 'linkedin.com','pinterest.com','youtube.com','reddit.com','threads.net','snapchat.com',\n // News & Media\n 'dawn.com','bbc.com','bbc.co.uk','cnn.com','reuters.com','bloomberg.com','forbes.com',\n 'nytimes.com','theguardian.com','wsj.com','ft.com','businessinsider.com','cnbc.com',\n 'independent.co.uk','dailymail.co.uk','express.co.uk','telegraph.co.uk',\n // Tech & Reference\n 'wikipedia.org','github.com','amazon.com','amazon.co.uk','apple.com','google.com',\n 'microsoft.com','cloudflare.com','archive.org','quora.com','medium.com','substack.com',\n // Stock / Image sites\n 'gettyimages.com','shutterstock.com','alamy.com','istock.com','123rf.com',\n // Streaming\n 'hotstar.com','netflix.com','hulu.com','disneyplus.com','spotify.com',\n // E-commerce\n 'ebay.com','alibaba.com','aliexpress.com','etsy.com','walmart.com',\n // Gov / Edu\n 
'who.int','un.org','unesco.org','cswe.org','open.ac.uk','ox.ac.uk','harvard.edu',\n // Pakistan specific\n 'geo.tv','ary.digital','samaa.tv','dunyanews.tv','express.pk','jang.com.pk',\n 'thenews.com.pk','nation.com.pk','pakobserver.net','propakistani.pk',\n // Other commonly blocked\n 'nasaspaceflight.com','donaldjtrump.com','truthsocial.com','glassdoor.com',\n 'trustpilot.com','yelp.com','tripadvisor.com','booking.com','airbnb.com',\n];\nconst isFamousDomain = d => FAMOUS_DOMAINS.some(fd => d === fd || d.endsWith('.' + fd));\n\nconst allBroken403 = Object.values(brokenMapAll).filter(b => BOT_CODES.has(b.status_code));\n\nconst internal_403_links = allBroken403\n .filter(b => siteDomain && getDomain(b.broken_url).includes(siteDomain)).slice(0, 20)\n .map(b => ({ broken_url:b.broken_url, status_code:b.status_code, total_inlinks:b.total_inlinks, source_pages:b.source_pages.slice(0,3) }));\n\nconst external403 = allBroken403.filter(b => !(siteDomain && getDomain(b.broken_url).includes(siteDomain)));\n\n// Famous bot-blocked (safe to ignore \u2014 these sites block all crawlers by design)\nconst bot_blocked_links = external403\n .filter(b => isFamousDomain(getDomain(b.broken_url))).slice(0, 20)\n .map(b => ({ broken_url:b.broken_url, status_code:b.status_code, total_inlinks:b.total_inlinks, source_pages:b.source_pages.slice(0,3) }));\n\n// Unknown external 403s \u2014 need manual check (could be real broken links or bot-blocking)\nconst manual_check_links = external403\n .filter(b => !isFamousDomain(getDomain(b.broken_url))).slice(0, 20)\n .map(b => ({ broken_url:b.broken_url, status_code:b.status_code, total_inlinks:b.total_inlinks, source_pages:b.source_pages.slice(0,3) }));\n\n// \u2500\u2500 Redirects 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nconst redirectGroupMap = {};\nconst SOCIAL_NOISE = new Set(['facebook.com','twitter.com','x.com','instagram.com','linkedin.com','pinterest.com','youtube.com','tiktok.com','in.linkedin.com','uk.linkedin.com']);\nconst AFFILIATE_DOMAINS = new Set(['amzn.to','bit.ly','tinyurl.com','ow.ly','buff.ly','goo.gl','ift.tt','dlvr.it','tiny.cc','rb.gy','cutt.ly']);\n\nfunction isTrailingSlashOnly(from, to) { return from.replace(/\\/$/, '') === to.replace(/\\/$/, ''); }\n\ngetRows('response_codes_redirection_(3xx).csv').forEach(r => {\n const from = r['Address'] || '', to = r['Redirect URL'] || r['Redirect URI'] || '', code = r['Status Code'] || '';\n const inlinks = safeInt(r['Inlinks']);\n if (!from || !to) return;\n const fromD = getDomain(from), toD = getDomain(to);\n if (AFFILIATE_DOMAINS.has(fromD)) return;\n if (SOCIAL_NOISE.has(fromD) && isTrailingSlashOnly(from, to)) return;\n\n const fromIsHttp = from.startsWith('http://'), toIsHttps = to.startsWith('https://');\n const fromNoWww = !from.replace(/https?:\\/\\//, '').startsWith('www.');\n const toHasWww = to.replace(/https?:\\/\\//, '').startsWith('www.');\n const sameDomain = fromD === toD || ('www.' 
+ fromD) === toD || fromD === toD.replace(/^www\\./, '');\n const isClientFrom = siteDomain && (fromD.includes(siteDomain) || siteDomain.includes(fromD));\n const isClientTo = siteDomain && (toD.includes(siteDomain) || siteDomain.includes(toD));\n\n let groupKey, groupLabel, groupType;\n if (isClientFrom && isClientTo) {\n if (fromIsHttp && toIsHttps && fromNoWww && toHasWww && sameDomain) { groupKey='__http_nonwww__'; groupLabel='HTTP + non-www \u2192 HTTPS + www (infrastructure)'; groupType='infrastructure'; }\n else if (fromIsHttp && toIsHttps && sameDomain) { groupKey='__http_https__'; groupLabel='HTTP \u2192 HTTPS (infrastructure)'; groupType='infrastructure'; }\n else if (fromNoWww && toHasWww && sameDomain && !fromIsHttp) { groupKey='__nonwww_www__'; groupLabel='Non-www \u2192 www (infrastructure)'; groupType='infrastructure'; }\n else { groupKey=from; groupLabel=null; groupType='internal'; }\n } else if (!isClientFrom && !isClientTo) {\n if (SOCIAL_NOISE.has(fromD)) return;\n groupKey=from; groupLabel=null; groupType='external';\n } else { groupKey=from; groupLabel=null; groupType='external'; }\n\n if (!redirectGroupMap[groupKey]) redirectGroupMap[groupKey] = { type:groupType, label:groupLabel||from, redirect_to:to, code, count:0, total_inlinks:0, examples:[] };\n redirectGroupMap[groupKey].count++;\n redirectGroupMap[groupKey].total_inlinks += inlinks;\n if (redirectGroupMap[groupKey].examples.length < 3) redirectGroupMap[groupKey].examples.push({ from, to, inlinks, code });\n});\n\nconst sortOrder = { infrastructure:0, internal:1, external:2 };\nconst allRedirectGroups = Object.values(redirectGroupMap).sort((a, b) => (sortOrder[a.type]??9)-(sortOrder[b.type]??9) || b.total_inlinks-a.total_inlinks);\nconst redirectGrouped = allRedirectGroups.filter(r => r.type !== 'external').slice(0, 30);\nconst externalRedirectGrouped = allRedirectGroups.filter(r => r.type === 'external').slice(0, 15);\nconst internalRedirectCount = redirectGrouped.reduce((sum, g) => sum 
+ g.count, 0);\n\n// \u2500\u2500 H1 / title duplicates \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nconst h1TextMap = {};\ngetRows('h1_duplicate.csv').forEach(r => { const h=r['H1-1']||'', u=r['Address']||''; if(!h)return; if(!h1TextMap[h])h1TextMap[h]={h1:h,urls:[]}; h1TextMap[h].urls.push(u); });\nconst h1DuplicateGrouped = Object.values(h1TextMap).sort((a,b)=>b.urls.length-a.urls.length).slice(0,10).map(g=>({h1:g.h1,page_count:g.urls.length,sample_urls:g.urls.slice(0,3)}));\n\nconst secondH1Map = {};\ngetRows('h1_multiple.csv').forEach(r => { const h=r['H1-2']||''; if(h)secondH1Map[h]=(secondH1Map[h]||0)+1; });\nconst templateH1 = Object.entries(secondH1Map).sort((a,b)=>b[1]-a[1]).slice(0,3).map(([text,count])=>({text,count}));\n\nconst titleTextMap = {};\ngetRows('page_titles_duplicate.csv').forEach(r => { const t=r['Title 1']||'', u=r['Address']||''; if(!t)return; if(!titleTextMap[t])titleTextMap[t]={title:t,urls:[]}; titleTextMap[t].urls.push(u); });\nconst titleDuplicateGrouped = Object.values(titleTextMap).sort((a,b)=>b.urls.length-a.urls.length).slice(0,10).map(g=>({title:g.title,page_count:g.urls.length,sample_urls:g.urls.slice(0,3)}));\n\n// \u2500\u2500 Remaining sections \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\nconst missingAltRows = getSample('images_missing_alt_text.csv',15).map(r=>({url:r['Address']||'',size_kb:Math.round(safeInt(r['Size (bytes)'])/1024),inlinks:safeInt(r['IMG Inlinks'])}));\nconst oversizedRows = getSample('images_over_100_kb.csv',15).map(r=>({url:r['Address']||'',size_kb:Math.round(safeInt(r['Size (bytes)'])/1024),inlinks:safeInt(r['IMG 
Inlinks'])})).sort((a,b)=>b.size_kb-a.size_kb);\n\nconst INTENTIONAL_PATTERNS = ['/tag/','/author/','/page/','/feed/','/wp-','sitemap','login','admin','cart','checkout','wp-json','xmlrpc'];\nconst allNoindex = getRows('directives_noindex.csv').map(r=>({url:r['Address']||'',directive:r['Meta Robots 1']||''}));\nconst suspiciousNoindex = allNoindex.filter(r=>!INTENTIONAL_PATTERNS.some(p=>r.url.toLowerCase().includes(p)));\nconst noindexCount = suspiciousNoindex.length;\n\nconst canonicalMissingRows = getSample('canonicals_missing.csv',15).map(r=>({url:r['Address']||'',indexability:r['Indexability']||''}));\nconst canonicalMissingIndexable = canonicalMissingRows.filter(r=>r.indexability==='Indexable');\nconst metaTooLongRows = getSample('meta_description_over_155_characters.csv',15).map(r=>({url:r['Address']||'',meta:r['Meta Description 1']||'',length:safeInt(r['Meta Description 1 Length'])})).sort((a,b)=>b.length-a.length);\nconst metaMissingRows = getSample('meta_description_missing.csv',15).map(r=>({url:r['Address']||''}));\nconst errors4xxRows = getSample('response_codes_client_error_(4xx).csv',15).map(r=>({url:r['Address']||'',code:r['Status Code']||'',status:r['Status']||'',inlinks:safeInt(r['Inlinks'])})).sort((a,b)=>b.inlinks-a.inlinks);\nconst errors5xxRows = getSample('response_codes_server_error_(5xx).csv',10).map(r=>({url:r['Address']||'',code:r['Status Code']||'',status:r['Status']||'',inlinks:safeInt(r['Inlinks'])}));\nconst robotsBlockedRows = getSample('response_codes_blocked_by_robots_txt.csv',25).map(r=>({url:r['Address']||'',robots_line:r['Matched Robots.txt Line']||''}));\nconst robotsRuleMap = {};\nrobotsBlockedRows.forEach(r=>{const rule=r.robots_line||'Unknown rule'; robotsRuleMap[rule]=(robotsRuleMap[rule]||0)+1;});\nconst robotsRuleSummary = Object.entries(robotsRuleMap).sort((a,b)=>b[1]-a[1]).map(([rule,count])=>({rule,count}));\nconst httpPageRows = getSample('security_http_urls.csv',15).map(r=>({url:r['Address']||'',status_code:r['Status 
Code']||'',status:r['Status']||''}));\nconst lowContentRows = getSample('content_low_content_pages.csv',15).map(r=>({url:r['Address']||'',word_count:safeInt(r['Word Count'])})).sort((a,b)=>a.word_count-b.word_count);\nconst chainSample = getRows('redirect_chains.csv').slice(0,10).map(r=>({source:r['Source']||r['Address']||Object.values(r)[0]||'',hops:safeInt(r['Number of Redirects']||r['Hops']||'2'),final:r['Final Address']||r['Final URL']||'',loop:r['Loop']||'False'}));\n\n// External redirect TRUE count from crawl_overview (not from grouped table length)\n// TRUE redirect counts from crawl_overview.csv \u2014 authoritative, not capped by slice/group limits\nconst externalRedirectTrueCount = ov('Response Codes','External Redirection (3xx)') || externalRedirectGrouped.length;\nconst internalRedirectTrueCount = ov('Response Codes','Internal Redirection (3xx)') || internalRedirectCount;\n\nconst ORPHAN_IGNORE = ['/feed/','/wp-','/xmlrpc','/sitemap','admin','login','cart'];\nconst filteredOrphans = iStats.orphanPages.filter(p=>!ORPHAN_IGNORE.some(ig=>p.url.toLowerCase().includes(ig))).slice(0,25);\n\n// FIX 4: Depth buckets \u2014 use crawlOverview as primary, fall back to iStats\nconst coDepth = crawlOverview.depth;\nconst depthBucketsForReport = (coDepth.d1 + coDepth.d2 + coDepth.d3 + coDepth.d4 + coDepth.d5 > 0)\n ? 
{\n d1: coDepth.d0 + coDepth.d1, // depth 0 (homepage) + depth 1\n d2: coDepth.d2,\n d3: coDepth.d3,\n d4: coDepth.d4,\n d5plus:coDepth.d5 + coDepth.d6 + coDepth.d7 + coDepth.d8 + coDepth.d9 + coDepth.d10plus,\n }\n : depthBuckets; // fall back to iStats if crawl_overview depth is all zeros\n\n// \u2500\u2500 Health score \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n// CRITICAL: 4xx for health score = INTERNAL ONLY\n// External 4xx = links to other broken sites \u2014 client can update links but not fix those servers\n// Internal 4xx = your own site returning errors \u2014 directly your responsibility\n// Score denominator = total internal HTML pages crawled (from internal_html.csv)\nconst healthDenominator = totalInternal || 1;\n\nconst criticalCount =\n internalOnly4xxCount + // INTERNAL 4xx only (13 for spacem12, not 213)\n getCount('response_codes_server_error_(5xx).csv') +\n getCount('h1_missing.csv') +\n getCount('page_titles_missing.csv') +\n getCount('security_http_urls.csv') +\n getCount('content_exact_duplicates.csv');\n\nconst warningCount =\n getCount('meta_description_missing.csv') +\n getCount('page_titles_over_60_characters.csv') +\n getCount('h1_multiple.csv') +\n getCount('images_missing_alt_text.csv') +\n getCount('content_low_content_pages.csv') +\n noindexCount +\n internalRedirectCount +\n getCount('images_over_100_kb.csv');\n\nconst criticalDeduction = Math.min(50, Math.round((criticalCount / healthDenominator) * 60));\nconst warningDeduction = Math.min(30, Math.round((warnin
Credentials you'll need
Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.
googleDriveOAuth2Api, googleSheetsOAuth2Api, slackApi
For the full experience, including quality scoring and batch install features for each workflow, upgrade to Pro.
About this workflow
Type in Slack. Walk away. Get a professional PDF report and a structured Excel fix sheet delivered to Google Drive and posted back in your Slack thread — fully automated, zero manual work.
Source: https://n8n.io/workflows/15252/ — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
Expenses Tracker (video). Uses httpRequest, splitInBatches, googleSheets, googleDrive. Event-driven trigger; 21 nodes.
Transform your lead list into an AI-powered calling machine. This workflow automates your entire cold calling process using Vapi's conversational AI to initiate calls, qualify leads, capture detailed
This template monitors a Google Drive folder for new files, extracts text from PDFs, images, text files, CSVs, and Google Docs, reads images with meta/llama-3.2-11b-vision-instruct, structures the resu
This workflow automates the full company enrichment pipeline: Simply import CSV company lists to Slack and save time on enrichment and CRM maintenance. It processes uploaded files, extracts company do
Automatically transform any website URL into a complete portfolio entry with professional screenshots and AI-generated Upwork project descriptions. Freelancers building their Upwork/portfolio from pas