{
  "id": "oaPG6H6gGGil57CB",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Stale Content Detector for Websites",
  "tags": [],
  "nodes": [
    {
      "id": "weekly-trigger",
      "name": "Weekly Scan (Monday 7 AM)",
      "type": "n8n-nodes-base.scheduleTrigger",
      "position": [
        -16,
        240
      ],
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "cronExpression",
              "expression": "0 7 * * 1"
            }
          ]
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "config",
      "name": "Site Configuration",
      "type": "n8n-nodes-base.set",
      "position": [
        208,
        240
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "sitemap-url",
              "name": "sitemapUrl",
              "type": "string",
              "value": "https://yoursite.com/sitemap.xml"
            },
            {
              "id": "stale-days",
              "name": "staleDays",
              "type": "number",
              "value": 180
            },
            {
              "id": "alert-email",
              "name": "alertEmail",
              "type": "string",
              "value": "user@example.com"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "fetch-sitemap",
      "name": "Fetch Sitemap XML",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        432,
        240
      ],
      "parameters": {
        "url": "={{ $json.sitemapUrl }}",
        "options": {
          "response": {
            "response": {
              "responseFormat": "text"
            }
          }
        }
      },
      "typeVersion": 4.2,
      "continueOnFail": true
    },
    {
      "id": "parse-sitemap",
      "name": "Parse Sitemap URLs",
      "type": "n8n-nodes-base.code",
      "position": [
        656,
        240
      ],
      "parameters": {
        "jsCode": "// Sitemap body from the HTTP Request node (text response) and settings from the config node.\nconst response = $input.first().json;\nconst xml = String(response.data || response.body || '');\nconst config = $('Site Configuration').first().json;\nconst staleDays = config.staleDays || 180;\nconst alertEmail = config.alertEmail;\n\n// Parse URLs and lastmod dates from sitemap XML (tags may carry attributes or differ in case)\nconst urlRegex = /<url\\b[^>]*>([\\s\\S]*?)<\\/url>/gi;\nconst locRegex = /<loc\\b[^>]*>([\\s\\S]*?)<\\/loc>/i;\nconst lastmodRegex = /<lastmod\\b[^>]*>([\\s\\S]*?)<\\/lastmod>/i;\n\n// Element text can be padded with whitespace, wrapped in CDATA, or XML-escaped (e.g. &amp;).\nconst cleanText = (value) => {\n  const cdata = value.match(/^\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>\\s*$/);\n  if (cdata) return cdata[1].trim();\n  return value\n    .trim()\n    .replace(/&lt;/g, '<')\n    .replace(/&gt;/g, '>')\n    .replace(/&quot;/g, '\"')\n    .replace(/&apos;/g, \"'\")\n    .replace(/&amp;/g, '&'); // decode &amp; last so &amp;lt; stays a literal &lt;\n};\n\nconst MS_PER_DAY = 1000 * 60 * 60 * 24;\nconst now = Date.now(); // one timestamp for the whole scan keeps day counts consistent\nconst pages = [];\nlet match;\n\nwhile ((match = urlRegex.exec(xml)) !== null) {\n  const block = match[1];\n  const locMatch = block.match(locRegex);\n  const url = locMatch ? cleanText(locMatch[1]) : '';\n  if (!url) continue; // entry without a usable <loc>\n\n  const lastmodMatch = block.match(lastmodRegex);\n  const lastmod = lastmodMatch ? cleanText(lastmodMatch[1]) : '';\n  const lastModTime = lastmod ? new Date(lastmod).getTime() : NaN;\n\n  let isStale;\n  let daysSinceUpdate;\n\n  if (Number.isNaN(lastModTime)) {\n    // Missing or unparseable lastmod = potentially stale, flag for review\n    isStale = true;\n    daysSinceUpdate = -1; // Unknown\n  } else {\n    daysSinceUpdate = Math.floor((now - lastModTime) / MS_PER_DAY);\n    isStale = daysSinceUpdate > staleDays;\n  }\n\n  if (isStale) {\n    pages.push({\n      json: {\n        url,\n        lastModified: lastmod || 'unknown',\n        daysSinceUpdate,\n        staleDays,\n        alertEmail\n      }\n    });\n  }\n}\n\n// Sort by staleness (most stale first; unknown dates, marked -1, end up last)\npages.sort((a, b) => b.json.daysSinceUpdate - a.json.daysSinceUpdate);\n\n// Limit to top 20 for AI analysis\nreturn pages.slice(0, 20);"
      },
      "typeVersion": 2
    },
    {
      "id": "check-has-stale",
      "name": "Any Stale Pages?",
      "type": "n8n-nodes-base.if",
      "position": [
        864,
        240
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "has-pages",
              "operator": {
                "type": "number",
                "operation": "gt"
              },
              "leftValue": "={{ $input.all().length }}",
              "rightValue": 0
            }
          ]
        },
        "looseTypeValidation": true
      },
      "typeVersion": 2.3
    },
    {
      "id": "fetch-pages",
      "name": "Fetch Page Content",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1088,
        176
      ],
      "parameters": {
        "url": "={{ $json.url }}",
        "options": {
          "timeout": 10000,
          "response": {
            "response": {
              "responseFormat": "text"
            }
          }
        }
      },
      "typeVersion": 4.2,
      "continueOnFail": true
    },
    {
      "id": "extract-text",
      "name": "Extract Page Text",
      "type": "n8n-nodes-base.html",
      "position": [
        1312,
        176
      ],
      "parameters": {
        "options": {},
        "operation": "extractHtmlContent",
        "extractionValues": {
          "values": [
            {
              "key": "title",
              "cssSelector": "title"
            },
            {
              "key": "body",
              "cssSelector": "body"
            }
          ]
        }
      },
      "typeVersion": 1.2,
      "continueOnFail": true
    },
    {
      "id": "ai-content-review",
      "name": "AI Content Freshness Analyzer",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        1520,
        176
      ],
      "parameters": {
        "text": "=Review this webpage for content freshness. The page was last updated {{ $('Parse Sitemap URLs').item.json.daysSinceUpdate >= 0 ? $('Parse Sitemap URLs').item.json.daysSinceUpdate + ' days ago' : 'at an unknown date (the sitemap lists no valid lastmod for it)' }}.\n\nURL: {{ $('Parse Sitemap URLs').item.json.url }}\nTitle: {{ $json.title || 'No title found' }}\nContent preview (first 1000 chars): {{ ($json.body || 'Could not extract content').substring(0, 1000) }}\n\nAnalyze:\n1. Does the content contain outdated references (old dates, deprecated tools, old pricing, dead links)?\n2. Is the topic still relevant or has the industry moved on?\n3. Priority level for refresh: LOW (still accurate), MEDIUM (some outdated elements), HIGH (significantly outdated), CRITICAL (misleading or wrong)\n4. Specific suggestions for what to update\n\nKeep your response to 4-5 sentences max.",
        "options": {
          "systemMessage": "You are a content strategist who audits web pages for freshness and accuracy. Be practical and specific in your recommendations. Only flag things that genuinely need updating."
        },
        "promptType": "define"
      },
      "typeVersion": 3.1
    },
    {
      "id": "openai-model-3",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        1536,
        400
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "id",
          "value": "gpt-4o-mini"
        },
        "options": {},
        "builtInTools": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "build-report",
      "name": "Build Report Row",
      "type": "n8n-nodes-base.set",
      "position": [
        1824,
        176
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "url",
              "name": "pageUrl",
              "type": "string",
              "value": "={{ $('Parse Sitemap URLs').item.json.url }}"
            },
            {
              "id": "last-mod",
              "name": "lastModified",
              "type": "string",
              "value": "={{ $('Parse Sitemap URLs').item.json.lastModified }}"
            },
            {
              "id": "days-stale",
              "name": "daysSinceUpdate",
              "type": "number",
              "value": "={{ $('Parse Sitemap URLs').item.json.daysSinceUpdate }}"
            },
            {
              "id": "ai-review",
              "name": "aiReview",
              "type": "string",
              "value": "={{ $json.output }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "save-report",
      "name": "Save to Content Audit Sheet",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        2032,
        176
      ],
      "parameters": {
        "columns": {
          "value": {
            "page_url": "={{ $json.pageUrl }}",
            "ai_review": "={{ $json.aiReview }}",
            "scan_date": "={{ $now.toISO() }}",
            "last_modified": "={{ $json.lastModified }}",
            "days_since_update": "={{ $json.daysSinceUpdate }}"
          },
          "schema": [
            {
              "id": "scan_date",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "scan_date"
            },
            {
              "id": "page_url",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "page_url"
            },
            {
              "id": "last_modified",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "last_modified"
            },
            {
              "id": "days_since_update",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "days_since_update"
            },
            {
              "id": "ai_review",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "ai_review"
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": []
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1QUsgMCHaiJ7mfnVhMUYqHA9_2YEIUKXRxNvOfwFsgYE/edit#gid=0",
          "cachedResultName": "ContentAudit"
        },
        "documentId": {
          "__rl": true,
          "mode": "url",
          "value": "https://docs.google.com/spreadsheets/d/1QUsgMCHaiJ7mfnVhMUYqHA9_2YEIUKXRxNvOfwFsgYE/edit?gid=0#gid=0"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "send-digest",
      "name": "Email Content Audit Report",
      "type": "n8n-nodes-base.gmail",
      "position": [
        2624,
        176
      ],
      "parameters": {
        "sendTo": "={{ $('Site Configuration').first().json.alertEmail }}",
        "message": "={{ $json.emailBody }}",
        "options": {},
        "subject": "=Weekly Stale Content Report - {{ $now.format('yyyy-MM-dd') }}"
      },
      "credentials": {
        "gmailOAuth2": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "aggregate-for-email",
      "name": "Combine Into One",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        2240,
        176
      ],
      "parameters": {
        "options": {},
        "aggregate": "aggregateAllItemData"
      },
      "typeVersion": 1
    },
    {
      "id": "build-email-body",
      "name": "Build Email Body",
      "type": "n8n-nodes-base.code",
      "position": [
        2400,
        176
      ],
      "parameters": {
        "jsCode": "// Rows collected by the aggregate node: one object per audited page.\nconst items = $input.first().json.data || [];\nconst staleDays = $('Site Configuration').first().json.staleDays || 180;\nconst totalPages = items.length;\n\n// URLs and AI output are untrusted text: escape them before embedding in HTML.\nconst escapeHtml = (value) => String(value)\n  .replace(/&/g, '&amp;')\n  .replace(/</g, '&lt;')\n  .replace(/>/g, '&gt;')\n  .replace(/\"/g, '&quot;')\n  .replace(/'/g, '&#39;');\n\nlet html = `\n<div style=\"font-family: Arial, sans-serif; max-width: 680px; margin: 0 auto; color: #333;\">\n  <h1 style=\"font-size: 22px; border-bottom: 2px solid #333; padding-bottom: 10px;\">Stale Content Audit Report</h1>\n  <p style=\"font-size: 16px; color: #555;\">Pages flagged for review: <strong>${totalPages}</strong></p>\n`;\n\nif (totalPages === 0) {\n  html += '<p style=\"color: #2a9d2a;\">No stale pages found. Your content is looking fresh!</p>';\n} else {\n  for (let i = 0; i < totalPages; i++) {\n    const page = items[i];\n    const url = String(page.page_url || 'Unknown');\n    const urlPath = url.replace(/^https?:\\/\\/[^/]+/, '') || '/';\n    // Sheet values may arrive as strings; -1 or a non-numeric value means unknown.\n    const days = Number(page.days_since_update);\n    const daysText = days > 0 ? `${days} days ago` : 'Unknown';\n    // Escape the AI text, then keep its line breaks visible in the HTML email.\n    const review = escapeHtml(page.ai_review || 'No review available').replace(/\\r?\\n/g, '<br/>');\n\n    // Color code by staleness\n    let badgeColor = '#f1c40f'; // yellow: 270 days or less, or unknown\n    if (days > 365) badgeColor = '#e74c3c'; // red\n    else if (days > 270) badgeColor = '#e67e22'; // orange\n\n    html += `\n  <div style=\"background: #f9f9f9; border-left: 4px solid ${badgeColor}; padding: 16px 20px; margin: 16px 0; border-radius: 4px;\">\n    <h3 style=\"margin: 0 0 6px 0; font-size: 16px;\">${i + 1}. ${escapeHtml(urlPath)}</h3>\n    <p style=\"margin: 0 0 10px 0; font-size: 13px; color: #888;\">Last updated: <strong style=\"color: ${badgeColor};\">${daysText}</strong></p>\n    <p style=\"margin: 0; font-size: 14px; line-height: 1.5; color: #444;\">${review}</p>\n  </div>`;\n  }\n}\n\nhtml += `\n  <hr style=\"border: none; border-top: 1px solid #ddd; margin: 24px 0;\" />\n  <p style=\"font-size: 13px; color: #999;\">This report checks all pages in your sitemap not updated in over ${staleDays} days.<br/>Full details with AI analysis are in your Google Sheet.</p>\n  <p style=\"font-size: 12px; color: #bbb;\">Generated by Stale Content Detector</p>\n</div>`;\n\nreturn [{ json: { emailBody: html, totalPages } }];"
      },
      "typeVersion": 2
    },
    {
      "id": "stale-overview",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -624,
        64
      ],
      "parameters": {
        "width": 480,
        "height": 720,
        "content": "## Stale Content Detector for Websites\n\n### How it works\n1. Every Monday at 7 AM, the workflow fetches your sitemap.xml and extracts all URLs with their last-modified dates\n2. Pages not updated within your configured threshold (default: 180 days) are flagged and sorted most-stale-first\n3. Each stale page is fetched and its content is analyzed by an AI agent that rates freshness as LOW, MEDIUM, HIGH, or CRITICAL with specific update suggestions\n4. Results are logged to Google Sheets and a color-coded HTML email digest is sent with all flagged pages and their AI verdicts\n\n### Setup steps\n- [ ] Open **Site Configuration** and set your sitemapUrl, staleDays (default: 180), and alertEmail\n- [ ] Create a Google Sheet with a **ContentAudit** tab (columns: scan_date, page_url, last_modified, days_since_update, ai_review)\n- [ ] Paste your Google Sheet URL into the **Save to Content Audit Sheet** node\n- [ ] Connect Gmail OAuth2 credentials on the **Email Content Audit Report** node\n- [ ] Connect Google Sheets credentials\n- [ ] Connect OpenAI API credentials on the **OpenAI Chat Model** node\n\n### Customization\nChange the staleDays threshold in Site Configuration. Increase the page limit above 20 in the Code node for larger sites. Add URL path filters to focus on blog posts or docs only. Replace Gmail with Slack for team notifications."
      },
      "typeVersion": 1
    },
    {
      "id": "stale-section-1",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -64,
        16
      ],
      "parameters": {
        "color": 7,
        "width": 416,
        "height": 272,
        "content": "## Trigger and configuration\n\nRuns every Monday at 7 AM. Set your sitemap URL, staleness threshold, and alert email in the Site Configuration node."
      },
      "typeVersion": 1
    },
    {
      "id": "stale-section-2",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        384,
        16
      ],
      "parameters": {
        "color": 7,
        "width": 624,
        "height": 272,
        "content": "## Fetch and parse sitemap\n\nFetches your sitemap.xml, extracts URLs with last-modified dates, and flags pages not updated within the configured threshold. Results are sorted most-stale-first, capped at 20."
      },
      "typeVersion": 1
    },
    {
      "id": "stale-section-3",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1040,
        -64
      ],
      "parameters": {
        "color": 7,
        "width": 784,
        "height": 464,
        "content": "## Content extraction and AI review\n\nFetches each stale page, extracts title and body text, then sends it to an AI agent that rates content freshness and provides specific update suggestions."
      },
      "typeVersion": 1
    },
    {
      "id": "stale-section-4",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1888,
        -48
      ],
      "parameters": {
        "color": 7,
        "width": 944,
        "height": 272,
        "content": "## Logging and email report\n\nLogs each reviewed page to Google Sheets with the full AI analysis. Results are aggregated into a color-coded HTML email digest and sent to your alert address."
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "timezone": "America/New_York",
    "binaryMode": "separate",
    "callerPolicy": "workflowsFromSameOwner",
    "availableInMCP": false,
    "executionOrder": "v1"
  },
  "versionId": "9124ccdc-706b-4e27-a40d-c1bae882d593",
  "connections": {
    "Any Stale Pages?": {
      "main": [
        [
          {
            "node": "Fetch Page Content",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Build Email Body": {
      "main": [
        [
          {
            "node": "Email Content Audit Report",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Build Report Row": {
      "main": [
        [
          {
            "node": "Save to Content Audit Sheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Combine Into One": {
      "main": [
        [
          {
            "node": "Build Email Body",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Page Text": {
      "main": [
        [
          {
            "node": "AI Content Freshness Analyzer",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Fetch Sitemap XML": {
      "main": [
        [
          {
            "node": "Parse Sitemap URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "AI Content Freshness Analyzer",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Fetch Page Content": {
      "main": [
        [
          {
            "node": "Extract Page Text",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse Sitemap URLs": {
      "main": [
        [
          {
            "node": "Any Stale Pages?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Site Configuration": {
      "main": [
        [
          {
            "node": "Fetch Sitemap XML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Weekly Scan (Monday 7 AM)": {
      "main": [
        [
          {
            "node": "Site Configuration",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Save to Content Audit Sheet": {
      "main": [
        [
          {
            "node": "Combine Into One",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "AI Content Freshness Analyzer": {
      "main": [
        [
          {
            "node": "Build Report Row",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}