{
  "id": "uHuCUw3vQH7eXOyI",
  "name": "Service_and_Pods_Discovery",
  "tags": [],
  "nodes": [
    {
      "id": "de0cfea0-77c9-4918-9eb6-e18189c17cfd",
      "name": "\ud83d\udd52 Every 5 Min Trigger",
      "type": "n8n-nodes-base.scheduleTrigger",
      "position": [
        -900,
        520
      ],
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "minutes"
            }
          ]
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "4a46ca36-bba6-4670-96a4-487764a60cba",
      "name": "\ud83d\udce4 Send Alerts to Slack",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        2240,
        580
      ],
      "parameters": {
        "url": "https://slack.com/api/chat.postMessage",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "sendHeaders": true,
        "authentication": "genericCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "channel",
              "value": "#k8s-alerts"
            },
            {
              "name": "text",
              "value": "={{$json[\"text\"]}}"
            }
          ]
        },
        "genericAuthType": "httpBearerAuth",
        "headerParameters": {
          "parameters": [
            {
              "name": "Content-Type",
              "value": "application/json"
            }
          ]
        }
      },
      "credentials": {
        "httpBearerAuth": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "59853fd1-1249-4652-afbd-f7263d7fda9a",
      "name": "CrashLoopBackOff / Termination Reason",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -200,
        1060
      ],
      "parameters": {
        "url": "http://prometheus-kube-prometheus-prometheus.monitoring:9090/api/v1/query?query=kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\"} == 1",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "ef3f8b16-c9a6-4af3-a4aa-19f7cbe02254",
      "name": "Pod Restart Spike (last 5m)",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -200,
        1440
      ],
      "parameters": {
        "url": "http://prometheus-kube-prometheus-prometheus.monitoring:9090/api/v1/query?query=increase(kube_pod_container_status_restarts_total[5m]) > 0",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "bcac438e-4fcc-4fba-8dc5-69571113b4b4",
      "name": "Normalization",
      "type": "n8n-nodes-base.code",
      "position": [
        1480,
        580
      ],
      "parameters": {
        "jsCode": "// Flattens every merged input item into uniform records for the Slack formatter.\n// Three input shapes are recognised; items matching none of them are dropped.\n\n// Picks the issue type for one Prometheus sample; later rules override earlier ones.\nconst classify = (metric, val) => {\n  const metricName = metric.__name__;\n  let kind = metric.reason ? \"pod_waiting_reason\" : \"pod\";\n  if (metricName?.includes(\"ready\") && val === 0) kind = \"pod_not_ready\";\n  if (metricName?.includes(\"restarts_total\")) kind = \"pod_restart\";\n  if (metricName?.includes(\"pod_status_phase\") && metric.phase === \"Pending\" && val > 0) {\n    kind = \"pod_pending\";\n  }\n  return kind;\n};\n\nconst records = items.flatMap(({ json: payload }) => {\n  // Shape 1: service snapshot, an object whose keys look like \"<namespace>/<service>\"\n  if (payload && Object.keys(payload).some(key => key.includes('/'))) {\n    return Object.entries(payload).map(([name, count]) => ({ type: \"service\", name, count }));\n  }\n\n  // Shape 2: pod_restart record that an earlier node already normalized\n  if (payload?.type === \"pod_restart\") {\n    return [{\n      type: \"pod_restart\",\n      namespace: payload.namespace,\n      pod: payload.pod,\n      container: payload.container || null,\n      value: payload.value ? Number(payload.value) : 0\n    }];\n  }\n\n  // Shape 3: raw Prometheus response carrying a data.result list\n  if (payload?.data?.result) {\n    return [...payload.data.result].map(({ metric, value }) => {\n      const val = parseFloat(value[1]);\n      return {\n        type: classify(metric, val),\n        namespace: metric.namespace,\n        pod: metric.pod,\n        container: metric.container || null,\n        reason: metric.reason || null,\n        phase: metric.phase || null,\n        value: val\n      };\n    });\n  }\n\n  return [];\n});\n\nreturn records.map(record => ({ json: record }));\n"
      },
      "typeVersion": 2
    },
    {
      "id": "951d0319-c333-4ec0-89cd-28e2a413fda9",
      "name": "Slack formatter with summary",
      "type": "n8n-nodes-base.code",
      "position": [
        1920,
        580
      ],
      "parameters": {
        "jsCode": "// Builds one Slack message from the normalized items and the two endpoint snapshots.\n// Returns a single item whose json.text is read by the Slack request node.\nconst data = items.map(i => i.json);\n\n// Group pod findings by issue type\nlet podsNotReady = [];\nlet podsWaiting = [];\nlet podsRestarts = [];\nlet podsPending = [];\n\nfor (const d of data) {\n  if (d.type === \"pod_not_ready\" && d.value === 0) {\n    podsNotReady.push(d);\n  }\n  if (d.type === \"pod_waiting_reason\" && d.value > 0) {\n    podsWaiting.push(d);\n  }\n  if (d.type === \"pod_restart\" && d.value > 0) {\n    podsRestarts.push(d);\n  }\n  if (d.type === \"pod_pending\" && d.value > 0) {\n    podsPending.push(d);\n  }\n}\n\n// ---- Service comparison (detect changes) ----\n// Read each snapshot from the node that produced it instead of halving the merged\n// \"service\" items: the halves are unequal whenever a service appears or disappears,\n// and the current snapshot is wired to merge input 0, so it is not the second half.\nconst currMap = $('Map Prometheus results into namespace/service').first()?.json ?? {};\nconst prevMap = $('Map Prometheus results into namespace/service1').first()?.json ?? {};\n\nlet serviceChanges = [];\n// Walk the union of names so a service missing from the current snapshot is reported too\nconst serviceNames = new Set([...Object.keys(currMap), ...Object.keys(prevMap)]);\nfor (const svc of serviceNames) {\n  const current = currMap[svc] ?? 0;\n  const previous = prevMap[svc] ?? 0;\n\n  if (current !== previous) {\n    if (previous === 0 && current > 0) {\n      serviceChanges.push(`\ud83c\udd95 Service \\`${svc}\\` appeared with *${current}* endpoints`);\n    } else if (current === 0) {\n      serviceChanges.push(`\u274c Service \\`${svc}\\` lost all endpoints (was *${previous}*)`);\n    } else {\n      serviceChanges.push(`\u26a0\ufe0f Service \\`${svc}\\` endpoint count changed from *${previous}* \u2192 *${current}*`);\n    }\n  }\n}\n\n// ---- Slack message builder ----\nlet msg = \":mag: *Kubernetes Cluster Health Update*\\n\\n\";\n\n// Service section (always present, even when nothing changed)\nmsg += \":gear: *Service Discovery Changes:*\\n\";\nif (serviceChanges.length > 0) {\n  msg += serviceChanges.map(c => `\u2022 ${c}`).join(\"\\n\") + \"\\n\\n\";\n} else {\n  msg += \"\u2705 No service endpoint changes detected\\n\\n\";\n}\n\n// Pods Not Ready\nif (podsNotReady.length > 0) {\n  msg += \":warning: *Pods Not Ready:*\\n\";\n  msg += podsNotReady.map(p => `\u2022 [${p.namespace}] \\`${p.pod}\\` (container: ${p.container})`).join(\"\\n\") + \"\\n\\n\";\n}\n\n// Pods Pending\nif (podsPending.length > 0) {\n  msg += \":hourglass_flowing_sand: *Pods Pending Scheduling:*\\n\";\n  msg += podsPending.map(p => `\u2022 [${p.namespace}] \\`${p.pod}\\` (phase: ${p.phase})`).join(\"\\n\") + \"\\n\\n\";\n}\n\n// Pods Waiting\nif (podsWaiting.length > 0) {\n  msg += \":fire: *Pods in Waiting State:*\\n\";\n  msg += podsWaiting.map(p => `\u2022 [${p.namespace}] \\`${p.pod}\\` \u2013 reason: *${p.reason}*`).join(\"\\n\") + \"\\n\\n\";\n}\n\n// Pods Restarts\nif (podsRestarts.length > 0) {\n  msg += \":repeat: *Pods with Restart Spike (5m):*\\n\";\n  msg += podsRestarts.map(p => `\u2022 [${p.namespace}] \\`${p.pod}\\` (container: ${p.container}) \u2013 restarts: ${p.value}`).join(\"\\n\") + \"\\n\\n\";\n}\n\nreturn [{ json: { text: msg.trim() } }];\n"
      },
      "typeVersion": 2
    },
    {
      "id": "27e4b29b-88be-432f-b319-b938cce8210f",
      "name": "Pods Running State",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -220,
        340
      ],
      "parameters": {
        "url": "http://prometheus-kube-prometheus-prometheus.monitoring:9090/api/v1/query?query=sum(kube_pod_status_phase{phase=\"Running\"}) by (namespace, pod)",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "c02b93c4-3feb-4cbf-b2a2-08ab5ab98783",
      "name": "Map Prometheus results into namespace/service",
      "type": "n8n-nodes-base.code",
      "position": [
        200,
        -460
      ],
      "parameters": {
        "jsCode": "// Collapse the Prometheus result list into one object: { \"<namespace>/<service>\": value }\nconst pairs = $json[\"data\"][\"result\"].map(({ metric, value }) => [\n  `${metric.namespace}/${metric.service}`,\n  parseFloat(value[1])\n]);\nreturn [{ json: Object.fromEntries(pairs) }];\n"
      },
      "typeVersion": 2
    },
    {
      "id": "a956b2e8-cdf2-41a2-b7f2-64c4babf6e67",
      "name": "Map Prometheus results into namespace/service1",
      "type": "n8n-nodes-base.code",
      "position": [
        200,
        -40
      ],
      "parameters": {
        "jsCode": "// Collapse the Prometheus result list into one object: { \"<namespace>/<service>\": value }\nconst pairs = $json[\"data\"][\"result\"].map(({ metric, value }) => [\n  `${metric.namespace}/${metric.service}`,\n  parseFloat(value[1])\n]);\nreturn [{ json: Object.fromEntries(pairs) }];\n"
      },
      "typeVersion": 2
    },
    {
      "id": "85775145-07dc-4647-94be-b5ed3a4dd5f5",
      "name": "Normalization Node",
      "type": "n8n-nodes-base.code",
      "position": [
        160,
        1120
      ],
      "parameters": {
        "jsCode": "// Turns each sample of the incoming Prometheus response into a pod_restart record.\nconst samples = $json.data?.result || [];\n\nreturn samples.map(({ metric, value }) => ({\n  json: {\n    type: \"pod_restart\",\n    namespace: metric.namespace,\n    pod: metric.pod,\n    container: metric.container || \"unknown\",\n    value: Math.round(Number(value[1])) // round off decimals\n  }\n}));\n"
      },
      "typeVersion": 2
    },
    {
      "id": "187490ee-ce35-4264-95f1-31131918ee09",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -980,
        720
      ],
      "parameters": {
        "content": "\u23f2 Runs every 5 minutes\n\nKicks off the monitoring workflow automatically\n\nEnsures continuous health checks of pods & endpoints"
      },
      "typeVersion": 1
    },
    {
      "id": "8564d65f-ab8b-47c2-87e3-140a6fc05da8",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -320,
        -280
      ],
      "parameters": {
        "content": "Runs a Prometheus query for current service endpoints\n\nFetches real-time endpoint data"
      },
      "typeVersion": 1
    },
    {
      "id": "36aa2ee7-b5c1-4ef4-bff2-2d97b00072cf",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        140,
        -280
      ],
      "parameters": {
        "content": "Maps the raw Prometheus query result into a structured format:\nnamespace/service \u2192 endpoint count"
      },
      "typeVersion": 1
    },
    {
      "id": "4bd71c97-a234-4887-93ff-af1f51fd7280",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -300,
        100
      ],
      "parameters": {
        "content": "Runs a Prometheus query for endpoints 5 minutes ago\n\nUseful for comparing trends / changes"
      },
      "typeVersion": 1
    },
    {
      "id": "f140b207-760e-4f46-981d-00de30d96adf",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        120,
        140
      ],
      "parameters": {
        "content": "Maps endpoint data from 5 minutes ago\n\nNormalizes it into namespace/service format"
      },
      "typeVersion": 1
    },
    {
      "id": "fe62ffac-c069-4dbf-bba1-2e5fdb3bcd70",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        620,
        80
      ],
      "parameters": {
        "content": "Merges current and 5-min old endpoint data\n\nAllows comparison for detecting endpoint changes"
      },
      "typeVersion": 1
    },
    {
      "id": "14e5b194-d5a6-4e3e-b638-a50d977ecf5b",
      "name": "Merge (endpoints)",
      "type": "n8n-nodes-base.merge",
      "position": [
        700,
        -120
      ],
      "parameters": {},
      "typeVersion": 3.2
    },
    {
      "id": "32a8c08c-556e-4a47-a437-ac16946c2704",
      "name": "Current endpoints",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -240,
        -460
      ],
      "parameters": {
        "url": "http://prometheus-kube-prometheus-prometheus.monitoring:9090/api/v1/query?query=sum by (namespace, service) (kube_endpoint_address_available)",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "9ff7200d-c20c-47bf-ad02-eb6f19d36b65",
      "name": "Endpoints 5m ago",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -240,
        -40
      ],
      "parameters": {
        "url": "http://prometheus-kube-prometheus-prometheus.monitoring:9090/api/v1/query?query=sum by (namespace, service) (kube_endpoint_address_available offset 5m)",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "6ee1beaa-23bb-466f-aecf-7cde506a809d",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -280,
        500
      ],
      "parameters": {
        "content": "Queries Prometheus for pods in Running state\n\nHelps verify overall cluster health"
      },
      "typeVersion": 1
    },
    {
      "id": "f489089f-4587-49a1-8a84-8938d31b7d49",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -280,
        840
      ],
      "parameters": {
        "content": "Queries Prometheus for containers that are not ready\n\nUseful for spotting readiness probe failures"
      },
      "typeVersion": 1
    },
    {
      "id": "deb8cc8f-684c-4f70-82ac-eac1f97e1ff2",
      "name": "Sticky Note8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -240,
        1240
      ],
      "parameters": {
        "content": "Queries containers waiting in CrashLoopBackOff\n\nCritical for alerting on failed workloads"
      },
      "typeVersion": 1
    },
    {
      "id": "5b026032-b8f2-4479-a6a0-c17abb4ee90e",
      "name": "Sticky Note9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -280,
        1640
      ],
      "parameters": {
        "content": "Queries containers that restarted at least once in the last 5 minutes\n\nDetects unhealthy pods with restart loops"
      },
      "typeVersion": 1
    },
    {
      "id": "2ffe714e-94de-4789-a60e-0d5f57dd7aca",
      "name": "Sticky Note10",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        540,
        1220
      ],
      "parameters": {
        "content": "Normalizes pod restart data\n\nEnsures consistent format before merging"
      },
      "typeVersion": 1
    },
    {
      "id": "317d2898-3084-46bf-b62d-9cfc9032bb9d",
      "name": "Sticky Note11",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1040,
        740
      ],
      "parameters": {
        "content": "Merges all collected data (endpoint snapshots, running pods, not-ready containers, CrashLoopBackOff, restarts, pending pods)\n\nProduces a single combined dataset"
      },
      "typeVersion": 1
    },
    {
      "id": "9c5263e2-a9b5-4bf3-9ed7-145b8c22f0d0",
      "name": "Merge All data collected",
      "type": "n8n-nodes-base.merge",
      "position": [
        1180,
        520
      ],
      "parameters": {
        "numberInputs": 6
      },
      "typeVersion": 3.2
    },
    {
      "id": "8aaddff7-b4a2-49b1-ab55-442f4f830231",
      "name": "Sticky Note12",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1420,
        740
      ],
      "parameters": {
        "content": "Converts the merged data into uniform typed records\n\nSkips items in an unrecognised format"
      },
      "typeVersion": 1
    },
    {
      "id": "efe1f39c-3909-4a96-9015-e0cb7dcb3381",
      "name": "Sticky Note13",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1820,
        740
      ],
      "parameters": {
        "content": "Converts monitoring results into human-readable Slack message format\n\nAdds severity & summary tags"
      },
      "typeVersion": 1
    },
    {
      "id": "dcb941ad-ef98-480d-a187-f37d1868f0ca",
      "name": "Sticky Note14",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2160,
        740
      ],
      "parameters": {
        "content": "Sends the final alert message to the #k8s-alerts Slack channel via the chat.postMessage API\n\nNotifies DevOps team instantly"
      },
      "typeVersion": 1
    },
    {
      "id": "2e1fab83-c020-4b0e-bb17-093c1500c5cc",
      "name": "Containers Not Ready",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -220,
        680
      ],
      "parameters": {
        "url": "http://prometheus-kube-prometheus-prometheus.monitoring:9090/api/v1/query?query=kube_pod_container_status_ready == 0",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "dae09119-ffca-4864-bdb4-4474311bafcd",
      "name": "Pod Pending State",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -180,
        1880
      ],
      "parameters": {
        "url": "http://prometheus-kube-prometheus-prometheus.monitoring:9090/api/v1/query?query=kube_pod_status_phase{phase=\"Pending\"} > 0",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "73454815-0054-48f8-9c1b-7b7395c11660",
      "name": "Sticky Note15",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1380,
        -460
      ],
      "parameters": {
        "width": 580,
        "height": 760,
        "content": "\ud83d\udccc Trigger \u2192 Runs every 5 minutes via Cron \u23f1\ufe0f.\n\n\ud83d\udccc Prometheus Queries \u2192 Collects different metrics:\n\n\ud83d\udd01 Pod restarts\n\n\u23f3 Pending pods\n\n\ud83d\udeab Not ready pods\n\n\u2699\ufe0f Service discovery counts\n\n\ud83d\udccc Normalization Layer \u2192 All metrics transformed into a standard JSON object schema (pod_restart, pod_waiting_reason, pod_not_ready, service).\n\n\ud83d\udccc Merging Node \u2192 Combines Prometheus Service Discovery and Pods/Containers normalization streams into a single pipeline.\n\n\ud83d\udccc Error Filtering \u2192 Keeps only meaningful values (> 0 for errors, drop noise).\n\n\ud83d\udccc Slack Delivery \u2192 Sends structured health updates with emojis.\n\n\ud83d\udccc Deduplication \u2192 Avoids duplicate alerts in Slack.\n\n\ud83d\udccc Aggregation (5m window) \u2192 Sends summary updates with grouped insights"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "70d006e9-27b5-46cf-bec4-5155e6a326c3",
  "connections": {
    "Normalization": {
      "main": [
        [
          {
            "node": "Slack formatter with summary",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Endpoints 5m ago": {
      "main": [
        [
          {
            "node": "Map Prometheus results into namespace/service1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Current endpoints": {
      "main": [
        [
          {
            "node": "Map Prometheus results into namespace/service",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge (endpoints)": {
      "main": [
        [
          {
            "node": "Merge All data collected",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Pod Pending State": {
      "main": [
        [
          {
            "node": "Merge All data collected",
            "type": "main",
            "index": 5
          }
        ]
      ]
    },
    "Normalization Node": {
      "main": [
        [
          {
            "node": "Merge All data collected",
            "type": "main",
            "index": 4
          }
        ]
      ]
    },
    "Pods Running State": {
      "main": [
        [
          {
            "node": "Merge All data collected",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Containers Not Ready": {
      "main": [
        [
          {
            "node": "Merge All data collected",
            "type": "main",
            "index": 2
          }
        ]
      ]
    },
    "Merge All data collected": {
      "main": [
        [
          {
            "node": "Normalization",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\ud83d\udd52 Every 5 Min Trigger": {
      "main": [
        [
          {
            "node": "Current endpoints",
            "type": "main",
            "index": 0
          },
          {
            "node": "Endpoints 5m ago",
            "type": "main",
            "index": 0
          },
          {
            "node": "Pods Running State",
            "type": "main",
            "index": 0
          },
          {
            "node": "Containers Not Ready",
            "type": "main",
            "index": 0
          },
          {
            "node": "CrashLoopBackOff / Termination Reason",
            "type": "main",
            "index": 0
          },
          {
            "node": "Pod Restart Spike (last 5m)",
            "type": "main",
            "index": 0
          },
          {
            "node": "Pod Pending State",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Pod Restart Spike (last 5m)": {
      "main": [
        [
          {
            "node": "Normalization Node",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Slack formatter with summary": {
      "main": [
        [
          {
            "node": "\ud83d\udce4 Send Alerts to Slack",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "CrashLoopBackOff / Termination Reason": {
      "main": [
        [
          {
            "node": "Merge All data collected",
            "type": "main",
            "index": 3
          }
        ]
      ]
    },
    "Map Prometheus results into namespace/service": {
      "main": [
        [
          {
            "node": "Merge (endpoints)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Map Prometheus results into namespace/service1": {
      "main": [
        [
          {
            "node": "Merge (endpoints)",
            "type": "main",
            "index": 1
          }
        ]
      ]
    }
  }
}