{
  "name": "W02 \u2014 OOMKilled Auto-Recovery",
  "nodes": [
    {
      "id": "webhook-trigger",
      "name": "AlertManager Webhook",
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2,
      "position": [240, 300],
      "parameters": {
        "httpMethod": "POST",
        "path": "alertmanager",
        "options": {}
      }
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true
          },
          "conditions": [
            {
              "leftValue": "={{ $json.body.alerts?.[0]?.labels?.alertname }}",
              "rightValue": "PodOOMKilled",
              "operator": {
                "type": "string",
                "operation": "equals"
              }
            },
            {
              "leftValue": "={{ $json.body.alerts?.[0]?.status }}",
              "rightValue": "firing",
              "operator": {
                "type": "string",
                "operation": "equals"
              }
            }
          ],
          "combinator": "and"
        }
      },
      "id": "filter-oom",
      "name": "Filter \u2014 PodOOMKilled only",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2,
      "position": [
        460,
        300
      ]
    },
    {
      "parameters": {
        "jsCode": "// Handles only the first alert of the AlertManager batch, matching the upstream filter node.\nconst alert = $input.first().json.body?.alerts?.[0];\nif (!alert?.labels) {\n  throw new Error('AlertManager payload has no alerts[0].labels');\n}\nconst { pod, namespace, container = 'unknown' } = alert.labels;\n\n// pod/namespace are interpolated into kubectl shell commands downstream,\n// so accept only Kubernetes-style names.\nconst K8S_NAME = /^[a-z0-9][a-z0-9.-]{0,252}$/;\nfor (const [label, value] of Object.entries({ pod, namespace })) {\n  if (typeof value !== 'string' || !K8S_NAME.test(value)) {\n    throw new Error(`Invalid or missing ${label} label: ${JSON.stringify(value)}`);\n  }\n}\n\n// A deleted Deployment pod comes back with a new <hash>-<suffix>, so key the history\n// on the workload name; other pod names (e.g. StatefulSet ordinals) are used as-is.\nconst workload = pod.replace(/-[a-z0-9]{6,10}-[a-z0-9]{5}$/, '');\nconst key = `oom_${namespace}_${workload}`;\nconst WINDOW_MS = 60 * 60 * 1000; // 1 hour\nconst stored = $getWorkflowStaticData('node');\nconst now = Date.now();\n\n// Prune expired entries so static data does not grow without bound.\nfor (const k of Object.keys(stored)) {\n  if (k.startsWith('oom_') && now - stored[k] >= WINDOW_MS) delete stored[k];\n}\n\n// Check static data store for prior OOM in last hour\nconst last = stored[key] || 0;\nconst isRepeat = (now - last) < WINDOW_MS;\n\n// Record this occurrence\nstored[key] = now;\n\nreturn [{ json: { pod, namespace, container, isRepeat, firedAt: new Date(now).toISOString() } }];"
      },
      "id": "check-repeat",
      "name": "Check Repeat OOM",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        680,
        300
      ]
    },
    {
      "parameters": {
        "conditions": {
          "options": {},
          "conditions": [
            {
              "leftValue": "={{ $json.isRepeat }}",
              "rightValue": false,
              "operator": {
                "type": "boolean",
                "operation": "equals"
              }
            }
          ],
          "combinator": "and"
        }
      },
      "id": "is-repeat",
      "name": "First Occurrence?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2,
      "position": [
        900,
        300
      ]
    },
    {
      "parameters": {
        "authentication": "privateKey",
        "resource": "command",
        "command": "=kubectl delete pod --namespace={{ $('Check Repeat OOM').first().json.namespace.replace(/[^a-z0-9-]/g, '') }} -- {{ $('Check Repeat OOM').first().json.pod.replace(/[^a-z0-9.-]/g, '') }} 2>&1",
        "host": "10.0.10.10",
        "port": 22,
        "username": "kagiso",
        "options": {
          "execTimeout": 15000
        }
      },
      "id": "delete-pod",
      "name": "SSH \u2014 Delete OOMKilled Pod",
      "type": "n8n-nodes-base.ssh",
      "typeVersion": 1,
      "position": [
        1120,
        160
      ],
      "credentials": {
        "sshPrivateKey": {
          "name": "<your credential>"
        }
      }
    },
    {
      "id": "wait-60",
      "name": "Wait 60s",
      "type": "n8n-nodes-base.wait",
      "typeVersion": 1.1,
      "position": [1340, 160],
      "parameters": {
        "amount": 60,
        "unit": "seconds"
      }
    },
    {
      "parameters": {
        "authentication": "privateKey",
        "resource": "command",
        "command": "=kubectl get pods --namespace={{ $('Check Repeat OOM').first().json.namespace.replace(/[^a-z0-9-]/g, '') }} --field-selector=status.phase=Running --no-headers 2>/dev/null | grep -iF -- {{ $('Check Repeat OOM').first().json.pod.replace(/-[a-z0-9]+-[a-z0-9]+$/, '').replace(/[^a-z0-9.-]/g, '') }} || echo 'NOT_RUNNING'",
        "host": "10.0.10.10",
        "port": 22,
        "username": "kagiso",
        "options": {
          "execTimeout": 15000
        }
      },
      "id": "check-running",
      "name": "SSH \u2014 Verify Pod Running",
      "type": "n8n-nodes-base.ssh",
      "typeVersion": 1,
      "position": [
        1560,
        160
      ],
      "credentials": {
        "sshPrivateKey": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "method": "POST",
        "url": "={{ $vars.DISCORD_ALERTS_WEBHOOK }}",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Content-Type",
              "value": "application/json"
            }
          ]
        },
        "sendBody": true,
        "contentType": "json",
        "specifyBody": "json",
        "jsonBody": "={{ (() => { const src = $('Check Repeat OOM').first().json; const stdout = String($('SSH \u2014 Verify Pod Running').first().json.stdout ?? ''); const running = stdout.trim() !== '' && !stdout.includes('NOT_RUNNING'); return JSON.stringify({ embeds: [{ title: running ? '\u2705 OOMKill Recovered' : '\u274c OOMKill Recovery Failed', color: running ? 3066993 : 15158332, fields: [ { name: 'Pod', value: `${src.namespace}/${src.pod}`, inline: true }, { name: 'Container', value: src.container || 'unknown', inline: true }, { name: 'Action', value: 'Pod deleted \u2014 k8s rescheduled automatically', inline: false }, { name: 'Status', value: running ? 'Pod is Running \u2705' : 'Pod not yet Running \u2014 check manually', inline: false } ], footer: { text: 'n8n auto-recovery' }, timestamp: new Date().toISOString() }] }); })() }}"
      },
      "id": "discord-recovered",
      "name": "Discord \u2014 Recovery Result",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        1780,
        160
      ]
    },
    {
      "parameters": {
        "method": "POST",
        "url": "={{ $vars.DISCORD_CRITICAL_WEBHOOK }}",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Content-Type",
              "value": "application/json"
            }
          ]
        },
        "sendBody": true,
        "contentType": "json",
        "specifyBody": "json",
        "jsonBody": "={{ JSON.stringify({ embeds: [{ title: '\ud83d\udd34 Repeated OOMKill \u2014 Manual Action Required', color: 15158332, fields: [ { name: 'Pod', value: `${$('Check Repeat OOM').first().json.namespace}/${$('Check Repeat OOM').first().json.pod}`, inline: true }, { name: 'Container', value: $('Check Repeat OOM').first().json.container || 'unknown', inline: true }, { name: 'Action', value: 'NOT auto-restarting \u2014 second OOMKill in 1 hour', inline: false }, { name: 'Recommended Fix', value: 'Increase memory limit in HelmRelease values and open a PR', inline: false } ], footer: { text: 'n8n OOM guard' }, timestamp: new Date().toISOString() }] }) }}"
      },
      "id": "discord-repeat",
      "name": "Discord \u2014 Repeated OOM Alert",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        1120,
        440
      ]
    }
  ],
  "connections": {
    "AlertManager Webhook": {
      "main": [
        [
          {
            "node": "Filter \u2014 PodOOMKilled only",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter \u2014 PodOOMKilled only": {
      "main": [
        [
          {
            "node": "Check Repeat OOM",
            "type": "main",
            "index": 0
          }
        ],
        []
      ]
    },
    "Check Repeat OOM": {
      "main": [
        [
          {
            "node": "First Occurrence?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "First Occurrence?": {
      "main": [
        [
          {
            "node": "SSH \u2014 Delete OOMKilled Pod",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Discord \u2014 Repeated OOM Alert",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "SSH \u2014 Delete OOMKilled Pod": {
      "main": [
        [
          {
            "node": "Wait 60s",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wait 60s": {
      "main": [
        [
          {
            "node": "SSH \u2014 Verify Pod Running",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "SSH \u2014 Verify Pod Running": {
      "main": [
        [
          {
            "node": "Discord \u2014 Recovery Result",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1",
    "saveManualExecutions": true,
    "timezone": "Africa/Johannesburg"
  },
  "tags": [
    "homelab",
    "tier1",
    "remediation"
  ],
  "notes": "W02: AlertManager PodOOMKilled \u2192 auto-delete pod (k8s reschedules) \u2192 wait 60s \u2192 verify running \u2192 Discord. Second OOM within 1 hour: skip delete, post to #homelab-critical with recommendation to increase memory limit. Uses workflow static data to track per-pod OOM history across executions."
}