{
  "name": "Email Order ID Extractor (Body + Attachments + OCR)",
  "nodes": [
    {
      "parameters": {
        "jsCode": "// Step 1: look for an order ID (OR-...) in the body of every incoming email.\nconst regex = /\\bOR-[A-Za-z0-9-]+\\b/;\n\n// Reduce HTML to plain text: drop script/style blocks, replace remaining tags\n// with spaces, then collapse runs of whitespace.\nfunction stripHtml(html) {\n  return html\n    .replace(/<script[\\s\\S]*?<\\/script>/gi, '')\n    .replace(/<style[\\s\\S]*?<\\/style>/gi, '')\n    .replace(/<[^>]+>/g, ' ')\n    .replace(/\\s+/g, ' ')\n    .trim();\n}\n\n// Prefer the plain-text part; otherwise fall back to the HTML part with its\n// markup removed. Non-string values are treated as missing.\nfunction getBodyText(email) {\n  if (typeof email.textPlain === 'string' && email.textPlain) {\n    return email.textPlain;\n  }\n  return typeof email.textHtml === 'string' ? stripHtml(email.textHtml) : '';\n}\n\n// Process every item of this execution, not only the first one, so no email\n// is silently dropped when several arrive together.\nreturn $input.all().map((item, index) => {\n  const email = item.json;\n  const body = getBodyText(email);\n  const match = body.match(regex);\n\n  return {\n    json: {\n      ...email,\n      emailText: body,\n      orderId: match ? match[0] : null\n    },\n    pairedItem: { item: index }\n  };\n});"
      },
      "name": "Step1 Search Email Body",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        256,
        0
      ],
      "id": "927fe791-dc0b-4826-8548-91291c7e4b4a"
    },
    {
      "id": "c4150f37-7722-4844-a7f8-991dd270944c",
      "name": "Found In Body?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2,
      "position": [512, 0],
      "alwaysOutputData": false,
      "parameters": {
        "conditions": {
          "combinator": "and",
          "conditions": [
            {
              "id": "24376317-e0bd-44c5-a7af-1ffafa2d3a92",
              "leftValue": "={{ $json.orderId }}",
              "operator": {
                "type": "string",
                "operation": "notEmpty",
                "singleValue": true
              },
              "rightValue": ""
            }
          ],
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict",
            "version": 1
          }
        },
        "options": {}
      }
    },
    {
      "parameters": {
        "jsCode": "// Step 2: emit one item per binary attachment of the first email delivered by\n// the IMAP trigger. Each item carries the file under the binary key 'data' and\n// exposes its fileName and mimeType in json.\nconst source = $('Email Trigger (IMAP)').first();\nconst attachments = Object.values(source.binary || {});\n\nreturn attachments.map((attachment) => ({\n  json: {\n    fileName: attachment.fileName,\n    mimeType: attachment.mimeType\n  },\n  binary: {\n    data: attachment\n  }\n}));"
      },
      "name": "Step2 Split Attachments",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        752,
        128
      ],
      "id": "48fdb462-99dd-4a34-8c8a-28f783cbdf0f"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://YOUR_TEXT_EXTRACT_SERVICE/extract",
        "sendBody": true,
        "contentType": "binaryData",
        "inputDataFieldName": "data",
        "options": {}
      },
      "name": "Extract Text From Attachment",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4,
      "position": [
        1008,
        224
      ],
      "id": "f25ab5fd-3804-42dd-9a60-4145923aafee"
    },
    {
      "parameters": {
        "jsCode": "// Look for an order ID in the text returned for each attachment.\n// NOTE(review): this pattern is stricter than the one used for the email body\n// (uppercase only, at least 4 characters after OR-) - confirm that is intended.\nconst regex = /\\bOR-[A-Z0-9-]{4,}\\b/;\n\n// Search every incoming item so that each attachment is checked, not only the\n// first one. A missing or non-string text field is treated as empty.\nreturn $input.all().map((item, index) => {\n  const text = typeof item.json.text === 'string' ? item.json.text : '';\n  const match = text.match(regex);\n\n  return {\n    json: {\n      ...item.json,\n      orderId: match ? match[0] : null\n    },\n    pairedItem: { item: index }\n  };\n});"
      },
      "name": "Search Extracted Text",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1264,
        224
      ],
      "id": "c4e023e2-d464-4f7b-a5bd-cb840b4abfc0"
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict",
            "version": 1
          },
          "conditions": [
            {
              "id": "5b1e6c2a-9d4f-4a37-8c1e-2f7a9b3d6e10",
              "leftValue": "={{ $json.orderId }}",
              "rightValue": "",
              "operator": {
                "type": "string",
                "operation": "notEmpty",
                "singleValue": true
              }
            }
          ],
          "combinator": "and"
        },
        "options": {}
      },
      "name": "Found In Attachment?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2,
      "position": [
        1504,
        224
      ],
      "id": "0a9d4e70-80bb-4c3e-aad7-8d3e0750e67a"
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict",
            "version": 1
          },
          "conditions": [
            {
              "id": "d3a8f0c4-6b2e-4f19-a7c5-1e9b4d2f8a63",
              "leftValue": "={{ $json.needsOcr }}",
              "rightValue": "",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              }
            }
          ],
          "combinator": "and"
        },
        "options": {}
      },
      "name": "Needs OCR?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2,
      "position": [
        1760,
        352
      ],
      "id": "81425410-323e-4cf1-b55e-56ba65d59c3a"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://YOUR_OCR_SERVICE/ocr",
        "sendBody": true,
        "contentType": "binaryData",
        "inputDataFieldName": "data",
        "options": {}
      },
      "name": "Run OCR",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4,
      "position": [
        2000,
        464
      ],
      "id": "729858db-a8ea-4564-bb67-547ef7953e94"
    },
    {
      "parameters": {
        "jsCode": "// Look for an order ID in the text field of each incoming item.\nconst regex = /\\bOR-[A-Z0-9-]{4,}\\b/;\n\n// Search every incoming item rather than only the first one. A missing or\n// non-string text field is treated as empty.\nreturn $input.all().map((item, index) => {\n  const text = typeof item.json.text === 'string' ? item.json.text : '';\n  const match = text.match(regex);\n\n  return {\n    json: { ...item.json, orderId: match ? match[0] : null },\n    pairedItem: { item: index }\n  };\n});"
      },
      "name": "Search OCR Text",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        2256,
        464
      ],
      "id": "f84b2743-d523-4862-90a5-22f1443edf34"
    },
    {
      "id": "62194033-4a1a-4046-b638-6d43fbc02ae5",
      "name": "Email Trigger (IMAP)",
      "type": "n8n-nodes-base.emailReadImap",
      "typeVersion": 2.1,
      "position": [0, 0],
      "credentials": {
        "imap": {
          "name": "<your credential>"
        }
      },
      "parameters": {
        "downloadAttachments": true,
        "options": {
          "trackLastMessageId": true,
          "customEmailConfig": "[\"UNSEEN\"]"
        }
      }
    },
    {
      "parameters": {
        "language": "pythonNative",
        "pythonCode": "# Placeholder node: tags every incoming item with 'my_new_field' and returns the items.\n# extract_from_pdf_binary() is a helper that scans PDF bytes for order IDs; nothing\n# in this node calls it yet.\nimport re\nfrom io import BytesIO\n\npattern = r\"\\bOR-[A-Za-z0-9-]+\\b\"   # example regex\n\ndef extract_from_pdf_binary(pdf_binary):\n    # Returns a list of {page, match} dicts, one per regex hit, with 1-based page numbers.\n    # pdfplumber is imported here rather than at the top so the node still runs\n    # without it for as long as this helper is unused.\n    import pdfplumber\n\n    matches = []\n\n    with pdfplumber.open(BytesIO(pdf_binary)) as pdf:\n        for page_num, page in enumerate(pdf.pages, start=1):\n            text = page.extract_text()\n\n            if not text:\n                continue\n\n            for m in re.findall(pattern, text):\n                matches.append({\n                    \"page\": page_num,\n                    \"match\": m\n                })\n\n    return matches\n\n# Loop over input items and add a new field called 'my_new_field' to the JSON of each one\nfor item in _items:\n  item[\"json\"][\"my_new_field\"] = 1\nreturn _items"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1360,
        368
      ],
      "id": "2af0c70f-dd8b-4cf4-b29b-e0328e2c4e9c",
      "name": "Code in Python"
    }
  ],
  "connections": {
    "Step1 Search Email Body": {
      "main": [
        [
          {
            "node": "Found In Body?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Step2 Split Attachments": {
      "main": [
        [
          {
            "node": "Extract Text From Attachment",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Text From Attachment": {
      "main": [
        [
          {
            "node": "Search Extracted Text",
            "type": "main",
            "index": 0
          },
          {
            "node": "Code in Python",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Search Extracted Text": {
      "main": [
        [
          {
            "node": "Found In Attachment?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Found In Attachment?": {
      "main": [
        [],
        [
          {
            "node": "Needs OCR?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Needs OCR?": {
      "main": [
        [
          {
            "node": "Run OCR",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Run OCR": {
      "main": [
        [
          {
            "node": "Search OCR Text",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Email Trigger (IMAP)": {
      "main": [
        [
          {
            "node": "Step1 Search Email Body",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Found In Body?": {
      "main": [
        [],
        [
          {
            "node": "Step2 Split Attachments",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {
    "executionOrder": "v1",
    "binaryMode": "separate",
    "availableInMCP": false
  },
  "versionId": "946283a3-1be7-4c8e-8216-30dd24ab0f7b",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "id": "OSxeUttD1S6bPxUe",
  "tags": []
}