{
  "name": "AI Document Processor - Intelligent Data Extraction",
  "nodes": [
    {
      "parameters": {
        "httpMethod": "POST",
        "path": "document-upload",
        "responseMode": "onReceived",
        "options": {
          "rawBody": true
        }
      },
      "id": "webhook-upload",
      "name": "Document Upload Webhook",
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2,
      "position": [
        250,
        400
      ]
    },
    {
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "// Process incoming document upload (runs once per webhook item)\nconst data = $input.item.json;\nconst body = data.body || {};\n\n// Extract file info from webhook; values may arrive in the parsed body or at the top level\nconst fileName = String(body.fileName || data.fileName || 'unknown.pdf');\nconst fileUrl = body.fileUrl || data.fileUrl || '';\nconst uploadedBy = body.uploadedBy || data.uploadedBy || 'Unknown';\n// Lower-cased so it matches the prompt keys used later (invoice, contract, brief, receipt)\nconst documentType = String(body.documentType || data.documentType || 'unknown').toLowerCase();\n\n// Fail here with a clear message instead of an opaque error in the download node\nif (!fileUrl) {\n  throw new Error('Upload request is missing fileUrl');\n}\n\n// Only the text after the last dot is an extension; a name without a dot has none\nconst dotIndex = fileName.lastIndexOf('.');\nconst fileType = dotIndex >= 0 ? fileName.slice(dotIndex + 1).toLowerCase() : '';\n\n// Text-based formats are parsed directly; everything else falls back to OCR\nconst processingMethod = ['pdf', 'docx', 'doc'].includes(fileType) ? 'text_extraction' : 'ocr';\n\nreturn {\n  json: {\n    processing_id: `DOC-${Date.now()}`,\n    file_name: fileName,\n    file_url: fileUrl,\n    file_type: fileType,\n    document_type: documentType,\n    uploaded_by: uploadedBy,\n    processing_method: processingMethod,\n    uploaded_at: new Date().toISOString(),\n    status: 'processing'\n  }\n};"
      },
      "id": "parse-upload",
      "name": "Parse Document Metadata",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        470,
        400
      ]
    },
    {
      "parameters": {
        "url": "={{ $json.file_url }}",
        "options": {
          "response": {
            "response": {
              "fullResponse": false,
              "responseFormat": "file"
            }
          }
        }
      },
      "id": "download-file",
      "name": "Download Document",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        690,
        400
      ]
    },
    {
      "parameters": {
        "operation": "extractText",
        "options": {}
      },
      "id": "extract-text",
      "name": "Extract Text from PDF",
      "type": "n8n-nodes-base.readPDF",
      "typeVersion": 1,
      "position": [
        910,
        300
      ]
    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://api.ocr.space/parse/image",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "sendBody": true,
        "contentType": "multipart-form-data",
        "bodyParameters": {
          "parameters": [
            {
              "name": "base64Image",
              "value": "=data:{{ $('Download Document').item.binary.data.mimeType }};base64,{{ $('Download Document').item.binary.data.data }}"
            },
            {
              "name": "language",
              "value": "eng"
            }
          ]
        },
        "options": {}
      },
      "id": "ocr-scan",
      "name": "OCR Scan (Images)",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        910,
        500
      ]
    },
    {
      "parameters": {
        "mode": "combine",
        "combinationMode": "mergeByPosition",
        "options": {}
      },
      "id": "merge-text",
      "name": "Merge Extracted Text",
      "type": "n8n-nodes-base.merge",
      "typeVersion": 3,
      "position": [
        1130,
        400
      ]
    },
    {
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "// Prepare text for AI analysis (runs once per merged item)\nconst metadata = $('Parse Document Metadata').item.json;\nconst source = $input.item.json;\n\n// PDF extraction provides `text`; the OCR response provides ParsedResults[0].ParsedText\nconst rawText = source.text ||\n                source.ParsedResults?.[0]?.ParsedText ||\n                'No text extracted';\n// Coerce to a string so substring() below cannot throw on a non-string value\nconst extractedText = String(rawText);\n\nconst documentType = metadata.document_type;\n\n// Define extraction prompts based on document type\nconst prompts = {\n  invoice: `Extract these fields from this invoice:\\n- Invoice Number\\n- Invoice Date\\n- Due Date\\n- Vendor Name\\n- Vendor Address\\n- Total Amount\\n- Tax Amount\\n- Line Items (description, quantity, unit price, total)\\n- Payment Terms\\n- PO Number (if present)`,\n  \n  contract: `Extract these fields from this contract:\\n- Contract Title\\n- Contract Date\\n- Effective Date\\n- Expiration Date\\n- Party A (name, address)\\n- Party B (name, address)\\n- Contract Value\\n- Payment Terms\\n- Key Obligations\\n- Termination Clauses`,\n  \n  brief: `Extract these fields from this brief:\\n- Project Name\\n- Client Name\\n- Project Type\\n- Budget\\n- Timeline/Deadline\\n- Objectives\\n- Deliverables\\n- Key Requirements\\n- Contact Person`,\n  \n  receipt: `Extract these fields from this receipt:\\n- Merchant Name\\n- Transaction Date\\n- Total Amount\\n- Tax Amount\\n- Payment Method\\n- Items Purchased\\n- Receipt Number`,\n  \n  unknown: `Analyze this document and extract all key structured data including:\\n- Document type\\n- Date(s)\\n- Names/Entities\\n- Amounts/Numbers\\n- Key terms or clauses`\n};\n\n// Unrecognised document types fall back to the generic prompt\nconst prompt = prompts[documentType] || prompts.unknown;\n\nreturn {\n  json: {\n    processing_id: metadata.processing_id,\n    document_type: documentType,\n    extracted_text: extractedText.substring(0, 4000), // Limit for API\n    ai_prompt: prompt,\n    file_name: metadata.file_name,\n    uploaded_by: metadata.uploaded_by\n  }\n};"
      },
      "id": "prepare-ai-prompt",
      "name": "Prepare AI Extraction Prompt",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1350,
        400
      ]
    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://api.anthropic.com/v1/messages",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "anthropic-version",
              "value": "2023-06-01"
            },
            {
              "name": "content-type",
              "value": "application/json"
            }
          ]
        },
        "sendBody": true,
        "contentType": "json",
        "specifyBody": "json",
        "jsonBody": "={{ JSON.stringify({ model: 'claude-3-5-sonnet-20241022', max_tokens: 2048, messages: [{ role: 'user', content: $json.ai_prompt + '\\n\\nDocument text:\\n' + $json.extracted_text + '\\n\\nRespond ONLY with valid JSON containing the extracted fields. Use null for missing fields. Ensure all amounts are numbers, all dates are in YYYY-MM-DD format.' }] }) }}",
        "options": {}
      },
      "id": "ai-extraction",
      "name": "AI Data Extraction (Claude)",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        1570,
        400
      ]
    },
    {
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "// Parse AI response and structure data (runs once per item)\nconst aiResponse = $input.item.json;\nconst metadata = $('Prepare AI Extraction Prompt').item.json;\n\n// An absent response becomes an empty string so it is treated as a parse failure\nconst aiText = String(aiResponse.content?.[0]?.text ?? '');\nlet extractedData;\n\ntry {\n  // Strip an optional markdown code fence (``` or ```json) before parsing\n  const fenced = aiText.match(/```(?:json)?\\s*([\\s\\S]*?)\\s*```/);\n  const parsed = JSON.parse(fenced ? fenced[1] : aiText);\n  // Only a plain object can carry named fields; reject arrays and primitives\n  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {\n    throw new Error('AI response is not a JSON object');\n  }\n  extractedData = parsed;\n} catch (error) {\n  extractedData = {\n    error: 'Failed to parse AI response',\n    raw_response: aiText.substring(0, 500)\n  };\n}\n\n// Add metadata\nextractedData.processing_id = metadata.processing_id;\nextractedData.file_name = metadata.file_name;\nextractedData.document_type = metadata.document_type;\nextractedData.uploaded_by = metadata.uploaded_by;\nextractedData.processed_at = new Date().toISOString();\nextractedData.ai_model = 'claude-3-5-sonnet';\n\n// Keys added by this workflow rather than extracted from the document\nconst bookkeepingKeys = ['processing_id', 'file_name', 'document_type', 'uploaded_by', 'processed_at', 'ai_model', 'error', 'raw_response'];\nconst extractedFields = Object.keys(extractedData).filter(k => !bookkeepingKeys.includes(k));\n\n// 0 and false are legitimate values; only null, undefined and '' count as missing\nconst missingFields = extractedFields.filter(field => {\n  const value = extractedData[field];\n  return value === null || value === undefined || value === '';\n});\n\n// A failed parse or an empty object means nothing was extracted, which must never score as complete\nconst nothingExtracted = extractedFields.length === 0;\nif (nothingExtracted) {\n  missingFields.push('all fields (no data extracted)');\n}\n\nextractedData.missing_fields_count = missingFields.length;\nextractedData.missing_fields = missingFields.join(', ');\nextractedData.extraction_confidence = nothingExtracted ? 'low' : missingFields.length === 0 ? 'high' : missingFields.length <= 2 ? 'medium' : 'low';\n\nreturn { json: extractedData };"
      },
      "id": "parse-ai-response",
      "name": "Parse & Structure Extracted Data",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1790,
        400
      ]
    },
    {
      "parameters": {
        "authentication": "serviceAccount",
        "operation": "appendOrUpdate",
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": ""
        },
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": ""
        },
        "columns": {
          "mappingMode": "autoMapInputData",
          "value": {}
        },
        "options": {}
      },
      "id": "save-to-sheets",
      "name": "Save to Google Sheets",
      "type": "n8n-nodes-base.googleSheets",
      "typeVersion": 4.4,
      "position": [
        2010,
        400
      ]
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict"
          },
          "conditions": [
            {
              "id": "missing-check",
              "leftValue": "={{ $json.missing_fields_count }}",
              "rightValue": 0,
              "operator": {
                "type": "number",
                "operation": "equals"
              }
            },
            {
              "id": "confidence-check",
              "leftValue": "={{ $json.extraction_confidence }}",
              "rightValue": "low",
              "operator": {
                "type": "string",
                "operation": "notEquals"
              }
            }
          ],
          "combinator": "and"
        },
        "options": {}
      },
      "id": "check-completeness",
      "name": "Check Data Completeness",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2,
      "position": [
        2230,
        400
      ]
    },
    {
      "parameters": {
        "select": "channel",
        "channelId": {
          "__rl": true,
          "mode": "list",
          "value": ""
        },
        "text": "=\u26a0\ufe0f *Document Review Needed*\n\n*Processing ID:* {{ $json.processing_id }}\n*File:* {{ $json.file_name }}\n*Type:* {{ $json.document_type }}\n*Uploaded by:* {{ $json.uploaded_by }}\n\n*Issue:* {{ $json.missing_fields_count }} fields missing or incomplete\n*Missing fields:* {{ $json.missing_fields }}\n*Confidence:* {{ $json.extraction_confidence }}\n\n*Action needed:* Please review and complete the data in Google Sheets.\n\n<SHEET_URL_HERE|View in Sheets>",
        "otherOptions": {}
      },
      "id": "alert-slack-incomplete",
      "name": "Alert - Incomplete Data (Slack)",
      "type": "n8n-nodes-base.slack",
      "typeVersion": 2.2,
      "position": [
        2450,
        300
      ]
    },
    {
      "parameters": {
        "resource": "mail",
        "fromEmail": "documents@company.com",
        "fromName": "Document Processor",
        "toEmail": "={{ $json.uploaded_by }}",
        "subject": "=\u26a0\ufe0f Document Processed - Review Required: {{ $json.file_name }}",
        "dynamicTemplate": false,
        "contentType": "text/plain",
        "contentValue": "=Hi,\n\nYour document has been processed, but we need your help to complete some information.\n\nDocument: {{ $json.file_name }}\nProcessing ID: {{ $json.processing_id }}\nType: {{ $json.document_type }}\n\nStatus: Review Required\nMissing fields: {{ $json.missing_fields }}\n\nWe've extracted as much data as possible, but {{ $json.missing_fields_count }} field(s) need your attention.\n\nPlease review the data in our system and fill in the missing information.\n\nIf you have questions, reply to this email or contact support.\n\nThanks,\nAutomated Document Processing Team"
      },
      "id": "email-review-needed",
      "name": "Email - Review Needed",
      "type": "n8n-nodes-base.sendGrid",
      "typeVersion": 1,
      "position": [
        2450,
        500
      ]
    },
    {
      "parameters": {
        "resource": "mail",
        "fromEmail": "documents@company.com",
        "fromName": "Document Processor",
        "toEmail": "={{ $json.uploaded_by }}",
        "subject": "=\u2705 Document Successfully Processed: {{ $json.file_name }}",
        "dynamicTemplate": false,
        "contentType": "text/html",
        "contentValue": "=<!DOCTYPE html>\n<html>\n<head>\n  <style>\n    body { font-family: Arial, sans-serif; }\n    .header { background: #4CAF50; color: white; padding: 20px; text-align: center; }\n    .content { padding: 20px; }\n    .success { color: #4CAF50; font-weight: bold; }\n    .field { margin: 10px 0; padding: 10px; background: #f5f5f5; border-radius: 5px; }\n    .field-label { font-weight: bold; color: #555; }\n  </style>\n</head>\n<body>\n  <div class=\"header\">\n    <h1>\u2705 Document Successfully Processed</h1>\n  </div>\n  <div class=\"content\">\n    <p>Hi,</p>\n    <p class=\"success\">Your document has been successfully processed and all data extracted!</p>\n\n    <div class=\"field\">\n      <div class=\"field-label\">Document:</div>\n      {{ $json.file_name }}\n    </div>\n\n    <div class=\"field\">\n      <div class=\"field-label\">Processing ID:</div>\n      {{ $json.processing_id }}\n    </div>\n\n    <div class=\"field\">\n      <div class=\"field-label\">Document Type:</div>\n      {{ $json.document_type }}\n    </div>\n\n    <div class=\"field\">\n      <div class=\"field-label\">Processed:</div>\n      {{ new Date($json.processed_at).toLocaleString() }}\n    </div>\n\n    <div class=\"field\">\n      <div class=\"field-label\">Confidence:</div>\n      {{ $json.extraction_confidence }} ({{ $json.missing_fields_count === 0 ? 'All fields extracted' : 'Some fields incomplete' }})\n    </div>\n\n    <p>The extracted data has been saved to your database and is ready for use.</p>\n\n    <p>If you notice any errors, please contact support with your Processing ID.</p>\n\n    <p>Thanks,<br>Automated Document Processing Team</p>\n  </div>\n</body>\n</html>"
      },
      "id": "email-success",
      "name": "Email - Success Confirmation",
      "type": "n8n-nodes-base.sendGrid",
      "typeVersion": 1,
      "position": [
        2450,
        100
      ]
    },
    {
      "parameters": {
        "select": "channel",
        "channelId": {
          "__rl": true,
          "mode": "list",
          "value": ""
        },
        "text": "=\u2705 *Document Processed Successfully*\n\n*File:* {{ $json.file_name }}\n*Type:* {{ $json.document_type }}\n*Uploaded by:* {{ $json.uploaded_by }}\n*Confidence:* {{ $json.extraction_confidence }}\n\nAll fields extracted and saved to Google Sheets.",
        "otherOptions": {}
      },
      "id": "notify-slack-success",
      "name": "Notify - Success (Slack)",
      "type": "n8n-nodes-base.slack",
      "typeVersion": 2.2,
      "position": [
        2450,
        200
      ]
    },
    {
      "parameters": {
        "operation": "appendOrUpdate",
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": ""
        },
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": ""
        },
        "columns": {
          "mappingMode": "defineBelow",
          "value": {
            "Timestamp": "={{ $now.toISO() }}",
            "Processing ID": "={{ $('Parse & Structure Extracted Data').item.json.processing_id }}",
            "File Name": "={{ $('Parse & Structure Extracted Data').item.json.file_name }}",
            "Document Type": "={{ $('Parse & Structure Extracted Data').item.json.document_type }}",
            "Uploaded By": "={{ $('Parse & Structure Extracted Data').item.json.uploaded_by }}",
            "Status": "={{ $('Parse & Structure Extracted Data').item.json.missing_fields_count === 0 ? 'Complete' : 'Incomplete' }}",
            "Missing Fields": "={{ $('Parse & Structure Extracted Data').item.json.missing_fields || 'None' }}",
            "Confidence": "={{ $('Parse & Structure Extracted Data').item.json.extraction_confidence }}"
          },
          "matchingColumns": [
            "Processing ID"
          ]
        },
        "options": {}
      },
      "id": "log-processing",
      "name": "Log to Audit Trail",
      "type": "n8n-nodes-base.googleSheets",
      "typeVersion": 4.4,
      "position": [
        2670,
        400
      ]
    },
    {
      "parameters": {},
      "id": "error-trigger",
      "name": "Error Trigger",
      "type": "n8n-nodes-base.errorTrigger",
      "typeVersion": 1,
      "position": [
        250,
        700
      ]
    },
    {
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "// Format error details for notification (runs once per error item)\nconst payload = $input.item.json;\nconst execution = payload.execution || {};\nconst workflow = payload.workflow || {};\n\n// Execution failures carry the error under execution.error and trigger failures under trigger.error;\n// the top-level lookup is kept as a fallback for other payload shapes\nconst error = execution.error || payload.trigger?.error || payload.error || {};\n\nreturn {\n  json: {\n    error_type: error.name || 'Unknown Error',\n    error_message: error.message || 'No error message available',\n    error_stack: typeof error.stack === 'string' ? error.stack.substring(0, 500) : '',\n    failed_node: execution.lastNodeExecuted || error.node?.name || 'Unknown Node',\n    // Holds the execution id; the key name is kept because the alert nodes read it\n    workflow_id: execution.id || 'unknown',\n    workflow_name: workflow.name || 'AI Document Processor',\n    timestamp: new Date().toISOString(),\n    severity: 'high'\n  }\n};"
      },
      "id": "format-error",
      "name": "Format Error Details",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        470,
        700
      ]
    },
    {
      "parameters": {
        "select": "channel",
        "channelId": {
          "__rl": true,
          "mode": "list",
          "value": ""
        },
        "text": "=\ud83d\udea8 *Workflow Error Alert*\n\n*Workflow:* {{ $json.workflow_name }}\n*Failed Node:* {{ $json.failed_node }}\n*Error Type:* {{ $json.error_type }}\n*Time:* {{ $json.timestamp }}\n\n*Error Message:*\n```\n{{ $json.error_message }}\n```\n\n*Action needed:* Check workflow execution logs for details.\n*Execution ID:* {{ $json.workflow_id }}",
        "otherOptions": {}
      },
      "id": "alert-error-slack",
      "name": "Alert - Error (Slack)",
      "type": "n8n-nodes-base.slack",
      "typeVersion": 2.2,
      "position": [
        690,
        700
      ]
    },
    {
      "parameters": {
        "resource": "mail",
        "fromEmail": "alerts@company.com",
        "fromName": "n8n Alerts",
        "toEmail": "devops@company.com",
        "subject": "\ud83d\udea8 Document Processor Workflow Error",
        "dynamicTemplate": false,
        "contentType": "text/plain",
        "contentValue": "=Workflow Error Alert\n\nWorkflow: {{ $json.workflow_name }}\nFailed Node: {{ $json.failed_node }}\nError Type: {{ $json.error_type }}\nTime: {{ $json.timestamp }}\n\nError Message:\n{{ $json.error_message }}\n\nExecution ID: {{ $json.workflow_id }}\n\nPlease investigate and resolve."
      },
      "id": "email-error-alert",
      "name": "Email - Error Alert",
      "type": "n8n-nodes-base.sendGrid",
      "typeVersion": 1,
      "position": [
        690,
        850
      ]
    }
  ],
  "connections": {
    "Document Upload Webhook": {
      "main": [
        [
          {
            "node": "Parse Document Metadata",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse Document Metadata": {
      "main": [
        [
          {
            "node": "Download Document",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download Document": {
      "main": [
        [
          {
            "node": "Extract Text from PDF",
            "type": "main",
            "index": 0
          },
          {
            "node": "OCR Scan (Images)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Text from PDF": {
      "main": [
        [
          {
            "node": "Merge Extracted Text",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OCR Scan (Images)": {
      "main": [
        [
          {
            "node": "Merge Extracted Text",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Merge Extracted Text": {
      "main": [
        [
          {
            "node": "Prepare AI Extraction Prompt",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Prepare AI Extraction Prompt": {
      "main": [
        [
          {
            "node": "AI Data Extraction (Claude)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "AI Data Extraction (Claude)": {
      "main": [
        [
          {
            "node": "Parse & Structure Extracted Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse & Structure Extracted Data": {
      "main": [
        [
          {
            "node": "Save to Google Sheets",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Save to Google Sheets": {
      "main": [
        [
          {
            "node": "Check Data Completeness",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check Data Completeness": {
      "main": [
        [
          {
            "node": "Email - Success Confirmation",
            "type": "main",
            "index": 0
          },
          {
            "node": "Notify - Success (Slack)",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Alert - Incomplete Data (Slack)",
            "type": "main",
            "index": 0
          },
          {
            "node": "Email - Review Needed",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Email - Success Confirmation": {
      "main": [
        [
          {
            "node": "Log to Audit Trail",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Notify - Success (Slack)": {
      "main": [
        [
          {
            "node": "Log to Audit Trail",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Alert - Incomplete Data (Slack)": {
      "main": [
        [
          {
            "node": "Log to Audit Trail",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Email - Review Needed": {
      "main": [
        [
          {
            "node": "Log to Audit Trail",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Error Trigger": {
      "main": [
        [
          {
            "node": "Format Error Details",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Format Error Details": {
      "main": [
        [
          {
            "node": "Alert - Error (Slack)",
            "type": "main",
            "index": 0
          },
          {
            "node": "Email - Error Alert",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1",
    "errorWorkflow": ""
  },
  "staticData": null,
  "tags": [],
  "triggerCount": 2,
  "versionId": null
}