{
  "id": "kGkBDcAuzlqqWeWP",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "fin_ocr_V1.3",
  "tags": [],
  "nodes": [
    {
      "id": "520e70c0-0a88-4837-95bd-dfc7a62f6b57",
      "name": "Download file",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        288,
        144
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $json.id }}"
        },
        "options": {
          "binaryPropertyName": "data"
        },
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "71c96b1b-91fb-4b15-b9e6-eafe713d34f5",
      "name": "Move failed",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        1696,
        208
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $('file name & ID').first().json.fileId }}"
        },
        "driveId": {
          "__rl": true,
          "mode": "list",
          "value": "My Drive",
          "cachedResultUrl": "https://drive.google.com/drive/my-drive",
          "cachedResultName": "My Drive"
        },
        "folderId": {
          "__rl": true,
          "mode": "list",
          "value": "1lhpEYJrR0ZpQ6yjsf_fa6Ob5X7Ms28ED",
          "cachedResultUrl": "https://drive.google.com/drive/folders/1lhpEYJrR0ZpQ6yjsf_fa6Ob5X7Ms28ED",
          "cachedResultName": "fin_ocr_failed"
        },
        "operation": "move"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "7f2ee535-6f0c-433a-a08d-daf53dc58ee9",
      "name": "Move successful",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        1872,
        304
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $('file name & ID').first().json.fileId }}"
        },
        "driveId": {
          "__rl": true,
          "mode": "list",
          "value": "My Drive",
          "cachedResultUrl": "https://drive.google.com/drive/my-drive",
          "cachedResultName": "My Drive"
        },
        "folderId": {
          "__rl": true,
          "mode": "list",
          "value": "1nZDoVJjKTITGSmb8BMu09TtHX8QGUkhr",
          "cachedResultUrl": "https://drive.google.com/drive/folders/1nZDoVJjKTITGSmb8BMu09TtHX8QGUkhr",
          "cachedResultName": "fin_ocr_done"
        },
        "operation": "move"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "1623bb25-f453-4e64-8ba9-3d8357c1a905",
      "name": "Loop Over Items",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -160,
        272
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "8a20d5de-c6d6-40e1-be01-5e00c7340010",
      "name": "file name & ID",
      "type": "n8n-nodes-base.set",
      "position": [
        64,
        144
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "2c6afd48-9425-4c53-aecc-02ddad6a80e0",
              "name": "fileId",
              "type": "string",
              "value": "={{ $json.id }}"
            },
            {
              "id": "0294a252-d8ea-4796-9a37-f88d27ac78f9",
              "name": "orfilename",
              "type": "string",
              "value": "={{ $json.originalFilename }}"
            }
          ]
        },
        "includeOtherFields": true
      },
      "typeVersion": 3.4
    },
    {
      "id": "b96ff546-abe2-4bf7-b8fd-67f29170a647",
      "name": "Done!",
      "type": "n8n-nodes-base.noOp",
      "position": [
        64,
        -48
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "a5b9cc98-fb3e-4faf-84e9-37cdd5b3fd91",
      "name": "AI Agent-OCR",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "onError": "continueErrorOutput",
      "position": [
        512,
        144
      ],
      "parameters": {
        "text": "=Extract the full text as output from the document as if you were reading it. Return the tables in html format. Return the equations in LaTeX representation.",
        "options": {
          "systemMessage": "You are a helpful assistant."
        },
        "promptType": "define",
        "needsFallback": true
      },
      "retryOnFail": true,
      "typeVersion": 2.2
    },
    {
      "id": "d27bfe6f-619d-49f6-a965-60dcac00c3d9",
      "name": "Data Cleaner",
      "type": "@n8n/n8n-nodes-langchain.openAi",
      "onError": "continueErrorOutput",
      "position": [
        864,
        80
      ],
      "parameters": {
        "modelId": {
          "__rl": true,
          "mode": "id",
          "value": "command-r7b:7b-12-2024-q8_0"
        },
        "options": {},
        "messages": {
          "values": [
            {
              "content": "=You are an expert financial data entry agent.\n\nYour input data will be a JSON object or text containing the optical character recognition (OCR) output of an invoice. It may contain missing fields, extra fields, or inconsistent formatting.\n\nYour task is to:\n1. Extract data from the input into a strictly valid JSON object using the following schema:\n{\n  \"InvoiceNumber\": \"\",\n  \"IssueDate\": \"\",\n  \"SupplierName\": \"\",\n  \"TotalAmount\": \"\",\n  \"Currency\": \"\",\n  \"Address\": \"\",\n  \"LineItems\": [\n    {\n      \"Description\": \"\",\n      \"Quantity\": \"\",\n      \"UnitPrice\": \"\",\n      \"VATRate\": \"\",\n      \"VATAmount\": \"\",\n      \"LineTotal\": \"\"\n    }\n  ]\n}\n2. Remove any fields not included above.\n3. If a value is missing or ambiguous, set it to null.\n4. Format \"LineItems\" as a list of dictionaries with the specified keys, and ensure all line item fields are present.\n5. Output only the sanitized, strictly valid Airtable-ready JSON object. Do not include any explanation or extra text.\n\nAbout the number fields (\"TotalAmount\", and all numeric fields in \"LineItems\"):\n1. All number fields must be formatted as valid JSON numbers (e.g., 17000, 1210.50) or be set to null.\n2. You MUST remove all currency symbols (e.g., \"$\", \"\u20ac\").\n3. You MUST remove all thousands separators (e.g., \",\").\n4. Ensure any decimal separator is a period \".\" (e.g., convert \"12,50\" to 12.50).\n5. For example: convert \"\u20ac17,000\" to 17000. Convert \"1.210,50\" to 1210.50.\n\nCritical: the fields MUST be filled ONLY from the input data.\nInput:\n{{ $json.output }}"
            }
          ]
        },
        "jsonOutput": true
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.8
    },
    {
      "id": "39ed5668-cf2b-4f76-8cb6-0f3ea0231059",
      "name": "OCR folder",
      "type": "n8n-nodes-base.googleDriveTrigger",
      "position": [
        -464,
        272
      ],
      "parameters": {
        "event": "fileCreated",
        "options": {
          "fileType": "all"
        },
        "pollTimes": {
          "item": [
            {
              "mode": "everyMinute"
            }
          ]
        },
        "triggerOn": "specificFolder",
        "folderToWatch": {
          "__rl": true,
          "mode": "list",
          "value": "1Zld3XYxC1kYYm_rMla3tuqeZsAaol9DS",
          "cachedResultUrl": "https://drive.google.com/drive/folders/1Zld3XYxC1kYYm_rMla3tuqeZsAaol9DS",
          "cachedResultName": "fin_ocr"
        }
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "b803e5e5-68e1-45b7-9474-3eb5bb7d0626",
      "name": "AirTable - Create a record1",
      "type": "n8n-nodes-base.airtable",
      "position": [
        1312,
        304
      ],
      "parameters": {
        "base": {
          "__rl": true,
          "mode": "list",
          "value": "apptHu8glutmVeMb5",
          "cachedResultUrl": "https://airtable.com/apptHu8glutmVeMb5",
          "cachedResultName": "ocr_base"
        },
        "table": {
          "__rl": true,
          "mode": "list",
          "value": "tbln2iu3xlqtI0WUH",
          "cachedResultUrl": "https://airtable.com/apptHu8glutmVeMb5/tbln2iu3xlqtI0WUH",
          "cachedResultName": "Invoices"
        },
        "columns": {
          "value": {
            "FileID": "={{ $('file name & ID').item.json.id }}",
            "Address": "={{ $json.message.content.Address }}",
            "Currency": "={{ $json.message.content.Currency }}",
            "FileName": "={{ $('file name & ID').item.json.name }}",
            "IssueDate": "={{ DateTime.fromFormat($json.message.content.IssueDate, 'dd/MM/yy').toISODate() }}",
            "LineItems": "={{ $json.message.content.LineItems }}",
            "TotalAmount": "={{ $json.message.content.TotalAmount }}",
            "SupplierName": "={{ $json.message.content.SupplierName }}",
            "InvoiceNumber": "={{ $json.message.content.InvoiceNumber }}"
          },
          "schema": [
            {
              "id": "itemID",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "itemID",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "InvoiceNumber",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "InvoiceNumber",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "IssueDate",
              "type": "dateTime",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "IssueDate",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "SupplierName",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "SupplierName",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "TotalAmount",
              "type": "number",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "TotalAmount",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Currency",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "Currency",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Address",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "Address",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "LineItems",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "LineItems",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "FileName",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "FileName",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "FileID",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "FileID",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "id"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {
          "typecast": true
        },
        "operation": "create"
      },
      "credentials": {
        "airtableTokenApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 2.1
    },
    {
      "id": "0443a7f6-67da-4536-b980-082b48432d05",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1216,
        -112
      ],
      "parameters": {
        "color": 6,
        "width": 560,
        "height": 640,
        "content": "## \ud83d\ude80 Welcome: OCR to Airtable Pipeline\nThis template automatically processes invoices and receipts:\n1.  **Watches** a Google Drive folder.\n2.  **Performs OCR** on the file.\n3.  **Cleans & Structures** the data into JSON.\n4.  **Saves** the clean data to Airtable.\n5.  **Moves** the processed file.\n\n---\n### \u26a0\ufe0f BEFORE YOU START\n    \n**1. Airtable**\nDuplicate the Airtable base from this [link](https://airtable.com/apptHu8glutmVeMb5/shrpowsDMM5r8mj1T), then create an Airtable personal access token to use as the credential for the Airtable node.\n\n**2. AI Models (Ollama)**\nPull these two local models from your terminal:\n```bash\n# For OCR\nollama pull benhaotang/Nanonets-OCR-s:F16\n    \n# For Cleaning\nollama pull command-r7b:7b-12-2024-q8_0\n```\n**Follow the numbered steps on the other notes to configure!**\n\n*(Dataset credit: [SROIE datasetv2](https://www.kaggle.com/datasets/urbikn/sroie-datasetv2))*"
      },
      "typeVersion": 1
    },
    {
      "id": "9972fd7b-b812-4e10-a044-1c6d88ca3f92",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -592,
        -16
      ],
      "parameters": {
        "width": 352,
        "height": 448,
        "content": "## 1. Configure \"New File\" Trigger\n\nThis node watches for new files to process.\n\n**Your To-Do:**\n1.  Select your Google Drive **Credentials**.\n2.  Choose the **Folder to Watch** (this is your \"inbox\" folder)."
      },
      "typeVersion": 1
    },
    {
      "id": "f781ba6c-36cb-4158-9fa3-195e7d18d28a",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        544,
        512
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "id",
          "value": "=benhaotang/Nanonets-OCR-s:F16"
        },
        "options": {
          "responseFormat": "text"
        }
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "484cc002-53bb-42ea-a54d-f6c75d6e06e7",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        336,
        464
      ],
      "parameters": {
        "color": 5,
        "width": 496,
        "height": 480,
        "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## 2. Configure OCR AI Model\n\nThis node connects to the AI model that performs the OCR.\n\n**Your To-Do:**\n1.  Select your **Credentials**. (If using Ollama, this is your OpenAI-compatible API, e.g., [http://localhost:11434/v1](http://localhost:11434/v1).)\n2.  Set the **Model** to `benhaotang/Nanonets-OCR-s:F16`.\n\n*Pro-tip: You can swap this for a cloud model (like GPT-4o with vision) for even higher accuracy.*"
      },
      "typeVersion": 1
    },
    {
      "id": "eccbea42-cfd1-4a0b-9cc8-437813e4992b",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        784,
        -256
      ],
      "parameters": {
        "color": 2,
        "width": 416,
        "height": 448,
        "content": "## 3. Configure Data Cleaning AI\n\nThis AI node takes the raw OCR text, cleans it, and structures it into the required JSON format.\n\n**Your To-Do:**\n1.  Select your **Credentials** (can be the same as the OCR model).\n2.  Set the **Model** to `command-r7b:7b-12-2024-q8_0`.\n\n*Pro-tip: For complex invoices, a more powerful model (like GPT-4o) will give the best results. The prompt is already optimized for this!*"
      },
      "typeVersion": 1
    },
    {
      "id": "8db3ef29-185f-4ff6-a1bf-3b123eef61c6",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1104,
        288
      ],
      "parameters": {
        "color": 6,
        "width": 496,
        "height": 560,
        "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## 4. Configure Airtable Output\n\nThis node saves the clean JSON data to your Airtable base.\n\n**Your To-Do:**\n1.  Make sure you duplicated the base from the link in the main note!\n2.  Select your Airtable **Credentials**.\n3.  Select your duplicated **Base** (\"ocr_base\") and **Table** (\"Invoices\").\n4.  The fields should map automatically.\n\n*Note: The **\"Typecast\"** option is enabled so Airtable correctly handles date strings from the AI.*"
      },
      "typeVersion": 1
    },
    {
      "id": "27a3770b-391f-4688-89bd-19aca6b307ec",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1632,
        -48
      ],
      "parameters": {
        "color": 7,
        "width": 496,
        "height": 496,
        "content": "## 5. Configure \"Done\" & \"Failed\" Folders\n\nThese nodes move the processed file to keep your inbox clean.\n\n**Your To-Do:**\n1.  Select your Google Drive **Credentials** for both nodes.\n2.  In **`Move successful`**, select your \"Done\" folder.\n3.  In **`Move failed`**, select your \"Failed\" folder."
      },
      "typeVersion": 1
    },
    {
      "id": "ddfbafad-d739-4e26-b0f3-27ef4ccbeebd",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -80,
        -240
      ],
      "parameters": {
        "color": 4,
        "width": 464,
        "height": 320,
        "content": "## 6. \u2705 All Done!\n\nThis node signals that the loop has finished processing all the files from its last run.\n\nThe workflow is now idle and will wait for the next file to be added to your Google Drive folder. \ud83e\udd73"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "7c309cda-f010-4b55-b1f2-7cea8dfb865d",
  "connections": {
    "OCR folder": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Move failed": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "AI Agent-OCR": {
      "main": [
        [
          {
            "node": "Data Cleaner",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Move failed",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Data Cleaner": {
      "main": [
        [
          {
            "node": "AirTable - Create a record1",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Move failed",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download file": {
      "main": [
        [
          {
            "node": "AI Agent-OCR",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "file name & ID": {
      "main": [
        [
          {
            "node": "Download file",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [
          {
            "node": "Done!",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "file name & ID",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Move successful": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent-OCR",
            "type": "ai_languageModel",
            "index": 0
          },
          {
            "node": "AI Agent-OCR",
            "type": "ai_languageModel",
            "index": 1
          }
        ]
      ]
    },
    "AirTable - Create a record1": {
      "main": [
        [
          {
            "node": "Move successful",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}