{
  "id": "<your_workflow_id>",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Invoice OCR \u2192 Auto Append to Sheets (Internal)",
  "tags": [],
  "nodes": [
    {
      "id": "c79d74f7-f2d7-4e35-891d-c428fcfc5dcc",
      "name": "Schedule Trigger (Weekly Scan)",
      "type": "n8n-nodes-base.scheduleTrigger",
      "position": [
        368,
        -416
      ],
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "weeks",
              "triggerAtDay": [
                1
              ],
              "triggerAtHour": 20
            }
          ]
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "acb76a11-4ee8-468c-8023-a4f886f428c2",
      "name": "Get Parent Folder",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        640,
        -416
      ],
      "parameters": {
        "filter": {},
        "options": {},
        "resource": "fileFolder",
        "returnAll": true,
        "queryString": "=\u652f\u6255\u3044\u8acb\u6c42\u66f8\u81ea\u52d5\u8a08\u7b97\u7528\u30d5\u30a9\u30eb\u30c0"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "499da8a9-f455-4e8e-bf38-f3b5334ec442",
      "name": "Get Monthly Subfolder",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        848,
        -416
      ],
      "parameters": {
        "filter": {},
        "options": {},
        "resource": "fileFolder",
        "returnAll": true,
        "queryString": "='{{$json[\"id\"]}}' in parents and mimeType='application/vnd.google-apps.folder' and name contains '{{$now.setZone(\"Asia/Tokyo\").format(\"yyyy\u5e74MM\u6708\")}}\u5206'",
        "searchMethod": "query"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3,
      "alwaysOutputData": true
    },
    {
      "id": "6c95fb12-8dcd-4df3-a611-d099c3298274",
      "name": "List Files",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        1040,
        -416
      ],
      "parameters": {
        "filter": {},
        "options": {
          "fields": [
            "webViewLink",
            "id",
            "mimeType",
            "name"
          ]
        },
        "resource": "fileFolder",
        "queryString": "='{{$json[\"id\"]}}' in parents and (mimeType='application/pdf' or mimeType contains 'image/')\n",
        "searchMethod": "query"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "93009ba2-3af8-4ba0-9044-cf5fa15e0965",
      "name": "Download File (Binary from Drive)",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        1232,
        -416
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{$json[\"id\"]}}"
        },
        "options": {
          "binaryPropertyName": "data"
        },
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "b0761836-e013-4728-bb12-dc2e760cfa7f",
      "name": "OCR (OCR.Space Parsing)",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1440,
        -416
      ],
      "parameters": {
        "url": "https://api.ocr.space/parse/image",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "contentType": "multipart-form-data",
        "sendHeaders": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "file",
              "parameterType": "formBinaryData",
              "inputDataFieldName": "data"
            },
            {
              "name": "language",
              "value": "jpn"
            },
            {
              "name": "isOverlayRequired",
              "value": "false"
            },
            {
              "name": "OCREngine",
              "value": "2"
            },
            {
              "name": "isTable",
              "value": "true"
            },
            {
              "name": "scale",
              "value": "true"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "=apikey",
              "value": "=<your_ocr_api_key>"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "2f843a4c-82dd-45ae-8be8-b438d93fb8ea",
      "name": "Clean OCR Text (Noise Removal)",
      "type": "n8n-nodes-base.code",
      "position": [
        1680,
        -416
      ],
      "parameters": {
        "jsCode": "// \u5404\u30a2\u30a4\u30c6\u30e0\u3054\u3068\u306bOCR\u7d50\u679c\u3092\u6574\u5f62\u3057\u3066\u8fd4\u3059\nreturn items.map(item => {\n  const parsed = item.json[\"ParsedResults\"]?.[0];\n  let text = parsed?.ParsedText || \"\";\n\n  // \u30ce\u30a4\u30ba\u9664\u53bb\u30fb\u6574\u5f62\n  text = text\n    .replace(/\\r/g, \"\\n\")             // \u6539\u884c\u30b3\u30fc\u30c9\u7d71\u4e00\n    .replace(/\\n{2,}/g, \"\\n\")         // \u4f59\u5206\u306a\u6539\u884c\u3092\u524a\u9664\n    .replace(/[^\\S\\n]+/g, \" \")        // \u4e0d\u8981\u306a\u7a7a\u767d\u3092\u524a\u9664\n    .replace(/\u3000/g, \" \")              // \u5168\u89d2\u30b9\u30da\u30fc\u30b9\u3092\u534a\u89d2\u306b\n    .replace(/[\u201c\u201d]/g, '\"')            // \u5909\u306a\u5f15\u7528\u7b26\u3092\u7d71\u4e00\n    .replace(/[\u2018\u2019]/g, \"'\")\n    .replace(/[\u5186\u00a5]/g, \"\u5186\");         // \u5186\u8a18\u53f7\u3092\u7d71\u4e00\n\n  return {\n    json: {\n      text: text.trim()\n    }\n  };\n});\n"
      },
      "typeVersion": 2
    },
    {
      "id": "888bed82-b755-4416-948c-4db664c1b371",
      "name": "AI Extraction (Generate Structured JSON)",
      "type": "@n8n/n8n-nodes-langchain.openAi",
      "position": [
        304,
        -32
      ],
      "parameters": {
        "modelId": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o-mini",
          "cachedResultName": "GPT-4O-MINI"
        },
        "options": {},
        "messages": {
          "values": [
            {
              "role": "system",
              "content": "You are an AI assistant that analyzes Japanese invoice OCR text and accurately extracts the information required for accounting.\n\n# Input\nBelow is invoice text extracted by OCR.\nIt contains noise, line breaks, and layout issues.\nCarefully reconstruct the content and convert it into structured data as accurately as possible.\n\n---\n{{ $json.text }}\n---\n\n# Output Format\nReturn ONLY the following JSON. Do not include any explanations, chatty text, or code fences.\n\n{\n  \"invoice_date\": \"YYYY-MM-DD\",\n  \"due_date\": \"YYYY-MM-DD\",\n  \"client_name\": \"e.g., Your Client Company\",\n  \"subtotal\": 0,\n  \"tax\": 0,\n  \"total\": 0,\n  \"bank_info\": {\n    \"bank_name\": \"\",\n    \"branch\": \"\",\n    \"account_type\": \"ordinary|checking|savings|unknown\",\n    \"account_number\": \"\",\n    \"account_name\": \"\"\n  },\n  \"items\": [\n    {\n      \"description\": \"\",\n      \"quantity\": 0,\n      \"unit_price\": 0,\n      \"amount\": 0\n    }\n  ],\n  \"notes\": \"\"\n}\n\n# Extraction Rules\n- Identify dates from labels like \"\u8acb\u6c42\u65e5/\u767a\u884c\u65e5\" (invoice/issue date) and \"\u652f\u6255\u671f\u9650/\u304a\u652f\u6255\u3044\u671f\u65e5\" (due date), then normalize to YYYY-MM-DD.\n- Normalize amounts (subtotal, tax, total) by removing commas, currency symbols, and the \"\u5186\" unit, and output integers. Ensure consistency for tax-inclusive/exclusive totals when needed.\n- For bank info, prioritize extracting the set: bank name, branch, account type, account number, and account holder. Ignore values that cannot be confidently linked.\n- If there are multiple line items, return multiple objects in the items array. When quantity/unit_price/amount are unknown, use 0 (not \"unknown\").\n- Prefer client names that end with honorifics like \"\u5fa1\u4e2d\" or \"\u69d8\" when present.\n- For unknown fields, use \"unknown\". Always include all keys.\n- Return valid JSON only. No extra characters or leading/trailing newlines.\n"
            },
            {
              "content": "=={{$json.text}}"
            }
          ]
        },
        "jsonOutput": true
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.8
    },
    {
      "id": "2805e6a9-0304-4842-b5b6-eb4996b08dec",
      "name": "Parse AI Output to JSON",
      "type": "n8n-nodes-base.code",
      "position": [
        656,
        -32
      ],
      "parameters": {
        "jsCode": "// \u5404\u30a2\u30a4\u30c6\u30e0\uff08PDF\u3054\u3068\uff09\u3092\u72ec\u7acb\u3057\u3066\u51e6\u7406\nreturn items.map(item => {\n  const content = item.json.message?.content;\n\n  // JSON\u69cb\u9020\u3092\u5b89\u5168\u306b\u30d1\u30fc\u30b9\n  let parsed = {};\n  try {\n    parsed = typeof content === \"string\" ? JSON.parse(content) : content;\n  } catch (e) {\n    parsed = { error: \"Invalid JSON\", raw: content };\n  }\n\n  // \u5404\u30a2\u30a4\u30c6\u30e0\u3068\u3057\u3066\u51fa\u529b\uff08\u500b\u5225\u306b\u30b9\u30d7\u30ec\u30c3\u30c9\u30b7\u30fc\u30c8\u306b\u6e21\u308b\uff09\n  return { json: parsed };\n});\n"
      },
      "typeVersion": 2
    },
    {
      "id": "0d2e5f79-3c09-4352-af43-d7d895669991",
      "name": "Append to Google Sheets",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        848,
        -32
      ],
      "parameters": {
        "columns": {
          "value": {
            "\u5408\u8a08": "={{ $json.total }}",
            "\u5c0f\u8a08": "={{ $json.subtotal }}",
            "\u6d88\u8cbb\u7a0e": "={{ $json.tax }}",
            "\u8acb\u6c42\u65e5": "={{ $json.invoice_date }}",
            "\u9280\u884c\u540d": "={{ $json.bank_info.bank_name }} {{ $json.bank_info.branch }} {{ $json.bank_info.account_type }}",
            "PDF\u30ea\u30f3\u30af": "={{ $('List Files').item.json.webViewLink }}",
            "\u53d6\u5f15\u5148\u540d": "={{ $json.client_name }}",
            "\u53e3\u5ea7\u540d\u7fa9": "={{ $json.bank_info.account_name }}",
            "\u53e3\u5ea7\u756a\u53f7": "={{ $json.bank_info.account_number }}",
            "\u652f\u6255\u671f\u9650": "={{ $json.due_date }}"
          },
          "schema": [
            {
              "id": "\u8acb\u6c42\u65e5",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "\u8acb\u6c42\u65e5",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "\u652f\u6255\u671f\u9650",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "\u652f\u6255\u671f\u9650",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "\u53d6\u5f15\u5148\u540d",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "\u53d6\u5f15\u5148\u540d",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "\u5c0f\u8a08",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "\u5c0f\u8a08",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "\u6d88\u8cbb\u7a0e",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "\u6d88\u8cbb\u7a0e",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "\u5408\u8a08",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "\u5408\u8a08",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "\u9280\u884c\u540d",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "\u9280\u884c\u540d",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "\u53e3\u5ea7\u756a\u53f7",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "\u53e3\u5ea7\u756a\u53f7",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "\u53e3\u5ea7\u540d\u7fa9",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "\u53e3\u5ea7\u540d\u7fa9",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "PDF\u30ea\u30f3\u30af",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "PDF\u30ea\u30f3\u30af",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "name",
          "value": "={{ ($item(0).$node[\"Get Monthly Subfolder\"].json.name || \"\u8acb\u6c42\u66f8\u53f0\u5e33\").trim() }}"
        },
        "documentId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ \"<your_google_sheet_id>\" }}\n"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "07e4ec6a-0526-44e6-a7ef-95b2d3e7cc22",
      "name": "Schedule Trigger (Recurring Execution)",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        272,
        -640
      ],
      "parameters": {
        "color": 3,
        "width": 336,
        "height": 384,
        "content": "## Schedule Trigger (Recurring Execution)\nPurpose\n- Periodically scan the Drive folder that stores invoice PDFs.\n- Can also be executed manually for debugging."
      },
      "typeVersion": 1
    },
    {
      "id": "7175d70d-bd7a-46c5-96c1-71691e215da5",
      "name": "Google Drive Folder Discovery",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        624,
        -640
      ],
      "parameters": {
        "color": 2,
        "width": 752,
        "height": 384,
        "content": "## Google Drive Folder Discovery\nPurpose\n- Get the parent folder (e.g., \"\u652f\u6255\u3044\u8acb\u6c42\u66f8\u81ea\u52d5\u8a08\u7b97\u7528\u30d5\u30a9\u30eb\u30c0\" meaning \"Folder for automatic invoice calculation\").\n- Auto-detect the current-month subfolder (e.g., a folder named \"2025\u5e7410\u6708\u5206\" meaning \"October 2025\").\n- Search PDF/image files inside the target folder."
      },
      "typeVersion": 1
    },
    {
      "id": "7c0a7024-98f7-42f9-8937-4aced9dcbf7a",
      "name": "AI Extraction & Sheets Append",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        272,
        -224
      ],
      "parameters": {
        "color": 4,
        "width": 848,
        "height": 384,
        "content": "## AI Structured Extraction & Google Sheets Append\nPurpose\n- Convert OCR results to a strict JSON structure using GPT.\n- Extract invoice_date / due_date / client_name / subtotal / tax / total / bank_info, etc.\n- Normalize with a Code node, then append one row per invoice to Google Sheets."
      },
      "typeVersion": 1
    },
    {
      "id": "bb521714-8b47-4a03-abc0-9e18a471021c",
      "name": "OCR Processing (PDF \u2192 Text Cleanup)",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1392,
        -640
      ],
      "parameters": {
        "color": 6,
        "width": 400,
        "height": 384,
        "content": "## OCR Processing (PDF \u2192 Text Cleanup)\nPurpose\n- Download Drive files as binary.\n- Use the OCR.Space API for text extraction.\n- Remove noise and normalize line breaks."
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "<your_version_id>",
  "connections": {
    "List Files": {
      "main": [
        [
          {
            "node": "Download File (Binary from Drive)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Parent Folder": {
      "main": [
        [
          {
            "node": "Get Monthly Subfolder",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Monthly Subfolder": {
      "main": [
        [
          {
            "node": "List Files",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OCR (OCR.Space Parsing)": {
      "main": [
        [
          {
            "node": "Clean OCR Text (Noise Removal)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse AI Output to JSON": {
      "main": [
        [
          {
            "node": "Append to Google Sheets",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Clean OCR Text (Noise Removal)": {
      "main": [
        [
          {
            "node": "AI Extraction (Generate Structured JSON)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Schedule Trigger (Weekly Scan)": {
      "main": [
        [
          {
            "node": "Get Parent Folder",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download File (Binary from Drive)": {
      "main": [
        [
          {
            "node": "OCR (OCR.Space Parsing)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "AI Extraction (Generate Structured JSON)": {
      "main": [
        [
          {
            "node": "Parse AI Output to JSON",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}