{
  "id": "PubBpd4HHWaOtHJV",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Nanonets Document Processing",
  "tags": [],
  "nodes": [
    {
      "id": "ce555c80-3e22-4cda-90fd-8a741dc77f52",
      "name": "HTTP Request",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -960,
        0
      ],
      "parameters": {
        "url": "=https://app.nanonets.com/api/v4/workflows/28f99e34-2bfb-4d50-8ac3-17eaa5ee2ecf/documents/",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "contentType": "multipart-form-data",
        "authentication": "genericCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "=file",
              "parameterType": "formBinaryData",
              "inputDataFieldName": "=data"
            },
            {
              "name": "async",
              "value": "false"
            }
          ]
        },
        "genericAuthType": "httpBasicAuth"
      },
      "typeVersion": 4.2
    },
    {
      "id": "72b6d303-d070-4a26-a218-37e2fdf22d3d",
      "name": "HTTP Request2",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -280,
        0
      ],
      "parameters": {
        "url": "=https://app.nanonets.com/api/v4/workflows/28f99e34-2bfb-4d50-8ac3-17eaa5ee2ecf/documents/{{ $json.documentId }}",
        "options": {},
        "authentication": "genericCredentialType",
        "genericAuthType": "httpBasicAuth"
      },
      "typeVersion": 4.2
    },
    {
      "id": "b24e5adb-e583-40db-95fa-112250b240e3",
      "name": "Convert to File",
      "type": "n8n-nodes-base.convertToFile",
      "position": [
        140,
        0
      ],
      "parameters": {
        "options": {},
        "operation": "xls"
      },
      "typeVersion": 1.1
    },
    {
      "id": "9b4575f9-5f13-4d3a-befd-827844f52799",
      "name": "Code",
      "type": "n8n-nodes-base.code",
      "position": [
        -740,
        0
      ],
      "parameters": {
        "jsCode": "// Extract the Nanonets document id from the upload response.\n//\n// Input : $('HTTP Request').item.json.data - the upload response, either as a\n//         JSON string or as an already-parsed object.\n// Output: a single item { documentId }, read by HTTP Request2 as $json.documentId.\n\nconst rawData = $('HTTP Request').item.json.data;\n\n// Parse only when the payload is still a string\nconst parsedData = typeof rawData === 'string' ? JSON.parse(rawData) : rawData;\n\nconst docId = parsedData?.document_id;\n\n// Fail here with a clear message rather than letting the follow-up request\n// run against a URL that has no document id in it\nif (docId === undefined || docId === null || docId === '') {\n  throw new Error('Nanonets upload response did not contain a document_id');\n}\n\n// Return an array of items, the documented output shape for the Code node\nreturn [{ json: { documentId: docId } }];"
      },
      "typeVersion": 2
    },
    {
      "id": "0a82a366-71ed-4fa7-8e07-2c7573bee169",
      "name": "Code1",
      "type": "n8n-nodes-base.code",
      "position": [
        -60,
        0
      ],
      "parameters": {
        "jsCode": "// Flatten a Nanonets document result into one n8n item per table row.\n//\n// Input : $('HTTP Request2').item.json.data - the API response, either as a\n//         JSON string or as an already-parsed object.\n// Output: one item per row of the first table on the first page, each holding\n//         the document header fields plus that row's cells as line_item_*\n//         keys. If no rows are found, one item with only the header fields.\n\nconst rawData = $('HTTP Request2').item.json.data;\n\n// Parse only when the payload is still a string\nconst parsedData = typeof rawData === 'string' ? JSON.parse(rawData) : rawData;\nif (!parsedData || typeof parsedData !== 'object') {\n  throw new Error('HTTP Request2 returned no parsable document data');\n}\n\n// 1. Base object with the document-level fields\nconst headerData = {\n  document_id: parsedData.document_id,\n  original_document_name: parsedData.original_document_name,\n  status: parsedData.status\n};\n\n// Optional chaining on every level, so a response without pages, data or\n// tables falls through to the header-only result instead of throwing\nconst firstPageData = parsedData.pages?.[0]?.data;\n\nconst fields = firstPageData?.fields;\nif (fields) {\n  for (const key in fields) {\n    const entries = fields[key];\n    // Keep the first entry's value for every field that has one\n    if (Array.isArray(entries) && entries.length > 0 && typeof entries[0]?.value !== 'undefined') {\n      headerData[key] = entries[0].value;\n    }\n  }\n}\n\n// 2. Rebuild the rows of the first table from its flat list of cells\nconst finalOutputItems = [];\nconst firstTable = firstPageData?.tables?.[0];\n\nif (firstTable && Array.isArray(firstTable.cells)) {\n  const rows = {};\n\n  // Group all cells by their row number\n  firstTable.cells.forEach(cell => {\n    if (!rows[cell.row]) {\n      rows[cell.row] = {};\n    }\n    // Use the table header (e.g. 'Description', 'Quantity') as the key\n    const headerKey = 'line_item_' + cell.header.replace(/ /g, '_').toLowerCase();\n    rows[cell.row][headerKey] = cell.text;\n  });\n\n  // One output item per reconstructed row: header fields plus that row's cells\n  for (const rowNum in rows) {\n    finalOutputItems.push({ json: { ...headerData, ...rows[rowNum] } });\n  }\n}\n\n// 3. Fall back to a single header-only item when no rows were found\nif (finalOutputItems.length === 0) {\n  return [{ json: headerData }];\n}\n\n// Otherwise return one item per table row\nreturn finalOutputItems;"
      },
      "typeVersion": 2
    },
    {
      "id": "757c2b59-f47c-4f8d-aa34-134273b45023",
      "name": "Wait",
      "type": "n8n-nodes-base.wait",
      "position": [
        -500,
        0
      ],
      "parameters": {
        "amount": 15
      },
      "typeVersion": 1.1
    },
    {
      "id": "835a183d-826d-497d-ad04-7b57ba9472b2",
      "name": "Webhook",
      "type": "n8n-nodes-base.webhook",
      "position": [
        -1240,
        0
      ],
      "parameters": {
        "path": "1a03d800-3e91-4284-9323-0609c0974f18",
        "options": {},
        "httpMethod": "POST",
        "responseMode": "responseNode"
      },
      "typeVersion": 2
    },
    {
      "id": "cc68eafe-9e80-4319-913a-d650c6715987",
      "name": "Respond to Webhook",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        360,
        0
      ],
      "parameters": {
        "options": {},
        "respondWith": "binary"
      },
      "typeVersion": 1.4
    },
    {
      "id": "c19705bc-e2cd-47d6-a57f-d11cad5d97eb",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -980,
        -360
      ],
      "parameters": {
        "width": 520,
        "height": 340,
        "content": "Workflow Overview\nThis workflow automates PDF document parsing with Nanonets. Upload a document (via form or webhook) and get structured data\u2014including line items\u2014returned as an Excel file.\n\nTriggers\nUse the Form Trigger node for user-uploaded files, or\nuse the Webhook node for programmatic POST/file uploads.\n\nSetup Required (Before the Nanonets API Call)\n\nAdd your Nanonets API credentials in n8n\u2019s credentials manager (HTTP Basic Auth).\n\nInsert your Nanonets Workflow ID in the URL of both HTTP Request nodes."
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "55c67182-80fb-4972-9148-7e715dd3a6e9",
  "connections": {
    "Code": {
      "main": [
        [
          {
            "node": "Wait",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wait": {
      "main": [
        [
          {
            "node": "HTTP Request2",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code1": {
      "main": [
        [
          {
            "node": "Convert to File",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Webhook": {
      "main": [
        [
          {
            "node": "HTTP Request",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request": {
      "main": [
        [
          {
            "node": "Code",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request2": {
      "main": [
        [
          {
            "node": "Code1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Convert to File": {
      "main": [
        [
          {
            "node": "Respond to Webhook",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}