{
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "08ae2ea6-5ad1-4fdf-ac75-4e22811437cc",
      "name": "When clicking \u2018Execute workflow\u2019",
      "type": "n8n-nodes-base.manualTrigger",
      "disabled": true,
      "position": [
        -1860,
        -220
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "f81acfdb-2eae-4824-a4ec-2540ff15fa12",
      "name": "Status is:",
      "type": "n8n-nodes-base.switch",
      "notes": "Routes the Chunkr task by $json.status. Outputs (order unchanged): Succeeded, Processing (also matches 'Starting'), Failed (also matches 'Cancelled'), so that no known task status leaves the switch without a matching output.",
      "position": [
        -40,
        20
      ],
      "parameters": {
        "rules": {
          "values": [
            {
              "outputKey": "Succeeded",
              "conditions": {
                "options": {
                  "version": 2,
                  "leftValue": "",
                  "caseSensitive": true,
                  "typeValidation": "strict"
                },
                "combinator": "and",
                "conditions": [
                  {
                    "id": "a11576d1-4bfa-46ce-abce-25be2bc75a20",
                    "operator": {
                      "name": "filter.operator.equals",
                      "type": "string",
                      "operation": "equals"
                    },
                    "leftValue": "={{ $json.status }}",
                    "rightValue": "Succeeded"
                  }
                ]
              },
              "renameOutput": true
            },
            {
              "outputKey": "Processing",
              "conditions": {
                "options": {
                  "version": 2,
                  "leftValue": "",
                  "caseSensitive": true,
                  "typeValidation": "strict"
                },
                "combinator": "or",
                "conditions": [
                  {
                    "id": "19b80bb6-63f5-47f7-9d58-321de4f6893c",
                    "operator": {
                      "name": "filter.operator.equals",
                      "type": "string",
                      "operation": "equals"
                    },
                    "leftValue": "={{ $json.status }}",
                    "rightValue": "Processing"
                  },
                  {
                    "id": "5c1e7a3b-2f4d-4b8a-9e6c-0d3f7a1b2c4e",
                    "operator": {
                      "name": "filter.operator.equals",
                      "type": "string",
                      "operation": "equals"
                    },
                    "leftValue": "={{ $json.status }}",
                    "rightValue": "Starting"
                  }
                ]
              },
              "renameOutput": true
            },
            {
              "outputKey": "Failed",
              "conditions": {
                "options": {
                  "version": 2,
                  "leftValue": "",
                  "caseSensitive": true,
                  "typeValidation": "strict"
                },
                "combinator": "or",
                "conditions": [
                  {
                    "id": "b8822d01-57d4-47fd-95e9-5255ac5059f4",
                    "operator": {
                      "name": "filter.operator.equals",
                      "type": "string",
                      "operation": "equals"
                    },
                    "leftValue": "={{ $json.status }}",
                    "rightValue": "Failed"
                  },
                  {
                    "id": "e4a9c2d7-6b1f-4c3a-8d5e-7f2b9a0c1d3e",
                    "operator": {
                      "name": "filter.operator.equals",
                      "type": "string",
                      "operation": "equals"
                    },
                    "leftValue": "={{ $json.status }}",
                    "rightValue": "Cancelled"
                  }
                ]
              },
              "renameOutput": true
            }
          ]
        },
        "options": {}
      },
      "typeVersion": 3.2
    },
    {
      "id": "77949b9c-b3a1-4cd9-b643-d7f49dc64726",
      "name": "Google Gemini Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1360,
        80
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.5-pro-preview-05-06"
      },
      "credentials": {
        "googlePalmApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "1e53fcc8-4697-48c4-90cb-c07dee049949",
      "name": "Structured Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        1620,
        280
      ],
      "parameters": {
        "schemaType": "manual",
        "inputSchema": "{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"TableOfContents\",\n  \"description\": \"A JSON schema for representing a hierarchical table of contents with manually inlined nesting (up to 3 levels) to avoid $ref or $defs, and simplified keywords.\",\n  \"type\": \"object\",\n  \"properties\": {\n    \"tableOfContents\": {\n      \"type\": \"array\",\n      \"description\": \"The root list of top-level headings (Level 1).\",\n      \"items\": {\n        \"type\": \"object\",\n        \"description\": \"Represents a Level 1 heading.\",\n        \"properties\": {\n          \"text\": {\n            \"type\": \"string\",\n            \"description\": \"The text content of the Level 1 heading.\"\n          },\n          \"level\": {\n            \"type\": \"integer\",\n            \"description\": \"The hierarchical level of the heading (e.g., 1 for H1, 2 for H2, etc.).\"\n          },\n          \"children\": {\n            \"type\": \"array\",\n            \"description\": \"An array of Level 2 subheadings.\",\n            \"items\": {\n              \"type\": \"object\",\n              \"description\": \"Represents a Level 2 heading.\",\n              \"properties\": {\n                \"text\": {\n                  \"type\": \"string\",\n                  \"description\": \"The text content of the Level 2 heading.\"\n                },\n                \"level\": {\n                  \"type\": \"integer\",\n                  \"description\": \"The hierarchical level of the heading.\"\n                },\n                \"children\": {\n                  \"type\": \"array\",\n                  \"description\": \"An array of Level 3 subheadings.\",\n                  \"items\": {\n                    \"type\": \"object\",\n                    \"description\": \"Represents a Level 3 heading.\",\n                    \"properties\": {\n                      \"text\": {\n                        \"type\": 
\"string\",\n                        \"description\": \"The text content of the Level 3 heading.\"\n                      },\n                      \"level\": {\n                        \"type\": \"integer\",\n                        \"description\": \"The hierarchical level of the heading.\"\n                      },\n                      \"children\": {\n                        \"type\": \"array\",\n                        \"description\": \"Level 3 headings typically have no children in this model, but the array must exist.\",\n                        \"items\": {\n                          \"type\": \"object\",\n                          \"description\": \"Schema for items (if any, typically none) in the children array of a Level 3 heading. These items must be objects.\",\n                          \"additionalProperties\": false\n                        }\n                      }\n                    },\n                    \"required\": [\n                      \"text\",\n                      \"level\",\n                      \"children\"\n                    ],\n                    \"additionalProperties\": false\n                  }\n                }\n              },\n              \"required\": [\n                \"text\",\n                \"level\",\n                \"children\"\n              ],\n              \"additionalProperties\": false\n            }\n          }\n        },\n        \"required\": [\n          \"text\",\n          \"level\",\n          \"children\"\n        ],\n        \"additionalProperties\": false\n      }\n    }\n  },\n  \"required\": [\n    \"tableOfContents\"\n  ],\n  \"additionalProperties\": false\n}\n"
      },
      "typeVersion": 1.2
    },
    {
      "id": "ecccb5bf-6625-476e-b010-e50e3b89a80b",
      "name": "Auto-fixing Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserAutofixing",
      "position": [
        1480,
        80
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "97b36f68-cd64-437b-8af0-dada28b40ea8",
      "name": "Set File Name",
      "type": "n8n-nodes-base.set",
      "notes": "Derives fileName and fileNameSnake from the binary file name of the 'Merge' item. Only a trailing .pdf extension (any letter case) is stripped; '.pdf' elsewhere in the name is kept.",
      "position": [
        -980,
        20
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "49f89680-ba63-43f1-af72-7e5afd8ecb0e",
              "name": "fileName",
              "type": "string",
              "value": "={{ $('Merge').item.binary.data.fileName.replace(/\\.pdf$/i, '') }}"
            },
            {
              "id": "646e8985-5587-41f6-b4b2-d781ecff9e7c",
              "name": "fileNameSnake",
              "type": "string",
              "value": "={{ $('Merge').item.binary.data.fileName.replace(/\\.pdf$/i, '').toSnakeCase() }}"
            },
            {
              "id": "a7eafca2-13e6-46d1-b281-2ef4ea4d6a93",
              "name": "createdAt",
              "type": "string",
              "value": "={{ $now }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "46c53d9b-9387-4415-9b8c-b01a12e391a3",
      "name": "When Executed by Another Workflow",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "position": [
        -1860,
        300
      ],
      "parameters": {
        "workflowInputs": {
          "values": [
            {
              "name": "URL"
            }
          ]
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "f2145258-4cb1-4339-81d4-f9dfe524b972",
      "name": "Extract Sections headers as fallback",
      "type": "n8n-nodes-base.code",
      "position": [
        880,
        -180
      ],
      "parameters": {
        "jsCode": "// Fallback heading list: collect every 'SectionHeader' segment found in the Chunkr task.\n// Input: full Chunkr task JSON as emitted by the \"Status is:\" node.\nconst task = $(\"Status is:\").first()?.json;\n\n// Guard against a task without output/chunks so the node returns an empty list instead of throwing.\nconst chunks = Array.isArray(task?.output?.chunks) ? task.output.chunks : [];\nconst headings = [];\n\nfor (const chunk of chunks) {\n  // A chunk without a segments array has nothing to scan.\n  const segments = Array.isArray(chunk?.segments) ? chunk.segments : [];\n  for (const segment of segments) {\n    // Only headers whose content is a string can be trimmed and collected.\n    if (segment?.segment_type === 'SectionHeader' && typeof segment.content === 'string') {\n      headings.push(segment.content.trim());\n    }\n  }\n}\n\n// Remove duplicates that might span chunks; a Set keeps first-seen order.\nconst uniqueHeadings = [...new Set(headings)];\n\n// Return the ordered list of unique headings\nreturn [{ json: { headings: uniqueHeadings } }];"
      },
      "typeVersion": 2
    },
    {
      "id": "47b749ec-4832-45a0-826e-13ef23fd4647",
      "name": "Take beginning of Document to look for Table of contents",
      "type": "n8n-nodes-base.code",
      "position": [
        280,
        -180
      ],
      "parameters": {
        "jsCode": "// Gather the 'embed' text of the leading chunks of the incoming task result\n// and hand it on as one string under 'firstXNumberOfChunks'.\nconst CHUNK_LIMIT = 10;\nconst chunkrTask = $input.first().json;\n\n// Fall back to an empty list when the task carries no output or no chunks.\nconst leadingChunks = (chunkrTask.output && chunkrTask.output.chunks) || [];\n\nconst pieces = [];\nlet idx = 0;\nwhile (idx < CHUNK_LIMIT && idx < leadingChunks.length) {\n  const current = leadingChunks[idx];\n  idx += 1;\n  // Skip missing chunks and chunks whose 'embed' text is absent or blank.\n  if (!current || !current.embed || current.embed.trim() === '') {\n    continue;\n  }\n  pieces.push(current.embed);\n}\n\n// Pieces are separated by a blank line; the result is a single item.\nreturn [{\n  json: {\n    \"firstXNumberOfChunks\": pieces.join('\\n\\n')\n  }\n}];"
      },
      "typeVersion": 2
    },
    {
      "id": "7f136d22-2195-4d56-803a-a9f6384f3557",
      "name": "Stop and Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        200,
        220
      ],
      "parameters": {
        "errorMessage": "The chunkr Task failed!"
      },
      "typeVersion": 1
    },
    {
      "id": "228fed4c-c2a9-4dde-a270-e674ae61b9da",
      "name": "Google Gemini Chat Model1",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1500,
        280
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.5-flash-preview-05-20"
      },
      "credentials": {
        "googlePalmApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "9a21bf6c-208f-45a7-bb78-f27935b53b5d",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2640,
        -240
      ],
      "parameters": {
        "width": 580,
        "height": 260,
        "content": "### Welcome to the Document Processing Workflow!\n\nThis workflow automates the extraction and structuring of content from PDF documents. It leverages Chunkr.ai for document parsing and an AI Agent to build a comprehensive Table of Contents.\n\n**To get started, you need a Chunkr.ai API Key.**\n1.  Go to [Chunkr AI](https://chunkr.ai)\n2.  Click \"Login\" and create your account.\n3.  Once logged in, navigate to \"API Keys\" in your dashboard to generate your key."
      },
      "typeVersion": 1
    },
    {
      "id": "447c7ec5-a094-4034-afc4-fcd7aae5f4de",
      "name": "Convert the PDF to base64",
      "type": "n8n-nodes-base.extractFromFile",
      "position": [
        -1180,
        20
      ],
      "parameters": {
        "options": {},
        "operation": "binaryToPropery",
        "binaryPropertyName": "=data"
      },
      "typeVersion": 1
    },
    {
      "id": "dfc84ad3-4a85-4641-bc16-1f89c54b1c3a",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1940,
        -460
      ],
      "parameters": {
        "width": 480,
        "height": 440,
        "content": "### Node: When clicking \u2018Execute workflow\u2019 (Manual Trigger)\n\nThis node allows you to manually start the workflow.\n\n**How to use:**\n1.  Simply click the \"Execute workflow\" button in the n8n interface.\n2.  When triggered manually, the workflow will automatically download a predefined PDF from Google Drive for processing.\n\n**Note:** This trigger and the \"Download PDF from Google Drive\" node are disabled in this template. Enable both nodes before using this path."
      },
      "typeVersion": 1
    },
    {
      "id": "b61a3d2e-9773-4e03-ad49-380fca4bb04d",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1940,
        20
      ],
      "parameters": {
        "width": 480,
        "height": 460,
        "content": "### Node: When Executed by Another Workflow (Execute Workflow Trigger)\n\nThis node lets another n8n workflow start this process as a sub-workflow.\n\n**How to use:**\n1.  Ensure the calling workflow provides a `URL` field in its input. This `URL` should point to the PDF document you wish to process.\n2.  The workflow will then download the PDF from the provided URL.\n\n"
      },
      "typeVersion": 1
    },
    {
      "id": "0f1ddb9a-dc65-44b7-b5ed-b4f9fa64e743",
      "name": "Download PDF from URL",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -1620,
        300
      ],
      "parameters": {
        "url": "={{ $json.URL }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "b1095619-41f5-4381-9516-3c221ef98388",
      "name": "Download PDF from Google Drive",
      "type": "n8n-nodes-base.googleDrive",
      "disabled": true,
      "position": [
        -1640,
        -220
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "list",
          "value": "11ReFvvEnHKiFegKJ5tjm-MMfJn96nBng",
          "cachedResultUrl": "https://drive.google.com/file/d/11ReFvvEnHKiFegKJ5tjm-MMfJn96nBng/view?usp=drivesdk",
          "cachedResultName": "S1-Handlungsempfehlung: Diagnostik und Therapie der Loiasis (Afrikanischer Augenwurm).pdf"
        },
        "options": {},
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "ceb2c044-a55c-4c9d-9736-6769cce0ed12",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1100,
        -500
      ],
      "parameters": {
        "width": 1000,
        "height": 400,
        "content": "### Node: POST Chunkr Task (HTTP Request) & GET Chunkr Task\n\n### \ud83d\udea8\ud83d\udea8\ud83d\udea8 INSERT A CHUNKR.AI API KEY HERE\n\n\nThis node sends your document to Chunkr.ai for processing.\n\n**How to use:**\n1.  **Authorization:** Replace `<your_api_key>` with your actual Chunkr.ai API Key. You obtained this in the \"Welcome\" step.\n2.  **File Content:** The `file` parameter is dynamically set to the base64-encoded PDF from \"Convert the PDF to base64\".\n3.  **File Name:** The `file_name` parameter uses the `fileNameSnake` field from \"Set File Name\".\n4.  This creates a new processing task on Chunkr.ai."
      },
      "typeVersion": 1
    },
    {
      "id": "45dd3735-1c1a-4b46-ad10-d79234f01b7a",
      "name": "Wait Before Polling the Chunkr Result",
      "type": "n8n-nodes-base.wait",
      "position": [
        -480,
        20
      ],
      "parameters": {
        "amount": 10
      },
      "typeVersion": 1.1
    },
    {
      "id": "572c6ae1-cf9a-43ff-9c09-3bc650875d70",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        680,
        -600
      ],
      "parameters": {
        "width": 480,
        "height": 380,
        "content": "### Node: Extract Sections headers as fallback (Code)\n\nThis node extracts all section headers that were detected by Chunkr. Chunkr is not always reliable at finding these, but it generally does a great job at OCR.\n\n**What it does:**\n* It iterates through the `chunks` and `segments` returned by Chunkr.ai.\n* It specifically looks for segments with `segment_type` equal to `'SectionHeader'`.\n* All found section header texts are collected and duplicates are removed.\n* **Purpose:** This provides a list of potential headings as a fallback, especially if the AI Agent cannot find a clear Table of Contents in the document's beginning."
      },
      "typeVersion": 1
    },
    {
      "id": "56cbaf07-965c-4718-b282-dd1b471ffa90",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        120,
        -600
      ],
      "parameters": {
        "width": 500,
        "height": 380,
        "content": "### Node: Take beginning of Document to look for Table of contents (Code)\n\n### \ud83d\udea8\ud83d\udea8\ud83d\udea8 INSERT A CHUNKR.AI API KEY HERE\n\n\nThis node extracts initial document content for AI analysis.\n\n**What it does:**\n* It takes the full Chunkr.ai output and extracts the text content of the first 10 \"chunks\".\n* This concatenated text is then passed to the AI Agent.\n* **Purpose:** The AI Agent will use this initial text to identify if a formal Table of Contents is present within the document itself, which is often more accurate than programmatically extracted section headers."
      },
      "typeVersion": 1
    },
    {
      "id": "2b103146-6917-4745-adff-cb790dbdd7a6",
      "name": "Return each section individually",
      "type": "n8n-nodes-base.code",
      "position": [
        2040,
        -400
      ],
      "parameters": {
        "jsCode": "// --- Two-Pass Heading Mapping with Levels, Rich Content, Simplified Boundaries & Multi-Item Output ---\n\n// Get data from the two specified previous nodes\nconst goldenTocContainer = $(\"Table of Content Agent\").first().json.output;\nconst chunkrOutputContainer = $('GET Chunkr Task').first().json.output;\n\nconsole.log(\"Script Start: Levels, Rich Content, Simplified Boundaries & Multi-Item Output\");\n\nconst goldenTocArray = goldenTocContainer?.tableOfContents || [];\nconst chunkrChunks = chunkrOutputContainer?.chunks || [];\n\nif (!goldenTocArray.length) {\n    console.error(\"STOP: Golden ToC array is empty or not found.\");\n    return [{ json: { error: \"Golden ToC array is empty.\" }}];\n}\nif (!chunkrChunks.length) {\n    console.error(\"STOP: Chunkr chunks array is empty or not found.\");\n    return [{ json: { error: \"Chunkr chunks array is empty.\" }}];\n}\nconsole.log(`Inputs loaded: Golden ToC items: ${goldenTocArray.length}, Chunkr Chunks: ${chunkrChunks.length}`);\n\n// --- Helper Functions ---\nfunction normalizeText(text) {\n    if (typeof text !== 'string') return '';\n    return text.toLowerCase().replace(/\\s+/g, ' ').trim();\n}\n\nconst flatGoldenHeadings = [];\nlet uniqueIdCounter = 0;\n\nfunction flattenGoldenToC(nodes, defaultParentLevel = 0) {\n    for (const node of nodes) {\n        const currentItemLevel = node.level !== undefined ? 
node.level : defaultParentLevel + 1;\n        if (node.text) {\n            flatGoldenHeadings.push({\n                id: uniqueIdCounter++,\n                originalText: node.text.trim(),\n                normalizedText: normalizeText(node.text.trim()),\n                level: currentItemLevel,\n                isMapped: false,\n                matchDetails: { // Initialize with fields that will hold extracted content\n                    type: \"Unmatched\", chunkIndex: -1, segmentIndex: -1, chunkId: null, // chunkId kept for internal ref if needed\n                    matchedSegmentContent: \"\", sourceSegmentType: null,\n                    sectionText: \"\", sectionHTML: \"\", sectionMarkdown: \"\"\n                    // Boundary indices for output are removed here\n                }\n            });\n        }\n        if (node.children && node.children.length > 0) {\n            flattenGoldenToC(node.children, currentItemLevel);\n        }\n    }\n}\nflattenGoldenToC(goldenTocArray, 0);\nconsole.log(`Golden ToC flattened: ${flatGoldenHeadings.length} total headings with levels.`);\n\nconst usedChunkIndices = new Set();\nlet internalProcessedToc = [];\nlet identifiedPdfToCItem = null;\n\n// --- Step 1: Identify and Create Entry for the PDF's Own Table of Contents Chunk ---\nlet pdfTocChunkIndex = -1;\nlet maxTocItemsInAChunk = 0;\nconst minThreshold = 3;\nconst percentageThreshold = Math.floor(flatGoldenHeadings.length * 0.10);\nconst TOC_CHUNK_THRESHOLD_COUNT = Math.min(Math.max(minThreshold, percentageThreshold), 15);\n\nconsole.log(`Pre-computation: Identifying PDF's own ToC Chunk (threshold: > ${TOC_CHUNK_THRESHOLD_COUNT} distinct golden items)`);\nif (flatGoldenHeadings.length > 0) {\n    for (let chunkIdx = 0; chunkIdx < chunkrChunks.length; chunkIdx++) {\n        const chunk = chunkrChunks[chunkIdx];\n        if (!chunk.segments || chunk.segments.length === 0) continue;\n        let combinedChunkText = chunk.segments.reduce((acc, seg) => acc + 
(seg.content ? normalizeText(seg.content) + \" \" : \"\"), \"\").trim();\n        if (!combinedChunkText) continue;\n        const foundIds = new Set(flatGoldenHeadings.filter(gh => gh.normalizedText && combinedChunkText.includes(gh.normalizedText)).map(gh => gh.id));\n        const distinctItemsCount = foundIds.size;\n        if (distinctItemsCount > TOC_CHUNK_THRESHOLD_COUNT && distinctItemsCount > maxTocItemsInAChunk) {\n            maxTocItemsInAChunk = distinctItemsCount;\n            pdfTocChunkIndex = chunkIdx;\n        }\n    }\n}\n\nif (pdfTocChunkIndex !== -1) {\n    const tocChunk = chunkrChunks[pdfTocChunkIndex];\n    let tocContentSample = (tocChunk.segments && tocChunk.segments.length > 0)\n        ? tocChunk.segments.map(s => s.content || \"\").join(' ').substring(0, 150) + \"...\"\n        : \"[No segment content for sample]\";\n    identifiedPdfToCItem = {\n        id: \"pdf_toc_\" + pdfTocChunkIndex,\n        originalText: \"Document Table of Contents (Auto-Detected)\",\n        level: 1,\n        isMapped: true,\n        matchDetails: {\n            type: \"Auto-Detected PDF ToC\",\n            chunkIndex: pdfTocChunkIndex, segmentIndex: 0, chunkId: tocChunk.chunk_id,\n            matchedSegmentContent: `[Chunk ${pdfTocChunkIndex} (ID: ${tocChunk.chunk_id || 'N/A'}) auto-identified as PDF ToC, ~${maxTocItemsInAChunk} golden items. 
Sample: ${normalizeText(tocContentSample)}]`,\n            sourceSegmentType: \"AggregatedChunkAsPDFToC\",\n            sectionText: \"\", sectionHTML: \"\", sectionMarkdown: \"\"\n        }\n    };\n    usedChunkIndices.add(pdfTocChunkIndex);\n    console.log(`PDF ToC Chunk identified: Index ${pdfTocChunkIndex}.`);\n} else {\n    console.log(\"No single dominant PDF ToC Chunk identified.\");\n}\n\n// --- Pass 1: Anchor Golden Headings with 'SectionHeader' ---\nconsole.log(\"\\n--- Starting Pass 1 for Golden ToC: Matching 'SectionHeader' Segments ---\");\nflatGoldenHeadings.forEach(goldenHeading => {\n    if (!goldenHeading.normalizedText || goldenHeading.isMapped) return;\n    for (let chunkIdx = 0; chunkIdx < chunkrChunks.length; chunkIdx++) {\n        if (usedChunkIndices.has(chunkIdx)) continue;\n        const chunk = chunkrChunks[chunkIdx];\n        for (let segIdx = 0; segIdx < chunk.segments.length; segIdx++) {\n            const segment = chunk.segments[segIdx];\n            if (segment.segment_type === 'SectionHeader' && segment.content && normalizeText(segment.content) === goldenHeading.normalizedText) {\n                console.log(`  \u2705 PASS 1: \"${goldenHeading.originalText}\" (L${goldenHeading.level}) -> SectionHeader in Chunk ${chunkIdx}`);\n                goldenHeading.isMapped = true;\n                goldenHeading.matchDetails = { // Overwrite initial matchDetails\n                    type: \"SectionHeader Exact\", chunkIndex: chunkIdx, segmentIndex: segIdx, chunkId: chunk.chunk_id,\n                    matchedSegmentContent: segment.content, sourceSegmentType: segment.segment_type,\n                    sectionText: \"\", sectionHTML: \"\", sectionMarkdown: \"\"\n                };\n                usedChunkIndices.add(chunkIdx);\n                break;\n            }\n        }\n        if (goldenHeading.isMapped) break;\n    }\n});\nconsole.log(`--- Pass 1 Complete: ${flatGoldenHeadings.filter(h => h.isMapped).length} golden headings mapped 
initially.`);\n\n// --- Pass 2: Find Remaining Golden Headings (Content Includes) ---\nconsole.log(\"\\n--- Starting Pass 2 for Golden ToC: Content Search (Simplified) ---\");\nlet searchAfterChunkIndex_Pass2 = identifiedPdfToCItem ? identifiedPdfToCItem.matchDetails.chunkIndex : -1;\n\nflatGoldenHeadings.forEach(goldenHeading => {\n    if (goldenHeading.isMapped) {\n        if (goldenHeading.matchDetails.chunkIndex > searchAfterChunkIndex_Pass2) {\n            searchAfterChunkIndex_Pass2 = goldenHeading.matchDetails.chunkIndex;\n        }\n        return;\n    }\n    if (!goldenHeading.normalizedText) return;\n    console.log(`Pass 2 - Seeking: \"${goldenHeading.originalText}\" (L${goldenHeading.level}) (after chunk ${searchAfterChunkIndex_Pass2})`);\n    let potentialMatches = [];\n    for (let chunkIdx = searchAfterChunkIndex_Pass2 + 1; chunkIdx < chunkrChunks.length; chunkIdx++) {\n        if (usedChunkIndices.has(chunkIdx)) continue;\n        const chunk = chunkrChunks[chunkIdx];\n        for (let segIdx = 0; segIdx < chunk.segments.length; segIdx++) {\n            const segment = chunk.segments[segIdx];\n            if (segment.content && normalizeText(segment.content).includes(goldenHeading.normalizedText)) {\n                potentialMatches.push({\n                    chunkIndex: chunkIdx, segmentIndex: segIdx, chunkId: chunk.chunk_id,\n                    matchedSegmentContent: segment.content, segmentType: segment.segment_type\n                });\n            }\n        }\n    }\n    if (!potentialMatches.length) {\n        console.log(`  \u274c PASS 2: NO RAW MATCHES for \"${goldenHeading.originalText}\" (L${goldenHeading.level}).`);\n        return;\n    }\n    potentialMatches.sort((a, b) => (a.chunkIndex !== b.chunkIndex ? 
a.chunkIndex - b.chunkIndex : a.segmentIndex - b.segmentIndex));\n    let chosenMatch = potentialMatches.find(match => !usedChunkIndices.has(match.chunkIndex));\n\n    if (chosenMatch) {\n        console.log(`  \u2705 PASS 2: \"${goldenHeading.originalText}\" (L${goldenHeading.level}) -> Chunk ${chosenMatch.chunkIndex}, Seg ${chosenMatch.segmentIndex}`);\n        goldenHeading.isMapped = true;\n        goldenHeading.matchDetails = { // Overwrite initial matchDetails\n            type: \"Content Includes\", chunkIndex: chosenMatch.chunkIndex, segmentIndex: chosenMatch.segmentIndex, chunkId: chosenMatch.chunkId,\n            matchedSegmentContent: chosenMatch.matchedSegmentContent, sourceSegmentType: chosenMatch.segmentType,\n            sectionText: \"\", sectionHTML: \"\", sectionMarkdown: \"\"\n        };\n        usedChunkIndices.add(chosenMatch.chunkIndex);\n        searchAfterChunkIndex_Pass2 = chosenMatch.chunkIndex;\n    } else {\n        console.log(`  \u274c PASS 2: NO SUITABLE UNUSED CHUNK for \"${goldenHeading.originalText}\" (L${goldenHeading.level}).`);\n    }\n});\n\n// --- Consolidate, Sort, and Extract Section Content (Text, HTML, Markdown) ---\nif (identifiedPdfToCItem) {\n    internalProcessedToc.push(identifiedPdfToCItem);\n}\nflatGoldenHeadings.forEach(gh => internalProcessedToc.push(gh));\n\ninternalProcessedToc.sort((a, b) => {\n    const aChunk = a.matchDetails.chunkIndex;\n    const bChunk = b.matchDetails.chunkIndex;\n    const aSeg = a.matchDetails.segmentIndex;\n    const bSeg = b.matchDetails.segmentIndex;\n    if (aChunk !== bChunk) return aChunk - bChunk;\n    if (aSeg !== bSeg) return aSeg - bSeg;\n    return (a.id && b.id) ? 
String(a.id).localeCompare(String(b.id)) : 0;\n});\n\nconsole.log(\"\\n--- Extracting Section Content (Text, HTML, Markdown) for Mapped Items ---\");\n\nfunction getContentStartPoint(headingChunkIdx, headingSegIdx, allChunkrChunks) {\n    let contentStartChunkIdx = headingChunkIdx;\n    let contentStartSegmentIdx = headingSegIdx + 1;\n    if (headingChunkIdx >= allChunkrChunks.length || !allChunkrChunks[headingChunkIdx] || !allChunkrChunks[headingChunkIdx].segments) {\n        return { chunkIdx: headingChunkIdx, segmentIdx: headingSegIdx };\n    }\n    const headingChunk = allChunkrChunks[headingChunkIdx];\n    if (contentStartSegmentIdx >= headingChunk.segments.length) {\n        contentStartChunkIdx++;\n        contentStartSegmentIdx = 0;\n    }\n    return { chunkIdx: contentStartChunkIdx, segmentIdx: contentStartSegmentIdx };\n}\n\nfunction extractSectionContents(contentStartChunkIdx, contentStartSegmentIdx, nextSectionStartChunkIdx, nextSectionStartSegIdx, allChunkrChunks) {\n    let accumulatedText = \"\";\n    let accumulatedHtml = \"\";\n    let accumulatedMarkdown = \"\";\n    for (let cIdx = contentStartChunkIdx; cIdx < allChunkrChunks.length; cIdx++) {\n        const chunk = allChunkrChunks[cIdx];\n        if (!chunk || !chunk.segments) continue;\n        const sStart = (cIdx === contentStartChunkIdx) ? 
contentStartSegmentIdx : 0;\n        let sEnd = chunk.segments.length;\n        if (cIdx === nextSectionStartChunkIdx) sEnd = nextSectionStartSegIdx;\n        for (let sIdx = sStart; sIdx < sEnd; sIdx++) {\n            const segment = chunk.segments[sIdx];\n            if (segment) {\n                if (segment.content) accumulatedText += segment.content + \"\\n\";\n                accumulatedHtml += (segment.html || \"\") + \"\\n\";\n                accumulatedMarkdown += (segment.markdown || \"\") + \"\\n\";\n            }\n        }\n        if (cIdx >= nextSectionStartChunkIdx && nextSectionStartChunkIdx < allChunkrChunks.length) break;\n    }\n    return {\n        text: accumulatedText.trim(),\n        html: accumulatedHtml.trim(),\n        markdown: accumulatedMarkdown.trim()\n    };\n}\n\nfor (let i = 0; i < internalProcessedToc.length; i++) {\n    const currentItem = internalProcessedToc[i];\n    if (!currentItem.isMapped || !currentItem.matchDetails || currentItem.matchDetails.chunkIndex === -1) {\n        continue;\n    }\n\n    const headingChunkIdx = currentItem.matchDetails.chunkIndex;\n    const headingSegIdx = currentItem.matchDetails.segmentIndex;\n\n    // Determine end of current section (start of next *mapped* section)\n    let nextSectionStartChunkIdx = chunkrChunks.length;\n    let nextSectionStartSegIdx = 0;\n\n    for (let j = i + 1; j < internalProcessedToc.length; j++) {\n        if (internalProcessedToc[j].isMapped && internalProcessedToc[j].matchDetails && internalProcessedToc[j].matchDetails.chunkIndex !== -1) {\n            nextSectionStartChunkIdx = internalProcessedToc[j].matchDetails.chunkIndex;\n            nextSectionStartSegIdx = internalProcessedToc[j].matchDetails.segmentIndex;\n            break;\n        }\n    }\n    // These end boundaries are used for extraction, but not stored in matchDetails for output\n    \n    let contentExtractionStartChunk = headingChunkIdx;\n    let contentExtractionStartSegment = 
headingSegIdx;\n\n    if (currentItem.id && String(currentItem.id).startsWith(\"pdf_toc_\")) {\n        contentExtractionStartChunk = headingChunkIdx;\n        contentExtractionStartSegment = 0; \n    } else {\n        const contentStartPoint = getContentStartPoint(headingChunkIdx, headingSegIdx, chunkrChunks);\n        contentExtractionStartChunk = contentStartPoint.chunkIdx;\n        contentExtractionStartSegment = contentStartPoint.segmentIdx;\n    }\n    \n    const sectionContents = extractSectionContents(\n        contentExtractionStartChunk, contentExtractionStartSegment,\n        nextSectionStartChunkIdx, nextSectionStartSegIdx,\n        chunkrChunks\n    );\n\n    currentItem.matchDetails.sectionText = sectionContents.text;\n    currentItem.matchDetails.sectionHTML = sectionContents.html;\n    currentItem.matchDetails.sectionMarkdown = sectionContents.markdown;\n}\n\n// --- Final Output Formatting to Individual n8n Items (Simplified) ---\nconst outputN8nItems = [];\ninternalProcessedToc.forEach(item => {\n    if (item.isMapped && item.matchDetails && item.matchDetails.chunkIndex !== -1) {\n        const detail = item.matchDetails;\n        outputN8nItems.push({\n            json: {\n                heading: item.originalText,\n                headingLevel: item.level !== undefined ? item.level : 1,\n                sectionText: detail.sectionText || \"\",\n                sectionHTML: detail.sectionHTML || \"\",\n                sectionMarkdown: detail.sectionMarkdown || \"\"\n                // Removed: sourceChunkId, sectionStartChunkIndex, sectionStartSegmentIndex,\n                // sectionEndChunkIndex, sectionEndSegmentIndex\n            }\n        });\n    }\n});\n\nconsole.log(`--- Processing Complete. Returning ${outputN8nItems.length} mapped sections as individual items. 
---`);\nif (outputN8nItems.length > 0) {\n    console.log(\"\\n--- Sample of First Output Item (JSON content) ---\");\n    console.log(JSON.stringify(outputN8nItems[0].json, null, 2));\n}\n\nif (outputN8nItems.length === 0 && (goldenTocArray.length > 0 || chunkrChunks.length > 0)) {\n    console.warn(\"No sections were successfully mapped to output.\");\n    return [{ json: { warning: \"No sections mapped.\" } }];\n}\n\nreturn outputN8nItems;"
      },
      "typeVersion": 2
    },
    {
      "id": "8622fab4-edcc-41d4-8456-0c652e8f6eb2",
      "name": "Table of Content Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        1420,
        -160
      ],
      "parameters": {
        "text": "=You are an expert at understanding document structure. Based on the following ordered list of section headings from a document, please analyze the numbering (e.g., 1., 1.1, 1.1.1, A., B.) and semantic content to create a nested JSON object representing the document's hierarchy.\n\nEach node in the JSON must have a \"title\" (the heading text) and a \"children\" (an array of child nodes).\n\n**Example:**\nInput List: [\"1. Introduction\", \"1.1 Background\", \"1.2 Scope\", \"2. Methodology\", \"2.1 Data Collection\"]\nDesired JSON Output:\n[\n  {\n    \"title\": \"1. Introduction\",\n    \"children\": [\n      { \"title\": \"1.1 Background\", \"children\": [] },\n      { \"title\": \"1.2 Scope\", \"children\": [] }\n    ]\n  },\n  {\n    \"title\": \"2. Methodology\",\n    \"children\": [\n      { \"title\": \"2.1 Data Collection\", \"children\": [] }\n    ]\n  }\n]\n\n**These Headings were extracted programatically:**\n(these are generated using OCR and should only be used as fallback. this might include too many headings, or may be missing some. only use if no TOC is found in the beginning of the document)\n{{ $('Extract Sections headers as fallback').first().json.headings }}\n\n\n**This is the first pages of the document**\n{{ $('Take beginning of Document to look for Table of contents').first().json.firstXNumberOfChunks }}\n(may or may not contain a table of contents. if it does, use as primary source of truth)\n\nyour task is to use the above sources to construct a table of contents.\noutput in json using the required format.",
        "options": {},
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 2
    },
    {
      "id": "b627fe7d-f342-40f6-912f-c090a619c96c",
      "name": "Return the whole document",
      "type": "n8n-nodes-base.code",
      "position": [
        2120,
        340
      ],
      "parameters": {
        "jsCode": "const goldenTocContainer = $(\"Table of Content Agent\").first().json.output;\nconst chunkrOutputContainer = $('GET Chunkr Task').first().json.output;\n\n\nconsole.log(\"Script Start: Adding Heading Levels\");\n\nconst goldenTocArray = goldenTocContainer?.tableOfContents || [];\nconst chunkrChunks = chunkrOutputContainer?.chunks || [];\n\nif (!goldenTocArray.length) {\n    console.error(\"STOP: Golden ToC array is empty or not found.\");\n    return [{ json: { error: \"Golden ToC array is empty.\", processedSections: [] }}];\n}\nif (!chunkrChunks.length) {\n    console.error(\"STOP: Chunkr chunks array is empty or not found.\");\n    return [{ json: { error: \"Chunkr chunks array is empty.\", processedSections: [] }}];\n}\nconsole.log(`Inputs loaded: Golden ToC items: ${goldenTocArray.length}, Chunkr Chunks: ${chunkrChunks.length}`);\n\n// --- Helper Functions ---\nfunction normalizeText(text) {\n    if (typeof text !== 'string') return '';\n    return text.toLowerCase().replace(/\\s+/g, ' ').trim();\n}\n\nconst flatGoldenHeadings = [];\nlet uniqueIdCounter = 0;\n\n// Modified flattenGoldenToC to capture the 'level' property\n// Assumes 'node.level' from your input goldenTocArray is the authoritative source.\n// Adds a defaultLevel for robustness if 'node.level' is unexpectedly missing.\nfunction flattenGoldenToC(nodes, defaultParentLevel = 0) {\n    for (const node of nodes) {\n        // Determine the current item's level: use explicit if present, else infer from parent.\n        const currentItemLevel = node.level !== undefined ? 
node.level : defaultParentLevel + 1;\n\n        if (node.text) {\n            flatGoldenHeadings.push({\n                id: uniqueIdCounter++,\n                originalText: node.text.trim(),\n                normalizedText: normalizeText(node.text.trim()),\n                level: currentItemLevel, // Store the heading level\n                isMapped: false,\n                matchDetails: { // Initialize all expected matchDetail fields\n                    type: \"Unmatched\", chunkIndex: -1, segmentIndex: -1, chunkId: null,\n                    matchedSegmentContent: \"\", sourceSegmentType: null,\n                    sectionText: \"\", sectionHTML: \"\", sectionMarkdown: \"\",\n                    sectionStartChunkIndex: -1, sectionStartSegmentIndex: -1,\n                    sectionEndChunkIndex: -1, sectionEndSegmentIndex: -1\n                }\n            });\n        }\n        if (node.children && node.children.length > 0) {\n            // Children's level is determined by their own 'level' or inferred from current item's level\n            flattenGoldenToC(node.children, currentItemLevel);\n        }\n    }\n}\nflattenGoldenToC(goldenTocArray, 0); // Initial call, top-level items will use their own 'level' or default to 1\nconsole.log(`Golden ToC flattened: ${flatGoldenHeadings.length} total headings with levels.`);\n\nconst usedChunkIndices = new Set();\nlet internalProcessedToc = [];\nlet identifiedPdfToCItem = null;\n\n// --- Step 1: Identify and Create Entry for the PDF's Own Table of Contents Chunk ---\nlet pdfTocChunkIndex = -1;\nlet maxTocItemsInAChunk = 0;\nconst minThreshold = 3;\nconst percentageThreshold = Math.floor(flatGoldenHeadings.length * 0.10);\nconst TOC_CHUNK_THRESHOLD_COUNT = Math.min(Math.max(minThreshold, percentageThreshold), 15);\n\nconsole.log(`Pre-computation: Identifying PDF's own ToC Chunk (threshold: > ${TOC_CHUNK_THRESHOLD_COUNT} distinct golden items)`);\nif (flatGoldenHeadings.length > 0) {\n    for (let chunkIdx = 0; 
chunkIdx < chunkrChunks.length; chunkIdx++) {\n        const chunk = chunkrChunks[chunkIdx];\n        if (!chunk.segments || chunk.segments.length === 0) continue;\n        let combinedChunkText = chunk.segments.reduce((acc, seg) => acc + (seg.content ? normalizeText(seg.content) + \" \" : \"\"), \"\").trim();\n        if (!combinedChunkText) continue;\n        const foundIds = new Set(flatGoldenHeadings.filter(gh => gh.normalizedText && combinedChunkText.includes(gh.normalizedText)).map(gh => gh.id));\n        const distinctItemsCount = foundIds.size;\n        if (distinctItemsCount > TOC_CHUNK_THRESHOLD_COUNT && distinctItemsCount > maxTocItemsInAChunk) {\n            maxTocItemsInAChunk = distinctItemsCount;\n            pdfTocChunkIndex = chunkIdx;\n        }\n    }\n}\n\nif (pdfTocChunkIndex !== -1) {\n    const tocChunk = chunkrChunks[pdfTocChunkIndex];\n    let tocContentSample = (tocChunk.segments && tocChunk.segments.length > 0)\n        ? tocChunk.segments.map(s => s.content || \"\").join(' ').substring(0, 150) + \"...\"\n        : \"[No segment content for sample]\";\n    identifiedPdfToCItem = {\n        id: \"pdf_toc_\" + pdfTocChunkIndex,\n        originalText: \"Document Table of Contents (Auto-Detected)\",\n        level: 1, // Assign level 1 to the auto-detected ToC\n        isMapped: true,\n        matchDetails: {\n            type: \"Auto-Detected PDF ToC\",\n            chunkIndex: pdfTocChunkIndex, segmentIndex: 0, chunkId: tocChunk.chunk_id,\n            matchedSegmentContent: `[Chunk ${pdfTocChunkIndex} (ID: ${tocChunk.chunk_id || 'N/A'}) auto-identified as PDF ToC, ~${maxTocItemsInAChunk} golden items. 
Sample: ${normalizeText(tocContentSample)}]`,\n            sourceSegmentType: \"AggregatedChunkAsPDFToC\",\n            sectionText: \"\", sectionHTML: \"\", sectionMarkdown: \"\",\n            sectionStartChunkIndex: pdfTocChunkIndex, sectionStartSegmentIndex: 0,\n            sectionEndChunkIndex: -1, sectionEndSegmentIndex: -1\n        }\n    };\n    usedChunkIndices.add(pdfTocChunkIndex);\n    console.log(`PDF ToC Chunk identified: Index ${pdfTocChunkIndex}. Level set to 1.`);\n} else {\n    console.log(\"No single dominant PDF ToC Chunk identified.\");\n}\n\n// --- Pass 1: Anchor Golden Headings with 'SectionHeader' ---\nconsole.log(\"\\n--- Starting Pass 1 for Golden ToC: Matching 'SectionHeader' Segments ---\");\nflatGoldenHeadings.forEach(goldenHeading => {\n    if (!goldenHeading.normalizedText || goldenHeading.isMapped) return;\n    for (let chunkIdx = 0; chunkIdx < chunkrChunks.length; chunkIdx++) {\n        if (usedChunkIndices.has(chunkIdx)) continue;\n        const chunk = chunkrChunks[chunkIdx];\n        for (let segIdx = 0; segIdx < chunk.segments.length; segIdx++) {\n            const segment = chunk.segments[segIdx];\n            if (segment.segment_type === 'SectionHeader' && segment.content && normalizeText(segment.content) === goldenHeading.normalizedText) {\n                console.log(`  \u2705 PASS 1: \"${goldenHeading.originalText}\" (L${goldenHeading.level}) -> SectionHeader in Chunk ${chunkIdx}`);\n                goldenHeading.isMapped = true;\n                goldenHeading.matchDetails = {\n                    type: \"SectionHeader Exact\", chunkIndex: chunkIdx, segmentIndex: segIdx, chunkId: chunk.chunk_id,\n                    matchedSegmentContent: segment.content, sourceSegmentType: segment.segment_type,\n                    sectionText: \"\", sectionHTML: \"\", sectionMarkdown: \"\",\n                    sectionStartChunkIndex: chunkIdx, sectionStartSegmentIndex: segIdx,\n                    sectionEndChunkIndex: -1, 
sectionEndSegmentIndex: -1\n                };\n                usedChunkIndices.add(chunkIdx);\n                break;\n            }\n        }\n        if (goldenHeading.isMapped) break;\n    }\n});\nconsole.log(`--- Pass 1 Complete: ${flatGoldenHeadings.filter(h => h.isMapped).length} golden headings mapped initially.`);\n\n// --- Pass 2: Find Remaining Golden Headings (Content Includes) ---\nconsole.log(\"\\n--- Starting Pass 2 for Golden ToC: Content Search (Simplified) ---\");\nlet searchAfterChunkIndex_Pass2 = identifiedPdfToCItem ? identifiedPdfToCItem.matchDetails.chunkIndex : -1;\n\nflatGoldenHeadings.forEach(goldenHeading => {\n    if (goldenHeading.isMapped) {\n        if (goldenHeading.matchDetails.chunkIndex > searchAfterChunkIndex_Pass2) {\n            searchAfterChunkIndex_Pass2 = goldenHeading.matchDetails.chunkIndex;\n        }\n        return;\n    }\n    if (!goldenHeading.normalizedText) return;\n\n    console.log(`Pass 2 - Seeking: \"${goldenHeading.originalText}\" (L${goldenHeading.level}) (after chunk ${searchAfterChunkIndex_Pass2})`);\n    let potentialMatches = [];\n    for (let chunkIdx = searchAfterChunkIndex_Pass2 + 1; chunkIdx < chunkrChunks.length; chunkIdx++) {\n        if (usedChunkIndices.has(chunkIdx)) continue;\n        const chunk = chunkrChunks[chunkIdx];\n        for (let segIdx = 0; segIdx < chunk.segments.length; segIdx++) {\n            const segment = chunk.segments[segIdx];\n            if (segment.content && normalizeText(segment.content).includes(goldenHeading.normalizedText)) {\n                potentialMatches.push({\n                    chunkIndex: chunkIdx, segmentIndex: segIdx, chunkId: chunk.chunk_id,\n                    matchedSegmentContent: segment.content, segmentType: segment.segment_type\n                });\n            }\n        }\n    }\n    if (!potentialMatches.length) {\n        console.log(`  \u274c PASS 2: NO RAW MATCHES for \"${goldenHeading.originalText}\" (L${goldenHeading.level}).`);\n        
return;\n    }\n    potentialMatches.sort((a, b) => (a.chunkIndex !== b.chunkIndex ? a.chunkIndex - b.chunkIndex : a.segmentIndex - b.segmentIndex));\n    let chosenMatch = potentialMatches.find(match => !usedChunkIndices.has(match.chunkIndex));\n\n    if (chosenMatch) {\n        console.log(`  \u2705 PASS 2: \"${goldenHeading.originalText}\" (L${goldenHeading.level}) -> Chunk ${chosenMatch.chunkIndex}, Seg ${chosenMatch.segmentIndex}`);\n        goldenHeading.isMapped = true;\n        goldenHeading.matchDetails = {\n            type: \"Content Includes\", chunkIndex: chosenMatch.chunkIndex, segmentIndex: chosenMatch.segmentIndex, chunkId: chosenMatch.chunkId,\n            matchedSegmentContent: chosenMatch.matchedSegmentContent, sourceSegmentType: chosenMatch.segmentType,\n            sectionText: \"\", sectionHTML: \"\", sectionMarkdown: \"\",\n            sectionStartChunkIndex: chosenMatch.chunkIndex, sectionStartSegmentIndex: chosenMatch.segmentIndex,\n            sectionEndChunkIndex: -1, sectionEndSegmentIndex: -1\n        };\n        usedChunkIndices.add(chosenMatch.chunkIndex);\n        searchAfterChunkIndex_Pass2 = chosenMatch.chunkIndex;\n    } else {\n        console.log(`  \u274c PASS 2: NO SUITABLE UNUSED CHUNK for \"${goldenHeading.originalText}\" (L${goldenHeading.level}).`);\n    }\n});\n\n// --- Consolidate, Sort, and Extract Section Content (Text, HTML, Markdown) ---\nif (identifiedPdfToCItem) {\n    internalProcessedToc.push(identifiedPdfToCItem);\n}\nflatGoldenHeadings.forEach(gh => internalProcessedToc.push(gh));\n\ninternalProcessedToc.sort((a, b) => {\n    const aChunk = a.matchDetails.chunkIndex;\n    const bChunk = b.matchDetails.chunkIndex;\n    const aSeg = a.matchDetails.segmentIndex;\n    const bSeg = b.matchDetails.segmentIndex;\n    if (aChunk !== bChunk) return aChunk - bChunk;\n    if (aSeg !== bSeg) return aSeg - bSeg;\n    return (a.id && b.id) ? 
String(a.id).localeCompare(String(b.id)) : 0;\n});\n\nconsole.log(\"\\n--- Extracting Section Content (Text, HTML, Markdown) for Mapped Items ---\");\n\nfunction getContentStartPoint(headingChunkIdx, headingSegIdx, allChunkrChunks) {\n    let contentStartChunkIdx = headingChunkIdx;\n    let contentStartSegmentIdx = headingSegIdx + 1;\n    if (headingChunkIdx >= allChunkrChunks.length || !allChunkrChunks[headingChunkIdx] || !allChunkrChunks[headingChunkIdx].segments) {\n        return { chunkIdx: headingChunkIdx, segmentIdx: headingSegIdx };\n    }\n    const headingChunk = allChunkrChunks[headingChunkIdx];\n    if (contentStartSegmentIdx >= headingChunk.segments.length) {\n        contentStartChunkIdx++;\n        contentStartSegmentIdx = 0;\n    }\n    return { chunkIdx: contentStartChunkIdx, segmentIdx: contentStartSegmentIdx };\n}\n\nfunction extractSectionContents(contentStartChunkIdx, contentStartSegmentIdx, nextSectionStartChunkIdx, nextSectionStartSegIdx, allChunkrChunks) {\n    let accumulatedText = \"\";\n    let accumulatedHtml = \"\";\n    let accumulatedMarkdown = \"\";\n    for (let cIdx = contentStartChunkIdx; cIdx < allChunkrChunks.length; cIdx++) {\n        const chunk = allChunkrChunks[cIdx];\n        if (!chunk || !chunk.segments) continue;\n        const sStart = (cIdx === contentStartChunkIdx) ? 
contentStartSegmentIdx : 0;\n        let sEnd = chunk.segments.length;\n        if (cIdx === nextSectionStartChunkIdx) sEnd = nextSectionStartSegIdx;\n        for (let sIdx = sStart; sIdx < sEnd; sIdx++) {\n            const segment = chunk.segments[sIdx];\n            if (segment) {\n                if (segment.content) accumulatedText += segment.content + \"\\n\";\n                accumulatedHtml += (segment.html || \"\") + \"\\n\";\n                accumulatedMarkdown += (segment.markdown || \"\") + \"\\n\";\n            }\n        }\n        if (cIdx >= nextSectionStartChunkIdx && nextSectionStartChunkIdx < allChunkrChunks.length) break;\n    }\n    return {\n        text: accumulatedText.trim(),\n        html: accumulatedHtml.trim(),\n        markdown: accumulatedMarkdown.trim()\n    };\n}\n\nfor (let i = 0; i < internalProcessedToc.length; i++) {\n    const currentItem = internalProcessedToc[i];\n    if (!currentItem.isMapped || !currentItem.matchDetails || currentItem.matchDetails.chunkIndex === -1) continue;\n\n    const headingChunkIdx = currentItem.matchDetails.chunkIndex;\n    const headingSegIdx = currentItem.matchDetails.segmentIndex;\n\n    let nextSectionStartChunkIdx = chunkrChunks.length;\n    let nextSectionStartSegIdx = 0;\n\n    for (let j = i + 1; j < internalProcessedToc.length; j++) {\n        if (internalProcessedToc[j].isMapped && internalProcessedToc[j].matchDetails && internalProcessedToc[j].matchDetails.chunkIndex !== -1) {\n            nextSectionStartChunkIdx = internalProcessedToc[j].matchDetails.chunkIndex;\n            nextSectionStartSegIdx = internalProcessedToc[j].matchDetails.segmentIndex;\n            break;\n        }\n    }\n    currentItem.matchDetails.sectionEndChunkIndex = nextSectionStartChunkIdx;\n    currentItem.matchDetails.sectionEndSegmentIndex = nextSectionStartSegIdx;\n    \n    let contentExtractionStartChunk = headingChunkIdx;\n    let contentExtractionStartSegment = headingSegIdx;\n\n    if (currentItem.id && 
String(currentItem.id).startsWith(\"pdf_toc_\")) {\n        contentExtractionStartChunk = headingChunkIdx; // Start of its own chunk\n        contentExtractionStartSegment = 0;             // From the very first segment\n    } else {\n        // For regular headings, content starts *after* the heading's segment\n        const contentStartPoint = getContentStartPoint(headingChunkIdx, headingSegIdx, chunkrChunks);\n        contentExtractionStartChunk = contentStartPoint.chunkIdx;\n        contentExtractionStartSegment = contentStartPoint.segmentIdx;\n    }\n    \n    const sectionContents = extractSectionContents(\n        contentExtractionStartChunk, contentExtractionStartSegment,\n        nextSectionStartChunkIdx, nextSectionStartSegIdx,\n        chunkrChunks\n    );\n\n    currentItem.matchDetails.sectionText = sectionContents.text;\n    currentItem.matchDetails.sectionHTML = sectionContents.html;\n    currentItem.matchDetails.sectionMarkdown = sectionContents.markdown;\n}\n\n// --- Final Output Formatting to a Single n8n Item with an Array of Sections ---\nconst outputSectionsArray = [];\ninternalProcessedToc.forEach(item => {\n    if (item.isMapped && item.matchDetails && item.matchDetails.chunkIndex !== -1) {\n        const detail = item.matchDetails;\n        outputSectionsArray.push({\n            heading: item.originalText,\n            headingLevel: item.level !== undefined ? 
item.level : 1, // Add headingLevel, default to 1 if missing\n            sectionText: detail.sectionText || \"\",\n            sectionHTML: detail.sectionHTML || \"\",\n            sectionMarkdown: detail.sectionMarkdown || \"\",\n            sourceChunkId: detail.chunkId || null,\n            sectionStartChunkIndex: detail.sectionStartChunkIndex,\n            sectionStartSegmentIndex: detail.sectionStartSegmentIndex,\n            sectionEndChunkIndex: detail.sectionEndChunkIndex,\n            sectionEndSegmentIndex: detail.sectionEndSegmentIndex,\n        });\n    }\n});\n\nconsole.log(`--- Processing Complete. Returning 1 item with ${outputSectionsArray.length} mapped sections. ---`);\nif (outputSectionsArray.length > 0) {\n    console.log(\"\\n--- Sample of First Section in Output Array (showing heading and level) ---\");\n    const sample = outputSectionsArray[0];\n    console.log(`Heading: \"${sample.heading}\", Level: ${sample.headingLevel}, TextLen: ${sample.sectionText.length}, HTMLLen: ${sample.sectionHTML.length}, MDLen: ${sample.sectionMarkdown.length}`);\n}\n\nif (outputSectionsArray.length === 0 && (goldenTocArray.length > 0 || chunkrChunks.length > 0)) {\n    console.warn(\"No sections were successfully mapped to output array.\");\n    return [{ json: { warning: \"No sections mapped.\", processedSections: [] } }];\n}\n\nreturn [{ json: { processedSections: outputSectionsArray } }];"
      },
      "typeVersion": 2
    },
    {
      "id": "14257369-a732-45cc-a45d-16408f9408d7",
      "name": "Create HTML document",
      "type": "n8n-nodes-base.code",
      "position": [
        2580,
        140
      ],
      "parameters": {
        "jsCode": "// n8n Code Node: Generate Full HTML Document\n\n// Assuming the input from the previous node is items[0]\nconst inputData = $('Return the whole document').first().json;\nconst processedSections = inputData.processedSections;\n\nif (!processedSections || !Array.isArray(processedSections)) {\n  console.error(\"Error: processedSections array not found in input or is not an array.\");\n  // Return an error or an empty HTML string to prevent workflow failure\n  return [{ json: { error: \"Input data is not in the expected format.\", html_output: \"\", fileName: \"error.html\" } }];\n}\n\nlet fullHtmlContent = \"\";\n\n// Start HTML Document\nfullHtmlContent += \"<!DOCTYPE html>\\n\";\nfullHtmlContent += '<html lang=\"en\">\\n';\nfullHtmlContent += \"<head>\\n\";\nfullHtmlContent += '  <meta charset=\"UTF-8\">\\n';\nfullHtmlContent += '  <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\\n';\n\n// Attempt to use the first H1 heading as the page title\nlet pageTitle = \"Generated Document\";\nif (processedSections.length > 0 && processedSections[0].heading) {\n    // A simple way to find a prominent heading for the title\n    const firstRealHeading = processedSections.find(s => s.headingLevel > 0 && s.heading !== \"Document Table of Contents (Auto-Detected)\");\n    pageTitle = firstRealHeading ? 
firstRealHeading.heading : (processedSections[0].heading || pageTitle);\n}\nfullHtmlContent += `  <title>${pageTitle.replace(/</g, \"&lt;\").replace(/>/g, \"&gt;\")}</title>\\n`;\n\n// Optional: Add some basic styling\nfullHtmlContent += `  <style>\n    body { font-family: -apple-system, BlinkMacSystemFont, \"Segoe UI\", Roboto, Helvetica, Arial, sans-serif; line-height: 1.6; padding: 20px; max-width: 900px; margin: 0 auto; color: #333; }\n    h1, h2, h3, h4, h5, h6 { margin-top: 1.8em; margin-bottom: 0.6em; line-height: 1.2; color: #111; }\n    h1 { font-size: 2.2em; }\n    h2 { font-size: 1.8em; }\n    h3 { font-size: 1.5em; }\n    p { margin-bottom: 1em; }\n    pre, code { font-family: monospace; background-color: #f4f4f4; padding: 2px 4px; border-radius: 3px;}\n    pre { padding: 10px; overflow-x: auto; }\n    table { border-collapse: collapse; width: 100%; margin-bottom: 1em; }\n    th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }\n    th { background-color: #f2f2f2; }\n    img { max-width: 100%; height: auto; display: block; margin: 1em 0; }\n  </style>\\n`;\n\nfullHtmlContent += \"</head>\\n\";\nfullHtmlContent += \"<body>\\n\\n\";\n\nprocessedSections.forEach(section => {\n  // Ensure heading and headingLevel are present and valid\n  if (section.heading && typeof section.headingLevel === 'number' && section.headingLevel > 0 && section.headingLevel <= 6) {\n    // Basic sanitization for heading text to prevent HTML injection if source is untrusted\n    const safeHeading = section.heading.replace(/</g, \"&lt;\").replace(/>/g, \"&gt;\");\n    fullHtmlContent += `<h${section.headingLevel}>${safeHeading}</h${section.headingLevel}>\\n`;\n  }\n\n  // Append the section's HTML content\n  // It's assumed section.sectionHTML is already valid HTML for the body of the section\n  fullHtmlContent += (section.sectionHTML || \"\") + \"\\n\\n\";\n});\n\nfullHtmlContent += \"</body>\\n\";\nfullHtmlContent += \"</html>\";\n\n// Output the generated HTML 
string. This can then be used by a \"Write Binary File\" node\n// or other nodes that can handle file content.\nreturn [{ json: { fullHtmlContent} }];"
      },
      "typeVersion": 2
    },
    {
      "id": "3c472dcc-492b-4eb1-8c4d-bef5565047ba",
      "name": "HTML",
      "type": "n8n-nodes-base.html",
      "position": [
        2780,
        140
      ],
      "parameters": {
        "html": "{{ $json.fullHtmlContent }}"
      },
      "typeVersion": 1.2
    },
    {
      "id": "b912a32d-ae45-40f7-a5a4-e180cf306c6e",
      "name": "Move Binary Data",
      "type": "n8n-nodes-base.moveBinaryData",
      "position": [
        3000,
        140
      ],
      "parameters": {
        "mode": "jsonToBinary",
        "options": {
          "fileName": "={{ $('Set File Name').item.json.fileNameSnake }}.html",
          "mimeType": "text/html",
          "useRawData": true
        },
        "sourceKey": "html",
        "convertAllData": false
      },
      "typeVersion": 1
    },
    {
      "id": "10b96111-14e2-4061-82ae-643fb243894d",
      "name": "Create Markdown Document",
      "type": "n8n-nodes-base.code",
      "position": [
        2600,
        500
      ],
      "parameters": {
        "jsCode": "// n8n Code Node: Generate Full Markdown Document\n\n// Assuming the input from the previous node is items[0]\nconst inputData = $('Return the whole document').first().json;\nconst processedSections = inputData.processedSections;\n\nif (!processedSections || !Array.isArray(processedSections)) {\n  console.error(\"Error: processedSections array not found in input or is not an array.\");\n  return [{ json: { error: \"Input data is not in the expected format.\", markdown_output: \"\", fileName: \"error.md\" } }];\n}\n\nlet fullMarkdownContent = \"\";\n\nprocessedSections.forEach(section => {\n  // Ensure heading and headingLevel are present and valid\n  if (section.heading && typeof section.headingLevel === 'number' && section.headingLevel > 0) {\n    // Repeat '#' for the heading level\n    const markdownHeaderPrefix = '#'.repeat(section.headingLevel);\n    fullMarkdownContent += `${markdownHeaderPrefix} ${section.heading}\\n\\n`;\n  }\n\n  // Append the section's Markdown content\n  // It's assumed section.sectionMarkdown is already valid Markdown for the body of the section\n  fullMarkdownContent += (section.sectionMarkdown || \"\") + \"\\n\\n\"; // Add extra newline for spacing between sections\n});\n\n// Trim any excessive newlines at the very end\nfullMarkdownContent = fullMarkdownContent.trim();\n\n// Output the generated Markdown string.\nreturn [{ json: { fullMarkdownContent} }];"
      },
      "typeVersion": 2
    },
    {
      "id": "8eeb8129-e5e5-434f-b33a-35c7ea465a6d",
      "name": "Convert to File",
      "type": "n8n-nodes-base.convertToFile",
      "position": [
        2780,
        500
      ],
      "parameters": {
        "options": {
          "fileName": "={{ $('Set File Name').first().json.fileNameSnake }}.md"
        },
        "operation": "toText",
        "sourceProperty": "fullMarkdownContent"
      },
      "typeVersion": 1.1
    },
    {
      "id": "f50391c5-8ebc-48a6-a9d1-4c0d835ff5ea",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1260,
        -600
      ],
      "parameters": {
        "width": 560,
        "height": 400,
        "content": "### Node: AI Agent (AI Agent)\n\nThis is where the Table of Contents is intelligently constructed.\n\n**What it does:**\n* It combines information from two sources:\n    * The raw section headers (from \"Extract Sections headers as fallback\").\n    * The beginning of the document (from \"Take beginning of Document to look for Table of contents\").\n* **Goal:** The AI's task is to create a nested JSON object representing the document's hierarchy, forming the Table of Contents."
      },
      "typeVersion": 1
    },
    {
      "id": "c03c377b-6204-40e6-8079-56c726e8f8a8",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1940,
        -640
      ],
      "parameters": {
        "width": 400,
        "height": 400,
        "content": "## Return each section individually\n\nYou can return each heading + section individually, if you want need to process each chapter one by one. The current configuration outputs three versions for the section depending on your needs: Markdown, HTML, or just the text. "
      },
      "typeVersion": 1
    },
    {
      "id": "2b6e50ac-702a-4ff9-86cf-7fbde7a58dba",
      "name": "Sticky Note8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1980,
        60
      ],
      "parameters": {
        "width": 400,
        "height": 400,
        "content": "## ... OR Return the Document as a whole\n\nIf you want to process the document as a whole you can choose this path. You can download the markdown or html versions of the input PDF. \n\nif you use trigger by another workflow, you can just return the plain html or markdown without creating a file. this way you can use this workflow as a tool that an agent could use to parse PDFs"
      },
      "typeVersion": 1
    },
    {
      "id": "c4ec2654-ed9d-4bc3-a1a4-3e8f44aa115d",
      "name": "Sticky Note9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2600,
        -820
      ],
      "parameters": {
        "width": 1540,
        "height": 260,
        "content": "# Convert PDFs to structured JSON with the correct subheading hierarchy\n\n## This workflow outputs finds the actual headings of each section, the corresponding text, and outputs each section as an individual item (or the whole document as one large JSON, depending on how you execute this workflow"
      },
      "typeVersion": 1
    },
    {
      "id": "a887cdad-a2c3-4477-a6ed-72d007f560a2",
      "name": "GET Chunkr Task",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -260,
        -60
      ],
      "parameters": {
        "url": "=https://api.chunkr.ai/api/v1/task/{{ $('POST Chunkr Task').item.json.task_id }}",
        "options": {},
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "<YOUR_CHUNKR_API_KEY>"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "cd52c24f-1698-4887-8d5d-248eb0a904fd",
      "name": "POST Chunkr Task",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -700,
        20
      ],
      "parameters": {
        "url": "https://api.chunkr.ai/api/v1/task/parse",
        "method": "POST",
        "options": {},
        "jsonBody": "={\n  \"chunk_processing\": null,\n  \"error_handling\": null,\n  \"expires_in\": 123,\n  \"file\": \"{{ $('Convert the PDF to base64').item.json.data}}\",\n  \"file_name\": \"{{ $json.fileNameSnake }}\",\n  \"high_resolution\": false,\n  \"llm_processing\": null,\n  \"ocr_strategy\": null,\n  \"pipeline\": null,\n  \"segment_processing\": null,\n  \"segmentation_strategy\": null\n}",
        "sendBody": true,
        "sendHeaders": true,
        "specifyBody": "json",
        "headerParameters": {
          "parameters": [
            {
              "name": "Authorization",
              "value": "=<YOUR_CHUNKR_API_KEY>"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "5350ae70-85e1-44d2-bdb1-47b7a02708e5",
      "name": "Merge",
      "type": "n8n-nodes-base.merge",
      "position": [
        -1380,
        20
      ],
      "parameters": {},
      "typeVersion": 3.2
    }
  ],
  "connections": {
    "HTML": {
      "main": [
        [
          {
            "node": "Move Binary Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge": {
      "main": [
        [
          {
            "node": "Convert the PDF to base64",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Status is:": {
      "main": [
        [
          {
            "node": "Take beginning of Document to look for Table of contents",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Wait Before Polling the Chunkr Result",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Stop and Error",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set File Name": {
      "main": [
        [
          {
            "node": "POST Chunkr Task",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "GET Chunkr Task": {
      "main": [
        [
          {
            "node": "Status is:",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "POST Chunkr Task": {
      "main": [
        [
          {
            "node": "Wait Before Polling the Chunkr Result",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create HTML document": {
      "main": [
        [
          {
            "node": "HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download PDF from URL": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Table of Content Agent": {
      "main": [
        [
          {
            "node": "Return each section individually",
            "type": "main",
            "index": 0
          },
          {
            "node": "Return the whole document",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Markdown Document": {
      "main": [
        [
          {
            "node": "Convert to File",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Table of Content Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "Auto-fixing Output Parser",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Auto-fixing Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "Table of Content Agent",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Convert the PDF to base64": {
      "main": [
        [
          {
            "node": "Set File Name",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model1": {
      "ai_languageModel": [
        [
          {
            "node": "Auto-fixing Output Parser",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Return the whole document": {
      "main": [
        [
          {
            "node": "Create HTML document",
            "type": "main",
            "index": 0
          },
          {
            "node": "Create Markdown Document",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download PDF from Google Drive": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Return each section individually": {
      "main": [
        []
      ]
    },
    "When Executed by Another Workflow": {
      "main": [
        [
          {
            "node": "Download PDF from URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Sections headers as fallback": {
      "main": [
        [
          {
            "node": "Table of Content Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking \u2018Execute workflow\u2019": {
      "main": [
        [
          {
            "node": "Download PDF from Google Drive",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wait Before Polling the Chunkr Result": {
      "main": [
        [
          {
            "node": "GET Chunkr Task",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Take beginning of Document to look for Table of contents": {
      "main": [
        [
          {
            "node": "Extract Sections headers as fallback",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}