{
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Extract Arabic Text from PDFs with Mistral OCR & Telegram Bot to Google Docs",
  "nodes": [
    {
      "id": "workflow-start-sticky",
      "name": "\ud83d\udccb Workflow Overview",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -480,
        720
      ],
      "parameters": {
        "width": 320,
        "height": 240,
        "content": "## Arabic PDF OCR Telegram Bot\n\nThis workflow:\n1. Receives PDF files via Telegram\n2. Uses Mistral AI OCR to extract Arabic text\n3. Creates a Google Doc with the extracted content\n4. Sends progress updates to the user\n\n**Requirements:**\n- Telegram Bot API credentials\n- Mistral AI API key (stored as an HTTP Header Auth credential)\n- Google Docs OAuth2 credentials"
      },
      "typeVersion": 1
    },
    {
      "id": "3067ee6f-21dc-4206-a087-629a1ec2b484",
      "name": "Download Document from Telegram",
      "type": "n8n-nodes-base.telegram",
      "position": [
        224,
        864
      ],
      "parameters": {
        "fileId": "={{ $json.message.document.file_id }}",
        "resource": "file",
        "additionalFields": {}
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "2130cb79-a8ac-4ab1-b432-7d6bf3996715",
      "name": "Telegram Bot Trigger",
      "type": "n8n-nodes-base.telegramTrigger",
      "position": [
        -240,
        880
      ],
      "parameters": {
        "updates": [
          "message"
        ],
        "additionalFields": {}
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "e5980bbd-fa94-46e6-aa2d-d8c3768082a7",
      "name": "Check If Document Attached",
      "type": "n8n-nodes-base.if",
      "position": [
        -16,
        880
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "f514a6e8-30b6-4662-9827-3487ca7edbb3",
              "operator": {
                "type": "string",
                "operation": "exists",
                "singleValue": true
              },
              "leftValue": "={{ $json.message.document.file_name }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "validation-sticky",
      "name": "\ud83d\udd0d Document Validation",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -16,
        760
      ],
      "parameters": {
        "width": 200,
        "height": 80,
        "content": "Checks whether the Telegram message includes a document attachment. The file type itself is not verified, so send PDF files only."
      },
      "typeVersion": 1
    },
    {
      "id": "138f927c-073f-4b90-a54c-2421ae98def0",
      "name": "Upload PDF to Mistral AI",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        480,
        1104
      ],
      "parameters": {
        "url": "https://api.mistral.ai/v1/files",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "contentType": "multipart-form-data",
        "authentication": "genericCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "purpose",
              "value": "ocr"
            },
            {
              "name": "file",
              "parameterType": "formBinaryData",
              "inputDataFieldName": "=data"
            }
          ]
        },
        "genericAuthType": "httpHeaderAuth"
      },
      "credentials": {
        "httpHeaderAuth": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "mistral-upload-sticky",
      "name": "\ud83d\udce4 Mistral AI Upload",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        480,
        1000
      ],
      "parameters": {
        "width": 200,
        "height": 80,
        "content": "Uploads the PDF file to Mistral AI's file storage for OCR processing. Returns a file ID for subsequent operations."
      },
      "typeVersion": 1
    },
    {
      "id": "b786de5e-14a0-4332-b386-682cd2303cad",
      "name": "Get Mistral Signed URL",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        704,
        992
      ],
      "parameters": {
        "url": "=https://api.mistral.ai/v1/files/{{ $json.id }}/url",
        "options": {},
        "sendQuery": true,
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "queryParameters": {
          "parameters": [
            {
              "name": "expiry",
              "value": "24"
            }
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "e9a16218-816e-4c05-b0d6-4bea6b3d44b8",
      "name": "Process OCR with Mistral",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        944,
        912
      ],
      "parameters": {
        "url": "https://api.mistral.ai/v1/ocr",
        "method": "POST",
        "options": {},
        "jsonBody": "={\n  \"model\": \"mistral-ocr-latest\",\n  \"document\": {\n    \"type\": \"document_url\",\n    \"document_url\": \"{{ $json.url }}\"\n  },\n  \"include_image_base64\": true\n}",
        "sendBody": true,
        "specifyBody": "json",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth"
      },
      "credentials": {
        "httpHeaderAuth": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "ocr-processing-sticky",
      "name": "\ud83e\udd16 OCR Processing",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        944,
        800
      ],
      "parameters": {
        "width": 200,
        "height": 80,
        "content": "Mistral AI processes the PDF and extracts text using OCR. The response includes markdown-formatted text organized by pages."
      },
      "typeVersion": 1
    },
    {
      "id": "20911406-a0ed-4d0a-9e21-5b2d502bf6b4",
      "name": "Parse OCR Results by Page",
      "type": "n8n-nodes-base.code",
      "position": [
        1200,
        912
      ],
      "parameters": {
        "jsCode": "// Extract pages from OCR results and format for processing\nconst pages = items[0].json.pages || [];\nconst output = [];\n\nfor (const page of pages) {\n  // page.index is your page number (0-based). Add +1 for 1-based numbering\n  const pageNumber = page.index+1;\n  const content    = page.markdown; // Extract markdown content\n  const images     = page.images;\n  \n  output.push({\n    json: { pageNumber, content },\n  });\n}\n\nreturn output;\n"
      },
      "typeVersion": 2
    },
    {
      "id": "485286ee-6874-4913-b9a0-55bc150f371b",
      "name": "Update Google Doc with Content",
      "type": "n8n-nodes-base.googleDocs",
      "position": [
        2144,
        848
      ],
      "parameters": {
        "actionsUi": {
          "actionFields": [
            {
              "text": "={{ \n  $('Aggregate OCR Results').item.json.content.map((c, i) => \n    `${c}\\n\\n(Page Number: ${$('Aggregate OCR Results').item.json.pageNumber[i]})`\n  ).join('\\n\\n--------\\n\\n')\n}}\n\n",
              "action": "insert"
            }
          ]
        },
        "operation": "update",
        "documentURL": "={{ $json.id }}"
      },
      "credentials": {
        "googleDocsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 2
    },
    {
      "id": "e63eac1d-a055-44b5-acf4-6f955d8b843d",
      "name": "Create New Google Doc",
      "type": "n8n-nodes-base.googleDocs",
      "position": [
        1664,
        912
      ],
      "parameters": {
        "title": "=OCR Result from {{ $('Telegram Bot Trigger').item.json.message.document.file_name }}",
        "folderId": "1ZJzZimxzxtlq--x2BxwgBLQ5PMJ96f2s"
      },
      "credentials": {
        "googleDocsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 2
    },
    {
      "id": "google-docs-sticky",
      "name": "\ud83d\udcc4 Google Docs Creation",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1664,
        800
      ],
      "parameters": {
        "width": 200,
        "height": 80,
        "content": "Creates a new Google Doc named after the uploaded file. The extracted text, labelled with page numbers, is added by the update step that follows."
      },
      "typeVersion": 1
    },
    {
      "id": "87ca83dd-d76c-4837-84b4-420147788bfb",
      "name": "Process Document Updates",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        1872,
        768
      ],
      "parameters": {
        "options": {},
        "batchSize": "=1"
      },
      "typeVersion": 3
    },
    {
      "id": "d55b3813-8b0d-46c6-a629-b24f3b8a9b38",
      "name": "Aggregate OCR Results",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        1440,
        912
      ],
      "parameters": {
        "options": {},
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "=pageNumber"
            },
            {
              "fieldToAggregate": "content"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "0ea6ad36-f231-4eb2-9f67-1c17738e7c8f",
      "name": "Send Document Link to User",
      "type": "n8n-nodes-base.telegram",
      "position": [
        2384,
        720
      ],
      "parameters": {
        "text": "=<a href=\"https://docs.google.com/document/d/{{ $json.documentId }}\">{{ $('Create New Google Doc').item.json.name }}</a>\n",
        "chatId": "={{ $('Telegram Bot Trigger').item.json.message.chat.id }}",
        "additionalFields": {
          "parse_mode": "HTML",
          "appendAttribution": false,
          "message_thread_id": 0,
          "reply_to_message_id": 0,
          "disable_web_page_preview": false
        }
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "notesInFlow": false,
      "typeVersion": 1.2
    },
    {
      "id": "313d1ec1-9529-41b6-ae95-0ea9427a1d33",
      "name": "Request PDF File Format",
      "type": "n8n-nodes-base.telegram",
      "position": [
        80,
        1072
      ],
      "parameters": {
        "text": "Please send the file in PDF format",
        "chatId": "={{ $('Telegram Bot Trigger').item.json.message.chat.id }}",
        "additionalFields": {
          "appendAttribution": false
        }
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "notesInFlow": false,
      "typeVersion": 1.2
    },
    {
      "id": "4790cab5-de75-4d68-a7a8-ec07bea7c7b0",
      "name": "Status: File Received (1/5)",
      "type": "n8n-nodes-base.telegram",
      "position": [
        400,
        768
      ],
      "parameters": {
        "text": "File received successfully (1/5)",
        "chatId": "={{ $('Telegram Bot Trigger').item.json.message.chat.id }}",
        "additionalFields": {
          "appendAttribution": false
        }
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "notesInFlow": false,
      "typeVersion": 1.2
    },
    {
      "id": "d793d21a-f248-4dec-8798-a4a37336a650",
      "name": "Status: Sent to Processor (2/5)",
      "type": "n8n-nodes-base.telegram",
      "position": [
        640,
        768
      ],
      "parameters": {
        "text": "File sent to processor (2/5)",
        "chatId": "={{ $('Telegram Bot Trigger').item.json.message.chat.id }}",
        "additionalFields": {
          "appendAttribution": false
        }
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "notesInFlow": false,
      "typeVersion": 1.2
    },
    {
      "id": "79cb13ce-e794-48e3-bb79-929aaa0be07d",
      "name": "Status: File Signed (3/5)",
      "type": "n8n-nodes-base.telegram",
      "position": [
        880,
        752
      ],
      "parameters": {
        "text": "File signed for processing (3/5)",
        "chatId": "={{ $('Telegram Bot Trigger').item.json.message.chat.id }}",
        "additionalFields": {
          "appendAttribution": false
        }
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "notesInFlow": false,
      "typeVersion": 1.2
    },
    {
      "id": "8787ffff-8643-416e-85ca-2dc3bdc22f22",
      "name": "Status: Results Received (4/5)",
      "type": "n8n-nodes-base.telegram",
      "position": [
        1120,
        768
      ],
      "parameters": {
        "text": "OCR results received (4/5)",
        "chatId": "={{ $('Telegram Bot Trigger').item.json.message.chat.id }}",
        "additionalFields": {
          "appendAttribution": false
        }
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "notesInFlow": false,
      "typeVersion": 1.2
    },
    {
      "id": "3dfb1769-9f47-4e39-8b48-0b9476a3b5ed",
      "name": "Status: Creating Document (5/5)",
      "type": "n8n-nodes-base.telegram",
      "position": [
        1584,
        720
      ],
      "parameters": {
        "text": "Creating document after processing (5/5)",
        "chatId": "={{ $('Telegram Bot Trigger').item.json.message.chat.id }}",
        "additionalFields": {
          "appendAttribution": false
        }
      },
      "credentials": {
        "telegramApi": {
          "name": "<your credential>"
        }
      },
      "notesInFlow": false,
      "typeVersion": 1.2
    },
    {
      "id": "progress-updates-sticky",
      "name": "\ud83d\udcc8 Progress Updates",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        400,
        640
      ],
      "parameters": {
        "width": 320,
        "height": 80,
        "content": "Users receive real-time progress updates via Telegram messages, showing the current step (1/5 through 5/5) in the OCR processing workflow."
      },
      "typeVersion": 1
    },
    {
      "id": "final-result-sticky",
      "name": "\u2705 Final Result",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2384,
        600
      ],
      "parameters": {
        "width": 200,
        "height": 80,
        "content": "Users receive a clickable link to the Google Doc containing the extracted Arabic text, organized by page numbers."
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "Telegram Bot Trigger": {
      "main": [
        [
          {
            "node": "Check If Document Attached",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Aggregate OCR Results": {
      "main": [
        [
          {
            "node": "Create New Google Doc",
            "type": "main",
            "index": 0
          },
          {
            "node": "Status: Creating Document (5/5)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create New Google Doc": {
      "main": [
        [
          {
            "node": "Process Document Updates",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Mistral Signed URL": {
      "main": [
        [
          {
            "node": "Process OCR with Mistral",
            "type": "main",
            "index": 0
          },
          {
            "node": "Status: File Signed (3/5)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Process Document Updates": {
      "main": [
        [],
        [
          {
            "node": "Update Google Doc with Content",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Process OCR with Mistral": {
      "main": [
        [
          {
            "node": "Parse OCR Results by Page",
            "type": "main",
            "index": 0
          },
          {
            "node": "Status: Results Received (4/5)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Upload PDF to Mistral AI": {
      "main": [
        [
          {
            "node": "Get Mistral Signed URL",
            "type": "main",
            "index": 0
          },
          {
            "node": "Status: Sent to Processor (2/5)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse OCR Results by Page": {
      "main": [
        [
          {
            "node": "Aggregate OCR Results",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check If Document Attached": {
      "main": [
        [
          {
            "node": "Download Document from Telegram",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Request PDF File Format",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Update Google Doc with Content": {
      "main": [
        [
          {
            "node": "Process Document Updates",
            "type": "main",
            "index": 0
          },
          {
            "node": "Send Document Link to User",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download Document from Telegram": {
      "main": [
        [
          {
            "node": "Upload PDF to Mistral AI",
            "type": "main",
            "index": 0
          },
          {
            "node": "Status: File Received (1/5)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}