{
  "id": "",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Auto-RAG: Google Drive to Pinecone Vector Store",
  "tags": [],
  "nodes": [
    {
      "id": "3f932fc0-5749-4cb9-9248-90ac63cbdff8",
      "name": "Download file",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -816,
        -160
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $('Watch Drive Folder (new files)').first().json.id }}"
        },
        "options": {
          "fileName": "={{ $('Watch Drive Folder (new files)').first().json.name }}"
        },
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "95f22ca5-7f79-4fed-9e8d-091bbbc2a35a",
      "name": "Default Data Loader",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        320,
        96
      ],
      "parameters": {
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "title_ar",
                "value": "={{ $json.metadata.title_ar }}"
              },
              {
                "name": "lesson_formatted",
                "value": "={{ $json.metadata.lesson_formatted }}"
              },
              {
                "name": "book_id",
                "value": "={{ $json.metadata.book_id }}"
              },
              {
                "name": "grade",
                "value": "={{ $json.metadata.grade }}"
              },
              {
                "name": "lang",
                "value": "={{ $json.metadata.lang }}"
              },
              {
                "name": "field",
                "value": "={{ $json.metadata.field }}"
              },
              {
                "name": "source_file",
                "value": "={{ $json.metadata.source_file }}"
              },
              {
                "name": "tags",
                "value": "={{ $('Filename \u2192 Lesson Metadata').item.json.tags }}"
              },
              {
                "name": "searchable_terms",
                "value": "={{ $('Filename \u2192 Lesson Metadata').item.json.searchable_terms }}"
              }
            ]
          },
          "splitPages": false
        },
        "textSplittingMode": "custom"
      },
      "typeVersion": 1.1
    },
    {
      "id": "7471c670-fe8f-40da-b2aa-efca5a1655c8",
      "name": "Recursive Character Text Splitter",
      "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
      "position": [
        320,
        336
      ],
      "parameters": {
        "options": {},
        "chunkSize": 1200,
        "chunkOverlap": 150
      },
      "typeVersion": 1
    },
    {
      "id": "69b191fb-9605-4c08-b1c3-abdc3fbf1d10",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2080,
        -304
      ],
      "parameters": {
        "width": 576,
        "height": 384,
        "content": "## Google Drive \u2192 Pinecone Automation\nUploads any OCR JSON file from Google Drive, extracts text and lesson details, converts them into clean chunks, creates AI embeddings, and stores them for fast semantic search. Automatically archives processed files to keep your workspace clean.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "e5898c12-5b1c-4587-b94a-7017371e9b7d",
      "name": "Watch Drive Folder (new files)",
      "type": "n8n-nodes-base.googleDriveTrigger",
      "position": [
        -1296,
        -160
      ],
      "parameters": {
        "event": "fileCreated",
        "options": {},
        "pollTimes": {
          "item": [
            {
              "mode": "everyMinute"
            }
          ]
        },
        "triggerOn": "specificFolder",
        "folderToWatch": {
          "__rl": true,
          "mode": "list",
          "value": "YOUR_INPUT_FOLDER_ID",
          "cachedResultUrl": "",
          "cachedResultName": ""
        }
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "ce7a1d5b-ce96-4cf0-abcb-6117f12db462",
      "name": "Filename \u2192 Lesson Metadata",
      "type": "n8n-nodes-base.code",
      "position": [
        -1040,
        -160
      ],
      "parameters": {
        "jsCode": "// Get filename from previous \"Download file\" node or current item\nconst rawName =\n  $json.name ||\n  ($binary?.data?.fileName) ||\n  ($binary?.data_0?.fileName) ||\n  \"\";\n\n// ---- Normalize name: strip path, extension, GCS prefix, and _output- suffix\nfunction cleanBase(n) {\n  if (!n) return \"\";\n\n  // 1) \u062e\u064f\u062f \u0627\u0644\u0627\u0633\u0645 \u0641\u0642\u0637 \u0645\u0646 \u063a\u064a\u0631 \u0627\u0644\u0645\u0633\u0627\u0631\n  let name = String(n).split(\"/\").pop();\n\n  // 2) \u0634\u064a\u0644 \u0627\u0644\u0627\u0645\u062a\u062f\u0627\u062f .json (\u0644\u0648 \u0645\u0648\u062c\u0648\u062f)\n  name = name.replace(/\\.json$/i, \"\");\n\n  // 3) \u0634\u064a\u0644 \u0623\u064a \u062c\u0632\u0621 \u0645\u0646 \u0623\u0648\u0644 _output- \u0644\u0622\u062e\u0631 \u0627\u0644\u0633\u0644\u0633\u0644\u0629 (\u062e\u0627\u0635\u0629 Vision)\n  name = name.replace(/_output-.*$/i, \"\");\n\n  // 4) \u0628\u0639\u0636 \u0627\u0644\u0640 generators \u0628\u064a\u0636\u064a\u0641\u0648\u0627 \u0628\u0627\u062f\u0626\u0629 \u0642\u0628\u0644 arabic_\n  // \u0634\u064a\u0644 \u0623\u064a \u062d\u0627\u062c\u0629 \u0642\u0628\u0644 \u0623\u0648\u0644 \u0638\u0647\u0648\u0631 \u0644\u0640 \"arabic_\"\n  const pivot = name.indexOf(\"arabic_\");\n  if (pivot > 0) name = name.slice(pivot);\n\n  // \u0645\u062b\u0627\u0644 \u0627\u0644\u0646\u0627\u062a\u062c:\n  // arabic_g12_ar_lesson01_\u0627\u0631\u0627\u062f\u0647-\u0627\u0644\u062a\u063a\u064a\u064a\u0631_\u0627\u0644\u0642\u0631\u0627\u0621\u0647\n  return name;\n}\n\nconst base = cleanBase(rawName);\n\n// \u0623\u0645\u062b\u0644\u0629 \u0628\u0639\u062f \u0627\u0644\u062a\u0646\u0638\u064a\u0641:\n// arabic_g12_ar_lesson01_\u0627\u0631\u0627\u062f\u0647-\u0627\u0644\u062a\u063a\u064a\u064a\u0631_\u0627\u0644\u0642\u0631\u0627\u0621\u0647\n// arabic_g12_ar_lesson02_\u0627\u0628\u0648-\u0627\u0644\u0631\u064a\u062d\u0627\u0646-\u0627\u0644\u0628\u064a\u0631\u0648\u0646\u064a_\u0627\u0644\u0642\u0631\u0627\u0621\u0647\n// arabic_g12_ar_lesson03_\u0627\u0644\u0642\u062f\u0633-\u0645\u062f\u064a\u0646\u0629-\u0639\u0631\u0628\u064a\u0629-\u0627\u0633\u0644\u0627\u0645\u064a\u0629_\u0627\u0644\u0642\u0631\u0627\u0621\u0647\n\nconst re = new RegExp(\n  '^' +\n    '(?<subject>[a-z]+)_' +          // arabic\n    '(?<grade>g\\\\d{2})_' +           // g12\n    '(?<lang>[a-z]+)_' +             // ar\n    'lesson(?<lesson>\\\\d{2})_' +     // lesson01\n    '(?<title_ar>.+?)_' +            // \u0627\u0631\u0627\u062f\u0647-\u0627\u0644\u062a\u063a\u064a\u064a\u0631 (\u0642\u062f \u064a\u062d\u062a\u0648\u064a \u0639\u0644\u0649 \u0634\u0631\u0637\u0627\u062a)\n    '(?<field>[^_]+)' +              // \u0627\u0644\u0642\u0631\u0627\u0621\u0647 (\u0628\u062f\u0648\u0646 underscores)\n  '$',\n  'u' // Unicode\n);\n\nconst m = base.match(re);\n\nif (!m) {\n  return [{\n    json: {\n      meta_error: 'filename_not_matched',\n      source_file: rawName,\n      cleaned_base: base,\n    },\n  }];\n}\n\nconst { subject, grade, lang, title_ar, field } = m.groups;\nconst lesson = m.groups.lesson ? Number(m.groups.lesson) : null;\nconst grade_num = Number(grade.replace(/^g/i, ''));\n\n// \u062a\u0646\u0638\u064a\u0641\u0627\u062a \u0628\u0633\u064a\u0637\u0629\nconst clean_title_ar = title_ar ? title_ar.replace(/-/g, ' ') : '';\nconst clean_field = field ? field.replace(/-/g, ' ') : '';\n\n// \u0645\u0639\u0644\u0648\u0645\u0627\u062a \u0625\u0636\u0627\u0641\u064a\u0629 \u062d\u0633\u0628 \u0627\u0644\u0645\u0627\u062f\u0629\nconst subjectInfo = {\n  arabic: {\n    subject_ar: '\u0627\u0644\u0644\u063a\u0629 \u0627\u0644\u0639\u0631\u0628\u064a\u0629',\n    subject_en: 'Arabic Language',\n  },\n};\n\nconst currentSubject = subjectInfo[subject] || {\n  subject_ar: subject,\n  subject_en: subject,\n};\n\n// \u062a\u062d\u062f\u064a\u062f \u0646\u0648\u0639 \u0627\u0644\u0645\u062d\u062a\u0648\u0649 \u062d\u0633\u0628 \u0627\u0644\u0645\u062c\u0627\u0644\nconst fieldTypes = {\n  '\u0627\u0644\u0642\u0631\u0627\u0621\u0647': 'reading',\n  '\u0627\u0644\u0642\u0631\u0627\u0621\u0629': 'reading',\n  '\u0627\u0644\u0646\u062d\u0648': 'grammar',\n  '\u0627\u0644\u0628\u0644\u0627\u063a\u0629': 'rhetoric',\n  '\u0627\u0644\u0646\u0635\u0648\u0635': 'texts',\n  '\u0627\u0644\u0623\u062f\u0628': 'literature',\n};\n\nconst content_type = fieldTypes[clean_field] || 'general';\n\n// \u0627\u0633\u0645 \u0627\u0644\u0640 PDF \u0627\u0644\u0623\u0635\u0644\u064a (\u0628\u0639\u062f \u0627\u0644\u062a\u0646\u0638\u064a\u0641)\nconst source_pdf = `${base}.pdf`;\n\nreturn [{\n  json: {\n    // Basic info\n    subject,                                // \"arabic\"\n    subject_ar: currentSubject.subject_ar,  // \"\u0627\u0644\u0644\u063a\u0629 \u0627\u0644\u0639\u0631\u0628\u064a\u0629\"\n    subject_en: currentSubject.subject_en,  // \"Arabic Language\"\n    grade,                                  // \"g12\"\n    grade_num,                              // 12\n    grade_ar: '\u0627\u0644\u0635\u0641 \u0627\u0644\u062b\u0627\u0644\u062b \u0627\u0644\u062b\u0627\u0646\u0648\u064a',\n    lang,                                   // \"ar\"\n\n    // Lesson info\n    lesson,                                 // 1, 2, 3...\n    lesson_formatted: `lesson${String(lesson).padStart(2, '0')}`, // \"lesson01\"\n\n    // Content info\n    title_ar: clean_title_ar,               // \"\u0625\u0631\u0627\u062f\u0629 \u0627\u0644\u062a\u063a\u064a\u064a\u0631\"\n    field: clean_field,                     // \"\u0627\u0644\u0642\u0631\u0627\u0621\u0629\"\n    field_en: content_type,                 // \"reading\"\n\n    // Metadata for Pinecone\n    book_id: `${subject}_${grade}_${lang}_2025`,        // \"arabic_g12_ar_2025\"\n    namespace: `${subject}-${grade}-${lang}-update`,    // \"arabic-g12-ar-update\"\n    document_type: 'lesson_material',\n    curriculum: 'Egyptian Ministry of Education',\n\n    // Source info\n    source_file: source_pdf,                // \u0627\u0644\u0640 PDF \u0627\u0644\u0623\u0635\u0644\u064a\n    source_json: rawName,                   // \u0627\u0633\u0645 JSON \u0627\u0644\u062d\u0627\u0644\u064a\n    created_via: 'gdrive_ingest',\n\n    // Additional searchable fields\n    searchable_terms: [\n      clean_title_ar,\n      clean_field,\n      `\u0627\u0644\u062f\u0631\u0633 ${lesson}`,\n      `lesson ${lesson}`,\n      currentSubject.subject_ar,\n    ].filter(Boolean),\n\n    // Tags\n    tags: [\n      subject,\n      grade,\n      content_type,\n      'egyptian_curriculum',\n      'high_school',\n    ],\n  },\n}];\n"
      },
      "typeVersion": 2
    },
    {
      "id": "c856144d-e4cb-41b7-bb5e-6d0184edff1d",
      "name": "Vision JSON \u2192 Clean Text Chunks",
      "type": "n8n-nodes-base.code",
      "position": [
        -368,
        -160
      ],
      "parameters": {
        "jsCode": "// ==== \u0627\u0633\u062a\u062e\u0631\u0627\u062c field \u0645\u0646 \u0627\u0633\u0645 \u0627\u0644\u0645\u0644\u0641 ====\nfunction extractFieldFromBaseName(baseName) {\n  const parts = baseName.split('_');\n  const lastPart = parts[parts.length - 1];\n\n  // \u062a\u062d\u0648\u064a\u0644 \u0627\u0644\u0634\u0631\u0637\u0627\u062a \u0644\u0645\u0633\u0627\u0641\u0627\u062a \u0648\u062a\u0646\u0638\u064a\u0641\n  let field = lastPart ? lastPart.replace(/-/g, ' ') : '\u0627\u0644\u0642\u0631\u0627\u0621\u0629';\n\n  // mapping \u0644\u0644\u062d\u0627\u0644\u0627\u062a \u0627\u0644\u0634\u0627\u0626\u0639\u0629\n  const fieldMap = {\n    '\u0627\u0644\u0642\u0631\u0627\u0621\u0647': '\u0627\u0644\u0642\u0631\u0627\u0621\u0629',\n    '\u0627\u0644\u0627\u062f\u0628 \u0648 \u0627\u0644\u0646\u0635\u0648\u0635': '\u0627\u0644\u0623\u062f\u0628',\n    '\u0627\u0644\u0646\u062b\u0631 \u0648 \u0627\u0644\u0641\u0646\u0648\u0646': '\u0627\u0644\u0646\u062b\u0631',\n    '\u0627\u0644\u062a\u062f\u0631\u064a\u0628\u0627\u062a \u0627\u0644\u0644\u063a\u0648\u064a\u0629': '\u0627\u0644\u062a\u062f\u0631\u064a\u0628\u0627\u062a',\n    '\u0627\u0644\u0642\u0635\u0647': '\u0627\u0644\u0642\u0635\u0629',\n    '\u062a\u062f\u0631\u064a\u0628\u0627\u062a \u0639\u0627\u0645\u0629': '\u062a\u062f\u0631\u064a\u0628\u0627\u062a',\n  };\n\n  return fieldMap[field] || field;\n}\n\n// ==== \u0642\u0631\u0627\u0621\u0629 \u0627\u0644\u0640 JSON \u0645\u0646 \u0627\u0644\u0640 Binary ====\nconst item = $input.first();\nconst binContainer = item.binary || $binary || {};\nconst binKey = Object.keys(binContainer)[0];\nconst bin = binKey ? binContainer[binKey] : null;\n\nif (!bin || !bin.data) {\n  return [\n    {\n      json: {\n        error: 'no_binary_data',\n        hint:\n          '\u062a\u0623\u0643\u062f \u0625\u0646 Google Drive \u2192 Operation = Download \u0648\u0625\u0646 Binary Property = data',\n        gotBinaryKeys: Object.keys(binContainer),\n      },\n    },\n  ];\n}\n\nconst fname = bin.fileName || item.json?.name || 'file.json';\nconst buf = Buffer.from(bin.data, 'base64');\n\nlet docAll;\ntry {\n  docAll = JSON.parse(buf.toString('utf8'));\n} catch (e) {\n  return [{ json: { error: 'invalid_json', message: String(e) } }];\n}\n\n// ==== \u0627\u0633\u062a\u062e\u0631\u0627\u062c \u0627\u0644\u0646\u0635 \u0645\u0646 Vision / Document AI ====\nfunction extractTextFromVision(o) {\n  if (!o) return '';\n\n  // 1) Vision async PDF: responses[].fullTextAnnotation.text\n  if (Array.isArray(o.responses) && o.responses.length) {\n    const parts = [];\n\n    for (const r of o.responses) {\n      const t1 = r?.fullTextAnnotation?.text || '';\n      const t2 = r?.textAnnotations?.[0]?.description || '';\n      const t = t1 && t1.trim() ? t1 : t2 || '';\n\n      if (t && t.trim()) parts.push(t);\n    }\n\n    if (parts.length) return parts.join('\\n');\n  }\n\n  // 2) Document AI (document.text)\n  if (typeof o.document?.text === 'string' && o.document.text.trim()) {\n    return o.document.text;\n  }\n\n  // 3) fallback \u0642\u062f\u064a\u0645\n  if (typeof o.text === 'string' && o.text.trim()) return o.text;\n\n  return '';\n}\n\nlet full = extractTextFromVision(docAll);\n\n// \u0644\u0648 \u0645\u0641\u064a\u0634 \u0646\u0635 \u0648\u0627\u0636\u062d\nif (!full || !full.trim()) {\n  return [\n    {\n      json: {\n        error: 'no_text_extracted',\n        hint:\n          '\u0645\u0644\u0641 Vision JSON \u0644\u0627 \u064a\u062d\u062a\u0648\u064a \u0639\u0644\u0649 fullTextAnnotation.text. \u0631\u0627\u062c\u0639 \u0627\u0644\u0625\u0639\u062f\u0627\u062f\u0627\u062a \u0623\u0648 \u062c\u0631\u0651\u0628 \u0645\u0644\u0641 \u0622\u062e\u0631.',\n        file: fname,\n        debug_keys: Object.keys(docAll),\n      },\n    },\n  ];\n}\n\n// ==== Helpers \u0644\u0645\u0639\u0627\u0644\u062c\u0629 \u0627\u0644\u0646\u0635 \u0627\u0644\u0639\u0631\u0628\u064a ====\nfunction normalizeAr(t) {\n  return t\n    .replace(/\\u0640/g, '') // \u062a\u0637\u0648\u064a\u0644\n    .replace(\n      /[\\u0617-\\u061A\\u064B-\\u0652\\u0657-\\u065F\\u0670\\u06D6-\\u06ED]/g,\n      '',\n    ) // \u062a\u0634\u0643\u064a\u0644\n    .replace(/[\u0625\u0623\u0622\u0627]/g, '\u0627')\n    .replace(/\u0649/g, '\u064a')\n    .replace(/[\u0660-\u0669]/g, (d) => '\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669'.indexOf(d))\n    .replace(/\\s+/g, ' ')\n    .trim();\n}\n\nfunction splitSentences(t) {\n  return t\n    .split(/(?<=[\\.!\\\u061f\\?\u2026])\\s+|\\n+/)\n    .map((s) => s.trim())\n    .filter(Boolean);\n}\n\nfunction makeChunks(text, max = 900, overlap = 150) {\n  const sents = splitSentences(text);\n  const out = [];\n  let cur = '';\n\n  for (const s of sents) {\n    if (cur.length + s.length + 1 <= max) {\n      cur = (cur ? cur + ' ' : '') + s;\n    } else {\n      if (cur) out.push(cur);\n      const tail = out.length ? out[out.length - 1].slice(-overlap) : '';\n      cur = (tail + ' ' + s).slice(-max).trim();\n    }\n  }\n\n  if (cur) out.push(cur);\n  return out;\n}\n\nfunction pullAnchor() {\n  return '';\n} // \u0645\u0634 \u0647\u0646\u062d\u062a\u0627\u062c\u0647\u0627 \u0645\u0639 Vision responses\n\nfunction parseLessonMeta(name) {\n  const m = /lesson(\\d{2})/i.exec(name) || [];\n  const key = m[1] ? `lesson${m[1]}` : 'lessonXX';\n\n  const titles = {\n    lesson01: '\u0625\u0631\u0627\u062f\u0629 \u0627\u0644\u062a\u063a\u064a\u064a\u0631',\n    lesson02: '\u0623\u0628\u0648 \u0627\u0644\u0631\u064a\u062d\u0627\u0646 \u0627\u0644\u0628\u064a\u0631\u0648\u0646\u064a',\n    lesson03: '\u0627\u0644\u0642\u062f\u0633 \u0645\u062f\u064a\u0646\u0629 \u0639\u0631\u0628\u064a\u0629 \u0625\u0633\u0644\u0627\u0645\u064a\u0629',\n    lesson04: '\u0627\u0644\u0639\u0644\u0645 \u0641\u064a \u0627\u0644\u0625\u0633\u0644\u0627\u0645',\n    lesson05: '\u0642\u064a\u0645 \u0625\u0646\u0633\u0627\u0646\u064a\u0629',\n  };\n\n  return { key, title: titles[key] || '' };\n}\n\nfunction toAsciiId(s) {\n  return String(s)\n    .replace(/\\.json$/i, '')\n    .replace(/[^\\w\\-]+/g, '_')\n    .slice(-200);\n}\n\nfunction cleanFileBase(fname) {\n  const noJson = fname.replace(/\\.json$/i, '');\n  const noOutput = noJson.replace(/_output-.*$/i, ''); // \u064a\u0634\u064a\u0644 \u0646\u0647\u0627\u064a\u0629 _output-..\n  const noPrefix = noOutput.replace(/^g12-arabic-ocr-2_/, ''); // \u0644\u0648 \u0641\u064a\u0647 \u0628\u0627\u062f\u0626\u0629\n  return noPrefix;\n}\n\n// ==== \u062a\u0646\u0638\u064a\u0641 \u0627\u0644\u0646\u0635 \u0648\u062a\u062c\u0632\u0626\u062a\u0647 ====\nlet cleaned = normalizeAr(full);\n\ncleaned = cleaned\n  .replace(/\u0627\u0644\u0644\u063a\u0629 \u0627\u0644\u0639\u0631\u0628\u064a\u0629.*\u0627\u0644\u0635\u0641 \u0627\u0644\u062b\u0627\u0644\u062b \u0627\u0644\u062b\u0627\u0646\u0648\u064a/gi, ' ')\n  .replace(\n    /\u062a\u0643\u0646\u0648\\s*\u0628\u0631\u0646\u062a|\u062a\u062f\u0631\u064a\u0628\u0627\u062a\\s*\u0648\u0623\u0646\u0634\u0637\u0629|\u0648\u0632\u0627\u0631\u0629 \u0627\u0644\u062a\u0631\u0628\u064a\u0629 \u0648\u0627\u0644\u062a\u0639\u0644\u064a\u0645|\u062d\u0642\u0648\u0642 \u0627\u0644\u0637\u0628\u0639/gi,\n    ' ',\n  )\n  .replace(/\\s+/g, ' ')\n  .trim();\n\nconst chunks = makeChunks(cleaned, 900, 150);\n\n// \u0644\u0648 \u0645\u0641\u064a\u0634 chunks \u0628\u0639\u062f \u0627\u0644\u062a\u0646\u0638\u064a\u0641\nif (!chunks.length) {\n  return [\n    {\n      json: {\n        error: 'empty_chunks_after_clean',\n        file: fname,\n        note: '\u0627\u0644\u0646\u0635 \u0627\u0644\u0645\u0633\u062a\u062e\u0631\u062c \u0628\u0639\u062f \u0627\u0644\u062a\u0646\u0638\u064a\u0641 \u0623\u0635\u0628\u062d \u0641\u0627\u0631\u063a.',\n      },\n    },\n  ];\n}\n\n// ==== Metadata + IDs ====\nconst baseName = cleanFileBase(fname);\nconst { key: lesson_key, title } = parseLessonMeta(baseName);\nconst srcPdf = `${baseName}.pdf`;\n\nconst meta = {\n  title_ar: title,\n  lesson_formatted: lesson_key,\n  book_id: 'arabic_g12_ar_2025',\n  namespace: 'arabic-g12-ar',\n  grade: 'g12',\n  lang: 'ar',\n  field: extractFieldFromBaseName(baseName),\n  source_file: srcPdf,\n};\n\n// ==== \u0625\u062e\u0631\u0627\u062c \u0627\u0644\u0640 chunks ====\nreturn chunks.map((text, i) => ({\n  json: {\n    id: `${toAsciiId(lesson_key + '_' + baseName)}_${String(i + 1)\n      .padStart(4, '0')}`,\n    text,\n    metadata: meta,\n  },\n}));\n"
      },
      "typeVersion": 2
    },
    {
      "id": "342f3191-0910-4154-b9a9-2736852363bf",
      "name": "Generate Embeddings (OpenAI)",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        96,
        160
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "a7e9184b-3391-4208-a37c-255aa0b30bc2",
      "name": "Insert into Pinecone Vector Store",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "position": [
        256,
        -160
      ],
      "parameters": {
        "mode": "insert",
        "options": {
          "pineconeNamespace": "={{ $('Filename \u2192 Lesson Metadata').item.json.namespace }}"
        },
        "pineconeIndex": {
          "__rl": true,
          "mode": "list",
          "value": "YOUR_PINECONE_INDEX",
          "cachedResultName": "arabic-g12"
        },
        "embeddingBatchSize": 64
      },
      "credentials": {
        "pineconeApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "ad674e55-0848-401f-a480-0982c725a31f",
      "name": "Move File to Archive",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        1008,
        -160
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $('Watch Drive Folder (new files)').item.json.id }}"
        },
        "driveId": {
          "__rl": true,
          "mode": "list",
          "value": "My Drive"
        },
        "folderId": {
          "__rl": true,
          "mode": "list",
          "value": "YOUR_ARCHIVE_FOLDER_ID",
          "cachedResultUrl": "",
          "cachedResultName": ""
        },
        "operation": "move"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "2826f9fd-610c-4410-86de-d2b225bae14b",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1344,
        -304
      ],
      "parameters": {
        "color": 7,
        "width": 736,
        "height": 384,
        "content": "## 01 \u2013 Input & Metadata Extraction\nThis section watches a Google Drive folder for new files, downloads each JSON file, and extracts structured lesson metadata from the filename. It prepares the raw file and metadata for text extraction and RAG ingestion.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "c069ce44-000c-4890-8dd7-58235ab76297",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -560,
        -304
      ],
      "parameters": {
        "color": 7,
        "width": 464,
        "height": 384,
        "content": "## 02 \u2013 Vision Parsing & Text Cleaning\nParses Google Vision JSON, extracts full Arabic text, removes noise, normalizes characters, and generates clean text chunks with consistent structure for downstream RAG processing.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "1f35019a-0a17-4b22-b02c-f51ae49bd7ff",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        0,
        -304
      ],
      "parameters": {
        "color": 7,
        "width": 832,
        "height": 864,
        "content": "## 03 \u2013 Embeddings & Vector Storage\nGenerates vector embeddings for each cleaned text chunk and stores them, along with metadata, in a Pinecone index. This section forms the core of the RAG ingestion process for fast semantic search.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "224d141d-38b3-42ad-82bd-961cbcea42d3",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        864,
        -304
      ],
      "parameters": {
        "color": 7,
        "width": 496,
        "height": 320,
        "content": "## 04 \u2013 Archive Processed Files\nMoves the processed Google Drive file into an archive folder to prevent duplicate ingestion and keep the input directory organized for future uploads.\n"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "timezone": "Africa/Cairo",
    "callerPolicy": "workflowsFromSameOwner",
    "executionOrder": "v1"
  },
  "versionId": "",
  "connections": {
    "Download file": {
      "main": [
        [
          {
            "node": "Vision JSON \u2192 Clean Text Chunks",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Default Data Loader": {
      "ai_document": [
        [
          {
            "node": "Insert into Pinecone Vector Store",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Filename \u2192 Lesson Metadata": {
      "main": [
        [
          {
            "node": "Download file",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate Embeddings (OpenAI)": {
      "ai_embedding": [
        [
          {
            "node": "Insert into Pinecone Vector Store",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Watch Drive Folder (new files)": {
      "main": [
        [
          {
            "node": "Filename \u2192 Lesson Metadata",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Insert into Pinecone Vector Store": {
      "main": [
        [
          {
            "node": "Move File to Archive",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Recursive Character Text Splitter": {
      "ai_textSplitter": [
        [
          {
            "node": "Default Data Loader",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    },
    "Vision JSON \u2192 Clean Text Chunks": {
      "main": [
        [
          {
            "node": "Insert into Pinecone Vector Store",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}