{
  "name": "17 \u00b7 RAG Ingest: N\u1ea1p Company Knowledge v\u00e0o Pinecone (Gemini Embedding)",
  "nodes": [
    {
      "parameters": {
        "formTitle": "N\u1ea1p D\u1eef Li\u1ec7u C\u00f4ng Ty v\u00e0o Pinecone",
        "formDescription": "Upload file company_knowledge.json \u0111\u1ec3 n\u1ea1p tri th\u1ee9c c\u00f4ng ty v\u00e0o Pinecone vector store. Ch\u1ec9 ch\u1ea5p nh\u1eadn file .json.",
        "formFields": {
          "values": [
            {
              "fieldLabel": "data",
              "fieldType": "file",
              "requiredField": true,
              "acceptFileTypes": ".json",
              "multipleFiles": false
            }
          ]
        },
        "options": {
          "respondWith": "text",
          "responseText": "\u2705 File \u0111\u00e3 \u0111\u01b0\u1ee3c nh\u1eadn! Qu\u00e1 tr\u00ecnh n\u1ea1p d\u1eef li\u1ec7u v\u00e0o Pinecone \u0111ang ch\u1ea1y. Ki\u1ec3m tra k\u1ebft qu\u1ea3 trong tab Executions c\u1ee7a n8n."
        }
      },
      "id": "wf17-node-001",
      "name": "\ud83d\udcc1 Form Upload company_knowledge.json",
      "type": "n8n-nodes-base.formTrigger",
      "typeVersion": 2.2,
      "position": [
        240,
        360
      ],
      "notes": "Form upload: truy c\u1eadp URL webhook sau khi Activate workflow \u2192 ch\u1ecdn file company_knowledge.json t\u1eeb m\u00e1y t\u00ednh \u2192 Submit. Kh\u00f4ng c\u1ea7n docker cp hay b\u1ea5t k\u1ef3 l\u1ec7nh terminal n\u00e0o. File .json \u0111\u01b0\u1ee3c nh\u1eadn d\u01b0\u1edbi d\u1ea1ng binary v\u1edbi key 'data' \u2192 chuy\u1ec3n tr\u1ef1c ti\u1ebfp sang node Parse JSON."
    },
    {
      "parameters": {
        "operation": "fromJson",
        "options": {}
      },
      "id": "wf17-node-003",
      "name": "\ud83e\uddf7 Parse JSON T\u1eeb File",
      "type": "n8n-nodes-base.extractFromFile",
      "typeVersion": 1,
      "position": [
        460,
        360
      ],
      "notes": "Chuy\u1ec3n binary file \u2192 object JSON. To\u00e0n b\u1ed9 n\u1ed9i dung company_knowledge.json n\u1eb1m trong $json.data."
    },
    {
      "parameters": {
        "jsCode": "// Bi\u1ebfn company_knowledge.json th\u00e0nh danh s\u00e1ch 'documents' \u0111\u1ec3 n\u1ea1p v\u00e0o Pinecone.\n// M\u1ed7i entry (s\u1ea3n ph\u1ea9m, quy tr\u00ecnh, ch\u00ednh s\u00e1ch, FAQ) \u2192 1 document c\u00f3 `text` + metadata.\n\nconst root = $input.first().json.data || $input.first().json;\nconst company = root.company || {};\nconst products = root.products || [];\nconst processes = root.processes || [];\nconst policies = root.policies || {};\nconst faqs = root.faq || [];\n\nconst vnd = (n) => (typeof n === 'number' && n > 0 ? n.toLocaleString('vi-VN') + '\u0111' : 'Li\u00ean h\u1ec7 b\u00e1o gi\u00e1');\nconst docs = [];\n\n// --- Th\u00f4ng tin c\u00f4ng ty ---\nif (company.name) {\n  docs.push({\n    json: {\n      text: [\n        `TH\u00d4NG TIN C\u00d4NG TY: ${company.name} \u2014 ${company.tagline || ''}`,\n        `Hotline: ${company.hotline || ''} | Email: ${company.email || ''}`,\n        `Website: ${company.website || ''} | \u0110\u1ecba ch\u1ec9: ${company.address || ''}`,\n        `Gi\u1edd l\u00e0m vi\u1ec7c: ${company.working_hours || ''}`,\n        `Th\u00e0nh l\u1eadp: ${company.founded || ''} | Quy m\u00f4: ${company.employees || ''} nh\u00e2n vi\u00ean`,\n        `M\u00f4 t\u1ea3: ${company.description || ''}`\n      ].filter(Boolean).join('\\n'),\n      doc_type: 'company_info',\n      ref_id: 'company',\n      title: company.name,\n      category: 'Th\u00f4ng tin c\u00f4ng ty'\n    }\n  });\n}\n\n// --- S\u1ea3n ph\u1ea9m / D\u1ecbch v\u1ee5 ---\nfor (const p of products) {\n  const features = Array.isArray(p.features)\n    ? p.features.map(f => `  - ${f}`).join('\\n')\n    : (Array.isArray(p.scope) ? p.scope.map(f => `  - ${f}`).join('\\n') : '');\n  const techList = Array.isArray(p.technologies) ? 
`C\u00f4ng ngh\u1ec7: ${p.technologies.join(', ')}` : '';\n\n  docs.push({\n    json: {\n      text: [\n        `S\u1ea2N PH\u1ea8M/D\u1ecaCH V\u1ee4: ${p.name} (m\u00e3 ${p.id})`,\n        `Danh m\u1ee5c: ${p.category}`,\n        `Gi\u00e1: ${vnd(p.price)} ${p.price_unit || ''}`.trim(),\n        `T\u00ecnh tr\u1ea1ng: ${p.in_stock ? 'C\u00f2n h\u00e0ng / \u0110ang cung c\u1ea5p' : 'H\u1ebft h\u00e0ng'}`,\n        p.warranty_months ? `B\u1ea3o h\u00e0nh: ${p.warranty_months} th\u00e1ng` : '',\n        p.support ? `H\u1ed7 tr\u1ee3: ${p.support}` : '',\n        `M\u00f4 t\u1ea3: ${p.description}`,\n        features ? `T\u00ednh n\u0103ng/Ph\u1ea1m vi:\\n${features}` : '',\n        techList,\n        p.process ? `Quy tr\u00ecnh: ${p.process}` : '',\n        p.duration ? `Th\u1eddi gian: ${p.duration}` : '',\n        Array.isArray(p.tags) && p.tags.length ? `T\u1eeb kh\u00f3a: ${p.tags.join(', ')}` : ''\n      ].filter(Boolean).join('\\n'),\n      doc_type: 'product',\n      ref_id: p.id,\n      title: p.name,\n      category: p.category\n    }\n  });\n}\n\n// --- Quy tr\u00ecnh n\u1ed9i b\u1ed9 ---\nfor (const pr of processes) {\n  const steps = Array.isArray(pr.steps) ? pr.steps.join('\\n') : '';\n  docs.push({\n    json: {\n      text: [\n        `QUY TR\u00ccNH: ${pr.name} (m\u00e3 ${pr.id})`,\n        `Ph\u00f2ng ban: ${pr.department}`,\n        steps ? `C\u00e1c b\u01b0\u1edbc:\\n${steps}` : '',\n        pr.sla ? `SLA: ${pr.sla}` : '',\n        pr.discount_policy ? `Ch\u00ednh s\u00e1ch gi\u1ea3m gi\u00e1: ${pr.discount_policy}` : '',\n        pr.note ? `L\u01b0u \u00fd: ${pr.note}` : '',\n        pr.contact ? 
`Li\u00ean h\u1ec7: ${pr.contact}` : ''\n      ].filter(Boolean).join('\\n'),\n      doc_type: 'process',\n      ref_id: pr.id,\n      title: pr.name,\n      category: pr.department\n    }\n  });\n}\n\n// --- Ch\u00ednh s\u00e1ch ---\nconst policyLabels = {\n  work_from_home: 'Ch\u00ednh s\u00e1ch L\u00e0m vi\u1ec7c t\u1eeb xa (WFH)',\n  overtime: 'Ch\u00ednh s\u00e1ch L\u00e0m th\u00eam gi\u1edd (OT)',\n  equipment: 'Ch\u00ednh s\u00e1ch Trang thi\u1ebft b\u1ecb',\n  confidentiality: 'Ch\u00ednh s\u00e1ch B\u1ea3o m\u1eadt th\u00f4ng tin',\n  training: 'Ch\u00ednh s\u00e1ch \u0110\u00e0o t\u1ea1o',\n  code_of_conduct: 'Quy t\u1eafc \u1ee8ng x\u1eed'\n};\nfor (const [key, value] of Object.entries(policies)) {\n  const label = policyLabels[key] || key;\n  docs.push({\n    json: {\n      text: `CH\u00cdNH S\u00c1CH \u2014 ${label}:\\n${value}`,\n      doc_type: 'policy',\n      ref_id: `policy-${key}`,\n      title: label,\n      category: 'Ch\u00ednh s\u00e1ch'\n    }\n  });\n}\n\n// --- FAQ ---\nfaqs.forEach((f, i) => {\n  docs.push({\n    json: {\n      text: `C\u00c2U H\u1eceI TH\u01af\u1edcNG G\u1eb6P:\\nH\u1ecfi: ${f.q}\\n\u0110\u00e1p: ${f.a}`,\n      doc_type: 'faq',\n      ref_id: `faq-${i + 1}`,\n      title: f.q,\n      category: 'FAQ'\n    }\n  });\n});\n\nif (!docs.length) {\n  throw new Error('Kh\u00f4ng c\u00f3 document n\u00e0o \u0111\u01b0\u1ee3c t\u1ea1o. Ki\u1ec3m tra l\u1ea1i n\u1ed9i dung company_knowledge.json.');\n}\n\nconsole.log(`\u0110\u00e3 t\u1ea1o ${docs.length} documents t\u1eeb company_knowledge.json`);\nreturn docs;"
      },
      "id": "wf17-node-004",
      "name": "\ud83e\udde9 T\u1ea1o Documents T\u1eeb Knowledge JSON",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        680,
        360
      ],
      "notes": "Chuy\u1ec3n t\u1eebng m\u1ee5c (c\u00f4ng ty, s\u1ea3n ph\u1ea9m, quy tr\u00ecnh, ch\u00ednh s\u00e1ch, FAQ) th\u00e0nh document c\u00f3 `text` (n\u1ed9i dung \u0111\u1ec3 embedding) v\u00e0 metadata (doc_type, ref_id, title, category) \u0111\u1ec3 AI c\u00f3 th\u1ec3 tr\u00edch ngu\u1ed3n khi tr\u1ea3 l\u1eddi."
    },
    {
      "parameters": {
        "mode": "insert",
        "pineconeIndex": {
          "__rl": true,
          "mode": "id",
          "value": "company-kb"
        },
        "embeddingBatchSize": 100,
        "options": {
          "pineconeNamespace": "company"
        }
      },
      "id": "wf17-node-005",
      "name": "\ud83d\uddc4\ufe0f Pinecone: N\u1ea1p Documents (Insert)",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "typeVersion": 1.3,
      "position": [
        900,
        360
      ],
      "credentials": {
        "pineconeApi": {
          "name": "<your credential>"
        }
      },
      "notes": "N\u1ea1p documents v\u00e0o Pinecone index 'company-kb', namespace 'company'.\nQUAN TR\u1eccNG: Index ph\u1ea3i \u0111\u01b0\u1ee3c t\u1ea1o tr\u01b0\u1edbc trong Pinecone console v\u1edbi:\n  - Dimensions: 3072 (Gemini gemini-embedding-001, m\u1eb7c \u0111\u1ecbnh)\n  - Metric: cosine\nNamespace 'company' gi\u00fap ph\u00e2n t\u00e1ch d\u1eef li\u1ec7u n\u1ebfu d\u00f9ng chung index v\u1edbi h\u1ec7 th\u1ed1ng kh\u00e1c."
    },
    {
      "parameters": {
        "dataType": "json",
        "jsonMode": "expressionData",
        "jsonData": "={{ $json.text }}",
        "textSplittingMode": "custom",
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "doc_type",
                "value": "={{ $json.doc_type }}"
              },
              {
                "name": "ref_id",
                "value": "={{ $json.ref_id }}"
              },
              {
                "name": "title",
                "value": "={{ $json.title }}"
              },
              {
                "name": "category",
                "value": "={{ $json.category }}"
              }
            ]
          }
        }
      },
      "id": "wf17-node-006",
      "name": "\ud83d\udcda Default Data Loader",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "typeVersion": 1.1,
      "position": [
        900,
        580
      ],
      "notes": "L\u1ea5y field `text` l\u00e0m n\u1ed9i dung document v\u00e0 g\u1eafn metadata (doc_type, ref_id, title, category). textSplittingMode=custom \u0111\u1ec3 d\u00f9ng Text Splitter b\u00ean d\u01b0\u1edbi."
    },
    {
      "parameters": {
        "chunkSize": 800,
        "chunkOverlap": 100,
        "options": {}
      },
      "id": "wf17-node-007",
      "name": "\u2702\ufe0f Recursive Text Splitter",
      "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
      "typeVersion": 1,
      "position": [
        900,
        780
      ],
      "notes": "Chia document d\u00e0i th\u00e0nh \u0111o\u1ea1n ~800 k\u00fd t\u1ef1, ch\u1ed3ng l\u1ea5n 100 k\u00fd t\u1ef1 \u0111\u1ec3 gi\u1eef ng\u1eef c\u1ea3nh. M\u1ed7i entry c\u1ee7a ch\u00fang ta th\u01b0\u1eddng ng\u1eafn (< 800 k\u00fd t\u1ef1) n\u00ean h\u1ea7u h\u1ebft n\u1eb1m g\u1ecdn trong 1 chunk."
    },
    {
      "parameters": {
        "modelName": "models/gemini-embedding-001",
        "options": {}
      },
      "id": "wf17-node-008",
      "name": "\ud83d\udd22 Gemini Embeddings",
      "type": "@n8n/n8n-nodes-langchain.embeddingsGoogleGemini",
      "typeVersion": 1,
      "position": [
        1140,
        580
      ],
      "credentials": {
        "googlePalmApi": {
          "name": "<your credential>"
        }
      },
      "notes": "T\u1ea1o vector 3072 chi\u1ec1u (m\u1eb7c \u0111\u1ecbnh) cho m\u1ed7i document b\u1eb1ng Gemini gemini-embedding-001. Pinecone index ph\u1ea3i \u0111\u01b0\u1ee3c t\u1ea1o v\u1edbi c\u00f9ng s\u1ed1 chi\u1ec1u n\u00e0y.\nCRITICAL: Workflow chatbot (02-chatbot.json) PH\u1ea2I d\u00f9ng C\u00d9NG model n\u00e0y. Sai model \u2192 vector kh\u00f4ng kh\u1edbp \u2192 RAG kh\u00f4ng t\u00ecm \u0111\u01b0\u1ee3c k\u1ebft qu\u1ea3 m\u00e0 kh\u00f4ng b\u00e1o l\u1ed7i."
    }
  ],
  "connections": {
    "\ud83d\udcc1 Form Upload company_knowledge.json": {
      "main": [
        [
          {
            "node": "\ud83e\uddf7 Parse JSON T\u1eeb File",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\ud83e\uddf7 Parse JSON T\u1eeb File": {
      "main": [
        [
          {
            "node": "\ud83e\udde9 T\u1ea1o Documents T\u1eeb Knowledge JSON",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\ud83e\udde9 T\u1ea1o Documents T\u1eeb Knowledge JSON": {
      "main": [
        [
          {
            "node": "\ud83d\uddc4\ufe0f Pinecone: N\u1ea1p Documents (Insert)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "\ud83d\udcda Default Data Loader": {
      "ai_document": [
        [
          {
            "node": "\ud83d\uddc4\ufe0f Pinecone: N\u1ea1p Documents (Insert)",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "\u2702\ufe0f Recursive Text Splitter": {
      "ai_textSplitter": [
        [
          {
            "node": "\ud83d\udcda Default Data Loader",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    },
    "\ud83d\udd22 Gemini Embeddings": {
      "ai_embedding": [
        [
          {
            "node": "\ud83d\uddc4\ufe0f Pinecone: N\u1ea1p Documents (Insert)",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {
    "executionOrder": "v1",
    "saveExecutionProgress": true,
    "saveManualExecutions": true,
    "saveDataErrorExecution": "all",
    "saveDataSuccessExecution": "all",
    "timezone": "Asia/Ho_Chi_Minh",
    "executionTimeout": 300
  },
  "meta": {
    "templateCredsSetupCompleted": false,
    "description": "Ingestion workflow: Form Upload file company_knowledge.json \u2192 Parse JSON \u2192 t\u1ea1o documents \u2192 chia nh\u1ecf \u2192 Gemini embedding \u2192 n\u1ea1p v\u00e0o Pinecone (index=company-kb, namespace=company). Kh\u00f4ng c\u1ea7n docker cp."
  },
  "id": "wf-17-rag-ingest-company",
  "tags": [
    {
      "name": "enterprise"
    },
    {
      "name": "rag"
    },
    {
      "name": "pinecone"
    },
    {
      "name": "gemini"
    },
    {
      "name": "community"
    }
  ]
}