{
  "id": "p7ouvtTJsBVq0Udh",
  "name": "YouTube Data API Caption Extractor",
  "tags": [],
  "nodes": [
    {
      "id": "2c39f34c-4315-48c2-985c-bc4eb058ccbf",
      "name": "When Executed by Another Workflow",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "position": [
        -640,
        96
      ],
      "parameters": {
        "workflowInputs": {
          "values": [
            {
              "name": "youtubeVideoId"
            },
            {
              "name": "preferredLanguage"
            }
          ]
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "8f0aa842-1dcf-4552-8556-455b0f48b12e",
      "name": "Set Variables",
      "type": "n8n-nodes-base.set",
      "position": [
        -416,
        96
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "728dc681-2337-4247-bad5-a13b010e075f",
              "name": "youtubeVideoId",
              "type": "string",
              "value": "={{ $json.youtubeVideoId }}"
            },
            {
              "id": "f07a61d0-90d0-4024-bcb8-8f73622dbd62",
              "name": "preferredLanguage",
              "type": "string",
              "value": "={{ $json.preferredLanguage }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "8aafb029-41bd-45b6-8b66-44450c11a4aa",
      "name": "List Captions",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -192,
        96
      ],
      "parameters": {
        "url": "=https://www.googleapis.com/youtube/v3/captions?part=snippet&videoId={{ $('Set Variables').item.json.youtubeVideoId }}",
        "options": {},
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "youTubeOAuth2Api"
      },
      "credentials": {
        "youTubeOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "1fbd0b72-aa3b-47e5-8b9d-f0c929a95782",
      "name": "IF Has Captions?",
      "type": "n8n-nodes-base.if",
      "position": [
        32,
        96
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 1,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "cc-available",
              "operator": {
                "type": "number",
                "operation": "gt"
              },
              "leftValue": "={{ $json.items.length }}",
              "rightValue": 0
            }
          ]
        }
      },
      "typeVersion": 2
    },
    {
      "id": "6d1d6d7e-f31b-4118-8d3c-363a5e429c90",
      "name": "Caption Language Selector",
      "type": "n8n-nodes-base.code",
      "position": [
        256,
        0
      ],
      "parameters": {
        "jsCode": "// Pick the caption track to download: the preferred language when present, otherwise the first track listed\nconst captionList = $input.first().json;\n// Treat a missing or malformed `items` array as \"no tracks\" instead of letting later lookups throw\nconst captions = Array.isArray(captionList.items) ? captionList.items : [];\nconst preferredLanguage = $('Set Variables').item.json.preferredLanguage || 'es';\n\n// Preferred language wins; otherwise use the first available track (undefined when the list is empty)\nconst selected = captions.find(caption => caption.snippet?.language === preferredLanguage) ?? captions[0];\n\nconst captionId = selected?.id ?? null;\nconst language = selected?.snippet?.language ?? null;\n\nreturn [{\n  json: {\n    captionId,\n    language,\n    // Read from the guarded array: captionList.items[0] throws when `items` is absent\n    videoId: captions[0]?.snippet?.videoId,\n    totalCaptions: captions.length,\n    status: captionId ? 'found' : 'no_captions',\n    preferredLanguage\n  }\n}];"
      },
      "typeVersion": 2
    },
    {
      "id": "8a47ef75-31ba-4b85-bd45-b5dd2952d262",
      "name": "Download VTT",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        480,
        0
      ],
      "parameters": {
        "url": "=https://www.googleapis.com/youtube/v3/captions/{{ $('Caption Language Selector').item.json.captionId }}",
        "options": {},
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "youTubeOAuth2Api"
      },
      "credentials": {
        "youTubeOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "bbb1500b-27ee-4c4f-a8b8-38b9476517c5",
      "name": "Caption File Conversion",
      "type": "n8n-nodes-base.extractFromFile",
      "position": [
        704,
        0
      ],
      "parameters": {
        "options": {},
        "operation": "text",
        "destinationKey": "content"
      },
      "typeVersion": 1
    },
    {
      "id": "66469f1c-ce7c-4208-8f6b-8b20b0d85130",
      "name": "Clean Transcript",
      "type": "n8n-nodes-base.code",
      "position": [
        928,
        0
      ],
      "parameters": {
        "jsCode": "// Turn the caption file text from \"Caption File Conversion\" into a single plain-text transcript\nconst rawContent = $('Caption File Conversion').first().json.content;\n\n// Guard against missing content so an empty download yields an empty transcript instead of a TypeError\nconst subtitleContent = typeof rawContent === 'string' ? rawContent : '';\n\n// Split into lines (accepts LF and CRLF line endings)\nconst lines = subtitleContent.split(/\\r?\\n/);\n\n// Cue timing lines: comma style (0:00:01.000,0:00:03.000) and arrow style (00:00:01.000 --> 00:00:03.000)\nconst timestampRegex = /^\\d{1,2}:\\d{2}(?::\\d{2})?[.,]\\d{3}\\s*(?:,|-->)\\s*\\d{1,2}:\\d{2}(?::\\d{2})?[.,]\\d{3}/;\nconst isTimestampLine = (line) => timestampRegex.test(line.trim());\n\n// WEBVTT headers (if any)\nconst headerRegex = /^(WEBVTT|Kind:|Language:)/i;\nconst isHeaderLine = (line) => headerRegex.test(line.trim());\n\n// Keep only spoken-text lines: drop blanks, timestamps and headers\nconst textLines = lines.filter((line) => line.trim() !== '' && !isTimestampLine(line) && !isHeaderLine(line));\n\nconst cleanText = (line) => {\n    let cleaned = line;\n\n    // Remove bracketed non-speech cues such as [M\u00fasica] or [Background music].\n    // \\w is ASCII-only in JavaScript, so match everything up to the closing bracket instead.\n    cleaned = cleaned.replace(/\\[[^\\]]*\\]/g, '');\n\n    // Collapse an immediately repeated word (\"the the\" -> \"the\"). Unicode-aware boundaries keep\n    // accented text intact: with \\b, \"ni\u00f1o o ni\u00f1a\" lost its standalone \"o\".\n    cleaned = cleaned.replace(/(?<![\\p{L}\\p{N}_])([\\p{L}\\p{N}_]+)\\s+\\1(?![\\p{L}\\p{N}_])/gu, '$1');\n\n    return cleaned.trim();\n};\n\n// Join everything into one string and normalize whitespace\nconst plainText = textLines.map(cleanText).join(' ').replace(/\\s+/g, ' ').trim();\n\nreturn [{\n    json: {\n        videoId: $('Set Variables').item.json.youtubeVideoId,\n        language: $('Caption Language Selector').item.json.language,\n        text: plainText,\n        // ''.split(' ') returns [''], so report 0 words for an empty transcript\n        wordCount: plainText === '' ? 0 : plainText.split(' ').length,\n        charCount: plainText.length,\n        status: 'success',\n        source: 'youtube_captions'\n    }\n}];\n"
      },
      "typeVersion": 2
    },
    {
      "id": "e74576a0-4159-4d6c-b53d-8b28e5b6a0ae",
      "name": "No Captions Fallback",
      "type": "n8n-nodes-base.code",
      "position": [
        256,
        192
      ],
      "parameters": {
        "jsCode": "// Error branch: report the missing captions as a structured item\nconst inputs = $('Set Variables').item.json;\n\nconst noCaptionsResult = {\n  videoId: inputs.youtubeVideoId,\n  language: inputs.preferredLanguage,\n  error: 'No captions available for this video',\n  status: 'no_captions',\n  suggestion: 'Use Whisper AI fallback',\n  source: 'youtube_api'\n};\n\nreturn [{ json: noCaptionsResult }];"
      },
      "typeVersion": 2
    },
    {
      "id": "b7c37941-d9f1-48f6-b776-e15c2af0a604",
      "name": "Stop and Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        480,
        192
      ],
      "parameters": {
        "errorMessage": "No captions are available for this YouTube video"
      },
      "typeVersion": 1
    },
    {
      "id": "8c08aeb1-51ea-43df-918a-d18b15adad9d",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1472,
        -512
      ],
      "parameters": {
        "width": 764,
        "height": 1344,
        "content": "## \ud83d\ude80 Try It Out!\n\n### **YouTube Caption Extractor** *(Your Channel Only)*\nExtracts clean transcripts from the captions of **YOUR CHANNEL's** YouTube videos using the YouTube Data API v3.\n\n**\u26a0\ufe0f API Limitation**: Only works with videos from **YOUR OWN CHANNEL**. Cannot access external/public videos.\n\n---\n\n### \ud83c\udfaf **Use Cases**\n- AI summarization & sentiment analysis\n- Keyword extraction from your content\n- Content generation from your videos\n- Batch transcript processing\n\n---\n\n### \ud83d\udd04 **How It Works** *(6 Steps)*\n\n1. **\ud83d\udce5 Input**: Receives `youtubeVideoId` + `preferredLanguage`\n2. **\ud83d\udd0d API**: Lists captions from **your channel**\n3. **\ud83c\udd94 Selector**: Picks preferred language (fallback to first)\n4. **\ud83d\udce5 Download**: Gets VTT subtitle file\n5. **\ud83e\uddf9 Cleaning**: Removes timestamps, [Music], duplicates\n6. **\u2705 Output**: Clean transcript + metadata\n\n---\n\n### \ud83d\ude80 **How to Use**\n* Trigger with JSON payload: `{\"youtubeVideoId\": \"YOUR_VIDEO_ID\", \"preferredLanguage\": \"es\"}`\n* **Video ID must belong to your authenticated YouTube channel**\n* Works as sub-workflow (Execute Workflow Trigger) or replace with Webhook/Form trigger\n* Videos with no captions produce a structured error response, then the run stops with an error\n* Output ready for downstream AI processing or storage\n\n**\u26a0\ufe0f Setup Required**:\n- **Change YouTube credentials** in **\"List Captions\"** and **\"Download VTT\"** nodes\n- Video ID from **your authenticated channel**\n- Sub-workflow or Webhook trigger\n- Graceful no-captions handling\n\n---\n\n### \ud83d\udd27 **Requirements**\n- \u2705 YouTube OAuth2 (`youtube.force-ssl` scope)\n- \u2705 **Update credentials** in List Captions + Download VTT nodes\n- \u2705 n8n HTTP Request + Code nodes\n\n---\n\n### \ud83d\udcac **Need Help?**\n- [n8n Forum](https://community.n8n.io/)\n\n**Happy Automating! \ud83c\udf89**"
      },
      "typeVersion": 1
    },
    {
      "id": "1e048b1f-a7dc-44f5-9793-d9f6e3fe3885",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -656,
        -208
      ],
      "parameters": {
        "color": 7,
        "width": 344,
        "height": 460,
        "content": "## 1. Input Processing\n\u2022 Captures workflow execution trigger\n\u2022 Sets youtubeVideoId and preferredLanguage variables\n\u2022 Passes structured data to YouTube API calls\n\nExample input:\n{\n  \"youtubeVideoId\": \"nxub8Bmia68\",\n  \"preferredLanguage\": \"es\"\n}"
      },
      "typeVersion": 1
    },
    {
      "id": "0cf07d93-7c98-4185-ba30-329660e83a8b",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -208,
        -208
      ],
      "parameters": {
        "color": 7,
        "width": 344,
        "height": 460,
        "content": "## 2. API Discovery & Validation\n\n\u2022 Lists all available captions for the video\n\u2022 Checks if captions exist (items.length > 0)\n\u2022 Branches to success (TRUE) or fallback (FALSE)\n\u2022 YouTube API: captions.list endpoint\n\nRequires YouTube OAuth2 with one of:\n\u2022 youtube.force-ssl\n\u2022 youtubepartner"
      },
      "typeVersion": 1
    },
    {
      "id": "14c6ab17-ac4c-4727-a765-61dd90d7403d",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        448,
        -304
      ],
      "parameters": {
        "color": 7,
        "width": 624,
        "height": 460,
        "content": "## 3. Transcript Processing\n\n\u2022 Downloads VTT caption file\n\u2022 Extracts plain text from binary VTT\n\u2022 Advanced cleaning removes:\n  \u2022 Timestamps (00:01:23.456,00:01:25.789)\n  \u2022 WEBVTT headers\n  \u2022 [M\u00fasica] sound effects\n  \u2022 Duplicate words\n  \u2022 Extra whitespace\n\nOutput: Clean, readable transcript text"
      },
      "typeVersion": 1
    },
    {
      "id": "90d6cf23-15b3-43cb-b063-5932cd3c3d67",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1104,
        -288
      ],
      "parameters": {
        "color": 7,
        "width": 624,
        "height": 460,
        "content": "## \u2705 Final Output Format\n\nSuccess response:\n{\n  \"videoId\": \"nxub8Bmia68\",\n  \"language\": \"es\",\n  \"text\": \"Clean transcript text...\",\n  \"wordCount\": 1250,\n  \"charCount\": 6789,\n  \"status\": \"success\",\n  \"source\": \"youtube_captions\"\n}\n\nReady for downstream AI processing!"
      },
      "typeVersion": 1
    },
    {
      "id": "1ed5a7bb-13fb-4a72-b968-9bc9c01241f0",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        176,
        -320
      ],
      "parameters": {
        "color": 7,
        "width": 264,
        "height": 476,
        "content": "## \u26a0\ufe0f Language Priority Logic\n1. Exact match: preferredLanguage (es, en, fr...)\n2. Fallback: first available caption\n3. Returns captionId, language, totalCaptions\n\nSmart selection preserves preferred language when available."
      },
      "typeVersion": 1
    },
    {
      "id": "3e4a4d50-6641-4298-9336-654c6655a4d0",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        176,
        176
      ],
      "parameters": {
        "color": 7,
        "width": 472,
        "height": 380,
        "content": "\n\n\n\n\n\n\n\n\n\n\n\n## \u274c Error Handling Path\n\nWhen no captions available:\n* Returns structured error response\n* Includes videoId and language info\n* Suggests Whisper AI fallback\n* Graceful workflow termination\n* Prevents silent failures."
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "callerPolicy": "workflowsFromSameOwner",
    "errorWorkflow": "I8euEHwLoUMOskgW",
    "timeSavedMode": "fixed",
    "availableInMCP": false,
    "executionOrder": "v1"
  },
  "versionId": "2d4773e9-edff-4d32-8b1c-abfd62ab6fd2",
  "connections": {
    "Download VTT": {
      "main": [
        [
          {
            "node": "Caption File Conversion",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "List Captions": {
      "main": [
        [
          {
            "node": "IF Has Captions?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set Variables": {
      "main": [
        [
          {
            "node": "List Captions",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "IF Has Captions?": {
      "main": [
        [
          {
            "node": "Caption Language Selector",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "No Captions Fallback",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "No Captions Fallback": {
      "main": [
        [
          {
            "node": "Stop and Error",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Caption File Conversion": {
      "main": [
        [
          {
            "node": "Clean Transcript",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Caption Language Selector": {
      "main": [
        [
          {
            "node": "Download VTT",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When Executed by Another Workflow": {
      "main": [
        [
          {
            "node": "Set Variables",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}