{
  "id": "fvScVpOkvJa4CQTY",
  "name": "Extract YouTube auto-generated transcripts and save them for future use",
  "tags": [],
  "nodes": [
    {
      "id": "2e715548-dc0c-4f27-893d-aa944dda57b9",
      "name": "Trigger: Execute Workflow",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "maxTries": 1,
      "position": [
        272,
        1056
      ],
      "parameters": {
        "workflowInputs": {
          "values": [
            {
              "name": "youtubeVideoUrl"
            }
          ]
        }
      },
      "retryOnFail": false,
      "typeVersion": 1.1,
      "waitBetweenTries": 1000
    },
    {
      "id": "fd592f64-4d34-4a53-a91a-ba1a4f3f07a9",
      "name": "Fetch: YouTube Page HTML",
      "type": "n8n-nodes-base.httpRequest",
      "notes": "Retrieves the HTML of the video page to extract API Key",
      "maxTries": 3,
      "position": [
        496,
        1056
      ],
      "parameters": {
        "url": "={{ 'https://' + $json.youtubeVideoUrl.replace(/^https?:\\/\\//, '') }}",
        "options": {}
      },
      "retryOnFail": true,
      "typeVersion": 4.3,
      "waitBetweenTries": 2000
    },
    {
      "id": "b988bc18-3e14-46b6-8df3-aca439a78f86",
      "name": "Extract: API Key & Video ID",
      "type": "n8n-nodes-base.code",
      "notes": "Extracts API Key and Video ID with validation",
      "maxTries": 1,
      "position": [
        720,
        1056
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "// Raw page HTML returned by the preceding HTTP Request node.\nconst pageHtml = $input.item.json.data;\n\nif (typeof pageHtml !== 'string' || pageHtml.length === 0) {\n  throw new Error('HTML data not found');\n}\n\n// Locate the InnerTube API key embedded in the page source.\nconst keyMatch = pageHtml.match(/INNERTUBE_API_KEY[\\s\\S]*?\\\"([a-zA-Z0-9_-]+)\\\"/);\nif (!keyMatch || !keyMatch[1]) {\n  throw new Error('INNERTUBE_API_KEY not found in HTML');\n}\n\n// Locate the first 11-character videoId embedded in the page source.\nconst idMatch = pageHtml.match(/\\\"videoId\\\":\\s*\\\"([a-zA-Z0-9_-]{11})\\\"/);\nif (!idMatch) {\n  throw new Error('Could not extract videoId from HTML');\n}\n\nreturn {\n  json: {\n    apiKey: keyMatch[1],\n    videoId: idMatch[1]\n  }\n};"
      },
      "retryOnFail": false,
      "typeVersion": 2,
      "waitBetweenTries": 1000
    },
    {
      "id": "6e21195d-17b1-4653-ac34-e7cf9770c48e",
      "name": "API: Get Video Metadata",
      "type": "n8n-nodes-base.httpRequest",
      "notes": "Get video metadata with subtitle tracks",
      "maxTries": 3,
      "position": [
        944,
        1056
      ],
      "parameters": {
        "url": "=https://www.youtube.com/youtubei/v1/player?key={{ $json.apiKey }}",
        "method": "POST",
        "options": {},
        "jsonBody": "={\n  \"context\": {\n    \"client\": {\n      \"clientName\": \"WEB\",\n      \"clientVersion\": \"2.20210721.00.00\"\n    }\n  },\n  \"videoId\": \"{{ $json.videoId }}\"\n}",
        "sendBody": true,
        "sendHeaders": true,
        "specifyBody": "json",
        "headerParameters": {
          "parameters": [
            {
              "name": "Content-Type",
              "value": "application/json"
            }
          ]
        }
      },
      "retryOnFail": true,
      "typeVersion": 4.3,
      "waitBetweenTries": 2000
    },
    {
      "id": "0737b208-4ca0-4ac8-bc9d-342a666ede8b",
      "name": "Extract: Auto-Generated Transcript URL",
      "type": "n8n-nodes-base.code",
      "onError": "continueErrorOutput",
      "maxTries": 1,
      "position": [
        1168,
        1056
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "// Caption tracks listed in the player response; falls back to an empty list.\nconst tracks = $input.item.json?.captions?.playerCaptionsTracklistRenderer?.captionTracks || [];\n\nif (tracks.length === 0) {\n  throw new Error('No caption tracks found for this video');\n}\n\n// Formats one track as languageCode (kind), using 'default' when kind is missing.\nconst describeTrack = (t) => t.languageCode + ' (' + (t.kind || 'default') + ')';\n\n// The auto-generated track is the first one whose kind is 'asr'.\nconst autoTrack = tracks.find((candidate) => candidate.kind === 'asr');\n\nif (!autoTrack) {\n  throw new Error('No auto-generated transcript (asr) found. Available tracks: ' + tracks.map(describeTrack).join(', '));\n}\n\n// Return the track's download URL together with its metadata.\nconst { baseUrl, languageCode, name, kind } = autoTrack;\nreturn {\n  json: {\n    transcriptUrl: baseUrl,\n    languageCode,\n    trackName: name?.simpleText || 'Auto-generated transcript',\n    kind\n  }\n};"
      },
      "retryOnFail": false,
      "typeVersion": 2,
      "waitBetweenTries": 1000
    },
    {
      "id": "08d78016-4fd2-485f-91a6-33bdad8587e0",
      "name": "Fetch: Transcript XML",
      "type": "n8n-nodes-base.httpRequest",
      "notes": "Get XML transcript from extracted URL",
      "maxTries": 3,
      "position": [
        1392,
        960
      ],
      "parameters": {
        "url": "={{ $json.transcriptUrl }}",
        "options": {}
      },
      "retryOnFail": true,
      "typeVersion": 4.3,
      "waitBetweenTries": 2000
    },
    {
      "id": "259c3e38-5c10-4f0b-9204-6f587e675efb",
      "name": "Parse: XML to JSON",
      "type": "n8n-nodes-base.xml",
      "notes": "XML to JSON",
      "maxTries": 1,
      "position": [
        1616,
        960
      ],
      "parameters": {
        "options": {
          "normalize": false
        }
      },
      "retryOnFail": false,
      "typeVersion": 1,
      "waitBetweenTries": 1000
    },
    {
      "id": "496cfcbd-7ff9-4b70-95dd-889aabb68eb9",
      "name": "Transform: Process Transcript",
      "type": "n8n-nodes-base.code",
      "notes": "Processes transcript, decodes HTML entities, and concatenates text",
      "maxTries": 1,
      "position": [
        1840,
        960
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "// Named HTML entities that may appear in caption text.\nconst NAMED_ENTITIES = {\n  amp: '&',\n  lt: '<',\n  gt: '>',\n  quot: '\"',\n  apos: \"'\"\n};\n\n// Decodes named, decimal and hexadecimal HTML entities in a single pass.\n// A single pass matters: decoding '&amp;' first and then re-scanning would\n// double-decode text such as '&amp;lt;' into '<' instead of '&lt;'.\nfunction decodeHTMLEntities(text) {\n  return text.replace(/&(?:#[xX]([a-fA-F0-9]+)|#(\\d+)|([a-zA-Z]+));/g, (match, hex, dec, name) => {\n    if (name !== undefined) {\n      // Unknown named entities are left untouched.\n      return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, name) ? NAMED_ENTITIES[name] : match;\n    }\n    const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(dec, 10);\n    // String.fromCodePoint handles characters above U+FFFF (e.g. emoji), which\n    // String.fromCharCode would truncate; it throws beyond U+10FFFF, so such\n    // references are left untouched.\n    return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;\n  });\n}\n\nconst transcript = $input.item.json.transcript;\nif (!transcript || !transcript.text) {\n  throw new Error('No transcript data found');\n}\n\n// With a single <text> element the parsed XML holds one value instead of an\n// array, so normalize to an array before iterating.\nconst segments = Array.isArray(transcript.text) ? transcript.text : [transcript.text];\n\n// Each segment is either an object whose text lives under '_' or a bare string.\nconst parts = [];\nfor (const segment of segments) {\n  const rawText = typeof segment === 'string' ? segment : segment?._;\n  if (rawText && typeof rawText === 'string') {\n    parts.push(decodeHTMLEntities(rawText));\n  }\n}\n\nconst fullTranscript = parts.join(' ').trim();\nconst wordCount = fullTranscript.split(/\\s+/).filter(w => w.length > 0).length;\n\nreturn {json: {\n  transcript: transcript,\n  fullTranscript: fullTranscript,\n  wordCount: wordCount\n}};"
      },
      "retryOnFail": false,
      "typeVersion": 2,
      "waitBetweenTries": 1000
    },
    {
      "id": "825ec3eb-2c08-4f1b-a3bf-b902bba8253c",
      "name": "\ud83d\udccb Sticky: Overview",
      "type": "n8n-nodes-base.stickyNote",
      "maxTries": 1,
      "position": [
        -496,
        656
      ],
      "parameters": {
        "width": 700,
        "height": 1046,
        "content": "## \ud83c\udfac Extract and save YouTube auto-generated transcripts\n\nThis template extracts the **auto-generated transcript** from a YouTube video using YouTube's internal caption metadata.\n\nIt is useful for:\n- AI summarization\n- Content analysis\n- Accessibility workflows\n- Transcript archiving\n\n### Input\n- `youtubeVideoUrl`\n\n### Output\n```json\n{\n  \"fullTranscript\": \"Auto-generated transcript text...\",\n  \"wordCount\": 1234,\n  \"transcript\": { \"text\": [...] }\n}\n```\n## How it works\n1. Accepts a YouTube video URL as input\n2. Fetches the YouTube page HTML\n3. Extracts the `INNERTUBE_API_KEY` and video ID\n4. Requests video metadata from YouTube\n5. Finds the auto-generated caption track (`kind: \"asr\"`)\n6. Downloads the transcript XML\n7. Parses and cleans the transcript text\n8. Returns the full transcript, timestamps, and word count\n\n## Setup steps\n1. Import the workflow into n8n\n2. Activate or run the workflow manually\n3. Provide a value for `youtubeVideoUrl`\n4. Execute the workflow\n5. Use the output transcript in downstream AI or automation steps"
      },
      "retryOnFail": false,
      "typeVersion": 1,
      "waitBetweenTries": 1000
    },
    {
      "id": "e46be3b4-da28-4acb-bfdc-8b75ead92a02",
      "name": "Stop and Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        1392,
        1152
      ],
      "parameters": {
        "errorMessage": "No auto-generated transcript found for this video."
      },
      "typeVersion": 1
    },
    {
      "id": "e514abc4-9c26-472a-a358-92e5a0c4b0b6",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        336,
        752
      ],
      "parameters": {
        "color": 7,
        "width": 928,
        "height": 240,
        "content": "\ud83d\udd11 PHASE 1: Credentials & Metadata Acquisition\n\nThis phase extracts the necessary information to access \nthe transcriptions:\n\n1. Fetch YouTube page HTML\n2. Extract INNERTUBE_API_KEY and videoId\n3. Call YouTube API to get video metadata\n4. Identify auto-generated track (kind: asr)\n\n\u2705 Output: transcriptUrl from caption track"
      },
      "typeVersion": 1
    },
    {
      "id": "b40e621e-2ce5-445e-a080-f6c3666d8013",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1328,
        1312
      ],
      "parameters": {
        "color": 7,
        "width": 672,
        "height": 240,
        "content": "\ud83d\udcdd PHASE 2: Transcript Download & Transformation\n\nThis phase processes the extracted transcription:\n\n1. Download XML file from obtained URL\n2. Convert XML to JSON\n3. Decode HTML entities\n4. Concatenate full text\n5. Count words\n\n\u2705 Output: Clean fullTranscript ready to use"
      },
      "typeVersion": 1
    }
  ],
  "active": true,
  "settings": {
    "binaryMode": "separate",
    "availableInMCP": false,
    "executionOrder": "v1"
  },
  "versionId": "b416ede4-3667-48c0-b3c1-e325f7924ab0",
  "connections": {
    "Parse: XML to JSON": {
      "main": [
        [
          {
            "node": "Transform: Process Transcript",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Fetch: Transcript XML": {
      "main": [
        [
          {
            "node": "Parse: XML to JSON",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "API: Get Video Metadata": {
      "main": [
        [
          {
            "node": "Extract: Auto-Generated Transcript URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Fetch: YouTube Page HTML": {
      "main": [
        [
          {
            "node": "Extract: API Key & Video ID",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Trigger: Execute Workflow": {
      "main": [
        [
          {
            "node": "Fetch: YouTube Page HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract: API Key & Video ID": {
      "main": [
        [
          {
            "node": "API: Get Video Metadata",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract: Auto-Generated Transcript URL": {
      "main": [
        [
          {
            "node": "Fetch: Transcript XML",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Stop and Error",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}