{
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
      "name": "AI Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        -2432,
        272
      ],
      "parameters": {
        "text": "={{ $node[\"Chat web\"].json[\"chatInput\"] }}",
        "options": {
          "systemMessage": "Actuas como una p\u00e1gina web, mediante la tool sheet tienes acceso a toda la p\u00e1gina web y todo lo que te pida el usaurio puedes consultarlo all\u00ed, responde el usuario en base a la info de all\u00ed"
        },
        "promptType": "define"
      },
      "typeVersion": 2.2
    },
    {
      "id": "6dc738b3-4ebe-4f5b-b8b3-decf9ce15e70",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        -2496,
        480
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-5-nano",
          "cachedResultName": "gpt-5-nano"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "0461df33-2d2f-42e2-a0d3-288bd78275f1",
      "name": "Simple Memory",
      "type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
      "position": [
        -2336,
        480
      ],
      "parameters": {
        "contextWindowLength": 50
      },
      "typeVersion": 1.3
    },
    {
      "id": "fdcce6e6-f00f-4f84-ac6e-2e181452d3ac",
      "name": "Get row(s) in sheet in Google Sheets",
      "type": "n8n-nodes-base.googleSheetsTool",
      "position": [
        -1968,
        464
      ],
      "parameters": {
        "options": {},
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
          "cachedResultName": "Web"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
          "cachedResultName": "Web chat Workflow"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.6
    },
    {
      "id": "621aa928-83c5-48a4-8488-67c58fa1aec8",
      "name": "If",
      "type": "n8n-nodes-base.if",
      "position": [
        -3376,
        560
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "0adf46cd-5ca1-418e-a8b8-0571240e0efb",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{ $json['Data schema'] }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "8df9234a-85ad-45b4-bc17-ad64edaab08a",
      "name": "Maping Sitemap",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        -1536,
        736
      ],
      "parameters": {
        "url": "={{ $json.sitemapUrl }}",
        "options": {},
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "User-Agent",
              "value": "={{ $json.userAgent }}"
            },
            {
              "name": "Accept-Language",
              "value": "es-ES,es;q=0.9,en;q=0.8"
            },
            {
              "name": "Accept-Encoding",
              "value": "gzip, deflate, br"
            },
            {
              "name": "Referer",
              "value": "https://www.google.com/"
            },
            {
              "name": "Connection",
              "value": "keep-alive"
            },
            {
              "name": "Upgrade-Insecure-Requests",
              "value": "1"
            },
            {
              "name": "Sec-Fetch-Dest",
              "value": "document"
            },
            {
              "name": "Sec-Fetch-Mode",
              "value": "navigate"
            },
            {
              "name": "DNT",
              "value": "1"
            },
            {
              "name": "Accept",
              "value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "0d18ebca-52f3-46ed-934c-44c9bad53dab",
      "name": "XML1",
      "type": "n8n-nodes-base.xml",
      "position": [
        -1088,
        960
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "39127cf7-f627-4fca-b1b7-c51b3656947d",
      "name": "UA Rotativo1",
      "type": "n8n-nodes-base.code",
      "position": [
        -2160,
        736
      ],
      "parameters": {
        "jsCode": "const userAgents = [\n  // Escritorio - Windows\n  \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/0.0.0.0 Safari/537.36\",\n  \"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0\",\n  \"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36\",\n\n  // Escritorio - Mac\n  \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.1 Safari/605.1.15\",\n  \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/0.0.0.0 Safari/537.36\",\n\n  // M\u00f3vil - Android\n  \"Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/0.0.0.0 Mobile Safari/537.36\",\n  \"Mozilla/5.0 (Linux; Android 9; Mi 9T Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/0.0.0.0 Mobile Safari/537.36\",\n\n  // M\u00f3vil - iPhone\n  \"Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1\",\n  \"Mozilla/5.0 (iPhone; CPU iPhone OS 15_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Mobile/15E148 Safari/604.1\"\n];\n\n// Escoge uno aleatorio\nconst randomUA = userAgents[Math.floor(Math.random() * userAgents.length)];\n\nreturn [\n  {\n    json: {\n      userAgent: randomUA\n    }\n  }\n];\n"
      },
      "typeVersion": 2
    },
    {
      "id": "4260d45a-8705-483a-b17f-58211512ba59",
      "name": "Req Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        -1712,
        592
      ],
      "parameters": {
        "errorMessage": "URL mal introducida, debes introducir con el siguiente formato: ejemplo.com"
      },
      "typeVersion": 1
    },
    {
      "id": "59d0fe0a-9e27-4755-ac23-f46fa6d2aa95",
      "name": "Sitemap Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        -1088,
        528
      ],
      "parameters": {
        "errorMessage": "Sitemap no encontrado o acceso bloqueadp"
      },
      "executeOnce": false,
      "typeVersion": 1
    },
    {
      "id": "0526a778-8d63-4dcc-9815-a002ffd70a7f",
      "name": "Req robots",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        -1920,
        736
      ],
      "parameters": {
        "url": "={{ $node[\"AI Agent1\"].json[\"output\"][\"URL\"] }}/robots.txt",
        "options": {},
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "User-Agent",
              "value": "={{ $json.userAgent }}"
            },
            {
              "name": "Accept-Language",
              "value": "es-ES,es;q=0.9,en;q=0.8"
            },
            {
              "name": "Accept-Encoding",
              "value": "gzip, deflate, br"
            },
            {
              "name": "Referer",
              "value": "https://www.google.com/"
            },
            {
              "name": "Connection",
              "value": "keep-alive"
            },
            {
              "name": "Upgrade-Insecure-Requests",
              "value": "1"
            },
            {
              "name": "Sec-Fetch-Dest",
              "value": "document"
            },
            {
              "name": "Sec-Fetch-Mode",
              "value": "navigate"
            },
            {
              "name": "DNT",
              "value": "1"
            },
            {
              "name": "Accept",
              "value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "5948d577-4aea-4394-9b20-687f44efe5c8",
      "name": "extract sitemap url",
      "type": "n8n-nodes-base.code",
      "position": [
        -1712,
        736
      ],
      "parameters": {
        "jsCode": "// Simulaci\u00f3n: contenido del robots.txt como string, en n8n ser\u00e1 $input o $json dependiendo de tu configuraci\u00f3n\nconst robotsTxtContent = $input.first().json.data || \"\"; // Cambia esto por la variable correcta en n8n\n\n// Funci\u00f3n para extraer URL del sitemap\nfunction extractSitemapUrl(robotsTxt) {\n  // Buscamos l\u00ednea que empiece con \"Sitemap:\" (ignorando may\u00fasculas y espacios)\n  const lines = robotsTxt.split(/\\r?\\n/);\n  for (const line of lines) {\n    const match = line.match(/^\\s*Sitemap:\\s*(.+)$/i);\n    if (match) {\n      return match[1].trim();\n    }\n  }\n  return null; // No encontrado\n}\n\nconst sitemapUrl = extractSitemapUrl(robotsTxtContent);\n\n// Devolver JSON con la URL del sitemap (o null si no hay)\nreturn [{ json: { sitemapUrl } }];"
      },
      "typeVersion": 2
    },
    {
      "id": "c12d6ae7-23ee-4f7a-9a33-7e43d1e475b9",
      "name": "OPTIONS",
      "type": "n8n-nodes-base.set",
      "position": [
        -2400,
        736
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "71b9ad22-d418-4fff-92bb-dafd0818575d",
              "name": "scan_pages",
              "type": "boolean",
              "value": true
            },
            {
              "id": "42483a05-34f0-4cef-b404-dae43a7bee22",
              "name": "scan_posts",
              "type": "boolean",
              "value": false
            },
            {
              "id": "00a5ed31-dd44-4f9f-97f1-7aa4fe636afd",
              "name": "category",
              "type": "boolean",
              "value": false
            },
            {
              "id": "a2b0930f-8a9b-4f78-8d20-466366853b55",
              "name": "tags",
              "type": "boolean",
              "value": false
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae",
      "name": "AI Agent1",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        -3024,
        736
      ],
      "parameters": {
        "text": "={{ $node[\"Chat web\"].json[\"chatInput\"] }}",
        "options": {
          "systemMessage": "Responde en formato JSON, el url si lo es, si no lo es pon cualquier valor y con una boolean que se indica respondiendo si es url o no (true or false)"
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 2.2
    },
    {
      "id": "afdbed61-346e-44a6-aa69-23a2b7ecf553",
      "name": "OpenAI Chat Model1",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        -3024,
        944
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-5-nano",
          "cachedResultName": "gpt-5-nano"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "175a77cd-bd0a-4849-8c9b-d36b4ddcecd9",
      "name": "Chat web",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "position": [
        -3776,
        560
      ],
      "parameters": {
        "public": true,
        "options": {
          "responseMode": "responseNodes"
        },
        "authentication": "basicAuth"
      },
      "credentials": {
        "httpBasicAuth": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "16ef0fa6-4259-43bf-b74f-3dc70d4b54e3",
      "name": "Structured Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        -2880,
        944
      ],
      "parameters": {
        "jsonSchemaExample": "{\n  \"URL\": \"example.com\",\n  \"URL_bool\":true\n}"
      },
      "typeVersion": 1.3
    },
    {
      "id": "15992fbe-4ee5-4630-a377-f1b8d21ebc1b",
      "name": "If1",
      "type": "n8n-nodes-base.if",
      "position": [
        -2640,
        752
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "3851cb51-a282-4388-b4f6-1c1f68e8c7c5",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{ $json.output.URL_bool }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "ab8d3076-4420-48ef-b8fa-e25adbbd11e2",
      "name": "Respond to Chat",
      "type": "@n8n/n8n-nodes-langchain.chat",
      "position": [
        -2400,
        928
      ],
      "parameters": {
        "message": "Debes introducir una URL v\u00e1lida ejemplo: https://google.es",
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "c5a8dd49-3a82-45c7-a139-b30b4cc21e05",
      "name": "Respond to Chat1",
      "type": "@n8n/n8n-nodes-langchain.chat",
      "position": [
        -2080,
        272
      ],
      "parameters": {
        "message": "={{ $json.output }}",
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "d665823a-b40a-45a5-ac12-0a789c1b8ecd",
      "name": "Message a model",
      "type": "@n8n/n8n-nodes-langchain.openAi",
      "position": [
        -3024,
        1280
      ],
      "parameters": {
        "modelId": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o",
          "cachedResultName": "GPT-4O"
        },
        "options": {},
        "messages": {
          "values": [
            {
              "role": "system",
              "content": "=De aqu\u00ed saca y devuelve en formato JSON, los siguientes urls de los sitemap que sean true: \n\nPages: {{ $('OPTIONS').item.json.scan_pages }}\n\nPosts: {{ $('OPTIONS').item.json.scan_posts }}\n\nCategorias: {{ $('OPTIONS').item.json.category }}\n\nTags: {{ $('OPTIONS').item.json.tags }}\n\nSalida:\n\n{\n\"sitemap_page\":\"https://...\",\n\"sitemap_posts\":\"https://\"\n}"
            },
            {
              "content": "=Sitemap: \n{{ $json.sitemapindex.sitemap[0].loc }}\n\n{{ $json.sitemapindex.sitemap[1].loc }}\n\n{{ $json.sitemapindex.sitemap[2].loc }}"
            }
          ]
        },
        "jsonOutput": true
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.8
    },
    {
      "id": "d71fbfb6-3e9b-427b-afe3-6fd77ff77ede",
      "name": "XML",
      "type": "n8n-nodes-base.xml",
      "position": [
        -2480,
        1280
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "6578bcc5-b412-46bf-88d5-8b285372e9b9",
      "name": "Loop Over Items",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -1856,
        1280
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "25c7cbaf-7eb9-4e71-a488-b6d16242d324",
      "name": "Message a model1",
      "type": "@n8n/n8n-nodes-langchain.openAi",
      "position": [
        -1200,
        1408
      ],
      "parameters": {
        "modelId": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-5-nano",
          "cachedResultName": "GPT-5-NANO"
        },
        "options": {},
        "messages": {
          "values": [
            {
              "role": "system",
              "content": "El usuario te mandara el contenido de la p\u00e1gina web, tu mision es sacar un resumen de la p\u00e1gina web, idioma de la p\u00e1gn, h1, enlaces internos (no imagenes ni css ni js) y enlaces externos y a\u00f1adirlos mediante la tool sheet a la db"
            },
            {
              "content": "=URL: {{ $('Split URLs').item.json.urls }}\n\n{{ $json.data }}"
            }
          ]
        }
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.8
    },
    {
      "id": "6fb7c3fa-7851-49cd-8d0b-01df74a80f35",
      "name": "Append row in sheet in Google Sheets",
      "type": "n8n-nodes-base.googleSheetsTool",
      "position": [
        -1056,
        1648
      ],
      "parameters": {
        "columns": {
          "value": {
            "Lang": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Lang', ``, 'string') }}",
            "Page URL": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Page_URL', ``, 'string') }}",
            "External URLs": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('External_URLs', ``, 'string') }}",
            "Internal URLs": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Internal_URLs', ``, 'string') }}",
            "Summary Content": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Summary_Content', ``, 'string') }}",
            "H1 and hierarchy": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('H1_and_hierarchy', ``, 'string') }}"
          },
          "schema": [
            {
              "id": "Page URL",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Page URL",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Content text",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Content text",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Lang",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Lang",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "H1 and hierarchy",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "H1 and hierarchy",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "External URLs",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "External URLs",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Internal URLs",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Internal URLs",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Summary Content",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Summary Content",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Data schema",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Data schema",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
          "cachedResultName": "Web"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
          "cachedResultName": "Web chat Workflow"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.6
    },
    {
      "id": "9a5d11a2-0fc8-48a1-8fa0-c2f53fb49b54",
      "name": "Complete",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        -1616,
        1168
      ],
      "parameters": {
        "columns": {
          "value": {
            "Data schema": "={{true}}"
          },
          "schema": [
            {
              "id": "Page URL",
              "type": "string",
              "display": true,
              "removed": true,
              "required": false,
              "displayName": "Page URL",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Content text",
              "type": "string",
              "display": true,
              "removed": true,
              "required": false,
              "displayName": "Content text",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Lang",
              "type": "string",
              "display": true,
              "removed": true,
              "required": false,
              "displayName": "Lang",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "H1 and hierarchy",
              "type": "string",
              "display": true,
              "removed": true,
              "required": false,
              "displayName": "H1 and hierarchy",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "External URLs",
              "type": "string",
              "display": true,
              "removed": true,
              "required": false,
              "displayName": "External URLs",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Internal URLs",
              "type": "string",
              "display": true,
              "removed": true,
              "required": false,
              "displayName": "Internal URLs",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Summary Content",
              "type": "string",
              "display": true,
              "removed": true,
              "required": false,
              "displayName": "Summary Content",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Data schema",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Data schema",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "Data schema"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "appendOrUpdate",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
          "cachedResultName": "Web"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
          "cachedResultName": "Web chat Workflow"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.6
    },
    {
      "id": "c371c8db-e752-48fa-999d-4813aeb13f38",
      "name": "HTTP Request2",
      "type": "n8n-nodes-base.httpRequestTool",
      "position": [
        -2176,
        480
      ],
      "parameters": {
        "url": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('URL', ``, 'string') }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "40169b8e-5948-4422-98d9-4bca87ccab73",
      "name": "Merge",
      "type": "n8n-nodes-base.code",
      "position": [
        -2272,
        1280
      ],
      "parameters": {
        "jsCode": "// Obtenemos el array de URLs del JSON\nconst urlsArray = $input.first().json.urlset.url;\n\n// Creamos un objeto donde cada clave es \"url 1\", \"url 2\", etc.\nconst result = {};\nurlsArray.forEach((item, index) => {\n  if (item.loc) {\n    result[`url ${index + 1}`] = item.loc;\n  }\n});\n\n// Devolvemos el objeto\nreturn [\n  {\n    json: {\n      urls: result\n    }\n  }\n];\n"
      },
      "typeVersion": 2
    },
    {
      "id": "5d98fe9e-890c-4c9f-81c8-309cc23dc8af",
      "name": "Split URLs",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -2064,
        1280
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "urls"
      },
      "typeVersion": 1
    },
    {
      "id": "98abaa2b-ddbc-4c04-830e-d7112a6a57e2",
      "name": "Req URL",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        -1616,
        1408
      ],
      "parameters": {
        "url": "={{ $('Split URLs').item.json.urls }}",
        "options": {},
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "User-Agent",
              "value": "={{ $json.userAgent }}"
            },
            {
              "name": "Accept-Language",
              "value": "es-ES,es;q=0.9,en;q=0.8"
            },
            {
              "name": "Accept-Encoding",
              "value": "gzip, deflate, br"
            },
            {
              "name": "Referer",
              "value": "https://www.google.com/"
            },
            {
              "name": "Connection",
              "value": "keep-alive"
            },
            {
              "name": "Upgrade-Insecure-Requests",
              "value": "1"
            },
            {
              "name": "Sec-Fetch-Dest",
              "value": "document"
            },
            {
              "name": "Sec-Fetch-Mode",
              "value": "navigate"
            },
            {
              "name": "DNT",
              "value": "1"
            },
            {
              "name": "Accept",
              "value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "71d974a6-4f60-4573-be09-7cbb09502fa3",
      "name": "HTML to Markdown",
      "type": "n8n-nodes-base.markdown",
      "position": [
        -1408,
        1408
      ],
      "parameters": {
        "html": "={{ $json.data }}",
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "f076a729-8f40-4a3b-ad32-83837964c42c",
      "name": "Maping Sitemaps",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -2672,
        1280
      ],
      "parameters": {
        "url": "={{ $json.message.content.sitemap_page }}",
        "options": {},
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Accept-Language",
              "value": "es-ES,es;q=0.9,en;q=0.8"
            },
            {
              "name": "Accept-Encoding",
              "value": "gzip, deflate, br"
            },
            {
              "name": "Referer",
              "value": "https://www.google.com/"
            },
            {
              "name": "Connection",
              "value": "keep-alive"
            },
            {
              "name": "Upgrade-Insecure-Requests",
              "value": "1"
            },
            {
              "name": "Sec-Fetch-Dest",
              "value": "document"
            },
            {
              "name": "Sec-Fetch-Mode",
              "value": "navigate"
            },
            {
              "name": "DNT",
              "value": "1"
            },
            {
              "name": "Accept",
              "value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "bca3322d-bc2e-4932-a5f2-a2e9548a8aef",
      "name": "Get data schema",
      "type": "n8n-nodes-base.googleSheets",
      "maxTries": 5,
      "position": [
        -3568,
        560
      ],
      "parameters": {
        "options": {},
        "filtersUI": {
          "values": [
            {
              "lookupValue": "={{ true }}",
              "lookupColumn": "Data schema"
            }
          ]
        },
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
          "cachedResultName": "Web"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
          "cachedResultName": "Web chat Workflow"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "retryOnFail": true,
      "typeVersion": 4.6,
      "alwaysOutputData": true,
      "waitBetweenTries": 3000
    },
    {
      "id": "d051d2f3-cc65-4cb8-8a67-90d7df0dda08",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -3568,
        736
      ],
      "parameters": {
        "width": 256,
        "height": 176,
        "content": "## Example document URL\nhttps://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=sharing"
      },
      "typeVersion": 1
    },
    {
      "id": "2ffd406b-1b1b-44d6-be7e-5bbdf73ad5d0",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -3776,
        240
      ],
      "parameters": {
        "color": 5,
        "width": 496,
        "height": 288,
        "content": "## Overview\nThis is a web consultation chat workflow that, on the first run with a given URL, discovers the sitemap, crawls the site, extracts useful information (language, H1 hierarchy, internal/external links, summary) and stores it in Google Sheets.\n\nFrom then on, if the \u201cschema\u201d flag is set in the sheet (Data schema = true), the chat switches to an Agent mode that responds to the user \u201cas if it were the website,\u201d consulting the database (Google Sheets) and making controlled HTTP requests when needed.\n\n"
      },
      "typeVersion": 1
    },
    {
      "id": "303ec015-8a82-4092-9dd9-46bb7658a1d3",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -864,
        32
      ],
      "parameters": {
        "width": 992,
        "height": 1104,
        "content": "## 1) Chat trigger & schema check\n\n* **Chat web (trigger)** \u2013 Public webhook with Basic Auth. Captures `chatInput`.\n* **Get data schema (Google Sheets)** \u2013 Filters rows where **Data schema = true**.\n* **If**:\n\n  * If schema exists \u2192 **Branch A (Agent mode with existing data)**.\n  * If not \u2192 **Branch B (URL validation & initial crawling)**.\n\n---\n\n## 2) Branch A \u2014 Agent mode (consults an already indexed site)\n\n* **AI Agent** (LangChain Agent):\n\n  * *System*: \u201cYou act as a website\u2026 use the tool sheet to access all site info.\u201d\n  * **Connected tools**:\n\n    * **Get row(s) in sheet in Google Sheets** \u2013 lets the agent read the database.\n    * **HTTP Request2** \u2013 allows the agent to fetch a URL it generates via `$fromAI('URL')`.\n  * **OpenAI Chat Model (gpt-5-nano)** \u2013 LLM powering the agent.\n  * **Simple Memory** \u2013 short-term context window (50 messages).\n* **Respond to Chat1** \u2013 Sends the agent\u2019s `output` back to the user.\n\n**Purpose**: The user can ask questions (\u201cWhat\u2019s on page X?\u201d, \u201cWhat links are there?\u201d), and the agent answers using the sheet\u2019s stored data and, if necessary, live HTTP fetches.\n\n---\n\n## 3) Branch B \u2014 URL validation & crawl preparation\n\n* **AI Agent1** (URL classifier):\n\n  * *System*: \u201cReturn JSON with `URL` and `URL_bool` (true if it\u2019s a valid URL).\u201d\n  * **OpenAI Chat Model1 (gpt-5-nano)** + **Structured Output Parser** enforce JSON.\n* **If1**:\n\n  * If `URL_bool = true` \u2192 continue.\n  * If `false` \u2192 **Respond to Chat** (\u201cYou must enter a valid URL\u2026\u201d).\n* **OPTIONS (Set)**: Flags to choose which sitemaps to process:\n\n  * `scan_pages: true`, `scan_posts/category/tags: false` (pages only).\n* **UA Rotativo1 (Code)**: Selects a **random User-Agent** (desktop/mobile, Win/Mac/iOS/Android) to reduce blocking.\n* **Req robots (HTTP Request)**: Downloads `robots.txt` from `{{ AI Agent1.output.URL }}/robots.txt` with realistic headers (language, compression, referer, etc.). If it fails, goes to **Req Error** (\u201cURL not valid\u2026\u201d).\n* **extract sitemap url (Code)**: Parses `robots.txt` and **extracts the `Sitemap:` line** \u2192 `sitemapUrl`.\n* **Maping Sitemap (HTTP Request)**: Downloads the **sitemap index** (`sitemapindex`) with error handling (failure \u2192 **Sitemap Error**).\n* **XML1 (XML\u2192JSON)**: Converts the sitemap XML to JSON.\n\n"
      },
      "typeVersion": 1
    },
    {
      "id": "f37eac9a-5e47-45a3-a1ba-e65ebb312571",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -3968,
        560
      ],
      "parameters": {
        "width": 150,
        "height": 96,
        "content": "# P1"
      },
      "typeVersion": 1
    },
    {
      "id": "8a2b47b5-dd9c-4f20-b76f-437446d0d0c6",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2720,
        288
      ],
      "parameters": {
        "width": 166,
        "height": 272,
        "content": "\n\n\n\n\n\n\n\n# P2"
      },
      "typeVersion": 1
    },
    {
      "id": "d63fd3cc-2966-4460-8de0-8b871d6f2e78",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -3248,
        736
      ],
      "parameters": {
        "width": 150,
        "height": 320,
        "content": "\n\n\n\n\n\n\n\n\n\n\n# P3"
      },
      "typeVersion": 1
    },
    {
      "id": "059d0c29-58d7-4b75-9ec4-89d8b1b8e54b",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -848,
        1200
      ],
      "parameters": {
        "width": 992,
        "height": 1248,
        "content": "## 4) Selecting the specific sitemap (pages)\n\n* **Message a model (GPT-4o)**:\n\n  * *System*: Ask the model to **select and return in JSON** the relevant sitemaps based on the flags (`scan_pages`, `scan_posts`, etc.).\n  * *User*: Passes the first 3 `loc` entries from `sitemapindex`.\n  * **Expected output**: e.g. `{ \"sitemap_page\": \"https://\u2026\" }`.\n* **Maping Sitemaps (HTTP Request)**: Downloads the **pages sitemap** provided by the model.\n* **XML (XML\u2192JSON)**: Converts it to JSON (`urlset.url`).\n\n---\n\n## 5) Expanding URLs & page-by-page processing loop\n\n* **Merge (Code)**: Turns `urlset.url` into an object `{ urls: { \"url 1\": \"...\", \"url 2\": \"...\" } }`.\n* **Split URLs**: Breaks that object into **one item per URL**.\n* **Loop Over Items (SplitInBatches)**: Iterates through each URL (supports batching). Its two outputs drive two flows, which run one after the other rather than in parallel:\n\n  1. **Loop output** (index 1) \u2192 **Req URL (HTTP Request)** \u2192 **HTML to Markdown** \u2192 **Message a model1 (gpt-5-nano)** \u2192 back to **Loop Over Items**:\n\n     * *System*: \u201cYou will receive page content; extract: summary, language, H1/hierarchy, internal links (no CSS/JS/images), external links; then add them to the DB via the sheet tool.\u201d\n     * *User*: `{{ $json.data }}` (HTML converted to Markdown).\n     * **Append row in sheet in Google Sheets (Tool)** is connected as an **AI tool** and uses `$fromAI(...)` mappings to fill columns:\n\n       * **Lang**, **H1 and hierarchy**, **External URLs**, **Internal URLs**, **Summary Content**.\n     * **Result**: Adds one row per page with extracted fields.\n  2. **Done output** (index 0) \u2192 **Complete (Google Sheets, appendOrUpdate)**, once every URL has been processed:\n\n     * Marks/ensures a row with **`Data schema = true`** exists (acts as a \u201cready\u201d flag so future runs go into **Agent mode**).\n\n---\n\n## 6) What gets stored in Google Sheets\n\nSheet: **\u201cWeb\u201d** (gid=0). Columns in the schema:\n\n* **Lang** \u2013 detected language.\n* **H1 and hierarchy** \u2013 H1 and heading hierarchy.\n* **External URLs** \u2013 outbound links.\n* **Internal URLs** \u2013 valid internal links.\n* **Summary Content** \u2013 page summary.\n* **Data schema** \u2013 boolean flag controlling the flow mode.\n\n> Note: In the current mapping of \u201cAppend row\u2026\u201d, only the first five columns above (**Lang** through **Summary Content**) are populated; **Data schema** is written by the **Complete** node. \u201cPage URL\u201d and \u201cContent text\u201d exist in the schema but are not currently mapped (you could add them by requesting them from the LLM and using `$fromAI`).\n\n---\n\n## 7) Models & memory\n\n* **gpt-5-nano** \u2013 for URL classification, per-page structured extraction, and main Agent responses when data exists.\n* **GPT-4o** \u2013 for sitemap selection from the index.\n* **Memory** \u2013 50-message context window for short-term chat continuity in Agent mode.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "33d198cc-058e-4935-9e49-adc77baf654b",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2144,
        912
      ],
      "parameters": {
        "color": 3,
        "width": 608,
        "height": 192,
        "content": "## User experience per case\n\n* **No valid URL** \u2013 Returns a message asking the user to enter a valid URL.\n* **First time with valid URL** \u2013 No data yet; runs **discovery \u2192 crawling \u2192 extraction \u2192 save to Sheets**.\n* **Subsequent interactions** (with `Data schema = true`) \u2013 **Agent** answers as if it were the website, using the **DB in Sheets** and live HTTP when needed.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "670ecb31-9a8d-4d13-aa94-066463f91e6a",
      "name": "Sticky Note8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -3248,
        1136
      ],
      "parameters": {
        "width": 150,
        "height": 320,
        "content": "\n\n\n\n\n\n\n\n\n\n\n# P4"
      },
      "typeVersion": 1
    },
    {
      "id": "c0bffbee-cf0d-4abf-99a0-dbbc5347c08d",
      "name": "Sticky Note9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2096,
        1440
      ],
      "parameters": {
        "width": 150,
        "height": 320,
        "content": "\n\n\n\n\n\n\n\n\n\n\n## P5 & P6"
      },
      "typeVersion": 1
    },
    {
      "id": "de0a9316-6130-4272-bce1-db37039e9c3d",
      "name": "Sticky Note10",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -3968,
        720
      ],
      "parameters": {
        "color": 5,
        "width": 272,
        "height": 144,
        "content": "## Node By OXSR\nMore info and nodes\nhttps://n8n.io/creators/oxsr11/\n\nGit: https://github.com/oxsr"
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "If": {
      "main": [
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "AI Agent1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If1": {
      "main": [
        [
          {
            "node": "OPTIONS",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Respond to Chat",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "XML": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "XML1": {
      "main": [
        [
          {
            "node": "Message a model",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge": {
      "main": [
        [
          {
            "node": "Split URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OPTIONS": {
      "main": [
        [
          {
            "node": "UA Rotativo1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Req URL": {
      "main": [
        [
          {
            "node": "HTML to Markdown",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "AI Agent": {
      "main": [
        [
          {
            "node": "Respond to Chat1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Chat web": {
      "main": [
        [
          {
            "node": "Get data schema",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "AI Agent1": {
      "main": [
        [
          {
            "node": "If1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Req robots": {
      "main": [
        [
          {
            "node": "extract sitemap url",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Req Error",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split URLs": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "UA Rotativo1": {
      "main": [
        [
          {
            "node": "Req robots",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request2": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Simple Memory": {
      "ai_memory": [
        [
          {
            "node": "AI Agent",
            "type": "ai_memory",
            "index": 0
          }
        ]
      ]
    },
    "Maping Sitemap": {
      "main": [
        [
          {
            "node": "XML1",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Sitemap Error",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get data schema": {
      "main": [
        [
          {
            "node": "If",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [
          {
            "node": "Complete",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Req URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Maping Sitemaps": {
      "main": [
        [
          {
            "node": "XML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Message a model": {
      "main": [
        [
          {
            "node": "Maping Sitemaps",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTML to Markdown": {
      "main": [
        [
          {
            "node": "Message a model1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Message a model1": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model1": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent1",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "extract sitemap url": {
      "main": [
        [
          {
            "node": "Maping Sitemap",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "AI Agent1",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Append row in sheet in Google Sheets": {
      "ai_tool": [
        [
          {
            "node": "Message a model1",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Get row(s) in sheet in Google Sheets": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    }
  }
}