AutomationFlows › AI & RAG › Extract Information

Extract Information

Extract Information. Uses googleSheets, httpRequest, googleDocs. Event-driven trigger; 8 nodes.

Event trigger · ★★★★☆ complexity · 8 nodes · Google Sheets · HTTP Request · Google Docs
AI & RAG · Trigger: Event · Nodes: 8 · Complexity: ★★★★☆ · Added:

This workflow follows the Google Docs → Google Sheets recipe pattern — see all workflows that pair these two integrations.

The workflow JSON

Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, activate. Full import guide →

Download .json
{
  "name": "Extract Information",
  "nodes": [
    {
      "parameters": {},
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        -1120,
        -240
      ],
      "id": "0fe4516a-04a7-4481-b2f9-5c23d2b83b2f",
      "name": "When clicking \u2018Test workflow\u2019"
    },
    {
      "parameters": {
        "documentId": {
          "__rl": true,
          "value": "1V_y7CYJT7dnlMyEHIJw9b0hAUXzcanNOqu04AbqNcjI",
          "mode": "list",
          "cachedResultName": "url list",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1V_y7CYJT7dnlMyEHIJw9b0hAUXzcanNOqu04AbqNcjI/edit?usp=drivesdk"
        },
        "sheetName": {
          "__rl": true,
          "value": "gid=0",
          "mode": "list",
          "cachedResultName": "Sheet1",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1V_y7CYJT7dnlMyEHIJw9b0hAUXzcanNOqu04AbqNcjI/edit#gid=0"
        },
        "options": {}
      },
      "type": "n8n-nodes-base.googleSheets",
      "typeVersion": 4.5,
      "position": [
        -900,
        -240
      ],
      "id": "56b78b28-4202-4056-8dac-33f7a5c34023",
      "name": "Google Sheets: List of URL for scraping ",
      "alwaysOutputData": false,
      "notesInFlow": false,
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "91d187ed-a0d9-4b7a-ae6e-ca5bd0707122",
              "name": "URL",
              "value": "={{ $json.URL }}",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        -660,
        -240
      ],
      "id": "13e55464-c0ea-4d3d-8d04-03a3834744f5",
      "name": "Edit Fields",
      "alwaysOutputData": false
    },
    {
      "parameters": {
        "options": {
          "reset": false
        }
      },
      "type": "n8n-nodes-base.splitInBatches",
      "typeVersion": 3,
      "position": [
        -440,
        -240
      ],
      "id": "20628b50-02c8-4c26-8777-32b7f3d823e6",
      "name": "Loop Over Items"
    },
    {
      "parameters": {
        "url": "={{ $json.URL }}",
        "options": {}
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        -260,
        -100
      ],
      "id": "2604d4fa-93d7-4fff-ab1b-3e1a398e485d",
      "name": "HTTP Request",
      "alwaysOutputData": false
    },
    {
      "parameters": {
        "jsCode": "const html = $json[\"data\"]; // Get raw HTML content\n\n// Fail with a clear message when the HTTP Request did not return a text body\n// (e.g. a JSON or binary response), instead of an opaque TypeError on .replace\nif (typeof html !== 'string') {\n    throw new Error('HTTP Request did not return a text body in the data field; cannot extract page text');\n}\n\n// Remove script, style, and metadata content\nconst cleanedHtml = html\n    .replace(/<script[\\s\\S]*?>[\\s\\S]*?<\\/script>/gi, '')  // Remove <script> tags\n    .replace(/<style[\\s\\S]*?>[\\s\\S]*?<\\/style>/gi, '')  // Remove <style> tags\n    .replace(/<noscript[\\s\\S]*?>[\\s\\S]*?<\\/noscript>/gi, '')  // Remove <noscript> tags\n    .replace(/<meta[\\s\\S]*?>/gi, '')  // Remove <meta> tags\n    .replace(/<link[\\s\\S]*?>/gi, '')  // Remove <link> tags\n    .replace(/<\\/?[^>]+(>|$)/g, ' ')  // Remove all other HTML tags\n    .replace(/\\s+/g, ' ')  // Normalize spaces\n    .trim();  // Trim whitespace\n\nreturn [{ url:$('Loop Over Items').first().json.URL ,clean_text: cleanedHtml }];\n"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        -100,
        -100
      ],
      "id": "ccc2259b-4a96-4c88-88ff-6199939f6d42",
      "name": "Code"
    },
    {
      "parameters": {
        "folderId": "1Ydl5V5N85L1uhBxjyh2aZ2FYCNP-W5wj",
        "title": "={{ $json.url }}"
      },
      "type": "n8n-nodes-base.googleDocs",
      "typeVersion": 2,
      "position": [
        120,
        -100
      ],
      "id": "7e50d012-dae4-4c0c-9ab4-a1e6c76e528c",
      "name": "Google Docs: Create docs",
      "executeOnce": false,
      "notesInFlow": false,
      "credentials": {
        "googleDocsOAuth2Api": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "operation": "update",
        "documentURL": "={{ $json.id }}",
        "actionsUi": {
          "actionFields": [
            {
              "action": "insert",
              "text": "={{ $('Code').item.json.clean_text }}"
            }
          ]
        }
      },
      "type": "n8n-nodes-base.googleDocs",
      "typeVersion": 2,
      "position": [
        340,
        -100
      ],
      "id": "4aa00624-595d-4aa1-9b7d-0002c9cb82ad",
      "name": "Google Docs",
      "credentials": {
        "googleDocsOAuth2Api": {
          "name": "<your credential>"
        }
      }
    }
  ],
  "connections": {
    "When clicking \u2018Test workflow\u2019": {
      "main": [
        [
          {
            "node": "Google Sheets: List of URL for scraping ",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Sheets: List of URL for scraping ": {
      "main": [
        [
          {
            "node": "Edit Fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Edit Fields": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "HTTP Request",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request": {
      "main": [
        [
          {
            "node": "Code",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code": {
      "main": [
        [
          {
            "node": "Google Docs: Create docs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Docs: Create docs": {
      "main": [
        [
          {
            "node": "Google Docs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Docs": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "b5b1264e-98fa-4a24-b532-ad895ec408d8",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "id": "AE6U5DTgXRT67Q2G",
  "tags": []
}

Credentials you'll need

Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.

Pro

For the full experience, including quality scoring and batch-install features for each workflow, upgrade to Pro.

About this workflow

Extract Information. Uses googleSheets, httpRequest, googleDocs. Event-driven trigger; 8 nodes.

Source: https://github.com/Habibi6010/AI-Agent/blob/f92644c153f3c99e8b8c7f6076e9cc1b2b859c1d/N8N/Extract_text.json — original creator credit. Request a take-down →

More AI & RAG workflows → · Browse all categories →

Related workflows

Workflows that share integrations, category, or trigger type with this one. All free to copy and import.

AI & RAG

This workflow is built for professionals and teams who want to scale their B2B outreach with context-rich, personalized communication. It automates the full prospect research process — from pulling le

OpenAI Chat, Chain Llm, Output Parser Structured +3
AI & RAG

This workflow is designed for professionals and teams seeking to scale their B2B research with comprehensive company intelligence. It automates the full prospect analysis process — from extracting con

Hunter, HTTP Request, Airtop +6
AI & RAG

This workflow answers WhatsApp support messages using Google Gemini with company context from Google Docs, and automatically creates Cal.com bookings when users provide enough scheduling details, then

Agent, WhatsApp Trigger, WhatsApp +5
AI & RAG

This workflow contains community nodes that are only compatible with the self-hosted version of n8n.

Form Trigger, HTTP Request, Agent +6
AI & RAG

This template is designed for healthcare providers, sales reps, and medical tourism companies who need to process diagnosis emails efficiently. It automates the full flow from email to report delivery

Gmail Trigger, Google Sheets, Agent +5