{
  "id": "b0NmRs9o4fvmw3vi",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "Niche History Generator",
  "tags": [
    {
      "id": "lZKSh2IoxHklnOUw",
      "name": "ScrapeOps",
      "createdAt": "2025-10-20T20:27:13.410Z",
      "updatedAt": "2025-10-20T20:27:13.410Z"
    },
    {
      "id": "yh7XX4GObvd3np2S",
      "name": "google sheets",
      "createdAt": "2025-11-06T11:10:29.979Z",
      "updatedAt": "2025-11-06T11:10:29.979Z"
    },
    {
      "id": "g2HXimaqnEo1MFUR",
      "name": "wikipedia",
      "createdAt": "2025-12-04T06:41:29.909Z",
      "updatedAt": "2025-12-04T06:41:29.909Z"
    },
    {
      "id": "LxkPKH8N5AyDKJ18",
      "name": "research",
      "createdAt": "2025-12-04T06:41:34.063Z",
      "updatedAt": "2025-12-04T06:41:34.063Z"
    },
    {
      "id": "uJLE6LNaDOFm3yTZ",
      "name": "ContentGeneration",
      "createdAt": "2025-12-04T06:41:47.080Z",
      "updatedAt": "2025-12-04T06:41:47.080Z"
    },
    {
      "id": "EPiKzzzTnU1nVITY",
      "name": "OpenAI",
      "createdAt": "2025-12-04T06:41:56.401Z",
      "updatedAt": "2025-12-04T06:41:56.401Z"
    }
  ],
  "nodes": [
    {
      "id": "5734320a-5061-4438-a690-2b8524e26243",
      "name": "When clicking \"Execute Workflow\"",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1072,
        -48
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "92c4455f-4db1-4021-aed9-69f33fc0fb5f",
      "name": "Set Topic",
      "type": "n8n-nodes-base.set",
      "position": [
        -848,
        -48
      ],
      "parameters": {
        "fields": {
          "values": [
            {
              "name": "topic",
              "stringValue": "n8n"
            }
          ]
        },
        "options": {}
      },
      "typeVersion": 3.2
    },
    {
      "id": "2a11e959-ef81-4c1d-ae38-f76b1903f277",
      "name": "Wikipedia Search API",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -624,
        -48
      ],
      "parameters": {
        "url": "https://en.wikipedia.org/w/api.php",
        "options": {},
        "sendQuery": true,
        "sendHeaders": true,
        "queryParameters": {
          "parameters": [
            {
              "name": "action",
              "value": "query"
            },
            {
              "name": "list",
              "value": "search"
            },
            {
              "name": "srsearch",
              "value": "={{ $json.topic }}"
            },
            {
              "name": "format",
              "value": "json"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "User-Agent",
              "value": "n8n-workflow/1.0 (https://n8n.io; user@example.com)"
            }
          ]
        }
      },
      "typeVersion": 4.1
    },
    {
      "id": "c0765484-7583-43f5-83aa-905761c8d878",
      "name": "ScrapeOps Scraper",
      "type": "@scrapeops/n8n-nodes-scrapeops.ScrapeOps",
      "position": [
        -176,
        -48
      ],
      "parameters": {
        "url": "={{ $json.wikipedia_page_url }}",
        "advancedOptions": {
          "render_js": true
        }
      },
      "credentials": {
        "scrapeOpsApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "8814f99a-63a2-4494-a38a-853230986a97",
      "name": "Append row in sheet",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        848,
        -48
      ],
      "parameters": {
        "columns": {
          "value": {
            "Topic": "={{ $json.Topic }}",
            "Timeline": "={{ $json.Timeline }}",
            "History_Raw": "={{ $json.History_Raw }}",
            "History_Cleaned": "={{ $json.History_Cleaned }}",
            "History_Summary": "={{ $json.History_Summary }}",
            "Search_Query_URL\r": "={{ $json.Search_Query_URL }}",
            "Wikipedia_Page_URL": "={{ $json.Wikipedia_PAGE_URL }}",
            "Wikipedia_Page_Title\r": "={{ $json.Wikipedia_Page_Title }}"
          },
          "schema": [
            {
              "id": "Topic",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Topic",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Search_Query_URL\r",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Search_Query_URL\r",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Wikipedia_Page_Title\r",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Wikipedia_Page_Title\r",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Wikipedia_Page_URL",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Wikipedia_Page_URL",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "History_Raw",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "History_Raw",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "History_Cleaned",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "History_Cleaned",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "History_Summary",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "History_Summary",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Timeline",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Timeline",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1P0wZ449wVNndhSa6cJtK3VA3Aulv1k18zdpwWYY13gE/edit#gid=0",
          "cachedResultName": "Sheet1"
        },
        "documentId": {
          "__rl": true,
          "mode": "url",
          "value": "https://docs.google.com/spreadsheets/d/1P0wZ449wVNndhSa6cJtK3VA3Aulv1k18zdpwWYY13gE/edit?gid=0#gid=0"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "950ac15e-6f9a-4cc2-a170-fa09031dc131",
      "name": "Message a model",
      "type": "@n8n/n8n-nodes-langchain.openAi",
      "position": [
        272,
        -48
      ],
      "parameters": {
        "modelId": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o-mini",
          "cachedResultName": "GPT-4O-MINI"
        },
        "options": {},
        "messages": {
          "values": [
            {
              "role": "system",
              "content": "=You are the Niche History Generator AI. Return EXACTLY and ONLY a single valid JSON object \u2014 nothing else. Do NOT include explanation, commentary, markdown, triple backticks, or any extra text. The JSON must contain exactly these keys: Topic, Search_Query_URL, Wikipedia_Page_Title, Wikipedia_PAGE_URL, History_Raw, History_Cleaned, History_Summary, Timeline. Use \\\\n\\\\n inside paragraphs only. Timeline entries must use single-line entries starting with - **YYYY:**. Return a JSON object (not a JSON string)."
            },
            {
              "content": "=Topic:{{ $json.topic }}\nWikipedia Page Title: {{ $json.wikipedia_page_title }}\nWikipedia Page URL:{{ $json.wikipedia_page_url }}\nSearch Query URL:{{ $json.search_query_url }}\nRaw History:{{ $json.history_raw }}"
            }
          ]
        }
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "executeOnce": true,
      "typeVersion": 1.8
    },
    {
      "id": "603d4e6c-a86e-4995-a253-e6cf31b524e5",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1840,
        -464
      ],
      "parameters": {
        "width": 656,
        "height": 608,
        "content": "# \ud83d\udcdc Wikipedia Niche History Generator\n\nThis workflow automates the research process by scraping Wikipedia for a specific topic, extracting its history, and using AI to generate a structured summary and timeline.\n\n### How it works\n1. **Input**: You provide a topic (e.g., \"n8n\", \"Tesla\", \"Coffee\").\n2. **Search**: The workflow searches Wikipedia to find the most relevant page.\n3. **Scrape**: It uses **ScrapeOps** to reliably scrape the full page content without getting blocked.\n4. **Process**: A custom code node extracts the \"History\", \"Origins\", or \"Background\" section from the HTML.\n5. **Analyze**: GPT-4o-mini summarizes the history and creates a timeline.\n6. **Save**: The final structured data is appended to a Google Sheet.\n\n### Setup steps\n1. **ScrapeOps**: [Get your free API key](https://scrapeops.io/app/register/n8n) and add it to the ScrapeOps node.\n2. **OpenAI**: Configure your OpenAI credentials for the LLM node.\n3. **Google Sheets**:\n   - Duplicate this [Template Sheet](https://docs.google.com/spreadsheets/d/1P0wZ449wVNndhSa6cJtK3VA3Aulv1k18zdpwWYY13gE/edit?gid=0#gid=0).\n   - Connect your Google account and select the new sheet in the \"Append row in sheet\" node.\n\n### Customization\n- **Topic**: Change the value in the \"Set Topic\" node.\n- **Docs**: Read the [ScrapeOps n8n docs](https://scrapeops.io/docs/n8n/overview/) for more info."
      },
      "typeVersion": 1
    },
    {
      "id": "de7b3fe5-a62f-4c64-8a18-6965bdca9d92",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1120,
        -144
      ],
      "parameters": {
        "color": 7,
        "width": 416,
        "height": 256,
        "content": "## 1. Input\nDefine your research topic here."
      },
      "typeVersion": 1
    },
    {
      "id": "88efd19f-4100-4da2-bf1b-3506d1cbc5af",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -680,
        -144
      ],
      "parameters": {
        "color": 7,
        "width": 656,
        "height": 256,
        "content": "## 2. Search & Scrape\nFind and retrieve the Wikipedia page using ScrapeOps."
      },
      "typeVersion": 1
    },
    {
      "id": "256c58ef-1543-4a5f-86b7-4e98c4f00ee9",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        8,
        -144
      ],
      "parameters": {
        "color": 7,
        "width": 752,
        "height": 256,
        "content": "## 3. Process & Analyze\nExtract history sections and use AI to summarize."
      },
      "typeVersion": 1
    },
    {
      "id": "3fc32852-8314-42d1-ac85-4396068b43d5",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        776,
        -144
      ],
      "parameters": {
        "color": 7,
        "height": 256,
        "content": "## 4. Output\nSave the structured data to Google Sheets."
      },
      "typeVersion": 1
    },
    {
      "id": "6d76b574-3f8d-45e9-96cd-421e6b1b2d8b",
      "name": "Extract History Section",
      "type": "n8n-nodes-base.code",
      "position": [
        48,
        -48
      ],
      "parameters": {
        "jsCode": "// Clean Wikipedia Text (Regex version - No Cheerio)\nconst item = items[0].json;\nlet html = '';\n\n// 1. Extract HTML from Input\nif (typeof item === 'string') {\n  html = item;\n} else if (item.response?.body) {\n  html = item.response.body;\n} else if (item.body) {\n  html = item.body;\n} else if (item.content) {\n  html = item.content;\n} else if (item.data) {\n  html = item.data;\n} else {\n    // Handle \"exploded\" string (numeric keys) from ScrapeOps\n    const keys = Object.keys(item).filter(k => !isNaN(k));\n    if (keys.length > 0) {\n        keys.sort((a, b) => parseInt(a) - parseInt(b));\n        html = keys.map(k => item[k]).join('');\n    }\n}\n\n// 2. Basic Cleanup (Remove scripts, styles, tables, references)\nhtml = html.replace(/<script\\b[^>]*>[\\s\\S]*?<\\/script>/gim, \"\");\nhtml = html.replace(/<style\\b[^>]*>[\\s\\S]*?<\\/style>/gim, \"\");\nhtml = html.replace(/<table\\b[^>]*>[\\s\\S]*?<\\/table>/gim, \"\"); \nhtml = html.replace(/<sup\\b[^>]*class=[\"'][^\"']*reference[^\"']*[\"'][^>]*>[\\s\\S]*?<\\/sup>/gim, \"\");\nhtml = html.replace(/<span\\b[^>]*class=[\"'][^\"']*mw-editsection[^\"']*[\"'][^>]*>[\\s\\S]*?<\\/span>/gim, \"\");\n\n// 3. Find the History/Origins/Background Section\nconst headerRegex = /<h[23][^>]*>.*?(?:History|Origins|Background).*?<\\/h[23]>/i;\nconst headerMatch = headerRegex.exec(html);\n\nlet historyText = '';\n\nif (headerMatch) {\n    const startIndex = headerMatch.index + headerMatch[0].length;\n    const remainder = html.substring(startIndex);\n    const nextHeaderRegex = /<h[23][^>]*>/i;\n    const nextHeaderMatch = nextHeaderRegex.exec(remainder);\n    \n    let sectionContent = remainder;\n    if (nextHeaderMatch) {\n        sectionContent = remainder.substring(0, nextHeaderMatch.index);\n    }\n    \n    const pRegex = /<p\\b[^>]*>([\\s\\S]*?)<\\/p>/gi;\n    let pMatch;\n    while ((pMatch = pRegex.exec(sectionContent)) !== null) {\n        let pText = pMatch[1];\n        pText = pText.replace(/<[^>]+>/g, '');\n        pText = pText.replace(/&nbsp;/g, ' ').replace(/&amp;/g, '&').replace(/&quot;/g, '\"').replace(/&lt;/g, '<').replace(/&gt;/g, '>');\n        historyText += pText.trim() + '\\n\\n';\n    }\n} else {\n    // Fallback: Grab first 5 paragraphs\n    const pRegex = /<p\\b[^>]*>([\\s\\S]*?)<\\/p>/gi;\n    let pMatch;\n    let count = 0;\n    while ((pMatch = pRegex.exec(html)) !== null && count < 5) {\n        let pText = pMatch[1];\n        pText = pText.replace(/<[^>]+>/g, '');\n        pText = pText.replace(/&nbsp;/g, ' ').replace(/&amp;/g, '&').replace(/&quot;/g, '\"').replace(/&lt;/g, '<').replace(/&gt;/g, '>');\n        if (pText.trim().length > 50) {\n            historyText += pText.trim() + '\\n\\n';\n            count++;\n        }\n    }\n}\n\nhistoryText = historyText.replace(/\\[.*?\\]/g, '').trim();\n\n// 4. Return Data (Restore metadata from Construct Page URL node)\n// We use $('Construct Page URL').first().json to get the original topic/url info\nconst originalData = $('Construct Page URL').first().json;\n\nreturn [\n  {\n    json: {\n      ...originalData,\n      history_raw: historyText\n    }\n  }\n];"
      },
      "typeVersion": 2
    },
    {
      "id": "dfdb13c2-6c31-460c-908f-3f6a5304be71",
      "name": "Format AI Output",
      "type": "n8n-nodes-base.code",
      "position": [
        624,
        -48
      ],
      "parameters": {
        "jsCode": "// Universal robust parser \u2014 handles many OpenAI node shapes including message.content\nconst item = items[0];\nlet raw = '';\n// try many known places the assistant text may appear\nif (item.json.output_text) {\n  raw = item.json.output_text;\n} else if (item.json.content?.[0]?.text) {\n  raw = item.json.content[0].text;\n} else if (item.json.output?.[0]?.content?.[0]?.text) {\n  raw = item.json.output[0].content[0].text;\n} else if (item.json.choices?.[0]?.message?.content) {\n  raw = item.json.choices[0].message.content;\n} else if (item.json.choices?.[0]?.text) {\n  raw = item.json.choices[0].text;\n} else if (typeof item.json.message?.content === 'string') {\n  // your specific shape: item.json.message.content is a string with JSON\n  raw = item.json.message.content;\n} else if (Array.isArray(item.json.output) && item.json.output[0]?.content?.[0]?.text) {\n  raw = item.json.output[0].content[0].text;\n} else {\n  raw = JSON.stringify(item.json);\n}\n// normalize\nraw = String(raw).trim();\n// remove triple backticks if present\nraw = raw.replace(/^\\s*```(?:json)?\\s*/i, '');\nraw = raw.replace(/\\s*```\\s*$/i, '');\n// extract the first {...} JSON block\nconst match = raw.match(/\\{[\\s\\S]*\\}/);\nconst jsonText = match ? match[0] : raw;\n// parse with fallback (handle escaped newlines)\nlet parsed;\ntry {\n  parsed = JSON.parse(jsonText);\n} catch (e1) {\n  try {\n    parsed = JSON.parse(jsonText.replace(/\\\\n/g, '\\n'));\n  } catch (e2) {\n    return [{ json: { parse_error: e2.message, raw } }];\n  }\n}\n// tolerant key lookup\nfunction getVal(obj, names){\n  for (const n of names){\n    if (Object.prototype.hasOwnProperty.call(obj, n)) return obj[n];\n  }\n  // case-insensitive normalized\n  const norm = {};\n  for (const k of Object.keys(obj||{})) norm[k.toLowerCase().replace(/[_\\s]+/g,'')] = obj[k];\n  for (const n of names){\n    const key = n.toLowerCase().replace(/[_\\s]+/g,'');\n    if (norm[key] !== undefined) return norm[key];\n  }\n  return undefined;\n}\n// build output\nconst out = {};\nout.Topic = getVal(parsed, ['Topic','topic']) || '';\nout.Search_Query_URL = getVal(parsed, ['Search_Query_URL','search_query_url','searchqueryurl']) || '';\nout.Wikipedia_Page_Title = getVal(parsed, ['Wikipedia_Page_Title','Wikipedia Page Title','wikipedia_page_title']) || '';\nout.Wikipedia_PAGE_URL = getVal(parsed, ['Wikipedia_PAGE_URL','Wikipedia Page URL','wikipedia_page_url']) || '';\nout.History_Raw = getVal(parsed, ['History_Raw','history_raw']) || '';\nout.History_Cleaned = getVal(parsed, ['History_Cleaned','history_cleaned']) || '';\nout.History_Summary = getVal(parsed, ['History_Summary','history_summary']) || '';\n// handle Timeline: array -> newline string, or string (maybe JSON string)\nlet timeline = getVal(parsed, ['Timeline','timeline','TimeLine']);\nif (Array.isArray(timeline)) {\n  out.Timeline = timeline.join('\\n');\n} else if (typeof timeline === 'string') {\n  try {\n    const maybe = JSON.parse(timeline);\n    out.Timeline = Array.isArray(maybe) ? maybe.join('\\n') : timeline;\n  } catch (e) {\n    out.Timeline = timeline;\n  }\n} else {\n  out.Timeline = '';\n}\n// unescape double-escaped newlines\nfor (const k of ['History_Raw','History_Cleaned','History_Summary','Timeline']) {\n  if (typeof out[k] === 'string') out[k] = out[k].replace(/\\\\n/g, '\\n');\n}\nreturn [{ json: out }];"
      },
      "typeVersion": 2
    },
    {
      "id": "d319b30d-ce61-43a8-bb41-7423b59f89ff",
      "name": "Construct Page URL",
      "type": "n8n-nodes-base.code",
      "position": [
        -400,
        -48
      ],
      "parameters": {
        "jsCode": "// Get topic from Set Topic node (not from Wikipedia API response)\nconst topic = $('Set Topic').first().json.topic;\nconst search = items[0].json.query.search;\n\nif (!search || search.length === 0) {\n  throw new Error(\"No Wikipedia page found for: \" + topic);\n}\n\nconst title = search[0].title;\nconst encoded = encodeURIComponent(title.replace(/ /g, \"_\"));\n\nreturn [\n  {\n    json: {\n      topic,\n      wikipedia_page_title: title,\n      wikipedia_page_url: `https://en.wikipedia.org/wiki/${encoded}`,\n      search_query_url: `https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=${encodeURIComponent(topic)}&format=json`\n    }\n  }\n];"
      },
      "typeVersion": 2
    }
  ],
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "a392cdfb-8bf6-4257-869a-204f60d8d05e",
  "connections": {
    "Set Topic": {
      "main": [
        [
          {
            "node": "Wikipedia Search API",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Message a model": {
      "main": [
        [
          {
            "node": "Format AI Output",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Format AI Output": {
      "main": [
        [
          {
            "node": "Append row in sheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "ScrapeOps Scraper": {
      "main": [
        [
          {
            "node": "Extract History Section",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Construct Page URL": {
      "main": [
        [
          {
            "node": "ScrapeOps Scraper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wikipedia Search API": {
      "main": [
        [
          {
            "node": "Construct Page URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract History Section": {
      "main": [
        [
          {
            "node": "Message a model",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking \"Execute Workflow\"": {
      "main": [
        [
          {
            "node": "Set Topic",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}