{
  "id": "ALTwr1xWxmqGdCtZ",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "name": "ScrapeNinja: AI generated web scraper",
  "tags": [],
  "nodes": [
    {
      "id": "9dd743dd-4ee6-4aaf-820d-7e63ea31399c",
      "name": "ScrapeNinja",
      "type": "CUSTOM.scrapeNinja",
      "position": [
        200,
        0
      ],
      "parameters": {
        "url": "https://news.ycombinator.com/"
      },
      "credentials": {
        "scrapeNinjaApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "fbfe223d-e37f-473d-96c5-a5e89b19e2fc",
      "name": "Google Gemini Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        680,
        280
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-exp-1206"
      },
      "credentials": {
        "googlePalmApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "75a9b517-f51e-4b84-90d5-d9352f5185cc",
      "name": "Generate custom web scraper",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        0,
        0
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "c1e4e096-ae88-45d4-aed0-d7b92b496b5a",
      "name": "Cleanup HTML",
      "type": "CUSTOM.scrapeNinja",
      "position": [
        420,
        0
      ],
      "parameters": {
        "html": "={{ $json.body }}",
        "operation": "cleanup-html"
      },
      "typeVersion": 1
    },
    {
      "id": "dccb1015-ce34-4c4c-b556-a31de8639baf",
      "name": "Generate JS eval code via LLM",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        640,
        0
      ],
      "parameters": {
        "text": "=write a cheerio.js function to extract news items. your response MUST start with:\n\nfunction extract(html, cheerioInstance) {\n// use cheerio to load html...\n\nreturn [{ url: \"item1\", \"title\": \"xxx\", \"score\": \"5\", comments: 6 }, ... ]\n}\ndo not use new URL() because this won't work in my env.\nhtml:\n{{ $json.html }}",
        "promptType": "define"
      },
      "typeVersion": 1.5
    },
    {
      "id": "71c03255-9bb8-4ce8-b191-7b163326aa41",
      "name": "Eval generated code to extract data",
      "type": "CUSTOM.scrapeNinja",
      "position": [
        1000,
        0
      ],
      "parameters": {
        "html": "={{ $('Cleanup HTML').item.json.html }}",
        "operation": "extract-custom",
        "extractionFunction": "={{ $json.text }}"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "2a2dd735-e805-4fda-8a1c-b0c6144e0804",
  "connections": {
    "ScrapeNinja": {
      "main": [
        [
          {
            "node": "Cleanup HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Cleanup HTML": {
      "main": [
        [
          {
            "node": "Generate JS eval code via LLM",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Generate JS eval code via LLM",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Generate custom web scraper": {
      "main": [
        [
          {
            "node": "ScrapeNinja",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate JS eval code via LLM": {
      "main": [
        [
          {
            "node": "Eval generated code to extract data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Eval generated code to extract data": {
      "main": [
        []
      ]
    }
  }
}