{
  "name": "Local AI Expert",
  "nodes": [
    {
      "parameters": {
        "respondWith": "allIncomingItems",
        "options": {
          "responseHeaders": {
            "entries": [
              {
                "name": "X-n8n-Signature",
                "value": "EvtIS^EBVISeie6svB@6ev"
              }
            ]
          }
        }
      },
      "id": "b1da4de0-0c68-4b0b-84ab-08133cbde32c",
      "name": "Respond to Webhook",
      "type": "n8n-nodes-base.respondToWebhook",
      "typeVersion": 1.1,
      "position": [
        1600,
        400
      ]
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "ee2bcd57-3b4c-43f9-b4b7-3a25687b9a68",
              "name": "query",
              "value": "={{ $json.body.query }}",
              "type": "string"
            },
            {
              "id": "63f23e51-af2b-47c4-a288-5abaf9b6c357",
              "name": "user_id",
              "value": "={{ $json.body.user_id }}",
              "type": "string"
            },
            {
              "id": "b97a3670-8a87-481b-8695-db44624be7d8",
              "name": "request_id",
              "value": "={{ $json.body.request_id }}",
              "type": "string"
            },
            {
              "id": "7d3fa06d-08f7-4517-b9c5-3c46ff476f55",
              "name": "session_id",
              "value": "={{ $json.body.session_id }}",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "74bbc464-0af1-4ea5-9961-a796438418fb",
      "name": "Prep Input Fields",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        760,
        400
      ]
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "b5eaa2a2-a6bc-40ab-af5e-baa8a5dda1a7",
              "name": "success",
              "value": "=true",
              "type": "boolean"
            }
          ]
        },
        "options": {}
      },
      "id": "6b1e4fb6-cae8-4cb0-ab4b-143d5ccd11e8",
      "name": "Prep Output Fields",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        1380,
        400
      ]
    },
    {
      "parameters": {
        "model": "claude-3-5-haiku-20241022",
        "options": {}
      },
      "id": "7f675009-7bb8-4975-b4d8-813bacb2bae0",
      "name": "Anthropic Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatAnthropic",
      "typeVersion": 1.2,
      "position": [
        880,
        620
      ],
      "credentials": {
        "anthropicApi": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "sessionIdType": "customKey",
        "sessionKey": "={{$('Prep Input Fields').item.json.session_id}}",
        "tableName": "messages",
        "contextWindowLength": 10
      },
      "id": "95aa3010-d9f9-4f37-bf2b-df84b98521df",
      "name": "Postgres Chat Memory",
      "type": "@n8n/n8n-nodes-langchain.memoryPostgresChat",
      "typeVersion": 1.3,
      "position": [
        1040,
        620
      ],
      "credentials": {
        "postgres": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {},
      "id": "252e0a56-5e9b-4173-89db-e430816b9037",
      "name": "Execute Workflow Trigger",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "typeVersion": 1,
      "position": [
        620,
        880
      ]
    },
    {
      "parameters": {
        "tableId": "messages",
        "fieldsUi": {
          "fieldValues": [
            {
              "fieldId": "session_id",
              "fieldValue": "={{ $json.session_id }}"
            },
            {
              "fieldId": "message",
              "fieldValue": "={{ {\n\"type\": \"ai\",\n\"content\": `-> Researching url: ${$json.query.url}`,\n\"data\": {},\n\"additional_kwargs\": {},\n\"response_metadata\": {}\n} }}"
            }
          ]
        }
      },
      "id": "3148b5ae-34f5-4138-b088-7537b40cc2fe",
      "name": "Add AI Research Message to DB",
      "type": "n8n-nodes-base.supabase",
      "typeVersion": 1,
      "position": [
        820,
        880
      ],
      "credentials": {
        "supabaseApi": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "url": "={{ $('Execute Workflow Trigger').item.json.query.url }}",
        "options": {}
      },
      "id": "1120cd14-efb0-4ccf-87af-88254e8c8480",
      "name": "HTTP Request",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        1040,
        880
      ]
    },
    {
      "parameters": {
        "operation": "extractHtmlContent",
        "extractionValues": {
          "values": [
            {
              "key": "data",
              "cssSelector": "form, :has(> article)"
            }
          ]
        },
        "options": {}
      },
      "id": "2008623e-7167-477f-83b6-154df71a2f83",
      "name": "HTML",
      "type": "n8n-nodes-base.html",
      "typeVersion": 1.2,
      "position": [
        1260,
        880
      ]
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "9e600a04-b321-4862-9c8a-4e434575dcdf",
              "name": "response",
              "value": "={{ $json.data.substring(0, 5000) }}",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "6b1499e8-2350-444c-a72f-afc96bf6c3e3",
      "name": "Edit Fields",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        1480,
        880
      ]
    },
    {
      "parameters": {
        "name": "Research_AI_Models",
        "description": "Use this tool to fetch model information (list of models or info on a specific model) from Ollama or HuggingFace.\n\nThe base HuggingFace URL is: https://huggingface.co/models\n\nThe base Ollama URL is:\nhttps://ollama.com/search\n\nTo sort models in HuggingFace, use a URL like:\nhttps://huggingface.co/models?sort=[SORT_OPTION]\n\nSORT_OPTION can be one of:\n- trending (most trending first)\n- likes (most likes frst)\n- downloads (most downloads first)\n- created (newest first)\n- modified (most recently updated first)\n\nFor Ollama, the URL gives you the most popular models first unless you add this parameter to the URL:\n\n- o=newest\n\nWhich then gives you the newest models first. You can also add the \"c\" parameter to the URL which can be one of the following:\n\n- c=embedding (just embedding models)\n- c=vision (just vision models)\n- c=tools (just models with function calling)\n\nSo as an example, the URL \"https://ollama.com/search?c=tools&o=newest\" would give you the newest models that support tool calling.",
        "workflowId": {
          "__rl": true,
          "value": "={{ $workflow.id }}",
          "mode": "id"
        },
        "fields": {
          "values": [
            {
              "name": "session_id",
              "stringValue": "={{$('Prep Input Fields').item.json.session_id}}"
            }
          ]
        },
        "specifyInputSchema": true,
        "jsonSchemaExample": "{\n\t\"url\": \"The URL to research\"\n}"
      },
      "id": "3f3dd9ee-fc1a-4546-b7fc-bf18e8bf2ec0",
      "name": "Call AI Model Research Tool",
      "type": "@n8n/n8n-nodes-langchain.toolWorkflow",
      "typeVersion": 1.2,
      "position": [
        1200,
        620
      ]
    },
    {
      "parameters": {
        "resource": "all",
        "limit": 2,
        "additionalFields": {
          "keyword": "={{ $fromAI('search_keyword') }}"
        }
      },
      "id": "996c19b6-06d2-4bc2-9dfa-213038755959",
      "name": "Hacker News",
      "type": "n8n-nodes-base.hackerNewsTool",
      "typeVersion": 1,
      "position": [
        1340,
        620
      ]
    },
    {
      "parameters": {
        "promptType": "define",
        "text": "={{$('Prep Input Fields').item.json.query}}",
        "options": {
          "systemMessage": "You are an expert advisor helping users run LLMs locally. Follow these instructions when responding to queries:\n\n# Hard guidelines\n\n1. A GPU with 24GB of VRAM is enough to run 32b parameter models since default Ollama 32b parameter models are Q4.\n2. 48GB of VRAM (so two GPUs with 24GB of VRAM or one with 48GB) is enough to run 70b parameter models Q4 but it won't be super fast.\n3. Lower end graphics cards like a 1070 are enough to run 7b parameter models or smaller.\n\n# Hardware Assessment Instructions\nWhen a user asks about running an LLM:\n1. If they haven't provided hardware specs, ask for:\n   - GPU model and VRAM\n   - System RAM\n   - Whether they're using Apple Silicon (M1/M2/M3/M4)\n2. If they have shared specs, immediately provide quantization options:\n   - For NVIDIA GPUs: List Q4, Q5, Q8 VRAM requirements\n   - For Apple Silicon: Note Metal performance expectations\n   - Include actual VRAM requirements for each quantization level\n\n# Response Structure\nAlways structure answers in this order when applicable:\n1. Direct answer about feasibility\n2. Quantization recommendations if applicable\n3. Performance expectations\n4. Alternatives if needed\n5. Cloud options if hardware is insufficient\n\nExample: \"Can I run Mixtral 8x7B on my RTX 3080?\"\n- \"Yes, with 10GB VRAM you can run Mixtral using Q4 quantization. Here's what to expect:\n  - Q4: Runs well, using 9GB VRAM\n  - Q5: Not recommended (requires 11GB VRAM)\n  - Performance: 20-30 tokens/second\n  - Alternative: Consider Mistral 7B for faster inference\n  - Cloud option: RunPod offers $0.19/hour for better performance\"\n\n# Key Information to Include\n\n## Quantization Facts\n- Always mention that Q4 is often sufficient for most use cases\n- Note when higher quantization (Q5/Q8) provides meaningful benefits\n- Explain VRAM savings (e.g., \"Q4 typically reduces VRAM by 60-75%\")\n\n## Context Window Information\nWhen relevant, explain:\n- VRAM usage scales with context length\n- Typical VRAM requirements per 1K tokens\n- When to reduce context for better performance\n\n## Hardware-Specific Advice\nFor NVIDIA GPUs:\n- List specific VRAM requirements\n- Note architecture requirements (e.g., Ampere, Ada Lovelace)\n- Recommend optimal batch sizes\n\nFor Apple Silicon:\n- Note which models run well natively\n- Mention Metal optimization benefits\n- Include typical performance metrics\n\n## Cloud Alternatives\nWhen hardware is insufficient, provide:\n- RunPod pricing ($/hour for relevant GPU)\n- OpenRouter API costs ($/1K tokens)\n- DigitalOcean/Novita options for longer-term use\n\n## Fine-tuning Information\nWhen fine-tuning is mentioned:\n- State 2-3x VRAM requirement vs inference\n- Recommend cloud services if local hardware insufficient\n- Include approximate cost estimates\n\n## Framework Recommendations\nInclude specific tooling advice:\n- Ollama for easy deployment\n- llama.cpp for maximum optimization\n- AirLLM for running larger models on limited hardware\n\n# Example Scenarios and Responses\n\nQuery: \"What can I run on 8GB VRAM?\"\nResponse format:\n1. List 2-3 best models with Q4 quantization\n2. Mention context window limitations\n3. Suggest cloud alternatives\n4. Note optimization frameworks\n\nQuery: \"How do I run Phi-2 on M2 Mac?\"\nResponse format:\n1. Confirm native support\n2. List expected performance\n3. Recommend tools (e.g., Ollama)\n4. Include context window capabilities\n\nQuery: \"Best GPU for running local LLMs?\"\nResponse format:\n1. List top 3 current GPUs with prices\n2. Include VRAM and model compatibility\n3. Note future-proofing considerations\n4. Mention cloud alternatives\n\n# Response Rules\n- Always mention quantization options first\n- Include specific numbers (VRAM, tokens/sec)\n- Offer alternatives when primary option isn't ideal\n- Explain tradeoffs in 1-2 sentences\n- Include rough cost estimates for cloud options"
        }
      },
      "id": "a71f8351-6443-4ce9-a0c1-ac3540816011",
      "name": "AI Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "typeVersion": 1.7,
      "position": [
        1000,
        400
      ]
    },
    {
      "parameters": {
        "httpMethod": "POST",
        "path": "invoke-local-ai-expert",
        "authentication": "headerAuth",
        "responseMode": "responseNode",
        "options": {}
      },
      "id": "0b09ed0b-2b5f-4523-b40a-022524cd4f43",
      "name": "Webhook",
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2,
      "position": [
        500,
        400
      ],
      "credentials": {
        "httpHeaderAuth": {
          "name": "<your credential>"
        }
      }
    },
    {
      "parameters": {
        "content": "# Local AI Expert Agent\n\nAuthor: [Cole Medin](https://www.youtube.com/@ColeMedin)\n\nThis n8n-powered agent serves as your personal consultant for local AI deployments and open-source Large Language Models (LLMs). It has been prompted with general guidance for LLM hardware requirements and deployment strategies. And it also is able to search through the Ollama and HuggingFace model catalogs for information on specific models.\n\n## Features\n\n- Provides information about trending open-source LLMs\n- Advises on hardware requirements for running different LLMs\n- Offers guidance on local AI deployment strategies\n- Answers questions about model optimization and performance\n- Helps troubleshoot common local AI setup issues",
        "height": 430.6255813953493,
        "width": 651.0139534883727,
        "color": 6
      },
      "id": "2863d0ae-4068-4581-9f3b-29b277c2398f",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "typeVersion": 1,
      "position": [
        -200,
        520
      ]
    }
  ],
  "connections": {
    "Prep Output Fields": {
      "main": [
        [
          {
            "node": "Respond to Webhook",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Anthropic Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Postgres Chat Memory": {
      "ai_memory": [
        [
          {
            "node": "AI Agent",
            "type": "ai_memory",
            "index": 0
          }
        ]
      ]
    },
    "Prep Input Fields": {
      "main": [
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Execute Workflow Trigger": {
      "main": [
        [
          {
            "node": "Add AI Research Message to DB",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Add AI Research Message to DB": {
      "main": [
        [
          {
            "node": "HTTP Request",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request": {
      "main": [
        [
          {
            "node": "HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTML": {
      "main": [
        [
          {
            "node": "Edit Fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Call AI Model Research Tool": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Hacker News": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "AI Agent": {
      "main": [
        [
          {
            "node": "Prep Output Fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Webhook": {
      "main": [
        [
          {
            "node": "Prep Input Fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": true,
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "30103370-02ac-4177-88f3-d92695cb2cbb",
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "id": "IjY6kp6rHrNfAiGS",
  "tags": [
    {
      "createdAt": "2024-12-09T14:35:20.507Z",
      "updatedAt": "2024-12-09T14:35:20.507Z",
      "id": "wBUwbX8AS8QmBHOC",
      "name": "studio-prod"
    }
  ]
}