This workflow corresponds to n8n.io template #11618 — we link there as the canonical source.
This workflow follows the Chat → Chat Trigger recipe pattern — see all workflows that pair these two integrations.
The workflow JSON
Copy or download the full n8n JSON below. Paste it into a new n8n workflow, add your credentials, activate. Full import guide →
{
"id": "Sy7yYvjk1MVkkI8j",
"meta": {
"templateCredsSetupCompleted": true
},
"name": "LMUNIT Test",
"tags": [],
"nodes": [
{
"id": "7ba3480f-b4ef-4cd2-91be-cf03623525d1",
"name": "When chat message received",
"type": "@n8n/n8n-nodes-langchain.chatTrigger",
"position": [
-176,
64
],
"parameters": {
"options": {
"responseMode": "responseNodes"
}
},
"typeVersion": 1.3
},
{
"id": "a258fcba-1add-4563-b2db-295ecbfd2e83",
"name": "Run LMUnit",
"type": "n8n-nodes-contextualai.contextualAi",
"position": [
1520,
64
],
"parameters": {
"query": "={{ $('When chat message received').first().json.chatInput }}",
"resource": "LMUnit",
"response": "={{ $json.response }}",
"unitTest": "={{ $json.unit_test }}"
},
"credentials": {
"contextualAiApi": {
"name": "<your credential>"
}
},
"typeVersion": 1
},
{
"id": "6603c1b2-1e37-479b-8566-4da1d2a867a5",
"name": "Preprocess OpenAI Response",
"type": "n8n-nodes-base.set",
"position": [
400,
-64
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "205eea09-5966-4687-a5bc-e2bbc1b245d9",
"name": "provider",
"type": "string",
"value": "OpenAI"
},
{
"id": "eba418d6-d2a9-4166-a58d-6b892fed1e78",
"name": "response",
"type": "string",
"value": "={{ $json.message.content }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "9d3f1b8e-83b7-43d4-9533-f5d30fb5ce6b",
"name": "Preprocess Gemini Response",
"type": "n8n-nodes-base.set",
"position": [
400,
144
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "e91fc940-84b9-4ce7-aa9b-cb07f23daf5e",
"name": "provider",
"type": "string",
"value": "Gemini"
},
{
"id": "f8b7d752-e6fc-487f-a4ee-cb23dc831a6d",
"name": "response",
"type": "string",
"value": "={{ $json.content.parts[0].text }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "64dab774-809d-46e6-9b8a-417334eef4ce",
"name": "Combine responses",
"type": "n8n-nodes-base.merge",
"position": [
624,
48
],
"parameters": {
"numberInputs": 3
},
"typeVersion": 3.2
},
{
"id": "856be276-1e27-4a4c-90d2-614dcff43c56",
"name": "Add unit tests to responses",
"type": "n8n-nodes-base.code",
"position": [
848,
64
],
"parameters": {
"jsCode": "//Unit Tests - it checks for clarity and conciseness\nconst tests = [\n \"Is the response clear and easy to understand?\",\n \"Does the response use precise and accurate information?\",\n];\n\n\nconst expanded = [];\n\nfor (const item of $input.all()) {\n for (const test of tests) {\n expanded.push({\n json: {\n provider: item.json.provider,\n response: item.json.response,\n unit_test: test\n }\n });\n }\n}\n\nreturn expanded;\n"
},
"typeVersion": 2
},
{
"id": "004de229-3b47-4a25-8758-cb78dac4f2dc",
"name": "Iterate over each unit tests",
"type": "n8n-nodes-base.splitInBatches",
"position": [
1072,
64
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "d5e6bbc0-1136-44fb-a1f3-44e1e7a1be4d",
"name": "Wait for 3 sec",
"type": "n8n-nodes-base.wait",
"position": [
1296,
64
],
"parameters": {
"amount": 3
},
"typeVersion": 1.1
},
{
"id": "30c53f36-50e1-46dd-b34a-95f3c9715645",
"name": "Associate scores with Responses",
"type": "n8n-nodes-base.set",
"position": [
1744,
64
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "d2adecc6-8c39-4917-84d9-35361b4ea97b",
"name": "provider",
"type": "string",
"value": "={{ $('Add unit tests to responses').item.json.provider }}"
},
{
"id": "7f1d3b1d-e76e-4c78-afa4-675133a719a9",
"name": "response",
"type": "string",
"value": "={{ $('Add unit tests to responses').item.json.response }}"
},
{
"id": "5c043663-0edd-4d6c-b3de-53774eb48415",
"name": "unit_test",
"type": "string",
"value": "={{ $('Add unit tests to responses').item.json.unit_test }}"
},
{
"id": "b024613a-e108-4d5f-85db-c8c85321f666",
"name": "score",
"type": "string",
"value": "={{ $json.score }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "93c4e9f3-f54d-4cb8-8bc9-f7cb0e6810c2",
"name": "Group Results Together",
"type": "n8n-nodes-base.code",
"position": [
1968,
144
],
"parameters": {
"jsCode": "const results = $input.all().map(i => i.json);\n\n/** @type {Record<string, any>} */\nconst grouped = {};\n\n// Group by provider and unit test\nresults.forEach(r => {\n const provider = r.provider || 'Unknown';\n const unitTest = r.unit_test || 'Unknown';\n const response = r.response || '';\n const score = parseFloat(r.score) || 0;\n\n if (!grouped[provider]) grouped[provider] = {};\n\n grouped[provider][unitTest] = {\n response,\n score\n };\n});\n\n// Return grouped results\nreturn [\n {\n json: {\n groupedResults: grouped\n }\n }\n];\n"
},
"typeVersion": 2
},
{
"id": "d7df1b30-3074-48ed-8760-a5abf2f27e25",
"name": "Format Final Result",
"type": "n8n-nodes-base.code",
"position": [
1296,
-128
],
"parameters": {
"jsCode": "const allItems = $input.all();\n\n// Merge all groupedResults into a single object\n/** @type {Record<string, Record<string, any>>} */\nconst merged = {};\n\nallItems.forEach(item => {\n const grouped = item.json.groupedResults;\n for (const provider in grouped) {\n if (!merged[provider]) merged[provider] = {};\n const tests = grouped[provider];\n for (const testName in tests) {\n merged[provider][testName] = tests[testName];\n }\n }\n});\n\nlet message = \"Here are the evaluation results:\\n\\n\";\n\nfor (const provider in merged) {\n const tests = merged[provider];\n const firstTestKey = Object.keys(tests)[0];\n const responseText = tests[firstTestKey].response || '';\n\n message += `Provider: ${provider}\\n\\n`;\n message += `Response: \"${responseText}\"\\n\\n`;\n message += `Evaluation Criteria:\\n\\n`;\n\n for (const testName in tests) {\n const score = tests[testName].score;\n message += ` \"${testName}\": ${score}\\n\\n`;\n }\n\n message += `\\n\\n`; \n}\n\nreturn [\n {\n json: { message }\n }\n];\n"
},
"typeVersion": 2
},
{
"id": "4890542e-6f0b-4e56-9d58-03e21e35296e",
"name": "Final Response",
"type": "@n8n/n8n-nodes-langchain.chat",
"position": [
1520,
-128
],
"parameters": {
"message": "={{ $json.message }}",
"options": {}
},
"typeVersion": 1
},
{
"id": "50fafc17-0180-4caa-9758-9f4cf6d292e2",
"name": "Preprocess Anthropic Response",
"type": "n8n-nodes-base.set",
"position": [
384,
336
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "f24ca891-bc37-4fd6-8696-0f1ed6113450",
"name": "provider",
"type": "string",
"value": "Anthropic"
},
{
"id": "3d753f19-8d94-40a3-9262-38893bae0414",
"name": "response",
"type": "string",
"value": "={{ $json.content[0].text }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "afee1fc1-af47-40cd-8350-4c19969249ad",
"name": "OpenAI GPT 4.1",
"type": "@n8n/n8n-nodes-langchain.openAi",
"position": [
48,
-64
],
"parameters": {
"modelId": {
"__rl": true,
"mode": "list",
"value": "gpt-4.1",
"cachedResultName": "GPT-4.1"
},
"options": {},
"messages": {
"values": [
{
"content": "={{ $json.chatInput }}"
}
]
}
},
"credentials": {
"openAiApi": {
"name": "<your credential>"
}
},
"typeVersion": 1.8
},
{
"id": "1577d553-e55e-4f5d-a7c5-a89f2af3a96f",
"name": "Gemini 2.5 Flash",
"type": "@n8n/n8n-nodes-langchain.googleGemini",
"position": [
48,
144
],
"parameters": {
"modelId": {
"__rl": true,
"mode": "list",
"value": "models/gemini-2.5-flash",
"cachedResultName": "models/gemini-2.5-flash"
},
"options": {},
"messages": {
"values": [
{
"content": "={{ $json.chatInput }}"
}
]
}
},
"credentials": {
"googlePalmApi": {
"name": "<your credential>"
}
},
"typeVersion": 1
},
{
"id": "1d779c2e-d50e-4466-a70a-232f557800a0",
"name": "Claude 4.5 Sonnet",
"type": "@n8n/n8n-nodes-langchain.anthropic",
"position": [
32,
336
],
"parameters": {
"modelId": {
"__rl": true,
"mode": "list",
"value": "claude-sonnet-4-5-20250929",
"cachedResultName": "claude-sonnet-4-5-20250929"
},
"options": {},
"messages": {
"values": [
{
"content": "={{ $json.chatInput }}"
}
]
}
},
"credentials": {
"anthropicApi": {
"name": "<your credential>"
}
},
"typeVersion": 1
},
{
"id": "89f15299-0f97-4fb4-ac18-26e3cbb2ee6f",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-816,
-320
],
"parameters": {
"width": 528,
"height": 1376,
"content": "# Multi-Model Response Evaluation using Contextual AI\u2019s LMUnit \n\n## PROBLEM \nEvaluating and comparing responses from multiple LLMs (OpenAI, Claude, Gemini) can be challenging when done manually. \n- Each model produces outputs that differ in clarity, tone, and reasoning structure. \n- Traditional evaluation metrics like ROUGE or BLEU fail to capture nuanced quality differences. \n- Human evaluations are inconsistent, slow, and difficult to scale. \n\n### This workflow automates **LLM response quality evaluation** using **Contextual AI\u2019s LMUnit**, a natural language unit testing framework that provides systematic, fine-grained feedback on response clarity and conciseness. \n> **Note:** LMUnit offers natural language-based evaluation with a 1\u20135 scoring scale, enabling consistent and interpretable results across different model outputs.\n\n## How it works \n- A **chat trigger node** collects responses from multiple LLMs such as **OpenAI GPT-4.1**, **Claude 4.5 Sonnet**, and **Gemini 2.5 Flash**. \n- Each model receives the same input prompt to ensure a fair comparison; the responses are then aggregated and associated with each test case\n- We use Contextual AI's LMUnit node to evaluate each response using predefined quality criteria: \n - \u201cIs the response clear and easy to understand?\u201d - Clarity \n - \u201cIs the response concise and free from redundancy?\u201d - Conciseness \n- **LMUnit** then produces evaluation scores (1\u20135) for each test \n- Results are aggregated and formatted into a structured summary showing model-wise performance and overall averages.\n\n## How to set up \n- Create a free [Contextual AI account](https://app.contextual.ai/) and obtain your `CONTEXTUALAI_API_KEY`. \n- In your **n8n** instance, add this key as a credential under \u201cContextual AI.\u201d \n- Obtain and add credentials for each model provider you wish to test: \n - **OpenAI API Key:** [platform.openai.com/account/api-keys](https://platform.openai.com/account/api-keys) \n - **Anthropic API Key:** [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys) \n - **Gemini API Key:** [ai.google.dev/gemini-api/docs/api-key](https://ai.google.dev/gemini-api/docs/api-key) \n- Start sending prompts using the chat interface to automatically generate model outputs and evaluations.\n\n## How to customize the workflow \n- Add more **evaluation criteria** (e.g., factual accuracy, tone, completeness) in the LMUnit test configuration. \n- Include additional **LLM providers** by duplicating the response generation nodes. \n- Adjust **thresholds and aggregation logic** to suit your evaluation goals. \n- Enhance the final summary formatting for dashboards, tables, or JSON exports. \n- For detailed API parameters, refer to the [LMUnit API reference](https://docs.contextual.ai/api-reference/lmunit/lmunit). \n- If you have feedback or need support, please email **feedback@contextual.ai**."
},
"typeVersion": 1
},
{
"id": "fa8085b7-11f8-4cc5-9430-e5f535345f41",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-192,
-320
],
"parameters": {
"color": 7,
"width": 736,
"height": 832,
"content": "### 1: User Message Submission \nWhen a user sends a message through the chat interface, the input is captured and forwarded simultaneously to three different language models: GPT 4.1, Gemini 2.5 Flash, and Claude 4.5 Sonnet. Each model processes the same message independently and generates its own response. \n"
},
"typeVersion": 1
},
{
"id": "100f6319-47d8-4882-a360-5e075c777924",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
576,
-320
],
"parameters": {
"color": 7,
"width": 416,
"height": 832,
"content": "### 2: Associate unit tests with language model responses\nAfter preprocessing responses, we attach unit tests to evaluate each response using predefined quality criteria:\n - \u201cIs the response clear and easy to understand?\u201d - Clarity \n - \u201cIs the response concise and free from redundancy?\u201d - Conciseness \n\n"
},
"typeVersion": 1
},
{
"id": "b6bcb7e0-a35d-4724-9f4e-bd5fc2897465",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1024,
-320
],
"parameters": {
"color": 7,
"width": 1136,
"height": 816,
"content": "### 3: Use Contextual AI's LMUnit to evaluate responses\nFinally, we pass the responses alongside the unit tests to Contextual AI's LMUnit to generate evaluation scores and aggregate them to display the final result to the user.\n"
},
"typeVersion": 1
}
],
"active": false,
"settings": {
"executionOrder": "v1"
},
"versionId": "a0ba83b2-1b60-465a-a387-2efee5d4ad08",
"connections": {
"Run LMUnit": {
"main": [
[
{
"node": "Associate scores with Responses",
"type": "main",
"index": 0
}
]
]
},
"OpenAI GPT 4.1": {
"main": [
[
{
"node": "Preprocess OpenAI Response",
"type": "main",
"index": 0
}
]
]
},
"Wait for 3 sec": {
"main": [
[
{
"node": "Run LMUnit",
"type": "main",
"index": 0
}
]
]
},
"Gemini 2.5 Flash": {
"main": [
[
{
"node": "Preprocess Gemini Response",
"type": "main",
"index": 0
}
]
]
},
"Claude 4.5 Sonnet": {
"main": [
[
{
"node": "Preprocess Anthropic Response",
"type": "main",
"index": 0
}
]
]
},
"Combine responses": {
"main": [
[
{
"node": "Add unit tests to responses",
"type": "main",
"index": 0
}
]
]
},
"Format Final Result": {
"main": [
[
{
"node": "Final Response",
"type": "main",
"index": 0
}
]
]
},
"Group Results Together": {
"main": [
[
{
"node": "Iterate over each unit tests",
"type": "main",
"index": 0
}
]
]
},
"Preprocess Gemini Response": {
"main": [
[
{
"node": "Combine responses",
"type": "main",
"index": 1
}
]
]
},
"Preprocess OpenAI Response": {
"main": [
[
{
"node": "Combine responses",
"type": "main",
"index": 0
}
]
]
},
"When chat message received": {
"main": [
[
{
"node": "OpenAI GPT 4.1",
"type": "main",
"index": 0
},
{
"node": "Gemini 2.5 Flash",
"type": "main",
"index": 0
},
{
"node": "Claude 4.5 Sonnet",
"type": "main",
"index": 0
}
]
]
},
"Add unit tests to responses": {
"main": [
[
{
"node": "Iterate over each unit tests",
"type": "main",
"index": 0
}
]
]
},
"Iterate over each unit tests": {
"main": [
[
{
"node": "Format Final Result",
"type": "main",
"index": 0
}
],
[
{
"node": "Wait for 3 sec",
"type": "main",
"index": 0
}
]
]
},
"Preprocess Anthropic Response": {
"main": [
[
{
"node": "Combine responses",
"type": "main",
"index": 2
}
]
]
},
"Associate scores with Responses": {
"main": [
[
{
"node": "Group Results Together",
"type": "main",
"index": 0
}
]
]
}
}
}
Credentials you'll need
Each integration node will prompt for credentials when you import. We strip credential IDs before publishing — you'll add your own.
anthropicApi, contextualAiApi, googlePalmApi, openAiApi
For the full experience, including quality scoring and batch install features for each workflow, upgrade to Pro.
About this workflow
Evaluating and comparing responses from multiple LLMs (OpenAI, Claude, Gemini) can be challenging when done manually. Each model produces outputs that differ in clarity, tone, and reasoning structure. Traditional evaluation metrics like ROUGE or BLEU fail to capture nuanced…
Source: https://n8n.io/workflows/11618/ — original creator credit. Request a take-down →
Related workflows
Workflows that share integrations, category, or trigger type with this one. All free to copy and import.
This workflow is for: People who want to quickly launch simple landing pages without paying monthly fees to landing page creators. It’s ideal for rapid prototyping, generation of large amounts of land
Most career advice is generic. This workflow builds a fully personalized AI coaching system that remembers every user, adapts to their career stage and goals, detects what kind of help they need, and
This workflow allows users to ask portfolio-related questions in a simple format (). It validates the input, fetches client data and holdings from Google Sheets, retrieves live market prices via API,
Use cases: We fundraise a lot using WhatsApp groups in East Africa, especially in Kenya! Keeping track of each payment and the tallying requires a lot of manual effort and brings unnecessary tension in
Automate the creation of high-performing YouTube Shorts in minutes! Content Creators: Generate engaging short videos effortlessly. Marketing Agencies: Produce client-ready content quickly. Business Ow