{
  "meta": {
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "20985bbf-3a4f-4e7c-8c7d-4d4bee4e1eaa",
      "name": "Correctness Classifier",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        340,
        -320
      ],
      "parameters": {
        "text": "=question: {{ $json.question }}\n## answers:\n{{ $json.answer.split('\\n').filter(Boolean).map(str => `* ${str.trim()}`).join('\\n') }}\n## ground truth\n{{ $json.groundTruth.map(str => `* ${str}`).join('\\n') }}",
        "batching": {},
        "messages": {
          "messageValues": [
            {
              "message": "=Given a ground truth and an answer statements, analyze each statement and classify them in one of the following categories: TP (true positive): statements that are present in answer that are also directly supported by the one or more statements in ground truth, FP (false positive): statements present in the answer but not directly supported by any statement in ground truth, FN (false negative): statements found in the ground truth but not present in answer. Each statement can only belong to one of the categories. Provide a reason for each classification."
            }
          ]
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.7
    },
    {
      "id": "d40a51d3-6ea0-4a16-b094-7862c061904f",
      "name": "Examples1",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        520,
        -140
      ],
      "parameters": {
        "jsonSchemaExample": "{\n  \"TP\": [\n    {\n      \"statement\": \"The primary function of the sun is to provide light to the solar system.\",\n      \"reason\": \"This statement is somewhat supported by the ground truth mentioning the sun providing light and its roles, though it focuses more broadly on the sun's energy.\"\n    }\n  ],\n  \"FP\": [\n    {\n      \"statement\": \"The sun is powered by nuclear fission, similar to nuclear reactors on Earth.\",\n      \"reason\": \"This statement is incorrect and contradicts the ground truth which states that the sun is powered by nuclear fusion.\"\n    }\n  ],\n  \"FN\": [\n    {\n      \"statement\": \"The sun is powered by nuclear fusion, where hydrogen atoms fuse to form helium.\",\n      \"reason\": \"This accurate description of the sun\u2019s power source is not included in the answer.\"\n    },\n    {\n      \"statement\": \"This fusion process in the sun's core releases a tremendous amount of energy.\",\n      \"reason\": \"This process and its significance are not mentioned in the answer.\"\n    },\n    {\n      \"statement\": \"The energy from the sun provides heat and light, which are essential for life on Earth.\",\n      \"reason\": \"The answer only mentions light, omitting the essential aspects of heat and its necessity for life, which the ground truth covers.\"\n    },\n    {\n      \"statement\": \"The sun's light plays a critical role in Earth's climate system.\",\n      \"reason\": \"This broader impact of the sun\u2019s light on Earth's climate system is not addressed in the answer.\"\n    },\n    {\n      \"statement\": \"Sunlight helps to drive the weather and ocean currents.\",\n      \"reason\": \"The effect of sunlight on weather patterns and ocean currents is omitted in the answer.\"\n    }\n  ]\n}"
      },
      "typeVersion": 1.2
    },
    {
      "id": "32195796-6f39-47e1-b531-c7583a4bfc2d",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        340,
        -140
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4.1-mini",
          "cachedResultName": "gpt-4.1-mini"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "23186242-a4e5-4e70-a010-acbfa2eafb35",
      "name": "OpenAI Chat Model1",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        -752,
        50
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4.1-mini",
          "cachedResultName": "gpt-4.1-mini"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "2e6b4d45-f0e0-434a-92bb-c76bf96588ad",
      "name": "When fetching a dataset row",
      "type": "n8n-nodes-base.evaluationTrigger",
      "position": [
        -1280,
        -270
      ],
      "parameters": {
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit#gid=0",
          "cachedResultName": "Correctness"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=drivesdk",
          "cachedResultName": "96. Evaluations Test"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.6
    },
    {
      "id": "9d8b8b85-819a-4762-a573-4e4f50f3c2ba",
      "name": "Remap Input",
      "type": "n8n-nodes-base.set",
      "position": [
        -1060,
        -270
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "00924b90-278f-49f5-80f2-c297df0fcc97",
              "name": "chatInput",
              "type": "string",
              "value": "={{ $json.input }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "781b1a23-0359-46ab-bf8e-c86b1bcc0cf9",
      "name": "Evaluation",
      "type": "n8n-nodes-base.evaluation",
      "position": [
        -464,
        -170
      ],
      "parameters": {
        "operation": "checkIfEvaluating"
      },
      "typeVersion": 4.6
    },
    {
      "id": "7f8623d7-4263-45f2-a2f1-dcb07dc17c90",
      "name": "Set Input Fields",
      "type": "n8n-nodes-base.set",
      "position": [
        -244,
        -320
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "d58952c1-d346-4fbf-881e-d5c04b6781a5",
              "name": "question",
              "type": "string",
              "value": "={{ $('When fetching a dataset row').first().json.input }}"
            },
            {
              "id": "0f10a3d0-cf6e-4715-9ded-2cee54aa62ec",
              "name": "answer",
              "type": "string",
              "value": "={{ $json.output }}"
            },
            {
              "id": "edbe42ed-36a7-438a-989f-900673e61d0f",
              "name": "groundTruth",
              "type": "array",
              "value": "={{ $('When fetching a dataset row').first().json['ground truth'].split('\\n') }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "a126142b-bd50-48fb-ab3d-e20fea082dc7",
      "name": "No Operation, do nothing",
      "type": "n8n-nodes-base.noOp",
      "position": [
        -244,
        -70
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "b896a963-5fe8-4566-9f11-9c963d93e467",
      "name": "AI Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        -840,
        -170
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 2
    },
    {
      "id": "526b0248-d81b-428f-8f02-aa69acebd05c",
      "name": "When chat message received",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "position": [
        -1060,
        -70
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1.1
    },
    {
      "id": "acd7d0aa-00fa-4d9d-9995-f468e1f4770a",
      "name": "Merge",
      "type": "n8n-nodes-base.merge",
      "position": [
        1220,
        -320
      ],
      "parameters": {
        "mode": "combine",
        "options": {},
        "combineBy": "combineByPosition"
      },
      "typeVersion": 3.1
    },
    {
      "id": "a183f7f7-0071-42ae-a17d-fc7b087f0f49",
      "name": "Calculate F1 Score",
      "type": "n8n-nodes-base.code",
      "position": [
        980,
        -320
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "const { TP, FP, FN } = $input.item.json.output;\n\nreturn {\n  f1Score: fbetaScore(TP.length, FP.length, FN.length)\n};\n\nfunction fbetaScore(tp, fp, fn, beta = 1.0) {\n  const precision = tp + fp === 0 ? 0 : tp / (tp + fp);\n  const recall = tp + fn === 0 ? 0 : tp / (tp + fn);\n  \n  if (precision === 0 && recall === 0) return 0.0;\n\n  const betaSquared = beta * beta;\n  const fbeta = (1 + betaSquared)\n    * (precision * recall)\n    / ((betaSquared * precision) + recall);\n\n  return fbeta;\n}"
      },
      "typeVersion": 2
    },
    {
      "id": "cd961c4b-170d-46dc-9aa2-128eb4b1ffe7",
      "name": "Correctness Score",
      "type": "n8n-nodes-base.code",
      "position": [
        1400,
        -320
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "const {\n  f1Score,\n  similarityScore,\n} = $input.item.json;\n\nconst weights = [0.75, 0.25];\n\nreturn {\n  score: weightedAverage([f1Score, similarityScore], weights)\n};\n\nfunction weightedAverage(values, weights) {\n  const weightedSum = values.reduce((sum, val, i) => sum + val * weights[i], 0);\n  const totalWeight = weights.reduce((sum, w) => sum + w, 0);\n  return weightedSum / totalWeight;\n}"
      },
      "typeVersion": 2
    },
    {
      "id": "4a6f9201-8c09-4210-9322-bd14bd86d4fd",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1352,
        -530
      ],
      "parameters": {
        "color": 7,
        "width": 840,
        "height": 720,
        "content": "## 1. Setup Your AI Workflow to Use Evaluations\n[Learn more about the Evaluations Trigger](https://docs.n8n.io/integrations/builtin/?utm_source=n8n_app&utm_medium=node_settings_modal-credential_link&utm_campaign=n8n-nodes-base.evaluationTrigger)\n\nThe Evaluations Trigger is a separate execution which does not affect your production workflow in any way. It is manually triggered and automatically pulled datasets from the assigned Google Sheet."
      },
      "typeVersion": 1
    },
    {
      "id": "4be720d8-8a34-4f8d-a27a-04302542a2cc",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -500,
        -900
      ],
      "parameters": {
        "color": 7,
        "width": 2480,
        "height": 1100,
        "content": "## 2. Answer Correctness: Is the agent getting its facts correct?\n[Learn more about the Evaluations Trigger](https://docs.n8n.io/integrations/builtin/?utm_source=n8n_app&utm_medium=node_settings_modal-credential_link&utm_campaign=n8n-nodes-base.evaluationTrigger)\n\nThis evaluation measures answer correctness compared to ground truth as a combination of factuality and semantic similarity.\nWhen the agent is without tools, this test may check for accuracy in the agent's training data. For best results, the agent's response should be verbose and conversational. "
      },
      "typeVersion": 1
    },
    {
      "id": "2b3fe36c-4af9-4dec-bf0d-e9b6fe0386fb",
      "name": "Update Metrics",
      "type": "n8n-nodes-base.evaluation",
      "position": [
        1800,
        -320
      ],
      "parameters": {
        "metrics": {
          "assignments": [
            {
              "id": "1fd7759c-f4ef-4eda-87ad-9d9563b63e99",
              "name": "score",
              "type": "number",
              "value": "={{ $json.score }}"
            }
          ]
        },
        "operation": "setMetrics"
      },
      "typeVersion": 4.6
    },
    {
      "id": "2c3b928e-db4c-42b7-b530-34ad3f6a1a04",
      "name": "Update Outputs",
      "type": "n8n-nodes-base.evaluation",
      "position": [
        1600,
        -320
      ],
      "parameters": {
        "outputs": {
          "values": [
            {
              "outputName": "output",
              "outputValue": "={{ $('Set Input Fields').first().json.answer }}"
            },
            {
              "outputName": "score",
              "outputValue": "={{ $json.score }}"
            }
          ]
        },
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit#gid=0",
          "cachedResultName": "Correctness"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=drivesdk",
          "cachedResultName": "96. Evaluations Test"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.6
    },
    {
      "id": "9de68866-994d-455e-af12-fb608a75341d",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1840,
        -600
      ],
      "parameters": {
        "width": 440,
        "height": 800,
        "content": "## Try It Out!\n### This n8n template demonstrates how to calculate the evaluation metric \"Correctness\" which in this scenario, measures the compares and classifies the agent's response against a set of ground truths.\n\nThe scoring approach is adapted from [https://github.com/explodinggradients/ragas/blob/main/ragas/src/ragas/metrics/_answer_correctness.py](https://github.com/explodinggradients/ragas/blob/main/ragas/src/ragas/metrics/_answer_correctness.py)\n\n### How it works\n* This evaluation works best where the agent's response is allowed to be more verbose and conversational.\n* For our scoring, we classify the agent's response into 3 buckets: True Positive (in answer and ground truth), False Positive (in answer but not ground truth) and False Negative (not in answer but in ground truth).\n* We also calculate an average similarity score on the agent's response against all ground truths.\n* The classification and the similarity score is then averaged to give the final score. \n* A high score indicates the agent is accurate whereas a low score could indicate the agent has incorrect training data or is not providing a comprehensive enough answer.\n\n### Requirements\n* n8n version 1.94+\n* Check out this Google Sheet for a sample data [https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=sharing](https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=sharing)\n\n\n### Need Help?\nJoin the [Discord](https://discord.com/invite/XPKeKXeB7d) or ask in the [Forum](https://community.n8n.io/)!\n\nHappy Hacking!"
      },
      "typeVersion": 1
    },
    {
      "id": "5d3cbd9f-7cd9-4ff9-851c-e0b5b352302e",
      "name": "Get Embeddings",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        180,
        -500
      ],
      "parameters": {
        "url": "https://api.openai.com/v1/embeddings",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "authentication": "predefinedCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "input",
              "value": "={{ $json.groundTruth }}"
            },
            {
              "name": "model",
              "value": "text-embedding-3-small"
            },
            {
              "name": "encoding_format",
              "value": "float"
            }
          ]
        },
        "nodeCredentialType": "openAiApi"
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "1da5f360-0e0f-4643-8236-8f55cc343655",
      "name": "GroundTruth to Items",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -20,
        -500
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "groundTruth"
      },
      "typeVersion": 1
    },
    {
      "id": "e3c0673d-e80a-4f6f-a061-a9d92bfbb082",
      "name": "Get Embeddings1",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        180,
        -680
      ],
      "parameters": {
        "url": "https://api.openai.com/v1/embeddings",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "authentication": "predefinedCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "input",
              "value": "={{ $json.answer }}"
            },
            {
              "name": "model",
              "value": "text-embedding-3-small"
            },
            {
              "name": "encoding_format",
              "value": "float"
            }
          ]
        },
        "nodeCredentialType": "openAiApi"
      },
      "credentials": {
        "openAiApi": {
          "name": "<your credential>"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "70688754-a777-464c-8b3b-f216018438e9",
      "name": "Aggregate",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        580,
        -500
      ],
      "parameters": {
        "options": {},
        "aggregate": "aggregateAllItemData",
        "destinationFieldName": "groundTruth"
      },
      "typeVersion": 1
    },
    {
      "id": "1b91219c-0e7a-4843-8f37-ee0160af2d2d",
      "name": "Remap Embeddings",
      "type": "n8n-nodes-base.set",
      "position": [
        380,
        -680
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "3db07c98-f926-46e0-85f2-ed1eb137f842",
              "name": "answer",
              "type": "array",
              "value": "={{ $json.data[0].embedding }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "d6844754-380d-4610-984e-a16042a9e239",
      "name": "Remap Embeddings1",
      "type": "n8n-nodes-base.set",
      "position": [
        380,
        -500
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "3db07c98-f926-46e0-85f2-ed1eb137f842",
              "name": "data",
              "type": "array",
              "value": "={{ $json.data[0].embedding }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "8759c3ab-4d72-46f9-b969-339aba79b34f",
      "name": "Create Embeddings Result",
      "type": "n8n-nodes-base.merge",
      "position": [
        780,
        -500
      ],
      "parameters": {
        "mode": "combine",
        "options": {},
        "combineBy": "combineByPosition"
      },
      "typeVersion": 3.1
    },
    {
      "id": "3acb5098-2703-4474-86da-d96f76350936",
      "name": "Calculate Similarity Score",
      "type": "n8n-nodes-base.code",
      "position": [
        980,
        -500
      ],
      "parameters": {
        "jsCode": "const { answer, groundTruth = [] } = $input.item.json;\n\nconst scores = await Promise.all(groundTruth.map(truth =>\n  cosineSimilarity(answer, truth.data)\n));\n\nconst scoreAvg = scores.reduce((acc,score) => acc + score, 0) / scores.length;\n\nreturn { json: { similarityScore: scoreAvg } }\n\nfunction cosineSimilarity(a, b) {  \n  let dotProduct = normA = normB = 0;\n  for (let i = 0; i < a.length; i++) {\n    dotProduct += a[i] * b[i];\n    normA += a[i] ** 2;\n    normB += b[i] ** 2;\n  }\n  return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));\n}"
      },
      "typeVersion": 2
    }
  ],
  "connections": {
    "Merge": {
      "main": [
        [
          {
            "node": "Correctness Score",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "AI Agent": {
      "main": [
        [
          {
            "node": "Evaluation",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Aggregate": {
      "main": [
        [
          {
            "node": "Create Embeddings Result",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Examples1": {
      "ai_outputParser": [
        [
          {
            "node": "Correctness Classifier",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Evaluation": {
      "main": [
        [
          {
            "node": "Set Input Fields",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "No Operation, do nothing",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Remap Input": {
      "main": [
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Embeddings": {
      "main": [
        [
          {
            "node": "Remap Embeddings1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Update Outputs": {
      "main": [
        [
          {
            "node": "Update Metrics",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Embeddings1": {
      "main": [
        [
          {
            "node": "Remap Embeddings",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Remap Embeddings": {
      "main": [
        [
          {
            "node": "Create Embeddings Result",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set Input Fields": {
      "main": [
        [
          {
            "node": "Correctness Classifier",
            "type": "main",
            "index": 0
          },
          {
            "node": "Get Embeddings1",
            "type": "main",
            "index": 0
          },
          {
            "node": "GroundTruth to Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Correctness Score": {
      "main": [
        [
          {
            "node": "Update Outputs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Correctness Classifier",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Remap Embeddings1": {
      "main": [
        [
          {
            "node": "Aggregate",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Calculate F1 Score": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "OpenAI Chat Model1": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "GroundTruth to Items": {
      "main": [
        [
          {
            "node": "Get Embeddings",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Correctness Classifier": {
      "main": [
        [
          {
            "node": "Calculate F1 Score",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Embeddings Result": {
      "main": [
        [
          {
            "node": "Calculate Similarity Score",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Calculate Similarity Score": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When chat message received": {
      "main": [
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When fetching a dataset row": {
      "main": [
        [
          {
            "node": "Remap Input",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}