
# LLM Answer Match

> Uses an LLM to check whether the generated output answers the question correctly, in the same way as the expected output, even if their style differs.
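
The endpoint can be called through the LangWatch SDKs (see the code samples in the spec below) or directly over HTTP. Here is a minimal sketch of a direct call with Python's `requests`, assuming the API key lives in a `LANGWATCH_API_KEY` environment variable; the URL, `X-Auth-Token` header, and body shape are taken from the OpenAPI definition below, and the question/answer values are illustrative placeholders.

```python
import os

import requests

# Server URL and path from the OpenAPI definition below.
url = "https://app.langwatch.ai/api/evaluations/langevals/llm_answer_match/evaluate"

payload = {
    "data": {
        "input": "What is the capital of France?",          # the question asked
        "output": "The capital city of France is Paris.",   # the generated answer
        "expected_output": "Paris",                          # the gold answer
    },
    "settings": {},  # empty settings fall back to the evaluator defaults
}

response = requests.post(
    url,
    json=payload,
    headers={"X-Auth-Token": os.environ["LANGWATCH_API_KEY"]},  # assumed env var
    timeout=60,
)
response.raise_for_status()

# A successful call returns a list of EvaluationResult objects (see the schema below).
for result in response.json():
    print(result["status"], result.get("passed"), result.get("score"), result.get("details"))
```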



## OpenAPI

````yaml post /langevals/llm_answer_match/evaluate
openapi: 3.1.0
info:
  title: LangEvals API
  version: 1.0.0
  description: API for LangEvals evaluators
servers:
  - url: https://app.langwatch.ai/api/evaluations
    description: Production server
security:
  - api_key: []
paths:
  /langevals/llm_answer_match/evaluate:
    post:
      summary: LLM Answer Match
      description: >-
        Uses an LLM to check whether the generated output answers the question
        correctly, in the same way as the expected output, even if their style
        differs.
      operationId: langevals_llm_answer_match_evaluate
      requestBody:
        content:
          application/json:
            schema:
              allOf:
                - $ref: '#/components/schemas/langevals_llm_answer_matchRequest'
                - type: object
                  properties:
                    settings:
                      $ref: '#/components/schemas/langevals_llm_answer_matchSettings'
        required: true
      responses:
        '200':
          description: Successful evaluation
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/EvaluationResult'
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                type: object
                properties:
                  detail:
                    type: string
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                type: object
                properties:
                  detail:
                    type: string
      x-codeSamples:
        - lang: python
          label: Experiment
          source: |
            import langwatch

            df = langwatch.datasets.get_dataset("dataset-id").to_pandas()

            experiment = langwatch.experiment.init("my-experiment")

            for index, row in experiment.loop(df.iterrows()):
                output = my_llm(row["input"])  # your execution code here
                experiment.evaluate(
                    "langevals/llm_answer_match",
                    index=index,
                    data={
                        "output": output,
                        "expected_output": row["expected_output"],
                        "input": row["input"],
                    },
                    settings={}
                )
        - lang: python
          label: Online Evaluation
          source: |-
            import langwatch

            @langwatch.span()
            def my_llm_step():
                ... # your existing code
                result = langwatch.evaluation.evaluate(
                    "langevals/llm_answer_match",
                    name="My LLM Answer Match Check",
                    data={
                        "output": "",
                        "expected_output": "",
                        "input": "",
                    },
                    settings={},
                )
                print(result)
        - lang: typescript
          label: Experiment
          source: |-
            import { LangWatch } from "langwatch";

            const langwatch = new LangWatch();

            // Fetch dataset from LangWatch
            const dataset = await langwatch.datasets.get("your-dataset-slug");

            const experiment = await langwatch.experiments.init("my-experiment");

            await experiment.run(
              dataset.entries.map((e) => e.entry),
              async ({ item, index }) => {
                // Run your LLM/agent
                const output = await myLLM(item.input);

                // Evaluate the output
                await experiment.evaluate("langevals/llm_answer_match", {
                  index,
                  data: {
                    output: output,
                    expected_output: item.expected_output,
                    input: item.input,
                  },
                });
              },
              { concurrency: 4 }
            );
        - lang: typescript
          label: Online Evaluation
          source: |-
            import { LangWatch } from "langwatch";

            const langwatch = new LangWatch();

            async function myLLMStep(input: string): Promise<string> {
              const output = ""; // ... your existing code that produces the output

              // Call the evaluator
              const result = await langwatch.evaluations.evaluate("langevals/llm_answer_match", {
                name: "my-evaluation",
                data: {
                  output: output, // your output value
                  expected_output: "", // your expected_output value
                  input: input, // your input value
                },
                settings: {},
              });

              console.log(result);
              return output;
            }
components:
  schemas:
    langevals_llm_answer_matchRequest:
      type: object
      properties:
        trace_id:
          type: string
          description: Optional trace ID to associate this evaluation with a trace
        data:
          type: object
          properties:
            output:
              type: string
              description: The output/response text to evaluate
            expected_output:
              type: string
              description: The expected output for comparison
            input:
              type: string
              description: The input text to evaluate
          required:
            - output
            - expected_output
      required:
        - data
    langevals_llm_answer_matchSettings:
      type: object
      properties:
        model:
          description: The model to use for evaluation
          type: string
          default: openai/gpt-5
        max_tokens:
          description: Max tokens allowed for evaluation
          type: number
          default: 128000
        prompt:
          description: Prompt for the comparison
          type: string
          default: >-
            Verify that the predicted answer matches the gold answer for the
            question. Style does not matter, for example the gold answer may be
            more direct while the predicted answer more verbose and still be
            correct.
    EvaluationResult:
      type: object
      properties:
        status:
          type: string
          enum:
            - processed
            - skipped
            - error
        score:
          type: number
          description: Numeric score from the evaluation
        passed:
          type: boolean
          description: Whether the evaluation passed
        label:
          type: string
          description: Label assigned by the evaluation
        details:
          type: string
          description: Additional details about the evaluation
        cost:
          type: object
          properties:
            currency:
              type: string
            amount:
              type: number
  securitySchemes:
    api_key:
      type: apiKey
      in: header
      name: X-Auth-Token
      description: API key for authentication

````
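
The `settings` object accepts the fields defined in the `langevals_llm_answer_matchSettings` schema above. Below is a minimal sketch of setting them explicitly in the Python online-evaluation flow from the code samples; the field names and default values are copied from the schema, and the data values are illustrative placeholders rather than real trace content.

```python
import langwatch

@langwatch.span()
def my_llm_step():
    ...  # your existing code
    result = langwatch.evaluation.evaluate(
        "langevals/llm_answer_match",
        name="My LLM Answer Match Check",
        data={
            "input": "What is the capital of France?",
            "output": "The capital city of France is Paris.",
            "expected_output": "Paris",
        },
        settings={
            "model": "openai/gpt-5",  # schema default; set another model identifier to override
            "max_tokens": 128000,     # schema default
            # "prompt": "...",        # optionally replace the default comparison prompt
        },
    )
    print(result)
```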