This metric evaluates whether the items in the contexts that are relevant to the output are ranked higher than the irrelevant ones. Higher scores indicate better utilization.
POST /legacy/ragas_context_utilization/evaluate
Copy
import langwatch

# Fetch the dataset and convert it to a pandas DataFrame for row iteration.
df = langwatch.datasets.get_dataset("dataset-id").to_pandas()

# Start the experiment under which the evaluations below are recorded.
experiment = langwatch.experiment.init("my-experiment")

for index, row in experiment.loop(df.iterrows()):
    # your execution code here
    # NOTE: that code must assign `output`, which is passed to the evaluator below.
    experiment.evaluate(
        "legacy/ragas_context_utilization",
        index=index,
        data={
            "input": row["input"],
            "output": output,
            "contexts": row["contexts"],
        },
        settings={},
    )