import { LangWatch } from "langwatch";

// Client constructed with no explicit options.
const langwatch = new LangWatch();

// Slug identifying the experiment whose configuration is saved in LangWatch.
const experimentSlug = "your-experiment-slug";

// Run the experiment, wait for it to finish, then print its summary.
const result = await langwatch.experiments.run(experimentSlug);
result.printSummary();
That’s it! The experiment runs with the configuration saved in LangWatch.
# Run the saved experiment with explicit wait/poll settings and a progress callback.
# NOTE(review): `langwatch` is not imported in this snippet; it assumes an earlier
# `import langwatch`.
result = langwatch.experiment.run(
    "my-experiment",
    timeout=300.0,  # Max wait time (seconds)
    poll_interval=5.0,  # How often to check status
    # Called with the number of finished items and the total; prints "done/total".
    on_progress=lambda done, total: print(f"{done}/{total}"),
)
result.print_summary(exit_on_failure=True)  # Exit with code 1 on failures
import langwatch

# Load your dataset
dataset = langwatch.dataset.get_dataset("my-dataset").to_pandas()

# Initialize experiment
experiment = langwatch.experiment.init("ci-quality-check")

# Run through each test case
for idx, row in experiment.loop(dataset.iterrows()):
    # Call your LLM/agent (`my_llm` is a placeholder for your own function)
    response = my_llm(row["input"])

    # Run evaluators
    experiment.evaluate(
        "ragas/faithfulness",
        index=idx,
        data={
            "input": row["input"],
            "output": response,
            "contexts": row["contexts"],
        },
    )

# Print summary and exit with code 1 on failure
experiment.print_summary()
import { LangWatch } from "langwatch";

const langwatch = new LangWatch();

// Load your dataset
const dataset = await langwatch.datasets.get("my-dataset");

// Initialize experiment
const experiment = await langwatch.experiments.init("ci-quality-check");

// Run through each test case
await experiment.run(
  // Pass only the `entry` payload of each dataset entry to the callback.
  dataset.entries.map((e) => e.entry),
  async ({ item, index }) => {
    // Call your LLM/agent (`myLLM` is a placeholder for your own function)
    const response = await myLLM(item.input);

    // Run evaluators
    await experiment.evaluate("ragas/faithfulness", {
      index,
      data: {
        input: item.input,
        output: response,
        contexts: item.contexts,
      },
    });
  },
  { concurrency: 4 },
);

// Print summary and exit with code 1 on failure
experiment.printSummary();
import sys

from langwatch.evaluation import (
    EvaluationNotFoundError,
    EvaluationTimeoutError,
    EvaluationRunFailedError,
)

# Run the experiment and turn each known failure mode into a readable message
# plus a non-zero exit code. Any other exception propagates unchanged.
# NOTE(review): `langwatch` is not imported in this snippet; it assumes an earlier
# `import langwatch`.
try:
    result = langwatch.experiment.run("my-experiment", timeout=300)
    result.print_summary()
except EvaluationNotFoundError:
    print("Experiment not found - check the slug")
    # sys.exit, not the bare exit() helper: exit() is injected by the `site`
    # module for interactive use and is missing under `python -S` / frozen apps.
    sys.exit(1)
except EvaluationTimeoutError as e:
    print(f"Timeout: only {e.progress}/{e.total} completed")
    sys.exit(1)
except EvaluationRunFailedError as e:
    print(f"Run failed: {e.error_message}")
    sys.exit(1)
import {
  EvaluationNotFoundError,
  EvaluationTimeoutError,
  EvaluationRunFailedError,
} from "langwatch";

// Run the experiment and report each failure mode before exiting with code 1.
// NOTE(review): `langwatch` is not defined in this snippet; it assumes a client
// created beforehand with `new LangWatch()`.
try {
  // NOTE(review): 300000 is presumably milliseconds (5 minutes) - confirm against the SDK reference.
  const result = await langwatch.experiments.run("my-experiment", { timeout: 300000 });
  result.printSummary();
} catch (error) {
  if (error instanceof EvaluationNotFoundError) {
    console.error("Experiment not found - check the slug");
  } else if (error instanceof EvaluationTimeoutError) {
    console.error(`Timeout: only ${error.progress}/${error.total} completed`);
  } else if (error instanceof EvaluationRunFailedError) {
    console.error(`Run failed: ${error.errorMessage}`);
  } else {
    // Anything else (network failure, programming error, ...) would otherwise
    // exit with code 1 and no output, leaving the CI log without a cause.
    console.error("Unexpected error while running experiment:", error);
  }
  process.exit(1);
}