Documentation Index
Fetch the complete documentation index at: https://langwatch.ai/docs/llms.txt
Use this file to discover all available pages before exploring further.
You can manage datasets from LangWatch using the SDK, MCP tools, or REST API for offline evaluations and automated workflows.
Setup
import langwatch
# Initialize the SDK (or set LANGWATCH_API_KEY environment variable)
langwatch.setup()
import { LangWatch } from "langwatch";
const langwatch = new LangWatch();
List Datasets
Retrieve all datasets for your project with pagination support.
# List all datasets (first page, default limit)
result = langwatch.dataset.list_datasets()
for ds in result.data:
print(f"{ds.name} ({ds.slug}) - {len(ds.columnTypes)} columns")
print(f"Page {result.pagination.page} of {result.pagination.totalPages}")
# List with explicit pagination
result = langwatch.dataset.list_datasets(page=2, limit=10)
// List all datasets (first page, default limit)
const result = await langwatch.datasets.list();
for (const ds of result.data) {
console.log(`${ds.name} (${ds.slug}) - ${ds.columnTypes.length} columns`);
}
console.log(`Page ${result.pagination.page} of ${result.pagination.totalPages}`);
// List with explicit pagination
const page2 = await langwatch.datasets.list({ page: 2, limit: 10 });
Create a Dataset
Create a new dataset with an optional column schema.
# Create with name and column types
info = langwatch.dataset.create_dataset(
"User Feedback",
columns=[
{"name": "input", "type": "string"},
{"name": "output", "type": "string"},
],
)
print(f"Created: {info.name} (slug: {info.slug})")
# Create with just a name (columns can be added later)
info = langwatch.dataset.create_dataset("Simple Dataset")
// Create with name and column types
const info = await langwatch.datasets.create({
name: "User Feedback",
columnTypes: [
{ name: "input", type: "string" },
{ name: "output", type: "string" },
],
});
console.log(`Created: ${info.name} (slug: ${info.slug})`);
// Create with just a name
const simple = await langwatch.datasets.create({ name: "Simple Dataset" });
Get a Dataset
Fetch a dataset by slug or ID, including all its entries.
# Fetch dataset by slug or ID
dataset = langwatch.dataset.get_dataset("your-dataset-slug")
# Access entries
for entry in dataset.entries:
print(entry.id, entry.entry)
# Convert to pandas DataFrame for easy manipulation
df = dataset.to_pandas()
print(df.head())
// Fetch dataset by slug or ID
const dataset = await langwatch.datasets.get("your-dataset-slug");
// Access entries
for (const entry of dataset.entries) {
console.log(entry.entry);
}
Update a Dataset
Update a dataset’s name or column types.
# Update the name
updated = langwatch.dataset.update_dataset("my-dataset", name="New Name")
print(f"New slug: {updated.slug}")
# Update column types
updated = langwatch.dataset.update_dataset(
"my-dataset",
columns=[{"name": "question", "type": "string"}, {"name": "answer", "type": "string"}],
)
// Update the name
const updated = await langwatch.datasets.update("my-dataset", {
name: "New Name",
});
console.log(`New slug: ${updated.slug}`);
// Update column types
const withCols = await langwatch.datasets.update("my-dataset", {
columnTypes: [
{ name: "question", type: "string" },
{ name: "answer", type: "string" },
],
});
Delete a Dataset
Archive a dataset by slug or ID.
langwatch.dataset.delete_dataset("my-dataset")
await langwatch.datasets.delete("my-dataset");
List Records
Retrieve records from a dataset with pagination.
# List records (first page, default limit)
result = langwatch.dataset.list_records("my-dataset")
for record in result.data:
print(record.id, record.entry)
print(f"Total: {result.pagination.total}")
# List with explicit pagination
result = langwatch.dataset.list_records("my-dataset", page=2, limit=20)
// List records (first page, default limit)
const result = await langwatch.datasets.listRecords("my-dataset");
for (const record of result.data) {
console.log(record.id, record.entry);
}
console.log(`Total: ${result.pagination.total}`);
// List with explicit pagination
const page2 = await langwatch.datasets.listRecords("my-dataset", {
page: 2,
limit: 20,
});
Create Records
Batch-add records to an existing dataset.
records = langwatch.dataset.create_records(
"my-dataset",
entries=[
{"input": "What is LangWatch?", "output": "An LLM observability platform."},
{"input": "How do I get started?", "output": "Install the SDK and call setup()."},
],
)
for r in records:
print(f"Created record: {r.id}")
const records = await langwatch.datasets.createRecords("my-dataset", [
{ input: "What is LangWatch?", output: "An LLM observability platform." },
{ input: "How do I get started?", output: "Install the SDK and call setup()." },
]);
for (const r of records.data) {
console.log(`Created record: ${r.id}`);
}
Update a Record
Update (or upsert) a single record by ID.
record = langwatch.dataset.update_record(
"my-dataset",
"rec-1",
entry={"input": "updated question", "output": "updated answer"},
)
print(f"Updated: {record.id} -> {record.entry}")
const record = await langwatch.datasets.updateRecord("my-dataset", "rec-1", {
input: "updated question", output: "updated answer",
});
console.log(`Updated: ${record.id}`);
Delete Records
Batch-delete records by their IDs.
deleted_count = langwatch.dataset.delete_records(
"my-dataset",
record_ids=["rec-1", "rec-2"],
)
print(f"Deleted {deleted_count} records")
const result = await langwatch.datasets.deleteRecords("my-dataset", ["rec-1", "rec-2"]);
console.log(`Deleted ${result.deletedCount} records`);
Upload a File
Upload a CSV, JSON, or JSONL file to a dataset. If the dataset does not exist, it is created automatically.
# Upload to existing or create new (default: append)
result = langwatch.dataset.upload("my-dataset", file_path="data.csv")
print(f"Created {result.recordsCreated} records")
# Replace all records (delete existing, then upload)
result = langwatch.dataset.upload("my-dataset", file_path="data.csv", if_exists="replace")
# Error if dataset already exists (create-only)
result = langwatch.dataset.upload("my-dataset", file_path="data.csv", if_exists="error")
The `if_exists` parameter controls how conflicts are handled:

| Value | Behavior |
|---|---|
| `"append"` (default) | Append rows to the existing dataset, or create it if it doesn’t exist |
| `"replace"` | Delete all existing records first, then upload. Creates the dataset if it doesn’t exist |
| `"error"` | Raise an error if the dataset already exists. Creates it otherwise |
// Upload to existing or create new (default: append)
const file = new File([csvContent], "data.csv", { type: "text/csv" });
const result = await langwatch.datasets.upload("my-dataset", file);
console.log(`Created ${result.recordsCreated} records`);
// Replace all records (delete existing, then upload)
await langwatch.datasets.upload("my-dataset", file, { ifExists: "replace" });
// Error if dataset already exists (create-only)
await langwatch.datasets.upload("my-dataset", file, { ifExists: "error" });
The `ifExists` parameter controls how conflicts are handled:

| Value | Behavior |
|---|---|
| `"append"` (default) | Append rows to the existing dataset, or create it if it doesn’t exist |
| `"replace"` | Delete all existing records first, then upload. Creates the dataset if it doesn’t exist |
| `"error"` | Throw an error if the dataset already exists. Creates it otherwise |
Using with Evaluations
Datasets are commonly used to run offline evaluations against your LLM or agent.
import langwatch
langwatch.setup()
# Fetch dataset
df = langwatch.dataset.get_dataset("your-dataset-slug").to_pandas()
# Initialize evaluation
evaluation = langwatch.experiment.init("my-evaluation")
for index, row in evaluation.loop(df.iterrows()):
# Run your LLM/agent
output = my_llm(row["input"])
# Log evaluation metrics
evaluation.log("response_quality", index=index, score=0.9)
import { LangWatch } from "langwatch";
const langwatch = new LangWatch();
// Fetch dataset
const dataset = await langwatch.datasets.get("your-dataset-slug");
// Initialize evaluation
const evaluation = await langwatch.experiments.init("my-evaluation");
await evaluation.run(
dataset.entries.map((e) => e.entry),
async ({ item, index }) => {
// Run your LLM/agent
const output = await myLLM(item.input);
// Log evaluation metrics
evaluation.log("response_quality", { index, score: 0.9 });
},
{ concurrency: 4 }
);
Dataset Entry Structure
Each dataset entry contains:
| Field | Description |
|---|---|
| `id` | Unique identifier for the entry |
| `entry` | The actual data (e.g., input, expected_output, contexts) |
| `datasetId` | ID of the parent dataset |
| `projectId` | ID of the project |
| `createdAt` | Timestamp of creation |
| `updatedAt` | Timestamp of last update |
Typed Datasets (TypeScript)
You can define types for your dataset entries for better type safety:
type MyDatasetEntry = {
input: string;
expected_output: string;
contexts?: string[];
};
const dataset = await langwatch.datasets.get<MyDatasetEntry>("my-dataset");
// Now entry.entry is typed as MyDatasetEntry
for (const entry of dataset.entries) {
console.log(entry.entry.input); // Typed as string
console.log(entry.entry.expected_output); // Typed as string
}
If you’re using an AI coding agent (Claude Code, Cursor, etc.) with the LangWatch MCP server, dataset tools are available directly:
| Tool | Description |
|---|---|
| `platform_list_datasets` | List all datasets with record counts |
| `platform_get_dataset` | Get dataset metadata, columns, and record preview |
| `platform_create_dataset` | Create a new dataset with optional column definitions |
| `platform_update_dataset` | Update dataset name or column types |
| `platform_delete_dataset` | Archive a dataset |
| `platform_create_dataset_records` | Add records in batch (max 1000) |
| `platform_update_dataset_record` | Update a single record |
| `platform_delete_dataset_records` | Delete records by IDs |
The platform_list_datasets and platform_get_dataset tools support a format parameter — use "json" for raw data or "digest" (default) for AI-readable markdown.
Finding Your Dataset Slug
You can find the dataset slug in the LangWatch UI:
- Go to the Datasets page
- Click on your dataset
- The slug is shown in the URL:
app.langwatch.ai/{project}/datasets/{slug}
You can also use the dataset ID (starting with dataset_) which is shown in the dataset details.