ℹ️ These docs are for the v2.0 version of Galileo. Documentation for the v1.0 version can be found here.

Create a new experiment for a project.
curl --request POST \
--url https://api.galileo.ai/v2/projects/{project_id}/experiments \
--header 'Content-Type: application/json' \
--header 'Galileo-API-Key: <api-key>' \
--data '
{
"name": "<string>",
"task_type": 16,
"playground_id": "<string>",
"prompt_template_version_id": "<string>",
"dataset": {
"dataset_id": "<string>",
"version_index": 123
},
"playground_prompt_id": "<string>",
"prompt_settings": {
"logprobs": true,
"top_logprobs": 5,
"echo": false,
"n": 1,
"reasoning_effort": "medium",
"verbosity": "medium",
"deployment_name": "<string>",
"model_alias": "GPT-4o",
"temperature": 1,
"max_tokens": 4096,
"stop_sequences": [
"<string>"
],
"top_p": 1,
"top_k": 40,
"frequency_penalty": 0,
"presence_penalty": 0,
"tools": [
{}
],
"tool_choice": "<string>",
"response_format": {},
"known_models": [
{
"name": "<string>",
"alias": "<string>",
"integration": "openai",
"user_role": "<string>",
"assistant_role": "<string>",
"system_supported": false,
"alternative_names": [
"<string>"
],
"input_token_limit": 123,
"output_token_limit": 123,
"token_limit": 123,
"output_price": 0,
"input_price": 0,
"cost_by": "tokens",
"is_chat": false,
"provides_log_probs": false,
"formatting_tokens": 0,
"response_prefix_tokens": 0,
"api_version": "<string>",
"params_map": {
"model": "<string>",
"temperature": "<string>",
"max_tokens": "<string>",
"stop_sequences": "<string>",
"top_p": "<string>",
"top_k": "<string>",
"frequency_penalty": "<string>",
"presence_penalty": "<string>",
"echo": "<string>",
"logprobs": "<string>",
"top_logprobs": "<string>",
"n": "<string>",
"api_version": "<string>",
"tools": "<string>",
"tool_choice": "<string>",
"response_format": "<string>",
"reasoning_effort": "<string>",
"verbosity": "<string>",
"deployment_name": "<string>"
},
"output_map": {
"response": "<string>",
"token_count": "<string>",
"input_token_count": "<string>",
"output_token_count": "<string>",
"completion_reason": "<string>"
},
"input_map": {
"prompt": "<string>",
"prefix": "",
"suffix": ""
}
}
]
},
"scorers": [
{
"id": "<string>",
"scorer_type": "llm",
"model_name": "<string>",
"num_judges": 123,
"filters": [
{
"value": "<string>",
"operator": "eq",
"name": "node_name",
"filter_type": "string",
"case_sensitive": true
}
],
"scoreable_node_types": [
"<string>"
],
"cot_enabled": true,
"output_type": "boolean",
"input_type": "basic",
"name": "<string>",
"model_type": "slm",
"scorer_version": {
"id": "<string>",
"version": 123,
"scorer_id": "<string>",
"generated_scorer": {
"id": "<string>",
"name": "<string>",
"chain_poll_template": {
"template": "<string>",
"metric_system_prompt": "<string>",
"metric_description": "<string>",
"value_field_name": "rating",
"explanation_field_name": "explanation",
"metric_few_shot_examples": [
{
"generation_prompt_and_response": "<string>",
"evaluating_response": "<string>"
}
],
"response_schema": {}
},
"instructions": "<string>",
"user_prompt": "<string>"
},
"registered_scorer": {
"id": "<string>",
"name": "<string>",
"score_type": "<string>"
},
"finetuned_scorer": {
"id": "<string>",
"name": "<string>",
"lora_task_id": 123,
"prompt": "<string>",
"luna_input_type": "span",
"luna_output_type": "float",
"class_name_to_vocab_ix": {},
"executor": "action_completion_luna"
},
"model_name": "<string>",
"num_judges": 123,
"scoreable_node_types": [
"<string>"
],
"cot_enabled": true,
"output_type": "boolean",
"input_type": "basic"
}
}
],
"trigger": false
}
'

Example response:

{
"id": "<string>",
"project_id": "<string>",
"task_type": 0,
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"name": "",
"created_by": "<string>",
"created_by_user": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>"
},
"num_spans": 123,
"num_traces": 123,
"dataset": {
"dataset_id": "<string>",
"version_index": 123,
"name": "<string>"
},
"aggregate_metrics": {},
"aggregate_feedback": {},
"ranking_score": 123,
"rank": 123,
"winner": true,
"playground_id": "<string>",
"playground": {
"playground_id": "<string>",
"name": "<string>"
},
"prompt_run_settings": {
"logprobs": true,
"top_logprobs": 5,
"echo": false,
"n": 1,
"reasoning_effort": "medium",
"verbosity": "medium",
"deployment_name": "<string>",
"model_alias": "GPT-4o",
"temperature": 1,
"max_tokens": 4096,
"stop_sequences": [
"<string>"
],
"top_p": 1,
"top_k": 40,
"frequency_penalty": 0,
"presence_penalty": 0,
"tools": "<string>",
"tool_choice": "<string>",
"response_format": {}
},
"prompt_model": "<string>",
"prompt": {
"prompt_template_id": "<string>",
"version_index": 123,
"name": "<string>",
"content": "<string>"
},
"tags": {},
"status": {
"log_generation": {
"progress_percent": 0
}
}
}
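Before the field-by-field reference below, here is a minimal request body for orientation. Which fields are strictly required is an assumption on our part rather than something this page states, and the dataset identifiers and model alias are placeholders:

{
  "name": "summarization-baseline",
  "task_type": 16,
  "dataset": {
    "dataset_id": "d-123",
    "version_index": 1
  },
  "prompt_settings": {
    "model_alias": "GPT-4o",
    "temperature": 1,
    "max_tokens": 4096
  },
  "trigger": false
}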
Body parameters

prompt_settings: Prompt run settings.
prompt_settings.known_models[].integration: one of anthropic, aws_bedrock, aws_sagemaker, azure, custom, databricks, mistral, nvidia, openai, vegas_gateway, vertex_ai, writer.
prompt_settings.known_models[].alternative_names: Alternative names for the model, used for matching with various current, versioned, or legacy names.
prompt_settings.known_models[].cost_by: one of tokens, characters.
prompt_settings.known_models[].params_map: Maps the internal settings parameters (left) to the serialized parameters (right) we want to send in the API requests.
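As an illustration of params_map, the entry below renames the internal max_tokens setting to a provider's max_output_tokens field. The provider-side names on the right are assumptions for a hypothetical provider, not values taken from this page:

{
  "model": "model",
  "temperature": "temperature",
  "max_tokens": "max_output_tokens",
  "stop_sequences": "stop"
}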
scorers[].scorer_type: one of llm, code, luna, preset.
scorers[].filters: List of filters to apply to the scorer. Each filter matches on node names in scorer jobs:
  operator: one of eq, ne, contains.
  name: "node_name".
  filter_type: "string".
scorers[].scoreable_node_types: List of node types that can be scored by this scorer. Defaults to llm/chat.
scorers[].cot_enabled: Whether to enable chain of thought for this scorer. Defaults to False for llm scorers.
scorers[].output_type: What type of output to use for model-based scorers. One of boolean, categorical, count, discrete, freeform, percentage, multilabel.
scorers[].input_type: What type of input to use for model-based scorers. One of basic, llm_spans, retriever_spans, sessions_normalized, sessions_trace_io_only, tool_spans, trace_input_only, trace_io_only, trace_normalized, trace_output_only, agent_spans, workflow_spans.
scorers[].model_type: Type of model to use for this scorer; slm maps to luna, and llm maps to plus. One of slm, llm, code.
scorers[].scorer_version: ScorerVersion to use for this scorer. If not provided, the latest version will be used.
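Putting the scorer fields together, a plausible LLM-judge scorer entry could look like the following. The id and the filter value are placeholders; the rest follows the defaults and allowed values described above:

{
  "id": "c1a2b3c4-0000-0000-0000-000000000000",
  "scorer_type": "llm",
  "model_type": "llm",
  "num_judges": 3,
  "cot_enabled": true,
  "output_type": "boolean",
  "input_type": "basic",
  "filters": [
    {
      "name": "node_name",
      "filter_type": "string",
      "operator": "eq",
      "value": "generate_answer",
      "case_sensitive": false
    }
  ]
}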
scorers[].scorer_version.generated_scorer.chain_poll_template: Template for a chainpoll metric prompt, containing all the info necessary to send a chainpoll prompt:
  template: Chainpoll prompt template.
  metric_system_prompt: System prompt for the metric.
  metric_description: Description of what the metric should do.
  value_field_name: Field name to look for in the chainpoll response, for the rating.
  explanation_field_name: Field name to look for in the chainpoll response, for the explanation.
  metric_few_shot_examples: Few-shot examples for the metric.
  response_schema: Response schema for the output.
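With the default value_field_name ("rating") and explanation_field_name ("explanation"), the judge output that chainpoll parses would presumably be shaped like this; the values are illustrative:

{
  "rating": true,
  "explanation": "The response is fully supported by the retrieved context."
}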
scorers[].scorer_version.finetuned_scorer.luna_input_type: one of span, trace_object, trace_input_output_only.
scorers[].scorer_version.finetuned_scorer.luna_output_type: one of float, string, string_list.
scorers[].scorer_version.finetuned_scorer.executor: Executor pipeline. Defaults to the finetuned scorer pipeline but can run custom Galileo score pipelines. One of action_completion_luna, action_advancement_luna, agentic_session_success, agentic_workflow_success, agent_efficiency, agent_flow, bleu, chunk_attribution_utilization_luna, chunk_attribution_utilization, completeness_luna, completeness, context_adherence, context_adherence_luna, context_relevance, context_relevance_luna, conversation_quality, correctness, ground_truth_adherence, input_pii, input_pii_gpt, input_sexist, input_sexist_luna, input_tone, input_tone_gpt, input_toxicity, input_toxicity_luna, instruction_adherence, output_pii, output_pii_gpt, output_sexist, output_sexist_luna, output_tone, output_tone_gpt, output_toxicity, output_toxicity_luna, prompt_injection, prompt_injection_luna, prompt_perplexity, rouge, tool_error_rate, tool_error_rate_luna, tool_selection_quality, tool_selection_quality_luna, uncertainty, user_intent_change.
scorers[].scorer_version also accepts model_name, num_judges, scoreable_node_types, cot_enabled, output_type, and input_type, with the same meanings and allowed values as the scorer-level fields above.

Successful Response
id: Galileo ID of the experiment.
project_id: Galileo ID of the project associated with this experiment.
task_type: Valid task type for modeling, an integer from 0 to 18. Task types are stored as ints instead of strings because they are looked up in the database frequently.
created_at: Timestamp of the experiment's creation.
updated_at: Timestamp of the trace or span's last update.
name: Name of the experiment.
aggregate_feedback: Aggregate feedback information related to the experiment.
prompt_run_settings: Prompt run settings.
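The response also carries a status object. Judging from the example response above, a run that is partway through log generation would report something like:

{
  "status": {
    "log_generation": {
      "progress_percent": 42
    }
  }
}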