Experiments in Galileo allow you to evaluate and compare different prompts, models, and configurations using datasets. This helps you identify the best approach for your specific use case.
The simplest way to get started is by using a prompt template. The get_dataset function below expects a dataset that you created through either the console or the SDK. Ensure you have saved a dataset before running the experiment!
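If you haven't saved a dataset yet, you can create one with the SDK first. The sketch below is a minimal example assuming the `create_dataset` helper in `galileo.datasets` and its `content` parameter; the rows and column names are only illustrative.

```python
from galileo.datasets import create_dataset

# Illustrative rows: an "input" column used by the prompt and an
# "expected" column you can compare outputs against later.
create_dataset(
    name="countries",
    content=[
        {"input": "Spain", "expected": "Europe"},
        {"input": "Japan", "expected": "Asia"},
    ],
)
```

With a dataset saved, you can fetch or create a prompt template and run the experiment: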
```python
from galileo import Message, MessageRole
from galileo.prompts import get_prompt_template, create_prompt_template
from galileo.experiments import run_experiment
from galileo.datasets import get_dataset

project = "my-project"

prompt_template = get_prompt_template(name="geography-prompt", project=project)

# If the prompt template doesn't exist, create it
if prompt_template is None:
    prompt_template = create_prompt_template(
        name="geography-prompt",
        project=project,
        messages=[
            Message(role=MessageRole.system, content="You are a geography expert. Respond with only the continent name."),
            Message(role=MessageRole.user, content="{{input}}"),  # filled from the dataset's "input" column
        ],
    )

results = run_experiment(
    "geography-experiment",
    dataset=get_dataset(name="countries"),  # Name of a dataset you created
    prompt_template=prompt_template,
    # Optional
    prompt_settings={
        "max_tokens": 256,
        "model_alias": "GPT-4o",  # Make sure you have an integration set up for the model alias you're using
        "temperature": 0.8,
    },
    metrics=["correctness"],
    project=project,
)
```
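Because experiments are built for comparison, a common pattern is to run the same template and dataset with different settings and compare the runs in the Galileo console. A minimal sketch, assuming the model aliases below correspond to integrations you have configured:

```python
# Hypothetical sweep over model aliases; replace with aliases from your
# own integrations. Reuses prompt_template and project from above.
for alias in ["GPT-4o", "GPT-4o mini"]:
    run_experiment(
        f"geography-experiment-{alias}",
        dataset=get_dataset(name="countries"),
        prompt_template=prompt_template,
        prompt_settings={"max_tokens": 256, "model_alias": alias, "temperature": 0.8},
        metrics=["correctness"],
        project=project,
    )
```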
For more complex scenarios, you can call the model yourself with a custom function that uses Galileo's OpenAI wrapper. The function receives each dataset row and returns the model output, and you can run it against either a saved dataset or one defined directly in code.
```python
from galileo.experiments import run_experiment
from galileo.datasets import get_dataset
from galileo.openai import openai

dataset = get_dataset(name="countries")

def llm_call(input):
    return openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a geography expert."},
            {"role": "user", "content": f"Which continent does the following country belong to: {input['input']}"},
        ],
    ).choices[0].message.content

results = run_experiment(
    "geography-experiment",
    dataset=dataset,
    function=llm_call,
    metrics=["correctness"],
    project="my-project",
)
```
Instead of a saved dataset, you can also pass a dataset defined directly in code as a list of dictionaries:

```python
from galileo.experiments import run_experiment
from galileo import log, openai

dataset = [{"input": "Spain"}]

def llm_call(input):
    return openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a geography expert"},
            {"role": "user", "content": f"Which continent does the following country belong to: {input['input']}"},
        ],
    ).choices[0].message.content

results = run_experiment(
    "geography-experiment",
    dataset=dataset,
    function=llm_call,
    metrics=["correctness"],
    project="my-project",
)
```
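The example above also imports `log`, which you can use to instrument steps inside your custom function so they show up as nested spans in the experiment traces. A minimal sketch, assuming the `@log` decorator accepts a `span_type` argument as in the logging docs; `lookup_notes` is a hypothetical helper:

```python
from galileo import log
from galileo.openai import openai

# Hypothetical retrieval step, recorded as its own span in each run
# (span_type value assumed from the logging docs).
@log(span_type="retriever")
def lookup_notes(country: str) -> str:
    return f"Reference notes for {country}"

def llm_call(input):
    notes = lookup_notes(input["input"])
    return openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a geography expert."},
            {"role": "user", "content": f"{notes}\nWhich continent does the following country belong to: {input['input']}"},
        ],
    ).choices[0].message.content
```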
Finally, you can define your own metrics as plain Python functions that receive the input, output, and expected values for each row. The example below scores whether the response avoids the word "delve":

```python
from galileo.experiments import run_experiment
from galileo.datasets import get_dataset
from galileo.openai import openai

dataset = [{"input": "Spain"}]
# Alternatively, use a saved dataset:
# dataset = get_dataset(name="storyteller-dataset")

def llm_call(input):
    return openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a geography expert."},
            {"role": "user", "content": f"Which continent does the following country belong to: {input['input']}"},
        ],
    ).choices[0].message.content

# Custom metric: 1 if the response avoids the word "delve", 0 otherwise
def check_for_delve(input, output, expected) -> int:
    return 1 if "delve" not in output else 0

results = run_experiment(
    "geography-experiment",
    dataset=dataset,
    function=llm_call,
    metrics=[check_for_delve],
    project="my-project",
)
```
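Custom metric functions can also use the `expected` value from the dataset. A minimal sketch of an exact-match style metric, reusing `llm_call` from the previous example; it assumes your dataset rows include an `expected` column, and the experiment name is only an example:

```python
# Hypothetical metric: 1 if the expected continent appears in the response.
def exact_match(input, output, expected) -> int:
    if not expected:
        return 0
    return 1 if expected.strip().lower() in output.lower() else 0

results = run_experiment(
    "geography-experiment-exact-match",
    dataset=[{"input": "Spain", "expected": "Europe"}],
    function=llm_call,
    metrics=[exact_match],
    project="my-project",
)
```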