
This document covers the design and developer experience of the TypeScript client library for Galileo.

Note: This library is in pre-release mode and may not be stable.


npm install galileo


You can configure Galileo using environment variables:

# Scoped to an Organization
GALILEO_API_KEY=your-api-key

# Optional, set a default Project
GALILEO_PROJECT=your-project

# Optional, set a default Log Stream
GALILEO_LOG_STREAM=your-log-stream

In Node.js, you can use process.env to specify these variables:

// Apply all Galileo settings to the current process environment in one step.
Object.assign(process.env, {
  GALILEO_API_KEY: "your-api-key",
  GALILEO_PROJECT: "your-project",
  GALILEO_LOG_STREAM: "your-log-stream",
});


OpenAI Client Wrapper

The simplest way to get started is to use our OpenAI client wrapper. This example will automatically produce a single-span trace in the Log Stream UI:

import { OpenAI } from "openai";
import { wrapOpenAI } from "galileo";

// Wrap the OpenAI client so the calls made through it are logged to Galileo.
const openai = wrapOpenAI(new OpenAI({ apiKey: process.env.OPENAI_API_KEY }));

/**
 * Sends a single "hello world" chat completion request through the wrapped client.
 *
 * @returns {Promise<object>} The chat completion response returned by the OpenAI client.
 */
// Use the wrapped client as you normally would
async function callOpenAI() {
  const response = await openai.chat.completions.create({
    model: "gpt-4o",
    messages: [{ content: "Say hello world!", role: "user" }],
  });
  return response;
}

// Call the function
await callOpenAI();

Log Function Wrapper

The log function wrapper allows you to wrap functions with spans of different types. This is useful for creating workflow spans with nested LLM calls or tool spans.

import { OpenAI } from "openai";
import { wrapOpenAI, flush, log, init } from 'galileo';

/**
 * Builds a workflow span that contains an LLM call and a tool call, runs it
 * once with the input 'world', and flushes the logged data.
 *
 * @returns {Promise<unknown>} Whatever the wrapped workflow function resolves to.
 */
async function runExample() {
  const openai = wrapOpenAI(new OpenAI({ apiKey: process.env.OPENAI_API_KEY }));

  // This will automatically create an llm span since we're using the `wrapOpenAI` wrapper
  const callOpenAI = async (input) => {
    const result = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [{ content: `Say hello ${input}!`, role: 'user' }],
    });
    return result;
  };

  // Optionally initialize the logger if you haven't set GALILEO_PROJECT and GALILEO_LOG_STREAM environment variables
  await init({
    projectName: 'my-project',
    logStreamName: 'my-log-stream',
  });

  // Wrap a plain function so that each call to it is recorded as a tool span
  const wrappedToolCall = log(
    { name: 'tool span', spanType: 'tool' },
    (input) => {
      return 'tool call result';
    },
  );

  // No spanType is given here; the calls made inside this function are nested under it
  const wrappedFunc = await log({ name: 'workflow span' }, async (input) => {
    const result = await callOpenAI(input);
    return wrappedToolCall(result);
  });

  // This will create a workflow span with an llm span and a tool span
  const result = await wrappedFunc('world');

  // Send the logged data to Galileo before returning
  await flush();
  return result;
}

// Run the example
await runExample();

Span Types

Here are the different span types:

  • Workflow: A span that can have child spans, useful for nesting several child spans to denote a thread within a trace. If you wrap a parent function with log, calls that are made within that scope are automatically logged in the same trace.
  • Llm: Captures the input, output, and settings of an LLM call. This span gets automatically created when our client library wrappers (OpenAI and Anthropic) are used. Cannot have nested children.
  • Retriever: Contains the output documents of a retrieval operation.
  • Tool: Captures the input and output of a tool call. Used to decorate functions that are invoked as tools.


For more advanced use cases, you can use the GalileoLogger directly:

import { GalileoLogger } from "galileo";

/**
 * Logs one trace by hand with GalileoLogger: starts a trace, adds a workflow
 * span with a child LLM span, concludes both, and flushes the result.
 *
 * @returns {Promise<unknown>} The value resolved by `logger.flush()`.
 */
async function runLoggerExample() {
  // You can set the GALILEO_PROJECT and GALILEO_LOG_STREAM environment variables
  const logger = new GalileoLogger({
    projectName: "my-project",
    logStreamName: "my-log-stream",
  });

  console.log("Creating trace with spans...");

  // Create a new trace
  const trace = logger.startTrace({
    input: "Example trace input", // input
    output: undefined, // output (will be set later)
    name: "Example Trace", // name
    // Date.now() is in milliseconds; multiplying by 1e6 converts to nanoseconds.
    // The product exceeds Number.MAX_SAFE_INTEGER, so the lowest digits are approximate.
    createdAt: Date.now() * 1000000, // createdAt in nanoseconds
    durationNs: undefined, // durationNs
    metadata: { source: "test-script" }, // metadata
    tags: ["test", "example"], // tags
  });

  // Add a workflow span (parent span)
  const workflowSpan = logger.addWorkflowSpan({
    input: "Processing workflow", // input
    output: undefined, // output (will be set later)
    name: "Main Workflow", // name
    durationNs: undefined, // durationNs
    createdAt: Date.now() * 1000000, // createdAt in nanoseconds
    metadata: { workflow_type: "test" }, // metadata
    tags: ["workflow"], // tags
  });

  // Add an LLM span as a child of the workflow span
  logger.addLlmSpan({
    input: [{ role: "user", content: "Hello, how are you?" }], // input messages
    output: {
      role: "assistant",
      content: "I am doing well, thank you for asking!",
    }, // output message
    model: "gpt-3.5-turbo", // model name
    name: "Chat Completion", // name
    durationNs: 1000000000, // durationNs (1s)
    metadata: { temperature: "0.7" }, // metadata
    tags: ["llm", "chat"], // tags
  });

  // Conclude the workflow span
  logger.conclude({
    output: "Workflow completed successfully",
    durationNs: 2000000000, // 2 seconds
  });

  // Conclude the trace
  logger.conclude({
    output: "Final trace output with all spans completed",
    durationNs: 3000000000, // 3 seconds
  });

  // Flush the traces to Galileo
  const flushedTraces = await logger.flush();

  return flushedTraces;
}

// Run the example
await runLoggerExample();


Create and use a prompt template:

import { createPromptTemplate } from "galileo";

// Create a chat-style prompt template named "storyteller-prompt" in "my-project".
// The user message contains a {topic} placeholder.
const template = await createPromptTemplate({
  template: [
    { role: "system", content: "You are a great storyteller." },
    { role: "user", content: "Please write a short story about the following topic: {topic}" },
  ],
  projectName: "my-project",
  name: "storyteller-prompt",
});

You can also use an existing template:

import { getPromptTemplate } from "galileo";

/**
 * Looks up the prompt template named 'Hello name prompt' in 'my-project'.
 *
 * @returns {Promise<unknown>} The value resolved by `getPromptTemplate`.
 */
async function retrievePromptTemplate() {
  // Get a prompt template
  const template = await getPromptTemplate({
    projectName: 'my-project',
    name: 'Hello name prompt',
  });
  return template;
}


Creating and Using Datasets

You can create datasets and use them in experiments. For example, to retrieve an existing dataset by name:

import { getDataset } from 'galileo';

// Fetch the dataset named 'names'. The first argument is deliberately left
// undefined — presumably it is a dataset ID, so the lookup is by name; confirm
// against the SDK reference.
const dataset = await getDataset(undefined, 'names');


Evaluating with Runner Function

You can use a runner function to run an experiment with a dataset:

import { runExperiment } from "galileo";
import { OpenAI } from "openai";

/**
 * Runs the "story-function-experiment" experiment: each row of the
 * "storyteller-dataset" dataset is passed to a runner that asks OpenAI for a
 * story about the row's `topic`.
 *
 * @returns {Promise<void>} Resolves once `runExperiment` has completed.
 */
async function runFunctionExperiment() {
  const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

  // The runner receives one dataset row and reads its `topic` field
  const runner = async (input) => {
    const result = await openai.chat.completions.create({
      model: "gpt-4",
      messages: [
        { role: "system", content: "You are a great storyteller." },
        { role: "user", content: `Write a story about ${input["topic"]}` },
      ],
    });
    return result;
  };

  await runExperiment({
    name: "story-function-experiment",
    datasetName: "storyteller-dataset",
    // NOTE(review): the custom-dataset example in this document passes the
    // runner under the `function` key — confirm which key runExperiment expects.
    runner,
    metrics: ["correctness"],
    projectName: "my-project",
  });
}

// Run the experiment
await runFunctionExperiment();

Running an Experiment with a Prompt Template

import { runExperiment, getPromptTemplate, getDataset } from "galileo";

/**
 * Runs the "Test Experiment" experiment using a stored prompt template
 * ("storyteller-prompt") against the "storyteller-dataset" dataset.
 *
 * @returns {Promise<void>} Resolves once `runExperiment` has completed.
 */
async function runPromptExperiment() {
  const template = await getPromptTemplate({
    projectName: "my-project",
    name: "storyteller-prompt",
  });

  const dataset = await getDataset(undefined, "storyteller-dataset");

  await runExperiment({
    name: "Test Experiment",
    dataset,
    promptTemplate: template,
    metrics: ["toxicity"],
    projectName: "my-project",
  });
}

Running an Experiment with Custom Dataset

You can also use a locally generated dataset with a runner function:

import { runExperiment } from "galileo";
import { OpenAI } from "openai";

/**
 * Runs the "geography-experiment" experiment against a dataset defined inline
 * rather than one stored in Galileo.
 *
 * @returns {Promise<void>} Resolves once `runExperiment` has completed.
 */
async function runCustomDatasetExperiment() {
  const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

  // Locally defined rows; the runner reads the `input` field of each row
  const dataset = [{ input: "Spain", expected: "Europe" }];

  const runner = async (input) => {
    const result = await openai.chat.completions.create({
      model: "gpt-4",
      messages: [
        { role: "system", content: "You are a geography expert" },
        {
          role: "user",
          content: `Which continent does the following country belong to: ${input["input"]}`,
        },
      ],
    });
    return result;
  };

  await runExperiment({
    name: "geography-experiment",
    dataset,
    // NOTE(review): the runner-function example earlier in this document passes
    // the runner under the `runner` key — confirm which key runExperiment expects.
    function: runner,
    metrics: ["correctness"],
    projectName: "my-project",
  });
}

// Run the experiment
await runCustomDatasetExperiment();