/* Sidebar: gap below each top-level section (raise the value to spread sections apart). */
.sidebar > .section {
  margin-bottom: 0;
}

/* Sidebar: gap below each page entry inside a section. */
.sidebar .page {
  margin-bottom: 0;
}

/* Sidebar: padding inside each page link; a single value applies to all four sides. */
.sidebar .page a {
  padding: 0;
}

/*
 * Highlight for the element currently addressed by the URL fragment (anchor links).
 * The bare `:target` matches any targeted element; the heading/div selectors match
 * the same elements with higher specificity.
 */
h2[id]:target,
h3[id]:target,
h4[id]:target,
div[id]:target,
:target {
  /* Layout: the negative margin is the same size as the padding (1rem each). */
  position: relative;
  margin: -1rem;
  padding: 1rem;
  /* Keeps 80px of space above the element when it is scrolled into view. */
  scroll-margin-top: 80px;

  /* Decoration: resting gradient, accent bar, and the one-shot 3s highlight animation. */
  border-left: 4px solid #4a2ff9;
  border-radius: 8px;
  background: linear-gradient(to right, rgba(74, 47, 249, 0.1), rgba(2, 255, 187, 0.1));
  animation: highlight-section 3s ease-in-out;
}

/*
 * Keyframes for the :target highlight: the stronger (0.2 alpha) gradient is used
 * for the first half, and the animation ends on the 0.1-alpha gradient.
 */
@keyframes highlight-section {
  0%,
  50% {
    background: linear-gradient(to right, rgba(74, 47, 249, 0.2), rgba(2, 255, 187, 0.2));
  }
  to {
    background: linear-gradient(to right, rgba(74, 47, 249, 0.1), rgba(2, 255, 187, 0.1));
  }
}


/*
 * Colours for the #banner element and the paragraphs, links and buttons inside it.
 *
 * Written as flat descendant selectors instead of nested rules: a nested rule that
 * starts with a bare type selector (`p { … }` inside `#banner { … }`) is dropped by
 * browsers without CSS nesting and by early nesting implementations that required
 * a leading `&`. `#banner p` has the same specificity as the nested form.
 */
#banner {
  background-color: #fef1d7;
}

#banner p {
  color: #383645;
}

#banner a {
  color: #383645;
  border-bottom: 1px solid #383645;
}

#banner button {
  color: #9b98ae;
}


// REO script: loads reo.js from static.reo.dev and calls Reo.init once it has loaded.
!(() => {
  const clientId = "638190bf025179e";

  // onload callback; `Reo` is a global this block does not define itself
  // (presumably provided by the loaded reo.js — confirm with the vendor snippet).
  const startReo = () => {
    Reo.init({ clientID: clientId });
  };

  const loader = document.createElement("script");
  loader.src = "https://static.reo.dev/" + clientId + "/reo.js";
  loader.async = true;
  loader.onload = startReo;
  document.head.appendChild(loader);
})();

// Hubspot script: build the loader <script> element and append it to <head>.
// Properties are assigned in the listed order (type, id, async, defer, src).
const script = Object.assign(document.createElement("script"), {
  type: "text/javascript",
  id: "hs-script-loader",
  async: true,
  defer: true,
  src: "//js.hs-scripts.com/23114811.js",
});
document.head.appendChild(script);

// RB2B script.
// Sets up window.reb2b as a command queue: calls to reb2b.identify(...) and
// reb2b.collect(...) are pushed onto the array as [methodName, ...args].
// It then inserts the remote RB2B script before the first <script> on the page.
!(function () {
  const queue = (window.reb2b = window.reb2b || []);

  // Guard against the snippet being included twice.
  if (queue.invoked) {
    return;
  }
  queue.invoked = true;

  queue.methods = ["identify", "collect"];

  // Builds a stub that records a call to `method` on the queue and returns the queue.
  queue.factory = function (method) {
    return function (...callArgs) {
      queue.push([method, ...callArgs]);
      return queue;
    };
  };

  queue.methods.forEach(function (methodName) {
    queue[methodName] = queue.factory(methodName);
  });

  // Injects the async script for the given account key ahead of the first <script>.
  queue.load = function (key) {
    const tag = document.createElement("script");
    tag.type = "text/javascript";
    tag.async = true;
    tag.src = "https://s3-us-west-2.amazonaws.com/b2bjsstore/b/" + key + "/8XOE9GH5EDOM.js.gz";
    const firstScript = document.getElementsByTagName("script")[0];
    firstScript.parentNode.insertBefore(tag, firstScript);
  };

  queue.SNIPPET_VERSION = "1.0.1";
  queue.load("8XOE9GH5EDOM");
})();

// Universal CodeGroup tab synchronization
//
// Keeps all horizontal tab lists on the page on the same tab label: when a tab
// is clicked in one list, the tab with the same label is clicked in every other
// list. Tab lists added to the DOM later are synced as well.
(function () {
  // A tab is an element with role="tab" whose id starts with this prefix.
  const TAB_SELECTOR = '[role="tab"][id^="headlessui-tabs-tab-"]';
  const TAB_LIST_SELECTOR = '[role="tablist"][aria-orientation="horizontal"]';
  // Delay after a click before syncing the other tab lists.
  const CLICK_SYNC_DELAY_MS = 10;
  // Delay after new tab lists appear, to let the DOM finish updating.
  const MUTATION_SYNC_DELAY_MS = 100;

  // Label of the most recently chosen tab; null until one is known.
  let lastSelectedLanguage = null;
  // True while syncAllCodeGroups() is issuing its own clicks. Element.click()
  // dispatches synchronously and bubbles to the document listener, so without
  // this flag every programmatic click would schedule another sync (and would
  // loop forever if a clicked tab never became aria-selected).
  let isSyncing = false;
  // Handle of the pending mutation-triggered sync, used to coalesce bursts.
  let pendingMutationSync = null;

  /**
   * Reads a tab's label from its first descendant <div>.
   * @param {Element} tab - A role="tab" element.
   * @returns {string|null} Trimmed label text, or null when the tab has no <div>.
   */
  function getTabLabel(tab) {
    const labelNode = tab.querySelector("div");
    return labelNode ? labelNode.textContent.trim() : null;
  }

  /**
   * Wires up the click listener and mutation observer, then runs a first sync.
   */
  function init() {
    console.log("Universal CodeGroup tab sync initialized");

    // Event delegation: one listener on the document covers every tab.
    document.addEventListener("click", handleDocumentClick);

    setupMutationObserver();
    syncAllCodeGroups();
  }

  /**
   * Document-level click handler. When the click landed on (or inside) a tab,
   * remembers that tab's label and schedules a sync of all tab lists.
   * @param {Event} event - The click event.
   */
  function handleDocumentClick(event) {
    // Ignore the clicks this script generates itself while syncing.
    if (isSyncing) {
      return;
    }

    const origin = event.target;
    if (!origin || typeof origin.closest !== "function") {
      return;
    }

    const tab = origin.closest(TAB_SELECTOR);
    if (!tab) {
      return;
    }

    const language = getTabLabel(tab);
    if (language === null) {
      return;
    }

    console.log(`Tab clicked: ${language}`);
    lastSelectedLanguage = language;

    // Sync all other CodeGroups to this language (after a small delay).
    setTimeout(syncAllCodeGroups, CLICK_SYNC_DELAY_MS);
  }

  /**
   * Clicks, in every horizontal tab list, the not-yet-selected tab whose label
   * equals the last selected language. If no language is known yet, it is taken
   * from the first tab in the document that has aria-selected="true".
   */
  function syncAllCodeGroups() {
    if (!lastSelectedLanguage) {
      const selectedTab = document.querySelector('[role="tab"][aria-selected="true"]');
      const initialLabel = selectedTab ? getTabLabel(selectedTab) : null;
      if (initialLabel !== null) {
        lastSelectedLanguage = initialLabel;
        console.log(`Initial language detected: ${lastSelectedLanguage}`);
      }
    }

    if (!lastSelectedLanguage) {
      console.log("No language selected yet");
      return;
    }

    const tabLists = document.querySelectorAll(TAB_LIST_SELECTOR);
    console.log(`Found ${tabLists.length} CodeGroup containers`);

    isSyncing = true;
    try {
      tabLists.forEach((tabList) => {
        // As before, the last matching unselected tab in the list wins.
        let matchingTab = null;
        tabList.querySelectorAll('[role="tab"]').forEach((tab) => {
          const isSelected = tab.getAttribute("aria-selected") === "true";
          if (!isSelected && getTabLabel(tab) === lastSelectedLanguage) {
            matchingTab = tab;
          }
        });

        if (matchingTab) {
          console.log(`Syncing tab to ${lastSelectedLanguage}`);
          matchingTab.click();
        }
      });
    } finally {
      // Always re-enable user click handling, even if a click handler threw.
      isSyncing = false;
    }
  }

  /**
   * Tells whether an added node is, or contains, a role="tablist" element.
   * @param {Node} node - A node taken from MutationRecord.addedNodes.
   * @returns {boolean} True when the node is an element that is or holds a tab list.
   */
  function containsTabList(node) {
    if (node.nodeType !== 1) {
      // Only element nodes can be or contain a tab list.
      return false;
    }
    return node.getAttribute("role") === "tablist" || node.querySelector('[role="tablist"]') !== null;
  }

  /**
   * Observes the whole document for added nodes and schedules a sync when any
   * of them is or contains a tab list. A burst of mutation batches results in
   * a single sync, run MUTATION_SYNC_DELAY_MS after the last batch.
   */
  function setupMutationObserver() {
    const observer = new MutationObserver((mutations) => {
      const addsTabList = mutations.some((mutation) => Array.from(mutation.addedNodes).some(containsTabList));
      if (!addsTabList) {
        return;
      }

      console.log("New CodeGroup detected, syncing...");
      clearTimeout(pendingMutationSync);
      pendingMutationSync = setTimeout(syncAllCodeGroups, MUTATION_SYNC_DELAY_MS);
    });

    observer.observe(document.documentElement, {
      childList: true,
      subtree: true,
    });

    console.log("MutationObserver set up");
  }

  // Start the script when the DOM is ready.
  if (document.readyState === "loading") {
    document.addEventListener("DOMContentLoaded", init);
  } else {
    init();
  }
})();


Next Steps

Understand and evaluate the performance of AI agents using Galileo's agentic metrics.

Agentic Metrics

ℹ️ These docs are for the free version of Galileo. Documentation for current customers can be found [here](https://docs.galileo.ai/galileo).

Galileo

What is Galileo?

Recent updates and enhancements to Galileo.

Release Notes

Getting Started with Galileo

Logging

From Spot Testing to Systematic Evaluation

Experiments

Overview

Datasets

Prompts

Python SDK Reference

An overview of the Galileo TypeScript SDK.

TypeScript SDK Reference

Learn how to integrate and use OpenAI's API with Galileo's wrapper client.

Basic OpenAI Integration

Learn how to handle ignored instructions and ensure that your AI models follow your instructions.

Handling Ignored Instructions

Learn how to identify and address hallucinations and factual errors in your AI models.

Fixing Hallucinations and Factual Errors

Learn how to reduce hesitation and uncertainty in your AI models.

Reducing Hesitation and Uncertainty

Learn how to implement a basic Retrieval-Augmented Generation (RAG) system using Galileo and OpenAI.

Basic RAG Example

Learn how to prevent out of context information from being generated by your AI models.

Preventing Out of Context Information

Learn how to boost your AI model's performance by fully leveraging retrieved text chunks.

Maximizing Chunk Utilization

Learn how to ensure that your RAG systems provide complete answers using the Galileo completeness metric.

Completeness in RAG Systems

Learn how to implement a basic agentic AI system using Galileo and OpenAI.

Basic Agentic AI Example

Learn how to build an Agentic System for a smart weather application in a Python-based tech stack.

🌦️ Weather Vibes Agent Cookbook

Learn how to build and monitor a LangChain AI Agent using Galileo for tracing and observability.

🔎 Monitoring LangChain Agents with Galileo

Learn how to add evaluations to a multi-agent LangGraph chat bot using Galileo.

Add evaluations to a multi-agent LangGraph application

Guide to using MongoDB Atlas Vector Search with LangGraph agents logging to Galileo.

MongoDB Atlas Integration for Retrieval-Augmented Generation (RAG)

OpenAI Agent Integration

This guide explains how to send OpenTelemetry (OTEL) traces to Galileo using OpenInference. Configure the endpoint and headers to get started quickly.

Logging to Galileo using OpenTelemetry and OpenInference (Python)

Explore Galileo's comprehensive metrics framework for evaluating and improving AI system performance across multiple dimensions.

Metrics Overview

Explore Galileo's comprehensive out-of-the-box metrics for evaluating and improving AI system performance across multiple dimensions.

Metrics Comparison

A quick lookup for integrating and interpreting metrics in your workflows.

Metrics SDK Reference

Projects

Log Streams

Traces

Spans

Annotations Overview

Adding Annotations

Learn how to use datasets and experiments to improve your application.

Experiments Overview

Running Experiments in the Galileo Console

Running Experiments with Code

Learn how to compare multiple experiment runs in Galileo.

Compare experiments

Playground

Healthcheck

Get Token

Log Traces

Traces Available Columns

Spans Available Columns

Query Traces

Log Spans

Query Spans

Get Trace

Get Span

Query Metrics

Create Session

Query Sessions

Get Session

Sessions Available Columns

List Log Streams

Create Log Stream

Get Log Stream

Update Log Stream

Delete Log Stream

List Experiments

Create Experiment

Get Experiment

Update Experiment

Delete Experiment

Retrieves the column information for experiments.

Experiments Available Columns

List Feedback Templates V2

Create Feedback Template V2

Get Feedback Template V2

Delete Feedback Template

Update Feedback Template

Reorder Feedback Templates

Get Feedback Rating V2

Create Feedback Rating V2

Delete Feedback Rating V2

Apply Bulk Feedback V2

List Annotation Templates

Create Annotation Template

Get Annotation Template

Delete Annotation Template

Update Annotation Template

Reorder Annotation Templates

Get Annotation Rating

Create Annotation Rating

Delete Annotation Rating

Apply Bulk Annotation

Create

Get Scorer

Delete Scorer

Update

Create Llm Scorer Version

Get Scorer Version Code

Create Code Scorer Version

List Scorers With Filters

List Tags

Get Scorer Version Or Latest

List All Versions For Scorer

Invoke

List Prompt Datasets

Upload Prompt Evaluation Dataset

List Datasets

Create Dataset

Get Dataset

Delete Dataset

Update Dataset

Query Datasets

Download Prompt Dataset

Update Prompt Dataset

Delete Prompt Dataset

Get Dataset Content

Roll back the content of a dataset to a previous version.

Upsert Dataset Content

Update the content of a dataset.

The `index` and `column_name` fields are treated as keys tied to a specific version of the dataset.
As such, these values are considered immutable identifiers for the dataset's structure.

For example, if an edit operation changes the name of a column, subsequent edit operations in
the same request should reference the column using its original name.

The `If-Match` header is used to ensure that updates are only applied if the client's version of the dataset
matches the server's version. This prevents conflicts from simultaneous updates. The `ETag` header in the response
provides the new version identifier after a successful update.

Update Dataset Content

Download Dataset

Preview Dataset

Query Dataset Content

List the users with which the dataset has been shared.

List User Dataset Collaborators

Create User Dataset Collaborators

List the groups with which the dataset has been shared.

List Group Dataset Collaborators

Create Group Dataset Collaborators

Delete User Dataset Collaborator

Update the sharing permissions of a user on a dataset.

Update User Dataset Collaborator

Delete Group Dataset Collaborator

Update the sharing permissions of a group on a dataset.

Update Group Dataset Collaborator

Query Dataset Versions

Get Dataset Version Content

Update Dataset Version

List Dataset Projects

Get Collaborator Roles

Create Project

Get Project

Update Project

Deletes a project and all associated runs and objects.

Any user with project access can delete a project.
Note that `get_project_by_id` calls `user_can_access_project`.

Delete Project

List the users with which the project has been shared.

List User Project Collaborators

Create User Project Collaborators

List the groups with which the project has been shared.

List Group Project Collaborators

Create Group Project Collaborators

Delete User Project Collaborator

Update the sharing permissions of a user on a project.

Update User Project Collaborator

Delete Group Project Collaborator

Update the sharing permissions of a group on a project.

Update Group Project Collaborator

Gets projects optimized for V2 with pagination and server-side run counts.

Get Projects V2

Troubleshooting

How to Use Galileo

TypeScript SDK Overview

Learn how to ensure that your AI models use all the retrieved data.

Ensuring Complete Use of Retrieved Data

Learn how to maximize the utilization of retrieved chunks by your AI models.

Fixing Irrelevant Retrievals

Learn how to optimize multi-step task execution and ensure that agents follow a logical sequence of actions.

Optimizing Multi-Step Task Execution

Learn how to fix issues when AI models ignore your instructions and how to design better prompts for reliable responses.

Ensuring Agents Follow Instructions

Learn how to improve agent decision making and ensure that agents make the right choices.

Name	Description	When to Use	Example Use Case
Tool Error	Detects errors or failures during the execution of tools.	When implementing AI agents that use tools and want to track error rates.	A coding assistant that uses external APIs to run code and must handle and report execution errors appropriately.
Tool Selection Quality	Evaluates whether the agent selected the most appropriate tools for the task.	When optimizing agent systems for effective tool usage.	A data analysis agent that must choose the right visualization or statistical method based on the data type and user question.
Action Advancement	Measures how effectively each action advances toward the goal.	When assessing whether an agent is making meaningful progress in multi-step tasks.	A travel planning agent that needs to book flights, hotels, and activities in the correct sequence.

Overview

Getting Started

SDK/API

How-to Guides

Cookbooks

Integrations

Concepts

API Reference

References

Agentic Metrics

Next Steps

Overview

Getting Started

SDK/API

How-to Guides

Cookbooks

Integrations

Concepts

API Reference

References

Next Steps

Next Steps