Structured output quirks

llm
Published

February 17, 2025

TL;DR: Many LLM providers claim to be OpenAI API compatible, but the response_format parameter for structured output behaves differently across them.

  • OpenAI: Supports both json_schema and json_object response format types. With json_object, you must include the JSON schema in the system prompt.
  • Groq: Supports json_object and requires the schema in the system prompt; passing the schema via the value field makes no difference.
  • TGI: Succeeds only with json_object and the schema provided as value in the response_format; merely including the schema in the prompt isn’t sufficient. (See the sketch below.)
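
To make the bullets concrete, here is a minimal sketch of the response_format payloads behind each finding. The variable names are just for illustration, and the schema dict is a placeholder; the real one is generated from a Pydantic model below.

Code
# Placeholder JSON schema; the notebook builds the real one from a Pydantic model.
schema = {
    "type": "object",
    "properties": {"triples": {"type": "array", "items": {"type": "string"}}},
    "required": ["triples"],
}

# OpenAI: strict, schema-enforced structured output.
openai_json_schema = {
    "type": "json_schema",
    "json_schema": {"name": "EntityRelationExtraction", "schema": schema},
}

# OpenAI and Groq: JSON mode; the schema must also be spelled out in the system prompt.
json_object = {"type": "json_object"}

# TGI: JSON mode with the schema passed as `value` inside response_format.
tgi_json_object = {"type": "json_object", "value": schema}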

Structured outputs for large language models (LLMs) refer to the ability of these models to generate output in a predefined format or structure, such as JSON objects, tables, or other structured data that is easy to parse and use in downstream applications.

Let’s use a simple example to illustrate this: given a text, we want to extract the entities and relations from it, and structured output lets us request them in a predefined JSON shape.

Code
import json
import os

from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field

load_dotenv()

def jprint(obj):
    """Pretty-print an object as indented JSON."""
    print(json.dumps(obj, indent=2))


class EntityRelationExtraction(BaseModel):
    triples: list[str] = Field(description="The `subject | predicate | object` triples extracted from the text.")

schema = EntityRelationExtraction.model_json_schema()
jprint(schema)
{
  "properties": {
    "triples": {
      "description": "The `subject | predicate | object` triples extracted from the text.",
      "items": {
        "type": "string"
      },
      "title": "Triples",
      "type": "array"
    }
  },
  "required": [
    "triples"
  ],
  "title": "EntityRelationExtraction",
  "type": "object"
}
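
Since providers return the structured output as a raw JSON string, Pydantic’s model_validate_json can check it against the model defined above; here is a quick sanity check on a hand-written string.

Code
parsed = EntityRelationExtraction.model_validate_json('{"triples": ["Einstein | born in | Ulm"]}')
jprint(parsed.model_dump())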

We’ll compare the OpenAI, Groq, and Hugging Face Text Generation Inference (TGI) chat completion APIs for structured output in this notebook.

openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

groq_client = OpenAI(base_url=os.getenv("GROQ_BASE_URL"), api_key=os.getenv("GROQ_API_KEY"))

# Assume a Llama 3.1 8B model is served on localhost:8008 using TGI
tgi_client = OpenAI(base_url="http://0.0.0.0:8008/v1", api_key="_")  # dummy key; the OpenAI client requires one

text = """
Einstein was born in Ulm, Germany. He discovered the special theory of relativity in 1905. He was awarded the Nobel Prize in Physics in 1921. His first wife, Mileva Marić, was a Serbian mathematician and physicist.
""".strip()
Code
from typing import Any

def erx_json(
    client: OpenAI,
    text: str,
    model: str,
    temperature: float = 0.0,
    response_format: dict | None = None,
    include_schema_in_system_prompt: bool = False,
) -> tuple[Any, EntityRelationExtraction | None, Exception | None]:
    """Run one extraction request and return (raw response, parsed output, error)."""
    system_prompt = "You are a helpful assistant that extracts `subject | predicate | object` triples from text."
    if include_schema_in_system_prompt:
        system_prompt += f"\n# JSON Schema\n{schema}"

    # The request itself can fail, e.g. when the provider rejects the response_format.
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ],
            temperature=temperature,
            response_format=response_format,
        )
    except Exception as e:
        return None, None, e

    # The request can also succeed but return JSON that doesn't match the model.
    try:
        parsed = EntityRelationExtraction.model_validate_json(
            response.choices[0].message.content
        )
    except Exception as e:
        return response, None, e

    return response, parsed, None
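
As a quick sanity check (assuming an OPENAI_API_KEY is set and gpt-4o-mini is available), a single call with the json_schema format should parse cleanly.

Code
_, parsed, error = erx_json(
    openai_client,
    text,
    model="gpt-4o-mini",
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "EntityRelationExtraction", "schema": schema},
    },
)
if error is None:
    jprint(parsed.model_dump())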
Code
inputs = [
    {
        "name": "json_schema",
        "response_format": {
            "type": "json_schema",
            "json_schema": {"name": "EntityRelationExtraction", "schema": schema}
        },
        "include_schema_in_system_prompt": False,
    },
    {
        "name": "json_schema_w_schema_in_system_prompt",
        "response_format": {
            "type": "json_schema",
            "json_schema": {"name": "EntityRelationExtraction", "schema": schema}
        },
        "include_schema_in_system_prompt": True,
    },
    {
        "name": "json_object",
        "response_format": {
            "type": "json_object",
            "value": None
        },
        "include_schema_in_system_prompt": False,
    },
    {
        "name": "json_object_w_schema_in_system_prompt",
        "response_format": {
            "type": "json_object",
            "value": None
        },
        "include_schema_in_system_prompt": True,
    },
    {
        "name": "json_object_value_schema",
        "response_format": {
            "type": "json_object",
            "value": schema
        },
        "include_schema_in_system_prompt": False,
    },
    {
        "name": "json_object_value_schema_w_system_prompt",
        "response_format": {
            "type": "json_object",
            "value": schema
        },
        "include_schema_in_system_prompt": True,
    },
]
Code
results = []

for client, provider, model in [
    (openai_client, "OpenAI", "gpt-4o-mini"),
    (groq_client, "Groq", "llama-3.1-8b-instant"),
    (tgi_client, "TGI", "llama-3-8b"),
]:
    for case in inputs:  # avoid shadowing the `input` builtin
        response, parsed, error = erx_json(
            client,
            text,
            model=model,
            response_format=case["response_format"],
            include_schema_in_system_prompt=case["include_schema_in_system_prompt"],
        )
        results.append(
            {
                "provider": provider,
                "model": model,
                "response_format_type": case["response_format"]["type"],
                "response_format_json_schema": case["response_format"].get("json_schema") is not None,
                "response_format_value": case["response_format"].get("value") is not None,
                "include_schema_in_system_prompt": case["include_schema_in_system_prompt"],
                "success": error is None,
                "error": error,
            }
        )
Code
import ast

import pandas as pd
from itables import show

def pretty_error(error):
    if error is None:
        return "None"

    # OpenAI SDK errors carry a `message` attribute; fall back to str() for
    # other exception types (e.g. Pydantic validation errors).
    message = getattr(error, "message", str(error))
    parts = message.split(" - ", 1)
    if len(parts) == 1:
        return parts[0]

    error_code, payload_str = parts
    try:
        # The payload is the repr of a dict, so literal_eval is safer than eval.
        error_message = ast.literal_eval(payload_str)["error"]
        if isinstance(error_message, str):
            return error_message
        elif "message" in error_message:
            error_message = error_message["message"]
        else:
            error_message = payload_str
    except Exception:
        return error_code

    return f"{error_code} - {error_message}"

df = pd.json_normalize(results)
df['error'] = df['error'].map(pretty_error)
Code
table = df[['provider', 'response_format_type', 'response_format_value', 'include_schema_in_system_prompt', 'success', 'error']]
show(table, paging=False)
[Interactive results table rendered with ITables; columns: provider, response_format_type, response_format_value, include_schema_in_system_prompt, success, error.]
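
If the ITables widget doesn’t load (it fetches its assets from the internet), a plain-text rendering of the same DataFrame is a simple fallback.

Code
# Static fallback: print the comparison table without the interactive widget.
print(table.to_string(index=False))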