```python
import json
import os

from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field

load_dotenv()


def jprint(obj):
    print(json.dumps(obj, indent=2))
```
February 17, 2025
TLDR: Many LLM providers claim to be OpenAI API compatible, but there are some differences in the `response_format` parameter for structured output usage.

- OpenAI: Supports both `json_schema` and `json_object` for the response format `type`. When using `json_object`, you must include the JSON schema in the system prompt.
- Groq: Supports `json_object` and requires including the schema in the system prompt. Providing the schema via the `value` argument makes no difference.
- TGI: Succeeds only when using `json_object` with the schema provided as a `value` in the `response_format`; simply including the schema in the prompt isn't sufficient.
Structured outputs for large language models (LLMs) refer to the ability of these models to generate outputs in a predefined format or structure. This can include generating JSON objects, tables, or other structured data formats that are easier to parse and utilize in downstream applications.
Let's use a simple example to illustrate this. Suppose we have a text and want to extract the entities and relations it contains; a structured output lets us get them back in a machine-readable form.
```python
class EntityRelationExtraction(BaseModel):
    triples: list[str] = Field(
        description="The `subject | predicate | object` triples extracted from the text."
    )


schema = EntityRelationExtraction.model_json_schema()
jprint(schema)
```
```json
{
  "properties": {
    "triples": {
      "description": "The `subject | predicate | object` triples extracted from the text.",
      "items": {
        "type": "string"
      },
      "title": "Triples",
      "type": "array"
    }
  },
  "required": [
    "triples"
  ],
  "title": "EntityRelationExtraction",
  "type": "object"
}
```
In this notebook we'll compare the OpenAI, Groq, and Hugging Face Text Generation Inference (TGI) chat completion APIs for structured output usage.
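All three providers speak the OpenAI chat completions protocol, so a single `OpenAI` client class can target each one by switching the base URL. The setup cell is folded in the original post, so the following is a minimal sketch; the environment variable names and the TGI base URL are assumptions, not taken from the source:

```python
# Sketch of the client setup (assumed; the original setup cell is folded).
# OPENAI_API_KEY, GROQ_API_KEY, and TGI_BASE_URL are assumed env var names.
openai_client = OpenAI()  # reads OPENAI_API_KEY from the environment
groq_client = OpenAI(
    api_key=os.environ["GROQ_API_KEY"],
    base_url="https://api.groq.com/openai/v1",  # Groq's OpenAI-compatible endpoint
)
tgi_client = OpenAI(
    api_key="-",  # a local TGI deployment doesn't check the key
    base_url=os.environ["TGI_BASE_URL"],  # e.g. "http://localhost:8080/v1"
)
```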
```python
from typing import Any


def erx_json(
    client: OpenAI,
    text: str,
    model: str,
    temperature: float = 0.0,
    response_format: dict | None = None,
    include_schema_in_system_prompt: bool = False,
) -> tuple[Any, EntityRelationExtraction | None, Exception | None]:
    system_prompt = "You are a helpful assistant that extracts `subject | predicate | object` triples from text."
    if include_schema_in_system_prompt:
        system_prompt += f"\n# JSON Schema\n{schema}"
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ],
            temperature=temperature,
            response_format=response_format,
        )
    except Exception as e:
        return None, None, e
    try:
        parsed = EntityRelationExtraction.model_validate_json(
            response.choices[0].message.content
        )
    except Exception as e:
        return response, None, e
    return response, parsed, None
```
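Before sweeping all configurations, a quick smoke test makes the return contract concrete. The sample `text` below is illustrative (the original input isn't shown in this extract):

```python
# Illustrative input; the original notebook's text isn't shown in the extract.
text = "Marie Curie discovered polonium. She was born in Warsaw."

response, parsed, error = erx_json(
    openai_client,
    text,
    model="gpt-4o-mini",
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "EntityRelationExtraction", "schema": schema},
    },
)
if parsed is not None:
    # e.g. {"triples": ["Marie Curie | discovered | polonium", ...]}
    jprint(parsed.model_dump())
```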
```python
inputs = [
    {
        "name": "json_schema",
        "response_format": {
            "type": "json_schema",
            "json_schema": {"name": "EntityRelationExtraction", "schema": schema},
        },
        "include_schema_in_system_prompt": False,
    },
    {
        "name": "json_schema_w_schema_in_system_prompt",
        "response_format": {
            "type": "json_schema",
            "json_schema": {"name": "EntityRelationExtraction", "schema": schema},
        },
        "include_schema_in_system_prompt": True,
    },
    {
        "name": "json_object",
        "response_format": {"type": "json_object", "value": None},
        "include_schema_in_system_prompt": False,
    },
    {
        "name": "json_object_w_schema_in_system_prompt",
        "response_format": {"type": "json_object", "value": None},
        "include_schema_in_system_prompt": True,
    },
    {
        "name": "json_object_value_schema",
        "response_format": {"type": "json_object", "value": schema},
        "include_schema_in_system_prompt": False,
    },
    {
        "name": "json_object_value_schema_w_system_prompt",
        "response_format": {"type": "json_object", "value": schema},
        "include_schema_in_system_prompt": True,
    },
]
```
```python
results = []
for client, provider, model in [
    (openai_client, "OpenAI", "gpt-4o-mini"),
    (groq_client, "Groq", "llama-3.1-8b-instant"),
    (tgi_client, "TGI", "llama-3-8b"),
]:
    for inp in inputs:
        response, parsed, error = erx_json(
            client,
            text,
            model=model,
            response_format=inp["response_format"],
            include_schema_in_system_prompt=inp["include_schema_in_system_prompt"],
        )
        results.append(
            {
                "provider": provider,
                "model": model,
                "response_format_type": inp["response_format"]["type"],
                "response_format_json_schema": inp["response_format"].get("json_schema") is not None,
                "response_format_value": inp["response_format"].get("value") is not None,
                "include_schema_in_system_prompt": inp["include_schema_in_system_prompt"],
                "success": error is None,
                "error": error,
            }
        )
```
```python
import ast

import pandas as pd
from itables import show


def pretty_error(error):
    if error is None:
        return "None"
    # Not every exception has a `.message` attribute (e.g. pydantic's
    # ValidationError), so fall back to str(error).
    message = getattr(error, "message", None) or str(error)
    parts = message.split(" - ", 1)
    if len(parts) == 1:
        return parts[0]
    error_code, payload_str = parts
    try:
        # literal_eval instead of eval: the payload is untrusted provider output
        error_message = ast.literal_eval(payload_str)["error"]
        if isinstance(error_message, str):
            return error_message
        elif "message" in error_message:
            error_message = error_message["message"]
        else:
            error_message = payload_str
    except Exception:
        return error_code
    return f"{error_code} - {error_message}"


df = pd.json_normalize(results)
df["error"] = df["error"].map(pretty_error)
show(df)
```
[Interactive results table, rendered with ITables: one row per provider/configuration pair, with columns provider, response_format_type, response_format_value, include_schema_in_system_prompt, success, and error.]