Open Responses Compatible API

Open Responses is an open-source specification and ecosystem for building multi-provider, interoperable LLM interfaces based on the OpenAI Responses API.

API Endpoint:
https://evalstate-openresponses.hf.space/v1
Get started by sending requests to this endpoint
View on GitHub
Open Responses Compatible
Connect to Hugging Face Inference Providers with Open Responses
Provider Agnostic
Configurable to work with any Chat Completion API back-end (local or remote).
Multi-modal, streaming, structured output
Supports text and image inputs, streaming output, JSON schema, and function calling.
Remote MCP
Server-side MCP tool execution.

Examples

from openai import OpenAI
import os

# Minimal example: generate a short text completion.
# An HF token is required; create one at https://huggingface.co/settings/tokens
client = OpenAI(
    base_url="https://evalstate-openresponses.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),
)

result = client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    instructions="You are a helpful assistant.",
    input="Tell me a three sentence bedtime story about a unicorn.",
)

# Full response object first, then just the concatenated text output.
print(result)
print(result.output_text)
from openai import OpenAI
import os

# Vision example: send an image URL alongside a text question.
client = OpenAI(
    base_url="https://evalstate-openresponses.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

# Implicit concatenation keeps the long URL readable; the value is unchanged.
IMAGE_URL = (
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/"
    "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/"
    "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
)

question = {"type": "input_text", "text": "what is in this image?"}
picture = {"type": "input_image", "image_url": IMAGE_URL}

result = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    input=[{"role": "user", "content": [question, picture]}],
)

print(result)
print(result.output_text)
from openai import OpenAI
import os

# Multi-message input: a "developer" instruction plus a user question.
client = OpenAI(
    base_url="https://evalstate-openresponses.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

messages = [
    {"role": "developer", "content": "Talk like a pirate."},
    {"role": "user", "content": "Are semicolons optional in JavaScript?"},
]

result = client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    input=messages,
)

print(result)
print(result.output_text)
from openai import OpenAI
import os

# Streaming example: with stream=True the call returns an event iterator.
client = OpenAI(
    base_url="https://evalstate-openresponses.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

event_stream = client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    input=[{"role": "user", "content": "Say 'double bubble bath' ten times fast."}],
    stream=True,
)

# Print each server-sent event as it arrives.
for event in event_stream:
    print(event)
from openai import OpenAI
import os

# Function-calling example: declare a tool schema; the model may emit a call.
client = OpenAI(
    base_url="https://evalstate-openresponses.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

# JSON-schema description of the tool's arguments.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA",
        },
        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
    },
    "required": ["location", "unit"],
}

tools = [
    {
        "type": "function",
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": weather_parameters,
    }
]

result = client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    tools=tools,
    input="What is the weather like in Boston today?",
    tool_choice="auto",  # model decides whether to call the tool
)

print(result)
from openai import OpenAI
from pydantic import BaseModel
import os

# Structured-output example: parse the reply into a Pydantic model.
client = OpenAI(
    base_url="https://evalstate-openresponses.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)


class CalendarEvent(BaseModel):
    # Target schema the model's output is validated against.
    name: str
    date: str
    participants: list[str]


result = client.responses.parse(
    model="moonshotai/Kimi-K2-Instruct:groq",
    input=[
        {"role": "system", "content": "Extract the event information."},
        {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
    ],
    text_format=CalendarEvent,
)

# output_parsed is a CalendarEvent instance, not raw text.
print(result.output_parsed)
from openai import OpenAI
import os

# Remote MCP example: the server executes MCP tool calls server-side.
client = OpenAI(
    base_url="https://evalstate-openresponses.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

gitmcp_tool = {
    "type": "mcp",
    "server_label": "gitmcp",
    "server_url": "https://gitmcp.io/openai/tiktoken",
    "allowed_tools": ["search_tiktoken_documentation", "fetch_tiktoken_documentation"],
    "require_approval": "never",  # skip the human approval step
}

result = client.responses.create(
    model="moonshotai/Kimi-K2-Instruct:groq",
    input="how does tiktoken work?",
    tools=[gitmcp_tool],
)

# Print every output item (tool calls, tool results, final message, ...).
for output in result.output:
    print(output)
from openai import OpenAI
import os

# Reasoning example: request a low reasoning-effort budget from the model.
client = OpenAI(
    base_url="https://evalstate-openresponses.hf.space/v1",
    api_key=os.getenv("HF_TOKEN"),  # visit https://huggingface.co/settings/tokens
)

result = client.responses.create(
    model="openai/gpt-oss-120b",
    instructions="You are a helpful assistant.",
    input="Say hello to the world.",
    reasoning={"effort": "low"},
)

# Inspect each output item's type and content.
for index, item in enumerate(result.output):
    print(f"Output #{index}: {item.type}", item.content)