Skip to content

Elsai Model Hub

The Elsai Model Hub exposes hosted models through an OpenAI-compatible HTTP API. Use the official OpenAI Python client by pointing base_url at the hub.

Base URL: https://models-hub-api.elsaifoundry.ai/v1

API Keys

API keys are not published in this documentation. Contact the DevOps team to obtain credentials. Gemma, Phi, and LightOnOCR endpoints may use different keys.

Available models

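| Model | ID | Chat | Multimodal | Tool Calling | OCR |
| --- | --- | --- | --- | --- | --- |
| Gemma-4 E4B | gemma-4 | ✓ | ✓ | ✓ | |
| Phi-4 Mini | phi-4 | ✓ | | ✓ | |
| LightOnOCR | lightonocr | | | | ✓ |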

Environment variables

bash
# Placeholder values: replace each one with the real key for that model.
export MODELS_HUB_GEMMA_API_KEY="gemma key"
export MODELS_HUB_PHI_API_KEY="phi mini key"
export MODELS_HUB_LIGHTONOCR_API_KEY="lightonocr key"

Gemma-4 E4B

Chat completion

python
import os
from openai import OpenAI

# Point the OpenAI client at the hub; the Gemma key is read from the environment.
client = OpenAI(
    api_key=os.environ["MODELS_HUB_GEMMA_API_KEY"],
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
)

# Single-turn chat request.
messages = [{"role": "user", "content": "Explain transformers in one paragraph."}]
response = client.chat.completions.create(model="gemma-4", messages=messages)

# The reply text is on the first choice.
reply = response.choices[0].message.content
print(reply)

Streaming

python
# Reuses the `client` created in the chat completion example.
stream = client.chat.completions.create(
    model="gemma-4",
    messages=[{"role": "user", "content": "Write a haiku about Python."}],
    stream=True,  # receive the reply incrementally as chunks
)
for chunk in stream:
    # OpenAI-compatible servers may emit chunks whose `choices` list is empty
    # (for example a usage-only chunk), so check before indexing to avoid an
    # IndexError.
    if not chunk.choices:
        continue
    text = chunk.choices[0].delta.content
    if text:
        # flush=True so each fragment is shown as soon as it arrives.
        print(text, end="", flush=True)
print()  # end the streamed output with a newline

Multimodal (image + text)

python
import base64

# Embed the image in the request as a base64 data URL.
with open("image.png", "rb") as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode()
image_data_url = f"data:image/png;base64,{encoded_image}"

# One user turn made of two content parts: the text prompt and the image.
# Reuses the `client` created in the chat completion example.
user_content = [
    {"type": "text", "text": "Describe this image."},
    {"type": "image_url", "image_url": {"url": image_data_url}},
]
response = client.chat.completions.create(
    model="gemma-4",
    messages=[{"role": "user", "content": user_content}],
)
print(response.choices[0].message.content)

Tool calling

Tool calling takes two API calls: first send the user message with a tools array; then execute the requested tool locally and call the API again with the tool result to get the final response.

python
import json
import os
from openai import OpenAI

# Gemma requests authenticate with their own key, read from the environment.
gemma_api_key = os.environ["MODELS_HUB_GEMMA_API_KEY"]
client = OpenAI(
    api_key=gemma_api_key,
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
)

# JSON Schema for the arguments the model may pass to get_weather.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "City name, e.g. 'San Francisco, CA'",
        },
        "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"],
            "description": "Temperature unit",
        },
    },
    # Only the location is mandatory; the unit is optional.
    "required": ["location"],
}

# Tool list sent with the request: a single function tool.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": weather_parameters,
        },
    }
]

# The conversation is kept in one list so step 2 can extend it instead of
# repeating the user message.
messages = [{"role": "user", "content": "What is the weather in Tokyo today?"}]

# Step 1 — send user message with tools
response = client.chat.completions.create(
    model="gemma-4",
    messages=messages,
    tools=tools,
    max_tokens=1024,
)

message = response.choices[0].message

if message.tool_calls:
    # The assistant turn that requested the tools goes before the tool results.
    messages.append(message)

    # Step 2 — execute each requested tool locally, return results to model.
    # Every tool call gets its own "tool" message, matched by tool_call_id,
    # so a reply containing several calls is answered in full.
    for tool_call in message.tool_calls:
        print(f"Tool: {tool_call.function.name}")
        print(f"Args: {tool_call.function.arguments}")
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            # Hard-coded sample result standing in for a real weather lookup.
            "content": json.dumps({
                "temperature": 22,
                "condition": "Partly cloudy",
                "unit": "celsius",
            }),
        })

    response = client.chat.completions.create(
        model="gemma-4",
        messages=messages,
        tools=tools,
        max_tokens=1024,
    )

    print(response.choices[0].message.content)
else:
    # No tool was requested: the model answered directly.
    print(message.content)

Phi-4 Mini

python
import os
from openai import OpenAI

# Phi-4 Mini uses its own key, read from the environment.
phi_api_key = os.environ["MODELS_HUB_PHI_API_KEY"]
client = OpenAI(
    api_key=phi_api_key,
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
)

# Single-turn chat request.
question = {"role": "user", "content": "What is 2 + 2?"}
response = client.chat.completions.create(model="phi-4", messages=[question])

# The reply text is on the first choice.
answer = response.choices[0].message.content
print(answer)

Tool calling

Phi-4 supports tool_choice="auto" — the model decides autonomously whether to invoke a tool.

python
import json
import os
from openai import OpenAI

# Same hub endpoint as the other models; only the API key differs.
client = OpenAI(
    api_key=os.environ["MODELS_HUB_PHI_API_KEY"],
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
)

# Argument properties accepted by get_weather.
location_property = {
    "type": "string",
    "description": "City name, e.g. 'San Francisco, CA'",
}
unit_property = {
    "type": "string",
    "enum": ["celsius", "fahrenheit"],
    "description": "Temperature unit",
}

# Function description sent to the model; only "location" is required.
get_weather_function = {
    "name": "get_weather",
    "description": "Get the current weather for a location",
    "parameters": {
        "type": "object",
        "properties": {"location": location_property, "unit": unit_property},
        "required": ["location"],
    },
}

# Tool list sent with the request: a single function tool.
tools = [{"type": "function", "function": get_weather_function}]

# The conversation is kept in one list so step 2 can extend it instead of
# repeating the user message.
messages = [{"role": "user", "content": "What is the weather in Tokyo today?"}]

# Step 1 — model decides whether to call a tool
response = client.chat.completions.create(
    model="phi-4",
    messages=messages,
    tools=tools,
    tool_choice="auto",   # model decides autonomously
    max_tokens=1024,
)

message = response.choices[0].message

if message.tool_calls:
    # The assistant turn that requested the tools goes before the tool results.
    messages.append(message)

    # Step 2 — return tool results, get final response.
    # Every tool call gets its own "tool" message, matched by tool_call_id,
    # so a reply containing several calls is answered in full.
    for tool_call in message.tool_calls:
        print(f"Tool: {tool_call.function.name}")
        print(f"Args: {tool_call.function.arguments}")
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            # Hard-coded sample result standing in for a real weather lookup.
            "content": json.dumps({
                "temperature": 22,
                "condition": "Partly cloudy",
                "unit": "celsius",
            }),
        })

    response = client.chat.completions.create(
        model="phi-4",
        messages=messages,
        tools=tools,
        max_tokens=1024,
    )

    print(response.choices[0].message.content)
else:
    # With tool_choice="auto" the model may answer directly without a tool.
    print(message.content)

LightOnOCR

python
import os
from openai import OpenAI
import base64

# LightOnOCR uses its own key, read from the environment.
client = OpenAI(
    api_key=os.environ["MODELS_HUB_LIGHTONOCR_API_KEY"],
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
)

# Base64-encode the document so it can be embedded in a data URL.
with open("document.pdf", "rb") as document_file:
    encoded_document = base64.b64encode(document_file.read()).decode()

# NOTE(review): the PDF is sent through an "image_url" content part with an
# application/pdf data URL. Confirm the hub accepts PDFs in this form rather
# than requiring rendered page images.
document_data_url = f"data:application/pdf;base64,{encoded_document}"

# One user turn made of two content parts: the instruction and the document.
user_content = [
    {"type": "text", "text": "Extract all text from this document."},
    {"type": "image_url", "image_url": {"url": document_data_url}},
]
response = client.chat.completions.create(
    model="lightonocr",
    messages=[{"role": "user", "content": user_content}],
)
print(response.choices[0].message.content)

Copyright © 2026 Elsai Foundry.