elsai Model Hub

The elsai Model Hub exposes hosted models through an OpenAI-compatible HTTP API. Use the official OpenAI Python client by pointing base_url at the hub.

Base URL: https://models-hub-api.elsaifoundry.ai/v1

API Keys

API keys are not published in this documentation. Contact the DevOps team to obtain credentials. Gemma, Phi, and LightOnOCR endpoints may use different keys.

Available models

Model	ID	Chat	Multimodal	Tool Calling	OCR
Gemma-4 E4B	`gemma-4`	✅	✅	✅	—
Phi-4 Mini	`phi-4`	✅	—	✅	—
LightOnOCR	`lightonocr`	—	✅	—	✅

Environment variables

bash

export MODELS_HUB_GEMMA_API_KEY="gemma key"
export MODELS_HUB_PHI_API_KEY="phi mini key"
export MODELS_HUB_LIGHTONOCR_API_KEY="lightonocr key"

Gemma-4 E4B

Chat completion

python

import os
from openai import OpenAI

# Client bound to the hub endpoint, authenticated with the Gemma key from the environment.
hub_url = "https://models-hub-api.elsaifoundry.ai/v1"
client = OpenAI(api_key=os.environ["MODELS_HUB_GEMMA_API_KEY"], base_url=hub_url)

# Single-turn conversation: one user prompt, one completion.
prompt = {"role": "user", "content": "Explain transformers in one paragraph."}
response = client.chat.completions.create(messages=[prompt], model="gemma-4")

# The generated text lives on the first (and only requested) choice.
answer = response.choices[0].message.content
print(answer)

Streaming

python

# Request a streamed completion: the response arrives as incremental chunks.
stream = client.chat.completions.create(
    model="gemma-4",
    messages=[{"role": "user", "content": "Write a haiku about Python."}],
    stream=True,
)
for chunk in stream:
    # A chunk may carry no choices (e.g. a usage-only final chunk); indexing
    # choices[0] on it would raise IndexError, so skip it.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    # delta.content is empty/None on chunks that carry no text.
    if delta.content:
        print(delta.content, end="", flush=True)
# Terminate the line once the stream is exhausted.
print()

Multimodal (image + text)

python

import base64

# Read the image and base64-encode it so it can be inlined as a data URL.
with open("image.png", "rb") as image_file:
    image_bytes = image_file.read()
b64 = base64.b64encode(image_bytes).decode()
image_data_url = f"data:image/png;base64,{b64}"

# A multimodal user turn is a list of typed parts: the text prompt and the image.
text_part = {"type": "text", "text": "Describe this image."}
image_part = {"type": "image_url", "image_url": {"url": image_data_url}}

response = client.chat.completions.create(
    model="gemma-4",
    messages=[{"role": "user", "content": [text_part, image_part]}],
)
description = response.choices[0].message.content
print(description)

Tool calling

Tool calling takes two API requests: first send the user message with a tools array; then, after executing the requested tool locally, call the API again with the tool result to get the final response.

python

import json
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
    api_key=os.environ["MODELS_HUB_GEMMA_API_KEY"],
)

# Schema describing the single function the model is allowed to request.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g. 'San Francisco, CA'",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit",
                    },
                },
                "required": ["location"],
            },
        },
    }
]

# Conversation history; extended below with the model's tool request and the
# tool results so the second request sees the full exchange.
messages = [{"role": "user", "content": "What is the weather in Tokyo today?"}]

# Step 1 — send user message with tools
response = client.chat.completions.create(
    model="gemma-4",
    messages=messages,
    tools=tools,
    max_tokens=1024,
)

message = response.choices[0].message

if message.tool_calls:
    # The assistant turn that requested the tools goes before the tool results.
    messages.append(message)

    # Step 2 — execute each requested tool locally and record its result.
    # The model may request several calls in one turn; each one needs its own
    # "tool" message matched by tool_call_id, so answer all of them rather
    # than only the first.
    for tool_call in message.tool_calls:
        print(f"Tool: {tool_call.function.name}")
        print(f"Args: {tool_call.function.arguments}")

        # A canned reading stands in for a real weather lookup.
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps({
                "temperature": 22,
                "condition": "Partly cloudy",
                "unit": "celsius",
            }),
        })

    # Step 3 — return the tool results to the model and get the final response
    response = client.chat.completions.create(
        model="gemma-4",
        messages=messages,
        tools=tools,
        max_tokens=1024,
    )

    print(response.choices[0].message.content)
else:
    # The model answered directly without requesting a tool.
    print(message.content)

Phi-4 Mini

python

import os
from openai import OpenAI

# Client bound to the hub endpoint, authenticated with the Phi key from the environment.
hub_url = "https://models-hub-api.elsaifoundry.ai/v1"
client = OpenAI(api_key=os.environ["MODELS_HUB_PHI_API_KEY"], base_url=hub_url)

# Single-turn conversation: one user question, one completion.
question = {"role": "user", "content": "What is 2 + 2?"}
response = client.chat.completions.create(messages=[question], model="phi-4")

# The generated text lives on the first choice.
answer = response.choices[0].message.content
print(answer)

Tool calling

Phi-4 Mini supports tool_choice="auto" — the model decides autonomously whether to invoke a tool.

python

import json
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
    api_key=os.environ["MODELS_HUB_PHI_API_KEY"],
)

# Schema describing the single function the model is allowed to request.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g. 'San Francisco, CA'",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit",
                    },
                },
                "required": ["location"],
            },
        },
    }
]

# Conversation history; extended below with the model's tool request and the
# tool results so the second request sees the full exchange.
messages = [{"role": "user", "content": "What is the weather in Tokyo today?"}]

# Step 1 — model decides whether to call a tool
response = client.chat.completions.create(
    model="phi-4",
    messages=messages,
    tools=tools,
    tool_choice="auto",   # model decides autonomously
    max_tokens=1024,
)

message = response.choices[0].message

if message.tool_calls:
    # The assistant turn that requested the tools goes before the tool results.
    messages.append(message)

    # Step 2 — execute each requested tool locally and record its result.
    # The model may request several calls in one turn; each one needs its own
    # "tool" message matched by tool_call_id, so answer all of them rather
    # than only the first.
    for tool_call in message.tool_calls:
        print(f"Tool: {tool_call.function.name}")
        print(f"Args: {tool_call.function.arguments}")

        # A canned reading stands in for a real weather lookup.
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps({
                "temperature": 22,
                "condition": "Partly cloudy",
                "unit": "celsius",
            }),
        })

    # Step 3 — return the tool results, get final response
    response = client.chat.completions.create(
        model="phi-4",
        messages=messages,
        tools=tools,
        max_tokens=1024,
    )

    print(response.choices[0].message.content)
else:
    # With tool_choice="auto" the model may answer directly; show that answer.
    print(message.content)

LightOnOCR

python

import os
from openai import OpenAI
import base64

# Client for the hub, authenticated with the LightOnOCR-specific key.
client = OpenAI(
    api_key=os.environ["MODELS_HUB_LIGHTONOCR_API_KEY"],
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
)

# Inline the document as a base64 data URL.
# NOTE(review): the PDF is sent through an "image_url" part with an
# application/pdf MIME type — confirm the hub accepts PDFs here rather than
# requiring pages rendered to images first.
with open("document.pdf", "rb") as document:
    pdf_bytes = document.read()
b64 = base64.b64encode(pdf_bytes).decode()
document_url = f"data:application/pdf;base64,{b64}"

# The user turn pairs the extraction instruction with the encoded document.
instruction = {"type": "text", "text": "Extract all text from this document."}
attachment = {"type": "image_url", "image_url": {"url": document_url}}

response = client.chat.completions.create(
    model="lightonocr",
    messages=[{"role": "user", "content": [instruction, attachment]}],
)
extracted_text = response.choices[0].message.content
print(extracted_text)

elsai Model Hub

Available models

Environment variables

Gemma-4 E4B

Chat completion

Streaming

Multimodal (image + text)

Tool calling

Phi-4 Mini

Tool calling

LightOnOCR

elsai Model Hub

Available models

Environment variables

Gemma-4 E4B

Chat completion

Streaming

Multimodal (image + text)

Tool calling

Phi-4 Mini

Tool calling

LightOnOCR