Appearance
Elsai Model Hub
The Elsai Model Hub exposes hosted models through an OpenAI-compatible HTTP API. Use the official OpenAI Python client by pointing base_url at the hub.
Base URL: https://models-hub-api.elsaifoundry.ai/v1
API Keys
API keys are not published in this documentation. Contact the DevOps team to obtain credentials. Gemma, Phi, and LightOnOCR endpoints may use different keys.
Available models
| Model | ID | Chat | Multimodal | Tool Calling | OCR |
|---|---|---|---|---|---|
| Gemma-4 E4B | gemma-4 | ✅ | ✅ | ✅ | — |
| Phi-4 Mini | phi-4 | ✅ | — | ✅ | — |
| LightOnOCR | lightonocr | — | ✅ | — | ✅ |
Environment variables
bash
export MODELS_HUB_GEMMA_API_KEY="gemma key"
export MODELS_HUB_PHI_API_KEY="phi mini key"
export MODELS_HUB_LIGHTONOCR_API_KEY="lightonocr key"

Gemma-4 E4B
Chat completion
python
import os
from openai import OpenAI
# Client bound to the hub's OpenAI-compatible endpoint, authenticated with the
# Gemma key read from the environment (raises KeyError if it is not set).
client = OpenAI(
    api_key=os.environ["MODELS_HUB_GEMMA_API_KEY"],
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
)

# Single-turn chat request: one user message, no extra parameters.
user_turn = {"role": "user", "content": "Explain transformers in one paragraph."}
response = client.chat.completions.create(model="gemma-4", messages=[user_turn])
print(response.choices[0].message.content)

Streaming
python
# Ask for the reply as a stream of incremental chunks (stream=True) instead
# of a single final message. `client` is the Gemma client created in the
# chat-completion example.
stream = client.chat.completions.create(
    model="gemma-4",
    messages=[{"role": "user", "content": "Write a haiku about Python."}],
    stream=True,
)
for chunk in stream:
    # A chunk can arrive with an empty `choices` list (for example a
    # usage-only chunk), so check before indexing into it.
    if not chunk.choices:
        continue
    piece = chunk.choices[0].delta.content
    # Skip chunks whose delta carries no text (None or "").
    if piece:
        print(piece, end="", flush=True)

Multimodal (image + text)
python
import base64
# Read the image as bytes and base64-encode it so it can be embedded inline
# in a data URL.
with open("image.png", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()

# The user turn's content is a list of typed parts: a text instruction plus
# the image as a data URL. `client` is the Gemma client created in the
# chat-completion example.
response = client.chat.completions.create(
    model="gemma-4",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
        ],
    }],
)
print(response.choices[0].message.content)

Tool calling
Tool calling takes two API calls: first send the user message together with a tools array; then, after executing the requested tool locally, call the API again with the tool result to get the final response.
python
import json
import os
from openai import OpenAI
# Client bound to the hub endpoint, authenticated with the Gemma key.
client = OpenAI(
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
    api_key=os.environ["MODELS_HUB_GEMMA_API_KEY"],
)

# One function tool; its arguments are described with a JSON Schema object.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g. 'San Francisco, CA'",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit",
                    },
                },
                "required": ["location"],
            },
        },
    }
]

# Keep the conversation in one list so step 2 extends it instead of
# repeating the user turn by hand.
messages = [{"role": "user", "content": "What is the weather in Tokyo today?"}]

# Step 1 — send user message with tools
response = client.chat.completions.create(
    model="gemma-4",
    messages=messages,
    tools=tools,
    max_tokens=1024,
)
message = response.choices[0].message

if message.tool_calls:
    # The assistant turn that requested the tools goes before their results.
    messages.append(message)

    # Step 2 — execute tool locally, return result to model.
    # Answer every requested call, not just the first: each tool message is
    # matched to its request through tool_call_id.
    for tool_call in message.tool_calls:
        print(f"Tool: {tool_call.function.name}")
        print(f"Args: {tool_call.function.arguments}")
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            # Canned result standing in for a real weather lookup.
            "content": json.dumps({
                "temperature": 22,
                "condition": "Partly cloudy",
                "unit": "celsius",
            }),
        })

    # Re-invoke the model with the tool results to get the final response.
    response = client.chat.completions.create(
        model="gemma-4",
        messages=messages,
        tools=tools,
        max_tokens=1024,
    )
print(response.choices[0].message.content)

Phi-4 Mini
python
import os
from openai import OpenAI
# Same hub endpoint as the other models, authenticated with the Phi key read
# from the environment (raises KeyError if it is not set).
client = OpenAI(
    api_key=os.environ["MODELS_HUB_PHI_API_KEY"],
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
)

# Single-turn chat request: one user message, no extra parameters.
user_turn = {"role": "user", "content": "What is 2 + 2?"}
response = client.chat.completions.create(model="phi-4", messages=[user_turn])
print(response.choices[0].message.content)

Tool calling
Phi-4 supports tool_choice="auto" — the model decides autonomously whether to invoke a tool.
python
import json
import os
from openai import OpenAI
# Client bound to the hub endpoint, authenticated with the Phi key.
client = OpenAI(
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
    api_key=os.environ["MODELS_HUB_PHI_API_KEY"],
)

# One function tool; its arguments are described with a JSON Schema object.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g. 'San Francisco, CA'",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit",
                    },
                },
                "required": ["location"],
            },
        },
    }
]

# Keep the conversation in one list so step 2 extends it instead of
# repeating the user turn by hand.
messages = [{"role": "user", "content": "What is the weather in Tokyo today?"}]

# Step 1 — model decides whether to call a tool
response = client.chat.completions.create(
    model="phi-4",
    messages=messages,
    tools=tools,
    tool_choice="auto",  # model decides autonomously
    max_tokens=1024,
)
message = response.choices[0].message

if message.tool_calls:
    # The assistant turn that requested the tools goes before their results.
    messages.append(message)

    # Step 2 — return tool result, get final response.
    # Answer every requested call, not just the first: each tool message is
    # matched to its request through tool_call_id.
    for tool_call in message.tool_calls:
        print(f"Tool: {tool_call.function.name}")
        print(f"Args: {tool_call.function.arguments}")
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            # Canned result standing in for a real weather lookup.
            "content": json.dumps({
                "temperature": 22,
                "condition": "Partly cloudy",
                "unit": "celsius",
            }),
        })

    # Re-invoke the model with the tool results to get the final response.
    response = client.chat.completions.create(
        model="phi-4",
        messages=messages,
        tools=tools,
        max_tokens=1024,
    )
print(response.choices[0].message.content)

LightOnOCR
python
import os
from openai import OpenAI
import base64
# Client bound to the hub endpoint, authenticated with the LightOnOCR key
# read from the environment (raises KeyError if it is not set).
client = OpenAI(
    base_url="https://models-hub-api.elsaifoundry.ai/v1",
    api_key=os.environ["MODELS_HUB_LIGHTONOCR_API_KEY"],
)

# Read the document as bytes and base64-encode it for inline transport.
with open("document.pdf", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()

# NOTE(review): the PDF is sent through an `image_url` part using an
# application/pdf data URL — confirm the hub accepts PDFs directly rather
# than requiring each page to be rendered to an image first.
response = client.chat.completions.create(
    model="lightonocr",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Extract all text from this document."},
            {"type": "image_url", "image_url": {"url": f"data:application/pdf;base64,{b64}"}},
        ],
    }],
)
print(response.choices[0].message.content)