167 lines
5.5 KiB
Python
167 lines
5.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
LLM Client
|
|
Unified interface for calling Anthropic, OpenAI, and LiteLLM/compatible APIs.
|
|
Resolves the user's LLM config from the dashboard and routes accordingly.
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from urllib import request, error as urlerror
|
|
from shared import DASHBOARD_API, api_request
|
|
|
|
|
|
# Default models per provider — used by complete() when the user's config
# has no default_model set.
DEFAULT_MODELS = {
    "anthropic": "claude-sonnet-4-5-20250514",
    "openai": "gpt-4o-mini",
    # LiteLLM routes on a "provider/model" prefix.
    "litellm": "anthropic/claude-sonnet-4-5-20250514",
    "ollama": "llama3",
}

# Default API URLs per provider — used by complete() when the user's config
# has no api_url set. litellm and ollama default to local servers.
DEFAULT_URLS = {
    "anthropic": "https://api.anthropic.com",
    "openai": "https://api.openai.com",
    "litellm": "http://localhost:4000",
    "ollama": "http://localhost:11434",
}
|
|
|
|
|
|
def get_llm_config(user_id):
    """Fetch the user's resolved LLM config from the dashboard API.

    Args:
        user_id: Dashboard user ID.

    Returns:
        The config dict as returned by the dashboard endpoint.

    Raises:
        RuntimeError: If the dashboard reports no configured LLM
            (``source == "none"``).
    """
    cfg = api_request(f"{DASHBOARD_API}/api/users/{user_id}/llm", retries=1)
    if cfg.get("source") != "none":
        return cfg
    raise RuntimeError("No LLM configured. Set one up via the LLM button in the dashboard.")
|
|
|
|
|
|
def complete(user_id, prompt, system=None, max_tokens=4096):
    """Send a completion request using the user's configured LLM.

    Resolves the user's provider config from the dashboard, fills in
    per-provider default model/URL where unset, and routes the request
    to the matching backend caller.

    Args:
        user_id: Dashboard user ID (for config resolution)
        prompt: The user message / prompt text
        system: Optional system message
        max_tokens: Max response tokens

    Returns:
        dict with keys: text (str), model (str), input_tokens (int),
        output_tokens (int)

    Raises:
        RuntimeError: If no LLM is configured or the provider is unknown.
    """
    cfg = get_llm_config(user_id)
    provider = cfg.get("provider_type", "anthropic")
    base_url = cfg.get("api_url") or DEFAULT_URLS.get(provider, "")
    key = cfg.get("api_key", "")
    chosen_model = cfg.get("default_model") or DEFAULT_MODELS.get(provider, "")

    # Providers are mutually exclusive, so the guard order is arbitrary.
    if provider == "ollama":
        return _call_openai_compatible(base_url, key, chosen_model, prompt, system, max_tokens, is_ollama=True)
    if provider in ("openai", "litellm"):
        return _call_openai_compatible(base_url, key, chosen_model, prompt, system, max_tokens)
    if provider == "anthropic":
        return _call_anthropic(base_url, key, chosen_model, prompt, system, max_tokens)
    raise RuntimeError(f"Unknown LLM provider: {provider}")
|
|
|
|
|
|
def _call_anthropic(api_url, api_key, model, prompt, system, max_tokens):
    """Call the Anthropic Messages API.

    Args:
        api_url: Base URL (trailing slash tolerated; ``/v1/messages`` is appended).
        api_key: Key sent in the ``x-api-key`` header.
        model: Model identifier to request.
        prompt: User message text.
        system: Optional system prompt; omitted from the body when falsy.
        max_tokens: Maximum tokens to generate.

    Returns:
        dict with keys: text (str), model (str), input_tokens (int),
        output_tokens (int)

    Raises:
        RuntimeError: On an HTTP error response from the API.
    """
    url = f"{api_url.rstrip('/')}/v1/messages"
    body = {
        "model": model,
        "max_tokens": max_tokens,
        "messages": [{"role": "user", "content": prompt}],
    }
    if system:
        body["system"] = system

    headers = {
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }

    data = json.dumps(body).encode()
    req = request.Request(url, data=data, headers=headers, method="POST")

    try:
        with request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read().decode())
    except urlerror.HTTPError as e:
        # errors="replace": a non-UTF-8 error body must not raise a second,
        # more confusing exception while we report the first.
        err_body = e.read().decode(errors="replace") if e.fp else ""
        # Chain the cause so the original HTTPError traceback is preserved.
        raise RuntimeError(f"Anthropic API error {e.code}: {err_body[:500]}") from e

    # Concatenate all text content blocks; other block types (e.g. tool use)
    # are intentionally ignored.
    text = "".join(
        block["text"]
        for block in result.get("content", [])
        if block.get("type") == "text"
    )

    usage = result.get("usage", {})
    return {
        "text": text,
        "model": result.get("model", model),
        "input_tokens": usage.get("input_tokens", 0),
        "output_tokens": usage.get("output_tokens", 0),
    }
|
|
|
|
|
|
def _call_openai_compatible(api_url, api_key, model, prompt, system, max_tokens, is_ollama=False):
    """Call an OpenAI-compatible chat API (OpenAI, LiteLLM) or Ollama's native API.

    Args:
        api_url: Base URL; ``/v1/chat/completions`` is appended, or
            ``/api/chat`` when ``is_ollama`` is set.
        api_key: Bearer token; the authorization header is omitted when empty
            (Ollama needs none).
        model: Model identifier to request.
        prompt: User message text.
        system: Optional system message, sent as a leading "system" role entry.
        max_tokens: Maximum tokens to generate.
        is_ollama: Route to Ollama's native /api/chat endpoint and parse its
            response shape.

    Returns:
        dict with keys: text (str), model (str), input_tokens (int),
        output_tokens (int)

    Raises:
        RuntimeError: On an HTTP error response from the API.
    """
    if is_ollama:
        url = f"{api_url.rstrip('/')}/api/chat"
    else:
        url = f"{api_url.rstrip('/')}/v1/chat/completions"

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    body = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
    }
    if is_ollama:
        # Ollama's native /api/chat streams NDJSON by default, which would
        # break the single json.loads() below — ask for one JSON object.
        body["stream"] = False

    headers = {"content-type": "application/json"}
    if api_key:
        headers["authorization"] = f"Bearer {api_key}"

    data = json.dumps(body).encode()
    req = request.Request(url, data=data, headers=headers, method="POST")

    try:
        with request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read().decode())
    except urlerror.HTTPError as e:
        # errors="replace": a non-UTF-8 error body must not raise a second,
        # more confusing exception while we report the first.
        err_body = e.read().decode(errors="replace") if e.fp else ""
        # Chain the cause so the original HTTPError traceback is preserved.
        raise RuntimeError(f"API error {e.code}: {err_body[:500]}") from e

    if is_ollama:
        text = result.get("message", {}).get("content", "")
        # Ollama reports usage as prompt_eval_count / eval_count.
        return {
            "text": text,
            "model": result.get("model", model),
            "input_tokens": result.get("prompt_eval_count", 0),
            "output_tokens": result.get("eval_count", 0),
        }

    # "or [{}]" also guards against a present-but-empty choices list,
    # which result.get("choices", [{}])[0] would not.
    choice = (result.get("choices") or [{}])[0]
    text = choice.get("message", {}).get("content", "")
    usage = result.get("usage", {})
    return {
        "text": text,
        "model": result.get("model", model),
        "input_tokens": usage.get("prompt_tokens", 0),
        "output_tokens": usage.get("completion_tokens", 0),
    }
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: resolve the user's LLM config and send a single prompt.
    import argparse

    ap = argparse.ArgumentParser()
    ap.add_argument("--user-id", type=int, required=True)
    ap.add_argument("--prompt", default="Say hello in one sentence.")
    ns = ap.parse_args()

    res = complete(ns.user_id, ns.prompt)
    print(f"Model: {res['model']}")
    print(f"Tokens: {res['input_tokens']} in, {res['output_tokens']} out")
    print(f"Response: {res['text']}")
|