Initial commit: dobby-inference server
This commit is contained in:
@@ -0,0 +1,175 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Dobby Student Councillor - Inference Service
|
||||||
|
|
||||||
|
A lightweight API that powers Dobby's conversations.
|
||||||
|
Routes through whatever LLM is configured (cloud now, local later).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from fastapi import FastAPI, HTTPException, Header
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from typing import Optional
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
# --- Config ---
|
||||||
|
# Placeholder bearer token used when DOBBY_API_KEY is not set. It is public
# knowledge (it lives in the source), so it must never be relied on outside
# local development.
_DEFAULT_API_KEY = "dobby-dev-key-change-me"

# Shared secret callers must present as "Authorization: Bearer <key>".
API_KEY = os.getenv("DOBBY_API_KEY", _DEFAULT_API_KEY)
# Model identifier handed to litellm for every completion request.
LLM_MODEL = os.getenv("DOBBY_LLM_MODEL", "anthropic/claude-sonnet-4-20250514")
# Provider credential, supplied under this service's own variable name.
ANTHROPIC_KEY = os.getenv("API_KEY_ANTHROPIC", "")

# Set the API key for litellm: re-export the credential under the
# ANTHROPIC_API_KEY name, but only when one was actually provided so an
# already-exported ANTHROPIC_API_KEY is not clobbered with an empty string.
if ANTHROPIC_KEY:
    os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("dobby")

# Make an insecure deployment loud instead of silent.
if API_KEY == _DEFAULT_API_KEY:
    log.warning(
        "DOBBY_API_KEY is not set; falling back to the built-in development key. "
        "Set DOBBY_API_KEY before exposing this service."
    )
|
||||||
|
|
||||||
|
# --- Valid poses ---
|
||||||
|
# Pose names accepted in a reply. chat() replaces any pose that is not listed
# here with "neutral" before responding.
VALID_POSES = [
    "inquisitive",
    "bored",
    "annoyed",
    "naughty",
    "neutral",
    "neutral2",
    "angry",
    "dismissive",
    "surprised",
    "engaged",
    "interested",
    "closed",
    "sorry",
]
|
||||||
|
|
||||||
|
# --- Dobby's System Prompt ---
|
||||||
|
# System prompt sent as the first message of every completion request.
# The pose list in this text must stay in sync with VALID_POSES, because the
# chat handler replaces any pose outside VALID_POSES with "neutral".
DOBBY_SYSTEM_PROMPT = """
You are Dobby, the Student Councillor at Glitch University.

Glitch University is a gamified platform for foundational physics, blending curiosity,
playfulness, and scientific rigor. It's where curious amateurs and irreverent outsiders
explore the frontiers of physics, philosophy, and computer science through Constructor Theory.

Your personality:
- You are bureaucratic, slightly exasperated, and perpetually overworked
- You secretly care deeply about every student but would never admit it
- You are salty, sardonic, and have zero patience for nonsense
- You speak in short, punchy sentences
- You occasionally reference obscure university regulations that may or may not exist
- You have strong opinions about physics and aren't afraid to share them
- You warm up to students who show genuine curiosity
- You are NOT a chatbot. You are a university official with a desk, a stamp, and a very long queue

IMPORTANT: You must respond with valid JSON in this exact format:
{
"message": "your response text here",
"pose": "one of the valid poses"
}

Valid poses (choose the one that matches your emotional state):
- inquisitive: when asking questions or curious about something
- bored: when the conversation is mundane
- annoyed: when the student is being difficult
- naughty: when being mischievous or teasing
- neutral: default state
- neutral2: alternative neutral
- angry: when genuinely frustrated
- dismissive: when brushing something off
- surprised: when something unexpected comes up
- engaged: when genuinely interested in the conversation
- interested: when the topic catches your attention
- closed: when shutting down a line of questioning
- sorry: when apologetic (rare)

Choose your pose based on how Dobby would genuinely feel in the moment.
Your message should be conversational, in-character, and relatively short (1-3 sentences usually).
Do NOT break character. Do NOT explain that you are an AI.
Respond ONLY with the JSON object, nothing else.
"""
|
||||||
|
|
||||||
|
# --- Request/Response Models ---
|
||||||
|
class ChatRequest(BaseModel):
    """Body of a POST to the councillor chat endpoint."""

    # Caller-supplied user identifier; the chat handler only logs it.
    user_id: int
    # Agent selector; defaults to 1. Not read by the visible handler code.
    agent_id: int = 1
    # The new user turn to answer.
    message: str
    # Opaque conversation handle, echoed back unchanged in the response.
    conversation_id: Optional[int] = None
    # Earlier turns, oldest first:
    # [{"role": "user"/"assistant", "content": "..."}]
    history: list = Field(default_factory=list)
|
||||||
|
|
||||||
|
class ChatResponse(BaseModel):
    """Reply returned by the councillor chat endpoint."""

    # Assigned by the backend, not by this service; always sent as 0 from here.
    letter_id: int = 0
    # Whole seconds since the Unix epoch at which the reply was produced.
    timestamp: int
    # Dobby's reply text.
    message: str
    # Pose name accompanying the reply.
    pose: str
    # Conversation handle copied from the request, if one was given.
    conversation_id: Optional[int] = None
|
||||||
|
|
||||||
|
# --- App ---
|
||||||
|
app = FastAPI(title="Dobby Inference Service", version="0.1.0")
|
||||||
|
|
||||||
|
@app.get("/health")
async def health():
    """Report that the service is up, along with the configured model and agent name."""
    return dict(status="ok", model=LLM_MODEL, agent="dobby")
|
||||||
|
|
||||||
|
def _extract_reply(raw):
    """Turn the raw LLM text into a ``(message, pose)`` pair without raising.

    The model is asked to answer with a JSON object holding ``message`` and
    ``pose``. This helper tolerates the ways that can go wrong:

    * the object is wrapped in other text -> the outermost ``{...}`` span is
      parsed instead;
    * the text is not JSON at all, or decodes to something other than an
      object -> the raw text becomes the message with the "neutral" pose;
    * ``message`` is missing or not a string -> the raw text is used;
    * ``pose`` is missing or not in ``VALID_POSES`` -> "neutral" is used.

    Args:
        raw: The stripped text returned by the model.

    Returns:
        A ``(message, pose)`` tuple of two strings.
    """
    parsed = None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # Try to find a JSON object embedded in surrounding text.
        start = raw.find("{")
        end = raw.rfind("}") + 1
        if start >= 0 and end > start:
            try:
                parsed = json.loads(raw[start:end])
            except json.JSONDecodeError:
                # The braces did not delimit valid JSON; fall back to raw text.
                parsed = None

    # json.loads may legitimately return a list, string, number, ... - only an
    # object carries the fields we need.
    if not isinstance(parsed, dict):
        return raw, "neutral"

    message = parsed.get("message", raw)
    if not isinstance(message, str):
        message = raw

    pose = parsed.get("pose", "neutral")
    if pose not in VALID_POSES:
        pose = "neutral"

    return message, pose


@app.post("/v1/councillor/chat", response_model=ChatResponse)
async def chat(req: ChatRequest, authorization: str = Header(default="")):
    """Answer one chat turn as Dobby.

    Builds the message list (system prompt, supplied history, new user
    message), asks the configured LLM for a completion, and converts the reply
    into a ``ChatResponse``.

    Args:
        req: The chat request body.
        authorization: Value of the ``Authorization`` header; must equal
            ``"Bearer <API_KEY>"``.

    Returns:
        A ``ChatResponse`` with Dobby's message and pose. If the LLM call fails
        or yields no text, an in-character error reply with the "annoyed" pose
        is returned instead of an HTTP error.

    Raises:
        HTTPException: 401 when the bearer token does not match.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    # Auth check. compare_digest avoids leaking how much of the token matched
    # through response timing; bytes are used because the str form of
    # compare_digest rejects non-ASCII input.
    expected = f"Bearer {API_KEY}"
    if not hmac.compare_digest(authorization.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorised. Dobby doesn't talk to strangers.")

    # Build message history for the LLM.
    messages = [{"role": "system", "content": DOBBY_SYSTEM_PROMPT}]

    # Add conversation history if provided. `history` is an untyped list, so
    # skip entries that are not dicts rather than failing the whole request.
    # NOTE(review): roles are passed through unvalidated, so a caller can
    # supply extra "system" turns - confirm all callers are trusted.
    for entry in req.history:
        if not isinstance(entry, dict):
            log.warning("Skipping malformed history entry of type %s", type(entry).__name__)
            continue
        messages.append({"role": entry.get("role", "user"), "content": entry.get("content", "")})

    # Add current message.
    messages.append({"role": "user", "content": req.message})

    log.info("Inference request: user=%s msg='%s...'", req.user_id, req.message[:80])

    try:
        # Call the LLM.
        response = await litellm.acompletion(
            model=LLM_MODEL,
            messages=messages,
            temperature=0.8,
            max_tokens=300,
        )
        content = response.choices[0].message.content
        # Treat a missing or blank completion like any other LLM failure.
        if not content or not content.strip():
            raise ValueError("LLM returned an empty reply")
        raw = content.strip()
    except Exception as e:
        # Deliberate best-effort boundary: whatever went wrong upstream, the
        # caller still gets a well-formed, Dobby-flavoured reply.
        log.exception("LLM error: %s", e)
        return ChatResponse(
            letter_id=0,
            timestamp=int(time.time()),
            message="*stamps form aggressively* The interdimensional phone line is down. Try again in a moment.",
            pose="annoyed",
            conversation_id=req.conversation_id,
        )

    log.info("Raw LLM response: %s", raw[:200])

    # Parse JSON from the response and validate the pose.
    message, pose = _extract_reply(raw)

    return ChatResponse(
        letter_id=0,
        timestamp=int(time.time()),
        message=message,
        pose=pose,
        conversation_id=req.conversation_id,
    )
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Imported here so uvicorn is only required when the module is run as a
    # script, not when `app` is imported by another server process.
    import uvicorn

    # Bind address and port are both overridable from the environment. The
    # defaults keep the previous behaviour: all interfaces, port 8089.
    host = os.getenv("DOBBY_HOST", "0.0.0.0")
    port = int(os.getenv("DOBBY_PORT", "8089"))
    log.info("Starting Dobby inference on port %s with model %s", port, LLM_MODEL)
    uvicorn.run(app, host=host, port=port)
|
||||||
Reference in New Issue
Block a user