Files

176 lines
6.0 KiB
Python
Raw Permalink Normal View History

2026-04-11 07:56:51 +00:00
#!/usr/bin/env python3
"""Dobby Student Councillor - Inference Service
A lightweight API that powers Dobby's conversations.
Routes through whatever LLM is configured (cloud now, local later).
"""
import hmac
import json
import logging
import os
import time
from typing import Optional

import litellm
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel, Field
# --- Config ---
# Placeholder key used when DOBBY_API_KEY is unset; suitable for local development only.
_DEV_API_KEY = "dobby-dev-key-change-me"

# Shared secret callers must present as "Authorization: Bearer <key>".
API_KEY = os.getenv("DOBBY_API_KEY", _DEV_API_KEY)
# Model identifier handed to litellm for every completion call.
LLM_MODEL = os.getenv("DOBBY_LLM_MODEL", "anthropic/claude-sonnet-4-20250514")
ANTHROPIC_KEY = os.getenv("API_KEY_ANTHROPIC", "")

# Set the API key for litellm by exporting it under ANTHROPIC_API_KEY
if ANTHROPIC_KEY:
    os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("dobby")

# An empty or placeholder key lets anyone who has seen this file authenticate,
# so make that misconfiguration visible at startup instead of failing silently.
if not API_KEY or API_KEY == _DEV_API_KEY:
    log.warning(
        "DOBBY_API_KEY is unset, empty, or still the development placeholder; "
        "set a real key before exposing this service."
    )
# --- Valid poses ---
# Every pose name the service will return; anything else coming back from the
# model is replaced with "neutral" by the chat endpoint.
VALID_POSES = [
    "inquisitive",
    "bored",
    "annoyed",
    "naughty",
    "neutral",
    "neutral2",
    "angry",
    "dismissive",
    "surprised",
    "engaged",
    "interested",
    "closed",
    "sorry",
]
# --- Dobby's System Prompt ---
# Persona and output contract, sent as the system message on every LLM call.
# The prompt asks the model for a JSON object with "message" and "pose" keys.
# The pose names listed in the prompt text should stay in sync with VALID_POSES:
# a pose returned by the model that is not in that list is replaced by "neutral".
DOBBY_SYSTEM_PROMPT = """
You are Dobby, the Student Councillor at Glitch University.
Glitch University is a gamified platform for foundational physics, blending curiosity,
playfulness, and scientific rigor. It's where curious amateurs and irreverent outsiders
explore the frontiers of physics, philosophy, and computer science through Constructor Theory.
Your personality:
- You are bureaucratic, slightly exasperated, and perpetually overworked
- You secretly care deeply about every student but would never admit it
- You are salty, sardonic, and have zero patience for nonsense
- You speak in short, punchy sentences
- You occasionally reference obscure university regulations that may or may not exist
- You have strong opinions about physics and aren't afraid to share them
- You warm up to students who show genuine curiosity
- You are NOT a chatbot. You are a university official with a desk, a stamp, and a very long queue
IMPORTANT: You must respond with valid JSON in this exact format:
{
"message": "your response text here",
"pose": "one of the valid poses"
}
Valid poses (choose the one that matches your emotional state):
- inquisitive: when asking questions or curious about something
- bored: when the conversation is mundane
- annoyed: when the student is being difficult
- naughty: when being mischievous or teasing
- neutral: default state
- neutral2: alternative neutral
- angry: when genuinely frustrated
- dismissive: when brushing something off
- surprised: when something unexpected comes up
- engaged: when genuinely interested in the conversation
- interested: when the topic catches your attention
- closed: when shutting down a line of questioning
- sorry: when apologetic (rare)
Choose your pose based on how Dobby would genuinely feel in the moment.
Your message should be conversational, in-character, and relatively short (1-3 sentences usually).
Do NOT break character. Do NOT explain that you are an AI.
Respond ONLY with the JSON object, nothing else.
"""
# --- Request/Response Models ---
class ChatRequest(BaseModel):
    """Request body accepted by the councillor chat endpoint."""

    # Caller-supplied user identifier; the endpoint only logs it.
    user_id: int
    agent_id: int = Field(default=1)
    # The new user turn to answer.
    message: str
    # Echoed back unchanged in the response when provided.
    conversation_id: Optional[int] = Field(default=None)
    # Earlier turns, forwarded to the LLM in the given order ahead of `message`.
    # Expected item shape: {"role": "user"/"assistant", "content": "..."}
    history: list = Field(default_factory=list)
class ChatResponse(BaseModel):
    """Reply envelope returned by the councillor chat endpoint."""

    # Always 0 from this service: assigned by the backend, not us.
    letter_id: int = Field(default=0)
    # Unix time in whole seconds at which the reply was produced.
    timestamp: int
    # Dobby's reply text.
    message: str
    # Pose name accompanying the reply; the endpoint only emits VALID_POSES entries.
    pose: str
    # Copied from the request so the caller can correlate the reply.
    conversation_id: Optional[int] = Field(default=None)
# --- App ---
# The application object that the route decorators in this module register on.
app = FastAPI(
    title="Dobby Inference Service",
    version="0.1.0",
)
@app.get("/health")
async def health():
    """Health check: report that the service is up and which model it is configured to use."""
    status_report = dict(status="ok", model=LLM_MODEL, agent="dobby")
    return status_report
def _parse_llm_reply(raw):
    """Extract ``(message, pose)`` from the model's raw text without raising.

    The model is asked for a JSON object with "message" and "pose" keys, but it
    may wrap the object in extra text, emit malformed JSON, or return JSON that
    is not an object. In every such case the raw text is used as the message
    and the pose falls back to "neutral".

    Args:
        raw: The stripped text content of the LLM response.

    Returns:
        A ``(message, pose)`` tuple of strings; ``pose`` is always one of
        ``VALID_POSES``.
    """
    parsed = None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # The object may be wrapped in prose or code fences, so retry on the
        # outermost {...} span before giving up on JSON altogether.
        start = raw.find("{")
        end = raw.rfind("}") + 1
        if 0 <= start < end:
            try:
                parsed = json.loads(raw[start:end])
            except json.JSONDecodeError:
                parsed = None

    # Valid JSON that is not an object (list, string, number) has no usable keys.
    if not isinstance(parsed, dict):
        return raw, "neutral"

    message = parsed.get("message", raw)
    if not isinstance(message, str):
        # e.g. null or a nested object: the response model requires a string
        message = raw

    pose = parsed.get("pose", "neutral")
    if pose not in VALID_POSES:
        pose = "neutral"
    return message, pose


@app.post("/v1/councillor/chat", response_model=ChatResponse)
async def chat(req: ChatRequest, authorization: str = Header(default="")):
    """Answer one user message in Dobby's voice.

    Builds the LLM conversation from the system prompt, the supplied history
    and the new message, calls the configured model, and converts its reply
    into a ``ChatResponse``.

    Args:
        req: The chat request body.
        authorization: Value of the ``Authorization`` header; must be exactly
            ``Bearer <API_KEY>``.

    Returns:
        A ``ChatResponse`` with ``letter_id`` 0, the current Unix timestamp,
        the reply text, a pose from ``VALID_POSES`` and the request's
        ``conversation_id``. If the LLM call fails, an in-character error
        message with pose "annoyed" is returned instead of an HTTP error.

    Raises:
        HTTPException: 401 when the bearer token does not match; 422 when a
            history entry is not an object.
    """
    # Auth check. Compare as bytes in constant time: compare_digest rejects
    # non-ASCII str input, and a plain != leaks timing information.
    expected = f"Bearer {API_KEY}"
    if not hmac.compare_digest(authorization.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorised. Dobby doesn't talk to strangers.")

    # Build message history for the LLM
    messages = [{"role": "system", "content": DOBBY_SYSTEM_PROMPT}]
    for turn in req.history:
        if not isinstance(turn, dict):
            # `history` is an untyped list, so reject malformed entries explicitly
            # rather than failing with an AttributeError (HTTP 500) below.
            raise HTTPException(
                status_code=422,
                detail="Each history entry must be an object with 'role' and 'content'.",
            )
        messages.append({"role": turn.get("role", "user"), "content": turn.get("content", "")})
    # Add current message
    messages.append({"role": "user", "content": req.message})

    log.info("Inference request: user=%s msg='%s...'", req.user_id, req.message[:80])

    try:
        # Only the remote call and reading its payload are expected to fail.
        response = await litellm.acompletion(
            model=LLM_MODEL,
            messages=messages,
            temperature=0.8,
            max_tokens=300,
        )
        raw = response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberate best-effort: the caller always gets a Dobby-flavoured reply.
        log.exception("LLM error: %s", e)
        return ChatResponse(
            letter_id=0,
            timestamp=int(time.time()),
            message="*stamps form aggressively* The interdimensional phone line is down. Try again in a moment.",
            pose="annoyed",
            conversation_id=req.conversation_id,
        )

    log.info("Raw LLM response: %s", raw[:200])
    message, pose = _parse_llm_reply(raw)
    return ChatResponse(
        letter_id=0,
        timestamp=int(time.time()),
        message=message,
        pose=pose,
        conversation_id=req.conversation_id,
    )
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces, using
    # the port from DOBBY_PORT (8089 when the variable is not set).
    import uvicorn

    listen_port = int(os.environ.get("DOBBY_PORT", "8089"))
    log.info(f"Starting Dobby inference on port {listen_port} with model {LLM_MODEL}")
    uvicorn.run(app, host="0.0.0.0", port=listen_port)