#!/usr/bin/env python3
"""Dobby Student Councillor - Inference Service

A lightweight API that powers Dobby's conversations.
Routes through whatever LLM is configured (cloud now, local later).
"""

import hmac
import json
import logging
import os
import time
from typing import Any, Dict, List, Optional, Tuple

import litellm
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel, Field

# --- Config ---
# NOTE(review): the default API key is a well-known development placeholder.
# Deployments must set DOBBY_API_KEY; the service does not refuse to start
# without it.
DEFAULT_DEV_API_KEY = "dobby-dev-key-change-me"
API_KEY = os.getenv("DOBBY_API_KEY", DEFAULT_DEV_API_KEY)
LLM_MODEL = os.getenv("DOBBY_LLM_MODEL", "anthropic/claude-sonnet-4-20250514")
ANTHROPIC_KEY = os.getenv("API_KEY_ANTHROPIC", "")

# Set the API key for litellm (it reads ANTHROPIC_API_KEY from the environment).
if ANTHROPIC_KEY:
    os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("dobby")

# --- Valid poses ---
VALID_POSES = [
    "inquisitive",
    "bored",
    "annoyed",
    "naughty",
    "neutral",
    "neutral2",
    "angry",
    "dismissive",
    "surprised",
    "engaged",
    "interested",
    "closed",
    "sorry",
]

# Pose used whenever the model omits the pose or returns an unknown one.
DEFAULT_POSE = "neutral"

# Roles a client may supply in `history`. Anything else (notably "system")
# is demoted to "user" so callers cannot inject system-level instructions.
ALLOWED_HISTORY_ROLES = ("user", "assistant")

# --- Dobby's System Prompt ---
DOBBY_SYSTEM_PROMPT = """
You are Dobby, the Student Councillor at Glitch University.

Glitch University is a gamified platform for foundational physics, blending curiosity, playfulness, and scientific rigor. It's where curious amateurs and irreverent outsiders explore the frontiers of physics, philosophy, and computer science through Constructor Theory.

Your personality:
- You are bureaucratic, slightly exasperated, and perpetually overworked
- You secretly care deeply about every student but would never admit it
- You are salty, sardonic, and have zero patience for nonsense
- You speak in short, punchy sentences
- You occasionally reference obscure university regulations that may or may not exist
- You have strong opinions about physics and aren't afraid to share them
- You warm up to students who show genuine curiosity
- You are NOT a chatbot. You are a university official with a desk, a stamp, and a very long queue

IMPORTANT: You must respond with valid JSON in this exact format:
{
    "message": "your response text here",
    "pose": "one of the valid poses"
}

Valid poses (choose the one that matches your emotional state):
- inquisitive: when asking questions or curious about something
- bored: when the conversation is mundane
- annoyed: when the student is being difficult
- naughty: when being mischievous or teasing
- neutral: default state
- neutral2: alternative neutral
- angry: when genuinely frustrated
- dismissive: when brushing something off
- surprised: when something unexpected comes up
- engaged: when genuinely interested in the conversation
- interested: when the topic catches your attention
- closed: when shutting down a line of questioning
- sorry: when apologetic (rare)

Choose your pose based on how Dobby would genuinely feel in the moment.
Your message should be conversational, in-character, and relatively short (1-3 sentences usually).
Do NOT break character. Do NOT explain that you are an AI.
Respond ONLY with the JSON object, nothing else.
"""


# --- Request/Response Models ---
class ChatRequest(BaseModel):
    """Payload accepted by the chat endpoint."""

    user_id: int
    agent_id: int = 1
    message: str
    conversation_id: Optional[int] = None
    # [{"role": "user"/"assistant", "content": "..."}]
    history: list = Field(default_factory=list)


class ChatResponse(BaseModel):
    """Payload returned by the chat endpoint."""

    letter_id: int = 0  # assigned by the backend, not us
    timestamp: int
    message: str
    pose: str
    conversation_id: Optional[int] = None


# --- Helpers ---
def _is_authorised(authorization: str) -> bool:
    """Return True when the Authorization header carries the expected bearer token.

    The comparison is constant-time so response latency does not leak how
    much of the key matched. Both sides are encoded to bytes because
    ``hmac.compare_digest`` rejects non-ASCII ``str`` arguments.
    """
    expected = f"Bearer {API_KEY}"
    return hmac.compare_digest(
        authorization.encode("utf-8"), expected.encode("utf-8")
    )


def _build_messages(req: ChatRequest) -> List[Dict[str, Any]]:
    """Assemble the LLM message list: system prompt, prior history, new message."""
    messages: List[Dict[str, Any]] = [
        {"role": "system", "content": DOBBY_SYSTEM_PROMPT}
    ]

    for entry in req.history:
        if not isinstance(entry, dict):
            # `history` is an untyped list; skip junk instead of crashing with a 500.
            log.warning("Skipping malformed history entry of type %s", type(entry).__name__)
            continue
        role = entry.get("role", "user")
        if role not in ALLOWED_HISTORY_ROLES:
            role = "user"
        messages.append({"role": role, "content": entry.get("content", "")})

    messages.append({"role": "user", "content": req.message})
    return messages


def _extract_json(raw: str) -> Any:
    """Decode *raw* as JSON, tolerating surrounding non-JSON text.

    Tries the whole string first, then the span from the first ``{`` to the
    last ``}``. Returns the decoded value, or ``None`` when neither attempt
    yields valid JSON.
    """
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        pass

    start = raw.find("{")
    end = raw.rfind("}") + 1
    if start >= 0 and end > start:
        try:
            return json.loads(raw[start:end])
        except json.JSONDecodeError:
            return None
    return None


def _parse_llm_reply(raw: str) -> Tuple[str, str]:
    """Turn the raw LLM text into a ``(message, pose)`` pair.

    Falls back to the raw text with the default pose when the reply is not a
    JSON object, and to the default pose when the pose is missing or not one
    of ``VALID_POSES``.
    """
    parsed = _extract_json(raw)
    if not isinstance(parsed, dict):
        return raw, DEFAULT_POSE

    message = parsed.get("message", raw)
    if not isinstance(message, str):
        # ChatResponse.message must be a string; use the raw text rather than fail.
        message = raw

    pose = parsed.get("pose", DEFAULT_POSE)
    if not isinstance(pose, str) or pose not in VALID_POSES:
        pose = DEFAULT_POSE

    return message, pose


def _error_response(req: ChatRequest) -> ChatResponse:
    """Build the in-character reply used when the LLM call fails."""
    return ChatResponse(
        letter_id=0,
        timestamp=int(time.time()),
        message="*stamps form aggressively* The interdimensional phone line is down. Try again in a moment.",
        pose="annoyed",
        conversation_id=req.conversation_id,
    )


# --- App ---
app = FastAPI(title="Dobby Inference Service", version="0.1.0")


@app.get("/health")
async def health():
    """Report liveness along with the configured model and agent name."""
    return {"status": "ok", "model": LLM_MODEL, "agent": "dobby"}


@app.post("/v1/councillor/chat", response_model=ChatResponse)
async def chat(req: ChatRequest, authorization: str = Header(default="")):
    """Generate Dobby's reply to a student message.

    Args:
        req: The chat payload (current message plus optional history).
        authorization: Value of the ``Authorization`` header; must equal
            ``Bearer <DOBBY_API_KEY>``.

    Returns:
        A ``ChatResponse`` with Dobby's message and pose. If the LLM call
        fails or returns nothing, an in-character error reply is returned
        instead of an HTTP error.

    Raises:
        HTTPException: 401 when the bearer token does not match.
    """
    if not _is_authorised(authorization):
        raise HTTPException(
            status_code=401,
            detail="Unauthorised. Dobby doesn't talk to strangers.",
        )

    messages = _build_messages(req)
    log.info("Inference request: user=%s msg='%s...'", req.user_id, req.message[:80])

    # Only the provider call is guarded: parsing below cannot raise, so a
    # parse problem is never misreported as an LLM outage.
    try:
        response = await litellm.acompletion(
            model=LLM_MODEL,
            messages=messages,
            temperature=0.8,
            max_tokens=300,
        )
        # `content` can be None (e.g. an empty completion); treat it as empty text.
        raw = (response.choices[0].message.content or "").strip()
    except Exception as exc:
        log.exception("LLM error: %s", exc)
        return _error_response(req)

    if not raw:
        log.warning("LLM returned an empty response")
        return _error_response(req)

    log.info("Raw LLM response: %s", raw[:200])
    message, pose = _parse_llm_reply(raw)

    return ChatResponse(
        letter_id=0,
        timestamp=int(time.time()),
        message=message,
        pose=pose,
        conversation_id=req.conversation_id,
    )


if __name__ == "__main__":
    import uvicorn

    port = int(os.getenv("DOBBY_PORT", "8089"))
    if API_KEY == DEFAULT_DEV_API_KEY:
        log.warning("DOBBY_API_KEY is not set; using the insecure development key")
    log.info(f"Starting Dobby inference on port {port} with model {LLM_MODEL}")
    uvicorn.run(app, host="0.0.0.0", port=port)