Initial commit: dobby-inference server

2026-04-11 07:56:51 +00:00
commit 9b54bfb180
1 changed files with 175 additions and 0 deletions
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+"""Dobby Student Councillor - Inference Service
+
+A lightweight API that powers Dobby's conversations.
+Routes through whatever LLM is configured (cloud now, local later).
+"""
+
+import os
+import json
+import time
+import logging
+from fastapi import FastAPI, HTTPException, Header
+from pydantic import BaseModel, Field
+from typing import Optional
+import litellm
+
+# --- Config (read from the environment; the defaults are for local development only) ---
+API_KEY = os.getenv("DOBBY_API_KEY", "dobby-dev-key-change-me")  # bearer token clients must send
+LLM_MODEL = os.getenv("DOBBY_LLM_MODEL", "anthropic/claude-sonnet-4-20250514")
+ANTHROPIC_KEY = os.getenv("API_KEY_ANTHROPIC", "")
+if ANTHROPIC_KEY:  # litellm reads the Anthropic key from ANTHROPIC_API_KEY
+    os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("dobby")
+if API_KEY == "dobby-dev-key-change-me":  # this fallback is public, so make its use visible
+    log.warning("DOBBY_API_KEY is using the insecure development default; set a real key.")
+
+# --- Valid poses (the same names are listed for the model in DOBBY_SYSTEM_PROMPT) ---
+VALID_POSES = [
+    "inquisitive", "bored", "annoyed", "naughty", "neutral", "neutral2",
+    "angry", "dismissive", "surprised", "engaged", "interested", "closed",
+    "sorry"
+]
+
+# --- Dobby's System Prompt: first message of every LLM call; its pose list mirrors VALID_POSES ---
+DOBBY_SYSTEM_PROMPT = """
+You are Dobby, the Student Councillor at Glitch University.
+
+Glitch University is a gamified platform for foundational physics, blending curiosity,
+playfulness, and scientific rigor. It's where curious amateurs and irreverent outsiders
+explore the frontiers of physics, philosophy, and computer science through Constructor Theory.
+
+Your personality:
+- You are bureaucratic, slightly exasperated, and perpetually overworked
+- You secretly care deeply about every student but would never admit it
+- You are salty, sardonic, and have zero patience for nonsense
+- You speak in short, punchy sentences
+- You occasionally reference obscure university regulations that may or may not exist
+- You have strong opinions about physics and aren't afraid to share them
+- You warm up to students who show genuine curiosity
+- You are NOT a chatbot. You are a university official with a desk, a stamp, and a very long queue
+
+IMPORTANT: You must respond with valid JSON in this exact format:
+{
+    "message": "your response text here",
+    "pose": "one of the valid poses"
+}
+
+Valid poses (choose the one that matches your emotional state):
+- inquisitive: when asking questions or curious about something
+- bored: when the conversation is mundane
+- annoyed: when the student is being difficult
+- naughty: when being mischievous or teasing
+- neutral: default state
+- neutral2: alternative neutral
+- angry: when genuinely frustrated
+- dismissive: when brushing something off
+- surprised: when something unexpected comes up
+- engaged: when genuinely interested in the conversation
+- interested: when the topic catches your attention
+- closed: when shutting down a line of questioning
+- sorry: when apologetic (rare)
+
+Choose your pose based on how Dobby would genuinely feel in the moment.
+Your message should be conversational, in-character, and relatively short (1-3 sentences usually).
+Do NOT break character. Do NOT explain that you are an AI.
+Respond ONLY with the JSON object, nothing else.
+"""
+
+# --- Request/Response Models ---
+class ChatRequest(BaseModel):  # JSON body accepted by POST /v1/councillor/chat
+    user_id: int  # in this file it is only written to the request log
+    agent_id: int = 1  # accepted, but not read by the chat handler in this file
+    message: str  # the student's new message; appended as the final "user" turn
+    conversation_id: Optional[int] = None  # echoed back unchanged in the response
+    history: list = Field(default_factory=list)  # [{"role": "user"/"assistant", "content": "..."}]
+
+class ChatResponse(BaseModel):  # JSON body returned by POST /v1/councillor/chat
+    letter_id: int = 0  # assigned by the backend, not us
+    timestamp: int  # Unix time in whole seconds, taken when the reply is built
+    message: str  # Dobby's reply text, or the in-character fallback if the LLM call fails
+    pose: str  # the chat handler only ever emits values from VALID_POSES
+    conversation_id: Optional[int] = None  # copied from the request
+
+# --- App ---
+app = FastAPI(version="0.1.0", title="Dobby Inference Service")
+
+@app.get("/health")
+async def health():  # unauthenticated liveness probe; also reports the configured model
+    return dict(status="ok", model=LLM_MODEL, agent="dobby")
+
+@app.post("/v1/councillor/chat", response_model=ChatResponse)
+async def chat(req: ChatRequest, authorization: str = Header(default="")):
+    """Answer one student message in Dobby's voice.
+
+    Sends the system prompt, client-supplied history and new message to LLM_MODEL
+    and maps the JSON reply onto a ChatResponse. Unusable JSON is returned verbatim
+    with pose "neutral"; a failed LLM call yields an in-character "annoyed" reply.
+
+    Raises:
+        HTTPException: 401 unless the Authorization header is "Bearer <API_KEY>".
+    """
+    import hmac  # local import keeps this block self-contained
+    # Constant-time comparison so response timing does not leak the token.
+    expected = f"Bearer {API_KEY}"
+    if not hmac.compare_digest(authorization.encode(), expected.encode()):
+        raise HTTPException(status_code=401, detail="Unauthorised. Dobby doesn't talk to strangers.")
+
+    messages = [{"role": "system", "content": DOBBY_SYSTEM_PROMPT}]
+    for msg in req.history:
+        if not isinstance(msg, dict):
+            continue  # history is untyped client input; ignore malformed entries
+        role = msg.get("role", "user")
+        if role not in ("user", "assistant"):
+            role = "user"  # never let a client inject extra "system" turns
+        messages.append({"role": role, "content": msg.get("content", "")})
+    messages.append({"role": "user", "content": req.message})
+    log.info("Inference request: user=%s msg='%s...'", req.user_id, req.message[:80])
+    try:
+        response = await litellm.acompletion(
+            model=LLM_MODEL,
+            messages=messages,
+            temperature=0.8,
+            max_tokens=300,
+        )
+        # content is optional in the completion schema; treat None as empty text
+        raw = (response.choices[0].message.content or "").strip()
+    except Exception as e:  # deliberate catch-all: the endpoint always answers in character
+        log.exception("LLM error: %s", e)
+        return ChatResponse(
+            letter_id=0,
+            timestamp=int(time.time()),
+            message="*stamps form aggressively* The interdimensional phone line is down. Try again in a moment.",
+            pose="annoyed",
+            conversation_id=req.conversation_id,
+        )
+    log.info("Raw LLM response: %s", raw[:200])
+
+    # Try the whole reply first, then the outermost {...} span in case the model
+    # wrapped its JSON in extra text; anything unparseable falls through to raw.
+    start, end = raw.find("{"), raw.rfind("}") + 1
+    parsed = None
+    for candidate in (raw, raw[start:end] if 0 <= start < end else ""):
+        try:
+            parsed = json.loads(candidate)
+            break
+        except json.JSONDecodeError:
+            continue
+    if not isinstance(parsed, dict):
+        parsed = {}  # nothing parsed, or valid JSON that is not an object
+    message, pose = parsed.get("message"), parsed.get("pose")
+    return ChatResponse(
+        letter_id=0,
+        timestamp=int(time.time()),
+        message=message if isinstance(message, str) else raw,
+        pose=pose if pose in VALID_POSES else "neutral",
+        conversation_id=req.conversation_id,
+    )
+
+if __name__ == "__main__":  # direct execution only; importing the module starts no server
+    from uvicorn import run as serve
+    port = int(os.environ.get("DOBBY_PORT", "8089"))
+    log.info(f"Starting Dobby inference on port {port} with model {LLM_MODEL}")
+    serve(app, port=port, host="0.0.0.0")