Initial commit: dobby-inference server

2026-04-11 07:56:51 +00:00
commit 9b54bfb180
1 changed files with 175 additions and 0 deletions
@@ -0,0 +1,175 @@
 #!/usr/bin/env python3
 """Dobby Student Councillor - Inference Service
 A lightweight API that powers Dobby's conversations.
 Routes through whatever LLM is configured (cloud now, local later).
 """
 import hmac
 import json
 import logging
 import os
 import time
 from typing import Optional

 import litellm
 from fastapi import FastAPI, HTTPException, Header
 from pydantic import BaseModel, Field
 # --- Config ---
 # Placeholder secret used only when DOBBY_API_KEY is not set. It is public
 # knowledge (it lives in the source), so it must never protect a real deployment.
 DEFAULT_API_KEY = "dobby-dev-key-change-me"
 # Bearer token that callers of the chat endpoint must present.
 API_KEY = os.getenv("DOBBY_API_KEY", DEFAULT_API_KEY)
 # Model identifier handed to litellm for every completion call.
 LLM_MODEL = os.getenv("DOBBY_LLM_MODEL", "anthropic/claude-sonnet-4-20250514")
 ANTHROPIC_KEY = os.getenv("API_KEY_ANTHROPIC", "")
 # Set the API key for litellm (mirrors API_KEY_ANTHROPIC into ANTHROPIC_API_KEY)
 if ANTHROPIC_KEY:
    os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("dobby")
 # Make an insecure configuration loud instead of silent: an empty or
 # placeholder key means anyone who has read the source can call the service.
 if not API_KEY or API_KEY == DEFAULT_API_KEY:
    log.warning(
        "DOBBY_API_KEY is unset, empty, or still the development placeholder; "
        "set a real secret before exposing this service."
    )
 # --- Valid poses ---
 # Pose names a reply is allowed to carry. The chat handler replaces any
 # other value coming back from the LLM with "neutral".
 VALID_POSES = [
    "inquisitive",
    "bored",
    "annoyed",
    "naughty",
    "neutral",
    "neutral2",
    "angry",
    "dismissive",
    "surprised",
    "engaged",
    "interested",
    "closed",
    "sorry",
 ]
 # --- Dobby's System Prompt ---
 # Sent as the "system" message at the start of every LLM call made by the
 # chat handler. It defines the persona and instructs the model to answer with
 # a JSON object holding "message" and "pose". The pose names listed in the
 # prompt are the same names as in VALID_POSES; keep the two in sync when
 # adding or removing a pose.
 DOBBY_SYSTEM_PROMPT = """
 You are Dobby, the Student Councillor at Glitch University.
 Glitch University is a gamified platform for foundational physics, blending curiosity,
 playfulness, and scientific rigor. It's where curious amateurs and irreverent outsiders
 explore the frontiers of physics, philosophy, and computer science through Constructor Theory.
 Your personality:
 - You are bureaucratic, slightly exasperated, and perpetually overworked
 - You secretly care deeply about every student but would never admit it
 - You are salty, sardonic, and have zero patience for nonsense
 - You speak in short, punchy sentences
 - You occasionally reference obscure university regulations that may or may not exist
 - You have strong opinions about physics and aren't afraid to share them
 - You warm up to students who show genuine curiosity
 - You are NOT a chatbot. You are a university official with a desk, a stamp, and a very long queue
 IMPORTANT: You must respond with valid JSON in this exact format:
 {
    "message": "your response text here",
    "pose": "one of the valid poses"
 }
 Valid poses (choose the one that matches your emotional state):
 - inquisitive: when asking questions or curious about something
 - bored: when the conversation is mundane
 - annoyed: when the student is being difficult
 - naughty: when being mischievous or teasing
 - neutral: default state
 - neutral2: alternative neutral
 - angry: when genuinely frustrated
 - dismissive: when brushing something off
 - surprised: when something unexpected comes up
 - engaged: when genuinely interested in the conversation
 - interested: when the topic catches your attention
 - closed: when shutting down a line of questioning
 - sorry: when apologetic (rare)
 Choose your pose based on how Dobby would genuinely feel in the moment.
 Your message should be conversational, in-character, and relatively short (1-3 sentences usually).
 Do NOT break character. Do NOT explain that you are an AI.
 Respond ONLY with the JSON object, nothing else.
 """
 # --- Request/Response Models ---
 class ChatRequest(BaseModel):
    # Inbound JSON body accepted by the chat endpoint.
    user_id: int
    agent_id: int = Field(default=1)
    # The student's new message for this turn.
    message: str
    # Echoed back unchanged in the response; None when the caller sends none.
    conversation_id: Optional[int] = Field(default=None)
    # Earlier turns, forwarded to the LLM in the order given:
    # [{"role": "user"/"assistant", "content": "..."}]
    history: list = Field(default_factory=list)
 class ChatResponse(BaseModel):
    # Reply envelope returned by the chat endpoint.
    # letter_id is assigned by the backend, not us, so it defaults to 0 here.
    letter_id: int = Field(default=0)
    # Unix time in whole seconds at which the reply was built.
    timestamp: int
    # Dobby's reply text.
    message: str
    # One of the names in VALID_POSES.
    pose: str
    # Copied from the request's conversation_id.
    conversation_id: Optional[int] = Field(default=None)
 # --- App ---
 # The FastAPI application object; the route decorators below register
 # their handlers on it.
 app = FastAPI(
    title="Dobby Inference Service",
    version="0.1.0",
 )
# Health probe: always answers "ok" and reports the configured model and the
# agent name. It performs no authentication and makes no LLM call.
@app.get("/health")
async def health():
    status_report = {
        "status": "ok",
        "model": LLM_MODEL,
        "agent": "dobby",
    }
    return status_report
def _build_llm_messages(history: list, user_message: str) -> list:
    """Return the message list for the LLM: system prompt, history, new turn.

    History entries that are not dicts are skipped rather than crashing the
    request, because ``ChatRequest.history`` is an untyped list and may
    contain anything the caller sent.
    """
    messages = [{"role": "system", "content": DOBBY_SYSTEM_PROMPT}]
    for entry in history:
        if not isinstance(entry, dict):
            log.warning("Skipping malformed history entry of type %s", type(entry).__name__)
            continue
        messages.append({
            "role": entry.get("role", "user"),
            "content": entry.get("content", ""),
        })
    messages.append({"role": "user", "content": user_message})
    return messages


def _parse_llm_reply(raw: str) -> tuple:
    """Turn the raw LLM text into a ``(message, pose)`` pair without raising.

    The model is asked for a bare JSON object, but it may wrap it in prose or
    return something that is not an object at all. Any reply that cannot be
    read as a JSON object is passed through verbatim with the "neutral" pose.
    """
    parsed = None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # Try to find a JSON object embedded in surrounding text.
        start = raw.find("{")
        end = raw.rfind("}") + 1
        if 0 <= start < end:
            try:
                parsed = json.loads(raw[start:end])
            except json.JSONDecodeError:
                # Braces were present but did not delimit valid JSON.
                parsed = None

    # Valid JSON that is not an object (list, string, number) has no fields to read.
    if not isinstance(parsed, dict):
        return raw, "neutral"

    message = parsed.get("message", raw)
    if not isinstance(message, str):
        # The response model requires a string; fall back to the raw text.
        message = raw

    pose = parsed.get("pose", "neutral")
    if pose not in VALID_POSES:
        pose = "neutral"
    return message, pose


@app.post("/v1/councillor/chat", response_model=ChatResponse)
async def chat(req: ChatRequest, authorization: str = Header(default="")):
    """Answer one chat turn as Dobby.

    Args:
        req: The chat request (new message plus optional history).
        authorization: The ``Authorization`` header; must equal
            ``Bearer <API_KEY>``.

    Returns:
        A ``ChatResponse`` with Dobby's message and pose. If the LLM call
        fails or returns no text, an in-character error reply with the
        "annoyed" pose is returned instead of an HTTP error.

    Raises:
        HTTPException: 401 when the bearer token does not match.
    """
    # Auth check. compare_digest avoids leaking how much of the token matched
    # through response timing; bytes are used because the str form rejects
    # non-ASCII input.
    expected = f"Bearer {API_KEY}"
    if not hmac.compare_digest(authorization.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorised. Dobby doesn't talk to strangers.")

    messages = _build_llm_messages(req.history, req.message)
    log.info("Inference request: user=%s msg='%s...'", req.user_id, req.message[:80])

    # Only the LLM round-trip is guarded by the broad handler; parsing below
    # cannot raise, so a parse problem is never misreported as an outage.
    try:
        response = await litellm.acompletion(
            model=LLM_MODEL,
            messages=messages,
            temperature=0.8,
            max_tokens=300,
        )
        content = response.choices[0].message.content
        raw = content.strip() if isinstance(content, str) else ""
        if not raw:
            raise ValueError("LLM returned no text content")
    except Exception as exc:
        log.exception("LLM error: %s", exc)
        # Dobby-flavoured error
        return ChatResponse(
            letter_id=0,
            timestamp=int(time.time()),
            message="*stamps form aggressively* The interdimensional phone line is down. Try again in a moment.",
            pose="annoyed",
            conversation_id=req.conversation_id,
        )

    log.info("Raw LLM response: %s", raw[:200])
    message, pose = _parse_llm_reply(raw)
    return ChatResponse(
        letter_id=0,
        timestamp=int(time.time()),
        message=message,
        pose=pose,
        conversation_id=req.conversation_id,
    )
 if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on every interface,
    # on the port given by DOBBY_PORT (8089 when unset).
    import uvicorn
    port = int(os.environ.get("DOBBY_PORT", "8089"))
    log.info(f"Starting Dobby inference on port {port} with model {LLM_MODEL}")
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
    )