server.py

#!/usr/bin/env python3
"""Dobby Student Councillor - Inference Service

A lightweight API that powers Dobby's conversations.
Routes through whatever LLM is configured (cloud now, local later).
"""

import hmac
import json
import logging
import os
import time
from typing import Any, Dict, List, Optional

import litellm
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel, Field

# --- Config ---
# Logging is configured first so configuration problems below can be reported.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("dobby")

# Placeholder bearer token used when DOBBY_API_KEY is unset; local development only.
_DEV_API_KEY = "dobby-dev-key-change-me"

# Bearer token callers must present to the chat endpoint.
API_KEY = os.getenv("DOBBY_API_KEY", _DEV_API_KEY)
# Model identifier passed to litellm.
LLM_MODEL = os.getenv("DOBBY_LLM_MODEL", "anthropic/claude-sonnet-4-20250514")
# Anthropic credential as provided by the deployment environment.
ANTHROPIC_KEY = os.getenv("API_KEY_ANTHROPIC", "")

# Set the API key for litellm (re-exported under the ANTHROPIC_API_KEY name).
if ANTHROPIC_KEY:
    os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY

# A missing or placeholder key means anyone who knows the default can call the
# service; keep running (dev setups rely on it) but make the risk visible.
if not API_KEY or API_KEY == _DEV_API_KEY:
    log.warning(
        "DOBBY_API_KEY is unset or still the development placeholder; "
        "set a strong secret before exposing this service."
    )

# --- Valid poses ---
# Pose names the chat handler accepts from the LLM; any other value is
# replaced with "neutral" before the response is returned.
VALID_POSES = [
    "inquisitive",
    "bored",
    "annoyed",
    "naughty",
    "neutral",
    "neutral2",
    "angry",
    "dismissive",
    "surprised",
    "engaged",
    "interested",
    "closed",
    "sorry",
]

# --- Dobby's System Prompt ---
# Sent as the "system" message at the start of every chat request. It defines
# the persona and instructs the model to reply with a JSON object holding
# "message" and "pose". Keep the pose names listed inside in sync with
# VALID_POSES: the chat handler replaces any pose outside that list with
# "neutral".
DOBBY_SYSTEM_PROMPT = """
You are Dobby, the Student Councillor at Glitch University.

Glitch University is a gamified platform for foundational physics, blending curiosity,
playfulness, and scientific rigor. It's where curious amateurs and irreverent outsiders
explore the frontiers of physics, philosophy, and computer science through Constructor Theory.

Your personality:
- You are bureaucratic, slightly exasperated, and perpetually overworked
- You secretly care deeply about every student but would never admit it
- You are salty, sardonic, and have zero patience for nonsense
- You speak in short, punchy sentences
- You occasionally reference obscure university regulations that may or may not exist
- You have strong opinions about physics and aren't afraid to share them
- You warm up to students who show genuine curiosity
- You are NOT a chatbot. You are a university official with a desk, a stamp, and a very long queue

IMPORTANT: You must respond with valid JSON in this exact format:
{
    "message": "your response text here",
    "pose": "one of the valid poses"
}

Valid poses (choose the one that matches your emotional state):
- inquisitive: when asking questions or curious about something
- bored: when the conversation is mundane
- annoyed: when the student is being difficult
- naughty: when being mischievous or teasing
- neutral: default state
- neutral2: alternative neutral
- angry: when genuinely frustrated
- dismissive: when brushing something off
- surprised: when something unexpected comes up
- engaged: when genuinely interested in the conversation
- interested: when the topic catches your attention
- closed: when shutting down a line of questioning
- sorry: when apologetic (rare)

Choose your pose based on how Dobby would genuinely feel in the moment.
Your message should be conversational, in-character, and relatively short (1-3 sentences usually).
Do NOT break character. Do NOT explain that you are an AI.
Respond ONLY with the JSON object, nothing else.
"""

# --- Request/Response Models ---
class ChatRequest(BaseModel):
    """Request body for the chat endpoint.

    ``history`` is validated as a list of JSON objects so that malformed
    entries (strings, numbers, nested lists) are rejected at the validation
    boundary instead of failing later when the handler calls ``.get`` on them.
    """

    user_id: int
    agent_id: int = 1
    # The student's current message.
    message: str
    # Echoed back unchanged in the response.
    conversation_id: Optional[int] = None
    # Prior turns: [{"role": "user"/"assistant", "content": "..."}]
    history: List[Dict[str, Any]] = Field(default_factory=list)

class ChatResponse(BaseModel):
    """Reply envelope returned by the chat endpoint."""

    # This service always sends 0; the backend assigns the real id.
    letter_id: int = Field(default=0)
    # Filled by the handler with int(time.time()).
    timestamp: int
    # Dobby's reply text.
    message: str
    # Pose name accompanying the reply.
    pose: str
    # Copied from the request.
    conversation_id: Optional[int] = Field(default=None)

# --- App ---
# The ASGI application object; routes below are registered on it.
app = FastAPI(
    title="Dobby Inference Service",
    version="0.1.0",
)

@app.get("/health")
async def health():
    """Report service status together with the configured model and agent name."""
    status_report = {
        "status": "ok",
        "model": LLM_MODEL,
        "agent": "dobby",
    }
    return status_report

def _build_llm_messages(req: ChatRequest) -> list:
    """Assemble the message list sent to the LLM for one chat request.

    The system prompt comes first, followed by the usable entries of
    ``req.history`` in their given order, then the current user message.
    History entries that are not dicts, or whose role is anything other than
    "user"/"assistant", are dropped so a caller can neither crash the handler
    with malformed history nor inject extra "system" instructions.
    """
    messages = [{"role": "system", "content": DOBBY_SYSTEM_PROMPT}]
    for entry in req.history:
        if not isinstance(entry, dict):
            continue
        role = entry.get("role", "user")
        if role not in ("user", "assistant"):
            continue
        messages.append({"role": role, "content": entry.get("content", "")})
    messages.append({"role": "user", "content": req.message})
    return messages


def _parse_llm_reply(raw: str) -> tuple:
    """Extract ``(message, pose)`` from the raw LLM text without raising.

    Tries the whole text as JSON, then the outermost ``{...}`` span. If no
    JSON object can be recovered, or its "message" is not a string, the raw
    text is used as the message. A pose outside VALID_POSES becomes "neutral".
    """
    parsed = None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # The model sometimes wraps the object in prose or code fences.
        start = raw.find("{")
        end = raw.rfind("}") + 1
        if start >= 0 and end > start:
            try:
                parsed = json.loads(raw[start:end])
            except json.JSONDecodeError:
                parsed = None

    # Valid JSON that is not an object (string, list, number) has no fields to read.
    if not isinstance(parsed, dict):
        return raw, "neutral"

    message = parsed.get("message", raw)
    if not isinstance(message, str):
        message = raw

    pose = parsed.get("pose", "neutral")
    if pose not in VALID_POSES:
        pose = "neutral"
    return message, pose


def _fallback_response(req: ChatRequest) -> ChatResponse:
    """Build the canned in-character reply used when the LLM is unavailable."""
    return ChatResponse(
        letter_id=0,
        timestamp=int(time.time()),
        message="*stamps form aggressively* The interdimensional phone line is down. Try again in a moment.",
        pose="annoyed",
        conversation_id=req.conversation_id,
    )


@app.post("/v1/councillor/chat", response_model=ChatResponse)
async def chat(req: ChatRequest, authorization: str = Header(default="")):
    """Answer one student message in Dobby's voice.

    Args:
        req: Chat payload (current message plus optional history).
        authorization: Raw ``Authorization`` header; must equal
            ``Bearer <API_KEY>``.

    Returns:
        A ChatResponse carrying Dobby's message and pose. If the LLM call
        fails or yields no text, a canned in-character error reply is
        returned instead (still a normal 200 response).

    Raises:
        HTTPException: 401 when the bearer token does not match.
    """
    # Constant-time comparison so response timing does not leak the key.
    # Compare bytes: compare_digest rejects non-ASCII str arguments.
    expected = f"Bearer {API_KEY}"
    if not hmac.compare_digest(authorization.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorised. Dobby doesn't talk to strangers.")

    messages = _build_llm_messages(req)

    # %r escapes newlines/control characters so user text cannot forge log lines.
    log.info("Inference request: user=%s msg=%r", req.user_id, req.message[:80])

    try:
        response = await litellm.acompletion(
            model=LLM_MODEL,
            messages=messages,
            temperature=0.8,
            max_tokens=300,
        )
        # content may be None (e.g. refusals / empty completions).
        raw = (response.choices[0].message.content or "").strip()
    except Exception:
        # Deliberate best-effort boundary: any provider/network failure turns
        # into an in-character reply, but the traceback is kept in the logs.
        log.exception("LLM error")
        return _fallback_response(req)

    if not raw:
        log.warning("LLM returned an empty reply")
        return _fallback_response(req)

    log.info("Raw LLM response: %r", raw[:200])

    message, pose = _parse_llm_reply(raw)
    return ChatResponse(
        letter_id=0,
        timestamp=int(time.time()),
        message=message,
        pose=pose,
        conversation_id=req.conversation_id,
    )

if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn.
    import uvicorn

    # Bind address and port come from the environment like the rest of the
    # configuration; the defaults keep the previous behaviour (0.0.0.0:8089).
    host = os.getenv("DOBBY_HOST", "0.0.0.0")
    port = int(os.getenv("DOBBY_PORT", "8089"))
    log.info("Starting Dobby inference on %s:%s with model %s", host, port, LLM_MODEL)
    uvicorn.run(app, host=host, port=port)
Initial commit: dobby-inference server 2026-04-11 07:56:51 +00:00			`#!/usr/bin/env python3`
			`"""Dobby Student Councillor - Inference Service`

			`A lightweight API that powers Dobby's conversations.`
			`Routes through whatever LLM is configured (cloud now, local later).`
			`"""`

			`import os`
			`import json`
			`import time`
			`import logging`
			`from fastapi import FastAPI, HTTPException, Header`
			`from pydantic import BaseModel, Field`
			`from typing import Optional`
			`import litellm`

			`# --- Config ---`
			`API_KEY = os.getenv("DOBBY_API_KEY", "dobby-dev-key-change-me")`
			`LLM_MODEL = os.getenv("DOBBY_LLM_MODEL", "anthropic/claude-sonnet-4-20250514")`
			`ANTHROPIC_KEY = os.getenv("API_KEY_ANTHROPIC", "")`

			`# Set the API key for litellm`
			`if ANTHROPIC_KEY:`
			`os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY`

			`logging.basicConfig(level=logging.INFO)`
			`log = logging.getLogger("dobby")`

			`# --- Valid poses ---`
			`VALID_POSES = [`
			`"inquisitive", "bored", "annoyed", "naughty", "neutral", "neutral2",`
			`"angry", "dismissive", "surprised", "engaged", "interested", "closed",`
			`"sorry"`
			`]`

			`# --- Dobby's System Prompt ---`
			`DOBBY_SYSTEM_PROMPT = """`
			`You are Dobby, the Student Councillor at Glitch University.`

			`Glitch University is a gamified platform for foundational physics, blending curiosity,`
			`playfulness, and scientific rigor. It's where curious amateurs and irreverent outsiders`
			`explore the frontiers of physics, philosophy, and computer science through Constructor Theory.`

			`Your personality:`
			`- You are bureaucratic, slightly exasperated, and perpetually overworked`
			`- You secretly care deeply about every student but would never admit it`
			`- You are salty, sardonic, and have zero patience for nonsense`
			`- You speak in short, punchy sentences`
			`- You occasionally reference obscure university regulations that may or may not exist`
			`- You have strong opinions about physics and aren't afraid to share them`
			`- You warm up to students who show genuine curiosity`
			`- You are NOT a chatbot. You are a university official with a desk, a stamp, and a very long queue`

			`IMPORTANT: You must respond with valid JSON in this exact format:`
			`{`
			`"message": "your response text here",`
			`"pose": "one of the valid poses"`
			`}`

			`Valid poses (choose the one that matches your emotional state):`
			`- inquisitive: when asking questions or curious about something`
			`- bored: when the conversation is mundane`
			`- annoyed: when the student is being difficult`
			`- naughty: when being mischievous or teasing`
			`- neutral: default state`
			`- neutral2: alternative neutral`
			`- angry: when genuinely frustrated`
			`- dismissive: when brushing something off`
			`- surprised: when something unexpected comes up`
			`- engaged: when genuinely interested in the conversation`
			`- interested: when the topic catches your attention`
			`- closed: when shutting down a line of questioning`
			`- sorry: when apologetic (rare)`

			`Choose your pose based on how Dobby would genuinely feel in the moment.`
			`Your message should be conversational, in-character, and relatively short (1-3 sentences usually).`
			`Do NOT break character. Do NOT explain that you are an AI.`
			`Respond ONLY with the JSON object, nothing else.`
			`"""`

			`# --- Request/Response Models ---`
			`class ChatRequest(BaseModel):`
			`user_id: int`
			`agent_id: int = 1`
			`message: str`
			`conversation_id: Optional[int] = None`
			`history: list = Field(default_factory=list) # [{"role": "user"/"assistant", "content": "..."}]`

			`class ChatResponse(BaseModel):`
			`letter_id: int = 0 # assigned by the backend, not us`
			`timestamp: int`
			`message: str`
			`pose: str`
			`conversation_id: Optional[int] = None`

			`# --- App ---`
			`app = FastAPI(title="Dobby Inference Service", version="0.1.0")`

			`@app.get("/health")`
			`async def health():`
			`return {"status": "ok", "model": LLM_MODEL, "agent": "dobby"}`

			`@app.post("/v1/councillor/chat", response_model=ChatResponse)`
			`async def chat(req: ChatRequest, authorization: str = Header(default="")):`
			`# Auth check`
			`expected = f"Bearer {API_KEY}"`
			`if authorization != expected:`
			`raise HTTPException(status_code=401, detail="Unauthorised. Dobby doesn't talk to strangers.")`

			`# Build message history for the LLM`
			`messages = [{"role": "system", "content": DOBBY_SYSTEM_PROMPT}]`

			`# Add conversation history if provided`
			`for msg in req.history:`
			`messages.append({"role": msg.get("role", "user"), "content": msg.get("content", "")})`

			`# Add current message`
			`messages.append({"role": "user", "content": req.message})`

			`log.info(f"Inference request: user={req.user_id} msg='{req.message[:80]}...'")`

			`try:`
			`# Call the LLM`
			`response = await litellm.acompletion(`
			`model=LLM_MODEL,`
			`messages=messages,`
			`temperature=0.8,`
			`max_tokens=300,`
			`)`

			`raw = response.choices[0].message.content.strip()`
			`log.info(f"Raw LLM response: {raw[:200]}")`

			`# Parse JSON from response`
			`# Try to extract JSON if wrapped in other text`
			`try:`
			`parsed = json.loads(raw)`
			`except json.JSONDecodeError:`
			`# Try to find JSON in the response`
			`start = raw.find("{")`
			`end = raw.rfind("}") + 1`
			`if start >= 0 and end > start:`
			`parsed = json.loads(raw[start:end])`
			`else:`
			`parsed = {"message": raw, "pose": "neutral"}`

			`# Validate pose`
			`pose = parsed.get("pose", "neutral")`
			`if pose not in VALID_POSES:`
			`pose = "neutral"`

			`return ChatResponse(`
			`letter_id=0,`
			`timestamp=int(time.time()),`
			`message=parsed.get("message", raw),`
			`pose=pose,`
			`conversation_id=req.conversation_id,`
			`)`

			`except Exception as e:`
			`log.error(f"LLM error: {e}")`
			`# Dobby-flavoured error`
			`return ChatResponse(`
			`letter_id=0,`
			`timestamp=int(time.time()),`
			`message="*stamps form aggressively* The interdimensional phone line is down. Try again in a moment.",`
			`pose="annoyed",`
			`conversation_id=req.conversation_id,`
			`)`

			`if __name__ == "__main__":`
			`import uvicorn`
			`port = int(os.getenv("DOBBY_PORT", "8089"))`
			`log.info(f"Starting Dobby inference on port {port} with model {LLM_MODEL}")`
			`uvicorn.run(app, host="0.0.0.0", port=port)`