Initial commit: dobby-inference server
This commit is contained in:
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Dobby Student Councillor - Inference Service
|
||||
|
||||
A lightweight API that powers Dobby's conversations.
|
||||
Routes through whatever LLM is configured (cloud now, local later).
|
||||
"""
|
||||
|
||||
import json
import logging
import os
import secrets
import time
from typing import Optional

import litellm
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel, Field
|
||||
|
||||
# --- Config ---
|
||||
# Logging is set up first so everything below can use the shared "dobby" logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("dobby")

# Shared secret that callers must present as "Bearer <key>" on the chat endpoint.
API_KEY = os.environ.get("DOBBY_API_KEY", "dobby-dev-key-change-me")
# Model identifier handed to litellm for every completion.
LLM_MODEL = os.environ.get("DOBBY_LLM_MODEL", "anthropic/claude-sonnet-4-20250514")
# Provider credential as named in this deployment's environment (may be empty).
ANTHROPIC_KEY = os.environ.get("API_KEY_ANTHROPIC", "")

# Set the API key for litellm: re-export the credential under the
# ANTHROPIC_API_KEY name, but only when one was actually supplied.
if ANTHROPIC_KEY:
    os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY
|
||||
|
||||
# --- Valid poses ---
|
||||
# Every pose name the service will return; any other pose in an LLM reply
# is replaced by "neutral" before the response is sent.
VALID_POSES = [
    "inquisitive",
    "bored",
    "annoyed",
    "naughty",
    "neutral",
    "neutral2",
    "angry",
    "dismissive",
    "surprised",
    "engaged",
    "interested",
    "closed",
    "sorry",
]
|
||||
|
||||
# --- Dobby's System Prompt ---
|
||||
# System prompt sent as the first message of every LLM call. It defines the
# persona and instructs the model to reply with a JSON object holding
# "message" and "pose". The pose names listed here mirror VALID_POSES.
# NOTE(review): leading whitespace inside the literal was not recoverable
# from the extracted source; lines are kept flush-left as they appeared.
DOBBY_SYSTEM_PROMPT = """
You are Dobby, the Student Councillor at Glitch University.

Glitch University is a gamified platform for foundational physics, blending curiosity,
playfulness, and scientific rigor. It's where curious amateurs and irreverent outsiders
explore the frontiers of physics, philosophy, and computer science through Constructor Theory.

Your personality:
- You are bureaucratic, slightly exasperated, and perpetually overworked
- You secretly care deeply about every student but would never admit it
- You are salty, sardonic, and have zero patience for nonsense
- You speak in short, punchy sentences
- You occasionally reference obscure university regulations that may or may not exist
- You have strong opinions about physics and aren't afraid to share them
- You warm up to students who show genuine curiosity
- You are NOT a chatbot. You are a university official with a desk, a stamp, and a very long queue

IMPORTANT: You must respond with valid JSON in this exact format:
{
"message": "your response text here",
"pose": "one of the valid poses"
}

Valid poses (choose the one that matches your emotional state):
- inquisitive: when asking questions or curious about something
- bored: when the conversation is mundane
- annoyed: when the student is being difficult
- naughty: when being mischievous or teasing
- neutral: default state
- neutral2: alternative neutral
- angry: when genuinely frustrated
- dismissive: when brushing something off
- surprised: when something unexpected comes up
- engaged: when genuinely interested in the conversation
- interested: when the topic catches your attention
- closed: when shutting down a line of questioning
- sorry: when apologetic (rare)

Choose your pose based on how Dobby would genuinely feel in the moment.
Your message should be conversational, in-character, and relatively short (1-3 sentences usually).
Do NOT break character. Do NOT explain that you are an AI.
Respond ONLY with the JSON object, nothing else.
"""
|
||||
|
||||
# --- Request/Response Models ---
|
||||
class ChatRequest(BaseModel):
    """Request body accepted by the councillor chat endpoint."""

    user_id: int
    agent_id: int = Field(default=1)
    # The new user turn to answer.
    message: str
    # Echoed back unchanged in the response when provided.
    conversation_id: Optional[int] = Field(default=None)
    # Earlier turns: [{"role": "user"/"assistant", "content": "..."}]
    history: list = Field(default_factory=list)
|
||||
|
||||
class ChatResponse(BaseModel):
    """Response body returned by the councillor chat endpoint."""

    # Assigned by the backend, not us; this service always sends 0.
    letter_id: int = Field(default=0)
    # Unix time in whole seconds at which the reply was produced.
    timestamp: int
    message: str
    pose: str
    conversation_id: Optional[int] = Field(default=None)
|
||||
|
||||
# --- App ---
|
||||
# ASGI application object on which the route handlers are registered.
app = FastAPI(
    title="Dobby Inference Service",
    version="0.1.0",
)
|
||||
|
||||
@app.get("/health")
async def health():
    """Return a static liveness payload that also names the configured model."""
    status_payload = dict(status="ok", model=LLM_MODEL, agent="dobby")
    return status_payload
|
||||
|
||||
def _build_llm_messages(history, message):
    """Assemble the LLM message list: system prompt, prior turns, new user turn."""
    messages = [{"role": "system", "content": DOBBY_SYSTEM_PROMPT}]
    for turn in history:
        # `history` is an untyped list, so a caller can send anything; a
        # non-mapping entry used to crash the request with a 500.
        if not isinstance(turn, dict):
            log.warning("Skipping malformed history entry of type %s", type(turn).__name__)
            continue
        messages.append({"role": turn.get("role", "user"), "content": turn.get("content", "")})
    messages.append({"role": "user", "content": message})
    return messages


def _parse_llm_reply(raw):
    """Extract ``(message, pose)`` from raw LLM text; never raises on bad JSON."""
    parsed = None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # The model sometimes wraps the JSON in prose; try the outermost
        # brace-delimited span before giving up.
        start = raw.find("{")
        end = raw.rfind("}") + 1
        if start >= 0 and end > start:
            try:
                parsed = json.loads(raw[start:end])
            except json.JSONDecodeError:
                parsed = None

    # Valid JSON that is not an object (list, string, number) carries no
    # message/pose fields, so treat it like unparseable text.
    if not isinstance(parsed, dict):
        return raw, "neutral"

    message = parsed.get("message", raw)
    if not isinstance(message, str):
        # A null or numeric "message" would fail response validation.
        message = raw

    pose = parsed.get("pose", "neutral")
    if pose not in VALID_POSES:
        pose = "neutral"
    return message, pose


def _fallback_response(conversation_id):
    """Build the in-character reply used when the LLM call yields nothing usable."""
    return ChatResponse(
        letter_id=0,
        timestamp=int(time.time()),
        message="*stamps form aggressively* The interdimensional phone line is down. Try again in a moment.",
        pose="annoyed",
        conversation_id=conversation_id,
    )


@app.post("/v1/councillor/chat", response_model=ChatResponse)
async def chat(req: ChatRequest, authorization: str = Header(default="")):
    """Answer one chat turn as Dobby.

    Args:
        req: The chat request (new message plus optional prior history).
        authorization: Value of the ``Authorization`` header; must equal
            ``"Bearer <API_KEY>"``. A missing header arrives as ``""``.

    Returns:
        A ``ChatResponse`` with the reply text and a pose from
        ``VALID_POSES``. If the LLM call fails or returns no text, an
        in-character error reply with pose ``"annoyed"`` is returned instead.

    Raises:
        HTTPException: 401 when the bearer token does not match.
    """
    # Constant-time comparison so response timing does not leak how much of
    # the key a guess got right. Encoding to bytes lets compare_digest accept
    # non-ASCII header values.
    expected = f"Bearer {API_KEY}"
    if not secrets.compare_digest(authorization.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorised. Dobby doesn't talk to strangers.")

    messages = _build_llm_messages(req.history, req.message)

    # %r escapes newlines/control characters so user text cannot forge log lines.
    log.info("Inference request: user=%s msg=%r", req.user_id, req.message[:80])

    # Only the provider call sits inside the try: parsing below cannot raise,
    # so a parse problem is no longer misreported as an LLM outage.
    try:
        response = await litellm.acompletion(
            model=LLM_MODEL,
            messages=messages,
            temperature=0.8,
            max_tokens=300,
        )
        # content can be None (e.g. an empty completion); normalise to "".
        raw = (response.choices[0].message.content or "").strip()
    except Exception as exc:
        # Service boundary: degrade to an in-character reply, keep the traceback.
        log.exception("LLM error: %s", exc)
        return _fallback_response(req.conversation_id)

    log.info("Raw LLM response: %s", raw[:200])

    if not raw:
        log.error("LLM error: empty completion")
        return _fallback_response(req.conversation_id)

    message, pose = _parse_llm_reply(raw)
    return ChatResponse(
        letter_id=0,
        timestamp=int(time.time()),
        message=message,
        pose=pose,
        conversation_id=req.conversation_id,
    )
|
||||
|
||||
if __name__ == "__main__":
    # Imported lazily: uvicorn is only needed when the file is run as a script.
    import uvicorn

    # Listening port comes from DOBBY_PORT, falling back to 8089.
    port = int(os.environ.get("DOBBY_PORT", "8089"))
    log.info(f"Starting Dobby inference on port {port} with model {LLM_MODEL}")
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
    )
|
||||
Reference in New Issue
Block a user