Read agent_id/agent_name from request body (LiteLLM extra params)

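LiteLLM passes extra parameters as top-level JSON fields in the request
body. The new _extract_agent_name() (which replaces
_agent_name_from_headers()) reads agent_name and agent_id from the body
and falls back to the X-Agent-Name / X-Agent-Id headers. A name always
wins over an id, so the effective precedence is: body agent_name >
X-Agent-Name header > body agent_id > X-Agent-Id header.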

Critically, both fields are stripped from the body before any upstream
call — otherwise Claude/LM Studio reject the unknown parameters.

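The stripping is applied in all four route handlers: /v1/chat/completions,
/v1/messages, /api/chat, /api/generate. Note that only
/v1/chat/completions keeps the identity returned by the extraction; the
other three handlers discard it, and process_prompt() re-runs the
extraction on the already-cleaned body, so on those paths a body-supplied
agent_name/agent_id appears to be lost and only the headers can identify
the agent (to be confirmed against the full handlers).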

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-21 19:41:12 +02:00
parent cd471c4c95
commit a9aa594d73
+29 -16
View File
@@ -774,17 +774,30 @@ async def _route_agent_chat(
# Saliency + recollection pipeline # Saliency + recollection pipeline
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _agent_name_from_headers(headers: dict) -> str: def _extract_agent_name(body: dict, headers: dict) -> tuple[str, dict]:
""" """
Extract agent identity from request headers. Extract agent identity from the request body (LiteLLM extra params) or headers.
Checks X-Agent-Name first (e.g. 'GUNNAR' → 'gunnar'), Priority: body agent_name > X-Agent-Name header > body agent_id > X-Agent-Id header.
then falls back to X-Agent-Id (e.g. '3' → '3').
Both are stored as the agent_name key in the agent_models table. Also returns a cleaned copy of the body with agent_id/agent_name stripped,
so unknown parameters are never forwarded to the upstream LLM.
""" """
name = headers.get("x-agent-name", "").strip().lower() # Pull from body first — LiteLLM passes extra params as top-level JSON fields
if name: agent_name = str(body.get("agent_name", "")).strip().lower()
return name agent_id = str(body.get("agent_id", "")).strip()
return headers.get("x-agent-id", "").strip() # numeric IDs work as-is
# Fall back to headers (X-Agent-Name / X-Agent-Id)
if not agent_name:
agent_name = headers.get("x-agent-name", "").strip().lower()
if not agent_id and not agent_name:
agent_id = headers.get("x-agent-id", "").strip()
identity = agent_name or agent_id # name preferred; id as string fallback
# Strip festinger-specific params so the upstream never sees them
clean_body = {k: v for k, v in body.items() if k not in ("agent_id", "agent_name")}
return identity, clean_body
async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers: dict | None = None) -> dict: async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers: dict | None = None) -> dict:
@@ -800,7 +813,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers
# Derive a ModelConfig from the intercepted request so context discovery can # Derive a ModelConfig from the intercepted request so context discovery can
# mirror Agent0's current model without a separate write_model_id config. # mirror Agent0's current model without a separate write_model_id config.
request_model = _extract_request_model_config(path, body, hdrs, cfg) request_model = _extract_request_model_config(path, body, hdrs, cfg)
agent_name = _agent_name_from_headers(hdrs) agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler
# Extract only the last user message — agent responses and reasoning traces # Extract only the last user message — agent responses and reasoning traces
# are noise for both cue scanning and concept discovery. # are noise for both cue scanning and concept discovery.
@@ -898,7 +911,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers
async def chat(request: Request) -> Response: async def chat(request: Request) -> Response:
cfg = request.app.state.yaml_config cfg = request.app.state.yaml_config
pool = request.app.state.pool pool = request.app.state.pool
body = await request.json() _, body = _extract_agent_name(await request.json(), dict(request.headers))
model = body.get("model", "unknown") model = body.get("model", "unknown")
upstream = cfg["upstream_ollama"] upstream = cfg["upstream_ollama"]
min_len = cfg["detection"]["min_length"] min_len = cfg["detection"]["min_length"]
@@ -931,7 +944,7 @@ async def chat(request: Request) -> Response:
async def generate(request: Request) -> Response: async def generate(request: Request) -> Response:
cfg = request.app.state.yaml_config cfg = request.app.state.yaml_config
pool = request.app.state.pool pool = request.app.state.pool
body = await request.json() _, body = _extract_agent_name(await request.json(), dict(request.headers))
model = body.get("model", "unknown") model = body.get("model", "unknown")
upstream = cfg["upstream_ollama"] upstream = cfg["upstream_ollama"]
min_len = cfg["detection"]["min_length"] min_len = cfg["detection"]["min_length"]
@@ -970,7 +983,7 @@ async def anthropic_messages(request: Request) -> Response:
cfg = request.app.state.yaml_config cfg = request.app.state.yaml_config
pool = request.app.state.pool pool = request.app.state.pool
raw_body = await request.body() raw_body = await request.body()
body = json.loads(raw_body) _, body = _extract_agent_name(json.loads(raw_body), dict(request.headers))
# Capture streaming intent BEFORE call_anthropic forces stream=False # Capture streaming intent BEFORE call_anthropic forces stream=False
original_stream: bool = bool(body.get("stream", False)) original_stream: bool = bool(body.get("stream", False))
model = body.get("model", "unknown") model = body.get("model", "unknown")
@@ -1030,13 +1043,13 @@ async def anthropic_messages(request: Request) -> Response:
async def openai_chat_completions(request: Request) -> Response: async def openai_chat_completions(request: Request) -> Response:
cfg = request.app.state.yaml_config cfg = request.app.state.yaml_config
pool = request.app.state.pool pool = request.app.state.pool
body = await request.json() raw_body = await request.json()
hdrs = dict(request.headers)
agent_name, body = _extract_agent_name(raw_body, hdrs) # strips agent_id/agent_name
model = body.get("model", "unknown") model = body.get("model", "unknown")
upstream = cfg["upstream_openai"] upstream = cfg["upstream_openai"]
min_len = cfg["detection"]["min_length"] min_len = cfg["detection"]["min_length"]
original_stream: bool = bool(body.get("stream", False)) original_stream: bool = bool(body.get("stream", False))
hdrs = dict(request.headers)
agent_name = _agent_name_from_headers(hdrs)
log.info("chat route=/v1/chat/completions model=%s upstream=%s agent=%s stream=%s", log.info("chat route=/v1/chat/completions model=%s upstream=%s agent=%s stream=%s",
model, upstream, agent_name or "", original_stream) model, upstream, agent_name or "", original_stream)