Read agent_id/agent_name from request body (LiteLLM extra params)

LiteLLM passes extra parameters as top-level JSON fields in the request body. _extract_agent_name() now reads agent_id and agent_name from the body first, then falls back to the X-Agent-Name / X-Agent-Id headers. Critically, both fields are stripped from the body before any upstream call — otherwise upstreams such as Claude and LM Studio reject the unknown parameters. Applied to all four route handlers: /v1/chat/completions, /v1/messages, /api/chat, /api/generate.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 19:41:12 +02:00
parent cd471c4c95
commit a9aa594d73
1 changed files with 29 additions and 16 deletions
@@ -774,17 +774,30 @@ async def _route_agent_chat(
 # Saliency + recollection pipeline
 # ---------------------------------------------------------------------------
-def _agent_name_from_headers(headers: dict) -> str:
+def _extract_agent_name(body: dict, headers: dict) -> tuple[str, dict]:
    """
-    Extract agent identity from request headers.
+    Extract agent identity from the request body (LiteLLM extra params) or headers.
-    Checks X-Agent-Name first (e.g. 'GUNNAR' → 'gunnar'),
+    Priority: body agent_name > X-Agent-Name header > body agent_id > X-Agent-Id header.
-    then falls back to X-Agent-Id (e.g. '3' → '3').
+
-    Both are stored as the agent_name key in the agent_models table.
+    Also returns a cleaned copy of the body with agent_id/agent_name stripped,
    so unknown parameters are never forwarded to the upstream LLM.
    """
-    name = headers.get("x-agent-name", "").strip().lower()
+    # Pull from body first — LiteLLM passes extra params as top-level JSON fields
-    if name:
+    agent_name = str(body.get("agent_name", "")).strip().lower()
-        return name
+    agent_id = str(body.get("agent_id", "")).strip()
-    return headers.get("x-agent-id", "").strip()  # numeric IDs work as-is
+
    # Fall back to headers (X-Agent-Name / X-Agent-Id)
    if not agent_name:
        agent_name = headers.get("x-agent-name", "").strip().lower()
    if not agent_id and not agent_name:
        agent_id = headers.get("x-agent-id", "").strip()
    identity = agent_name or agent_id  # name preferred; id as string fallback
    # Strip festinger-specific params so the upstream never sees them
    clean_body = {k: v for k, v in body.items() if k not in ("agent_id", "agent_name")}
    return identity, clean_body
 async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers: dict | None = None) -> dict:
@@ -800,7 +813,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers
    # Derive a ModelConfig from the intercepted request so context discovery can
    # mirror Agent0's current model without a separate write_model_id config.
    request_model = _extract_request_model_config(path, body, hdrs, cfg)
-    agent_name = _agent_name_from_headers(hdrs)
+    agent_name, _ = _extract_agent_name(body, hdrs)  # body already cleaned by route handler
    # Extract only the last user message — agent responses and reasoning traces
    # are noise for both cue scanning and concept discovery.
@@ -898,7 +911,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers
 async def chat(request: Request) -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
-    body = await request.json()
+    _, body = _extract_agent_name(await request.json(), dict(request.headers))
    model = body.get("model", "unknown")
    upstream = cfg["upstream_ollama"]
    min_len = cfg["detection"]["min_length"]
@@ -931,7 +944,7 @@ async def chat(request: Request) -> Response:
 async def generate(request: Request) -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
-    body = await request.json()
+    _, body = _extract_agent_name(await request.json(), dict(request.headers))
    model = body.get("model", "unknown")
    upstream = cfg["upstream_ollama"]
    min_len = cfg["detection"]["min_length"]
@@ -970,7 +983,7 @@ async def anthropic_messages(request: Request) -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
    raw_body = await request.body()
-    body = json.loads(raw_body)
+    _, body = _extract_agent_name(json.loads(raw_body), dict(request.headers))
    # Capture streaming intent BEFORE call_anthropic forces stream=False
    original_stream: bool = bool(body.get("stream", False))
    model = body.get("model", "unknown")
@@ -1030,13 +1043,13 @@ async def anthropic_messages(request: Request) -> Response:
 async def openai_chat_completions(request: Request) -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
-    body = await request.json()
+    raw_body = await request.json()
    hdrs = dict(request.headers)
    agent_name, body = _extract_agent_name(raw_body, hdrs)  # strips agent_id/agent_name
    model = body.get("model", "unknown")
    upstream = cfg["upstream_openai"]
    min_len = cfg["detection"]["min_length"]
    original_stream: bool = bool(body.get("stream", False))
    hdrs = dict(request.headers)
    agent_name = _agent_name_from_headers(hdrs)
    log.info("chat  route=/v1/chat/completions model=%s upstream=%s agent=%s stream=%s",
             model, upstream, agent_name or "—", original_stream)