Read agent_id/agent_name from request body (LiteLLM extra params)
LiteLLM passes extra parameters as top-level JSON fields in the request body. The new _extract_agent_name() helper (which replaces _agent_name_from_headers()) reads agent_id and agent_name from the body first, then falls back to the X-Agent-Name / X-Agent-Id headers. Critically, both fields are stripped from the body before any upstream call — otherwise Claude/LM Studio reject the unknown parameters. Applied to all four route handlers: /v1/chat/completions, /v1/messages, /api/chat, /api/generate.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -774,17 +774,30 @@ async def _route_agent_chat(
|
|||||||
# Saliency + recollection pipeline
|
# Saliency + recollection pipeline
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _agent_name_from_headers(headers: dict) -> str:
|
def _extract_agent_name(body: dict, headers: dict) -> tuple[str, dict]:
|
||||||
"""
|
"""
|
||||||
Extract agent identity from request headers.
|
Extract agent identity from the request body (LiteLLM extra params) or headers.
|
||||||
Checks X-Agent-Name first (e.g. 'GUNNAR' → 'gunnar'),
|
Priority: body agent_name > body agent_id > X-Agent-Name header > X-Agent-Id header.
|
||||||
then falls back to X-Agent-Id (e.g. '3' → '3').
|
|
||||||
Both are stored as the agent_name key in the agent_models table.
|
Also returns a cleaned copy of the body with agent_id/agent_name stripped,
|
||||||
|
so unknown parameters are never forwarded to the upstream LLM.
|
||||||
"""
|
"""
|
||||||
name = headers.get("x-agent-name", "").strip().lower()
|
# Pull from body first — LiteLLM passes extra params as top-level JSON fields
|
||||||
if name:
|
agent_name = str(body.get("agent_name", "")).strip().lower()
|
||||||
return name
|
agent_id = str(body.get("agent_id", "")).strip()
|
||||||
return headers.get("x-agent-id", "").strip() # numeric IDs work as-is
|
|
||||||
|
# Fall back to headers (X-Agent-Name / X-Agent-Id)
|
||||||
|
if not agent_name:
|
||||||
|
agent_name = headers.get("x-agent-name", "").strip().lower()
|
||||||
|
if not agent_id and not agent_name:
|
||||||
|
agent_id = headers.get("x-agent-id", "").strip()
|
||||||
|
|
||||||
|
identity = agent_name or agent_id # name preferred; id as string fallback
|
||||||
|
|
||||||
|
# Strip festinger-specific params so the upstream never sees them
|
||||||
|
clean_body = {k: v for k, v in body.items() if k not in ("agent_id", "agent_name")}
|
||||||
|
|
||||||
|
return identity, clean_body
|
||||||
|
|
||||||
|
|
||||||
async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers: dict | None = None) -> dict:
|
async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers: dict | None = None) -> dict:
|
||||||
@@ -800,7 +813,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers
|
|||||||
# Derive a ModelConfig from the intercepted request so context discovery can
|
# Derive a ModelConfig from the intercepted request so context discovery can
|
||||||
# mirror Agent0's current model without a separate write_model_id config.
|
# mirror Agent0's current model without a separate write_model_id config.
|
||||||
request_model = _extract_request_model_config(path, body, hdrs, cfg)
|
request_model = _extract_request_model_config(path, body, hdrs, cfg)
|
||||||
agent_name = _agent_name_from_headers(hdrs)
|
agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler
|
||||||
|
|
||||||
# Extract only the last user message — agent responses and reasoning traces
|
# Extract only the last user message — agent responses and reasoning traces
|
||||||
# are noise for both cue scanning and concept discovery.
|
# are noise for both cue scanning and concept discovery.
|
||||||
@@ -898,7 +911,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers
|
|||||||
async def chat(request: Request) -> Response:
|
async def chat(request: Request) -> Response:
|
||||||
cfg = request.app.state.yaml_config
|
cfg = request.app.state.yaml_config
|
||||||
pool = request.app.state.pool
|
pool = request.app.state.pool
|
||||||
body = await request.json()
|
_, body = _extract_agent_name(await request.json(), dict(request.headers))
|
||||||
model = body.get("model", "unknown")
|
model = body.get("model", "unknown")
|
||||||
upstream = cfg["upstream_ollama"]
|
upstream = cfg["upstream_ollama"]
|
||||||
min_len = cfg["detection"]["min_length"]
|
min_len = cfg["detection"]["min_length"]
|
||||||
@@ -931,7 +944,7 @@ async def chat(request: Request) -> Response:
|
|||||||
async def generate(request: Request) -> Response:
|
async def generate(request: Request) -> Response:
|
||||||
cfg = request.app.state.yaml_config
|
cfg = request.app.state.yaml_config
|
||||||
pool = request.app.state.pool
|
pool = request.app.state.pool
|
||||||
body = await request.json()
|
_, body = _extract_agent_name(await request.json(), dict(request.headers))
|
||||||
model = body.get("model", "unknown")
|
model = body.get("model", "unknown")
|
||||||
upstream = cfg["upstream_ollama"]
|
upstream = cfg["upstream_ollama"]
|
||||||
min_len = cfg["detection"]["min_length"]
|
min_len = cfg["detection"]["min_length"]
|
||||||
@@ -970,7 +983,7 @@ async def anthropic_messages(request: Request) -> Response:
|
|||||||
cfg = request.app.state.yaml_config
|
cfg = request.app.state.yaml_config
|
||||||
pool = request.app.state.pool
|
pool = request.app.state.pool
|
||||||
raw_body = await request.body()
|
raw_body = await request.body()
|
||||||
body = json.loads(raw_body)
|
_, body = _extract_agent_name(json.loads(raw_body), dict(request.headers))
|
||||||
# Capture streaming intent BEFORE call_anthropic forces stream=False
|
# Capture streaming intent BEFORE call_anthropic forces stream=False
|
||||||
original_stream: bool = bool(body.get("stream", False))
|
original_stream: bool = bool(body.get("stream", False))
|
||||||
model = body.get("model", "unknown")
|
model = body.get("model", "unknown")
|
||||||
@@ -1030,13 +1043,13 @@ async def anthropic_messages(request: Request) -> Response:
|
|||||||
async def openai_chat_completions(request: Request) -> Response:
|
async def openai_chat_completions(request: Request) -> Response:
|
||||||
cfg = request.app.state.yaml_config
|
cfg = request.app.state.yaml_config
|
||||||
pool = request.app.state.pool
|
pool = request.app.state.pool
|
||||||
body = await request.json()
|
raw_body = await request.json()
|
||||||
|
hdrs = dict(request.headers)
|
||||||
|
agent_name, body = _extract_agent_name(raw_body, hdrs) # strips agent_id/agent_name
|
||||||
model = body.get("model", "unknown")
|
model = body.get("model", "unknown")
|
||||||
upstream = cfg["upstream_openai"]
|
upstream = cfg["upstream_openai"]
|
||||||
min_len = cfg["detection"]["min_length"]
|
min_len = cfg["detection"]["min_length"]
|
||||||
original_stream: bool = bool(body.get("stream", False))
|
original_stream: bool = bool(body.get("stream", False))
|
||||||
hdrs = dict(request.headers)
|
|
||||||
agent_name = _agent_name_from_headers(hdrs)
|
|
||||||
|
|
||||||
log.info("chat route=/v1/chat/completions model=%s upstream=%s agent=%s stream=%s",
|
log.info("chat route=/v1/chat/completions model=%s upstream=%s agent=%s stream=%s",
|
||||||
model, upstream, agent_name or "—", original_stream)
|
model, upstream, agent_name or "—", original_stream)
|
||||||
|
|||||||
Reference in New Issue
Block a user