diff --git a/plugins/festinger/festinger/main.py b/plugins/festinger/festinger/main.py
index cbbab5c..e8d6bb4 100644
--- a/plugins/festinger/festinger/main.py
+++ b/plugins/festinger/festinger/main.py
@@ -1327,6 +1327,47 @@ async def ollama_generate_with_agent_id(agent_id: str, request: Request) -> Resp
     return await _handle_ollama_generate(request, agent_name=agent_id.lower())
 
 
+@app.get("/{agent_id}/v1/models")
+async def models_with_agent_id(agent_id: str, request: Request) -> Response:
+    """
+    Model discovery for agent-prefixed base URLs.
+
+    Clients (LiteLLM, OpenAI SDK) that use base_url=http://festinger/{agent_id}/v1
+    will call GET /{agent_id}/v1/models to discover available models.
+    Strip the agent prefix and proxy to the agent's configured upstream, falling
+    back to ``upstream_openai``; an unreachable upstream yields an empty list.
+    """
+    pool = request.app.state.pool
+    cfg = request.app.state.yaml_config
+    name = agent_id.lower()
+
+    agent_model = await _get_agent_routing_model(pool, name)
+    # Prefer the agent's own upstream, else the configured OpenAI upstream;
+    # `or ""` also tolerates an `upstream_openai` entry that is present but None.
+    agent_base = agent_model.base_url if agent_model else None
+    raw_base = (agent_base or cfg.get("upstream_openai") or "").rstrip("/")
+    # Accept bases written with or without a trailing "/v1".
+    if raw_base.endswith("/v1"):
+        raw_base = raw_base[:-3]
+    upstream_url = f"{raw_base}/v1/models"
+
+    log.info("models_discovery  agent=%s → %s", name, upstream_url)
+    try:
+        # NOTE(review): no request headers (e.g. auth) are forwarded upstream — confirm.
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            r = await client.get(upstream_url)
+        return Response(content=r.content, status_code=r.status_code,
+                        media_type=r.headers.get("content-type", "application/json"))
+    except httpx.RequestError as exc:
+        log.error("models_discovery_error  agent=%s url=%s  %s", name, upstream_url, exc)
+        # Best effort: report an empty model list rather than an error status.
+        return Response(
+            content='{"object":"list","data":[]}',
+            status_code=200,
+            media_type="application/json",
+        )
+
+
 # ---------------------------------------------------------------------------
 # /scan — gutask integration: scan task / letter text and return recollection
 # ---------------------------------------------------------------------------
@@ -4215,6 +4256,15 @@ async def passthrough(path: str, request: Request) -> Response:
         log.info("passthrough redirect  %s → /v1/chat/completions", path)
         return await openai_chat_completions(request)
 
+    # Agent-prefixed paths: /{agent_id}/v1/... where the agent_id segment was not
+    # matched by a dedicated route (e.g. GET /{agent_id}/v1/completions).
+    # Strip the leading segment so the upstream receives a clean /v1/... path.
+    parts = path.split("/", 1)
+    if len(parts) == 2 and parts[1].startswith("v1/"):
+        stripped = parts[1]
+        log.info("passthrough strip_agent_prefix  %s → /%s", path, stripped)
+        path = stripped
+
     if path.startswith("v1/"):
         upstream = cfg["upstream_anthropic"]
         relay_headers = ANTHROPIC_RELAY_HEADERS