Add cross-protocol agent routing at /v1/chat/completions

When X-Agent-Name or X-Agent-Id is present and matches an agent_models entry, Festinger routes the main inference request to the configured provider — not just the memory-writing utility model. Protocol translation: - Incoming OpenAI → outgoing Claude: system-message extraction, max_tokens defaulting, response translated back to OpenAI format - Incoming OpenAI → outgoing LM Studio/OpenAI: model + base_url swap - All responses returned as OpenAI-compatible JSON or SSE Also adds streaming synthesis for /v1/chat/completions (OpenAI SSE) and X-Agent-Id fallback in _agent_name_from_headers so numeric AGENT_ID env vars work without needing AGENT_NAME. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Add per-agent model assignments (agent_models table)
2026-04-21 19:32:57 +02:00 · 2026-04-21 19:24:28 +02:00 · 2026-04-21 19:17:51 +02:00
5 changed files with 584 additions and 23 deletions
@@ -7,9 +7,10 @@ upstream_ollama: "http://host.docker.internal:11434"
 # Override via UPSTREAM_ANTHROPIC env var if needed
 upstream_anthropic: "https://api.anthropic.com"

-# Where the real OpenAI-compatible API is running (for /v1/chat/completions)
+# Where the real OpenAI-compatible API is running (for /v1/chat/completions).
+# For LM Studio set this to its local address, e.g. "http://host.docker.internal:1234"
 # Override via UPSTREAM_OPENAI env var if needed
-upstream_openai: "https://api.openai.com"
+upstream_openai: "http://host.docker.internal:1234"

 # Port this proxy listens on inside the container (exposed as 11434 on the docker network)
 proxy_port: 11434
@@ -111,3 +111,14 @@ CREATE TABLE IF NOT EXISTS kg_write_log (
 CREATE INDEX IF NOT EXISTS kwl_created_idx ON kg_write_log (created_at DESC);
 CREATE INDEX IF NOT EXISTS kwl_concept_idx ON kg_write_log (concept_id);
 CREATE INDEX IF NOT EXISTS kwl_op_idx      ON kg_write_log (op);
+
+-- ---------------------------------------------------------------------------
+-- agent_models — per-agent LLM model assignments
+-- Maps an agent identity (from X-Agent-Name header) to a specific model.
+-- Priority over write_model_id (global default) when agent_name is present.
+-- ---------------------------------------------------------------------------
+CREATE TABLE IF NOT EXISTS agent_models (
+    agent_name  TEXT PRIMARY KEY,              -- normalised lowercase, e.g. 'gunnar', 'rind'
+    model_id    INT  NOT NULL REFERENCES models(id) ON DELETE CASCADE,
+    created_at  TIMESTAMPTZ NOT NULL DEFAULT now()
+);
@@ -60,6 +60,16 @@ async def init_schema(pool: asyncpg.Pool) -> None:
        await conn.execute(
            "ALTER TABLE soas ADD COLUMN IF NOT EXISTS first_seen_context TEXT NOT NULL DEFAULT ''"
        )
+        # Migration: per-agent model assignments (agent_name → model_id)
+        await conn.execute(
+            """
+            CREATE TABLE IF NOT EXISTS agent_models (
+                agent_name  TEXT PRIMARY KEY,
+                model_id    INT  NOT NULL REFERENCES models(id) ON DELETE CASCADE,
+                created_at  TIMESTAMPTZ NOT NULL DEFAULT now()
+            )
+            """
+        )
    log.info("schema applied")


@@ -44,6 +44,7 @@ from .cue_scanner import scan_cues
 from .recollection import build_recollection_block, inject_recollection
 from .resolution_job import run_resolution_job, last_run_timestamp
 from .tokenizer import tokenize
+from .llm_client import ModelConfig
 from .write_queue import enqueue_context_discover, enqueue_cue, start_worker, stop_worker
 from .urd_writer import InsertRequest, insert_urd_edge
 from .wordnet import import_wordnet, CITATION as WORDNET_CITATION
@@ -517,11 +518,276 @@ def inject_recollection_anthropic(body: dict, block: str) -> dict:
    return body


+# ---------------------------------------------------------------------------
+# Request model mirroring
+# ---------------------------------------------------------------------------
+
+def _extract_request_model_config(
+    path: str,
+    body: dict,
+    request_headers: dict,
+    cfg: dict,
+) -> ModelConfig | None:
+    """
+    Build a ModelConfig from the intercepted request so Festinger's utility
+    LLM calls (context discovery) can use the same provider/model as Agent0 —
+    no separate write_model_id configuration needed.
+
+    Provider inference:
+      /v1/messages            → anthropic
+      /v1/chat/completions    → lm-studio (OpenAI-compatible; base_url from upstream_openai)
+      /api/chat, /api/generate → lm-studio (Ollama's OpenAI-compat endpoint; base_url from upstream_ollama)
+    """
+    model_name = body.get("model", "")
+    if not model_name:
+        return None
+
+    if path == "/v1/messages":
+        api_key = request_headers.get("x-api-key", "")
+        return ModelConfig(
+            provider="claude",
+            model_name=model_name,
+            api_key=api_key,
+        )
+
+    if path == "/v1/chat/completions":
+        auth = request_headers.get("authorization", "")
+        api_key = auth[len("Bearer "):].strip() if auth.lower().startswith("bearer ") else auth
+        base_url = cfg.get("upstream_openai", "")
+        return ModelConfig(
+            provider="lm-studio",
+            model_name=model_name,
+            api_key=api_key or "lm-studio",
+            base_url=base_url,
+        )
+
+    if path in ("/api/chat", "/api/generate"):
+        # Ollama exposes an OpenAI-compatible endpoint at the same base URL.
+        base_url = cfg.get("upstream_ollama", "")
+        return ModelConfig(
+            provider="lm-studio",
+            model_name=model_name,
+            api_key="ollama",
+            base_url=base_url,
+        )
+
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Agent routing — cross-protocol dispatch
+# ---------------------------------------------------------------------------
+
+def _openai_to_anthropic_body(body: dict, model_name: str) -> dict:
+    """
+    Translate an OpenAI chat completions request to Anthropic Messages API format.
+    - system messages are lifted to the top-level 'system' field
+    - max_tokens defaults to 4096 if not specified
+    - temperature/top_p forwarded if present
+    """
+    system_parts: list[str] = []
+    claude_messages: list[dict] = []
+
+    for m in body.get("messages", []):
+        role = m.get("role", "")
+        content = m.get("content", "")
+        if role == "system":
+            if isinstance(content, str):
+                system_parts.append(content)
+            elif isinstance(content, list):
+                system_parts.extend(
+                    b.get("text", "") for b in content
+                    if isinstance(b, dict) and b.get("type") == "text"
+                )
+        else:
+            claude_messages.append(m)
+
+    anthropic_body: dict = {
+        "model": model_name,
+        "messages": claude_messages,
+        "max_tokens": body.get("max_tokens") or 4096,
+    }
+    if system_parts:
+        anthropic_body["system"] = "\n\n".join(system_parts)
+    for key in ("temperature", "top_p", "stop_sequences"):
+        if key in body:
+            anthropic_body[key] = body[key]
+    return anthropic_body
+
+
+def _anthropic_to_openai_response(data: dict) -> dict:
+    """Convert an Anthropic Messages API response to OpenAI chat completions format."""
+    text = "".join(
+        b.get("text", "") for b in data.get("content", [])
+        if b.get("type") == "text"
+    )
+    usage = data.get("usage", {})
+    stop_map = {"end_turn": "stop", "max_tokens": "length", "stop_sequence": "stop"}
+    finish = stop_map.get(data.get("stop_reason", "end_turn"), "stop")
+    return {
+        "id": data.get("id", "chatcmpl-festinger"),
+        "object": "chat.completion",
+        "created": int(time.time()),
+        "model": data.get("model", ""),
+        "choices": [{
+            "index": 0,
+            "message": {"role": "assistant", "content": text},
+            "finish_reason": finish,
+        }],
+        "usage": {
+            "prompt_tokens": usage.get("input_tokens", 0),
+            "completion_tokens": usage.get("output_tokens", 0),
+            "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
+        },
+    }
+
+
+def _openai_sse_from_response(raw: dict) -> bytes:
+    """
+    Synthesise a minimal OpenAI-compatible SSE stream from a complete (non-streaming)
+    OpenAI-format response dict.  Used when the client sent stream=true but the
+    upstream was called non-streaming.
+    """
+    text = raw.get("choices", [{}])[0].get("message", {}).get("content", "")
+    model = raw.get("model", "")
+    cid = raw.get("id", "chatcmpl-festinger")
+    ts = int(time.time())
+
+    def chunk(delta: dict, finish_reason=None) -> str:
+        return "data: " + json.dumps({
+            "id": cid, "object": "chat.completion.chunk",
+            "created": ts, "model": model,
+            "choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}],
+        }) + "\n\n"
+
+    parts = [
+        chunk({"role": "assistant", "content": ""}),
+        chunk({"content": text}),
+        chunk({}, finish_reason="stop"),
+        "data: [DONE]\n\n",
+    ]
+    return "".join(parts).encode()
+
+
+async def _get_agent_routing_model(pool, agent_name: str) -> ModelConfig | None:
+    """
+    Look up the agent's configured model from the agent_models table.
+    agent_name is the normalised key (lowercase name or numeric ID string).
+    """
+    if not agent_name:
+        return None
+    async with pool.acquire() as conn:
+        row = await conn.fetchrow(
+            """
+            SELECT m.provider, m.model_name, m.api_key, m.base_url
+            FROM agent_models am
+            JOIN models m ON m.id = am.model_id
+            WHERE am.agent_name = $1
+            """,
+            agent_name,
+        )
+    if not row:
+        return None
+    return ModelConfig(
+        provider=row["provider"],
+        model_name=row["model_name"],
+        api_key=row["api_key"],
+        base_url=row["base_url"] or "",
+    )
+
+
+async def _route_agent_chat(
+    body: dict,
+    agent_model: ModelConfig,
+    original_stream: bool,
+    pool,
+    cfg: dict,
+    request_headers: dict,
+    min_len: int,
+) -> Response:
+    """
+    Route an OpenAI-compatible chat completions request to the agent's
+    configured provider, handling cross-protocol translation.
+
+    claude   → Anthropic Messages API (translated in both directions)
+    openai   → OpenAI-compatible endpoint (base_url + model swap)
+    lm-studio→ same as openai
+    """
+    # Run recollection injection (same pipeline as standard path)
+    body = await process_prompt(body, "/v1/chat/completions", pool, cfg, request_headers)
+
+    sess = session_key(agent_model.model_name, body.get("messages", []))
+
+    # Capture upstream config for use in the loop-detection re-run closure
+    if agent_model.provider == "claude":
+        anthropic_upstream = agent_model.base_url or "https://api.anthropic.com"
+        anthropic_headers = {
+            "x-api-key": agent_model.api_key,
+            "anthropic-version": "2023-06-01",
+            "content-type": "application/json",
+        }
+    else:
+        oai_upstream = agent_model.base_url or cfg.get("upstream_openai", "https://api.openai.com")
+        oai_headers = {
+            "authorization": f"Bearer {agent_model.api_key or 'lm-studio'}",
+            "content-type": "application/json",
+        }
+
+    async def _call(current_body: dict) -> tuple[str, dict]:
+        """Call the agent's upstream; always return (text, openai_format_dict)."""
+        if agent_model.provider == "claude":
+            ab = _openai_to_anthropic_body(current_body, agent_model.model_name)
+            text, raw_a = await call_anthropic(ab, anthropic_upstream, anthropic_headers)
+            return text, _anthropic_to_openai_response(raw_a)
+        else:
+            b = dict(current_body)
+            b["model"] = agent_model.model_name
+            text, raw_o = await call_openai(b, oai_upstream, oai_headers)
+            return text, raw_o
+
+    text, raw = await _call(body)
+    count = record_and_check(sess, text, min_len)
+
+    if count >= 2:
+        log.warning(
+            "loop_detected (agent routed)  agent_model=%s session=%s count=%d",
+            agent_model.model_name, sess[1], count,
+        )
+        body, override = apply_mitigations(body, count, cfg)
+        if override is not None:
+            raw["choices"] = [{
+                "index": 0,
+                "message": {"role": "assistant", "content": override},
+                "finish_reason": "stop",
+            }]
+            raw["loop_detected"] = True
+        else:
+            text, raw = await _call(body)
+            record_and_check(sess, text, min_len)
+
+    if original_stream:
+        return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
+    return Response(content=json.dumps(raw), media_type="application/json")
+
+
 # ---------------------------------------------------------------------------
 # Saliency + recollection pipeline
 # ---------------------------------------------------------------------------

-async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
+def _agent_name_from_headers(headers: dict) -> str:
+    """
+    Extract agent identity from request headers.
+    Checks X-Agent-Name first (e.g. 'GUNNAR' → 'gunnar'),
+    then falls back to X-Agent-Id (e.g. '3' → '3').
+    Both are stored as the agent_name key in the agent_models table.
+    """
+    name = headers.get("x-agent-name", "").strip().lower()
+    if name:
+        return name
+    return headers.get("x-agent-id", "").strip()  # numeric IDs work as-is
+
+
+async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers: dict | None = None) -> dict:
    """
    Run the saliency + recollection pipeline over the prompt.
    Returns a (possibly modified) body dict with the recollection block injected.
@@ -530,6 +796,12 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
    conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
    recency_days = int(await get_config(pool, "recollection_recency_days", "90"))

+    hdrs = request_headers or {}
+    # Derive a ModelConfig from the intercepted request so context discovery can
+    # mirror Agent0's current model without a separate write_model_id config.
+    request_model = _extract_request_model_config(path, body, hdrs, cfg)
+    agent_name = _agent_name_from_headers(hdrs)
+
    # Extract only the last user message — agent responses and reasoning traces
    # are noise for both cue scanning and concept discovery.
    user_text = _last_user_message_text(body, path)
@@ -590,7 +862,11 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:

    # 3. Enqueue for LLM-driven discovery if there are candidates to evaluate.
    if novel_candidates and len(user_text) >= 20:
-        await enqueue_context_discover(user_text, novel_candidates)
+        await enqueue_context_discover(
+            user_text, novel_candidates,
+            agent_name=agent_name,
+            fallback_model=request_model,
+        )

    if not salient_for_read:
        return body
@@ -628,7 +904,7 @@ async def chat(request: Request) -> Response:
    min_len = cfg["detection"]["min_length"]
    log.info("chat  route=/api/chat model=%s", model)
    try:
-        body = await process_prompt(body, "/api/chat", pool, cfg)
+        body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers))
        text, raw = await call_ollama("/api/chat", body, upstream)
        sess = session_key(model, body.get("messages", []))
        count = record_and_check(sess, text, min_len)
@@ -661,7 +937,7 @@ async def generate(request: Request) -> Response:
    min_len = cfg["detection"]["min_length"]
    log.info("chat  route=/api/generate model=%s", model)
    try:
-        body = await process_prompt(body, "/api/generate", pool, cfg)
+        body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers))
        messages = [{"role": "user", "content": body.get("prompt", "")}]
        sess = session_key(model, messages)
        text, raw = await call_ollama("/api/generate", body, upstream)
@@ -709,7 +985,7 @@ async def anthropic_messages(request: Request) -> Response:
        headers = _relay_headers(request, ANTHROPIC_RELAY_HEADERS)
        if "anthropic-version" not in {k.lower() for k in headers}:
            headers["anthropic-version"] = "2023-06-01"
-        body = await process_prompt(body, "/v1/messages", pool, cfg)
+        body = await process_prompt(body, "/v1/messages", pool, cfg, headers)
        messages = body.get("messages", [])
        sess = session_key(model, messages)
        text, raw = await call_anthropic(body, upstream, headers)
@@ -758,10 +1034,29 @@ async def openai_chat_completions(request: Request) -> Response:
    model = body.get("model", "unknown")
    upstream = cfg["upstream_openai"]
    min_len = cfg["detection"]["min_length"]
-    log.info("chat  route=/v1/chat/completions model=%s upstream=%s", model, upstream)
+    original_stream: bool = bool(body.get("stream", False))
+    hdrs = dict(request.headers)
+    agent_name = _agent_name_from_headers(hdrs)
+
+    log.info("chat  route=/v1/chat/completions model=%s upstream=%s agent=%s stream=%s",
+             model, upstream, agent_name or "—", original_stream)
    try:
+        # Agent routing: if agent has a registered model, dispatch cross-protocol
+        if agent_name:
+            agent_model = await _get_agent_routing_model(pool, agent_name)
+            if agent_model:
+                log.info(
+                    "agent_route  agent=%s provider=%s model=%s base_url=%s",
+                    agent_name, agent_model.provider, agent_model.model_name,
+                    agent_model.base_url or "(default)",
+                )
+                return await _route_agent_chat(
+                    body, agent_model, original_stream, pool, cfg, hdrs, min_len
+                )
+
+        # Standard path — forward to configured upstream unchanged
        headers = _relay_headers(request, OPENAI_RELAY_HEADERS)
-        body = await process_prompt(body, "/v1/chat/completions", pool, cfg)
+        body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs)
        messages = body.get("messages", [])
        sess = session_key(model, messages)
        text, raw = await call_openai(body, upstream, headers)
@@ -773,9 +1068,13 @@ async def openai_chat_completions(request: Request) -> Response:
                if raw.get("choices"):
                    raw["choices"][0]["message"]["content"] = override
                raw["loop_detected"] = True
+                if original_stream:
+                    return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
                return Response(content=json.dumps(raw), media_type="application/json")
            text, raw = await call_openai(body, upstream, headers)
            record_and_check(sess, text, min_len)
+        if original_stream:
+            return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
        return Response(content=json.dumps(raw), media_type="application/json")
    except UpstreamError as exc:
        log.error("chat_upstream_error  route=/v1/chat/completions model=%s  %s", model, exc)
@@ -903,6 +1202,81 @@ async def delete_model(model_id: int, request: Request) -> dict:
    return {"status": "ok", "deleted": model_id}


+# ---------------------------------------------------------------------------
+# /agent-models — per-agent model assignments
+# ---------------------------------------------------------------------------
+
+@app.get("/agent-models")
+async def list_agent_models(request: Request) -> dict:
+    pool = request.app.state.pool
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(
+            """
+            SELECT am.agent_name, am.model_id, am.created_at,
+                   m.provider, m.model_name, m.base_url
+            FROM agent_models am
+            JOIN models m ON m.id = am.model_id
+            ORDER BY am.agent_name
+            """
+        )
+    return {"agent_models": [
+        {
+            "agent_name": r["agent_name"],
+            "model_id": r["model_id"],
+            "provider": r["provider"],
+            "model_name": r["model_name"],
+            "base_url": r["base_url"] or "",
+            "created_at": r["created_at"].isoformat(),
+        }
+        for r in rows
+    ]}
+
+
+@app.put("/agent-models/{agent_name}")
+async def set_agent_model(agent_name: str, request: Request) -> dict:
+    """
+    Assign a model to an agent.  agent_name is normalised to lowercase.
+    Body: {"model_id": 3}
+    """
+    pool = request.app.state.pool
+    data = await request.json()
+    model_id = data.get("model_id")
+    if not model_id:
+        return {"error": "model_id is required"}
+    name = agent_name.strip().lower()
+    if not name:
+        return {"error": "agent_name must not be empty"}
+    async with pool.acquire() as conn:
+        # Verify the model exists
+        row = await conn.fetchrow("SELECT id, provider, model_name FROM models WHERE id=$1", int(model_id))
+        if not row:
+            return {"error": f"model {model_id} not found"}
+        await conn.execute(
+            """
+            INSERT INTO agent_models (agent_name, model_id)
+            VALUES ($1, $2)
+            ON CONFLICT (agent_name) DO UPDATE SET model_id = EXCLUDED.model_id, created_at = now()
+            """,
+            name, int(model_id),
+        )
+    log.info("agent_model set  agent=%s model_id=%s provider=%s model=%s",
+             name, model_id, row["provider"], row["model_name"])
+    return {"status": "ok", "agent_name": name, "model_id": int(model_id)}
+
+
+@app.delete("/agent-models/{agent_name}")
+async def delete_agent_model(agent_name: str, request: Request) -> dict:
+    pool = request.app.state.pool
+    name = agent_name.strip().lower()
+    async with pool.acquire() as conn:
+        result = await conn.execute("DELETE FROM agent_models WHERE agent_name=$1", name)
+    deleted = int(result.split()[-1]) if result else 0
+    if not deleted:
+        return {"error": f"no assignment found for agent '{name}'"}
+    log.info("agent_model deleted  agent=%s", name)
+    return {"status": "ok", "deleted": name}
+
+
@app.get("/models/discover")
 async def discover_models(base_url: str = "http://host.docker.internal:1234") -> dict:
    """
@@ -1874,6 +2248,33 @@ ADMIN_HTML = """<!DOCTYPE html>
    </details>
  </div>

+  <h2>Agent models</h2>
+  <p style="font-size:0.83em;color:#666;margin-bottom:0.8em">
+    Routes the main inference request to the agent's model — full cross-protocol
+    (Claude ↔ LM Studio ↔ OpenAI). Agent must send <code>X-Agent-Name: GUNNAR</code>
+    or <code>X-Agent-Id: 3</code> on every request. Also determines which model
+    Festinger uses for memory writing (context discovery).
+  </p>
+  <table id="agent-models-table" style="margin-bottom:0.8em">
+    <thead><tr><th>Agent name / ID</th><th>Model ID</th><th>Provider</th><th>Model name</th><th></th></tr></thead>
+    <tbody id="agent-models-tbody"><tr><td colspan="5">Loading…</td></tr></tbody>
+  </table>
+  <details style="margin-bottom:1em">
+    <summary style="cursor:pointer;font-size:0.9em;color:#555">Assign model to agent…</summary>
+    <div style="margin-top:0.6em;display:flex;gap:0.7em;flex-wrap:wrap;align-items:flex-end">
+      <label style="font-size:0.85em">Agent name or ID (e.g. gunnar or 3)
+        <input id="am-agent" type="text" placeholder="gunnar or 3"
+               style="font-family:monospace;padding:5px 8px;border:1px solid #ccc;border-radius:3px;display:block;margin-top:2px;width:160px">
+      </label>
+      <label style="font-size:0.85em">Model
+        <select id="am-model" style="font-family:monospace;padding:5px 8px;border:1px solid #ccc;border-radius:3px;display:block;margin-top:2px">
+          <option value="">— select —</option>
+        </select>
+      </label>
+      <button onclick="assignAgentModel(this)" style="height:32px">Assign</button>
+    </div>
+  </details>
+
  <h2>Actions</h2>
  <div class="actions">
    <button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
@@ -1950,6 +2351,11 @@ ADMIN_HTML = """<!DOCTYPE html>
          <td><button onclick="setConfig('write_model_id','${{m.id}}')" style="padding:2px 8px;font-size:0.8em;${{writeId==String(m.id)?'background:#2a7a2a;color:#fff;border-color:#2a7a2a':''}}">${{writeId==String(m.id)?'✓ active':'set'}}</button></td>
          <td><button onclick="deleteModel(${{m.id}},this)" style="padding:2px 8px;font-size:0.8em;color:#b00;border-color:#b00">✕</button></td>
        </tr>`).join('');
+
+      // Populate model dropdown in agent-models assignment form
+      const sel = document.getElementById('am-model');
+      sel.innerHTML = '<option value="">— select —</option>' +
+        md.models.map(m => `<option value="${{m.id}}">${{m.id}} — ${{m.provider}} / ${{m.model_name}}</option>`).join('');
    }}

    async function addModel(btn) {{
@@ -1991,6 +2397,57 @@ ADMIN_HTML = """<!DOCTYPE html>
      await loadModels();
    }}

+    async function loadAgentModels() {{
+      const r = await fetch('/agent-models');
+      const d = await r.json();
+      const tbody = document.getElementById('agent-models-tbody');
+      if (!d.agent_models.length) {{
+        tbody.innerHTML = '<tr><td colspan="5" style="color:#999">No assignments yet.</td></tr>';
+        return;
+      }}
+      tbody.innerHTML = d.agent_models.map(a => `
+        <tr>
+          <td><strong>${{a.agent_name}}</strong></td>
+          <td>${{a.model_id}}</td>
+          <td>${{a.provider}}</td>
+          <td>${{a.model_name}}${{a.base_url ? ' <span style="color:#999;font-size:0.85em">(' + a.base_url + ')</span>' : ''}}</td>
+          <td><button onclick="removeAgentModel('${{a.agent_name}}',this)" style="padding:2px 8px;font-size:0.8em;color:#b00;border-color:#b00">✕</button></td>
+        </tr>`).join('');
+    }}
+
+    async function assignAgentModel(btn) {{
+      const agent = document.getElementById('am-agent').value.trim().toLowerCase();
+      const model_id = document.getElementById('am-model').value;
+      if (!agent) {{ alert('Enter an agent name.'); return; }}
+      if (!model_id) {{ alert('Select a model.'); return; }}
+      btn.disabled = true;
+      try {{
+        const r = await fetch('/agent-models/' + encodeURIComponent(agent), {{
+          method: 'PUT',
+          headers: {{'Content-Type': 'application/json'}},
+          body: JSON.stringify({{model_id: parseInt(model_id)}})
+        }});
+        const d = await r.json();
+        if (d.error) {{ showResult('Error: ' + d.error, false); return; }}
+        showResult('Assigned model ' + model_id + ' to agent "' + agent + '".', true);
+        document.getElementById('am-agent').value = '';
+        await loadAgentModels();
+      }} catch(e) {{ showResult('Error: ' + e.message, false); }}
+      finally {{ btn.disabled = false; }}
+    }}
+
+    async function removeAgentModel(agent, btn) {{
+      if (!confirm('Remove model assignment for "' + agent + '"?')) return;
+      btn.disabled = true;
+      try {{
+        const r = await fetch('/agent-models/' + encodeURIComponent(agent), {{method: 'DELETE'}});
+        const d = await r.json();
+        if (d.error) {{ showResult('Error: ' + d.error, false); return; }}
+        await loadAgentModels();
+      }} catch(e) {{ showResult('Error: ' + e.message, false); }}
+      finally {{ btn.disabled = false; }}
+    }}
+
    async function loadConflicts() {{
      const r = await fetch('/conflicts');
      const d = await r.json();
@@ -2139,6 +2596,7 @@ ADMIN_HTML = """<!DOCTYPE html>
    loadConflicts();
    loadLog(0);
    loadModels();
+    loadAgentModels();
  </script>
 </body>
 </html>
@@ -32,11 +32,19 @@ class ContextDiscoverRequest:
    User message text plus candidate tokens (words absent from the English
    dictionary) submitted for LLM-driven concept discovery.

-    candidate_tokens are hints for the LLM — it decides which are real domain
-    concepts vs typos/noise, and extracts relationship triples from the text.
+    candidate_tokens: hints for the LLM — it decides which are real domain
+      concepts vs typos/noise, and extracts relationship triples from the text.
+
+    agent_name: normalised agent identity from X-Agent-Name header (lowercase).
+      Used to look up an agent-specific model in the agent_models table.
+
+    fallback_model: last-resort ModelConfig derived from the intercepted request.
+      Used when neither an agent-specific model nor write_model_id is configured.
    """
    user_text: str
    candidate_tokens: list[str]
+    agent_name: str = ""
+    fallback_model: Optional[ModelConfig] = None


@dataclass
@@ -54,12 +62,27 @@ _LLM_CONCURRENCY = 2
 _llm_semaphore: asyncio.Semaphore | None = None


-async def enqueue_context_discover(user_text: str, candidate_tokens: list[str]) -> None:
-    """Enqueue a user message for LLM-driven concept discovery and relation extraction."""
+async def enqueue_context_discover(
+    user_text: str,
+    candidate_tokens: list[str],
+    agent_name: str = "",
+    fallback_model: Optional[ModelConfig] = None,
+) -> None:
+    """
+    Enqueue a user message for LLM-driven concept discovery and relation extraction.
+
+    agent_name: normalised agent identity (from X-Agent-Name header).
+      Festinger looks up an agent-specific model in the agent_models table.
+
+    fallback_model: last-resort ModelConfig from the intercepted request.
+      Used when neither an agent-specific model nor write_model_id is configured.
+    """
    try:
        _queue.put_nowait(ContextDiscoverRequest(
            user_text=user_text,
            candidate_tokens=candidate_tokens,
+            agent_name=agent_name,
+            fallback_model=fallback_model,
        ))
    except asyncio.QueueFull:
        log.warning("write queue full — dropping context discover")
@@ -103,7 +126,8 @@ async def _worker(pool: asyncpg.Pool) -> None:
                # Slow path: fire off without awaiting so the worker stays free.
                asyncio.create_task(
                    _process_context_discover_guarded(
-                        pool, item.user_text, item.candidate_tokens
+                        pool, item.user_text, item.candidate_tokens,
+                        item.agent_name, item.fallback_model,
                    )
                )
        except Exception as e:
@@ -116,12 +140,14 @@ async def _process_context_discover_guarded(
    pool: asyncpg.Pool,
    user_text: str,
    candidate_tokens: list[str],
+    agent_name: str = "",
+    fallback_model: Optional[ModelConfig] = None,
 ) -> None:
    """Wrapper that acquires the LLM semaphore before concept discovery."""
    assert _llm_semaphore is not None
    async with _llm_semaphore:
        try:
-            await _process_context_discover(pool, user_text, candidate_tokens)
+            await _process_context_discover(pool, user_text, candidate_tokens, agent_name, fallback_model)
        except Exception as e:
            log.exception("context discover task error: %s", e)

@@ -154,29 +180,84 @@ async def _process_cue(pool: asyncpg.Pool, triple: CueTriple) -> None:
        log.info("cue triple collision: %s", collision)


+async def _resolve_discover_model(
+    pool: asyncpg.Pool,
+    agent_name: str,
+    fallback_model: Optional[ModelConfig],
+) -> Optional[ModelConfig]:
+    """
+    Resolve which LLM to use for context discovery.
+
+    Priority:
+      1. Agent-specific model  — agent_models table, keyed by agent_name
+      2. Global default        — write_model_id config key
+      3. Request mirror        — fallback_model (same provider/model Agent0 used)
+    """
+    if agent_name:
+        async with pool.acquire() as conn:
+            row = await conn.fetchrow(
+                """
+                SELECT m.provider, m.model_name, m.api_key, m.base_url
+                FROM agent_models am
+                JOIN models m ON m.id = am.model_id
+                WHERE am.agent_name = $1
+                """,
+                agent_name,
+            )
+        if row:
+            log.debug(
+                "context discover: agent=%s → provider=%s model=%s",
+                agent_name, row["provider"], row["model_name"],
+            )
+            return ModelConfig(
+                provider=row["provider"],
+                model_name=row["model_name"],
+                api_key=row["api_key"],
+                base_url=row["base_url"] or "",
+            )
+
+    write_model_id = await get_config(pool, "write_model_id")
+    if write_model_id:
+        m = await get_model_config(pool, write_model_id)
+        if m:
+            return m
+        log.warning("write_model_id=%s not found in models table", write_model_id)
+
+    if fallback_model:
+        log.debug(
+            "context discover: mirroring request model provider=%s model=%s",
+            fallback_model.provider, fallback_model.model_name,
+        )
+        return fallback_model
+
+    return None
+
+
 async def _process_context_discover(
    pool: asyncpg.Pool,
    user_text: str,
    candidate_tokens: list[str],
+    agent_name: str = "",
+    fallback_model: Optional[ModelConfig] = None,
 ) -> None:
    """
-    Ask the local LLM to evaluate candidate tokens (dictionary misses) and extract
+    Ask the LLM to evaluate candidate tokens (dictionary misses) and extract
    relationship triples for those it judges to be real domain concepts.

+    Model selection priority (see _resolve_discover_model):
+      1. Agent-specific model (agent_models table)
+      2. write_model_id config (global default)
+      3. Request mirror (fallback_model)
+
    Saliency feedback loop:
      - Confirmed concepts (triples inserted) → saliency raised to NOVEL_CONFIRMED_SALIENCY
        so they surface as recollection hits on subsequent turns.
      - Rejected concepts (no triples) → saliency stays low (NOVEL_INITIAL_SALIENCY),
        effectively hiding typos and noise from the recollection engine.
    """
-    write_model_id = await get_config(pool, "write_model_id")
-    if not write_model_id:
-        log.debug("no write_model_id configured — skipping context discover")
-        return
-
-    model = await get_model_config(pool, write_model_id)
+    model = await _resolve_discover_model(pool, agent_name, fallback_model)
    if not model:
-        log.warning("write_model_id=%s not found in models table", write_model_id)
+        log.debug("no model resolved for context discover (agent=%r) — skipping", agent_name)
        return

    seed_dims = ["type", "membership", "runs-on", "tech", "owned-by", "geography"]