Compare commits

...

3 Commits

Author SHA1 Message Date
gitprov cd471c4c95 Add cross-protocol agent routing at /v1/chat/completions
When X-Agent-Name or X-Agent-Id is present and matches an agent_models
entry, Festinger routes the main inference request to the configured
provider — not just the memory-writing utility model.

Protocol translation:
  - Incoming OpenAI → outgoing Claude: system-message extraction,
    max_tokens defaulting, response translated back to OpenAI format
  - Incoming OpenAI → outgoing LM Studio/OpenAI: model + base_url swap
  - All responses returned as OpenAI-compatible JSON or SSE

Also adds streaming synthesis for /v1/chat/completions (OpenAI SSE)
and X-Agent-Id fallback in _agent_name_from_headers so numeric
AGENT_ID env vars work without needing AGENT_NAME.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 19:32:57 +02:00
gitprov 10d9e1e2dd Add per-agent model assignments (agent_models table)
Festinger now reads X-Agent-Name from every intercepted request and
resolves the utility LLM model in priority order:
  1. agent_models table  — agent-specific (e.g. gunnar → claude, rind → qwen)
  2. write_model_id config — global default
  3. Request mirror       — same provider/model Agent0 is currently using

New API: GET/PUT/DELETE /agent-models
New admin UI: "Agent models" section with assignment form and table.

Agent0 side: add a custom header X-Agent-Name: <name> in the LLM
provider config per agent container (AGENT_NAME env var can drive this).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 19:24:28 +02:00
gitprov 7210fe2066 Mirror request model for context discovery — no write_model_id needed
Festinger now extracts provider/model/api-key from every intercepted
request and passes it to the context-discover queue as a fallback_model.
_process_context_discover uses it when write_model_id is not configured,
so Agent0's current model (LM Studio, Ollama, Anthropic) is automatically
reused for utility LLM calls without any extra setup.

Priority: write_model_id (explicit override) > fallback_model (request mirror)

Also updates upstream_openai default in config.yaml to LM Studio's
local address (host.docker.internal:1234).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 19:17:51 +02:00
5 changed files with 584 additions and 23 deletions
+3 -2
View File
@@ -7,9 +7,10 @@ upstream_ollama: "http://host.docker.internal:11434"
# Override via UPSTREAM_ANTHROPIC env var if needed # Override via UPSTREAM_ANTHROPIC env var if needed
upstream_anthropic: "https://api.anthropic.com" upstream_anthropic: "https://api.anthropic.com"
# Where the real OpenAI-compatible API is running (for /v1/chat/completions) # Where the real OpenAI-compatible API is running (for /v1/chat/completions).
# For LM Studio set this to its local address, e.g. "http://host.docker.internal:1234"
# Override via UPSTREAM_OPENAI env var if needed # Override via UPSTREAM_OPENAI env var if needed
upstream_openai: "https://api.openai.com" upstream_openai: "http://host.docker.internal:1234"
# Port this proxy listens on inside the container (exposed as 11434 on the docker network) # Port this proxy listens on inside the container (exposed as 11434 on the docker network)
proxy_port: 11434 proxy_port: 11434
+11
View File
@@ -111,3 +111,14 @@ CREATE TABLE IF NOT EXISTS kg_write_log (
CREATE INDEX IF NOT EXISTS kwl_created_idx ON kg_write_log (created_at DESC); CREATE INDEX IF NOT EXISTS kwl_created_idx ON kg_write_log (created_at DESC);
CREATE INDEX IF NOT EXISTS kwl_concept_idx ON kg_write_log (concept_id); CREATE INDEX IF NOT EXISTS kwl_concept_idx ON kg_write_log (concept_id);
CREATE INDEX IF NOT EXISTS kwl_op_idx ON kg_write_log (op); CREATE INDEX IF NOT EXISTS kwl_op_idx ON kg_write_log (op);
-- ---------------------------------------------------------------------------
-- agent_models — per-agent LLM model assignments
-- Maps an agent identity (from X-Agent-Name header) to a specific model.
-- Priority over write_model_id (global default) when agent_name is present.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS agent_models (
agent_name TEXT PRIMARY KEY, -- normalised lowercase, e.g. 'gunnar', 'rind'
model_id INT NOT NULL REFERENCES models(id) ON DELETE CASCADE,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
+10
View File
@@ -60,6 +60,16 @@ async def init_schema(pool: asyncpg.Pool) -> None:
await conn.execute( await conn.execute(
"ALTER TABLE soas ADD COLUMN IF NOT EXISTS first_seen_context TEXT NOT NULL DEFAULT ''" "ALTER TABLE soas ADD COLUMN IF NOT EXISTS first_seen_context TEXT NOT NULL DEFAULT ''"
) )
# Migration: per-agent model assignments (agent_name → model_id)
await conn.execute(
"""
CREATE TABLE IF NOT EXISTS agent_models (
agent_name TEXT PRIMARY KEY,
model_id INT NOT NULL REFERENCES models(id) ON DELETE CASCADE,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
)
"""
)
log.info("schema applied") log.info("schema applied")
+465 -7
View File
@@ -44,6 +44,7 @@ from .cue_scanner import scan_cues
from .recollection import build_recollection_block, inject_recollection from .recollection import build_recollection_block, inject_recollection
from .resolution_job import run_resolution_job, last_run_timestamp from .resolution_job import run_resolution_job, last_run_timestamp
from .tokenizer import tokenize from .tokenizer import tokenize
from .llm_client import ModelConfig
from .write_queue import enqueue_context_discover, enqueue_cue, start_worker, stop_worker from .write_queue import enqueue_context_discover, enqueue_cue, start_worker, stop_worker
from .urd_writer import InsertRequest, insert_urd_edge from .urd_writer import InsertRequest, insert_urd_edge
from .wordnet import import_wordnet, CITATION as WORDNET_CITATION from .wordnet import import_wordnet, CITATION as WORDNET_CITATION
@@ -517,11 +518,276 @@ def inject_recollection_anthropic(body: dict, block: str) -> dict:
return body return body
# ---------------------------------------------------------------------------
# Request model mirroring
# ---------------------------------------------------------------------------
def _extract_request_model_config(
path: str,
body: dict,
request_headers: dict,
cfg: dict,
) -> ModelConfig | None:
"""
Build a ModelConfig from the intercepted request so Festinger's utility
LLM calls (context discovery) can use the same provider/model as Agent0 —
no separate write_model_id configuration needed.
Provider inference:
/v1/messages → anthropic
/v1/chat/completions → lm-studio (OpenAI-compatible; base_url from upstream_openai)
/api/chat, /api/generate → lm-studio (Ollama's OpenAI-compat endpoint; base_url from upstream_ollama)
"""
model_name = body.get("model", "")
if not model_name:
return None
if path == "/v1/messages":
api_key = request_headers.get("x-api-key", "")
return ModelConfig(
provider="claude",
model_name=model_name,
api_key=api_key,
)
if path == "/v1/chat/completions":
auth = request_headers.get("authorization", "")
api_key = auth[len("Bearer "):].strip() if auth.lower().startswith("bearer ") else auth
base_url = cfg.get("upstream_openai", "")
return ModelConfig(
provider="lm-studio",
model_name=model_name,
api_key=api_key or "lm-studio",
base_url=base_url,
)
if path in ("/api/chat", "/api/generate"):
# Ollama exposes an OpenAI-compatible endpoint at the same base URL.
base_url = cfg.get("upstream_ollama", "")
return ModelConfig(
provider="lm-studio",
model_name=model_name,
api_key="ollama",
base_url=base_url,
)
return None
# ---------------------------------------------------------------------------
# Agent routing — cross-protocol dispatch
# ---------------------------------------------------------------------------
def _openai_to_anthropic_body(body: dict, model_name: str) -> dict:
"""
Translate an OpenAI chat completions request to Anthropic Messages API format.
- system messages are lifted to the top-level 'system' field
- max_tokens defaults to 4096 if not specified
- temperature/top_p forwarded if present
"""
system_parts: list[str] = []
claude_messages: list[dict] = []
for m in body.get("messages", []):
role = m.get("role", "")
content = m.get("content", "")
if role == "system":
if isinstance(content, str):
system_parts.append(content)
elif isinstance(content, list):
system_parts.extend(
b.get("text", "") for b in content
if isinstance(b, dict) and b.get("type") == "text"
)
else:
claude_messages.append(m)
anthropic_body: dict = {
"model": model_name,
"messages": claude_messages,
"max_tokens": body.get("max_tokens") or 4096,
}
if system_parts:
anthropic_body["system"] = "\n\n".join(system_parts)
for key in ("temperature", "top_p", "stop_sequences"):
if key in body:
anthropic_body[key] = body[key]
return anthropic_body
def _anthropic_to_openai_response(data: dict) -> dict:
"""Convert an Anthropic Messages API response to OpenAI chat completions format."""
text = "".join(
b.get("text", "") for b in data.get("content", [])
if b.get("type") == "text"
)
usage = data.get("usage", {})
stop_map = {"end_turn": "stop", "max_tokens": "length", "stop_sequence": "stop"}
finish = stop_map.get(data.get("stop_reason", "end_turn"), "stop")
return {
"id": data.get("id", "chatcmpl-festinger"),
"object": "chat.completion",
"created": int(time.time()),
"model": data.get("model", ""),
"choices": [{
"index": 0,
"message": {"role": "assistant", "content": text},
"finish_reason": finish,
}],
"usage": {
"prompt_tokens": usage.get("input_tokens", 0),
"completion_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
},
}
def _openai_sse_from_response(raw: dict) -> bytes:
"""
Synthesise a minimal OpenAI-compatible SSE stream from a complete (non-streaming)
OpenAI-format response dict. Used when the client sent stream=true but the
upstream was called non-streaming.
"""
text = raw.get("choices", [{}])[0].get("message", {}).get("content", "")
model = raw.get("model", "")
cid = raw.get("id", "chatcmpl-festinger")
ts = int(time.time())
def chunk(delta: dict, finish_reason=None) -> str:
return "data: " + json.dumps({
"id": cid, "object": "chat.completion.chunk",
"created": ts, "model": model,
"choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}],
}) + "\n\n"
parts = [
chunk({"role": "assistant", "content": ""}),
chunk({"content": text}),
chunk({}, finish_reason="stop"),
"data: [DONE]\n\n",
]
return "".join(parts).encode()
async def _get_agent_routing_model(pool, agent_name: str) -> ModelConfig | None:
"""
Look up the agent's configured model from the agent_models table.
agent_name is the normalised key (lowercase name or numeric ID string).
"""
if not agent_name:
return None
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT m.provider, m.model_name, m.api_key, m.base_url
FROM agent_models am
JOIN models m ON m.id = am.model_id
WHERE am.agent_name = $1
""",
agent_name,
)
if not row:
return None
return ModelConfig(
provider=row["provider"],
model_name=row["model_name"],
api_key=row["api_key"],
base_url=row["base_url"] or "",
)
async def _route_agent_chat(
body: dict,
agent_model: ModelConfig,
original_stream: bool,
pool,
cfg: dict,
request_headers: dict,
min_len: int,
) -> Response:
"""
Route an OpenAI-compatible chat completions request to the agent's
configured provider, handling cross-protocol translation.
claude → Anthropic Messages API (translated in both directions)
openai → OpenAI-compatible endpoint (base_url + model swap)
lm-studio→ same as openai
"""
# Run recollection injection (same pipeline as standard path)
body = await process_prompt(body, "/v1/chat/completions", pool, cfg, request_headers)
sess = session_key(agent_model.model_name, body.get("messages", []))
# Capture upstream config for use in the loop-detection re-run closure
if agent_model.provider == "claude":
anthropic_upstream = agent_model.base_url or "https://api.anthropic.com"
anthropic_headers = {
"x-api-key": agent_model.api_key,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
}
else:
oai_upstream = agent_model.base_url or cfg.get("upstream_openai", "https://api.openai.com")
oai_headers = {
"authorization": f"Bearer {agent_model.api_key or 'lm-studio'}",
"content-type": "application/json",
}
async def _call(current_body: dict) -> tuple[str, dict]:
"""Call the agent's upstream; always return (text, openai_format_dict)."""
if agent_model.provider == "claude":
ab = _openai_to_anthropic_body(current_body, agent_model.model_name)
text, raw_a = await call_anthropic(ab, anthropic_upstream, anthropic_headers)
return text, _anthropic_to_openai_response(raw_a)
else:
b = dict(current_body)
b["model"] = agent_model.model_name
text, raw_o = await call_openai(b, oai_upstream, oai_headers)
return text, raw_o
text, raw = await _call(body)
count = record_and_check(sess, text, min_len)
if count >= 2:
log.warning(
"loop_detected (agent routed) agent_model=%s session=%s count=%d",
agent_model.model_name, sess[1], count,
)
body, override = apply_mitigations(body, count, cfg)
if override is not None:
raw["choices"] = [{
"index": 0,
"message": {"role": "assistant", "content": override},
"finish_reason": "stop",
}]
raw["loop_detected"] = True
else:
text, raw = await _call(body)
record_and_check(sess, text, min_len)
if original_stream:
return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
return Response(content=json.dumps(raw), media_type="application/json")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Saliency + recollection pipeline # Saliency + recollection pipeline
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict: def _agent_name_from_headers(headers: dict) -> str:
"""
Extract agent identity from request headers.
Checks X-Agent-Name first (e.g. 'GUNNAR''gunnar'),
then falls back to X-Agent-Id (e.g. '3''3').
Both are stored as the agent_name key in the agent_models table.
"""
name = headers.get("x-agent-name", "").strip().lower()
if name:
return name
return headers.get("x-agent-id", "").strip() # numeric IDs work as-is
async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers: dict | None = None) -> dict:
""" """
Run the saliency + recollection pipeline over the prompt. Run the saliency + recollection pipeline over the prompt.
Returns a (possibly modified) body dict with the recollection block injected. Returns a (possibly modified) body dict with the recollection block injected.
@@ -530,6 +796,12 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6")) conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
recency_days = int(await get_config(pool, "recollection_recency_days", "90")) recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
hdrs = request_headers or {}
# Derive a ModelConfig from the intercepted request so context discovery can
# mirror Agent0's current model without a separate write_model_id config.
request_model = _extract_request_model_config(path, body, hdrs, cfg)
agent_name = _agent_name_from_headers(hdrs)
# Extract only the last user message — agent responses and reasoning traces # Extract only the last user message — agent responses and reasoning traces
# are noise for both cue scanning and concept discovery. # are noise for both cue scanning and concept discovery.
user_text = _last_user_message_text(body, path) user_text = _last_user_message_text(body, path)
@@ -590,7 +862,11 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
# 3. Enqueue for LLM-driven discovery if there are candidates to evaluate. # 3. Enqueue for LLM-driven discovery if there are candidates to evaluate.
if novel_candidates and len(user_text) >= 20: if novel_candidates and len(user_text) >= 20:
await enqueue_context_discover(user_text, novel_candidates) await enqueue_context_discover(
user_text, novel_candidates,
agent_name=agent_name,
fallback_model=request_model,
)
if not salient_for_read: if not salient_for_read:
return body return body
@@ -628,7 +904,7 @@ async def chat(request: Request) -> Response:
min_len = cfg["detection"]["min_length"] min_len = cfg["detection"]["min_length"]
log.info("chat route=/api/chat model=%s", model) log.info("chat route=/api/chat model=%s", model)
try: try:
body = await process_prompt(body, "/api/chat", pool, cfg) body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers))
text, raw = await call_ollama("/api/chat", body, upstream) text, raw = await call_ollama("/api/chat", body, upstream)
sess = session_key(model, body.get("messages", [])) sess = session_key(model, body.get("messages", []))
count = record_and_check(sess, text, min_len) count = record_and_check(sess, text, min_len)
@@ -661,7 +937,7 @@ async def generate(request: Request) -> Response:
min_len = cfg["detection"]["min_length"] min_len = cfg["detection"]["min_length"]
log.info("chat route=/api/generate model=%s", model) log.info("chat route=/api/generate model=%s", model)
try: try:
body = await process_prompt(body, "/api/generate", pool, cfg) body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers))
messages = [{"role": "user", "content": body.get("prompt", "")}] messages = [{"role": "user", "content": body.get("prompt", "")}]
sess = session_key(model, messages) sess = session_key(model, messages)
text, raw = await call_ollama("/api/generate", body, upstream) text, raw = await call_ollama("/api/generate", body, upstream)
@@ -709,7 +985,7 @@ async def anthropic_messages(request: Request) -> Response:
headers = _relay_headers(request, ANTHROPIC_RELAY_HEADERS) headers = _relay_headers(request, ANTHROPIC_RELAY_HEADERS)
if "anthropic-version" not in {k.lower() for k in headers}: if "anthropic-version" not in {k.lower() for k in headers}:
headers["anthropic-version"] = "2023-06-01" headers["anthropic-version"] = "2023-06-01"
body = await process_prompt(body, "/v1/messages", pool, cfg) body = await process_prompt(body, "/v1/messages", pool, cfg, headers)
messages = body.get("messages", []) messages = body.get("messages", [])
sess = session_key(model, messages) sess = session_key(model, messages)
text, raw = await call_anthropic(body, upstream, headers) text, raw = await call_anthropic(body, upstream, headers)
@@ -758,10 +1034,29 @@ async def openai_chat_completions(request: Request) -> Response:
model = body.get("model", "unknown") model = body.get("model", "unknown")
upstream = cfg["upstream_openai"] upstream = cfg["upstream_openai"]
min_len = cfg["detection"]["min_length"] min_len = cfg["detection"]["min_length"]
log.info("chat route=/v1/chat/completions model=%s upstream=%s", model, upstream) original_stream: bool = bool(body.get("stream", False))
hdrs = dict(request.headers)
agent_name = _agent_name_from_headers(hdrs)
log.info("chat route=/v1/chat/completions model=%s upstream=%s agent=%s stream=%s",
model, upstream, agent_name or "", original_stream)
try: try:
# Agent routing: if agent has a registered model, dispatch cross-protocol
if agent_name:
agent_model = await _get_agent_routing_model(pool, agent_name)
if agent_model:
log.info(
"agent_route agent=%s provider=%s model=%s base_url=%s",
agent_name, agent_model.provider, agent_model.model_name,
agent_model.base_url or "(default)",
)
return await _route_agent_chat(
body, agent_model, original_stream, pool, cfg, hdrs, min_len
)
# Standard path — forward to configured upstream unchanged
headers = _relay_headers(request, OPENAI_RELAY_HEADERS) headers = _relay_headers(request, OPENAI_RELAY_HEADERS)
body = await process_prompt(body, "/v1/chat/completions", pool, cfg) body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs)
messages = body.get("messages", []) messages = body.get("messages", [])
sess = session_key(model, messages) sess = session_key(model, messages)
text, raw = await call_openai(body, upstream, headers) text, raw = await call_openai(body, upstream, headers)
@@ -773,9 +1068,13 @@ async def openai_chat_completions(request: Request) -> Response:
if raw.get("choices"): if raw.get("choices"):
raw["choices"][0]["message"]["content"] = override raw["choices"][0]["message"]["content"] = override
raw["loop_detected"] = True raw["loop_detected"] = True
if original_stream:
return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
return Response(content=json.dumps(raw), media_type="application/json") return Response(content=json.dumps(raw), media_type="application/json")
text, raw = await call_openai(body, upstream, headers) text, raw = await call_openai(body, upstream, headers)
record_and_check(sess, text, min_len) record_and_check(sess, text, min_len)
if original_stream:
return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
return Response(content=json.dumps(raw), media_type="application/json") return Response(content=json.dumps(raw), media_type="application/json")
except UpstreamError as exc: except UpstreamError as exc:
log.error("chat_upstream_error route=/v1/chat/completions model=%s %s", model, exc) log.error("chat_upstream_error route=/v1/chat/completions model=%s %s", model, exc)
@@ -903,6 +1202,81 @@ async def delete_model(model_id: int, request: Request) -> dict:
return {"status": "ok", "deleted": model_id} return {"status": "ok", "deleted": model_id}
# ---------------------------------------------------------------------------
# /agent-models — per-agent model assignments
# ---------------------------------------------------------------------------
@app.get("/agent-models")
async def list_agent_models(request: Request) -> dict:
pool = request.app.state.pool
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT am.agent_name, am.model_id, am.created_at,
m.provider, m.model_name, m.base_url
FROM agent_models am
JOIN models m ON m.id = am.model_id
ORDER BY am.agent_name
"""
)
return {"agent_models": [
{
"agent_name": r["agent_name"],
"model_id": r["model_id"],
"provider": r["provider"],
"model_name": r["model_name"],
"base_url": r["base_url"] or "",
"created_at": r["created_at"].isoformat(),
}
for r in rows
]}
@app.put("/agent-models/{agent_name}")
async def set_agent_model(agent_name: str, request: Request) -> dict:
"""
Assign a model to an agent. agent_name is normalised to lowercase.
Body: {"model_id": 3}
"""
pool = request.app.state.pool
data = await request.json()
model_id = data.get("model_id")
if not model_id:
return {"error": "model_id is required"}
name = agent_name.strip().lower()
if not name:
return {"error": "agent_name must not be empty"}
async with pool.acquire() as conn:
# Verify the model exists
row = await conn.fetchrow("SELECT id, provider, model_name FROM models WHERE id=$1", int(model_id))
if not row:
return {"error": f"model {model_id} not found"}
await conn.execute(
"""
INSERT INTO agent_models (agent_name, model_id)
VALUES ($1, $2)
ON CONFLICT (agent_name) DO UPDATE SET model_id = EXCLUDED.model_id, created_at = now()
""",
name, int(model_id),
)
log.info("agent_model set agent=%s model_id=%s provider=%s model=%s",
name, model_id, row["provider"], row["model_name"])
return {"status": "ok", "agent_name": name, "model_id": int(model_id)}
@app.delete("/agent-models/{agent_name}")
async def delete_agent_model(agent_name: str, request: Request) -> dict:
pool = request.app.state.pool
name = agent_name.strip().lower()
async with pool.acquire() as conn:
result = await conn.execute("DELETE FROM agent_models WHERE agent_name=$1", name)
deleted = int(result.split()[-1]) if result else 0
if not deleted:
return {"error": f"no assignment found for agent '{name}'"}
log.info("agent_model deleted agent=%s", name)
return {"status": "ok", "deleted": name}
@app.get("/models/discover") @app.get("/models/discover")
async def discover_models(base_url: str = "http://host.docker.internal:1234") -> dict: async def discover_models(base_url: str = "http://host.docker.internal:1234") -> dict:
""" """
@@ -1874,6 +2248,33 @@ ADMIN_HTML = """<!DOCTYPE html>
</details> </details>
</div> </div>
<h2>Agent models</h2>
<p style="font-size:0.83em;color:#666;margin-bottom:0.8em">
Routes the main inference request to the agent's model — full cross-protocol
(Claude ↔ LM Studio ↔ OpenAI). Agent must send <code>X-Agent-Name: GUNNAR</code>
or <code>X-Agent-Id: 3</code> on every request. Also determines which model
Festinger uses for memory writing (context discovery).
</p>
<table id="agent-models-table" style="margin-bottom:0.8em">
<thead><tr><th>Agent name / ID</th><th>Model ID</th><th>Provider</th><th>Model name</th><th></th></tr></thead>
<tbody id="agent-models-tbody"><tr><td colspan="5">Loading…</td></tr></tbody>
</table>
<details style="margin-bottom:1em">
<summary style="cursor:pointer;font-size:0.9em;color:#555">Assign model to agent…</summary>
<div style="margin-top:0.6em;display:flex;gap:0.7em;flex-wrap:wrap;align-items:flex-end">
<label style="font-size:0.85em">Agent name or ID (e.g. gunnar or 3)
<input id="am-agent" type="text" placeholder="gunnar or 3"
style="font-family:monospace;padding:5px 8px;border:1px solid #ccc;border-radius:3px;display:block;margin-top:2px;width:160px">
</label>
<label style="font-size:0.85em">Model
<select id="am-model" style="font-family:monospace;padding:5px 8px;border:1px solid #ccc;border-radius:3px;display:block;margin-top:2px">
<option value="">— select —</option>
</select>
</label>
<button onclick="assignAgentModel(this)" style="height:32px">Assign</button>
</div>
</details>
<h2>Actions</h2> <h2>Actions</h2>
<div class="actions"> <div class="actions">
<button class="primary" onclick="runResolution(this)">Run conflict resolution now</button> <button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
@@ -1950,6 +2351,11 @@ ADMIN_HTML = """<!DOCTYPE html>
<td><button onclick="setConfig('write_model_id','${{m.id}}')" style="padding:2px 8px;font-size:0.8em;${{writeId==String(m.id)?'background:#2a7a2a;color:#fff;border-color:#2a7a2a':''}}">${{writeId==String(m.id)?'✓ active':'set'}}</button></td> <td><button onclick="setConfig('write_model_id','${{m.id}}')" style="padding:2px 8px;font-size:0.8em;${{writeId==String(m.id)?'background:#2a7a2a;color:#fff;border-color:#2a7a2a':''}}">${{writeId==String(m.id)?'✓ active':'set'}}</button></td>
<td><button onclick="deleteModel(${{m.id}},this)" style="padding:2px 8px;font-size:0.8em;color:#b00;border-color:#b00">✕</button></td> <td><button onclick="deleteModel(${{m.id}},this)" style="padding:2px 8px;font-size:0.8em;color:#b00;border-color:#b00">✕</button></td>
</tr>`).join(''); </tr>`).join('');
// Populate model dropdown in agent-models assignment form
const sel = document.getElementById('am-model');
sel.innerHTML = '<option value="">— select —</option>' +
md.models.map(m => `<option value="${{m.id}}">${{m.id}} — ${{m.provider}} / ${{m.model_name}}</option>`).join('');
}} }}
async function addModel(btn) {{ async function addModel(btn) {{
@@ -1991,6 +2397,57 @@ ADMIN_HTML = """<!DOCTYPE html>
await loadModels(); await loadModels();
}} }}
async function loadAgentModels() {{
const r = await fetch('/agent-models');
const d = await r.json();
const tbody = document.getElementById('agent-models-tbody');
if (!d.agent_models.length) {{
tbody.innerHTML = '<tr><td colspan="5" style="color:#999">No assignments yet.</td></tr>';
return;
}}
tbody.innerHTML = d.agent_models.map(a => `
<tr>
<td><strong>${{a.agent_name}}</strong></td>
<td>${{a.model_id}}</td>
<td>${{a.provider}}</td>
<td>${{a.model_name}}${{a.base_url ? ' <span style="color:#999;font-size:0.85em">(' + a.base_url + ')</span>' : ''}}</td>
<td><button onclick="removeAgentModel('${{a.agent_name}}',this)" style="padding:2px 8px;font-size:0.8em;color:#b00;border-color:#b00">✕</button></td>
</tr>`).join('');
}}
async function assignAgentModel(btn) {{
const agent = document.getElementById('am-agent').value.trim().toLowerCase();
const model_id = document.getElementById('am-model').value;
if (!agent) {{ alert('Enter an agent name.'); return; }}
if (!model_id) {{ alert('Select a model.'); return; }}
btn.disabled = true;
try {{
const r = await fetch('/agent-models/' + encodeURIComponent(agent), {{
method: 'PUT',
headers: {{'Content-Type': 'application/json'}},
body: JSON.stringify({{model_id: parseInt(model_id)}})
}});
const d = await r.json();
if (d.error) {{ showResult('Error: ' + d.error, false); return; }}
showResult('Assigned model ' + model_id + ' to agent "' + agent + '".', true);
document.getElementById('am-agent').value = '';
await loadAgentModels();
}} catch(e) {{ showResult('Error: ' + e.message, false); }}
finally {{ btn.disabled = false; }}
}}
async function removeAgentModel(agent, btn) {{
if (!confirm('Remove model assignment for "' + agent + '"?')) return;
btn.disabled = true;
try {{
const r = await fetch('/agent-models/' + encodeURIComponent(agent), {{method: 'DELETE'}});
const d = await r.json();
if (d.error) {{ showResult('Error: ' + d.error, false); return; }}
await loadAgentModels();
}} catch(e) {{ showResult('Error: ' + e.message, false); }}
finally {{ btn.disabled = false; }}
}}
async function loadConflicts() {{ async function loadConflicts() {{
const r = await fetch('/conflicts'); const r = await fetch('/conflicts');
const d = await r.json(); const d = await r.json();
@@ -2139,6 +2596,7 @@ ADMIN_HTML = """<!DOCTYPE html>
loadConflicts(); loadConflicts();
loadLog(0); loadLog(0);
loadModels(); loadModels();
loadAgentModels();
</script> </script>
</body> </body>
</html> </html>
+95 -14
View File
@@ -32,11 +32,19 @@ class ContextDiscoverRequest:
User message text plus candidate tokens (words absent from the English User message text plus candidate tokens (words absent from the English
dictionary) submitted for LLM-driven concept discovery. dictionary) submitted for LLM-driven concept discovery.
candidate_tokens are hints for the LLM — it decides which are real domain candidate_tokens: hints for the LLM — it decides which are real domain
concepts vs typos/noise, and extracts relationship triples from the text. concepts vs typos/noise, and extracts relationship triples from the text.
agent_name: normalised agent identity from X-Agent-Name header (lowercase).
Used to look up an agent-specific model in the agent_models table.
fallback_model: last-resort ModelConfig derived from the intercepted request.
Used when neither an agent-specific model nor write_model_id is configured.
""" """
user_text: str user_text: str
candidate_tokens: list[str] candidate_tokens: list[str]
agent_name: str = ""
fallback_model: Optional[ModelConfig] = None
@dataclass @dataclass
@@ -54,12 +62,27 @@ _LLM_CONCURRENCY = 2
_llm_semaphore: asyncio.Semaphore | None = None _llm_semaphore: asyncio.Semaphore | None = None
async def enqueue_context_discover(user_text: str, candidate_tokens: list[str]) -> None: async def enqueue_context_discover(
"""Enqueue a user message for LLM-driven concept discovery and relation extraction.""" user_text: str,
candidate_tokens: list[str],
agent_name: str = "",
fallback_model: Optional[ModelConfig] = None,
) -> None:
"""
Enqueue a user message for LLM-driven concept discovery and relation extraction.
agent_name: normalised agent identity (from X-Agent-Name header).
Festinger looks up an agent-specific model in the agent_models table.
fallback_model: last-resort ModelConfig from the intercepted request.
Used when neither an agent-specific model nor write_model_id is configured.
"""
try: try:
_queue.put_nowait(ContextDiscoverRequest( _queue.put_nowait(ContextDiscoverRequest(
user_text=user_text, user_text=user_text,
candidate_tokens=candidate_tokens, candidate_tokens=candidate_tokens,
agent_name=agent_name,
fallback_model=fallback_model,
)) ))
except asyncio.QueueFull: except asyncio.QueueFull:
log.warning("write queue full — dropping context discover") log.warning("write queue full — dropping context discover")
@@ -103,7 +126,8 @@ async def _worker(pool: asyncpg.Pool) -> None:
# Slow path: fire off without awaiting so the worker stays free. # Slow path: fire off without awaiting so the worker stays free.
asyncio.create_task( asyncio.create_task(
_process_context_discover_guarded( _process_context_discover_guarded(
pool, item.user_text, item.candidate_tokens pool, item.user_text, item.candidate_tokens,
item.agent_name, item.fallback_model,
) )
) )
except Exception as e: except Exception as e:
@@ -116,12 +140,14 @@ async def _process_context_discover_guarded(
pool: asyncpg.Pool, pool: asyncpg.Pool,
user_text: str, user_text: str,
candidate_tokens: list[str], candidate_tokens: list[str],
agent_name: str = "",
fallback_model: Optional[ModelConfig] = None,
) -> None: ) -> None:
"""Wrapper that acquires the LLM semaphore before concept discovery.""" """Wrapper that acquires the LLM semaphore before concept discovery."""
assert _llm_semaphore is not None assert _llm_semaphore is not None
async with _llm_semaphore: async with _llm_semaphore:
try: try:
await _process_context_discover(pool, user_text, candidate_tokens) await _process_context_discover(pool, user_text, candidate_tokens, agent_name, fallback_model)
except Exception as e: except Exception as e:
log.exception("context discover task error: %s", e) log.exception("context discover task error: %s", e)
@@ -154,29 +180,84 @@ async def _process_cue(pool: asyncpg.Pool, triple: CueTriple) -> None:
log.info("cue triple collision: %s", collision) log.info("cue triple collision: %s", collision)
async def _resolve_discover_model(
pool: asyncpg.Pool,
agent_name: str,
fallback_model: Optional[ModelConfig],
) -> Optional[ModelConfig]:
"""
Resolve which LLM to use for context discovery.
Priority:
1. Agent-specific model — agent_models table, keyed by agent_name
2. Global default — write_model_id config key
3. Request mirror — fallback_model (same provider/model Agent0 used)
"""
if agent_name:
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT m.provider, m.model_name, m.api_key, m.base_url
FROM agent_models am
JOIN models m ON m.id = am.model_id
WHERE am.agent_name = $1
""",
agent_name,
)
if row:
log.debug(
"context discover: agent=%s → provider=%s model=%s",
agent_name, row["provider"], row["model_name"],
)
return ModelConfig(
provider=row["provider"],
model_name=row["model_name"],
api_key=row["api_key"],
base_url=row["base_url"] or "",
)
write_model_id = await get_config(pool, "write_model_id")
if write_model_id:
m = await get_model_config(pool, write_model_id)
if m:
return m
log.warning("write_model_id=%s not found in models table", write_model_id)
if fallback_model:
log.debug(
"context discover: mirroring request model provider=%s model=%s",
fallback_model.provider, fallback_model.model_name,
)
return fallback_model
return None
async def _process_context_discover( async def _process_context_discover(
pool: asyncpg.Pool, pool: asyncpg.Pool,
user_text: str, user_text: str,
candidate_tokens: list[str], candidate_tokens: list[str],
agent_name: str = "",
fallback_model: Optional[ModelConfig] = None,
) -> None: ) -> None:
""" """
Ask the local LLM to evaluate candidate tokens (dictionary misses) and extract Ask the LLM to evaluate candidate tokens (dictionary misses) and extract
relationship triples for those it judges to be real domain concepts. relationship triples for those it judges to be real domain concepts.
Model selection priority (see _resolve_discover_model):
1. Agent-specific model (agent_models table)
2. write_model_id config (global default)
3. Request mirror (fallback_model)
Saliency feedback loop: Saliency feedback loop:
- Confirmed concepts (triples inserted) → saliency raised to NOVEL_CONFIRMED_SALIENCY - Confirmed concepts (triples inserted) → saliency raised to NOVEL_CONFIRMED_SALIENCY
so they surface as recollection hits on subsequent turns. so they surface as recollection hits on subsequent turns.
- Rejected concepts (no triples) → saliency stays low (NOVEL_INITIAL_SALIENCY), - Rejected concepts (no triples) → saliency stays low (NOVEL_INITIAL_SALIENCY),
effectively hiding typos and noise from the recollection engine. effectively hiding typos and noise from the recollection engine.
""" """
write_model_id = await get_config(pool, "write_model_id") model = await _resolve_discover_model(pool, agent_name, fallback_model)
if not write_model_id:
log.debug("no write_model_id configured — skipping context discover")
return
model = await get_model_config(pool, write_model_id)
if not model: if not model:
log.warning("write_model_id=%s not found in models table", write_model_id) log.debug("no model resolved for context discover (agent=%r) — skipping", agent_name)
return return
seed_dims = ["type", "membership", "runs-on", "tech", "owned-by", "geography"] seed_dims = ["type", "membership", "runs-on", "tech", "owned-by", "geography"]