Adding changes to festinger

2026-04-25 11:31:58 +02:00
parent abc5cf5952
commit df28e56add
8 changed files with 707 additions and 118 deletions
@@ -0,0 +1,47 @@
 # Hermes config for gerhard — evaluation instance
 #
 # LLM calls go to festinger, which sits in front of the real inference
 # providers. The agent identity is encoded in the base URL so festinger
 # can route to the right model and build the per-agent knowledge graph.
 #
 #   festinger routing:  http://festinger:11434/gerhard/v1/chat/completions
 #   → festinger looks up agent_name="gerhard" in agent_models table
 #   → if found: routes to the registered provider/model
 #   → if not found: falls through to upstream_openai in config.yaml
 model:
  provider: "custom"
  base_url: "http://festinger:11434/gerhard/v1"
  # No api_key needed — festinger does not authenticate OpenAI-compat requests.
  # Set a placeholder so the OpenAI SDK doesn't complain.
  api_key: "festinger"
  # Model name is what gets sent in the request body. festinger can override
  # this per-agent via the agent_models table. Set something reasonable as
  # a default (festinger's upstream_openai fallback will use whatever it supports).
  default: "claude-opus-4-6"
 terminal:
  backend: "local"
  cwd: "."
  timeout: 180
  lifetime_seconds: 300
 agent:
  max_turns: 60
  verbose: false
  reasoning_effort: "medium"
 memory:
  memory_enabled: true
  user_profile_enabled: true
 compression:
  enabled: true
  threshold: 0.50
  target_ratio: 0.20
  protect_last_n: 20
 session_reset:
  mode: both
  idle_minutes: 1440
  at_hour: 4
@@ -76,19 +76,36 @@ services:
      - "host.docker.internal:host-gateway"
  gerhard:
-    image: agent0ai/agent-zero:latest
+    build:
      context: ../../hermes-agent
    image: hermes-agent
    container_name: gerhard
    ports:
      - "50007:80"
    volumes:
-      - ./agents/gerhard:/a0/usr
+      - ./agents/gerhard-hermes:/opt/data
      - ${HOME}/.ssh:/root/.ssh
    restart: unless-stopped
    environment:
-      AUTH_LOGIN: ${AUTH_LOGIN}
+      HERMES_UID: ${HERMES_UID:-1000}
-      AUTH_PASSWORD: ${AUTH_PASSWORD}
+      HERMES_GID: ${HERMES_GID:-1000}
    extra_hosts:
      - "host.docker.internal:host-gateway"
    command: ["gateway", "run"]
  gerhard-dashboard:
    image: hermes-agent
    container_name: gerhard-dashboard
    ports:
      - "50007:9119"    # web dashboard at localhost:50007
    volumes:
      - ./agents/gerhard-hermes:/opt/data
    restart: unless-stopped
    depends_on:
      - gerhard
    environment:
      HERMES_UID: ${HERMES_UID:-1000}
      HERMES_GID: ${HERMES_GID:-1000}
    extra_hosts:
      - "host.docker.internal:host-gateway"
    command: ["dashboard", "--host", "0.0.0.0", "--no-open"]
  postgres:
    image: postgres:16-alpine
@@ -135,7 +152,7 @@ services:
      - gunnar
      - rind
      - abyssinthia
-      - gerhard
+      - gerhard-dashboard
 volumes:
  festinger-pgdata:
@@ -122,3 +122,17 @@ CREATE TABLE IF NOT EXISTS agent_models (
    model_id    INT  NOT NULL REFERENCES models(id) ON DELETE CASCADE,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now()
 );
 -- ---------------------------------------------------------------------------
 -- recollection_log — every prompt where a recollection block was injected
 -- ---------------------------------------------------------------------------
 CREATE TABLE IF NOT EXISTS recollection_log (
    id                  BIGSERIAL PRIMARY KEY,
    created_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
    agent_name          TEXT        NOT NULL DEFAULT '',
    salient_tokens      TEXT[]      NOT NULL DEFAULT '{}',
    recollection_block  TEXT        NOT NULL,
    messages_json       JSONB       NOT NULL DEFAULT '[]'
 );
 CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC);
@@ -7,6 +7,7 @@ on the hot path. Writes are write-through: in-memory first, then Postgres.
 from __future__ import annotations
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from typing import Optional
@@ -18,6 +19,7 @@ class SoasRow:
    saliency: float = 0.0
    novelty: float = 0.0
    first_seen_context: str = ""
    last_seen: Optional[datetime] = None
@dataclass
@@ -4,9 +4,11 @@ Database layer — asyncpg pool, schema init, cache warm-up, flush.
 from __future__ import annotations
 import asyncio
 import json
 import logging
 import math
 import os
 from datetime import datetime, timezone
 from pathlib import Path
 import asyncpg
@@ -70,6 +72,22 @@ async def init_schema(pool: asyncpg.Pool) -> None:
            )
            """
        )
        # Migration: recollection event log
        await conn.execute(
            """
            CREATE TABLE IF NOT EXISTS recollection_log (
                id                  BIGSERIAL PRIMARY KEY,
                created_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
                agent_name          TEXT        NOT NULL DEFAULT '',
                salient_tokens      TEXT[]      NOT NULL DEFAULT '{}',
                recollection_block  TEXT        NOT NULL,
                messages_json       JSONB       NOT NULL DEFAULT '[]'
            )
            """
        )
        await conn.execute(
            "CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC)"
        )
    log.info("schema applied")
@@ -135,7 +153,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None:
    """Load all SOAS and URD rows into the in-memory cache."""
    async with pool.acquire() as conn:
        soas_rows = await conn.fetch(
-            "SELECT id, token, encounter_count, saliency, novelty, first_seen_context FROM soas"
+            "SELECT id, token, encounter_count, saliency, novelty, first_seen_context, last_seen FROM soas"
        )
        for r in soas_rows:
            row = SoasRow(
@@ -145,6 +163,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None:
                saliency=r["saliency"],
                novelty=r["novelty"],
                first_seen_context=r["first_seen_context"] or "",
                last_seen=r["last_seen"],
            )
            cache.soas_by_token[r["token"]] = row
            cache.soas_by_id[r["id"]] = r["token"]
@@ -378,7 +397,18 @@ def recalculate_saliency(encounter_count: int, is_common_english: bool) -> float
 # ---------------------------------------------------------------------------
 async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
-    """Flush staged encounter_count deltas to Postgres in one batch UPDATE."""
+    """
    Flush staged encounter_count deltas to Postgres.
    Saliency model:
      saliency = 0  →  common English word (dictionary seed).  Never touched.
      saliency ≥ 1  →  domain concept.  Incremented by +delta on each flush.
    saliency=1 is set by urd_writer when a concept first gets a URD edge,
    meaning "mentioned at least once in a meaningful context".  Each flush
    adds the number of new encounters since the last flush, so saliency
    equals the total number of times the concept has been seen by the agent.
    """
    deltas = cache.drain_deltas()
    if not deltas:
        return
@@ -388,23 +418,10 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
            for soas_id, delta in deltas.items():
                token = cache.soas_by_id.get(soas_id, "")
                row = cache.soas_by_token.get(token)
-                new_count = (row.encounter_count if row else 0)
+                if not row or row.saliency == 0.0:
-                # novelty = 0 for common English words (pre-seeded)
+                    # saliency=0 is the English-word sentinel — never increment.
-                is_common = (row.novelty == 0.0 and row.saliency == 0.0) if row else False
+                    continue
-
+                new_saliency = row.saliency + delta
                # Unconfirmed novel concepts (novelty > 0, no URD edges yet) must not
                # be promoted above the read threshold by encounter-count alone.
                # Their saliency is raised explicitly when the LLM confirms them.
                is_unconfirmed_novel = (
                    row is not None
                    and row.novelty > 0.0
                    and not cache.urd_by_concept.get(soas_id)
                )
                if is_unconfirmed_novel:
                    new_saliency = row.saliency   # preserve low saliency until LLM confirms
                else:
                    new_saliency = recalculate_saliency(new_count, is_common)
                await conn.execute(
                    """
                    UPDATE soas
@@ -415,8 +432,8 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
                    """,
                    delta, new_saliency, soas_id,
                )
                if row:
                row.saliency = new_saliency
                row.last_seen = datetime.now(timezone.utc)
    log.debug("flushed %d saliency deltas", len(deltas))
@@ -429,3 +446,32 @@ async def get_config(pool: asyncpg.Pool, key: str, default: str = "") -> str:
    async with pool.acquire() as conn:
        row = await conn.fetchrow("SELECT value FROM config WHERE key = $1", key)
    return row["value"] if row else default
 # ---------------------------------------------------------------------------
 # Recollection log
 # ---------------------------------------------------------------------------
 async def log_recollection(
    pool: asyncpg.Pool,
    agent_name: str,
    salient_tokens: list[str],
    recollection_block: str,
    messages: list[dict],
 ) -> None:
    """Persist one recollection event. Fire-and-forget — never raises."""
    try:
        async with pool.acquire() as conn:
            await conn.execute(
                """
                INSERT INTO recollection_log
                    (agent_name, salient_tokens, recollection_block, messages_json)
                VALUES ($1, $2, $3, $4)
                """,
                agent_name or "",
                salient_tokens,
                recollection_block,
                json.dumps(messages),
            )
    except Exception:
        log.exception("recollection_log insert failed")
@@ -6,14 +6,19 @@ Routes:
  POST /api/generate                     Ollama-compatible generate
  POST /v1/messages                      Anthropic Messages API proxy (loop detection + recollection)
  POST /v1/chat/completions              OpenAI-compatible proxy (loop detection + recollection)
  POST /chat/completions                 Alias for /v1/chat/completions (LiteLLM without /v1 prefix)
  POST /{agent_id}/v1/chat/completions   Agent-prefixed OpenAI proxy — agent identity from URL
  POST /{agent_id}/chat/completions      Agent-prefixed OpenAI proxy (no /v1 prefix variant)
  POST /{agent_id}/v1/messages           Agent-prefixed Anthropic proxy — agent identity from URL
  POST /{agent_id}/api/chat              Agent-prefixed Ollama chat — agent identity from URL
  POST /{agent_id}/api/generate          Agent-prefixed Ollama generate — agent identity from URL
  POST /iknowthat                        Manual write path (gutask iknowthat)
  POST /resolve/run                      Manually trigger nightly resolution job
  POST /reload                           Reload URD cache (called by resolution job)
  GET  /health                           Health + stats
  GET  /conflicts                        Pending and recently resolved conflicts
  GET  /admin                            Minimal admin UI
-  *    /v1/{path}              Passthrough to upstream Anthropic
+  *    /{path}                           Passthrough to upstream Ollama or Anthropic
  *    /{path}                 Passthrough to upstream Ollama
 """
 from __future__ import annotations
@@ -38,7 +43,7 @@ from .db import (
    close_pool, get_config, get_or_create_soas,
    get_pool, init_schema, bootstrap_dimensions,
    bootstrap_english_dictionary, warm_cache, reload_urd_cache,
-    flush_encounter_deltas, reset_graph,
+    flush_encounter_deltas, reset_graph, log_recollection,
 )
 from .loop_detector import apply_mitigations, record_and_check, session_key
 from .cue_scanner import scan_cues
@@ -499,6 +504,30 @@ def _last_assistant_message_text(body: dict, path: str) -> str:
    return ""
 def _system_message_text(body: dict, path: str) -> str:
    """
    Extract the system message text from the request body.
    - OpenAI/Ollama: first message with role='system' in the messages list.
    - Anthropic: top-level 'system' field (string or content-block list).
    """
    if path == "/v1/messages":
        sys_field = body.get("system") or ""
        if isinstance(sys_field, list):
            return " ".join(
                b.get("text", "") for b in sys_field
                if isinstance(b, dict) and b.get("type") == "text"
            )
        return sys_field
    if path in ("/api/chat", "/v1/chat/completions", "/api/generate"):
        messages = body.get("messages", [])
        sys_msg = next((m for m in messages if m.get("role") == "system"), None)
        if sys_msg:
            return " ".join(_extract_text_strings(sys_msg.get("content", "")))
    return ""
 def inject_recollection_anthropic(body: dict, block: str) -> dict:
    """
    Inject a recollection block into an Anthropic Messages API request.
@@ -723,6 +752,7 @@ async def _route_agent_chat(
    cfg: dict,
    request_headers: dict,
    min_len: int,
    agent_name: str = "",
 ) -> Response:
    """
    Route an OpenAI-compatible chat completions request to the agent's
@@ -740,7 +770,7 @@ async def _route_agent_chat(
    # (Configure write_model_id or a per-agent model for a cloud/separate
    # model if you want memory building alongside local inference.)
    body = await process_prompt(
-        body, "/v1/chat/completions", pool, cfg, request_headers,
+        body, "/v1/chat/completions", pool, cfg, request_headers, agent_name=agent_name,
    )
    sess = session_key(agent_model.model_name, body.get("messages", []))
@@ -845,16 +875,20 @@ async def process_prompt(
    pool,
    cfg: dict,
    request_headers: dict | None = None,
    agent_name: str = "",
 ) -> dict:
    """
    Run the saliency + recollection pipeline over the prompt.
    Returns a (possibly modified) body dict with the recollection block injected.
    agent_name may be passed in directly (e.g. extracted from the URL path) to
    avoid re-parsing the body/headers; falls back to _extract_agent_name if empty.
    """
    read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5"))
    conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
    recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
    hdrs = request_headers or {}
    if not agent_name:
        agent_name, _ = _extract_agent_name(body, hdrs)  # body already cleaned by route handler
    # Last user message — primary source for recollection reads.
@@ -874,7 +908,21 @@ async def process_prompt(
        for cue in scan_cues(assistant_text):
            await enqueue_cue(cue)
-    # 3. Token loop over user message for saliency-triggered recollection.
+    # 3. Session boost — scan the system message for domain tokens.
    #    Concepts grounding the agent's persona or project context rank higher
    #    in the recollection block for the entire session.
    session_boost_ids: set[int] = set()
    sys_text = _system_message_text(body, path)
    if sys_text:
        for tok in tokenize(sys_text):
            row = cache.soas_by_token.get(tok)
            if row and row.saliency > 0.0:
                session_boost_ids.add(row.id)
        if session_boost_ids:
            boost_tokens = [cache.soas_by_id.get(i, str(i)) for i in session_boost_ids]
            log.debug("session_boost | agent=%s tokens=%s", agent_name or "(none)", boost_tokens)
    # 4. Token loop over user message for saliency-triggered recollection.
    tokens = tokenize(user_text)
    salient_for_read: list[int] = []
@@ -882,8 +930,8 @@ async def process_prompt(
        soas_row = cache.soas_by_token.get(token)
        if soas_row is None:
            continue
-        if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
+        if soas_row.saliency == 0.0:
-            continue  # common English — skip
+            continue  # saliency=0 is the English-word sentinel — skip
        cache.record_encounter(soas_row.id)
        if soas_row.saliency >= read_threshold:
            salient_for_read.append(soas_row.id)
@@ -891,8 +939,24 @@ async def process_prompt(
    if not salient_for_read:
        return body
-    # 5. Build recollection block
+    # 5. Conflict spike — warn loudly if any salient concept has a pending contradiction.
-    block = build_recollection_block(salient_for_read, conf_floor, recency_days)
+    conflicted = [
        cache.soas_by_id.get(cid, str(cid))
        for cid in salient_for_read
        if cid in cache.pending_conflicts
    ]
    if conflicted:
        log.warning(
            "recollection_conflict_spike | agent=%s conflicted=%s  "
            "(recollection block will show '?' suffix on affected dimensions)",
            agent_name or "(none)", conflicted,
        )
    # 6. Build recollection block
    block = build_recollection_block(
        salient_for_read, conf_floor, recency_days,
        session_boost_ids=session_boost_ids,
    )
    if not block:
        return body
@@ -902,16 +966,24 @@ async def process_prompt(
        agent_name or "(none)", salient_tokens, block,
    )
-    # 6. Inject into messages
+    # 7. Inject into messages
    if path == "/api/chat" or path == "/v1/chat/completions":
        body = dict(body)
        body["messages"] = inject_recollection(body.get("messages", []), block)
        log_messages = body["messages"]
    elif path == "/v1/messages":
        body = inject_recollection_anthropic(body, block)
-    # /api/generate uses a flat prompt string — prepend there
+        # Reconstruct a messages-style list for the log viewer
        sys_content = body.get("system", "")
        log_messages = ([{"role": "system", "content": sys_content}] if sys_content else []) + body.get("messages", [])
    elif path == "/api/generate":
        body = dict(body)
        body["prompt"] = block + "\n\n" + body.get("prompt", "")
        log_messages = [{"role": "user", "content": body["prompt"]}]
    else:
        log_messages = []
    asyncio.create_task(log_recollection(pool, agent_name or "", salient_tokens, block, log_messages))
    return body
@@ -920,17 +992,20 @@ async def process_prompt(
 # Routes
 # ---------------------------------------------------------------------------
-@app.post("/api/chat")
+async def _handle_ollama_chat(request: Request, agent_name: str = "") -> Response:
 async def chat(request: Request) -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
-    _, body = _extract_agent_name(await request.json(), dict(request.headers))
+    raw_body = await request.json()
    if agent_name:
        _, body = _extract_agent_name(raw_body, dict(request.headers))
    else:
        agent_name, body = _extract_agent_name(raw_body, dict(request.headers))
    model = body.get("model", "unknown")
    upstream = cfg["upstream_ollama"]
    min_len = cfg["detection"]["min_length"]
-    log.info("chat  route=/api/chat model=%s", model)
+    log.info("chat  route=/api/chat model=%s agent=%s", model, agent_name or "—")
    try:
-        body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers))
+        body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers), agent_name=agent_name)
        text, raw = await call_ollama("/api/chat", body, upstream)
        sess = session_key(model, body.get("messages", []))
        count = record_and_check(sess, text, min_len)
@@ -953,17 +1028,20 @@ async def chat(request: Request) -> Response:
        raise
-@app.post("/api/generate")
+async def _handle_ollama_generate(request: Request, agent_name: str = "") -> Response:
 async def generate(request: Request) -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
-    _, body = _extract_agent_name(await request.json(), dict(request.headers))
+    raw_body = await request.json()
    if agent_name:
        _, body = _extract_agent_name(raw_body, dict(request.headers))
    else:
        agent_name, body = _extract_agent_name(raw_body, dict(request.headers))
    model = body.get("model", "unknown")
    upstream = cfg["upstream_ollama"]
    min_len = cfg["detection"]["min_length"]
-    log.info("chat  route=/api/generate model=%s", model)
+    log.info("chat  route=/api/generate model=%s agent=%s", model, agent_name or "—")
    try:
-        body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers))
+        body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers), agent_name=agent_name)
        messages = [{"role": "user", "content": body.get("prompt", "")}]
        sess = session_key(model, messages)
        text, raw = await call_ollama("/api/generate", body, upstream)
@@ -987,31 +1065,44 @@ async def generate(request: Request) -> Response:
        raise
@app.post("/api/chat")
 async def chat(request: Request) -> Response:
    return await _handle_ollama_chat(request)
@app.post("/api/generate")
 async def generate(request: Request) -> Response:
    return await _handle_ollama_generate(request)
 # ---------------------------------------------------------------------------
 # Anthropic Messages API  (POST /v1/messages)
 # ---------------------------------------------------------------------------
-@app.post("/v1/messages")
+async def _handle_anthropic_messages(request: Request, agent_name: str = "") -> Response:
 async def anthropic_messages(request: Request) -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
    raw_body = await request.body()
-    _, body = _extract_agent_name(json.loads(raw_body), dict(request.headers))
+    parsed = json.loads(raw_body)
    if agent_name:
        _, body = _extract_agent_name(parsed, dict(request.headers))
    else:
        agent_name, body = _extract_agent_name(parsed, dict(request.headers))
    # Capture streaming intent BEFORE call_anthropic forces stream=False
    original_stream: bool = bool(body.get("stream", False))
    model = body.get("model", "unknown")
    upstream = cfg["upstream_anthropic"]
    min_len = cfg["detection"]["min_length"]
    log.info(
-        "chat  route=/v1/messages model=%s upstream=%s stream=%s  req_preview=%.200s",
+        "chat  route=/v1/messages model=%s upstream=%s agent=%s stream=%s  req_preview=%.200s",
-        model, upstream, original_stream,
+        model, upstream, agent_name or "—", original_stream,
        raw_body[:200].decode(errors="replace").replace("\n", " "),
    )
    try:
        headers = _relay_headers(request, ANTHROPIC_RELAY_HEADERS)
        if "anthropic-version" not in {k.lower() for k in headers}:
            headers["anthropic-version"] = "2023-06-01"
-        body = await process_prompt(body, "/v1/messages", pool, cfg, headers)
+        body = await process_prompt(body, "/v1/messages", pool, cfg, headers, agent_name=agent_name)
        messages = body.get("messages", [])
        sess = session_key(model, messages)
        text, raw = await call_anthropic(body, upstream, headers)
@@ -1048,16 +1139,23 @@ async def anthropic_messages(request: Request) -> Response:
        raise
@app.post("/v1/messages")
 async def anthropic_messages(request: Request) -> Response:
    return await _handle_anthropic_messages(request)
 # ---------------------------------------------------------------------------
 # OpenAI-compatible chat completions  (POST /v1/chat/completions)
 # ---------------------------------------------------------------------------
-@app.post("/v1/chat/completions")
+async def _handle_openai_chat(request: Request, agent_name: str = "") -> Response:
 async def openai_chat_completions(request: Request) -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
    raw_body = await request.json()
    hdrs = dict(request.headers)
    if agent_name:
        _, body = _extract_agent_name(raw_body, hdrs)
    else:
        agent_name, body = _extract_agent_name(raw_body, hdrs)  # strips agent_id/agent_name
    model = body.get("model", "unknown")
    upstream = cfg["upstream_openai"]
@@ -1077,12 +1175,12 @@ async def openai_chat_completions(request: Request) -> Response:
                    agent_model.base_url or "(default)",
                )
                return await _route_agent_chat(
-                    body, agent_model, original_stream, pool, cfg, hdrs, min_len
+                    body, agent_model, original_stream, pool, cfg, hdrs, min_len, agent_name=agent_name,
                )
        # Standard path — forward to configured upstream unchanged
        headers = _relay_headers(request, OPENAI_RELAY_HEADERS)
-        body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs)
+        body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs, agent_name=agent_name)
        messages = body.get("messages", [])
        sess = session_key(model, messages)
        text, raw = await call_openai(body, upstream, headers)
@@ -1110,12 +1208,54 @@ async def openai_chat_completions(request: Request) -> Response:
        raise
@app.post("/v1/chat/completions")
 async def openai_chat_completions(request: Request) -> Response:
    return await _handle_openai_chat(request)
 # Alias: some LiteLLM provider types (custom_openai, openai_like) omit the
 # /v1 prefix and post directly to /chat/completions.
@app.post("/chat/completions")
 async def openai_chat_completions_no_prefix(request: Request) -> Response:
    """Alias for /v1/chat/completions — handles LiteLLM providers that omit /v1."""
-    return await openai_chat_completions(request)
+    return await _handle_openai_chat(request)
 # ---------------------------------------------------------------------------
 # Agent-prefixed routes  /{agent_id}/...
 #
 # Configure each agent instance's base_url to include its name:
 #   http://festinger:11434/gerhard/v1      ← hermes (OpenAI-compat)
 #   http://festinger:11434/dobby/v1/messages  ← Agent0 (Anthropic)
 #   http://festinger:11434/gunnar           ← Ollama clients
 #
 # The agent_id is extracted from the URL path and takes priority over any
 # agent_name/agent_id passed in the request body or headers.
 # ---------------------------------------------------------------------------
@app.post("/{agent_id}/v1/chat/completions")
 async def openai_chat_with_agent_id(agent_id: str, request: Request) -> Response:
    return await _handle_openai_chat(request, agent_name=agent_id.lower())
@app.post("/{agent_id}/chat/completions")
 async def openai_chat_with_agent_id_no_v1(agent_id: str, request: Request) -> Response:
    return await _handle_openai_chat(request, agent_name=agent_id.lower())
@app.post("/{agent_id}/v1/messages")
 async def anthropic_messages_with_agent_id(agent_id: str, request: Request) -> Response:
    return await _handle_anthropic_messages(request, agent_name=agent_id.lower())
@app.post("/{agent_id}/api/chat")
 async def ollama_chat_with_agent_id(agent_id: str, request: Request) -> Response:
    return await _handle_ollama_chat(request, agent_name=agent_id.lower())
@app.post("/{agent_id}/api/generate")
 async def ollama_generate_with_agent_id(agent_id: str, request: Request) -> Response:
    return await _handle_ollama_generate(request, agent_name=agent_id.lower())
 # ---------------------------------------------------------------------------
@@ -1679,6 +1819,262 @@ async def test_reset(scenario_id: str, request: Request) -> dict:
    return await reset_scenario(pool, scenario_id)
 # ---------------------------------------------------------------------------
 # /recollection-log — browse prompts where recollection was injected
 # ---------------------------------------------------------------------------
 RECOLLECTION_LOG_HTML = """<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="utf-8">
 <title>Festinger — Recollection Log</title>
 <style>
  * { box-sizing: border-box; margin: 0; padding: 0; }
  body { font-family: monospace; background: #0d0d1a; color: #ccc; padding: 20px; }
  h1 { color: #fff; font-size: 1.2em; margin-bottom: 6px; }
  .subtitle { font-size: 0.75em; color: #555; margin-bottom: 20px; }
  .toolbar { display: flex; gap: 10px; align-items: center; margin-bottom: 18px; }
  .btn { padding: 6px 14px; border: none; border-radius: 3px; cursor: pointer;
         font-family: monospace; font-size: 0.82em; }
  .btn-danger { background: #6b1a1a; color: #ffaaaa; }
  .btn-danger:hover { background: #8b2a2a; }
  .btn-secondary { background: #1e1e40; color: #aaa; }
  .btn-secondary:hover { background: #2a2a55; }
  .filter-row { display: flex; gap: 8px; align-items: center; margin-bottom: 16px; }
  .filter-row input { background: #1a1a2e; border: 1px solid #2a2a4e; color: #ddd;
    padding: 5px 10px; border-radius: 3px; font-family: monospace; font-size: 0.82em; width: 220px; }
  .count { font-size: 0.75em; color: #555; }
  .entry { background: #12122a; border: 1px solid #1e1e40; border-radius: 5px;
           margin-bottom: 14px; overflow: hidden; }
  .entry-header { padding: 10px 14px; display: flex; gap: 16px; align-items: baseline;
                  border-bottom: 1px solid #1e1e40; cursor: pointer; }
  .entry-header:hover { background: #1a1a3a; }
  .ts { color: #555; font-size: 0.78em; white-space: nowrap; }
  .agent { color: #7070ff; font-size: 0.82em; font-weight: bold; }
  .tokens { font-size: 0.78em; color: #aaa; flex: 1; }
  .tok-tag { display: inline-block; background: #1e1e50; color: #9090ff;
             border-radius: 3px; padding: 1px 6px; margin: 1px 3px 1px 0; }
  .toggle-hint { font-size: 0.7em; color: #444; margin-left: auto; white-space: nowrap; }
  .entry-body { display: none; padding: 14px; }
  .entry-body.open { display: block; }
  .section-label { font-size: 0.68em; text-transform: uppercase; letter-spacing: 0.1em;
                   color: #555; margin-bottom: 6px; margin-top: 14px; }
  .section-label:first-child { margin-top: 0; }
  .recollection-block { background: #0a1020; border-left: 3px solid #3040aa;
    padding: 10px 12px; font-size: 0.83em; white-space: pre-wrap; color: #aabbff;
    border-radius: 0 3px 3px 0; line-height: 1.5; }
  .messages-accordion { margin-top: 8px; }
  .msg-toggle { background: #1a1a3a; border: none; color: #777; font-family: monospace;
                font-size: 0.78em; padding: 5px 10px; border-radius: 3px; cursor: pointer;
                width: 100%; text-align: left; }
  .msg-toggle:hover { background: #222250; color: #aaa; }
  .messages-list { display: none; margin-top: 8px; }
  .messages-list.open { display: block; }
  .msg { border: 1px solid #1e1e40; border-radius: 3px; margin-bottom: 6px; overflow: hidden; }
  .msg-role { padding: 4px 10px; font-size: 0.72em; font-weight: bold; text-transform: uppercase;
              letter-spacing: 0.08em; }
  .role-system  { background: #1a2040; color: #6688ff; }
  .role-user    { background: #1a2a1a; color: #66bb66; }
  .role-assistant { background: #2a1a1a; color: #cc8844; }
  .role-other   { background: #1e1e2e; color: #888; }
  .msg-content  { padding: 8px 10px; font-size: 0.8em; white-space: pre-wrap;
                  line-height: 1.5; max-height: 300px; overflow-y: auto; color: #ccc; }
  .empty { color: #444; font-size: 0.85em; padding: 30px 0; text-align: center; }
  .pagination { display: flex; gap: 8px; align-items: center; margin-top: 20px; }
  .pg-btn { padding: 5px 12px; background: #1e1e40; border: none; color: #aaa;
            border-radius: 3px; cursor: pointer; font-family: monospace; font-size: 0.8em; }
  .pg-btn:hover { background: #2a2a55; }
  .pg-btn:disabled { opacity: 0.35; cursor: default; }
  .pg-info { font-size: 0.78em; color: #555; }
  a.back { color: #5555aa; text-decoration: none; font-size: 0.82em; display: inline-block; margin-bottom: 16px; }
  a.back:hover { color: #8888ff; }
 </style>
 </head>
 <body>
 <a href="/admin" class="back">← admin</a>
 <h1>Recollection Log</h1>
 <p class="subtitle">Every prompt where Festinger injected a &lt;recollection&gt; block.</p>
 <div class="toolbar">
  <button class="btn btn-danger" onclick="clearLog()">Clear log</button>
  <button class="btn btn-secondary" onclick="load()">Refresh</button>
  <span class="count" id="count-label"></span>
 </div>
 <div class="filter-row">
  <input id="agent-filter" type="text" placeholder="Filter by agent…" oninput="load()" />
 </div>
 <div id="entries"></div>
 <div class="pagination">
  <button class="pg-btn" id="btn-prev" onclick="prevPage()" disabled>← prev</button>
  <span class="pg-info" id="pg-info"></span>
  <button class="pg-btn" id="btn-next" onclick="nextPage()">next →</button>
 </div>
 <script>
 let currentPage = 0;
 const PAGE = 20;
 async function load() {
  const agent = document.getElementById('agent-filter').value.trim();
  const qs = new URLSearchParams({ limit: PAGE, offset: currentPage * PAGE });
  if (agent) qs.set('agent', agent);
  const res = await fetch('/recollection-log/data?' + qs);
  const data = await res.json();
  renderEntries(data.entries);
  const total = data.total;
  document.getElementById('count-label').textContent = total + ' event' + (total === 1 ? '' : 's');
  document.getElementById('pg-info').textContent =
    'page ' + (currentPage + 1) + ' of ' + Math.max(1, Math.ceil(total / PAGE));
  document.getElementById('btn-prev').disabled = currentPage === 0;
  document.getElementById('btn-next').disabled = (currentPage + 1) * PAGE >= total;
 }
 function renderEntries(entries) {
  const el = document.getElementById('entries');
  if (!entries.length) {
    el.innerHTML = '<div class="empty">No recollection events yet.</div>';
    return;
  }
  el.innerHTML = entries.map((e, i) => {
    const tokHtml = (e.salient_tokens || []).map(t =>
      '<span class="tok-tag">' + esc(t) + '</span>'
    ).join('');
    const msgsHtml = (e.messages || []).map(m => {
      const role = (m.role || 'unknown').toLowerCase();
      const cls = ['system','user','assistant'].includes(role) ? 'role-' + role : 'role-other';
      const content = typeof m.content === 'string' ? m.content
        : JSON.stringify(m.content, null, 2);
      return '<div class="msg"><div class="msg-role ' + cls + '">' + esc(role) + '</div>'
           + '<div class="msg-content">' + esc(content) + '</div></div>';
    }).join('');
    return '<div class="entry">'
      + '<div class="entry-header" onclick="toggle(' + i + ')">'
      + '  <span class="ts">' + esc(e.created_at) + '</span>'
      + '  <span class="agent">' + esc(e.agent_name || '(none)') + '</span>'
      + '  <span class="tokens">' + tokHtml + '</span>'
      + '  <span class="toggle-hint">click to expand</span>'
      + '</div>'
      + '<div class="entry-body" id="entry-body-' + i + '">'
      + '  <div class="section-label">Recollection block</div>'
      + '  <pre class="recollection-block">' + esc(e.recollection_block) + '</pre>'
      + '  <div class="section-label" style="margin-top:14px">Full prompt (' + (e.messages || []).length + ' messages)</div>'
      + '  <div class="messages-accordion">'
      + '    <button class="msg-toggle" onclick="toggleMsgs(event,' + i + ')">Show messages ▾</button>'
      + '    <div class="messages-list" id="msgs-' + i + '">' + msgsHtml + '</div>'
      + '  </div>'
      + '</div>'
      + '</div>';
  }).join('');
 }
 function toggle(i) {
  const el = document.getElementById('entry-body-' + i);
  el.classList.toggle('open');
 }
 function toggleMsgs(evt, i) {
  evt.stopPropagation();
  const el = document.getElementById('msgs-' + i);
  el.classList.toggle('open');
  evt.target.textContent = el.classList.contains('open') ? 'Hide messages ▴' : 'Show messages ▾';
 }
 function esc(s) {
  return String(s)
    .replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;')
    .replace(/"/g,'&quot;');
 }
 function prevPage() { currentPage = Math.max(0, currentPage - 1); load(); }
 function nextPage() { currentPage++; load(); }
 async function clearLog() {
  if (!confirm('Delete all recollection log entries?')) return;
  await fetch('/recollection-log', { method: 'DELETE' });
  currentPage = 0;
  load();
 }
 load();
 </script>
 </body>
 </html>
 """
@app.get("/recollection-log", response_class=HTMLResponse)
 async def recollection_log_ui() -> HTMLResponse:
    return HTMLResponse(RECOLLECTION_LOG_HTML)
@app.get("/recollection-log/data")
 async def recollection_log_data(
    request: Request,
    limit: int = 20,
    offset: int = 0,
    agent: str = "",
 ) -> dict:
    pool = request.app.state.pool
    async with pool.acquire() as conn:
        if agent:
            rows = await conn.fetch(
                """
                SELECT id, created_at, agent_name, salient_tokens, recollection_block, messages_json
                FROM recollection_log
                WHERE agent_name ILIKE $1
                ORDER BY created_at DESC
                LIMIT $2 OFFSET $3
                """,
                f"%{agent}%", limit, offset,
            )
            total = await conn.fetchval(
                "SELECT COUNT(*) FROM recollection_log WHERE agent_name ILIKE $1",
                f"%{agent}%",
            )
        else:
            rows = await conn.fetch(
                """
                SELECT id, created_at, agent_name, salient_tokens, recollection_block, messages_json
                FROM recollection_log
                ORDER BY created_at DESC
                LIMIT $1 OFFSET $2
                """,
                limit, offset,
            )
            total = await conn.fetchval("SELECT COUNT(*) FROM recollection_log")
    entries = []
    for r in rows:
        entries.append({
            "id": r["id"],
            "created_at": r["created_at"].strftime("%Y-%m-%d %H:%M:%S UTC"),
            "agent_name": r["agent_name"],
            "salient_tokens": list(r["salient_tokens"]),
            "recollection_block": r["recollection_block"],
            "messages": r["messages_json"],
        })
    return {"total": total, "offset": offset, "limit": limit, "entries": entries}
@app.delete("/recollection-log")
 async def clear_recollection_log(request: Request) -> dict:
    pool = request.app.state.pool
    async with pool.acquire() as conn:
        result = await conn.execute("DELETE FROM recollection_log")
    deleted = int(result.split()[-1]) if result else 0
    log.info("recollection_log cleared  deleted=%d", deleted)
    return {"status": "ok", "deleted": deleted}
 # ---------------------------------------------------------------------------
 # /graph — knowledge graph explorer
 # ---------------------------------------------------------------------------
@@ -2510,6 +2906,7 @@ ADMIN_HTML = """<!DOCTYPE html>
  <h1>Festinger</h1>
  <p class="subtitle">Ollama-compatible inference middleware — loop detection &amp; Recollections world model
    &nbsp;&mdash;&nbsp;<a href="/graph" style="color:#1a1a2e">Knowledge Graph Explorer</a>
    &nbsp;&mdash;&nbsp;<a href="/recollection-log" style="color:#1a1a2e">Recollection Log</a>
    &nbsp;&mdash;&nbsp;<a href="/models-ui" style="color:#1a1a2e">Model Manager</a>
  </p>
@@ -2,13 +2,28 @@
 Recollection engine — read path.
 For each salient concept found in an intercepted prompt:
-  - Query URD edges (from in-memory cache, filtered by confidence + recency)
+  - Query URD edges (from in-memory cache, filtered by confidence)
  - Rank by effective score (recency-decayed saliency + centrality bonus)
  - Render hit or zero-hit block
-  - Inject the <recollection> block into the prompt before forwarding to Ollama
+  - Inject the <recollection> block into the prompt
 Scoring
 -------
 effective_score(c) = saliency(c) * recency_decay(c) + centrality_bonus(c)
  recency_decay   = exp(-days_since_last_seen / RECENCY_HALF_LIFE)
                    → 1.0 if never seen (no penalty for brand-new concepts)
                    → 0.5 at 30 days, 0.25 at 60 days, ~0.04 at 90 days
  centrality_bonus = log1p(n_children)
                    → 0 for leaf concepts
                    → grows slowly with the number of concepts that use this
                       one as a parent (hub nodes surface first)
 """
 from __future__ import annotations
 import logging
 import math
 from datetime import datetime, timezone, timedelta
 from typing import Optional
@@ -19,34 +34,62 @@ log = logging.getLogger("festinger.recollection")
 ZERO_HIT_TEMPLATE = "You have no memory of {concept}. Try: memory_load query='{concept}' or gutask context {concept}"
 # Half-life for recency decay: saliency halves after this many days of silence.
 RECENCY_HALF_LIFE = 30.0
 # Flat bonus added to effective_score for concepts found in the system message.
 # Large enough to push grounding concepts above freshly-encountered domain words.
 SESSION_BOOST = 2.0
 # ---------------------------------------------------------------------------
-# Query (reads from in-memory cache)
+# Scoring
 # ---------------------------------------------------------------------------
-def query_edges(
+def recency_decay(row: SoasRow) -> float:
    concept_id: int,
    confidence_floor: float,
    recency_days: int,
 ) -> list[UrdEdge]:
    """
-    Return URD edges for *concept_id* that pass confidence + recency filters.
+    Exponential decay based on days since last_seen.
-    All reads are pure in-memory — zero network.
+    Returns 1.0 if last_seen is unknown (no penalty for fresh concepts).
    Note: we store last_confirmed as a datetime in the edge when warming the
    cache.  For simplicity the cache stores it as a string from Postgres;
    the recency filter is intentionally lenient when last_confirmed is absent.
    """
-    edges = cache.urd_by_concept.get(concept_id, [])
+    if row.last_seen is None:
-    if not edges:
+        return 1.0
-        return []
+    now = datetime.now(tz=timezone.utc)
    last = row.last_seen
    if last.tzinfo is None:
        last = last.replace(tzinfo=timezone.utc)
    days = (now - last).total_seconds() / 86400.0
    return math.exp(-days / RECENCY_HALF_LIFE)
-    cutoff = datetime.now(tz=timezone.utc) - timedelta(days=recency_days)
+
-    result = []
+def centrality_bonus(concept_id: int) -> float:
-    for e in edges:
+    """
-        if e.confidence < confidence_floor:
+    log1p of the number of concepts that reference this one as a parent.
-            continue
+    Hub concepts (many children) rise to the top of the recollection block.
-        result.append(e)
+    """
-    return result
+    n_children = len(cache.urd_by_parent.get(concept_id, []))
    return math.log1p(n_children)
 def effective_score(concept_id: int, session_boosted: bool = False) -> float:
    """Combined score used to rank concepts within the recollection block."""
    token = cache.soas_by_id.get(concept_id, "")
    row = cache.soas_by_token.get(token)
    if row is None:
        return 0.0
    base = row.saliency * recency_decay(row) + centrality_bonus(concept_id)
    return base + (SESSION_BOOST if session_boosted else 0.0)
 # ---------------------------------------------------------------------------
 # Query
 # ---------------------------------------------------------------------------
 def query_edges(concept_id: int, confidence_floor: float) -> list[UrdEdge]:
    """Return URD edges for *concept_id* above the confidence floor."""
    return [
        e for e in cache.urd_by_concept.get(concept_id, [])
        if e.confidence >= confidence_floor
    ]
 # ---------------------------------------------------------------------------
@@ -57,27 +100,28 @@ def render_hit(
    concept_token: str,
    edges: list[UrdEdge],
    concept_id: int,
    score: float,
    reverse_edges: list[UrdEdge] | None = None,
 ) -> str:
    """
-    Render one concept's recollection line:
+    Render one concept's recollection line, e.g.:
-        gnommoweb: [type] repo  [membership] glitch_university
+        gnommoweb [s=3.2]: [type] repo  [membership] glitch_university  [type←] omega13
-        omega13: [membership←] gnommoweb  (reverse: gnommoweb is a member of omega13)
+    The score annotation helps with debugging.
-    Pending-conflict dimensions get a '?' suffix on the dim token.
+    Pending-conflict dimensions get a '?' suffix.
    """
    has_pending = concept_id in cache.pending_conflicts
    parts = []
    for e in edges:
        dim_label = e.dim_token + ("?" if has_pending else "")
        parts.append(f"[{dim_label}] {e.parent_token}")
    # Bidirectional: surface concepts where this concept appears as a parent
    for e in (reverse_edges or [])[:3]:
        child_token = cache.soas_by_id.get(e.concept_id, str(e.concept_id))
        parts.append(f"[{e.dim_token}←] {child_token}")
-    return f"{concept_token}: {'  '.join(parts)}"
+    score_str = f"{score:.1f}"
    return f"{concept_token} [s={score_str}]: {'  '.join(parts)}"
-def render_zero_hit(concept_token: str) -> str:
+def render_zero_hit(concept_token: str, score: float) -> str:
    line = ZERO_HIT_TEMPLATE.format(concept=concept_token)
    soas_row = cache.soas_by_token.get(concept_token)
    if soas_row and soas_row.first_seen_context:
@@ -88,37 +132,45 @@ def render_zero_hit(concept_token: str) -> str:
 def build_recollection_block(
    salient_concept_ids: list[int],
    confidence_floor: float,
-    recency_days: int,
+    recency_days: int,        # kept for API compat, no longer used for edge filtering
    max_lines: int = 12,
    session_boost_ids: set[int] | None = None,
 ) -> Optional[str]:
    """
-    Build the full <recollection> block for a list of above-threshold concept IDs.
+    Build the <recollection> block, ranked by effective_score (recency-decayed
-    Returns None if there is nothing to say.
+    saliency + centrality bonus + optional session boost).
-    Concepts with known edges are shown first (most informative); zero-hit
+    Concepts with known edges are shown first; zero-hit lines fill the remainder.
-    "don't know" lines fill the remaining budget up to max_lines.
+    session_boost_ids: concept IDs found in the system message — ranked higher
    to surface grounding context (agent persona, project name) first.
    """
-    hit_lines: list[str] = []
+    boosted = session_boost_ids or set()
    zero_lines: list[str] = []
-    # Deduplicate — same concept may appear twice if novel + known
+    # Deduplicate and score
    seen: set[int] = set()
    scored: list[tuple[float, int]] = []
    for cid in salient_concept_ids:
        if cid in seen:
            continue
        seen.add(cid)
        scored.append((effective_score(cid, session_boosted=(cid in boosted)), cid))
    # Highest score first
    scored.sort(key=lambda x: x[0], reverse=True)
    hit_lines: list[str] = []
    zero_lines: list[str] = []
    for score, cid in scored:
        token = cache.soas_by_id.get(cid, str(cid))
-        edges = query_edges(cid, confidence_floor, recency_days)
+        edges = query_edges(cid, confidence_floor)
        # Bidirectional: find edges where this concept appears as a parent
        reverse_edges = cache.urd_by_parent.get(cid, [])
        if edges or reverse_edges:
-            hit_lines.append(render_hit(token, edges, cid, reverse_edges))
+            hit_lines.append(render_hit(token, edges, cid, score, reverse_edges))
        else:
-            zero_lines.append(render_zero_hit(token))
+            zero_lines.append(render_zero_hit(token, score))
    # Hits always included (up to max_lines); zero-hits fill the remainder
    lines = hit_lines[:max_lines]
    remaining = max_lines - len(lines)
    lines += zero_lines[:remaining]
@@ -126,8 +178,7 @@ def build_recollection_block(
    if not lines:
        return None
-    body = "\n".join(lines)
+    return f"<recollection>\n" + "\n".join(lines) + "\n</recollection>"
    return f"<recollection>\n{body}\n</recollection>"
 # ---------------------------------------------------------------------------
@@ -133,6 +133,21 @@ async def insert_urd_edge(
    cache.urd_by_concept.setdefault(req.concept_id, []).append(edge)
    cache.urd_by_concept_dim[key] = edge
    cache.urd_by_parent.setdefault(req.parent_id, []).append(edge)
    # Mark the concept as a domain token: saliency=1 means "first use".
    # saliency=0 is the sentinel for common-English words (dictionary seed),
    # which the encounter loop silently skips.  Any concept that has a URD
    # edge is definitionally domain-specific and must start above zero.
    concept_token_str = cache.soas_by_id.get(req.concept_id, "")
    row = cache.soas_by_token.get(concept_token_str)
    if row and row.saliency == 0.0:
        async with pool.acquire() as conn:
            await conn.execute(
                "UPDATE soas SET saliency = 1.0, novelty = 1.0 WHERE id = $1 AND saliency = 0",
                req.concept_id,
            )
        row.saliency = 1.0
        row.novelty = 1.0
    log.info(
        "urd insert  concept=%d parent=%d dim=%d is_isa=%s source=%s",
        req.concept_id, req.parent_id, req.dim_id, req.is_isa, req.source,