Adding changes to festinger

2026-04-25 11:31:58 +02:00
parent abc5cf5952
commit df28e56add
8 changed files with 707 additions and 118 deletions
@@ -0,0 +1,47 @@
+# Hermes config for gerhard — evaluation instance
+#
+# LLM calls go to festinger, which sits in front of the real inference
+# providers. The agent identity is encoded in the base URL so festinger
+# can route to the right model and build the per-agent knowledge graph.
+#
+#   festinger routing:  http://festinger:11434/gerhard/v1/chat/completions
+#   → festinger looks up agent_name="gerhard" in agent_models table
+#   → if found: routes to the registered provider/model
+#   → if not found: falls through to upstream_openai in config.yaml
+
+model:
+  provider: "custom"
+  base_url: "http://festinger:11434/gerhard/v1"
+  # No api_key needed — festinger does not authenticate OpenAI-compat requests.
+  # Set a placeholder so the OpenAI SDK doesn't complain.
+  api_key: "festinger"
+  # Model name is what gets sent in the request body. festinger can override
+  # this per-agent via the agent_models table. Set something reasonable as
+  # a default (festinger's upstream_openai fallback will use whatever it supports).
+  default: "claude-opus-4-6"
+
+terminal:
+  backend: "local"
+  cwd: "."
+  timeout: 180
+  lifetime_seconds: 300
+
+agent:
+  max_turns: 60
+  verbose: false
+  reasoning_effort: "medium"
+
+memory:
+  memory_enabled: true
+  user_profile_enabled: true
+
+compression:
+  enabled: true
+  threshold: 0.50
+  target_ratio: 0.20
+  protect_last_n: 20
+
+session_reset:
+  mode: both
+  idle_minutes: 1440
+  at_hour: 4
@@ -76,19 +76,36 @@ services:
      - "host.docker.internal:host-gateway"

  gerhard:
-    image: agent0ai/agent-zero:latest
+    build:
+      context: ../../hermes-agent
+    image: hermes-agent
    container_name: gerhard
-    ports:
-      - "50007:80"
    volumes:
-      - ./agents/gerhard:/a0/usr
-      - ${HOME}/.ssh:/root/.ssh
+      - ./agents/gerhard-hermes:/opt/data
    restart: unless-stopped
    environment:
-      AUTH_LOGIN: ${AUTH_LOGIN}
-      AUTH_PASSWORD: ${AUTH_PASSWORD}
+      HERMES_UID: ${HERMES_UID:-1000}
+      HERMES_GID: ${HERMES_GID:-1000}
    extra_hosts:
      - "host.docker.internal:host-gateway"
+    command: ["gateway", "run"]
+
+  gerhard-dashboard:
+    image: hermes-agent
+    container_name: gerhard-dashboard
+    ports:
+      - "50007:9119"    # web dashboard at localhost:50007
+    volumes:
+      - ./agents/gerhard-hermes:/opt/data
+    restart: unless-stopped
+    depends_on:
+      - gerhard
+    environment:
+      HERMES_UID: ${HERMES_UID:-1000}
+      HERMES_GID: ${HERMES_GID:-1000}
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    command: ["dashboard", "--host", "0.0.0.0", "--no-open"]

  postgres:
    image: postgres:16-alpine
@@ -135,7 +152,7 @@ services:
      - gunnar
      - rind
      - abyssinthia
-      - gerhard
+      - gerhard-dashboard

 volumes:
  festinger-pgdata:
@@ -122,3 +122,17 @@ CREATE TABLE IF NOT EXISTS agent_models (
    model_id    INT  NOT NULL REFERENCES models(id) ON DELETE CASCADE,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now()
 );
+
+-- ---------------------------------------------------------------------------
+-- recollection_log — every prompt where a recollection block was injected
+-- ---------------------------------------------------------------------------
+CREATE TABLE IF NOT EXISTS recollection_log (
+    id                  BIGSERIAL PRIMARY KEY,
+    created_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
+    agent_name          TEXT        NOT NULL DEFAULT '',
+    salient_tokens      TEXT[]      NOT NULL DEFAULT '{}',
+    recollection_block  TEXT        NOT NULL,
+    messages_json       JSONB       NOT NULL DEFAULT '[]'
+);
+
+CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC);
@@ -7,6 +7,7 @@ on the hot path. Writes are write-through: in-memory first, then Postgres.
 from __future__ import annotations

 from dataclasses import dataclass, field
+from datetime import datetime, timezone
 from typing import Optional


@@ -18,6 +19,7 @@ class SoasRow:
    saliency: float = 0.0
    novelty: float = 0.0
    first_seen_context: str = ""
+    last_seen: Optional[datetime] = None


@dataclass
@@ -4,9 +4,11 @@ Database layer — asyncpg pool, schema init, cache warm-up, flush.
 from __future__ import annotations

 import asyncio
+import json
 import logging
 import math
 import os
+from datetime import datetime, timezone
 from pathlib import Path

 import asyncpg
@@ -70,6 +72,22 @@ async def init_schema(pool: asyncpg.Pool) -> None:
            )
            """
        )
+        # Migration: recollection event log
+        await conn.execute(
+            """
+            CREATE TABLE IF NOT EXISTS recollection_log (
+                id                  BIGSERIAL PRIMARY KEY,
+                created_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
+                agent_name          TEXT        NOT NULL DEFAULT '',
+                salient_tokens      TEXT[]      NOT NULL DEFAULT '{}',
+                recollection_block  TEXT        NOT NULL,
+                messages_json       JSONB       NOT NULL DEFAULT '[]'
+            )
+            """
+        )
+        await conn.execute(
+            "CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC)"
+        )
    log.info("schema applied")


@@ -135,7 +153,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None:
    """Load all SOAS and URD rows into the in-memory cache."""
    async with pool.acquire() as conn:
        soas_rows = await conn.fetch(
-            "SELECT id, token, encounter_count, saliency, novelty, first_seen_context FROM soas"
+            "SELECT id, token, encounter_count, saliency, novelty, first_seen_context, last_seen FROM soas"
        )
        for r in soas_rows:
            row = SoasRow(
@@ -145,6 +163,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None:
                saliency=r["saliency"],
                novelty=r["novelty"],
                first_seen_context=r["first_seen_context"] or "",
+                last_seen=r["last_seen"],
            )
            cache.soas_by_token[r["token"]] = row
            cache.soas_by_id[r["id"]] = r["token"]
@@ -378,7 +397,18 @@ def recalculate_saliency(encounter_count: int, is_common_english: bool) -> float
 # ---------------------------------------------------------------------------

 async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
-    """Flush staged encounter_count deltas to Postgres in one batch UPDATE."""
+    """
+    Flush staged encounter_count deltas to Postgres.
+
+    Saliency model:
+      saliency = 0  →  common English word (dictionary seed).  Never touched.
+      saliency ≥ 1  →  domain concept.  Incremented by +delta on each flush.
+
+    saliency=1 is set by urd_writer when a concept first gets a URD edge,
+    meaning "mentioned at least once in a meaningful context".  Each flush
+    adds the number of new encounters since the last flush, so saliency
+    equals the total number of times the concept has been seen by the agent.
+    """
    deltas = cache.drain_deltas()
    if not deltas:
        return
@@ -388,23 +418,10 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
            for soas_id, delta in deltas.items():
                token = cache.soas_by_id.get(soas_id, "")
                row = cache.soas_by_token.get(token)
-                new_count = (row.encounter_count if row else 0)
-                # novelty = 0 for common English words (pre-seeded)
-                is_common = (row.novelty == 0.0 and row.saliency == 0.0) if row else False
-
-                # Unconfirmed novel concepts (novelty > 0, no URD edges yet) must not
-                # be promoted above the read threshold by encounter-count alone.
-                # Their saliency is raised explicitly when the LLM confirms them.
-                is_unconfirmed_novel = (
-                    row is not None
-                    and row.novelty > 0.0
-                    and not cache.urd_by_concept.get(soas_id)
-                )
-                if is_unconfirmed_novel:
-                    new_saliency = row.saliency   # preserve low saliency until LLM confirms
-                else:
-                    new_saliency = recalculate_saliency(new_count, is_common)
-
+                if not row or row.saliency == 0.0:
+                    # saliency=0 is the English-word sentinel — never increment.
+                    continue
+                new_saliency = row.saliency + delta
                await conn.execute(
                    """
                    UPDATE soas
@@ -415,8 +432,8 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
                    """,
                    delta, new_saliency, soas_id,
                )
-                if row:
-                    row.saliency = new_saliency
+                row.saliency = new_saliency
+                row.last_seen = datetime.now(timezone.utc)

    log.debug("flushed %d saliency deltas", len(deltas))

@@ -429,3 +446,32 @@ async def get_config(pool: asyncpg.Pool, key: str, default: str = "") -> str:
    async with pool.acquire() as conn:
        row = await conn.fetchrow("SELECT value FROM config WHERE key = $1", key)
    return row["value"] if row else default
+
+
+# ---------------------------------------------------------------------------
+# Recollection log
+# ---------------------------------------------------------------------------
+
+async def log_recollection(
+    pool: asyncpg.Pool,
+    agent_name: str,
+    salient_tokens: list[str],
+    recollection_block: str,
+    messages: list[dict],
+) -> None:
+    """Persist one recollection event. Fire-and-forget — never raises."""
+    try:
+        async with pool.acquire() as conn:
+            await conn.execute(
+                """
+                INSERT INTO recollection_log
+                    (agent_name, salient_tokens, recollection_block, messages_json)
+                VALUES ($1, $2, $3, $4)
+                """,
+                agent_name or "",
+                salient_tokens,
+                recollection_block,
+                json.dumps(messages),
+            )
+    except Exception:
+        log.exception("recollection_log insert failed")
@@ -2,18 +2,23 @@
 Festinger — main FastAPI application.

 Routes:
-  POST /api/chat               Ollama-compatible chat (loop detection + recollection injection)
-  POST /api/generate           Ollama-compatible generate
-  POST /v1/messages            Anthropic Messages API proxy (loop detection + recollection)
-  POST /v1/chat/completions    OpenAI-compatible proxy (loop detection + recollection)
-  POST /iknowthat              Manual write path (gutask iknowthat)
-  POST /resolve/run            Manually trigger nightly resolution job
-  POST /reload                 Reload URD cache (called by resolution job)
-  GET  /health                 Health + stats
-  GET  /conflicts              Pending and recently resolved conflicts
-  GET  /admin                  Minimal admin UI
-  *    /v1/{path}              Passthrough to upstream Anthropic
-  *    /{path}                 Passthrough to upstream Ollama
+  POST /api/chat                         Ollama-compatible chat (loop detection + recollection injection)
+  POST /api/generate                     Ollama-compatible generate
+  POST /v1/messages                      Anthropic Messages API proxy (loop detection + recollection)
+  POST /v1/chat/completions              OpenAI-compatible proxy (loop detection + recollection)
+  POST /chat/completions                 Alias for /v1/chat/completions (LiteLLM without /v1 prefix)
+  POST /{agent_id}/v1/chat/completions   Agent-prefixed OpenAI proxy — agent identity from URL
+  POST /{agent_id}/chat/completions      Agent-prefixed OpenAI proxy (no /v1 prefix variant)
+  POST /{agent_id}/v1/messages           Agent-prefixed Anthropic proxy — agent identity from URL
+  POST /{agent_id}/api/chat              Agent-prefixed Ollama chat — agent identity from URL
+  POST /{agent_id}/api/generate          Agent-prefixed Ollama generate — agent identity from URL
+  POST /iknowthat                        Manual write path (gutask iknowthat)
+  POST /resolve/run                      Manually trigger nightly resolution job
+  POST /reload                           Reload URD cache (called by resolution job)
+  GET  /health                           Health + stats
+  GET  /conflicts                        Pending and recently resolved conflicts
+  GET  /admin                            Minimal admin UI
+  *    /{path}                           Passthrough to upstream Ollama or Anthropic
 """
 from __future__ import annotations

@@ -38,7 +43,7 @@ from .db import (
    close_pool, get_config, get_or_create_soas,
    get_pool, init_schema, bootstrap_dimensions,
    bootstrap_english_dictionary, warm_cache, reload_urd_cache,
-    flush_encounter_deltas, reset_graph,
+    flush_encounter_deltas, reset_graph, log_recollection,
 )
 from .loop_detector import apply_mitigations, record_and_check, session_key
 from .cue_scanner import scan_cues
@@ -499,6 +504,30 @@ def _last_assistant_message_text(body: dict, path: str) -> str:
    return ""


+def _system_message_text(body: dict, path: str) -> str:
+    """
+    Extract the system message text from the request body.
+    - OpenAI/Ollama: first message with role='system' in the messages list.
+    - Anthropic: top-level 'system' field (string or content-block list).
+    """
+    if path == "/v1/messages":
+        sys_field = body.get("system") or ""
+        if isinstance(sys_field, list):
+            return " ".join(
+                b.get("text", "") for b in sys_field
+                if isinstance(b, dict) and b.get("type") == "text"
+            )
+        return sys_field
+
+    if path in ("/api/chat", "/v1/chat/completions", "/api/generate"):
+        messages = body.get("messages", [])
+        sys_msg = next((m for m in messages if m.get("role") == "system"), None)
+        if sys_msg:
+            return " ".join(_extract_text_strings(sys_msg.get("content", "")))
+
+    return ""
+
+
 def inject_recollection_anthropic(body: dict, block: str) -> dict:
    """
    Inject a recollection block into an Anthropic Messages API request.
@@ -723,6 +752,7 @@ async def _route_agent_chat(
    cfg: dict,
    request_headers: dict,
    min_len: int,
+    agent_name: str = "",
 ) -> Response:
    """
    Route an OpenAI-compatible chat completions request to the agent's
@@ -740,7 +770,7 @@ async def _route_agent_chat(
    # (Configure write_model_id or a per-agent model for a cloud/separate
    # model if you want memory building alongside local inference.)
    body = await process_prompt(
-        body, "/v1/chat/completions", pool, cfg, request_headers,
+        body, "/v1/chat/completions", pool, cfg, request_headers, agent_name=agent_name,
    )

    sess = session_key(agent_model.model_name, body.get("messages", []))
@@ -845,17 +875,21 @@ async def process_prompt(
    pool,
    cfg: dict,
    request_headers: dict | None = None,
+    agent_name: str = "",
 ) -> dict:
    """
    Run the saliency + recollection pipeline over the prompt.
    Returns a (possibly modified) body dict with the recollection block injected.
+    agent_name may be passed in directly (e.g. extracted from the URL path) to
+    avoid re-parsing the body/headers; falls back to _extract_agent_name if empty.
    """
    read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5"))
    conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
    recency_days = int(await get_config(pool, "recollection_recency_days", "90"))

    hdrs = request_headers or {}
-    agent_name, _ = _extract_agent_name(body, hdrs)  # body already cleaned by route handler
+    if not agent_name:
+        agent_name, _ = _extract_agent_name(body, hdrs)  # body already cleaned by route handler

    # Last user message — primary source for recollection reads.
    user_text = _last_user_message_text(body, path)
@@ -874,7 +908,21 @@ async def process_prompt(
        for cue in scan_cues(assistant_text):
            await enqueue_cue(cue)

-    # 3. Token loop over user message for saliency-triggered recollection.
+    # 3. Session boost — scan the system message for domain tokens.
+    #    Concepts grounding the agent's persona or project context rank higher
+    #    in the recollection block for the entire session.
+    session_boost_ids: set[int] = set()
+    sys_text = _system_message_text(body, path)
+    if sys_text:
+        for tok in tokenize(sys_text):
+            row = cache.soas_by_token.get(tok)
+            if row and row.saliency > 0.0:
+                session_boost_ids.add(row.id)
+        if session_boost_ids:
+            boost_tokens = [cache.soas_by_id.get(i, str(i)) for i in session_boost_ids]
+            log.debug("session_boost | agent=%s tokens=%s", agent_name or "(none)", boost_tokens)
+
+    # 4. Token loop over user message for saliency-triggered recollection.
    tokens = tokenize(user_text)
    salient_for_read: list[int] = []

@@ -882,8 +930,8 @@ async def process_prompt(
        soas_row = cache.soas_by_token.get(token)
        if soas_row is None:
            continue
-        if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
-            continue  # common English — skip
+        if soas_row.saliency == 0.0:
+            continue  # saliency=0 is the English-word sentinel — skip
        cache.record_encounter(soas_row.id)
        if soas_row.saliency >= read_threshold:
            salient_for_read.append(soas_row.id)
@@ -891,8 +939,24 @@ async def process_prompt(
    if not salient_for_read:
        return body

-    # 5. Build recollection block
-    block = build_recollection_block(salient_for_read, conf_floor, recency_days)
+    # 5. Conflict spike — warn loudly if any salient concept has a pending contradiction.
+    conflicted = [
+        cache.soas_by_id.get(cid, str(cid))
+        for cid in salient_for_read
+        if cid in cache.pending_conflicts
+    ]
+    if conflicted:
+        log.warning(
+            "recollection_conflict_spike | agent=%s conflicted=%s  "
+            "(recollection block will show '?' suffix on affected dimensions)",
+            agent_name or "(none)", conflicted,
+        )
+
+    # 6. Build recollection block
+    block = build_recollection_block(
+        salient_for_read, conf_floor, recency_days,
+        session_boost_ids=session_boost_ids,
+    )
    if not block:
        return body

@@ -902,16 +966,24 @@ async def process_prompt(
        agent_name or "(none)", salient_tokens, block,
    )

-    # 6. Inject into messages
+    # 7. Inject into messages
    if path == "/api/chat" or path == "/v1/chat/completions":
        body = dict(body)
        body["messages"] = inject_recollection(body.get("messages", []), block)
+        log_messages = body["messages"]
    elif path == "/v1/messages":
        body = inject_recollection_anthropic(body, block)
-    # /api/generate uses a flat prompt string — prepend there
+        # Reconstruct a messages-style list for the log viewer
+        sys_content = body.get("system", "")
+        log_messages = ([{"role": "system", "content": sys_content}] if sys_content else []) + body.get("messages", [])
    elif path == "/api/generate":
        body = dict(body)
        body["prompt"] = block + "\n\n" + body.get("prompt", "")
+        log_messages = [{"role": "user", "content": body["prompt"]}]
+    else:
+        log_messages = []
+
+    asyncio.create_task(log_recollection(pool, agent_name or "", salient_tokens, block, log_messages))

    return body

@@ -920,17 +992,20 @@ async def process_prompt(
 # Routes
 # ---------------------------------------------------------------------------

-@app.post("/api/chat")
-async def chat(request: Request) -> Response:
+async def _handle_ollama_chat(request: Request, agent_name: str = "") -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
-    _, body = _extract_agent_name(await request.json(), dict(request.headers))
+    raw_body = await request.json()
+    if agent_name:
+        _, body = _extract_agent_name(raw_body, dict(request.headers))
+    else:
+        agent_name, body = _extract_agent_name(raw_body, dict(request.headers))
    model = body.get("model", "unknown")
    upstream = cfg["upstream_ollama"]
    min_len = cfg["detection"]["min_length"]
-    log.info("chat  route=/api/chat model=%s", model)
+    log.info("chat  route=/api/chat model=%s agent=%s", model, agent_name or "—")
    try:
-        body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers))
+        body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers), agent_name=agent_name)
        text, raw = await call_ollama("/api/chat", body, upstream)
        sess = session_key(model, body.get("messages", []))
        count = record_and_check(sess, text, min_len)
@@ -953,17 +1028,20 @@ async def chat(request: Request) -> Response:
        raise


-@app.post("/api/generate")
-async def generate(request: Request) -> Response:
+async def _handle_ollama_generate(request: Request, agent_name: str = "") -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
-    _, body = _extract_agent_name(await request.json(), dict(request.headers))
+    raw_body = await request.json()
+    if agent_name:
+        _, body = _extract_agent_name(raw_body, dict(request.headers))
+    else:
+        agent_name, body = _extract_agent_name(raw_body, dict(request.headers))
    model = body.get("model", "unknown")
    upstream = cfg["upstream_ollama"]
    min_len = cfg["detection"]["min_length"]
-    log.info("chat  route=/api/generate model=%s", model)
+    log.info("chat  route=/api/generate model=%s agent=%s", model, agent_name or "—")
    try:
-        body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers))
+        body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers), agent_name=agent_name)
        messages = [{"role": "user", "content": body.get("prompt", "")}]
        sess = session_key(model, messages)
        text, raw = await call_ollama("/api/generate", body, upstream)
@@ -987,31 +1065,44 @@ async def generate(request: Request) -> Response:
        raise


+@app.post("/api/chat")
+async def chat(request: Request) -> Response:
+    return await _handle_ollama_chat(request)
+
+
+@app.post("/api/generate")
+async def generate(request: Request) -> Response:
+    return await _handle_ollama_generate(request)
+
+
 # ---------------------------------------------------------------------------
 # Anthropic Messages API  (POST /v1/messages)
 # ---------------------------------------------------------------------------

-@app.post("/v1/messages")
-async def anthropic_messages(request: Request) -> Response:
+async def _handle_anthropic_messages(request: Request, agent_name: str = "") -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
    raw_body = await request.body()
-    _, body = _extract_agent_name(json.loads(raw_body), dict(request.headers))
+    parsed = json.loads(raw_body)
+    if agent_name:
+        _, body = _extract_agent_name(parsed, dict(request.headers))
+    else:
+        agent_name, body = _extract_agent_name(parsed, dict(request.headers))
    # Capture streaming intent BEFORE call_anthropic forces stream=False
    original_stream: bool = bool(body.get("stream", False))
    model = body.get("model", "unknown")
    upstream = cfg["upstream_anthropic"]
    min_len = cfg["detection"]["min_length"]
    log.info(
-        "chat  route=/v1/messages model=%s upstream=%s stream=%s  req_preview=%.200s",
-        model, upstream, original_stream,
+        "chat  route=/v1/messages model=%s upstream=%s agent=%s stream=%s  req_preview=%.200s",
+        model, upstream, agent_name or "—", original_stream,
        raw_body[:200].decode(errors="replace").replace("\n", " "),
    )
    try:
        headers = _relay_headers(request, ANTHROPIC_RELAY_HEADERS)
        if "anthropic-version" not in {k.lower() for k in headers}:
            headers["anthropic-version"] = "2023-06-01"
-        body = await process_prompt(body, "/v1/messages", pool, cfg, headers)
+        body = await process_prompt(body, "/v1/messages", pool, cfg, headers, agent_name=agent_name)
        messages = body.get("messages", [])
        sess = session_key(model, messages)
        text, raw = await call_anthropic(body, upstream, headers)
@@ -1048,17 +1139,24 @@ async def anthropic_messages(request: Request) -> Response:
        raise


+@app.post("/v1/messages")
+async def anthropic_messages(request: Request) -> Response:
+    return await _handle_anthropic_messages(request)
+
+
 # ---------------------------------------------------------------------------
 # OpenAI-compatible chat completions  (POST /v1/chat/completions)
 # ---------------------------------------------------------------------------

-@app.post("/v1/chat/completions")
-async def openai_chat_completions(request: Request) -> Response:
+async def _handle_openai_chat(request: Request, agent_name: str = "") -> Response:
    cfg = request.app.state.yaml_config
    pool = request.app.state.pool
    raw_body = await request.json()
    hdrs = dict(request.headers)
-    agent_name, body = _extract_agent_name(raw_body, hdrs)  # strips agent_id/agent_name
+    if agent_name:
+        _, body = _extract_agent_name(raw_body, hdrs)
+    else:
+        agent_name, body = _extract_agent_name(raw_body, hdrs)  # strips agent_id/agent_name
    model = body.get("model", "unknown")
    upstream = cfg["upstream_openai"]
    min_len = cfg["detection"]["min_length"]
@@ -1077,12 +1175,12 @@ async def openai_chat_completions(request: Request) -> Response:
                    agent_model.base_url or "(default)",
                )
                return await _route_agent_chat(
-                    body, agent_model, original_stream, pool, cfg, hdrs, min_len
+                    body, agent_model, original_stream, pool, cfg, hdrs, min_len, agent_name=agent_name,
                )

        # Standard path — forward to configured upstream unchanged
        headers = _relay_headers(request, OPENAI_RELAY_HEADERS)
-        body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs)
+        body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs, agent_name=agent_name)
        messages = body.get("messages", [])
        sess = session_key(model, messages)
        text, raw = await call_openai(body, upstream, headers)
@@ -1110,12 +1208,54 @@ async def openai_chat_completions(request: Request) -> Response:
        raise


+@app.post("/v1/chat/completions")
+async def openai_chat_completions(request: Request) -> Response:
+    return await _handle_openai_chat(request)
+
+
 # Alias: some LiteLLM provider types (custom_openai, openai_like) omit the
 # /v1 prefix and post directly to /chat/completions.
@app.post("/chat/completions")
 async def openai_chat_completions_no_prefix(request: Request) -> Response:
    """Alias for /v1/chat/completions — handles LiteLLM providers that omit /v1."""
-    return await openai_chat_completions(request)
+    return await _handle_openai_chat(request)
+
+
+# ---------------------------------------------------------------------------
+# Agent-prefixed routes  /{agent_id}/...
+#
+# Configure each agent instance's base_url to include its name:
+#   http://festinger:11434/gerhard/v1      ← hermes (OpenAI-compat)
+#   http://festinger:11434/dobby/v1/messages  ← Agent0 (Anthropic)
+#   http://festinger:11434/gunnar           ← Ollama clients
+#
+# The agent_id is extracted from the URL path and takes priority over any
+# agent_name/agent_id passed in the request body or headers.
+# ---------------------------------------------------------------------------
+
+@app.post("/{agent_id}/v1/chat/completions")
+async def openai_chat_with_agent_id(agent_id: str, request: Request) -> Response:
+    return await _handle_openai_chat(request, agent_name=agent_id.lower())
+
+
+@app.post("/{agent_id}/chat/completions")
+async def openai_chat_with_agent_id_no_v1(agent_id: str, request: Request) -> Response:
+    return await _handle_openai_chat(request, agent_name=agent_id.lower())
+
+
+@app.post("/{agent_id}/v1/messages")
+async def anthropic_messages_with_agent_id(agent_id: str, request: Request) -> Response:
+    return await _handle_anthropic_messages(request, agent_name=agent_id.lower())
+
+
+@app.post("/{agent_id}/api/chat")
+async def ollama_chat_with_agent_id(agent_id: str, request: Request) -> Response:
+    return await _handle_ollama_chat(request, agent_name=agent_id.lower())
+
+
+@app.post("/{agent_id}/api/generate")
+async def ollama_generate_with_agent_id(agent_id: str, request: Request) -> Response:
+    return await _handle_ollama_generate(request, agent_name=agent_id.lower())


 # ---------------------------------------------------------------------------
@@ -1679,6 +1819,262 @@ async def test_reset(scenario_id: str, request: Request) -> dict:
    return await reset_scenario(pool, scenario_id)


+# ---------------------------------------------------------------------------
+# /recollection-log — browse prompts where recollection was injected
+# ---------------------------------------------------------------------------
+
+RECOLLECTION_LOG_HTML = """<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<title>Festinger — Recollection Log</title>
+<style>
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body { font-family: monospace; background: #0d0d1a; color: #ccc; padding: 20px; }
+  h1 { color: #fff; font-size: 1.2em; margin-bottom: 6px; }
+  .subtitle { font-size: 0.75em; color: #555; margin-bottom: 20px; }
+  .toolbar { display: flex; gap: 10px; align-items: center; margin-bottom: 18px; }
+  .btn { padding: 6px 14px; border: none; border-radius: 3px; cursor: pointer;
+         font-family: monospace; font-size: 0.82em; }
+  .btn-danger { background: #6b1a1a; color: #ffaaaa; }
+  .btn-danger:hover { background: #8b2a2a; }
+  .btn-secondary { background: #1e1e40; color: #aaa; }
+  .btn-secondary:hover { background: #2a2a55; }
+  .filter-row { display: flex; gap: 8px; align-items: center; margin-bottom: 16px; }
+  .filter-row input { background: #1a1a2e; border: 1px solid #2a2a4e; color: #ddd;
+    padding: 5px 10px; border-radius: 3px; font-family: monospace; font-size: 0.82em; width: 220px; }
+  .count { font-size: 0.75em; color: #555; }
+
+  .entry { background: #12122a; border: 1px solid #1e1e40; border-radius: 5px;
+           margin-bottom: 14px; overflow: hidden; }
+  .entry-header { padding: 10px 14px; display: flex; gap: 16px; align-items: baseline;
+                  border-bottom: 1px solid #1e1e40; cursor: pointer; }
+  .entry-header:hover { background: #1a1a3a; }
+  .ts { color: #555; font-size: 0.78em; white-space: nowrap; }
+  .agent { color: #7070ff; font-size: 0.82em; font-weight: bold; }
+  .tokens { font-size: 0.78em; color: #aaa; flex: 1; }
+  .tok-tag { display: inline-block; background: #1e1e50; color: #9090ff;
+             border-radius: 3px; padding: 1px 6px; margin: 1px 3px 1px 0; }
+  .toggle-hint { font-size: 0.7em; color: #444; margin-left: auto; white-space: nowrap; }
+
+  .entry-body { display: none; padding: 14px; }
+  .entry-body.open { display: block; }
+
+  .section-label { font-size: 0.68em; text-transform: uppercase; letter-spacing: 0.1em;
+                   color: #555; margin-bottom: 6px; margin-top: 14px; }
+  .section-label:first-child { margin-top: 0; }
+
+  .recollection-block { background: #0a1020; border-left: 3px solid #3040aa;
+    padding: 10px 12px; font-size: 0.83em; white-space: pre-wrap; color: #aabbff;
+    border-radius: 0 3px 3px 0; line-height: 1.5; }
+
+  .messages-accordion { margin-top: 8px; }
+  .msg-toggle { background: #1a1a3a; border: none; color: #777; font-family: monospace;
+                font-size: 0.78em; padding: 5px 10px; border-radius: 3px; cursor: pointer;
+                width: 100%; text-align: left; }
+  .msg-toggle:hover { background: #222250; color: #aaa; }
+  .messages-list { display: none; margin-top: 8px; }
+  .messages-list.open { display: block; }
+  .msg { border: 1px solid #1e1e40; border-radius: 3px; margin-bottom: 6px; overflow: hidden; }
+  .msg-role { padding: 4px 10px; font-size: 0.72em; font-weight: bold; text-transform: uppercase;
+              letter-spacing: 0.08em; }
+  .role-system  { background: #1a2040; color: #6688ff; }
+  .role-user    { background: #1a2a1a; color: #66bb66; }
+  .role-assistant { background: #2a1a1a; color: #cc8844; }
+  .role-other   { background: #1e1e2e; color: #888; }
+  .msg-content  { padding: 8px 10px; font-size: 0.8em; white-space: pre-wrap;
+                  line-height: 1.5; max-height: 300px; overflow-y: auto; color: #ccc; }
+
+  .empty { color: #444; font-size: 0.85em; padding: 30px 0; text-align: center; }
+  .pagination { display: flex; gap: 8px; align-items: center; margin-top: 20px; }
+  .pg-btn { padding: 5px 12px; background: #1e1e40; border: none; color: #aaa;
+            border-radius: 3px; cursor: pointer; font-family: monospace; font-size: 0.8em; }
+  .pg-btn:hover { background: #2a2a55; }
+  .pg-btn:disabled { opacity: 0.35; cursor: default; }
+  .pg-info { font-size: 0.78em; color: #555; }
+  a.back { color: #5555aa; text-decoration: none; font-size: 0.82em; display: inline-block; margin-bottom: 16px; }
+  a.back:hover { color: #8888ff; }
+</style>
+</head>
+<body>
+<a href="/admin" class="back">← admin</a>
+<h1>Recollection Log</h1>
+<p class="subtitle">Every prompt where Festinger injected a &lt;recollection&gt; block.</p>
+
+<div class="toolbar">
+  <button class="btn btn-danger" onclick="clearLog()">Clear log</button>
+  <button class="btn btn-secondary" onclick="load()">Refresh</button>
+  <span class="count" id="count-label"></span>
+</div>
+
+<div class="filter-row">
+  <input id="agent-filter" type="text" placeholder="Filter by agent…" oninput="load()" />
+</div>
+
+<div id="entries"></div>
+<div class="pagination">
+  <button class="pg-btn" id="btn-prev" onclick="prevPage()" disabled>← prev</button>
+  <span class="pg-info" id="pg-info"></span>
+  <button class="pg-btn" id="btn-next" onclick="nextPage()">next →</button>
+</div>
+
+<script>
+let currentPage = 0;
+const PAGE = 20;
+
+async function load() {
+  const agent = document.getElementById('agent-filter').value.trim();
+  const qs = new URLSearchParams({ limit: PAGE, offset: currentPage * PAGE });
+  if (agent) qs.set('agent', agent);
+  const res = await fetch('/recollection-log/data?' + qs);
+  const data = await res.json();
+  renderEntries(data.entries);
+  const total = data.total;
+  document.getElementById('count-label').textContent = total + ' event' + (total === 1 ? '' : 's');
+  document.getElementById('pg-info').textContent =
+    'page ' + (currentPage + 1) + ' of ' + Math.max(1, Math.ceil(total / PAGE));
+  document.getElementById('btn-prev').disabled = currentPage === 0;
+  document.getElementById('btn-next').disabled = (currentPage + 1) * PAGE >= total;
+}
+
+function renderEntries(entries) {
+  const el = document.getElementById('entries');
+  if (!entries.length) {
+    el.innerHTML = '<div class="empty">No recollection events yet.</div>';
+    return;
+  }
+  el.innerHTML = entries.map((e, i) => {
+    const tokHtml = (e.salient_tokens || []).map(t =>
+      '<span class="tok-tag">' + esc(t) + '</span>'
+    ).join('');
+    const msgsHtml = (e.messages || []).map(m => {
+      const role = (m.role || 'unknown').toLowerCase();
+      const cls = ['system','user','assistant'].includes(role) ? 'role-' + role : 'role-other';
+      const content = typeof m.content === 'string' ? m.content
+        : JSON.stringify(m.content, null, 2);
+      return '<div class="msg"><div class="msg-role ' + cls + '">' + esc(role) + '</div>'
+           + '<div class="msg-content">' + esc(content) + '</div></div>';
+    }).join('');
+    return '<div class="entry">'
+      + '<div class="entry-header" onclick="toggle(' + i + ')">'
+      + '  <span class="ts">' + esc(e.created_at) + '</span>'
+      + '  <span class="agent">' + esc(e.agent_name || '(none)') + '</span>'
+      + '  <span class="tokens">' + tokHtml + '</span>'
+      + '  <span class="toggle-hint">click to expand</span>'
+      + '</div>'
+      + '<div class="entry-body" id="entry-body-' + i + '">'
+      + '  <div class="section-label">Recollection block</div>'
+      + '  <pre class="recollection-block">' + esc(e.recollection_block) + '</pre>'
+      + '  <div class="section-label" style="margin-top:14px">Full prompt (' + (e.messages || []).length + ' messages)</div>'
+      + '  <div class="messages-accordion">'
+      + '    <button class="msg-toggle" onclick="toggleMsgs(event,' + i + ')">Show messages ▾</button>'
+      + '    <div class="messages-list" id="msgs-' + i + '">' + msgsHtml + '</div>'
+      + '  </div>'
+      + '</div>'
+      + '</div>';
+  }).join('');
+}
+
+function toggle(i) {
+  const el = document.getElementById('entry-body-' + i);
+  el.classList.toggle('open');
+}
+
+function toggleMsgs(evt, i) {
+  evt.stopPropagation();
+  const el = document.getElementById('msgs-' + i);
+  el.classList.toggle('open');
+  evt.target.textContent = el.classList.contains('open') ? 'Hide messages ▴' : 'Show messages ▾';
+}
+
+function esc(s) {
+  return String(s)
+    .replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;')
+    .replace(/"/g,'&quot;');
+}
+
+function prevPage() { currentPage = Math.max(0, currentPage - 1); load(); }
+function nextPage() { currentPage++; load(); }
+
+async function clearLog() {
+  if (!confirm('Delete all recollection log entries?')) return;
+  await fetch('/recollection-log', { method: 'DELETE' });
+  currentPage = 0;
+  load();
+}
+
+load();
+</script>
+</body>
+</html>
+"""
+
+
+@app.get("/recollection-log", response_class=HTMLResponse)
+async def recollection_log_ui() -> HTMLResponse:
+    return HTMLResponse(RECOLLECTION_LOG_HTML)
+
+
+@app.get("/recollection-log/data")
+async def recollection_log_data(
+    request: Request,
+    limit: int = 20,
+    offset: int = 0,
+    agent: str = "",
+) -> dict:
+    pool = request.app.state.pool
+    async with pool.acquire() as conn:
+        if agent:
+            rows = await conn.fetch(
+                """
+                SELECT id, created_at, agent_name, salient_tokens, recollection_block, messages_json
+                FROM recollection_log
+                WHERE agent_name ILIKE $1
+                ORDER BY created_at DESC
+                LIMIT $2 OFFSET $3
+                """,
+                f"%{agent}%", limit, offset,
+            )
+            total = await conn.fetchval(
+                "SELECT COUNT(*) FROM recollection_log WHERE agent_name ILIKE $1",
+                f"%{agent}%",
+            )
+        else:
+            rows = await conn.fetch(
+                """
+                SELECT id, created_at, agent_name, salient_tokens, recollection_block, messages_json
+                FROM recollection_log
+                ORDER BY created_at DESC
+                LIMIT $1 OFFSET $2
+                """,
+                limit, offset,
+            )
+            total = await conn.fetchval("SELECT COUNT(*) FROM recollection_log")
+
+    entries = []
+    for r in rows:
+        entries.append({
+            "id": r["id"],
+            "created_at": r["created_at"].strftime("%Y-%m-%d %H:%M:%S UTC"),
+            "agent_name": r["agent_name"],
+            "salient_tokens": list(r["salient_tokens"]),
+            "recollection_block": r["recollection_block"],
+            "messages": r["messages_json"],
+        })
+
+    return {"total": total, "offset": offset, "limit": limit, "entries": entries}
+
+
+@app.delete("/recollection-log")
+async def clear_recollection_log(request: Request) -> dict:
+    pool = request.app.state.pool
+    async with pool.acquire() as conn:
+        result = await conn.execute("DELETE FROM recollection_log")
+    deleted = int(result.split()[-1]) if result else 0
+    log.info("recollection_log cleared  deleted=%d", deleted)
+    return {"status": "ok", "deleted": deleted}
+
+
 # ---------------------------------------------------------------------------
 # /graph — knowledge graph explorer
 # ---------------------------------------------------------------------------
@@ -2510,6 +2906,7 @@ ADMIN_HTML = """<!DOCTYPE html>
  <h1>Festinger</h1>
  <p class="subtitle">Ollama-compatible inference middleware — loop detection &amp; Recollections world model
    &nbsp;&mdash;&nbsp;<a href="/graph" style="color:#1a1a2e">Knowledge Graph Explorer</a>
+    &nbsp;&mdash;&nbsp;<a href="/recollection-log" style="color:#1a1a2e">Recollection Log</a>
    &nbsp;&mdash;&nbsp;<a href="/models-ui" style="color:#1a1a2e">Model Manager</a>
  </p>

@@ -2,13 +2,28 @@
 Recollection engine — read path.

 For each salient concept found in an intercepted prompt:
-  - Query URD edges (from in-memory cache, filtered by confidence + recency)
+  - Query URD edges (from in-memory cache, filtered by confidence)
+  - Rank by effective score (recency-decayed saliency + centrality bonus)
  - Render hit or zero-hit block
-  - Inject the <recollection> block into the prompt before forwarding to Ollama
+  - Inject the <recollection> block into the prompt
+
+Scoring
+-------
+effective_score(c) = saliency(c) * recency_decay(c) + centrality_bonus(c)
+
+  recency_decay   = exp(-days_since_last_seen / RECENCY_HALF_LIFE)
+                    → 1.0 if never seen (no penalty for brand-new concepts)
+                    → 0.5 at 30 days, 0.25 at 60 days, ~0.04 at 90 days
+
+  centrality_bonus = log1p(n_children)
+                    → 0 for leaf concepts
+                    → grows slowly with the number of concepts that use this
+                       one as a parent (hub nodes surface first)
 """
 from __future__ import annotations

 import logging
+import math
 from datetime import datetime, timezone, timedelta
 from typing import Optional

@@ -19,34 +34,62 @@ log = logging.getLogger("festinger.recollection")

 ZERO_HIT_TEMPLATE = "You have no memory of {concept}. Try: memory_load query='{concept}' or gutask context {concept}"

+# Half-life for recency decay: saliency halves after this many days of silence.
+RECENCY_HALF_LIFE = 30.0
+
+# Flat bonus added to effective_score for concepts found in the system message.
+# Large enough to push grounding concepts above freshly-encountered domain words.
+SESSION_BOOST = 2.0
+

 # ---------------------------------------------------------------------------
-# Query (reads from in-memory cache)
+# Scoring
 # ---------------------------------------------------------------------------

-def query_edges(
-    concept_id: int,
-    confidence_floor: float,
-    recency_days: int,
-) -> list[UrdEdge]:
+def recency_decay(row: SoasRow) -> float:
    """
-    Return URD edges for *concept_id* that pass confidence + recency filters.
-    All reads are pure in-memory — zero network.
-    Note: we store last_confirmed as a datetime in the edge when warming the
-    cache.  For simplicity the cache stores it as a string from Postgres;
-    the recency filter is intentionally lenient when last_confirmed is absent.
+    Exponential decay based on days since last_seen.
+    Returns 1.0 if last_seen is unknown (no penalty for fresh concepts).
    """
-    edges = cache.urd_by_concept.get(concept_id, [])
-    if not edges:
-        return []
+    if row.last_seen is None:
+        return 1.0
+    now = datetime.now(tz=timezone.utc)
+    last = row.last_seen
+    if last.tzinfo is None:
+        last = last.replace(tzinfo=timezone.utc)
+    days = (now - last).total_seconds() / 86400.0
+    return math.exp(-days / RECENCY_HALF_LIFE)

-    cutoff = datetime.now(tz=timezone.utc) - timedelta(days=recency_days)
-    result = []
-    for e in edges:
-        if e.confidence < confidence_floor:
-            continue
-        result.append(e)
-    return result
+
+def centrality_bonus(concept_id: int) -> float:
+    """
+    log1p of the number of concepts that reference this one as a parent.
+    Hub concepts (many children) rise to the top of the recollection block.
+    """
+    n_children = len(cache.urd_by_parent.get(concept_id, []))
+    return math.log1p(n_children)
+
+
+def effective_score(concept_id: int, session_boosted: bool = False) -> float:
+    """Combined score used to rank concepts within the recollection block."""
+    token = cache.soas_by_id.get(concept_id, "")
+    row = cache.soas_by_token.get(token)
+    if row is None:
+        return 0.0
+    base = row.saliency * recency_decay(row) + centrality_bonus(concept_id)
+    return base + (SESSION_BOOST if session_boosted else 0.0)
+
+
+# ---------------------------------------------------------------------------
+# Query
+# ---------------------------------------------------------------------------
+
+def query_edges(concept_id: int, confidence_floor: float) -> list[UrdEdge]:
+    """Return URD edges for *concept_id* above the confidence floor."""
+    return [
+        e for e in cache.urd_by_concept.get(concept_id, [])
+        if e.confidence >= confidence_floor
+    ]


 # ---------------------------------------------------------------------------
@@ -57,27 +100,28 @@ def render_hit(
    concept_token: str,
    edges: list[UrdEdge],
    concept_id: int,
+    score: float,
    reverse_edges: list[UrdEdge] | None = None,
 ) -> str:
    """
-    Render one concept's recollection line:
-        gnommoweb: [type] repo  [membership] glitch_university
-        omega13: [membership←] gnommoweb  (reverse: gnommoweb is a member of omega13)
-    Pending-conflict dimensions get a '?' suffix on the dim token.
+    Render one concept's recollection line, e.g.:
+        gnommoweb [s=3.2]: [type] repo  [membership] glitch_university  [type←] omega13
+    The score annotation helps with debugging.
+    Pending-conflict dimensions get a '?' suffix.
    """
    has_pending = concept_id in cache.pending_conflicts
    parts = []
    for e in edges:
        dim_label = e.dim_token + ("?" if has_pending else "")
        parts.append(f"[{dim_label}] {e.parent_token}")
-    # Bidirectional: surface concepts where this concept appears as a parent
    for e in (reverse_edges or [])[:3]:
        child_token = cache.soas_by_id.get(e.concept_id, str(e.concept_id))
        parts.append(f"[{e.dim_token}←] {child_token}")
-    return f"{concept_token}: {'  '.join(parts)}"
+    score_str = f"{score:.1f}"
+    return f"{concept_token} [s={score_str}]: {'  '.join(parts)}"


-def render_zero_hit(concept_token: str) -> str:
+def render_zero_hit(concept_token: str, score: float) -> str:
    line = ZERO_HIT_TEMPLATE.format(concept=concept_token)
    soas_row = cache.soas_by_token.get(concept_token)
    if soas_row and soas_row.first_seen_context:
@@ -88,37 +132,45 @@ def render_zero_hit(concept_token: str) -> str:
 def build_recollection_block(
    salient_concept_ids: list[int],
    confidence_floor: float,
-    recency_days: int,
+    recency_days: int,        # kept for API compat, no longer used for edge filtering
    max_lines: int = 12,
+    session_boost_ids: set[int] | None = None,
 ) -> Optional[str]:
    """
-    Build the full <recollection> block for a list of above-threshold concept IDs.
-    Returns None if there is nothing to say.
+    Build the <recollection> block, ranked by effective_score (recency-decayed
+    saliency + centrality bonus + optional session boost).

-    Concepts with known edges are shown first (most informative); zero-hit
-    "don't know" lines fill the remaining budget up to max_lines.
+    Concepts with known edges are shown first; zero-hit lines fill the remainder.
+    session_boost_ids: concept IDs found in the system message — ranked higher
+    to surface grounding context (agent persona, project name) first.
    """
-    hit_lines: list[str] = []
-    zero_lines: list[str] = []
+    boosted = session_boost_ids or set()

-    # Deduplicate — same concept may appear twice if novel + known
+    # Deduplicate and score
    seen: set[int] = set()
+    scored: list[tuple[float, int]] = []
    for cid in salient_concept_ids:
        if cid in seen:
            continue
        seen.add(cid)
+        scored.append((effective_score(cid, session_boosted=(cid in boosted)), cid))

+    # Highest score first
+    scored.sort(key=lambda x: x[0], reverse=True)
+
+    hit_lines: list[str] = []
+    zero_lines: list[str] = []
+
+    for score, cid in scored:
        token = cache.soas_by_id.get(cid, str(cid))
-        edges = query_edges(cid, confidence_floor, recency_days)
-        # Bidirectional: find edges where this concept appears as a parent
+        edges = query_edges(cid, confidence_floor)
        reverse_edges = cache.urd_by_parent.get(cid, [])

        if edges or reverse_edges:
-            hit_lines.append(render_hit(token, edges, cid, reverse_edges))
+            hit_lines.append(render_hit(token, edges, cid, score, reverse_edges))
        else:
-            zero_lines.append(render_zero_hit(token))
+            zero_lines.append(render_zero_hit(token, score))

-    # Hits always included (up to max_lines); zero-hits fill the remainder
    lines = hit_lines[:max_lines]
    remaining = max_lines - len(lines)
    lines += zero_lines[:remaining]
@@ -126,8 +178,7 @@ def build_recollection_block(
    if not lines:
        return None

-    body = "\n".join(lines)
-    return f"<recollection>\n{body}\n</recollection>"
+    return f"<recollection>\n" + "\n".join(lines) + "\n</recollection>"


 # ---------------------------------------------------------------------------
@@ -133,6 +133,21 @@ async def insert_urd_edge(
    cache.urd_by_concept.setdefault(req.concept_id, []).append(edge)
    cache.urd_by_concept_dim[key] = edge
    cache.urd_by_parent.setdefault(req.parent_id, []).append(edge)
+
+    # Mark the concept as a domain token: saliency=1 means "first use".
+    # saliency=0 is the sentinel for common-English words (dictionary seed),
+    # which the encounter loop silently skips.  Any concept that has a URD
+    # edge is definitionally domain-specific and must start above zero.
+    concept_token_str = cache.soas_by_id.get(req.concept_id, "")
+    row = cache.soas_by_token.get(concept_token_str)
+    if row and row.saliency == 0.0:
+        async with pool.acquire() as conn:
+            await conn.execute(
+                "UPDATE soas SET saliency = 1.0, novelty = 1.0 WHERE id = $1 AND saliency = 0",
+                req.concept_id,
+            )
+        row.saliency = 1.0
+        row.novelty = 1.0
    log.info(
        "urd insert  concept=%d parent=%d dim=%d is_isa=%s source=%s",
        req.concept_id, req.parent_id, req.dim_id, req.is_isa, req.source,