From df28e56adde59cb5b0f2320d0af88635c15d95b9 Mon Sep 17 00:00:00 2001 From: jenstandstad Date: Sat, 25 Apr 2026 11:31:58 +0200 Subject: [PATCH] Adding changes to festinger --- agents/gerhard-hermes/config.yaml | 47 ++ docker-compose.yml | 33 +- plugins/festinger/db/schema.sql | 14 + plugins/festinger/festinger/cache.py | 2 + plugins/festinger/festinger/db.py | 88 +++- plugins/festinger/festinger/main.py | 485 ++++++++++++++++++-- plugins/festinger/festinger/recollection.py | 141 ++++-- plugins/festinger/festinger/urd_writer.py | 15 + 8 files changed, 707 insertions(+), 118 deletions(-) create mode 100644 agents/gerhard-hermes/config.yaml diff --git a/agents/gerhard-hermes/config.yaml b/agents/gerhard-hermes/config.yaml new file mode 100644 index 0000000..d948b1a --- /dev/null +++ b/agents/gerhard-hermes/config.yaml @@ -0,0 +1,47 @@ +# Hermes config for gerhard — evaluation instance +# +# LLM calls go to festinger, which sits in front of the real inference +# providers. The agent identity is encoded in the base URL so festinger +# can route to the right model and build the per-agent knowledge graph. +# +# festinger routing: http://festinger:11434/gerhard/v1/chat/completions +# → festinger looks up agent_name="gerhard" in agent_models table +# → if found: routes to the registered provider/model +# → if not found: falls through to upstream_openai in config.yaml + +model: + provider: "custom" + base_url: "http://festinger:11434/gerhard/v1" + # No api_key needed — festinger does not authenticate OpenAI-compat requests. + # Set a placeholder so the OpenAI SDK doesn't complain. + api_key: "festinger" + # Model name is what gets sent in the request body. festinger can override + # this per-agent via the agent_models table. Set something reasonable as + # a default (festinger's upstream_openai fallback will use whatever it supports). + default: "claude-opus-4-6" + +terminal: + backend: "local" + cwd: "." + timeout: 180 + lifetime_seconds: 300 + +agent: + max_turns: 60 + verbose: false + reasoning_effort: "medium" + +memory: + memory_enabled: true + user_profile_enabled: true + +compression: + enabled: true + threshold: 0.50 + target_ratio: 0.20 + protect_last_n: 20 + +session_reset: + mode: both + idle_minutes: 1440 + at_hour: 4 diff --git a/docker-compose.yml b/docker-compose.yml index d8c337f..aeeee9f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -76,19 +76,36 @@ services: - "host.docker.internal:host-gateway" gerhard: - image: agent0ai/agent-zero:latest + build: + context: ../../hermes-agent + image: hermes-agent container_name: gerhard - ports: - - "50007:80" volumes: - - ./agents/gerhard:/a0/usr - - ${HOME}/.ssh:/root/.ssh + - ./agents/gerhard-hermes:/opt/data restart: unless-stopped environment: - AUTH_LOGIN: ${AUTH_LOGIN} - AUTH_PASSWORD: ${AUTH_PASSWORD} + HERMES_UID: ${HERMES_UID:-1000} + HERMES_GID: ${HERMES_GID:-1000} extra_hosts: - "host.docker.internal:host-gateway" + command: ["gateway", "run"] + + gerhard-dashboard: + image: hermes-agent + container_name: gerhard-dashboard + ports: + - "50007:9119" # web dashboard at localhost:50007 + volumes: + - ./agents/gerhard-hermes:/opt/data + restart: unless-stopped + depends_on: + - gerhard + environment: + HERMES_UID: ${HERMES_UID:-1000} + HERMES_GID: ${HERMES_GID:-1000} + extra_hosts: + - "host.docker.internal:host-gateway" + command: ["dashboard", "--host", "0.0.0.0", "--no-open"] postgres: image: postgres:16-alpine @@ -135,7 +152,7 @@ services: - gunnar - rind - abyssinthia - - gerhard + - gerhard-dashboard volumes: festinger-pgdata: diff --git a/plugins/festinger/db/schema.sql b/plugins/festinger/db/schema.sql index 9e2093e..2023466 100644 --- a/plugins/festinger/db/schema.sql +++ b/plugins/festinger/db/schema.sql @@ -122,3 +122,17 @@ CREATE TABLE IF NOT EXISTS agent_models ( model_id INT NOT NULL REFERENCES models(id) ON DELETE CASCADE, created_at TIMESTAMPTZ NOT NULL DEFAULT now() ); + +-- --------------------------------------------------------------------------- +-- recollection_log — every prompt where a recollection block was injected +-- --------------------------------------------------------------------------- +CREATE TABLE IF NOT EXISTS recollection_log ( + id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + agent_name TEXT NOT NULL DEFAULT '', + salient_tokens TEXT[] NOT NULL DEFAULT '{}', + recollection_block TEXT NOT NULL, + messages_json JSONB NOT NULL DEFAULT '[]' +); + +CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC); diff --git a/plugins/festinger/festinger/cache.py b/plugins/festinger/festinger/cache.py index ed13bbc..81e74aa 100644 --- a/plugins/festinger/festinger/cache.py +++ b/plugins/festinger/festinger/cache.py @@ -7,6 +7,7 @@ on the hot path. Writes are write-through: in-memory first, then Postgres. from __future__ import annotations from dataclasses import dataclass, field +from datetime import datetime, timezone from typing import Optional @@ -18,6 +19,7 @@ class SoasRow: saliency: float = 0.0 novelty: float = 0.0 first_seen_context: str = "" + last_seen: Optional[datetime] = None @dataclass diff --git a/plugins/festinger/festinger/db.py b/plugins/festinger/festinger/db.py index 3e82a0b..e89594d 100644 --- a/plugins/festinger/festinger/db.py +++ b/plugins/festinger/festinger/db.py @@ -4,9 +4,11 @@ Database layer — asyncpg pool, schema init, cache warm-up, flush. from __future__ import annotations import asyncio +import json import logging import math import os +from datetime import datetime, timezone from pathlib import Path import asyncpg @@ -70,6 +72,22 @@ async def init_schema(pool: asyncpg.Pool) -> None: ) """ ) + # Migration: recollection event log + await conn.execute( + """ + CREATE TABLE IF NOT EXISTS recollection_log ( + id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + agent_name TEXT NOT NULL DEFAULT '', + salient_tokens TEXT[] NOT NULL DEFAULT '{}', + recollection_block TEXT NOT NULL, + messages_json JSONB NOT NULL DEFAULT '[]' + ) + """ + ) + await conn.execute( + "CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC)" + ) log.info("schema applied") @@ -135,7 +153,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None: """Load all SOAS and URD rows into the in-memory cache.""" async with pool.acquire() as conn: soas_rows = await conn.fetch( - "SELECT id, token, encounter_count, saliency, novelty, first_seen_context FROM soas" + "SELECT id, token, encounter_count, saliency, novelty, first_seen_context, last_seen FROM soas" ) for r in soas_rows: row = SoasRow( @@ -145,6 +163,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None: saliency=r["saliency"], novelty=r["novelty"], first_seen_context=r["first_seen_context"] or "", + last_seen=r["last_seen"], ) cache.soas_by_token[r["token"]] = row cache.soas_by_id[r["id"]] = r["token"] @@ -378,7 +397,18 @@ def recalculate_saliency(encounter_count: int, is_common_english: bool) -> float # --------------------------------------------------------------------------- async def flush_encounter_deltas(pool: asyncpg.Pool) -> None: - """Flush staged encounter_count deltas to Postgres in one batch UPDATE.""" + """ + Flush staged encounter_count deltas to Postgres. + + Saliency model: + saliency = 0 → common English word (dictionary seed). Never touched. + saliency ≥ 1 → domain concept. Incremented by +delta on each flush. + + saliency=1 is set by urd_writer when a concept first gets a URD edge, + meaning "mentioned at least once in a meaningful context". Each flush + adds the number of new encounters since the last flush, so saliency + equals the total number of times the concept has been seen by the agent. + """ deltas = cache.drain_deltas() if not deltas: return @@ -388,23 +418,10 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None: for soas_id, delta in deltas.items(): token = cache.soas_by_id.get(soas_id, "") row = cache.soas_by_token.get(token) - new_count = (row.encounter_count if row else 0) - # novelty = 0 for common English words (pre-seeded) - is_common = (row.novelty == 0.0 and row.saliency == 0.0) if row else False - - # Unconfirmed novel concepts (novelty > 0, no URD edges yet) must not - # be promoted above the read threshold by encounter-count alone. - # Their saliency is raised explicitly when the LLM confirms them. - is_unconfirmed_novel = ( - row is not None - and row.novelty > 0.0 - and not cache.urd_by_concept.get(soas_id) - ) - if is_unconfirmed_novel: - new_saliency = row.saliency # preserve low saliency until LLM confirms - else: - new_saliency = recalculate_saliency(new_count, is_common) - + if not row or row.saliency == 0.0: + # saliency=0 is the English-word sentinel — never increment. + continue + new_saliency = row.saliency + delta await conn.execute( """ UPDATE soas @@ -415,8 +432,8 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None: """, delta, new_saliency, soas_id, ) - if row: - row.saliency = new_saliency + row.saliency = new_saliency + row.last_seen = datetime.now(timezone.utc) log.debug("flushed %d saliency deltas", len(deltas)) @@ -429,3 +446,32 @@ async def get_config(pool: asyncpg.Pool, key: str, default: str = "") -> str: async with pool.acquire() as conn: row = await conn.fetchrow("SELECT value FROM config WHERE key = $1", key) return row["value"] if row else default + + +# --------------------------------------------------------------------------- +# Recollection log +# --------------------------------------------------------------------------- + +async def log_recollection( + pool: asyncpg.Pool, + agent_name: str, + salient_tokens: list[str], + recollection_block: str, + messages: list[dict], +) -> None: + """Persist one recollection event. Fire-and-forget — never raises.""" + try: + async with pool.acquire() as conn: + await conn.execute( + """ + INSERT INTO recollection_log + (agent_name, salient_tokens, recollection_block, messages_json) + VALUES ($1, $2, $3, $4) + """, + agent_name or "", + salient_tokens, + recollection_block, + json.dumps(messages), + ) + except Exception: + log.exception("recollection_log insert failed") diff --git a/plugins/festinger/festinger/main.py b/plugins/festinger/festinger/main.py index 4c51d85..2373d57 100644 --- a/plugins/festinger/festinger/main.py +++ b/plugins/festinger/festinger/main.py @@ -2,18 +2,23 @@ Festinger — main FastAPI application. Routes: - POST /api/chat Ollama-compatible chat (loop detection + recollection injection) - POST /api/generate Ollama-compatible generate - POST /v1/messages Anthropic Messages API proxy (loop detection + recollection) - POST /v1/chat/completions OpenAI-compatible proxy (loop detection + recollection) - POST /iknowthat Manual write path (gutask iknowthat) - POST /resolve/run Manually trigger nightly resolution job - POST /reload Reload URD cache (called by resolution job) - GET /health Health + stats - GET /conflicts Pending and recently resolved conflicts - GET /admin Minimal admin UI - * /v1/{path} Passthrough to upstream Anthropic - * /{path} Passthrough to upstream Ollama + POST /api/chat Ollama-compatible chat (loop detection + recollection injection) + POST /api/generate Ollama-compatible generate + POST /v1/messages Anthropic Messages API proxy (loop detection + recollection) + POST /v1/chat/completions OpenAI-compatible proxy (loop detection + recollection) + POST /chat/completions Alias for /v1/chat/completions (LiteLLM without /v1 prefix) + POST /{agent_id}/v1/chat/completions Agent-prefixed OpenAI proxy — agent identity from URL + POST /{agent_id}/chat/completions Agent-prefixed OpenAI proxy (no /v1 prefix variant) + POST /{agent_id}/v1/messages Agent-prefixed Anthropic proxy — agent identity from URL + POST /{agent_id}/api/chat Agent-prefixed Ollama chat — agent identity from URL + POST /{agent_id}/api/generate Agent-prefixed Ollama generate — agent identity from URL + POST /iknowthat Manual write path (gutask iknowthat) + POST /resolve/run Manually trigger nightly resolution job + POST /reload Reload URD cache (called by resolution job) + GET /health Health + stats + GET /conflicts Pending and recently resolved conflicts + GET /admin Minimal admin UI + * /{path} Passthrough to upstream Ollama or Anthropic """ from __future__ import annotations @@ -38,7 +43,7 @@ from .db import ( close_pool, get_config, get_or_create_soas, get_pool, init_schema, bootstrap_dimensions, bootstrap_english_dictionary, warm_cache, reload_urd_cache, - flush_encounter_deltas, reset_graph, + flush_encounter_deltas, reset_graph, log_recollection, ) from .loop_detector import apply_mitigations, record_and_check, session_key from .cue_scanner import scan_cues @@ -499,6 +504,30 @@ def _last_assistant_message_text(body: dict, path: str) -> str: return "" +def _system_message_text(body: dict, path: str) -> str: + """ + Extract the system message text from the request body. + - OpenAI/Ollama: first message with role='system' in the messages list. + - Anthropic: top-level 'system' field (string or content-block list). + """ + if path == "/v1/messages": + sys_field = body.get("system") or "" + if isinstance(sys_field, list): + return " ".join( + b.get("text", "") for b in sys_field + if isinstance(b, dict) and b.get("type") == "text" + ) + return sys_field + + if path in ("/api/chat", "/v1/chat/completions", "/api/generate"): + messages = body.get("messages", []) + sys_msg = next((m for m in messages if m.get("role") == "system"), None) + if sys_msg: + return " ".join(_extract_text_strings(sys_msg.get("content", ""))) + + return "" + + def inject_recollection_anthropic(body: dict, block: str) -> dict: """ Inject a recollection block into an Anthropic Messages API request. @@ -723,6 +752,7 @@ async def _route_agent_chat( cfg: dict, request_headers: dict, min_len: int, + agent_name: str = "", ) -> Response: """ Route an OpenAI-compatible chat completions request to the agent's @@ -740,7 +770,7 @@ async def _route_agent_chat( # (Configure write_model_id or a per-agent model for a cloud/separate # model if you want memory building alongside local inference.) body = await process_prompt( - body, "/v1/chat/completions", pool, cfg, request_headers, + body, "/v1/chat/completions", pool, cfg, request_headers, agent_name=agent_name, ) sess = session_key(agent_model.model_name, body.get("messages", [])) @@ -845,17 +875,21 @@ async def process_prompt( pool, cfg: dict, request_headers: dict | None = None, + agent_name: str = "", ) -> dict: """ Run the saliency + recollection pipeline over the prompt. Returns a (possibly modified) body dict with the recollection block injected. + agent_name may be passed in directly (e.g. extracted from the URL path) to + avoid re-parsing the body/headers; falls back to _extract_agent_name if empty. """ read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5")) conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6")) recency_days = int(await get_config(pool, "recollection_recency_days", "90")) hdrs = request_headers or {} - agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler + if not agent_name: + agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler # Last user message — primary source for recollection reads. user_text = _last_user_message_text(body, path) @@ -874,7 +908,21 @@ async def process_prompt( for cue in scan_cues(assistant_text): await enqueue_cue(cue) - # 3. Token loop over user message for saliency-triggered recollection. + # 3. Session boost — scan the system message for domain tokens. + # Concepts grounding the agent's persona or project context rank higher + # in the recollection block for the entire session. + session_boost_ids: set[int] = set() + sys_text = _system_message_text(body, path) + if sys_text: + for tok in tokenize(sys_text): + row = cache.soas_by_token.get(tok) + if row and row.saliency > 0.0: + session_boost_ids.add(row.id) + if session_boost_ids: + boost_tokens = [cache.soas_by_id.get(i, str(i)) for i in session_boost_ids] + log.debug("session_boost | agent=%s tokens=%s", agent_name or "(none)", boost_tokens) + + # 4. Token loop over user message for saliency-triggered recollection. tokens = tokenize(user_text) salient_for_read: list[int] = [] @@ -882,8 +930,8 @@ async def process_prompt( soas_row = cache.soas_by_token.get(token) if soas_row is None: continue - if soas_row.saliency == 0.0 and soas_row.novelty == 0.0: - continue # common English — skip + if soas_row.saliency == 0.0: + continue # saliency=0 is the English-word sentinel — skip cache.record_encounter(soas_row.id) if soas_row.saliency >= read_threshold: salient_for_read.append(soas_row.id) @@ -891,8 +939,24 @@ async def process_prompt( if not salient_for_read: return body - # 5. Build recollection block - block = build_recollection_block(salient_for_read, conf_floor, recency_days) + # 5. Conflict spike — warn loudly if any salient concept has a pending contradiction. + conflicted = [ + cache.soas_by_id.get(cid, str(cid)) + for cid in salient_for_read + if cid in cache.pending_conflicts + ] + if conflicted: + log.warning( + "recollection_conflict_spike | agent=%s conflicted=%s " + "(recollection block will show '?' suffix on affected dimensions)", + agent_name or "(none)", conflicted, + ) + + # 6. Build recollection block + block = build_recollection_block( + salient_for_read, conf_floor, recency_days, + session_boost_ids=session_boost_ids, + ) if not block: return body @@ -902,16 +966,24 @@ async def process_prompt( agent_name or "(none)", salient_tokens, block, ) - # 6. Inject into messages + # 7. Inject into messages if path == "/api/chat" or path == "/v1/chat/completions": body = dict(body) body["messages"] = inject_recollection(body.get("messages", []), block) + log_messages = body["messages"] elif path == "/v1/messages": body = inject_recollection_anthropic(body, block) - # /api/generate uses a flat prompt string — prepend there + # Reconstruct a messages-style list for the log viewer + sys_content = body.get("system", "") + log_messages = ([{"role": "system", "content": sys_content}] if sys_content else []) + body.get("messages", []) elif path == "/api/generate": body = dict(body) body["prompt"] = block + "\n\n" + body.get("prompt", "") + log_messages = [{"role": "user", "content": body["prompt"]}] + else: + log_messages = [] + + asyncio.create_task(log_recollection(pool, agent_name or "", salient_tokens, block, log_messages)) return body @@ -920,17 +992,20 @@ async def process_prompt( # Routes # --------------------------------------------------------------------------- -@app.post("/api/chat") -async def chat(request: Request) -> Response: +async def _handle_ollama_chat(request: Request, agent_name: str = "") -> Response: cfg = request.app.state.yaml_config pool = request.app.state.pool - _, body = _extract_agent_name(await request.json(), dict(request.headers)) + raw_body = await request.json() + if agent_name: + _, body = _extract_agent_name(raw_body, dict(request.headers)) + else: + agent_name, body = _extract_agent_name(raw_body, dict(request.headers)) model = body.get("model", "unknown") upstream = cfg["upstream_ollama"] min_len = cfg["detection"]["min_length"] - log.info("chat route=/api/chat model=%s", model) + log.info("chat route=/api/chat model=%s agent=%s", model, agent_name or "—") try: - body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers)) + body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers), agent_name=agent_name) text, raw = await call_ollama("/api/chat", body, upstream) sess = session_key(model, body.get("messages", [])) count = record_and_check(sess, text, min_len) @@ -953,17 +1028,20 @@ async def chat(request: Request) -> Response: raise -@app.post("/api/generate") -async def generate(request: Request) -> Response: +async def _handle_ollama_generate(request: Request, agent_name: str = "") -> Response: cfg = request.app.state.yaml_config pool = request.app.state.pool - _, body = _extract_agent_name(await request.json(), dict(request.headers)) + raw_body = await request.json() + if agent_name: + _, body = _extract_agent_name(raw_body, dict(request.headers)) + else: + agent_name, body = _extract_agent_name(raw_body, dict(request.headers)) model = body.get("model", "unknown") upstream = cfg["upstream_ollama"] min_len = cfg["detection"]["min_length"] - log.info("chat route=/api/generate model=%s", model) + log.info("chat route=/api/generate model=%s agent=%s", model, agent_name or "—") try: - body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers)) + body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers), agent_name=agent_name) messages = [{"role": "user", "content": body.get("prompt", "")}] sess = session_key(model, messages) text, raw = await call_ollama("/api/generate", body, upstream) @@ -987,31 +1065,44 @@ async def generate(request: Request) -> Response: raise +@app.post("/api/chat") +async def chat(request: Request) -> Response: + return await _handle_ollama_chat(request) + + +@app.post("/api/generate") +async def generate(request: Request) -> Response: + return await _handle_ollama_generate(request) + + # --------------------------------------------------------------------------- # Anthropic Messages API (POST /v1/messages) # --------------------------------------------------------------------------- -@app.post("/v1/messages") -async def anthropic_messages(request: Request) -> Response: +async def _handle_anthropic_messages(request: Request, agent_name: str = "") -> Response: cfg = request.app.state.yaml_config pool = request.app.state.pool raw_body = await request.body() - _, body = _extract_agent_name(json.loads(raw_body), dict(request.headers)) + parsed = json.loads(raw_body) + if agent_name: + _, body = _extract_agent_name(parsed, dict(request.headers)) + else: + agent_name, body = _extract_agent_name(parsed, dict(request.headers)) # Capture streaming intent BEFORE call_anthropic forces stream=False original_stream: bool = bool(body.get("stream", False)) model = body.get("model", "unknown") upstream = cfg["upstream_anthropic"] min_len = cfg["detection"]["min_length"] log.info( - "chat route=/v1/messages model=%s upstream=%s stream=%s req_preview=%.200s", - model, upstream, original_stream, + "chat route=/v1/messages model=%s upstream=%s agent=%s stream=%s req_preview=%.200s", + model, upstream, agent_name or "—", original_stream, raw_body[:200].decode(errors="replace").replace("\n", " "), ) try: headers = _relay_headers(request, ANTHROPIC_RELAY_HEADERS) if "anthropic-version" not in {k.lower() for k in headers}: headers["anthropic-version"] = "2023-06-01" - body = await process_prompt(body, "/v1/messages", pool, cfg, headers) + body = await process_prompt(body, "/v1/messages", pool, cfg, headers, agent_name=agent_name) messages = body.get("messages", []) sess = session_key(model, messages) text, raw = await call_anthropic(body, upstream, headers) @@ -1048,17 +1139,24 @@ async def anthropic_messages(request: Request) -> Response: raise +@app.post("/v1/messages") +async def anthropic_messages(request: Request) -> Response: + return await _handle_anthropic_messages(request) + + # --------------------------------------------------------------------------- # OpenAI-compatible chat completions (POST /v1/chat/completions) # --------------------------------------------------------------------------- -@app.post("/v1/chat/completions") -async def openai_chat_completions(request: Request) -> Response: +async def _handle_openai_chat(request: Request, agent_name: str = "") -> Response: cfg = request.app.state.yaml_config pool = request.app.state.pool raw_body = await request.json() hdrs = dict(request.headers) - agent_name, body = _extract_agent_name(raw_body, hdrs) # strips agent_id/agent_name + if agent_name: + _, body = _extract_agent_name(raw_body, hdrs) + else: + agent_name, body = _extract_agent_name(raw_body, hdrs) # strips agent_id/agent_name model = body.get("model", "unknown") upstream = cfg["upstream_openai"] min_len = cfg["detection"]["min_length"] @@ -1077,12 +1175,12 @@ async def openai_chat_completions(request: Request) -> Response: agent_model.base_url or "(default)", ) return await _route_agent_chat( - body, agent_model, original_stream, pool, cfg, hdrs, min_len + body, agent_model, original_stream, pool, cfg, hdrs, min_len, agent_name=agent_name, ) # Standard path — forward to configured upstream unchanged headers = _relay_headers(request, OPENAI_RELAY_HEADERS) - body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs) + body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs, agent_name=agent_name) messages = body.get("messages", []) sess = session_key(model, messages) text, raw = await call_openai(body, upstream, headers) @@ -1110,12 +1208,54 @@ async def openai_chat_completions(request: Request) -> Response: raise +@app.post("/v1/chat/completions") +async def openai_chat_completions(request: Request) -> Response: + return await _handle_openai_chat(request) + + # Alias: some LiteLLM provider types (custom_openai, openai_like) omit the # /v1 prefix and post directly to /chat/completions. @app.post("/chat/completions") async def openai_chat_completions_no_prefix(request: Request) -> Response: """Alias for /v1/chat/completions — handles LiteLLM providers that omit /v1.""" - return await openai_chat_completions(request) + return await _handle_openai_chat(request) + + +# --------------------------------------------------------------------------- +# Agent-prefixed routes /{agent_id}/... +# +# Configure each agent instance's base_url to include its name: +# http://festinger:11434/gerhard/v1 ← hermes (OpenAI-compat) +# http://festinger:11434/dobby/v1/messages ← Agent0 (Anthropic) +# http://festinger:11434/gunnar ← Ollama clients +# +# The agent_id is extracted from the URL path and takes priority over any +# agent_name/agent_id passed in the request body or headers. +# --------------------------------------------------------------------------- + +@app.post("/{agent_id}/v1/chat/completions") +async def openai_chat_with_agent_id(agent_id: str, request: Request) -> Response: + return await _handle_openai_chat(request, agent_name=agent_id.lower()) + + +@app.post("/{agent_id}/chat/completions") +async def openai_chat_with_agent_id_no_v1(agent_id: str, request: Request) -> Response: + return await _handle_openai_chat(request, agent_name=agent_id.lower()) + + +@app.post("/{agent_id}/v1/messages") +async def anthropic_messages_with_agent_id(agent_id: str, request: Request) -> Response: + return await _handle_anthropic_messages(request, agent_name=agent_id.lower()) + + +@app.post("/{agent_id}/api/chat") +async def ollama_chat_with_agent_id(agent_id: str, request: Request) -> Response: + return await _handle_ollama_chat(request, agent_name=agent_id.lower()) + + +@app.post("/{agent_id}/api/generate") +async def ollama_generate_with_agent_id(agent_id: str, request: Request) -> Response: + return await _handle_ollama_generate(request, agent_name=agent_id.lower()) # --------------------------------------------------------------------------- @@ -1679,6 +1819,262 @@ async def test_reset(scenario_id: str, request: Request) -> dict: return await reset_scenario(pool, scenario_id) +# --------------------------------------------------------------------------- +# /recollection-log — browse prompts where recollection was injected +# --------------------------------------------------------------------------- + +RECOLLECTION_LOG_HTML = """ + + + +Festinger — Recollection Log + + + +← admin +

Recollection Log

+

Every prompt where Festinger injected a <recollection> block.

+ +
+ + + +
+ +
+ +
+ +
+ + + + + +""" + + +@app.get("/recollection-log", response_class=HTMLResponse) +async def recollection_log_ui() -> HTMLResponse: + return HTMLResponse(RECOLLECTION_LOG_HTML) + + +@app.get("/recollection-log/data") +async def recollection_log_data( + request: Request, + limit: int = 20, + offset: int = 0, + agent: str = "", +) -> dict: + pool = request.app.state.pool + async with pool.acquire() as conn: + if agent: + rows = await conn.fetch( + """ + SELECT id, created_at, agent_name, salient_tokens, recollection_block, messages_json + FROM recollection_log + WHERE agent_name ILIKE $1 + ORDER BY created_at DESC + LIMIT $2 OFFSET $3 + """, + f"%{agent}%", limit, offset, + ) + total = await conn.fetchval( + "SELECT COUNT(*) FROM recollection_log WHERE agent_name ILIKE $1", + f"%{agent}%", + ) + else: + rows = await conn.fetch( + """ + SELECT id, created_at, agent_name, salient_tokens, recollection_block, messages_json + FROM recollection_log + ORDER BY created_at DESC + LIMIT $1 OFFSET $2 + """, + limit, offset, + ) + total = await conn.fetchval("SELECT COUNT(*) FROM recollection_log") + + entries = [] + for r in rows: + entries.append({ + "id": r["id"], + "created_at": r["created_at"].strftime("%Y-%m-%d %H:%M:%S UTC"), + "agent_name": r["agent_name"], + "salient_tokens": list(r["salient_tokens"]), + "recollection_block": r["recollection_block"], + "messages": r["messages_json"], + }) + + return {"total": total, "offset": offset, "limit": limit, "entries": entries} + + +@app.delete("/recollection-log") +async def clear_recollection_log(request: Request) -> dict: + pool = request.app.state.pool + async with pool.acquire() as conn: + result = await conn.execute("DELETE FROM recollection_log") + deleted = int(result.split()[-1]) if result else 0 + log.info("recollection_log cleared deleted=%d", deleted) + return {"status": "ok", "deleted": deleted} + + # --------------------------------------------------------------------------- # /graph — knowledge graph explorer # --------------------------------------------------------------------------- @@ -2510,6 +2906,7 @@ ADMIN_HTML = """

Festinger

Ollama-compatible inference middleware — loop detection & Recollections world model  — Knowledge Graph Explorer +  — Recollection Log  — Model Manager

diff --git a/plugins/festinger/festinger/recollection.py b/plugins/festinger/festinger/recollection.py index 3e4f366..009a12e 100644 --- a/plugins/festinger/festinger/recollection.py +++ b/plugins/festinger/festinger/recollection.py @@ -2,13 +2,28 @@ Recollection engine — read path. For each salient concept found in an intercepted prompt: - - Query URD edges (from in-memory cache, filtered by confidence + recency) + - Query URD edges (from in-memory cache, filtered by confidence) + - Rank by effective score (recency-decayed saliency + centrality bonus) - Render hit or zero-hit block - - Inject the block into the prompt before forwarding to Ollama + - Inject the block into the prompt + +Scoring +------- +effective_score(c) = saliency(c) * recency_decay(c) + centrality_bonus(c) + + recency_decay = exp(-days_since_last_seen / RECENCY_HALF_LIFE) + → 1.0 if never seen (no penalty for brand-new concepts) + → 0.5 at 30 days, 0.25 at 60 days, ~0.04 at 90 days + + centrality_bonus = log1p(n_children) + → 0 for leaf concepts + → grows slowly with the number of concepts that use this + one as a parent (hub nodes surface first) """ from __future__ import annotations import logging +import math from datetime import datetime, timezone, timedelta from typing import Optional @@ -19,34 +34,62 @@ log = logging.getLogger("festinger.recollection") ZERO_HIT_TEMPLATE = "You have no memory of {concept}. Try: memory_load query='{concept}' or gutask context {concept}" +# Half-life for recency decay: saliency halves after this many days of silence. +RECENCY_HALF_LIFE = 30.0 + +# Flat bonus added to effective_score for concepts found in the system message. +# Large enough to push grounding concepts above freshly-encountered domain words. +SESSION_BOOST = 2.0 + # --------------------------------------------------------------------------- -# Query (reads from in-memory cache) +# Scoring # --------------------------------------------------------------------------- -def query_edges( - concept_id: int, - confidence_floor: float, - recency_days: int, -) -> list[UrdEdge]: +def recency_decay(row: SoasRow) -> float: """ - Return URD edges for *concept_id* that pass confidence + recency filters. - All reads are pure in-memory — zero network. - Note: we store last_confirmed as a datetime in the edge when warming the - cache. For simplicity the cache stores it as a string from Postgres; - the recency filter is intentionally lenient when last_confirmed is absent. + Exponential decay based on days since last_seen. + Returns 1.0 if last_seen is unknown (no penalty for fresh concepts). """ - edges = cache.urd_by_concept.get(concept_id, []) - if not edges: - return [] + if row.last_seen is None: + return 1.0 + now = datetime.now(tz=timezone.utc) + last = row.last_seen + if last.tzinfo is None: + last = last.replace(tzinfo=timezone.utc) + days = (now - last).total_seconds() / 86400.0 + return math.exp(-days / RECENCY_HALF_LIFE) - cutoff = datetime.now(tz=timezone.utc) - timedelta(days=recency_days) - result = [] - for e in edges: - if e.confidence < confidence_floor: - continue - result.append(e) - return result + +def centrality_bonus(concept_id: int) -> float: + """ + log1p of the number of concepts that reference this one as a parent. + Hub concepts (many children) rise to the top of the recollection block. + """ + n_children = len(cache.urd_by_parent.get(concept_id, [])) + return math.log1p(n_children) + + +def effective_score(concept_id: int, session_boosted: bool = False) -> float: + """Combined score used to rank concepts within the recollection block.""" + token = cache.soas_by_id.get(concept_id, "") + row = cache.soas_by_token.get(token) + if row is None: + return 0.0 + base = row.saliency * recency_decay(row) + centrality_bonus(concept_id) + return base + (SESSION_BOOST if session_boosted else 0.0) + + +# --------------------------------------------------------------------------- +# Query +# --------------------------------------------------------------------------- + +def query_edges(concept_id: int, confidence_floor: float) -> list[UrdEdge]: + """Return URD edges for *concept_id* above the confidence floor.""" + return [ + e for e in cache.urd_by_concept.get(concept_id, []) + if e.confidence >= confidence_floor + ] # --------------------------------------------------------------------------- @@ -57,27 +100,28 @@ def render_hit( concept_token: str, edges: list[UrdEdge], concept_id: int, + score: float, reverse_edges: list[UrdEdge] | None = None, ) -> str: """ - Render one concept's recollection line: - gnommoweb: [type] repo [membership] glitch_university - omega13: [membership←] gnommoweb (reverse: gnommoweb is a member of omega13) - Pending-conflict dimensions get a '?' suffix on the dim token. + Render one concept's recollection line, e.g.: + gnommoweb [s=3.2]: [type] repo [membership] glitch_university [type←] omega13 + The score annotation helps with debugging. + Pending-conflict dimensions get a '?' suffix. """ has_pending = concept_id in cache.pending_conflicts parts = [] for e in edges: dim_label = e.dim_token + ("?" if has_pending else "") parts.append(f"[{dim_label}] {e.parent_token}") - # Bidirectional: surface concepts where this concept appears as a parent for e in (reverse_edges or [])[:3]: child_token = cache.soas_by_id.get(e.concept_id, str(e.concept_id)) parts.append(f"[{e.dim_token}←] {child_token}") - return f"{concept_token}: {' '.join(parts)}" + score_str = f"{score:.1f}" + return f"{concept_token} [s={score_str}]: {' '.join(parts)}" -def render_zero_hit(concept_token: str) -> str: +def render_zero_hit(concept_token: str, score: float) -> str: line = ZERO_HIT_TEMPLATE.format(concept=concept_token) soas_row = cache.soas_by_token.get(concept_token) if soas_row and soas_row.first_seen_context: @@ -88,37 +132,45 @@ def render_zero_hit(concept_token: str) -> str: def build_recollection_block( salient_concept_ids: list[int], confidence_floor: float, - recency_days: int, + recency_days: int, # kept for API compat, no longer used for edge filtering max_lines: int = 12, + session_boost_ids: set[int] | None = None, ) -> Optional[str]: """ - Build the full block for a list of above-threshold concept IDs. - Returns None if there is nothing to say. + Build the block, ranked by effective_score (recency-decayed + saliency + centrality bonus + optional session boost). - Concepts with known edges are shown first (most informative); zero-hit - "don't know" lines fill the remaining budget up to max_lines. + Concepts with known edges are shown first; zero-hit lines fill the remainder. + session_boost_ids: concept IDs found in the system message — ranked higher + to surface grounding context (agent persona, project name) first. """ - hit_lines: list[str] = [] - zero_lines: list[str] = [] + boosted = session_boost_ids or set() - # Deduplicate — same concept may appear twice if novel + known + # Deduplicate and score seen: set[int] = set() + scored: list[tuple[float, int]] = [] for cid in salient_concept_ids: if cid in seen: continue seen.add(cid) + scored.append((effective_score(cid, session_boosted=(cid in boosted)), cid)) + # Highest score first + scored.sort(key=lambda x: x[0], reverse=True) + + hit_lines: list[str] = [] + zero_lines: list[str] = [] + + for score, cid in scored: token = cache.soas_by_id.get(cid, str(cid)) - edges = query_edges(cid, confidence_floor, recency_days) - # Bidirectional: find edges where this concept appears as a parent + edges = query_edges(cid, confidence_floor) reverse_edges = cache.urd_by_parent.get(cid, []) if edges or reverse_edges: - hit_lines.append(render_hit(token, edges, cid, reverse_edges)) + hit_lines.append(render_hit(token, edges, cid, score, reverse_edges)) else: - zero_lines.append(render_zero_hit(token)) + zero_lines.append(render_zero_hit(token, score)) - # Hits always included (up to max_lines); zero-hits fill the remainder lines = hit_lines[:max_lines] remaining = max_lines - len(lines) lines += zero_lines[:remaining] @@ -126,8 +178,7 @@ def build_recollection_block( if not lines: return None - body = "\n".join(lines) - return f"\n{body}\n" + return f"\n" + "\n".join(lines) + "\n" # --------------------------------------------------------------------------- diff --git a/plugins/festinger/festinger/urd_writer.py b/plugins/festinger/festinger/urd_writer.py index b8b0529..2c511d7 100644 --- a/plugins/festinger/festinger/urd_writer.py +++ b/plugins/festinger/festinger/urd_writer.py @@ -133,6 +133,21 @@ async def insert_urd_edge( cache.urd_by_concept.setdefault(req.concept_id, []).append(edge) cache.urd_by_concept_dim[key] = edge cache.urd_by_parent.setdefault(req.parent_id, []).append(edge) + + # Mark the concept as a domain token: saliency=1 means "first use". + # saliency=0 is the sentinel for common-English words (dictionary seed), + # which the encounter loop silently skips. Any concept that has a URD + # edge is definitionally domain-specific and must start above zero. + concept_token_str = cache.soas_by_id.get(req.concept_id, "") + row = cache.soas_by_token.get(concept_token_str) + if row and row.saliency == 0.0: + async with pool.acquire() as conn: + await conn.execute( + "UPDATE soas SET saliency = 1.0, novelty = 1.0 WHERE id = $1 AND saliency = 0", + req.concept_id, + ) + row.saliency = 1.0 + row.novelty = 1.0 log.info( "urd insert concept=%d parent=%d dim=%d is_isa=%s source=%s", req.concept_id, req.parent_id, req.dim_id, req.is_isa, req.source,