Adding changes to festinger

This commit is contained in:
2026-04-25 11:31:58 +02:00
parent abc5cf5952
commit df28e56add
8 changed files with 707 additions and 118 deletions
+47
View File
@@ -0,0 +1,47 @@
# Hermes config for gerhard — evaluation instance
#
# LLM calls go to festinger, which sits in front of the real inference
# providers. The agent identity is encoded in the base URL so festinger
# can route to the right model and build the per-agent knowledge graph.
#
# festinger routing: http://festinger:11434/gerhard/v1/chat/completions
# → festinger looks up agent_name="gerhard" in agent_models table
# → if found: routes to the registered provider/model
# → if not found: falls through to upstream_openai in config.yaml
model:
provider: "custom"
base_url: "http://festinger:11434/gerhard/v1"
# No api_key needed — festinger does not authenticate OpenAI-compat requests.
# Set a placeholder so the OpenAI SDK doesn't complain.
api_key: "festinger"
# Model name is what gets sent in the request body. festinger can override
# this per-agent via the agent_models table. Set something reasonable as
# a default (festinger's upstream_openai fallback will use whatever it supports).
default: "claude-opus-4-6"
terminal:
backend: "local"
cwd: "."
timeout: 180
lifetime_seconds: 300
agent:
max_turns: 60
verbose: false
reasoning_effort: "medium"
memory:
memory_enabled: true
user_profile_enabled: true
compression:
enabled: true
threshold: 0.50
target_ratio: 0.20
protect_last_n: 20
session_reset:
mode: both
idle_minutes: 1440
at_hour: 4
+25 -8
View File
@@ -76,19 +76,36 @@ services:
- "host.docker.internal:host-gateway"
gerhard:
image: agent0ai/agent-zero:latest
build:
context: ../../hermes-agent
image: hermes-agent
container_name: gerhard
ports:
- "50007:80"
volumes:
- ./agents/gerhard:/a0/usr
- ${HOME}/.ssh:/root/.ssh
- ./agents/gerhard-hermes:/opt/data
restart: unless-stopped
environment:
AUTH_LOGIN: ${AUTH_LOGIN}
AUTH_PASSWORD: ${AUTH_PASSWORD}
HERMES_UID: ${HERMES_UID:-1000}
HERMES_GID: ${HERMES_GID:-1000}
extra_hosts:
- "host.docker.internal:host-gateway"
command: ["gateway", "run"]
gerhard-dashboard:
image: hermes-agent
container_name: gerhard-dashboard
ports:
- "50007:9119" # web dashboard at localhost:50007
volumes:
- ./agents/gerhard-hermes:/opt/data
restart: unless-stopped
depends_on:
- gerhard
environment:
HERMES_UID: ${HERMES_UID:-1000}
HERMES_GID: ${HERMES_GID:-1000}
extra_hosts:
- "host.docker.internal:host-gateway"
command: ["dashboard", "--host", "0.0.0.0", "--no-open"]
postgres:
image: postgres:16-alpine
@@ -135,7 +152,7 @@ services:
- gunnar
- rind
- abyssinthia
- gerhard
- gerhard-dashboard
volumes:
festinger-pgdata:
+14
View File
@@ -122,3 +122,17 @@ CREATE TABLE IF NOT EXISTS agent_models (
model_id INT NOT NULL REFERENCES models(id) ON DELETE CASCADE,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- ---------------------------------------------------------------------------
-- recollection_log — every prompt where a recollection block was injected
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS recollection_log (
id BIGSERIAL PRIMARY KEY,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
agent_name TEXT NOT NULL DEFAULT '',
salient_tokens TEXT[] NOT NULL DEFAULT '{}',
recollection_block TEXT NOT NULL,
messages_json JSONB NOT NULL DEFAULT '[]'
);
CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC);
+2
View File
@@ -7,6 +7,7 @@ on the hot path. Writes are write-through: in-memory first, then Postgres.
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Optional
@@ -18,6 +19,7 @@ class SoasRow:
saliency: float = 0.0
novelty: float = 0.0
first_seen_context: str = ""
last_seen: Optional[datetime] = None
@dataclass
+67 -21
View File
@@ -4,9 +4,11 @@ Database layer — asyncpg pool, schema init, cache warm-up, flush.
from __future__ import annotations
import asyncio
import json
import logging
import math
import os
from datetime import datetime, timezone
from pathlib import Path
import asyncpg
@@ -70,6 +72,22 @@ async def init_schema(pool: asyncpg.Pool) -> None:
)
"""
)
# Migration: recollection event log
await conn.execute(
"""
CREATE TABLE IF NOT EXISTS recollection_log (
id BIGSERIAL PRIMARY KEY,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
agent_name TEXT NOT NULL DEFAULT '',
salient_tokens TEXT[] NOT NULL DEFAULT '{}',
recollection_block TEXT NOT NULL,
messages_json JSONB NOT NULL DEFAULT '[]'
)
"""
)
await conn.execute(
"CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC)"
)
log.info("schema applied")
@@ -135,7 +153,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None:
"""Load all SOAS and URD rows into the in-memory cache."""
async with pool.acquire() as conn:
soas_rows = await conn.fetch(
"SELECT id, token, encounter_count, saliency, novelty, first_seen_context FROM soas"
"SELECT id, token, encounter_count, saliency, novelty, first_seen_context, last_seen FROM soas"
)
for r in soas_rows:
row = SoasRow(
@@ -145,6 +163,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None:
saliency=r["saliency"],
novelty=r["novelty"],
first_seen_context=r["first_seen_context"] or "",
last_seen=r["last_seen"],
)
cache.soas_by_token[r["token"]] = row
cache.soas_by_id[r["id"]] = r["token"]
@@ -378,7 +397,18 @@ def recalculate_saliency(encounter_count: int, is_common_english: bool) -> float
# ---------------------------------------------------------------------------
async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
"""Flush staged encounter_count deltas to Postgres in one batch UPDATE."""
"""
Flush staged encounter_count deltas to Postgres.
Saliency model:
saliency = 0 → common English word (dictionary seed). Never touched.
saliency ≥ 1 → domain concept. Incremented by +delta on each flush.
saliency=1 is set by urd_writer when a concept first gets a URD edge,
meaning "mentioned at least once in a meaningful context". Each flush
adds the number of new encounters since the last flush, so saliency
equals the total number of times the concept has been seen by the agent.
"""
deltas = cache.drain_deltas()
if not deltas:
return
@@ -388,23 +418,10 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
for soas_id, delta in deltas.items():
token = cache.soas_by_id.get(soas_id, "")
row = cache.soas_by_token.get(token)
new_count = (row.encounter_count if row else 0)
# novelty = 0 for common English words (pre-seeded)
is_common = (row.novelty == 0.0 and row.saliency == 0.0) if row else False
# Unconfirmed novel concepts (novelty > 0, no URD edges yet) must not
# be promoted above the read threshold by encounter-count alone.
# Their saliency is raised explicitly when the LLM confirms them.
is_unconfirmed_novel = (
row is not None
and row.novelty > 0.0
and not cache.urd_by_concept.get(soas_id)
)
if is_unconfirmed_novel:
new_saliency = row.saliency # preserve low saliency until LLM confirms
else:
new_saliency = recalculate_saliency(new_count, is_common)
if not row or row.saliency == 0.0:
# saliency=0 is the English-word sentinel — never increment.
continue
new_saliency = row.saliency + delta
await conn.execute(
"""
UPDATE soas
@@ -415,8 +432,8 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
""",
delta, new_saliency, soas_id,
)
if row:
row.saliency = new_saliency
row.saliency = new_saliency
row.last_seen = datetime.now(timezone.utc)
log.debug("flushed %d saliency deltas", len(deltas))
@@ -429,3 +446,32 @@ async def get_config(pool: asyncpg.Pool, key: str, default: str = "") -> str:
async with pool.acquire() as conn:
row = await conn.fetchrow("SELECT value FROM config WHERE key = $1", key)
return row["value"] if row else default
# ---------------------------------------------------------------------------
# Recollection log
# ---------------------------------------------------------------------------
async def log_recollection(
pool: asyncpg.Pool,
agent_name: str,
salient_tokens: list[str],
recollection_block: str,
messages: list[dict],
) -> None:
"""Persist one recollection event. Fire-and-forget — never raises."""
try:
async with pool.acquire() as conn:
await conn.execute(
"""
INSERT INTO recollection_log
(agent_name, salient_tokens, recollection_block, messages_json)
VALUES ($1, $2, $3, $4)
""",
agent_name or "",
salient_tokens,
recollection_block,
json.dumps(messages),
)
except Exception:
log.exception("recollection_log insert failed")
+441 -44
View File
@@ -2,18 +2,23 @@
Festinger — main FastAPI application.
Routes:
POST /api/chat Ollama-compatible chat (loop detection + recollection injection)
POST /api/generate Ollama-compatible generate
POST /v1/messages Anthropic Messages API proxy (loop detection + recollection)
POST /v1/chat/completions OpenAI-compatible proxy (loop detection + recollection)
POST /iknowthat Manual write path (gutask iknowthat)
POST /resolve/run Manually trigger nightly resolution job
POST /reload Reload URD cache (called by resolution job)
GET /health Health + stats
GET /conflicts Pending and recently resolved conflicts
GET /admin Minimal admin UI
* /v1/{path} Passthrough to upstream Anthropic
* /{path} Passthrough to upstream Ollama
POST /api/chat Ollama-compatible chat (loop detection + recollection injection)
POST /api/generate Ollama-compatible generate
POST /v1/messages Anthropic Messages API proxy (loop detection + recollection)
POST /v1/chat/completions OpenAI-compatible proxy (loop detection + recollection)
POST /chat/completions Alias for /v1/chat/completions (LiteLLM without /v1 prefix)
POST /{agent_id}/v1/chat/completions Agent-prefixed OpenAI proxy — agent identity from URL
POST /{agent_id}/chat/completions Agent-prefixed OpenAI proxy (no /v1 prefix variant)
POST /{agent_id}/v1/messages Agent-prefixed Anthropic proxy — agent identity from URL
POST /{agent_id}/api/chat Agent-prefixed Ollama chat — agent identity from URL
POST /{agent_id}/api/generate Agent-prefixed Ollama generate — agent identity from URL
POST /iknowthat Manual write path (gutask iknowthat)
POST /resolve/run Manually trigger nightly resolution job
POST /reload Reload URD cache (called by resolution job)
GET /health Health + stats
GET /conflicts Pending and recently resolved conflicts
GET /admin Minimal admin UI
* /{path} Passthrough to upstream Ollama or Anthropic
"""
from __future__ import annotations
@@ -38,7 +43,7 @@ from .db import (
close_pool, get_config, get_or_create_soas,
get_pool, init_schema, bootstrap_dimensions,
bootstrap_english_dictionary, warm_cache, reload_urd_cache,
flush_encounter_deltas, reset_graph,
flush_encounter_deltas, reset_graph, log_recollection,
)
from .loop_detector import apply_mitigations, record_and_check, session_key
from .cue_scanner import scan_cues
@@ -499,6 +504,30 @@ def _last_assistant_message_text(body: dict, path: str) -> str:
return ""
def _system_message_text(body: dict, path: str) -> str:
"""
Extract the system message text from the request body.
- OpenAI/Ollama: first message with role='system' in the messages list.
- Anthropic: top-level 'system' field (string or content-block list).
"""
if path == "/v1/messages":
sys_field = body.get("system") or ""
if isinstance(sys_field, list):
return " ".join(
b.get("text", "") for b in sys_field
if isinstance(b, dict) and b.get("type") == "text"
)
return sys_field
if path in ("/api/chat", "/v1/chat/completions", "/api/generate"):
messages = body.get("messages", [])
sys_msg = next((m for m in messages if m.get("role") == "system"), None)
if sys_msg:
return " ".join(_extract_text_strings(sys_msg.get("content", "")))
return ""
def inject_recollection_anthropic(body: dict, block: str) -> dict:
"""
Inject a recollection block into an Anthropic Messages API request.
@@ -723,6 +752,7 @@ async def _route_agent_chat(
cfg: dict,
request_headers: dict,
min_len: int,
agent_name: str = "",
) -> Response:
"""
Route an OpenAI-compatible chat completions request to the agent's
@@ -740,7 +770,7 @@ async def _route_agent_chat(
# (Configure write_model_id or a per-agent model for a cloud/separate
# model if you want memory building alongside local inference.)
body = await process_prompt(
body, "/v1/chat/completions", pool, cfg, request_headers,
body, "/v1/chat/completions", pool, cfg, request_headers, agent_name=agent_name,
)
sess = session_key(agent_model.model_name, body.get("messages", []))
@@ -845,17 +875,21 @@ async def process_prompt(
pool,
cfg: dict,
request_headers: dict | None = None,
agent_name: str = "",
) -> dict:
"""
Run the saliency + recollection pipeline over the prompt.
Returns a (possibly modified) body dict with the recollection block injected.
agent_name may be passed in directly (e.g. extracted from the URL path) to
avoid re-parsing the body/headers; falls back to _extract_agent_name if empty.
"""
read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5"))
conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
hdrs = request_headers or {}
agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler
if not agent_name:
agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler
# Last user message — primary source for recollection reads.
user_text = _last_user_message_text(body, path)
@@ -874,7 +908,21 @@ async def process_prompt(
for cue in scan_cues(assistant_text):
await enqueue_cue(cue)
# 3. Token loop over user message for saliency-triggered recollection.
# 3. Session boost — scan the system message for domain tokens.
# Concepts grounding the agent's persona or project context rank higher
# in the recollection block for the entire session.
session_boost_ids: set[int] = set()
sys_text = _system_message_text(body, path)
if sys_text:
for tok in tokenize(sys_text):
row = cache.soas_by_token.get(tok)
if row and row.saliency > 0.0:
session_boost_ids.add(row.id)
if session_boost_ids:
boost_tokens = [cache.soas_by_id.get(i, str(i)) for i in session_boost_ids]
log.debug("session_boost | agent=%s tokens=%s", agent_name or "(none)", boost_tokens)
# 4. Token loop over user message for saliency-triggered recollection.
tokens = tokenize(user_text)
salient_for_read: list[int] = []
@@ -882,8 +930,8 @@ async def process_prompt(
soas_row = cache.soas_by_token.get(token)
if soas_row is None:
continue
if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
continue # common English — skip
if soas_row.saliency == 0.0:
continue # saliency=0 is the English-word sentinel — skip
cache.record_encounter(soas_row.id)
if soas_row.saliency >= read_threshold:
salient_for_read.append(soas_row.id)
@@ -891,8 +939,24 @@ async def process_prompt(
if not salient_for_read:
return body
# 5. Build recollection block
block = build_recollection_block(salient_for_read, conf_floor, recency_days)
# 5. Conflict spike — warn loudly if any salient concept has a pending contradiction.
conflicted = [
cache.soas_by_id.get(cid, str(cid))
for cid in salient_for_read
if cid in cache.pending_conflicts
]
if conflicted:
log.warning(
"recollection_conflict_spike | agent=%s conflicted=%s "
"(recollection block will show '?' suffix on affected dimensions)",
agent_name or "(none)", conflicted,
)
# 6. Build recollection block
block = build_recollection_block(
salient_for_read, conf_floor, recency_days,
session_boost_ids=session_boost_ids,
)
if not block:
return body
@@ -902,16 +966,24 @@ async def process_prompt(
agent_name or "(none)", salient_tokens, block,
)
# 6. Inject into messages
# 7. Inject into messages
if path == "/api/chat" or path == "/v1/chat/completions":
body = dict(body)
body["messages"] = inject_recollection(body.get("messages", []), block)
log_messages = body["messages"]
elif path == "/v1/messages":
body = inject_recollection_anthropic(body, block)
# /api/generate uses a flat prompt string — prepend there
# Reconstruct a messages-style list for the log viewer
sys_content = body.get("system", "")
log_messages = ([{"role": "system", "content": sys_content}] if sys_content else []) + body.get("messages", [])
elif path == "/api/generate":
body = dict(body)
body["prompt"] = block + "\n\n" + body.get("prompt", "")
log_messages = [{"role": "user", "content": body["prompt"]}]
else:
log_messages = []
asyncio.create_task(log_recollection(pool, agent_name or "", salient_tokens, block, log_messages))
return body
@@ -920,17 +992,20 @@ async def process_prompt(
# Routes
# ---------------------------------------------------------------------------
@app.post("/api/chat")
async def chat(request: Request) -> Response:
async def _handle_ollama_chat(request: Request, agent_name: str = "") -> Response:
cfg = request.app.state.yaml_config
pool = request.app.state.pool
_, body = _extract_agent_name(await request.json(), dict(request.headers))
raw_body = await request.json()
if agent_name:
_, body = _extract_agent_name(raw_body, dict(request.headers))
else:
agent_name, body = _extract_agent_name(raw_body, dict(request.headers))
model = body.get("model", "unknown")
upstream = cfg["upstream_ollama"]
min_len = cfg["detection"]["min_length"]
log.info("chat route=/api/chat model=%s", model)
log.info("chat route=/api/chat model=%s agent=%s", model, agent_name or "")
try:
body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers))
body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers), agent_name=agent_name)
text, raw = await call_ollama("/api/chat", body, upstream)
sess = session_key(model, body.get("messages", []))
count = record_and_check(sess, text, min_len)
@@ -953,17 +1028,20 @@ async def chat(request: Request) -> Response:
raise
@app.post("/api/generate")
async def generate(request: Request) -> Response:
async def _handle_ollama_generate(request: Request, agent_name: str = "") -> Response:
cfg = request.app.state.yaml_config
pool = request.app.state.pool
_, body = _extract_agent_name(await request.json(), dict(request.headers))
raw_body = await request.json()
if agent_name:
_, body = _extract_agent_name(raw_body, dict(request.headers))
else:
agent_name, body = _extract_agent_name(raw_body, dict(request.headers))
model = body.get("model", "unknown")
upstream = cfg["upstream_ollama"]
min_len = cfg["detection"]["min_length"]
log.info("chat route=/api/generate model=%s", model)
log.info("chat route=/api/generate model=%s agent=%s", model, agent_name or "")
try:
body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers))
body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers), agent_name=agent_name)
messages = [{"role": "user", "content": body.get("prompt", "")}]
sess = session_key(model, messages)
text, raw = await call_ollama("/api/generate", body, upstream)
@@ -987,31 +1065,44 @@ async def generate(request: Request) -> Response:
raise
@app.post("/api/chat")
async def chat(request: Request) -> Response:
return await _handle_ollama_chat(request)
@app.post("/api/generate")
async def generate(request: Request) -> Response:
return await _handle_ollama_generate(request)
# ---------------------------------------------------------------------------
# Anthropic Messages API (POST /v1/messages)
# ---------------------------------------------------------------------------
@app.post("/v1/messages")
async def anthropic_messages(request: Request) -> Response:
async def _handle_anthropic_messages(request: Request, agent_name: str = "") -> Response:
cfg = request.app.state.yaml_config
pool = request.app.state.pool
raw_body = await request.body()
_, body = _extract_agent_name(json.loads(raw_body), dict(request.headers))
parsed = json.loads(raw_body)
if agent_name:
_, body = _extract_agent_name(parsed, dict(request.headers))
else:
agent_name, body = _extract_agent_name(parsed, dict(request.headers))
# Capture streaming intent BEFORE call_anthropic forces stream=False
original_stream: bool = bool(body.get("stream", False))
model = body.get("model", "unknown")
upstream = cfg["upstream_anthropic"]
min_len = cfg["detection"]["min_length"]
log.info(
"chat route=/v1/messages model=%s upstream=%s stream=%s req_preview=%.200s",
model, upstream, original_stream,
"chat route=/v1/messages model=%s upstream=%s agent=%s stream=%s req_preview=%.200s",
model, upstream, agent_name or "", original_stream,
raw_body[:200].decode(errors="replace").replace("\n", " "),
)
try:
headers = _relay_headers(request, ANTHROPIC_RELAY_HEADERS)
if "anthropic-version" not in {k.lower() for k in headers}:
headers["anthropic-version"] = "2023-06-01"
body = await process_prompt(body, "/v1/messages", pool, cfg, headers)
body = await process_prompt(body, "/v1/messages", pool, cfg, headers, agent_name=agent_name)
messages = body.get("messages", [])
sess = session_key(model, messages)
text, raw = await call_anthropic(body, upstream, headers)
@@ -1048,17 +1139,24 @@ async def anthropic_messages(request: Request) -> Response:
raise
@app.post("/v1/messages")
async def anthropic_messages(request: Request) -> Response:
return await _handle_anthropic_messages(request)
# ---------------------------------------------------------------------------
# OpenAI-compatible chat completions (POST /v1/chat/completions)
# ---------------------------------------------------------------------------
@app.post("/v1/chat/completions")
async def openai_chat_completions(request: Request) -> Response:
async def _handle_openai_chat(request: Request, agent_name: str = "") -> Response:
cfg = request.app.state.yaml_config
pool = request.app.state.pool
raw_body = await request.json()
hdrs = dict(request.headers)
agent_name, body = _extract_agent_name(raw_body, hdrs) # strips agent_id/agent_name
if agent_name:
_, body = _extract_agent_name(raw_body, hdrs)
else:
agent_name, body = _extract_agent_name(raw_body, hdrs) # strips agent_id/agent_name
model = body.get("model", "unknown")
upstream = cfg["upstream_openai"]
min_len = cfg["detection"]["min_length"]
@@ -1077,12 +1175,12 @@ async def openai_chat_completions(request: Request) -> Response:
agent_model.base_url or "(default)",
)
return await _route_agent_chat(
body, agent_model, original_stream, pool, cfg, hdrs, min_len
body, agent_model, original_stream, pool, cfg, hdrs, min_len, agent_name=agent_name,
)
# Standard path — forward to configured upstream unchanged
headers = _relay_headers(request, OPENAI_RELAY_HEADERS)
body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs)
body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs, agent_name=agent_name)
messages = body.get("messages", [])
sess = session_key(model, messages)
text, raw = await call_openai(body, upstream, headers)
@@ -1110,12 +1208,54 @@ async def openai_chat_completions(request: Request) -> Response:
raise
@app.post("/v1/chat/completions")
async def openai_chat_completions(request: Request) -> Response:
return await _handle_openai_chat(request)
# Alias: some LiteLLM provider types (custom_openai, openai_like) omit the
# /v1 prefix and post directly to /chat/completions.
@app.post("/chat/completions")
async def openai_chat_completions_no_prefix(request: Request) -> Response:
"""Alias for /v1/chat/completions — handles LiteLLM providers that omit /v1."""
return await openai_chat_completions(request)
return await _handle_openai_chat(request)
# ---------------------------------------------------------------------------
# Agent-prefixed routes /{agent_id}/...
#
# Configure each agent instance's base_url to include its name:
# http://festinger:11434/gerhard/v1 ← hermes (OpenAI-compat)
# http://festinger:11434/dobby/v1/messages ← Agent0 (Anthropic)
# http://festinger:11434/gunnar ← Ollama clients
#
# The agent_id is extracted from the URL path and takes priority over any
# agent_name/agent_id passed in the request body or headers.
# ---------------------------------------------------------------------------
@app.post("/{agent_id}/v1/chat/completions")
async def openai_chat_with_agent_id(agent_id: str, request: Request) -> Response:
return await _handle_openai_chat(request, agent_name=agent_id.lower())
@app.post("/{agent_id}/chat/completions")
async def openai_chat_with_agent_id_no_v1(agent_id: str, request: Request) -> Response:
return await _handle_openai_chat(request, agent_name=agent_id.lower())
@app.post("/{agent_id}/v1/messages")
async def anthropic_messages_with_agent_id(agent_id: str, request: Request) -> Response:
return await _handle_anthropic_messages(request, agent_name=agent_id.lower())
@app.post("/{agent_id}/api/chat")
async def ollama_chat_with_agent_id(agent_id: str, request: Request) -> Response:
return await _handle_ollama_chat(request, agent_name=agent_id.lower())
@app.post("/{agent_id}/api/generate")
async def ollama_generate_with_agent_id(agent_id: str, request: Request) -> Response:
return await _handle_ollama_generate(request, agent_name=agent_id.lower())
# ---------------------------------------------------------------------------
@@ -1679,6 +1819,262 @@ async def test_reset(scenario_id: str, request: Request) -> dict:
return await reset_scenario(pool, scenario_id)
# ---------------------------------------------------------------------------
# /recollection-log — browse prompts where recollection was injected
# ---------------------------------------------------------------------------
RECOLLECTION_LOG_HTML = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Festinger — Recollection Log</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: monospace; background: #0d0d1a; color: #ccc; padding: 20px; }
h1 { color: #fff; font-size: 1.2em; margin-bottom: 6px; }
.subtitle { font-size: 0.75em; color: #555; margin-bottom: 20px; }
.toolbar { display: flex; gap: 10px; align-items: center; margin-bottom: 18px; }
.btn { padding: 6px 14px; border: none; border-radius: 3px; cursor: pointer;
font-family: monospace; font-size: 0.82em; }
.btn-danger { background: #6b1a1a; color: #ffaaaa; }
.btn-danger:hover { background: #8b2a2a; }
.btn-secondary { background: #1e1e40; color: #aaa; }
.btn-secondary:hover { background: #2a2a55; }
.filter-row { display: flex; gap: 8px; align-items: center; margin-bottom: 16px; }
.filter-row input { background: #1a1a2e; border: 1px solid #2a2a4e; color: #ddd;
padding: 5px 10px; border-radius: 3px; font-family: monospace; font-size: 0.82em; width: 220px; }
.count { font-size: 0.75em; color: #555; }
.entry { background: #12122a; border: 1px solid #1e1e40; border-radius: 5px;
margin-bottom: 14px; overflow: hidden; }
.entry-header { padding: 10px 14px; display: flex; gap: 16px; align-items: baseline;
border-bottom: 1px solid #1e1e40; cursor: pointer; }
.entry-header:hover { background: #1a1a3a; }
.ts { color: #555; font-size: 0.78em; white-space: nowrap; }
.agent { color: #7070ff; font-size: 0.82em; font-weight: bold; }
.tokens { font-size: 0.78em; color: #aaa; flex: 1; }
.tok-tag { display: inline-block; background: #1e1e50; color: #9090ff;
border-radius: 3px; padding: 1px 6px; margin: 1px 3px 1px 0; }
.toggle-hint { font-size: 0.7em; color: #444; margin-left: auto; white-space: nowrap; }
.entry-body { display: none; padding: 14px; }
.entry-body.open { display: block; }
.section-label { font-size: 0.68em; text-transform: uppercase; letter-spacing: 0.1em;
color: #555; margin-bottom: 6px; margin-top: 14px; }
.section-label:first-child { margin-top: 0; }
.recollection-block { background: #0a1020; border-left: 3px solid #3040aa;
padding: 10px 12px; font-size: 0.83em; white-space: pre-wrap; color: #aabbff;
border-radius: 0 3px 3px 0; line-height: 1.5; }
.messages-accordion { margin-top: 8px; }
.msg-toggle { background: #1a1a3a; border: none; color: #777; font-family: monospace;
font-size: 0.78em; padding: 5px 10px; border-radius: 3px; cursor: pointer;
width: 100%; text-align: left; }
.msg-toggle:hover { background: #222250; color: #aaa; }
.messages-list { display: none; margin-top: 8px; }
.messages-list.open { display: block; }
.msg { border: 1px solid #1e1e40; border-radius: 3px; margin-bottom: 6px; overflow: hidden; }
.msg-role { padding: 4px 10px; font-size: 0.72em; font-weight: bold; text-transform: uppercase;
letter-spacing: 0.08em; }
.role-system { background: #1a2040; color: #6688ff; }
.role-user { background: #1a2a1a; color: #66bb66; }
.role-assistant { background: #2a1a1a; color: #cc8844; }
.role-other { background: #1e1e2e; color: #888; }
.msg-content { padding: 8px 10px; font-size: 0.8em; white-space: pre-wrap;
line-height: 1.5; max-height: 300px; overflow-y: auto; color: #ccc; }
.empty { color: #444; font-size: 0.85em; padding: 30px 0; text-align: center; }
.pagination { display: flex; gap: 8px; align-items: center; margin-top: 20px; }
.pg-btn { padding: 5px 12px; background: #1e1e40; border: none; color: #aaa;
border-radius: 3px; cursor: pointer; font-family: monospace; font-size: 0.8em; }
.pg-btn:hover { background: #2a2a55; }
.pg-btn:disabled { opacity: 0.35; cursor: default; }
.pg-info { font-size: 0.78em; color: #555; }
a.back { color: #5555aa; text-decoration: none; font-size: 0.82em; display: inline-block; margin-bottom: 16px; }
a.back:hover { color: #8888ff; }
</style>
</head>
<body>
<a href="/admin" class="back">← admin</a>
<h1>Recollection Log</h1>
<p class="subtitle">Every prompt where Festinger injected a &lt;recollection&gt; block.</p>
<div class="toolbar">
<button class="btn btn-danger" onclick="clearLog()">Clear log</button>
<button class="btn btn-secondary" onclick="load()">Refresh</button>
<span class="count" id="count-label"></span>
</div>
<div class="filter-row">
<input id="agent-filter" type="text" placeholder="Filter by agent…" oninput="load()" />
</div>
<div id="entries"></div>
<div class="pagination">
<button class="pg-btn" id="btn-prev" onclick="prevPage()" disabled>← prev</button>
<span class="pg-info" id="pg-info"></span>
<button class="pg-btn" id="btn-next" onclick="nextPage()">next →</button>
</div>
<script>
let currentPage = 0;
const PAGE = 20;
async function load() {
const agent = document.getElementById('agent-filter').value.trim();
const qs = new URLSearchParams({ limit: PAGE, offset: currentPage * PAGE });
if (agent) qs.set('agent', agent);
const res = await fetch('/recollection-log/data?' + qs);
const data = await res.json();
renderEntries(data.entries);
const total = data.total;
document.getElementById('count-label').textContent = total + ' event' + (total === 1 ? '' : 's');
document.getElementById('pg-info').textContent =
'page ' + (currentPage + 1) + ' of ' + Math.max(1, Math.ceil(total / PAGE));
document.getElementById('btn-prev').disabled = currentPage === 0;
document.getElementById('btn-next').disabled = (currentPage + 1) * PAGE >= total;
}
function renderEntries(entries) {
const el = document.getElementById('entries');
if (!entries.length) {
el.innerHTML = '<div class="empty">No recollection events yet.</div>';
return;
}
el.innerHTML = entries.map((e, i) => {
const tokHtml = (e.salient_tokens || []).map(t =>
'<span class="tok-tag">' + esc(t) + '</span>'
).join('');
const msgsHtml = (e.messages || []).map(m => {
const role = (m.role || 'unknown').toLowerCase();
const cls = ['system','user','assistant'].includes(role) ? 'role-' + role : 'role-other';
const content = typeof m.content === 'string' ? m.content
: JSON.stringify(m.content, null, 2);
return '<div class="msg"><div class="msg-role ' + cls + '">' + esc(role) + '</div>'
+ '<div class="msg-content">' + esc(content) + '</div></div>';
}).join('');
return '<div class="entry">'
+ '<div class="entry-header" onclick="toggle(' + i + ')">'
+ ' <span class="ts">' + esc(e.created_at) + '</span>'
+ ' <span class="agent">' + esc(e.agent_name || '(none)') + '</span>'
+ ' <span class="tokens">' + tokHtml + '</span>'
+ ' <span class="toggle-hint">click to expand</span>'
+ '</div>'
+ '<div class="entry-body" id="entry-body-' + i + '">'
+ ' <div class="section-label">Recollection block</div>'
+ ' <pre class="recollection-block">' + esc(e.recollection_block) + '</pre>'
+ ' <div class="section-label" style="margin-top:14px">Full prompt (' + (e.messages || []).length + ' messages)</div>'
+ ' <div class="messages-accordion">'
+ ' <button class="msg-toggle" onclick="toggleMsgs(event,' + i + ')">Show messages ▾</button>'
+ ' <div class="messages-list" id="msgs-' + i + '">' + msgsHtml + '</div>'
+ ' </div>'
+ '</div>'
+ '</div>';
}).join('');
}
function toggle(i) {
const el = document.getElementById('entry-body-' + i);
el.classList.toggle('open');
}
function toggleMsgs(evt, i) {
evt.stopPropagation();
const el = document.getElementById('msgs-' + i);
el.classList.toggle('open');
evt.target.textContent = el.classList.contains('open') ? 'Hide messages ▴' : 'Show messages ▾';
}
function esc(s) {
return String(s)
.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;')
.replace(/"/g,'&quot;');
}
function prevPage() { currentPage = Math.max(0, currentPage - 1); load(); }
function nextPage() { currentPage++; load(); }
async function clearLog() {
if (!confirm('Delete all recollection log entries?')) return;
await fetch('/recollection-log', { method: 'DELETE' });
currentPage = 0;
load();
}
load();
</script>
</body>
</html>
"""
@app.get("/recollection-log", response_class=HTMLResponse)
async def recollection_log_ui() -> HTMLResponse:
return HTMLResponse(RECOLLECTION_LOG_HTML)
@app.get("/recollection-log/data")
async def recollection_log_data(
request: Request,
limit: int = 20,
offset: int = 0,
agent: str = "",
) -> dict:
pool = request.app.state.pool
async with pool.acquire() as conn:
if agent:
rows = await conn.fetch(
"""
SELECT id, created_at, agent_name, salient_tokens, recollection_block, messages_json
FROM recollection_log
WHERE agent_name ILIKE $1
ORDER BY created_at DESC
LIMIT $2 OFFSET $3
""",
f"%{agent}%", limit, offset,
)
total = await conn.fetchval(
"SELECT COUNT(*) FROM recollection_log WHERE agent_name ILIKE $1",
f"%{agent}%",
)
else:
rows = await conn.fetch(
"""
SELECT id, created_at, agent_name, salient_tokens, recollection_block, messages_json
FROM recollection_log
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
""",
limit, offset,
)
total = await conn.fetchval("SELECT COUNT(*) FROM recollection_log")
entries = []
for r in rows:
entries.append({
"id": r["id"],
"created_at": r["created_at"].strftime("%Y-%m-%d %H:%M:%S UTC"),
"agent_name": r["agent_name"],
"salient_tokens": list(r["salient_tokens"]),
"recollection_block": r["recollection_block"],
"messages": r["messages_json"],
})
return {"total": total, "offset": offset, "limit": limit, "entries": entries}
@app.delete("/recollection-log")
async def clear_recollection_log(request: Request) -> dict:
pool = request.app.state.pool
async with pool.acquire() as conn:
result = await conn.execute("DELETE FROM recollection_log")
deleted = int(result.split()[-1]) if result else 0
log.info("recollection_log cleared deleted=%d", deleted)
return {"status": "ok", "deleted": deleted}
# ---------------------------------------------------------------------------
# /graph — knowledge graph explorer
# ---------------------------------------------------------------------------
@@ -2510,6 +2906,7 @@ ADMIN_HTML = """<!DOCTYPE html>
<h1>Festinger</h1>
<p class="subtitle">Ollama-compatible inference middleware — loop detection &amp; Recollections world model
&nbsp;&mdash;&nbsp;<a href="/graph" style="color:#1a1a2e">Knowledge Graph Explorer</a>
&nbsp;&mdash;&nbsp;<a href="/recollection-log" style="color:#1a1a2e">Recollection Log</a>
&nbsp;&mdash;&nbsp;<a href="/models-ui" style="color:#1a1a2e">Model Manager</a>
</p>
+96 -45
View File
@@ -2,13 +2,28 @@
Recollection engine — read path.
For each salient concept found in an intercepted prompt:
- Query URD edges (from in-memory cache, filtered by confidence + recency)
- Query URD edges (from in-memory cache, filtered by confidence)
- Rank by effective score (recency-decayed saliency + centrality bonus)
- Render hit or zero-hit block
- Inject the <recollection> block into the prompt before forwarding to Ollama
- Inject the <recollection> block into the prompt
Scoring
-------
effective_score(c) = saliency(c) * recency_decay(c) + centrality_bonus(c)
recency_decay = exp(-days_since_last_seen / RECENCY_HALF_LIFE)
→ 1.0 if never seen (no penalty for brand-new concepts)
→ 0.5 at 30 days, 0.25 at 60 days, ~0.04 at 90 days
centrality_bonus = log1p(n_children)
→ 0 for leaf concepts
→ grows slowly with the number of concepts that use this
one as a parent (hub nodes surface first)
"""
from __future__ import annotations
import logging
import math
from datetime import datetime, timezone, timedelta
from typing import Optional
@@ -19,34 +34,62 @@ log = logging.getLogger("festinger.recollection")
ZERO_HIT_TEMPLATE = "You have no memory of {concept}. Try: memory_load query='{concept}' or gutask context {concept}"
# Half-life for recency decay: saliency halves after this many days of silence.
RECENCY_HALF_LIFE = 30.0
# Flat bonus added to effective_score for concepts found in the system message.
# Large enough to push grounding concepts above freshly-encountered domain words.
SESSION_BOOST = 2.0
# ---------------------------------------------------------------------------
# Query (reads from in-memory cache)
# Scoring
# ---------------------------------------------------------------------------
def query_edges(
concept_id: int,
confidence_floor: float,
recency_days: int,
) -> list[UrdEdge]:
def recency_decay(row: SoasRow) -> float:
"""
Return URD edges for *concept_id* that pass confidence + recency filters.
All reads are pure in-memory — zero network.
Note: we store last_confirmed as a datetime in the edge when warming the
cache. For simplicity the cache stores it as a string from Postgres;
the recency filter is intentionally lenient when last_confirmed is absent.
Exponential decay based on days since last_seen.
Returns 1.0 if last_seen is unknown (no penalty for fresh concepts).
"""
edges = cache.urd_by_concept.get(concept_id, [])
if not edges:
return []
if row.last_seen is None:
return 1.0
now = datetime.now(tz=timezone.utc)
last = row.last_seen
if last.tzinfo is None:
last = last.replace(tzinfo=timezone.utc)
days = (now - last).total_seconds() / 86400.0
return math.exp(-days / RECENCY_HALF_LIFE)
cutoff = datetime.now(tz=timezone.utc) - timedelta(days=recency_days)
result = []
for e in edges:
if e.confidence < confidence_floor:
continue
result.append(e)
return result
def centrality_bonus(concept_id: int) -> float:
"""
log1p of the number of concepts that reference this one as a parent.
Hub concepts (many children) rise to the top of the recollection block.
"""
n_children = len(cache.urd_by_parent.get(concept_id, []))
return math.log1p(n_children)
def effective_score(concept_id: int, session_boosted: bool = False) -> float:
"""Combined score used to rank concepts within the recollection block."""
token = cache.soas_by_id.get(concept_id, "")
row = cache.soas_by_token.get(token)
if row is None:
return 0.0
base = row.saliency * recency_decay(row) + centrality_bonus(concept_id)
return base + (SESSION_BOOST if session_boosted else 0.0)
# ---------------------------------------------------------------------------
# Query
# ---------------------------------------------------------------------------
def query_edges(concept_id: int, confidence_floor: float) -> list[UrdEdge]:
"""Return URD edges for *concept_id* above the confidence floor."""
return [
e for e in cache.urd_by_concept.get(concept_id, [])
if e.confidence >= confidence_floor
]
# ---------------------------------------------------------------------------
@@ -57,27 +100,28 @@ def render_hit(
concept_token: str,
edges: list[UrdEdge],
concept_id: int,
score: float,
reverse_edges: list[UrdEdge] | None = None,
) -> str:
"""
Render one concept's recollection line:
gnommoweb: [type] repo [membership] glitch_university
omega13: [membership←] gnommoweb (reverse: gnommoweb is a member of omega13)
Pending-conflict dimensions get a '?' suffix on the dim token.
Render one concept's recollection line, e.g.:
gnommoweb [s=3.2]: [type] repo [membership] glitch_university [type←] omega13
The score annotation helps with debugging.
Pending-conflict dimensions get a '?' suffix.
"""
has_pending = concept_id in cache.pending_conflicts
parts = []
for e in edges:
dim_label = e.dim_token + ("?" if has_pending else "")
parts.append(f"[{dim_label}] {e.parent_token}")
# Bidirectional: surface concepts where this concept appears as a parent
for e in (reverse_edges or [])[:3]:
child_token = cache.soas_by_id.get(e.concept_id, str(e.concept_id))
parts.append(f"[{e.dim_token}←] {child_token}")
return f"{concept_token}: {' '.join(parts)}"
score_str = f"{score:.1f}"
return f"{concept_token} [s={score_str}]: {' '.join(parts)}"
def render_zero_hit(concept_token: str) -> str:
def render_zero_hit(concept_token: str, score: float) -> str:
line = ZERO_HIT_TEMPLATE.format(concept=concept_token)
soas_row = cache.soas_by_token.get(concept_token)
if soas_row and soas_row.first_seen_context:
@@ -88,37 +132,45 @@ def render_zero_hit(concept_token: str) -> str:
def build_recollection_block(
salient_concept_ids: list[int],
confidence_floor: float,
recency_days: int,
recency_days: int, # kept for API compat, no longer used for edge filtering
max_lines: int = 12,
session_boost_ids: set[int] | None = None,
) -> Optional[str]:
"""
Build the full <recollection> block for a list of above-threshold concept IDs.
Returns None if there is nothing to say.
Build the <recollection> block, ranked by effective_score (recency-decayed
saliency + centrality bonus + optional session boost).
Concepts with known edges are shown first (most informative); zero-hit
"don't know" lines fill the remaining budget up to max_lines.
Concepts with known edges are shown first; zero-hit lines fill the remainder.
session_boost_ids: concept IDs found in the system message — ranked higher
to surface grounding context (agent persona, project name) first.
"""
hit_lines: list[str] = []
zero_lines: list[str] = []
boosted = session_boost_ids or set()
# Deduplicate — same concept may appear twice if novel + known
# Deduplicate and score
seen: set[int] = set()
scored: list[tuple[float, int]] = []
for cid in salient_concept_ids:
if cid in seen:
continue
seen.add(cid)
scored.append((effective_score(cid, session_boosted=(cid in boosted)), cid))
# Highest score first
scored.sort(key=lambda x: x[0], reverse=True)
hit_lines: list[str] = []
zero_lines: list[str] = []
for score, cid in scored:
token = cache.soas_by_id.get(cid, str(cid))
edges = query_edges(cid, confidence_floor, recency_days)
# Bidirectional: find edges where this concept appears as a parent
edges = query_edges(cid, confidence_floor)
reverse_edges = cache.urd_by_parent.get(cid, [])
if edges or reverse_edges:
hit_lines.append(render_hit(token, edges, cid, reverse_edges))
hit_lines.append(render_hit(token, edges, cid, score, reverse_edges))
else:
zero_lines.append(render_zero_hit(token))
zero_lines.append(render_zero_hit(token, score))
# Hits always included (up to max_lines); zero-hits fill the remainder
lines = hit_lines[:max_lines]
remaining = max_lines - len(lines)
lines += zero_lines[:remaining]
@@ -126,8 +178,7 @@ def build_recollection_block(
if not lines:
return None
body = "\n".join(lines)
return f"<recollection>\n{body}\n</recollection>"
return f"<recollection>\n" + "\n".join(lines) + "\n</recollection>"
# ---------------------------------------------------------------------------
+15
View File
@@ -133,6 +133,21 @@ async def insert_urd_edge(
cache.urd_by_concept.setdefault(req.concept_id, []).append(edge)
cache.urd_by_concept_dim[key] = edge
cache.urd_by_parent.setdefault(req.parent_id, []).append(edge)
# Mark the concept as a domain token: saliency=1 means "first use".
# saliency=0 is the sentinel for common-English words (dictionary seed),
# which the encounter loop silently skips. Any concept that has a URD
# edge is definitionally domain-specific and must start above zero.
concept_token_str = cache.soas_by_id.get(req.concept_id, "")
row = cache.soas_by_token.get(concept_token_str)
if row and row.saliency == 0.0:
async with pool.acquire() as conn:
await conn.execute(
"UPDATE soas SET saliency = 1.0, novelty = 1.0 WHERE id = $1 AND saliency = 0",
req.concept_id,
)
row.saliency = 1.0
row.novelty = 1.0
log.info(
"urd insert concept=%d parent=%d dim=%d is_isa=%s source=%s",
req.concept_id, req.parent_id, req.dim_id, req.is_isa, req.source,