diff --git a/agents/gerhard-hermes/config.yaml b/agents/gerhard-hermes/config.yaml new file mode 100644 index 0000000..d948b1a --- /dev/null +++ b/agents/gerhard-hermes/config.yaml @@ -0,0 +1,47 @@ +# Hermes config for gerhard — evaluation instance +# +# LLM calls go to festinger, which sits in front of the real inference +# providers. The agent identity is encoded in the base URL so festinger +# can route to the right model and build the per-agent knowledge graph. +# +# festinger routing: http://festinger:11434/gerhard/v1/chat/completions +# → festinger looks up agent_name="gerhard" in agent_models table +# → if found: routes to the registered provider/model +# → if not found: falls through to upstream_openai in config.yaml + +model: + provider: "custom" + base_url: "http://festinger:11434/gerhard/v1" + # No api_key needed — festinger does not authenticate OpenAI-compat requests. + # Set a placeholder so the OpenAI SDK doesn't complain. + api_key: "festinger" + # Model name is what gets sent in the request body. festinger can override + # this per-agent via the agent_models table. Set something reasonable as + # a default (festinger's upstream_openai fallback will use whatever it supports). + default: "claude-opus-4-6" + +terminal: + backend: "local" + cwd: "." + timeout: 180 + lifetime_seconds: 300 + +agent: + max_turns: 60 + verbose: false + reasoning_effort: "medium" + +memory: + memory_enabled: true + user_profile_enabled: true + +compression: + enabled: true + threshold: 0.50 + target_ratio: 0.20 + protect_last_n: 20 + +session_reset: + mode: both + idle_minutes: 1440 + at_hour: 4 diff --git a/docker-compose.yml b/docker-compose.yml index d8c337f..aeeee9f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -76,19 +76,36 @@ services: - "host.docker.internal:host-gateway" gerhard: - image: agent0ai/agent-zero:latest + build: + context: ../../hermes-agent + image: hermes-agent container_name: gerhard - ports: - - "50007:80" volumes: - - ./agents/gerhard:/a0/usr - - ${HOME}/.ssh:/root/.ssh + - ./agents/gerhard-hermes:/opt/data restart: unless-stopped environment: - AUTH_LOGIN: ${AUTH_LOGIN} - AUTH_PASSWORD: ${AUTH_PASSWORD} + HERMES_UID: ${HERMES_UID:-1000} + HERMES_GID: ${HERMES_GID:-1000} extra_hosts: - "host.docker.internal:host-gateway" + command: ["gateway", "run"] + + gerhard-dashboard: + image: hermes-agent + container_name: gerhard-dashboard + ports: + - "50007:9119" # web dashboard at localhost:50007 + volumes: + - ./agents/gerhard-hermes:/opt/data + restart: unless-stopped + depends_on: + - gerhard + environment: + HERMES_UID: ${HERMES_UID:-1000} + HERMES_GID: ${HERMES_GID:-1000} + extra_hosts: + - "host.docker.internal:host-gateway" + command: ["dashboard", "--host", "0.0.0.0", "--no-open"] postgres: image: postgres:16-alpine @@ -135,7 +152,7 @@ services: - gunnar - rind - abyssinthia - - gerhard + - gerhard-dashboard volumes: festinger-pgdata: diff --git a/plugins/festinger/db/schema.sql b/plugins/festinger/db/schema.sql index 9e2093e..2023466 100644 --- a/plugins/festinger/db/schema.sql +++ b/plugins/festinger/db/schema.sql @@ -122,3 +122,17 @@ CREATE TABLE IF NOT EXISTS agent_models ( model_id INT NOT NULL REFERENCES models(id) ON DELETE CASCADE, created_at TIMESTAMPTZ NOT NULL DEFAULT now() ); + +-- --------------------------------------------------------------------------- +-- recollection_log — every prompt where a recollection block was injected +-- --------------------------------------------------------------------------- +CREATE TABLE IF NOT EXISTS recollection_log ( + id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + agent_name TEXT NOT NULL DEFAULT '', + salient_tokens TEXT[] NOT NULL DEFAULT '{}', + recollection_block TEXT NOT NULL, + messages_json JSONB NOT NULL DEFAULT '[]' +); + +CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC); diff --git a/plugins/festinger/festinger/cache.py b/plugins/festinger/festinger/cache.py index ed13bbc..81e74aa 100644 --- a/plugins/festinger/festinger/cache.py +++ b/plugins/festinger/festinger/cache.py @@ -7,6 +7,7 @@ on the hot path. Writes are write-through: in-memory first, then Postgres. from __future__ import annotations from dataclasses import dataclass, field +from datetime import datetime, timezone from typing import Optional @@ -18,6 +19,7 @@ class SoasRow: saliency: float = 0.0 novelty: float = 0.0 first_seen_context: str = "" + last_seen: Optional[datetime] = None @dataclass diff --git a/plugins/festinger/festinger/db.py b/plugins/festinger/festinger/db.py index 3e82a0b..e89594d 100644 --- a/plugins/festinger/festinger/db.py +++ b/plugins/festinger/festinger/db.py @@ -4,9 +4,11 @@ Database layer — asyncpg pool, schema init, cache warm-up, flush. from __future__ import annotations import asyncio +import json import logging import math import os +from datetime import datetime, timezone from pathlib import Path import asyncpg @@ -70,6 +72,22 @@ async def init_schema(pool: asyncpg.Pool) -> None: ) """ ) + # Migration: recollection event log + await conn.execute( + """ + CREATE TABLE IF NOT EXISTS recollection_log ( + id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + agent_name TEXT NOT NULL DEFAULT '', + salient_tokens TEXT[] NOT NULL DEFAULT '{}', + recollection_block TEXT NOT NULL, + messages_json JSONB NOT NULL DEFAULT '[]' + ) + """ + ) + await conn.execute( + "CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC)" + ) log.info("schema applied") @@ -135,7 +153,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None: """Load all SOAS and URD rows into the in-memory cache.""" async with pool.acquire() as conn: soas_rows = await conn.fetch( - "SELECT id, token, encounter_count, saliency, novelty, first_seen_context FROM soas" + "SELECT id, token, encounter_count, saliency, novelty, first_seen_context, last_seen FROM soas" ) for r in soas_rows: row = SoasRow( @@ -145,6 +163,7 @@ async def warm_cache(pool: asyncpg.Pool) -> None: saliency=r["saliency"], novelty=r["novelty"], first_seen_context=r["first_seen_context"] or "", + last_seen=r["last_seen"], ) cache.soas_by_token[r["token"]] = row cache.soas_by_id[r["id"]] = r["token"] @@ -378,7 +397,18 @@ def recalculate_saliency(encounter_count: int, is_common_english: bool) -> float # --------------------------------------------------------------------------- async def flush_encounter_deltas(pool: asyncpg.Pool) -> None: - """Flush staged encounter_count deltas to Postgres in one batch UPDATE.""" + """ + Flush staged encounter_count deltas to Postgres. + + Saliency model: + saliency = 0 → common English word (dictionary seed). Never touched. + saliency ≥ 1 → domain concept. Incremented by +delta on each flush. + + saliency=1 is set by urd_writer when a concept first gets a URD edge, + meaning "mentioned at least once in a meaningful context". Each flush + adds the number of new encounters since the last flush, so saliency + equals the total number of times the concept has been seen by the agent. + """ deltas = cache.drain_deltas() if not deltas: return @@ -388,23 +418,10 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None: for soas_id, delta in deltas.items(): token = cache.soas_by_id.get(soas_id, "") row = cache.soas_by_token.get(token) - new_count = (row.encounter_count if row else 0) - # novelty = 0 for common English words (pre-seeded) - is_common = (row.novelty == 0.0 and row.saliency == 0.0) if row else False - - # Unconfirmed novel concepts (novelty > 0, no URD edges yet) must not - # be promoted above the read threshold by encounter-count alone. - # Their saliency is raised explicitly when the LLM confirms them. - is_unconfirmed_novel = ( - row is not None - and row.novelty > 0.0 - and not cache.urd_by_concept.get(soas_id) - ) - if is_unconfirmed_novel: - new_saliency = row.saliency # preserve low saliency until LLM confirms - else: - new_saliency = recalculate_saliency(new_count, is_common) - + if not row or row.saliency == 0.0: + # saliency=0 is the English-word sentinel — never increment. + continue + new_saliency = row.saliency + delta await conn.execute( """ UPDATE soas @@ -415,8 +432,8 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None: """, delta, new_saliency, soas_id, ) - if row: - row.saliency = new_saliency + row.saliency = new_saliency + row.last_seen = datetime.now(timezone.utc) log.debug("flushed %d saliency deltas", len(deltas)) @@ -429,3 +446,32 @@ async def get_config(pool: asyncpg.Pool, key: str, default: str = "") -> str: async with pool.acquire() as conn: row = await conn.fetchrow("SELECT value FROM config WHERE key = $1", key) return row["value"] if row else default + + +# --------------------------------------------------------------------------- +# Recollection log +# --------------------------------------------------------------------------- + +async def log_recollection( + pool: asyncpg.Pool, + agent_name: str, + salient_tokens: list[str], + recollection_block: str, + messages: list[dict], +) -> None: + """Persist one recollection event. Fire-and-forget — never raises.""" + try: + async with pool.acquire() as conn: + await conn.execute( + """ + INSERT INTO recollection_log + (agent_name, salient_tokens, recollection_block, messages_json) + VALUES ($1, $2, $3, $4) + """, + agent_name or "", + salient_tokens, + recollection_block, + json.dumps(messages), + ) + except Exception: + log.exception("recollection_log insert failed") diff --git a/plugins/festinger/festinger/main.py b/plugins/festinger/festinger/main.py index 4c51d85..2373d57 100644 --- a/plugins/festinger/festinger/main.py +++ b/plugins/festinger/festinger/main.py @@ -2,18 +2,23 @@ Festinger — main FastAPI application. Routes: - POST /api/chat Ollama-compatible chat (loop detection + recollection injection) - POST /api/generate Ollama-compatible generate - POST /v1/messages Anthropic Messages API proxy (loop detection + recollection) - POST /v1/chat/completions OpenAI-compatible proxy (loop detection + recollection) - POST /iknowthat Manual write path (gutask iknowthat) - POST /resolve/run Manually trigger nightly resolution job - POST /reload Reload URD cache (called by resolution job) - GET /health Health + stats - GET /conflicts Pending and recently resolved conflicts - GET /admin Minimal admin UI - * /v1/{path} Passthrough to upstream Anthropic - * /{path} Passthrough to upstream Ollama + POST /api/chat Ollama-compatible chat (loop detection + recollection injection) + POST /api/generate Ollama-compatible generate + POST /v1/messages Anthropic Messages API proxy (loop detection + recollection) + POST /v1/chat/completions OpenAI-compatible proxy (loop detection + recollection) + POST /chat/completions Alias for /v1/chat/completions (LiteLLM without /v1 prefix) + POST /{agent_id}/v1/chat/completions Agent-prefixed OpenAI proxy — agent identity from URL + POST /{agent_id}/chat/completions Agent-prefixed OpenAI proxy (no /v1 prefix variant) + POST /{agent_id}/v1/messages Agent-prefixed Anthropic proxy — agent identity from URL + POST /{agent_id}/api/chat Agent-prefixed Ollama chat — agent identity from URL + POST /{agent_id}/api/generate Agent-prefixed Ollama generate — agent identity from URL + POST /iknowthat Manual write path (gutask iknowthat) + POST /resolve/run Manually trigger nightly resolution job + POST /reload Reload URD cache (called by resolution job) + GET /health Health + stats + GET /conflicts Pending and recently resolved conflicts + GET /admin Minimal admin UI + * /{path} Passthrough to upstream Ollama or Anthropic """ from __future__ import annotations @@ -38,7 +43,7 @@ from .db import ( close_pool, get_config, get_or_create_soas, get_pool, init_schema, bootstrap_dimensions, bootstrap_english_dictionary, warm_cache, reload_urd_cache, - flush_encounter_deltas, reset_graph, + flush_encounter_deltas, reset_graph, log_recollection, ) from .loop_detector import apply_mitigations, record_and_check, session_key from .cue_scanner import scan_cues @@ -499,6 +504,30 @@ def _last_assistant_message_text(body: dict, path: str) -> str: return "" +def _system_message_text(body: dict, path: str) -> str: + """ + Extract the system message text from the request body. + - OpenAI/Ollama: first message with role='system' in the messages list. + - Anthropic: top-level 'system' field (string or content-block list). + """ + if path == "/v1/messages": + sys_field = body.get("system") or "" + if isinstance(sys_field, list): + return " ".join( + b.get("text", "") for b in sys_field + if isinstance(b, dict) and b.get("type") == "text" + ) + return sys_field + + if path in ("/api/chat", "/v1/chat/completions", "/api/generate"): + messages = body.get("messages", []) + sys_msg = next((m for m in messages if m.get("role") == "system"), None) + if sys_msg: + return " ".join(_extract_text_strings(sys_msg.get("content", ""))) + + return "" + + def inject_recollection_anthropic(body: dict, block: str) -> dict: """ Inject a recollection block into an Anthropic Messages API request. @@ -723,6 +752,7 @@ async def _route_agent_chat( cfg: dict, request_headers: dict, min_len: int, + agent_name: str = "", ) -> Response: """ Route an OpenAI-compatible chat completions request to the agent's @@ -740,7 +770,7 @@ async def _route_agent_chat( # (Configure write_model_id or a per-agent model for a cloud/separate # model if you want memory building alongside local inference.) body = await process_prompt( - body, "/v1/chat/completions", pool, cfg, request_headers, + body, "/v1/chat/completions", pool, cfg, request_headers, agent_name=agent_name, ) sess = session_key(agent_model.model_name, body.get("messages", [])) @@ -845,17 +875,21 @@ async def process_prompt( pool, cfg: dict, request_headers: dict | None = None, + agent_name: str = "", ) -> dict: """ Run the saliency + recollection pipeline over the prompt. Returns a (possibly modified) body dict with the recollection block injected. + agent_name may be passed in directly (e.g. extracted from the URL path) to + avoid re-parsing the body/headers; falls back to _extract_agent_name if empty. """ read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5")) conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6")) recency_days = int(await get_config(pool, "recollection_recency_days", "90")) hdrs = request_headers or {} - agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler + if not agent_name: + agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler # Last user message — primary source for recollection reads. user_text = _last_user_message_text(body, path) @@ -874,7 +908,21 @@ async def process_prompt( for cue in scan_cues(assistant_text): await enqueue_cue(cue) - # 3. Token loop over user message for saliency-triggered recollection. + # 3. Session boost — scan the system message for domain tokens. + # Concepts grounding the agent's persona or project context rank higher + # in the recollection block for the entire session. + session_boost_ids: set[int] = set() + sys_text = _system_message_text(body, path) + if sys_text: + for tok in tokenize(sys_text): + row = cache.soas_by_token.get(tok) + if row and row.saliency > 0.0: + session_boost_ids.add(row.id) + if session_boost_ids: + boost_tokens = [cache.soas_by_id.get(i, str(i)) for i in session_boost_ids] + log.debug("session_boost | agent=%s tokens=%s", agent_name or "(none)", boost_tokens) + + # 4. Token loop over user message for saliency-triggered recollection. tokens = tokenize(user_text) salient_for_read: list[int] = [] @@ -882,8 +930,8 @@ async def process_prompt( soas_row = cache.soas_by_token.get(token) if soas_row is None: continue - if soas_row.saliency == 0.0 and soas_row.novelty == 0.0: - continue # common English — skip + if soas_row.saliency == 0.0: + continue # saliency=0 is the English-word sentinel — skip cache.record_encounter(soas_row.id) if soas_row.saliency >= read_threshold: salient_for_read.append(soas_row.id) @@ -891,8 +939,24 @@ async def process_prompt( if not salient_for_read: return body - # 5. Build recollection block - block = build_recollection_block(salient_for_read, conf_floor, recency_days) + # 5. Conflict spike — warn loudly if any salient concept has a pending contradiction. + conflicted = [ + cache.soas_by_id.get(cid, str(cid)) + for cid in salient_for_read + if cid in cache.pending_conflicts + ] + if conflicted: + log.warning( + "recollection_conflict_spike | agent=%s conflicted=%s " + "(recollection block will show '?' suffix on affected dimensions)", + agent_name or "(none)", conflicted, + ) + + # 6. Build recollection block + block = build_recollection_block( + salient_for_read, conf_floor, recency_days, + session_boost_ids=session_boost_ids, + ) if not block: return body @@ -902,16 +966,24 @@ async def process_prompt( agent_name or "(none)", salient_tokens, block, ) - # 6. Inject into messages + # 7. Inject into messages if path == "/api/chat" or path == "/v1/chat/completions": body = dict(body) body["messages"] = inject_recollection(body.get("messages", []), block) + log_messages = body["messages"] elif path == "/v1/messages": body = inject_recollection_anthropic(body, block) - # /api/generate uses a flat prompt string — prepend there + # Reconstruct a messages-style list for the log viewer + sys_content = body.get("system", "") + log_messages = ([{"role": "system", "content": sys_content}] if sys_content else []) + body.get("messages", []) elif path == "/api/generate": body = dict(body) body["prompt"] = block + "\n\n" + body.get("prompt", "") + log_messages = [{"role": "user", "content": body["prompt"]}] + else: + log_messages = [] + + asyncio.create_task(log_recollection(pool, agent_name or "", salient_tokens, block, log_messages)) return body @@ -920,17 +992,20 @@ async def process_prompt( # Routes # --------------------------------------------------------------------------- -@app.post("/api/chat") -async def chat(request: Request) -> Response: +async def _handle_ollama_chat(request: Request, agent_name: str = "") -> Response: cfg = request.app.state.yaml_config pool = request.app.state.pool - _, body = _extract_agent_name(await request.json(), dict(request.headers)) + raw_body = await request.json() + if agent_name: + _, body = _extract_agent_name(raw_body, dict(request.headers)) + else: + agent_name, body = _extract_agent_name(raw_body, dict(request.headers)) model = body.get("model", "unknown") upstream = cfg["upstream_ollama"] min_len = cfg["detection"]["min_length"] - log.info("chat route=/api/chat model=%s", model) + log.info("chat route=/api/chat model=%s agent=%s", model, agent_name or "—") try: - body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers)) + body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers), agent_name=agent_name) text, raw = await call_ollama("/api/chat", body, upstream) sess = session_key(model, body.get("messages", [])) count = record_and_check(sess, text, min_len) @@ -953,17 +1028,20 @@ async def chat(request: Request) -> Response: raise -@app.post("/api/generate") -async def generate(request: Request) -> Response: +async def _handle_ollama_generate(request: Request, agent_name: str = "") -> Response: cfg = request.app.state.yaml_config pool = request.app.state.pool - _, body = _extract_agent_name(await request.json(), dict(request.headers)) + raw_body = await request.json() + if agent_name: + _, body = _extract_agent_name(raw_body, dict(request.headers)) + else: + agent_name, body = _extract_agent_name(raw_body, dict(request.headers)) model = body.get("model", "unknown") upstream = cfg["upstream_ollama"] min_len = cfg["detection"]["min_length"] - log.info("chat route=/api/generate model=%s", model) + log.info("chat route=/api/generate model=%s agent=%s", model, agent_name or "—") try: - body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers)) + body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers), agent_name=agent_name) messages = [{"role": "user", "content": body.get("prompt", "")}] sess = session_key(model, messages) text, raw = await call_ollama("/api/generate", body, upstream) @@ -987,31 +1065,44 @@ async def generate(request: Request) -> Response: raise +@app.post("/api/chat") +async def chat(request: Request) -> Response: + return await _handle_ollama_chat(request) + + +@app.post("/api/generate") +async def generate(request: Request) -> Response: + return await _handle_ollama_generate(request) + + # --------------------------------------------------------------------------- # Anthropic Messages API (POST /v1/messages) # --------------------------------------------------------------------------- -@app.post("/v1/messages") -async def anthropic_messages(request: Request) -> Response: +async def _handle_anthropic_messages(request: Request, agent_name: str = "") -> Response: cfg = request.app.state.yaml_config pool = request.app.state.pool raw_body = await request.body() - _, body = _extract_agent_name(json.loads(raw_body), dict(request.headers)) + parsed = json.loads(raw_body) + if agent_name: + _, body = _extract_agent_name(parsed, dict(request.headers)) + else: + agent_name, body = _extract_agent_name(parsed, dict(request.headers)) # Capture streaming intent BEFORE call_anthropic forces stream=False original_stream: bool = bool(body.get("stream", False)) model = body.get("model", "unknown") upstream = cfg["upstream_anthropic"] min_len = cfg["detection"]["min_length"] log.info( - "chat route=/v1/messages model=%s upstream=%s stream=%s req_preview=%.200s", - model, upstream, original_stream, + "chat route=/v1/messages model=%s upstream=%s agent=%s stream=%s req_preview=%.200s", + model, upstream, agent_name or "—", original_stream, raw_body[:200].decode(errors="replace").replace("\n", " "), ) try: headers = _relay_headers(request, ANTHROPIC_RELAY_HEADERS) if "anthropic-version" not in {k.lower() for k in headers}: headers["anthropic-version"] = "2023-06-01" - body = await process_prompt(body, "/v1/messages", pool, cfg, headers) + body = await process_prompt(body, "/v1/messages", pool, cfg, headers, agent_name=agent_name) messages = body.get("messages", []) sess = session_key(model, messages) text, raw = await call_anthropic(body, upstream, headers) @@ -1048,17 +1139,24 @@ async def anthropic_messages(request: Request) -> Response: raise +@app.post("/v1/messages") +async def anthropic_messages(request: Request) -> Response: + return await _handle_anthropic_messages(request) + + # --------------------------------------------------------------------------- # OpenAI-compatible chat completions (POST /v1/chat/completions) # --------------------------------------------------------------------------- -@app.post("/v1/chat/completions") -async def openai_chat_completions(request: Request) -> Response: +async def _handle_openai_chat(request: Request, agent_name: str = "") -> Response: cfg = request.app.state.yaml_config pool = request.app.state.pool raw_body = await request.json() hdrs = dict(request.headers) - agent_name, body = _extract_agent_name(raw_body, hdrs) # strips agent_id/agent_name + if agent_name: + _, body = _extract_agent_name(raw_body, hdrs) + else: + agent_name, body = _extract_agent_name(raw_body, hdrs) # strips agent_id/agent_name model = body.get("model", "unknown") upstream = cfg["upstream_openai"] min_len = cfg["detection"]["min_length"] @@ -1077,12 +1175,12 @@ async def openai_chat_completions(request: Request) -> Response: agent_model.base_url or "(default)", ) return await _route_agent_chat( - body, agent_model, original_stream, pool, cfg, hdrs, min_len + body, agent_model, original_stream, pool, cfg, hdrs, min_len, agent_name=agent_name, ) # Standard path — forward to configured upstream unchanged headers = _relay_headers(request, OPENAI_RELAY_HEADERS) - body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs) + body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs, agent_name=agent_name) messages = body.get("messages", []) sess = session_key(model, messages) text, raw = await call_openai(body, upstream, headers) @@ -1110,12 +1208,54 @@ async def openai_chat_completions(request: Request) -> Response: raise +@app.post("/v1/chat/completions") +async def openai_chat_completions(request: Request) -> Response: + return await _handle_openai_chat(request) + + # Alias: some LiteLLM provider types (custom_openai, openai_like) omit the # /v1 prefix and post directly to /chat/completions. @app.post("/chat/completions") async def openai_chat_completions_no_prefix(request: Request) -> Response: """Alias for /v1/chat/completions — handles LiteLLM providers that omit /v1.""" - return await openai_chat_completions(request) + return await _handle_openai_chat(request) + + +# --------------------------------------------------------------------------- +# Agent-prefixed routes /{agent_id}/... +# +# Configure each agent instance's base_url to include its name: +# http://festinger:11434/gerhard/v1 ← hermes (OpenAI-compat) +# http://festinger:11434/dobby/v1/messages ← Agent0 (Anthropic) +# http://festinger:11434/gunnar ← Ollama clients +# +# The agent_id is extracted from the URL path and takes priority over any +# agent_name/agent_id passed in the request body or headers. +# --------------------------------------------------------------------------- + +@app.post("/{agent_id}/v1/chat/completions") +async def openai_chat_with_agent_id(agent_id: str, request: Request) -> Response: + return await _handle_openai_chat(request, agent_name=agent_id.lower()) + + +@app.post("/{agent_id}/chat/completions") +async def openai_chat_with_agent_id_no_v1(agent_id: str, request: Request) -> Response: + return await _handle_openai_chat(request, agent_name=agent_id.lower()) + + +@app.post("/{agent_id}/v1/messages") +async def anthropic_messages_with_agent_id(agent_id: str, request: Request) -> Response: + return await _handle_anthropic_messages(request, agent_name=agent_id.lower()) + + +@app.post("/{agent_id}/api/chat") +async def ollama_chat_with_agent_id(agent_id: str, request: Request) -> Response: + return await _handle_ollama_chat(request, agent_name=agent_id.lower()) + + +@app.post("/{agent_id}/api/generate") +async def ollama_generate_with_agent_id(agent_id: str, request: Request) -> Response: + return await _handle_ollama_generate(request, agent_name=agent_id.lower()) # --------------------------------------------------------------------------- @@ -1679,6 +1819,262 @@ async def test_reset(scenario_id: str, request: Request) -> dict: return await reset_scenario(pool, scenario_id) +# --------------------------------------------------------------------------- +# /recollection-log — browse prompts where recollection was injected +# --------------------------------------------------------------------------- + +RECOLLECTION_LOG_HTML = """ + +
+ +Every prompt where Festinger injected a <recollection> block.
+ + + +Ollama-compatible inference middleware — loop detection & Recollections world model — Knowledge Graph Explorer + — Recollection Log — Model Manager
diff --git a/plugins/festinger/festinger/recollection.py b/plugins/festinger/festinger/recollection.py index 3e4f366..009a12e 100644 --- a/plugins/festinger/festinger/recollection.py +++ b/plugins/festinger/festinger/recollection.py @@ -2,13 +2,28 @@ Recollection engine — read path. For each salient concept found in an intercepted prompt: - - Query URD edges (from in-memory cache, filtered by confidence + recency) + - Query URD edges (from in-memory cache, filtered by confidence) + - Rank by effective score (recency-decayed saliency + centrality bonus) - Render hit or zero-hit block - - Inject the