diff --git a/plugins/festinger/festinger/db.py b/plugins/festinger/festinger/db.py index 0352031..e7f62a6 100644 --- a/plugins/festinger/festinger/db.py +++ b/plugins/festinger/festinger/db.py @@ -237,6 +237,95 @@ async def get_or_create_soas(pool: asyncpg.Pool, token: str) -> SoasRow: return soas_row +# --------------------------------------------------------------------------- +# Novel-word bootstrap +# --------------------------------------------------------------------------- + +# Saliency assigned to a word seen for the first time that is not in the +# standard English dictionary. Must be above saliency_read_threshold (0.5) +# so the recollection engine immediately picks it up. +NOVEL_INITIAL_SALIENCY = 2.0 + + +async def create_novel_soas(pool: asyncpg.Pool, token: str) -> SoasRow: + """ + Insert a domain-specific (non-dictionary) token with an initial saliency + high enough to trigger recollection on the very first encounter. + novelty=1.0 distinguishes these rows from common-English seeds. + Upsert: an existing row is returned with encounter_count bumped and last_seen refreshed. 
+ """ + async with pool.acquire() as conn: + row = await conn.fetchrow( + """ + INSERT INTO soas (token, saliency, novelty, encounter_count) + VALUES ($1, $2, 1.0, 1) + ON CONFLICT (token) DO UPDATE + SET encounter_count = soas.encounter_count + 1, + last_seen = now() + RETURNING id, token, encounter_count, saliency, novelty + """, + token, NOVEL_INITIAL_SALIENCY, + ) + soas_row = SoasRow( + id=row["id"], + token=row["token"], + encounter_count=row["encounter_count"], + saliency=row["saliency"], + novelty=row["novelty"], + ) + cache.soas_by_token[token] = soas_row + cache.soas_by_id[row["id"]] = token + return soas_row + + +# --------------------------------------------------------------------------- +# Graph reset +# --------------------------------------------------------------------------- + +async def reset_graph(pool: asyncpg.Pool) -> dict: + """ + Wipe all learned knowledge (URD edges, domain SOAS entries, + resolution queue, write log) while keeping the standard-English + dictionary seed (novelty=0) intact. + Does NOT re-bootstrap dimensions or reload the URD cache; the caller must do that. 
+ """ + async with pool.acquire() as conn: + async with conn.transaction(): + # Order matters: FK constraints resolution_queue → soas, urd → soas + rq = await conn.execute("DELETE FROM resolution_queue") + kw = await conn.execute("DELETE FROM kg_write_log") + urd = await conn.execute("DELETE FROM urd") + # Keep only common-English seeds (novelty = 0); delete domain words + soas = await conn.execute("DELETE FROM soas WHERE novelty > 0") + + def _count(result: str) -> int: + try: + return int(result.split()[-1]) + except (ValueError, IndexError): + return 0 + + counts = { + "urd_deleted": _count(urd), + "soas_deleted": _count(soas), + "resolution_queue_deleted": _count(rq), + "kg_write_log_deleted": _count(kw), + } + + # Clear in-memory state + cache.urd_by_concept.clear() + cache.urd_by_concept_dim.clear() + cache.pending_conflicts.clear() + # Remove domain words from SOAS cache (keep novelty=0 entries) + domain_tokens = [t for t, r in list(cache.soas_by_token.items()) if r.novelty > 0] + for t in domain_tokens: + row = cache.soas_by_token.pop(t, None) + if row: + cache.soas_by_id.pop(row.id, None) + + log.info("graph reset: %s", counts) + return counts + + # --------------------------------------------------------------------------- # Saliency recalculation (log-scale) # --------------------------------------------------------------------------- diff --git a/plugins/festinger/festinger/main.py b/plugins/festinger/festinger/main.py index c4bb9a9..119651f 100644 --- a/plugins/festinger/festinger/main.py +++ b/plugins/festinger/festinger/main.py @@ -36,7 +36,7 @@ from .db import ( close_pool, get_config, get_or_create_soas, get_pool, init_schema, bootstrap_dimensions, bootstrap_english_dictionary, warm_cache, reload_urd_cache, - flush_encounter_deltas, + flush_encounter_deltas, create_novel_soas, reset_graph, ) from .loop_detector import apply_mitigations, record_and_check, session_key from .cue_scanner import scan_cues @@ -497,16 +497,29 @@ async def 
process_prompt(body: dict, path: str, pool, cfg: dict) -> dict: for token in tokens: soas_row = cache.soas_by_token.get(token) + if soas_row is None: - # New token — get_or_create happens in background via queue when needed - continue # unknown token — skip saliency for now; write queue handles creation + # Token is absent from the SOAS cache → treat it as a novel domain word. + # Give it an initial high saliency so recollection fires immediately + # and instructs the model to ask the user what it is. + soas_row = await create_novel_soas(pool, token) + salient_for_read.append(soas_row.id) + # Do NOT add to salient_for_write: we have no basis for LLM-inferred + # relationships yet — let the conversation teach us via cue scanner. + continue + + if soas_row.saliency == 0.0 and soas_row.novelty == 0.0: + # Common English word pre-seeded from the dictionary — not interesting. + continue cache.record_encounter(soas_row.id) if soas_row.saliency >= read_threshold: salient_for_read.append(soas_row.id) - if soas_row.saliency >= write_threshold and soas_row.novelty < 1.0: + if soas_row.saliency >= write_threshold and soas_row.novelty > 0.0: + # Only enqueue domain-specific words (novelty > 0) for LLM relationship extraction; + # freshly-created novel words never reach this check (they `continue` above). salient_for_write.append(token) for token in salient_for_write: @@ -908,6 +921,25 @@ async def reload(request: Request) -> dict: return {"status": "ok", "urd_edges": len(cache.urd_by_concept_dim)} +@app.post("/reset") +async def reset(request: Request) -> dict: + """ + Wipe all learned knowledge (URD, domain SOAS, resolution queue, write log). + Keeps the standard-English dictionary seed intact. + Re-bootstraps dimension roots so the graph is ready for new learning. 
+ """ + pool = request.app.state.pool + counts = await reset_graph(pool) + # Re-bootstrap dimension self-referential roots + await bootstrap_dimensions(pool) + # Re-warm the URD cache (should now be empty except roots) + await reload_urd_cache(pool) + log.info("graph reset complete") + return {"status": "ok", **counts, + "soas_remaining": len(cache.soas_by_token), + "urd_edges": len(cache.urd_by_concept_dim)} + + # --------------------------------------------------------------------------- # /health # --------------------------------------------------------------------------- @@ -1779,6 +1811,7 @@ ADMIN_HTML = """