Adding improved saliency pipeline

This commit is contained in:
2026-04-20 18:12:57 +02:00
parent 84b4a88ba1
commit 3fae2c62bd
3 changed files with 149 additions and 5 deletions
+89
View File
@@ -237,6 +237,95 @@ async def get_or_create_soas(pool: asyncpg.Pool, token: str) -> SoasRow:
return soas_row
# ---------------------------------------------------------------------------
# Novel-word bootstrap
# ---------------------------------------------------------------------------
# Saliency assigned to a word seen for the first time that is not in the
# standard English dictionary. Must be above saliency_read_threshold (0.5)
# so the recollection engine immediately picks it up.
# Passed by create_novel_soas() as the saliency value of the row it inserts.
NOVEL_INITIAL_SALIENCY = 2.0
async def create_novel_soas(pool: asyncpg.Pool, token: str) -> SoasRow:
    """
    Upsert a domain-specific (non-dictionary) token and cache the result.

    A token that is not yet present in ``soas`` is inserted with
    ``NOVEL_INITIAL_SALIENCY`` (intended to be high enough to trigger
    recollection on the very first encounter), ``novelty=1.0`` and
    ``encounter_count=1``. novelty=1.0 distinguishes these rows from
    common-English seeds.

    This call is NOT idempotent: when the token already exists, its stored
    saliency and novelty are left untouched, but ``encounter_count`` is
    incremented and ``last_seen`` is refreshed on every call. In that case
    the returned row carries the existing saliency/novelty, which are not
    necessarily the novel-word defaults.

    Args:
        pool: connection pool used for the upsert.
        token: the token to insert or bump.

    Returns:
        The SoasRow built from the row as stored after the upsert. The same
        object is written to ``cache.soas_by_token`` and its id is
        registered in ``cache.soas_by_id``.
    """
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            INSERT INTO soas (token, saliency, novelty, encounter_count)
            VALUES ($1, $2, 1.0, 1)
            ON CONFLICT (token) DO UPDATE
            SET encounter_count = soas.encounter_count + 1,
                last_seen = now()
            RETURNING id, token, encounter_count, saliency, novelty
            """,
            token, NOVEL_INITIAL_SALIENCY,
        )

    # The connection is released before touching the in-memory cache; the
    # cache is only updated once the statement has succeeded.
    soas_row = SoasRow(
        id=row["id"],
        token=row["token"],
        encounter_count=row["encounter_count"],
        saliency=row["saliency"],
        novelty=row["novelty"],
    )
    cache.soas_by_token[token] = soas_row
    cache.soas_by_id[row["id"]] = token
    return soas_row
# ---------------------------------------------------------------------------
# Graph reset
# ---------------------------------------------------------------------------
async def reset_graph(pool: asyncpg.Pool) -> dict:
    """
    Wipe all learned knowledge (URD edges, domain SOAS entries,
    resolution queue, write log) while keeping the standard-English
    dictionary seed (novelty=0) intact.

    This function only deletes rows and evicts the matching in-memory cache
    entries. It does NOT re-bootstrap dimensions or re-warm the cache; a
    caller that needs a usable graph afterwards has to do that itself.

    Args:
        pool: connection pool used for the deletes.

    Returns:
        A dict of deleted-row counts with the keys ``urd_deleted``,
        ``soas_deleted``, ``resolution_queue_deleted`` and
        ``kg_write_log_deleted``.
    """
    async with pool.acquire() as conn:
        async with conn.transaction():
            # Order matters: FK constraints resolution_queue → soas, urd → soas
            rq = await conn.execute("DELETE FROM resolution_queue")
            kw = await conn.execute("DELETE FROM kg_write_log")
            urd = await conn.execute("DELETE FROM urd")
            # Keep only common-English seeds (novelty = 0); delete domain words
            soas = await conn.execute("DELETE FROM soas WHERE novelty > 0")

    def _count(result: str) -> int:
        # The last whitespace-separated field of the status string is taken
        # as the affected-row count; anything unparsable counts as 0.
        try:
            return int(result.split()[-1])
        except (ValueError, IndexError):
            return 0

    counts = {
        "urd_deleted": _count(urd),
        "soas_deleted": _count(soas),
        "resolution_queue_deleted": _count(rq),
        "kg_write_log_deleted": _count(kw),
    }

    # Clear in-memory state. This runs only after the transaction block has
    # exited without raising, so a failed wipe leaves the cache untouched.
    cache.urd_by_concept.clear()
    cache.urd_by_concept_dim.clear()
    cache.pending_conflicts.clear()

    # Remove domain words from SOAS cache (keep novelty=0 entries). The list
    # of tokens is fully built before any entry is popped.
    domain_tokens = [t for t, r in cache.soas_by_token.items() if r.novelty > 0]
    for t in domain_tokens:
        row = cache.soas_by_token.pop(t, None)
        if row is not None:
            cache.soas_by_id.pop(row.id, None)

    log.info("graph reset: %s", counts)
    return counts
# ---------------------------------------------------------------------------
# Saliency recalculation (log-scale)
# ---------------------------------------------------------------------------
+59 -4
View File
@@ -36,7 +36,7 @@ from .db import (
close_pool, get_config, get_or_create_soas,
get_pool, init_schema, bootstrap_dimensions,
bootstrap_english_dictionary, warm_cache, reload_urd_cache,
flush_encounter_deltas,
flush_encounter_deltas, create_novel_soas, reset_graph,
)
from .loop_detector import apply_mitigations, record_and_check, session_key
from .cue_scanner import scan_cues
@@ -497,16 +497,29 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
for token in tokens:
soas_row = cache.soas_by_token.get(token)
if soas_row is None:
# New token — get_or_create happens in background via queue when needed
continue # unknown token — skip saliency for now; write queue handles creation
# Token is absent from the dictionary entirely → novel domain word.
# Give it an initial high saliency so recollection fires immediately
# and instructs the model to ask the user what it is.
soas_row = await create_novel_soas(pool, token)
salient_for_read.append(soas_row.id)
# Do NOT add to salient_for_write: we have no basis for LLM-inferred
# relationships yet — let the conversation teach us via cue scanner.
continue
if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
# Common English word pre-seeded from the dictionary — not interesting.
continue
cache.record_encounter(soas_row.id)
if soas_row.saliency >= read_threshold:
salient_for_read.append(soas_row.id)
if soas_row.saliency >= write_threshold and soas_row.novelty < 1.0:
if soas_row.saliency >= write_threshold and soas_row.novelty > 0.0:
# Only enqueue domain-specific words for LLM relationship extraction,
# not freshly-created novel words (novelty=1.0 but just inserted).
salient_for_write.append(token)
for token in salient_for_write:
@@ -908,6 +921,25 @@ async def reload(request: Request) -> dict:
return {"status": "ok", "urd_edges": len(cache.urd_by_concept_dim)}
@app.post("/reset")
async def reset(request: Request) -> dict:
    """
    Drop everything the graph has learned and prepare it for new learning.

    Delegates the wipe (URD, domain SOAS, resolution queue, write log) to
    reset_graph(), which keeps the standard-English dictionary seed. The
    dimension roots are then bootstrapped again and the URD cache reloaded.

    Returns a dict with "status", the per-table delete counts reported by
    reset_graph(), and the sizes of the SOAS and URD caches after the reset.
    """
    db_pool = request.app.state.pool
    deleted = await reset_graph(db_pool)

    # Dimension self-referential roots were wiped with the rest; recreate them.
    await bootstrap_dimensions(db_pool)
    # Reload the URD cache (should now be empty except roots).
    await reload_urd_cache(db_pool)
    log.info("graph reset complete")

    response = {"status": "ok"}
    response.update(deleted)
    response["soas_remaining"] = len(cache.soas_by_token)
    response["urd_edges"] = len(cache.urd_by_concept_dim)
    return response
# ---------------------------------------------------------------------------
# /health
# ---------------------------------------------------------------------------
@@ -1779,6 +1811,7 @@ ADMIN_HTML = """<!DOCTYPE html>
<div class="actions">
<button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
<button onclick="runWordnetImport(this)">Import WordNet lemmas</button>
<button onclick="resetGraph(this)" style="color:#b00;border-color:#e0b0b0">&#9888; Reset knowledge graph</button>
</div>
<pre id="result" style="display:none"></pre>
@@ -1956,6 +1989,28 @@ ADMIN_HTML = """<!DOCTYPE html>
}}
}}
async function resetGraph(btn) {{
if (!confirm(
'This will DELETE all learned URD edges, domain concepts, conflicts, and write logs.\n\n' +
'The standard English dictionary seed will be kept.\n\n' +
'Are you sure?'
)) return;
btn.disabled = true;
showResult('Resetting knowledge graph…', true);
try {{
const r = await fetch('/reset', {{method: 'POST'}});
const d = await r.json();
showResult(JSON.stringify(d, null, 2), r.ok);
await loadStats();
await loadConflicts();
await loadLog(0);
}} catch(e) {{
showResult('Error: ' + e.message, false);
}} finally {{
btn.disabled = false;
}}
}}
const LOG_PAGE_SIZE = 50;
let logOffset = 0;
let logTotal = 0;
+1 -1
View File
@@ -17,7 +17,7 @@ from .cache import SoasRow, UrdEdge
log = logging.getLogger("festinger.recollection")
ZERO_HIT_TEMPLATE = "? {concept}: no recollection stored yet."
ZERO_HIT_TEMPLATE = "You can't remember what {concept} is. Ask."
# ---------------------------------------------------------------------------