Adding improved saliency pipeline

This commit is contained in:
2026-04-20 18:12:57 +02:00
parent 84b4a88ba1
commit 3fae2c62bd
3 changed files with 149 additions and 5 deletions
+59 -4
View File
@@ -36,7 +36,7 @@ from .db import (
close_pool, get_config, get_or_create_soas,
get_pool, init_schema, bootstrap_dimensions,
bootstrap_english_dictionary, warm_cache, reload_urd_cache,
flush_encounter_deltas,
flush_encounter_deltas, create_novel_soas, reset_graph,
)
from .loop_detector import apply_mitigations, record_and_check, session_key
from .cue_scanner import scan_cues
@@ -497,16 +497,29 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
for token in tokens:
soas_row = cache.soas_by_token.get(token)
if soas_row is None:
# New token — get_or_create happens in background via queue when needed
continue # unknown token — skip saliency for now; write queue handles creation
# Token is absent from the dictionary entirely → novel domain word.
# Give it an initial high saliency so recollection fires immediately
# and instructs the model to ask the user what it is.
soas_row = await create_novel_soas(pool, token)
salient_for_read.append(soas_row.id)
# Do NOT add to salient_for_write: we have no basis for LLM-inferred
# relationships yet — let the conversation teach us via cue scanner.
continue
if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
# Common English word pre-seeded from the dictionary — not interesting.
continue
cache.record_encounter(soas_row.id)
if soas_row.saliency >= read_threshold:
salient_for_read.append(soas_row.id)
if soas_row.saliency >= write_threshold and soas_row.novelty < 1.0:
if soas_row.saliency >= write_threshold and soas_row.novelty > 0.0:
# Only enqueue domain-specific words for LLM relationship extraction,
# not freshly-created novel words (novelty=1.0 but just inserted).
salient_for_write.append(token)
for token in salient_for_write:
@@ -908,6 +921,25 @@ async def reload(request: Request) -> dict:
return {"status": "ok", "urd_edges": len(cache.urd_by_concept_dim)}
@app.post("/reset")
async def reset(request: Request) -> dict:
"""
Wipe all learned knowledge (URD, domain SOAS, resolution queue, write log).
Keeps the standard-English dictionary seed intact.
Re-bootstraps dimension roots so the graph is ready for new learning.
"""
pool = request.app.state.pool
counts = await reset_graph(pool)
# Re-bootstrap dimension self-referential roots
await bootstrap_dimensions(pool)
# Re-warm the URD cache (should now be empty except roots)
await reload_urd_cache(pool)
log.info("graph reset complete")
return {"status": "ok", **counts,
"soas_remaining": len(cache.soas_by_token),
"urd_edges": len(cache.urd_by_concept_dim)}
# ---------------------------------------------------------------------------
# /health
# ---------------------------------------------------------------------------
@@ -1779,6 +1811,7 @@ ADMIN_HTML = """<!DOCTYPE html>
<div class="actions">
<button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
<button onclick="runWordnetImport(this)">Import WordNet lemmas</button>
<button onclick="resetGraph(this)" style="color:#b00;border-color:#e0b0b0">&#9888; Reset knowledge graph</button>
</div>
<pre id="result" style="display:none"></pre>
@@ -1956,6 +1989,28 @@ ADMIN_HTML = """<!DOCTYPE html>
}}
}}
async function resetGraph(btn) {{
if (!confirm(
'This will DELETE all learned URD edges, domain concepts, conflicts, and write logs.\n\n' +
'The standard English dictionary seed will be kept.\n\n' +
'Are you sure?'
)) return;
btn.disabled = true;
showResult('Resetting knowledge graph…', true);
try {{
const r = await fetch('/reset', {{method: 'POST'}});
const d = await r.json();
showResult(JSON.stringify(d, null, 2), r.ok);
await loadStats();
await loadConflicts();
await loadLog(0);
}} catch(e) {{
showResult('Error: ' + e.message, false);
}} finally {{
btn.disabled = false;
}}
}}
const LOG_PAGE_SIZE = 50;
let logOffset = 0;
let logTotal = 0;