Adding improved saliency pipeline

2026-04-20 18:12:57 +02:00
parent 84b4a88ba1
commit 3fae2c62bd
3 changed files with 149 additions and 5 deletions
@@ -36,7 +36,7 @@ from .db import (
    close_pool, get_config, get_or_create_soas,
    get_pool, init_schema, bootstrap_dimensions,
    bootstrap_english_dictionary, warm_cache, reload_urd_cache,
-    flush_encounter_deltas,
+    flush_encounter_deltas, create_novel_soas, reset_graph,
 )
 from .loop_detector import apply_mitigations, record_and_check, session_key
 from .cue_scanner import scan_cues
@@ -497,16 +497,29 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:

    for token in tokens:
        soas_row = cache.soas_by_token.get(token)
+
        if soas_row is None:
-            # New token — get_or_create happens in background via queue when needed
-                continue  # unknown token — skip saliency for now; write queue handles creation
+            # Token is absent from the dictionary entirely → novel domain word.
+            # Give it an initial high saliency so recollection fires immediately
+            # and instructs the model to ask the user what it is.
+            soas_row = await create_novel_soas(pool, token)
+            salient_for_read.append(soas_row.id)
+            # Do NOT add to salient_for_write: we have no basis for LLM-inferred
+            # relationships yet — let the conversation teach us via cue scanner.
+            continue
+
+        if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
+            # Common English word pre-seeded from the dictionary — not interesting.
+            continue

        cache.record_encounter(soas_row.id)

        if soas_row.saliency >= read_threshold:
            salient_for_read.append(soas_row.id)

-        if soas_row.saliency >= write_threshold and soas_row.novelty < 1.0:
+        if soas_row.saliency >= write_threshold and soas_row.novelty > 0.0:
+            # Only enqueue domain-specific words (novelty > 0) for LLM relationship
+            # extraction; freshly-created novel words already hit `continue` above.
            salient_for_write.append(token)

    for token in salient_for_write:
@@ -908,6 +921,25 @@ async def reload(request: Request) -> dict:
    return {"status": "ok", "urd_edges": len(cache.urd_by_concept_dim)}


+@app.post("/reset")
+async def reset(request: Request) -> dict:
+    """
+    Reset the knowledge graph and report what was removed and what remains.
+
+    Intended effect (per reset_graph's contract, not visible here): learned
+    URD edges, domain SOAS rows, the resolution queue and the write log are
+    wiped, while the standard-English dictionary seed is kept.
+
+    Returns {"status": "ok"} merged with the counts from reset_graph, plus the
+    current sizes of the in-memory SOAS and URD caches.
+
+    NOTE(review): only the URD cache is reloaded below; confirm that
+    cache.soas_by_token is refreshed as well, otherwise "soas_remaining" may
+    still count rows that were just deleted.
+    """
+    db_pool = request.app.state.pool
+    deleted = await reset_graph(db_pool)
+    # Dimension roots must exist again before anything new can be learned.
+    await bootstrap_dimensions(db_pool)
+    # Bring the in-memory URD view back in line with the database.
+    await reload_urd_cache(db_pool)
+    log.info("graph reset complete")
+    summary = {"status": "ok", **deleted}
+    summary["soas_remaining"] = len(cache.soas_by_token)
+    summary["urd_edges"] = len(cache.urd_by_concept_dim)
+    return summary
+
+
+
+
 # ---------------------------------------------------------------------------
 # /health
 # ---------------------------------------------------------------------------
@@ -1779,6 +1811,7 @@ ADMIN_HTML = """<!DOCTYPE html>
  <div class="actions">
    <button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
    <button onclick="runWordnetImport(this)">Import WordNet lemmas</button>
+    <button onclick="resetGraph(this)" style="color:#b00;border-color:#e0b0b0">&#9888; Reset knowledge graph</button>
  </div>
  <pre id="result" style="display:none"></pre>

@@ -1956,6 +1989,28 @@ ADMIN_HTML = """<!DOCTYPE html>
      }}
    }}

+    // Confirm with the user, POST /reset, show the JSON reply, then refresh
+    // the stats, conflicts and log panels. The button is disabled while the
+    // request is in flight and always re-enabled afterwards.
+    async function resetGraph(btn) {{
+      // Line breaks in the message use a doubled backslash on purpose: this
+      // script lives inside a non-raw Python string, and a single backslash
+      // escape would be expanded by Python into a real line break inside the
+      // quoted JS string, which is a syntax error in the browser.
+      if (!confirm(
+        'This will DELETE all learned URD edges, domain concepts, conflicts, and write logs.\\n\\n' +
+        'The standard English dictionary seed will be kept.\\n\\n' +
+        'Are you sure?'
+      )) return;
+      btn.disabled = true;
+      showResult('Resetting knowledge graph…', true);
+      try {{
+        const r = await fetch('/reset', {{method: 'POST'}});
+        const d = await r.json();
+        showResult(JSON.stringify(d, null, 2), r.ok);
+        await loadStats();
+        await loadConflicts();
+        await loadLog(0);
+      }} catch(e) {{
+        // Network failures and non-JSON replies both end up here.
+        showResult('Error: ' + e.message, false);
+      }} finally {{
+        btn.disabled = false;
+      }}
+    }}
+
+
    const LOG_PAGE_SIZE = 50;
    let logOffset = 0;
    let logTotal = 0;