Adding improved saliency pipeline
This commit is contained in:
@@ -237,6 +237,95 @@ async def get_or_create_soas(pool: asyncpg.Pool, token: str) -> SoasRow:
|
||||
return soas_row
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Novel-word bootstrap
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Saliency assigned to a word seen for the first time that is not in the
|
||||
# standard English dictionary. Must be above saliency_read_threshold (0.5)
|
||||
# so the recollection engine immediately picks it up.
|
||||
NOVEL_INITIAL_SALIENCY: float = 2.0  # 4x the 0.5 read threshold cited above
|
||||
|
||||
|
||||
async def create_novel_soas(pool: asyncpg.Pool, token: str) -> SoasRow:
    """
    Upsert a non-dictionary token into ``soas`` and mirror it in the cache.

    A token seen for the first time is stored with saliency
    ``NOVEL_INITIAL_SALIENCY``, novelty 1.0 and encounter_count 1.  If a row
    for the token already exists, its saliency and novelty are left as
    stored; only encounter_count is incremented and last_seen refreshed.

    In both cases the resulting row is returned and written into
    ``cache.soas_by_token`` (token -> row) and ``cache.soas_by_id``
    (id -> token).
    """
    upsert_sql = """
        INSERT INTO soas (token, saliency, novelty, encounter_count)
        VALUES ($1, $2, 1.0, 1)
        ON CONFLICT (token) DO UPDATE
        SET encounter_count = soas.encounter_count + 1,
        last_seen = now()
        RETURNING id, token, encounter_count, saliency, novelty
        """
    async with pool.acquire() as conn:
        record = await conn.fetchrow(upsert_sql, token, NOVEL_INITIAL_SALIENCY)

    # Copy exactly the columns named in the RETURNING clause into the row object.
    columns = ("id", "token", "encounter_count", "saliency", "novelty")
    entry = SoasRow(**{column: record[column] for column in columns})

    # Keep both cache indexes in step with what the database now holds.
    cache.soas_by_token[token] = entry
    cache.soas_by_id[record["id"]] = token
    return entry
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Graph reset
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def reset_graph(pool: asyncpg.Pool) -> dict:
    """
    Wipe all learned knowledge while keeping the standard-English seed.

    Inside a single transaction this deletes every row of
    ``resolution_queue``, ``kg_write_log`` and ``urd``, plus the ``soas``
    rows whose novelty is greater than 0.  Only after that transaction has
    committed are the in-memory URD indexes and pending conflicts cleared
    and the domain tokens evicted from the SOAS cache, so a failed wipe
    leaves the cache untouched.

    This function does NOT re-bootstrap dimensions or reload the URD cache;
    the caller is responsible for doing so after it returns.

    Returns:
        Deleted-row counts keyed by ``urd_deleted``, ``soas_deleted``,
        ``resolution_queue_deleted`` and ``kg_write_log_deleted``.
    """

    def _count(result: str) -> int:
        # conn.execute() yields the command status tag (e.g. "DELETE 42");
        # the trailing field is the affected-row count.  Fall back to 0 if
        # the tag is empty or does not end in an integer.
        try:
            return int(result.split()[-1])
        except (ValueError, IndexError):
            return 0

    async with pool.acquire() as conn:
        async with conn.transaction():
            # Order matters: FK constraints resolution_queue → soas, urd → soas
            rq = await conn.execute("DELETE FROM resolution_queue")
            kw = await conn.execute("DELETE FROM kg_write_log")
            urd = await conn.execute("DELETE FROM urd")
            # Keep only common-English seeds (novelty = 0); delete domain words
            soas = await conn.execute("DELETE FROM soas WHERE novelty > 0")

    counts = {
        "urd_deleted": _count(urd),
        "soas_deleted": _count(soas),
        "resolution_queue_deleted": _count(rq),
        "kg_write_log_deleted": _count(kw),
    }

    # Clear in-memory state (reached only once the transaction committed).
    cache.urd_by_concept.clear()
    cache.urd_by_concept_dim.clear()
    cache.pending_conflicts.clear()

    # Remove domain words from the SOAS cache (keep novelty=0 entries).  The
    # token list is fully built before any pop, so no defensive copy of the
    # dict is needed.
    domain_tokens = [
        token for token, row in cache.soas_by_token.items() if row.novelty > 0
    ]
    for token in domain_tokens:
        evicted = cache.soas_by_token.pop(token, None)
        if evicted is not None:
            cache.soas_by_id.pop(evicted.id, None)

    log.info("graph reset: %s", counts)
    return counts
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Saliency recalculation (log-scale)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -36,7 +36,7 @@ from .db import (
|
||||
close_pool, get_config, get_or_create_soas,
|
||||
get_pool, init_schema, bootstrap_dimensions,
|
||||
bootstrap_english_dictionary, warm_cache, reload_urd_cache,
|
||||
flush_encounter_deltas,
|
||||
flush_encounter_deltas, create_novel_soas, reset_graph,
|
||||
)
|
||||
from .loop_detector import apply_mitigations, record_and_check, session_key
|
||||
from .cue_scanner import scan_cues
|
||||
@@ -497,16 +497,29 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
|
||||
|
||||
for token in tokens:
|
||||
soas_row = cache.soas_by_token.get(token)
|
||||
|
||||
if soas_row is None:
|
||||
# New token — get_or_create happens in background via queue when needed
|
||||
continue # unknown token — skip saliency for now; write queue handles creation
|
||||
# Token is absent from the dictionary entirely → novel domain word.
|
||||
# Give it an initial high saliency so recollection fires immediately
|
||||
# and instructs the model to ask the user what it is.
|
||||
soas_row = await create_novel_soas(pool, token)
|
||||
salient_for_read.append(soas_row.id)
|
||||
# Do NOT add to salient_for_write: we have no basis for LLM-inferred
|
||||
# relationships yet — let the conversation teach us via cue scanner.
|
||||
continue
|
||||
|
||||
if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
|
||||
# Common English word pre-seeded from the dictionary — not interesting.
|
||||
continue
|
||||
|
||||
cache.record_encounter(soas_row.id)
|
||||
|
||||
if soas_row.saliency >= read_threshold:
|
||||
salient_for_read.append(soas_row.id)
|
||||
|
||||
if soas_row.saliency >= write_threshold and soas_row.novelty < 1.0:
|
||||
if soas_row.saliency >= write_threshold and soas_row.novelty > 0.0:
|
||||
# Only enqueue domain-specific words for LLM relationship extraction,
|
||||
# not freshly-created novel words (novelty=1.0 but just inserted).
|
||||
salient_for_write.append(token)
|
||||
|
||||
for token in salient_for_write:
|
||||
@@ -908,6 +921,25 @@ async def reload(request: Request) -> dict:
|
||||
return {"status": "ok", "urd_edges": len(cache.urd_by_concept_dim)}
|
||||
|
||||
|
||||
@app.post("/reset")
async def reset(request: Request) -> dict:
    """
    Handle ``POST /reset``: wipe the learned graph and rebuild its base state.

    Runs ``reset_graph`` on the application's pool, then
    ``bootstrap_dimensions`` and ``reload_urd_cache`` so the graph is ready
    for new learning.  The response carries ``status``, the per-table
    deletion counts returned by ``reset_graph``, and the sizes of the SOAS
    and URD caches after the reload.
    """
    db_pool = request.app.state.pool

    payload = {"status": "ok"}
    payload.update(await reset_graph(db_pool))

    # Recreate the dimension roots, then refresh the URD cache from the
    # database so the reported sizes reflect the post-reset state.
    await bootstrap_dimensions(db_pool)
    await reload_urd_cache(db_pool)
    log.info("graph reset complete")

    payload["soas_remaining"] = len(cache.soas_by_token)
    payload["urd_edges"] = len(cache.urd_by_concept_dim)
    return payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /health
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1779,6 +1811,7 @@ ADMIN_HTML = """<!DOCTYPE html>
|
||||
<div class="actions">
|
||||
<button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
|
||||
<button onclick="runWordnetImport(this)">Import WordNet lemmas</button>
|
||||
<button onclick="resetGraph(this)" style="color:#b00;border-color:#e0b0b0">⚠ Reset knowledge graph</button>
|
||||
</div>
|
||||
<pre id="result" style="display:none"></pre>
|
||||
|
||||
@@ -1956,6 +1989,28 @@ ADMIN_HTML = """<!DOCTYPE html>
|
||||
}}
|
||||
}}
|
||||
|
||||
async function resetGraph(btn) {{
|
||||
if (!confirm(
|
||||
'This will DELETE all learned URD edges, domain concepts, conflicts, and write logs.\n\n' +
|
||||
'The standard English dictionary seed will be kept.\n\n' +
|
||||
'Are you sure?'
|
||||
)) return;
|
||||
btn.disabled = true;
|
||||
showResult('Resetting knowledge graph…', true);
|
||||
try {{
|
||||
const r = await fetch('/reset', {{method: 'POST'}});
|
||||
const d = await r.json();
|
||||
showResult(JSON.stringify(d, null, 2), r.ok);
|
||||
await loadStats();
|
||||
await loadConflicts();
|
||||
await loadLog(0);
|
||||
}} catch(e) {{
|
||||
showResult('Error: ' + e.message, false);
|
||||
}} finally {{
|
||||
btn.disabled = false;
|
||||
}}
|
||||
}}
|
||||
|
||||
const LOG_PAGE_SIZE = 50;
|
||||
let logOffset = 0;
|
||||
let logTotal = 0;
|
||||
|
||||
@@ -17,7 +17,7 @@ from .cache import SoasRow, UrdEdge
|
||||
|
||||
log = logging.getLogger("festinger.recollection")
|
||||
|
||||
ZERO_HIT_TEMPLATE = "? {concept}: no recollection stored yet."
|
||||
# ``{concept}`` is a named placeholder — presumably filled via
# str.format(concept=...) when a concept has nothing stored; verify at the call site.
ZERO_HIT_TEMPLATE = "You can't remember what {concept} is. Ask."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user