Adding improved saliency pipeline
This commit is contained in:
@@ -237,6 +237,95 @@ async def get_or_create_soas(pool: asyncpg.Pool, token: str) -> SoasRow:
|
|||||||
return soas_row
|
return soas_row
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Novel-word bootstrap
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Starting saliency for a token that is seen for the first time and is absent
# from the standard English dictionary.  It must stay above
# saliency_read_threshold (0.5) so the recollection engine picks the token up
# immediately.
NOVEL_INITIAL_SALIENCY = 2.0


async def create_novel_soas(pool: asyncpg.Pool, token: str) -> SoasRow:
    """
    Upsert a domain-specific (non-dictionary) token and cache the result.

    A token that is not stored yet is inserted with saliency
    NOVEL_INITIAL_SALIENCY, novelty 1.0 and encounter_count 1; novelty=1.0
    is what distinguishes these rows from common-English seeds.

    If the token already exists, the stored row is kept: its saliency and
    novelty are left untouched, while encounter_count is incremented and
    last_seen is refreshed.  Repeated calls are therefore safe, but each call
    counts as one more encounter.

    Returns the SoasRow built from the stored row, after registering it in
    cache.soas_by_token and cache.soas_by_id.
    """
    async with pool.acquire() as conn:
        record = await conn.fetchrow(
            """
            INSERT INTO soas (token, saliency, novelty, encounter_count)
            VALUES ($1, $2, 1.0, 1)
            ON CONFLICT (token) DO UPDATE
            SET encounter_count = soas.encounter_count + 1,
            last_seen = now()
            RETURNING id, token, encounter_count, saliency, novelty
            """,
            token,
            NOVEL_INITIAL_SALIENCY,
        )

    # The RETURNING clause yields exactly the columns SoasRow is built from.
    columns = ("id", "token", "encounter_count", "saliency", "novelty")
    novel_row = SoasRow(**{name: record[name] for name in columns})

    # Keep both cache indexes (token -> row, id -> token) in step.
    cache.soas_by_token[token] = novel_row
    cache.soas_by_id[record["id"]] = token
    return novel_row
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Graph reset
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def reset_graph(pool: asyncpg.Pool) -> dict:
    """
    Wipe all learned knowledge while keeping the standard-English seed.

    Inside a single transaction this deletes every row of resolution_queue,
    kg_write_log and urd, plus the soas rows with novelty > 0 (domain words).
    soas rows with novelty = 0 (the dictionary seed) are kept.  Once the
    transaction has finished, the in-memory cache is brought in line: the URD
    indexes and pending conflicts are emptied and domain words are removed
    from both SOAS indexes.

    This function does not re-bootstrap dimensions or reload the URD cache;
    a caller that needs a ready-to-use graph has to do that afterwards.

    Returns a dict of deleted-row counts with the keys "urd_deleted",
    "soas_deleted", "resolution_queue_deleted" and "kg_write_log_deleted".
    """

    def _count(status: str) -> int:
        # execute() hands back the command status tag (e.g. "DELETE 42");
        # the last field is the affected-row count.  Anything unparsable is
        # reported as 0 rather than failing the reset.
        try:
            return int(status.split()[-1])
        except (ValueError, IndexError):
            return 0

    async with pool.acquire() as conn:
        async with conn.transaction():
            # Order matters: FK constraints resolution_queue → soas, urd → soas
            rq = await conn.execute("DELETE FROM resolution_queue")
            kw = await conn.execute("DELETE FROM kg_write_log")
            urd = await conn.execute("DELETE FROM urd")
            # Keep only common-English seeds (novelty = 0); delete domain words
            soas = await conn.execute("DELETE FROM soas WHERE novelty > 0")

    counts = {
        "urd_deleted": _count(urd),
        "soas_deleted": _count(soas),
        "resolution_queue_deleted": _count(rq),
        "kg_write_log_deleted": _count(kw),
    }

    # Clear in-memory state only after the transaction block has exited
    # without raising, so a failed delete never leaves the cache emptied
    # while the database still holds the rows.
    cache.urd_by_concept.clear()
    cache.urd_by_concept_dim.clear()
    cache.pending_conflicts.clear()

    # Remove domain words from the SOAS cache (keep novelty=0 entries).  The
    # token list is fully built before any entry is popped, so the dict is
    # never mutated while it is being iterated.
    domain_tokens = [
        tok for tok, cached in cache.soas_by_token.items() if cached.novelty > 0
    ]
    for tok in domain_tokens:
        cached = cache.soas_by_token.pop(tok, None)
        # Test for presence explicitly: the id index must be cleaned for
        # every removed row, regardless of how the row object evaluates in a
        # boolean context.
        if cached is not None:
            cache.soas_by_id.pop(cached.id, None)

    log.info("graph reset: %s", counts)
    return counts
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Saliency recalculation (log-scale)
|
# Saliency recalculation (log-scale)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ from .db import (
|
|||||||
close_pool, get_config, get_or_create_soas,
|
close_pool, get_config, get_or_create_soas,
|
||||||
get_pool, init_schema, bootstrap_dimensions,
|
get_pool, init_schema, bootstrap_dimensions,
|
||||||
bootstrap_english_dictionary, warm_cache, reload_urd_cache,
|
bootstrap_english_dictionary, warm_cache, reload_urd_cache,
|
||||||
flush_encounter_deltas,
|
flush_encounter_deltas, create_novel_soas, reset_graph,
|
||||||
)
|
)
|
||||||
from .loop_detector import apply_mitigations, record_and_check, session_key
|
from .loop_detector import apply_mitigations, record_and_check, session_key
|
||||||
from .cue_scanner import scan_cues
|
from .cue_scanner import scan_cues
|
||||||
@@ -497,16 +497,29 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
|
|||||||
|
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
soas_row = cache.soas_by_token.get(token)
|
soas_row = cache.soas_by_token.get(token)
|
||||||
|
|
||||||
if soas_row is None:
|
if soas_row is None:
|
||||||
# New token — get_or_create happens in background via queue when needed
|
# Token is absent from the dictionary entirely → novel domain word.
|
||||||
continue # unknown token — skip saliency for now; write queue handles creation
|
# Give it an initial high saliency so recollection fires immediately
|
||||||
|
# and instructs the model to ask the user what it is.
|
||||||
|
soas_row = await create_novel_soas(pool, token)
|
||||||
|
salient_for_read.append(soas_row.id)
|
||||||
|
# Do NOT add to salient_for_write: we have no basis for LLM-inferred
|
||||||
|
# relationships yet — let the conversation teach us via cue scanner.
|
||||||
|
continue
|
||||||
|
|
||||||
|
if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
|
||||||
|
# Common English word pre-seeded from the dictionary — not interesting.
|
||||||
|
continue
|
||||||
|
|
||||||
cache.record_encounter(soas_row.id)
|
cache.record_encounter(soas_row.id)
|
||||||
|
|
||||||
if soas_row.saliency >= read_threshold:
|
if soas_row.saliency >= read_threshold:
|
||||||
salient_for_read.append(soas_row.id)
|
salient_for_read.append(soas_row.id)
|
||||||
|
|
||||||
if soas_row.saliency >= write_threshold and soas_row.novelty < 1.0:
|
if soas_row.saliency >= write_threshold and soas_row.novelty > 0.0:
|
||||||
|
# Only enqueue domain-specific words for LLM relationship extraction,
|
||||||
|
# not freshly-created novel words (novelty=1.0 but just inserted).
|
||||||
salient_for_write.append(token)
|
salient_for_write.append(token)
|
||||||
|
|
||||||
for token in salient_for_write:
|
for token in salient_for_write:
|
||||||
@@ -908,6 +921,25 @@ async def reload(request: Request) -> dict:
|
|||||||
return {"status": "ok", "urd_edges": len(cache.urd_by_concept_dim)}
|
return {"status": "ok", "urd_edges": len(cache.urd_by_concept_dim)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/reset")
async def reset(request: Request) -> dict:
    """
    Discard all learned knowledge and leave the graph ready for new learning.

    The wipe itself (URD, domain SOAS, resolution queue, write log — the
    standard-English dictionary seed is kept) is delegated to reset_graph.
    Afterwards the dimension roots are bootstrapped again and the URD cache
    is reloaded, all on the application's shared pool.

    Returns a dict with status "ok", the deletion counts reported by
    reset_graph, and the sizes of cache.soas_by_token ("soas_remaining") and
    cache.urd_by_concept_dim ("urd_edges") after the reload.
    """
    db_pool = request.app.state.pool
    deleted = await reset_graph(db_pool)

    # Re-create the self-referential dimension roots.
    await bootstrap_dimensions(db_pool)
    # Reload the URD cache; it should now hold nothing but those roots.
    await reload_urd_cache(db_pool)
    log.info("graph reset complete")

    payload = {"status": "ok"}
    payload.update(deleted)
    payload["soas_remaining"] = len(cache.soas_by_token)
    payload["urd_edges"] = len(cache.urd_by_concept_dim)
    return payload
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# /health
|
# /health
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -1779,6 +1811,7 @@ ADMIN_HTML = """<!DOCTYPE html>
|
|||||||
<div class="actions">
|
<div class="actions">
|
||||||
<button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
|
<button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
|
||||||
<button onclick="runWordnetImport(this)">Import WordNet lemmas</button>
|
<button onclick="runWordnetImport(this)">Import WordNet lemmas</button>
|
||||||
|
<button onclick="resetGraph(this)" style="color:#b00;border-color:#e0b0b0">⚠ Reset knowledge graph</button>
|
||||||
</div>
|
</div>
|
||||||
<pre id="result" style="display:none"></pre>
|
<pre id="result" style="display:none"></pre>
|
||||||
|
|
||||||
@@ -1956,6 +1989,28 @@ ADMIN_HTML = """<!DOCTYPE html>
|
|||||||
}}
|
}}
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
async function resetGraph(btn) {{
|
||||||
|
if (!confirm(
|
||||||
|
'This will DELETE all learned URD edges, domain concepts, conflicts, and write logs.\n\n' +
|
||||||
|
'The standard English dictionary seed will be kept.\n\n' +
|
||||||
|
'Are you sure?'
|
||||||
|
)) return;
|
||||||
|
btn.disabled = true;
|
||||||
|
showResult('Resetting knowledge graph…', true);
|
||||||
|
try {{
|
||||||
|
const r = await fetch('/reset', {{method: 'POST'}});
|
||||||
|
const d = await r.json();
|
||||||
|
showResult(JSON.stringify(d, null, 2), r.ok);
|
||||||
|
await loadStats();
|
||||||
|
await loadConflicts();
|
||||||
|
await loadLog(0);
|
||||||
|
}} catch(e) {{
|
||||||
|
showResult('Error: ' + e.message, false);
|
||||||
|
}} finally {{
|
||||||
|
btn.disabled = false;
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
|
||||||
const LOG_PAGE_SIZE = 50;
|
const LOG_PAGE_SIZE = 50;
|
||||||
let logOffset = 0;
|
let logOffset = 0;
|
||||||
let logTotal = 0;
|
let logTotal = 0;
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from .cache import SoasRow, UrdEdge
|
|||||||
|
|
||||||
log = logging.getLogger("festinger.recollection")
|
log = logging.getLogger("festinger.recollection")
|
||||||
|
|
||||||
ZERO_HIT_TEMPLATE = "? {concept}: no recollection stored yet."
|
ZERO_HIT_TEMPLATE = "You can't remember what {concept} is. Ask."
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user