Adding sanity to the recollection decider

This commit is contained in:
2026-04-21 19:09:25 +02:00
parent 128dd653e7
commit ccbb5b2d45
4 changed files with 158 additions and 90 deletions
+22 -4
View File
@@ -250,10 +250,16 @@ async def get_or_create_soas(pool: asyncpg.Pool, token: str) -> SoasRow:
# Novel-word bootstrap # Novel-word bootstrap
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Saliency assigned to a word seen for the first time that is not in the # Saliency for a word first seen that is not in the standard English dictionary.
# standard English dictionary. Must be above saliency_read_threshold (0.5) # Kept BELOW saliency_read_threshold (default 0.5) so the concept doesn't
# so the recollection engine immediately picks it up. # surface in recollection until the LLM has confirmed it is meaningful.
NOVEL_INITIAL_SALIENCY = 2.0 # Once the LLM inserts a URD edge, saliency is raised to NOVEL_CONFIRMED_SALIENCY.
NOVEL_INITIAL_SALIENCY = 0.1
# Saliency set after the LLM confirms a concept by generating a URD triple.
# Must be well above saliency_read_threshold so the concept becomes a
# recollection attractor on subsequent turns.
NOVEL_CONFIRMED_SALIENCY = 2.0
async def create_novel_soas(pool: asyncpg.Pool, token: str, context: str = "") -> SoasRow: async def create_novel_soas(pool: asyncpg.Pool, token: str, context: str = "") -> SoasRow:
@@ -375,6 +381,18 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
new_count = (row.encounter_count if row else 0) new_count = (row.encounter_count if row else 0)
# novelty = 0 for common English words (pre-seeded) # novelty = 0 for common English words (pre-seeded)
is_common = (row.novelty == 0.0 and row.saliency == 0.0) if row else False is_common = (row.novelty == 0.0 and row.saliency == 0.0) if row else False
# Unconfirmed novel concepts (novelty > 0, no URD edges yet) must not
# be promoted above the read threshold by encounter-count alone.
# Their saliency is raised explicitly when the LLM confirms them.
is_unconfirmed_novel = (
row is not None
and row.novelty > 0.0
and not cache.urd_by_concept.get(soas_id)
)
if is_unconfirmed_novel:
new_saliency = row.saliency # preserve low saliency until LLM confirms
else:
new_saliency = recalculate_saliency(new_count, is_common) new_saliency = recalculate_saliency(new_count, is_common)
await conn.execute( await conn.execute(
+12 -11
View File
@@ -84,24 +84,25 @@ async def _call_openai(model: ModelConfig, prompt: str) -> str:
# Structured prompts # Structured prompts
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
CONTEXT_EXTRACT_PROMPT_TEMPLATE = """You are a knowledge extraction assistant helping build a semantic memory graph. CONTEXT_DISCOVER_PROMPT_TEMPLATE = """You are building a semantic memory graph for an AI agent.
The following words appear in the conversation below and are NOT in the standard English dictionary — they are likely domain-specific terms that should be remembered: The following words appeared in the user message and are NOT in the standard English dictionary. They may be domain-specific terms worth remembering, or they may be typos/noise:
{concepts} {candidates}
Read the conversation excerpt and, for each concept above, generate relationship triples IF the excerpt provides enough context to make a confident assertion. Only assert what is clearly evidenced in the text — do not guess or hallucinate. Read the user message and evaluate each candidate. For candidates that ARE real domain concepts (project names, system names, proper nouns, technical identifiers), generate a relationship triple ONLY IF the message provides clear evidence. For typos, noise, or common words written oddly, return nothing.
CONVERSATION EXCERPT: USER MESSAGE:
{context} {context}
Return a JSON array of triples. Each triple must have: Return a JSON array. Each item must have:
- "concept": one of the domain words listed above (exactly as spelled) - "concept": one of the candidate words above (use the exact spelling, lowercase)
- "parent": the broader category or container (e.g. "software-repository", "glitch-university") - "parent": what it is or belongs to (e.g. "software-repository", "glitch-university")
- "dimension": one of [{dimensions}] or a new specific one if none fit - "dimension": one of [{dimensions}] or a new specific one if none fit
- "is_isa": true if concept IS A type of parent (classification), false if concept BELONGS TO parent (containment) - "is_isa": true if concept IS A type of parent (classification), false if concept BELONGS TO parent (containment)
- "confidence": 0.0–1.0 reflecting how clearly the excerpt supports this assertion - "confidence": 0.0–1.0 reflecting how clearly the message supports this
Return ONLY the JSON array. No explanation. If no confident assertions can be made, return []. Only include candidates you are confident are real domain concepts. Return [] if none qualify.
Return ONLY the JSON array. No explanation.
Example: Example:
[ [
{{"concept": "gnommoweb", "parent": "software-repository", "dimension": "type", "is_isa": true, "confidence": 0.9}}, {{"concept": "gnommoweb", "parent": "software-repository", "dimension": "type", "is_isa": true, "confidence": 0.9}},
@@ -120,7 +121,7 @@ class ContextTriple:
def parse_context_triples(response: str) -> list[ContextTriple]: def parse_context_triples(response: str) -> list[ContextTriple]:
"""Parse JSON array of context-aware triples from LLM response.""" """Parse JSON array of discovered triples from LLM response."""
try: try:
start = response.find("[") start = response.find("[")
end = response.rfind("]") + 1 end = response.rfind("]") + 1
+56 -30
View File
@@ -44,7 +44,7 @@ from .cue_scanner import scan_cues
from .recollection import build_recollection_block, inject_recollection from .recollection import build_recollection_block, inject_recollection
from .resolution_job import run_resolution_job, last_run_timestamp from .resolution_job import run_resolution_job, last_run_timestamp
from .tokenizer import tokenize from .tokenizer import tokenize
from .write_queue import enqueue_context_extract, enqueue_cue, start_worker, stop_worker from .write_queue import enqueue_context_discover, enqueue_cue, start_worker, stop_worker
from .urd_writer import InsertRequest, insert_urd_edge from .urd_writer import InsertRequest, insert_urd_edge
from .wordnet import import_wordnet, CITATION as WORDNET_CITATION from .wordnet import import_wordnet, CITATION as WORDNET_CITATION
from .test_scenarios import SCENARIOS, seed_scenario, reset_scenario from .test_scenarios import SCENARIOS, seed_scenario, reset_scenario
@@ -484,6 +484,21 @@ def extract_prompt_text(body: dict, path: str) -> str:
return body.get("prompt", "") return body.get("prompt", "")
def _last_user_message_text(body: dict, path: str) -> str:
"""
Extract only the last user message for the write path.
Agent responses, thinking traces, and system prompts are excluded —
they are noise for concept discovery.
"""
if path in ("/api/chat", "/v1/chat/completions", "/v1/messages"):
messages = body.get("messages", [])
last_user = next((m for m in reversed(messages) if m.get("role") == "user"), None)
if last_user:
return " ".join(_extract_text_strings(last_user.get("content", "")))
return ""
return body.get("prompt", "")
def inject_recollection_anthropic(body: dict, block: str) -> dict: def inject_recollection_anthropic(body: dict, block: str) -> dict:
""" """
Inject a recollection block into an Anthropic Messages API request. Inject a recollection block into an Anthropic Messages API request.
@@ -512,69 +527,80 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
Returns a (possibly modified) body dict with the recollection block injected. Returns a (possibly modified) body dict with the recollection block injected.
""" """
read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5")) read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5"))
write_threshold = float(await get_config(pool, "saliency_write_threshold", "1.2"))
conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6")) conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
recency_days = int(await get_config(pool, "recollection_recency_days", "90")) recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
prompt_text = extract_prompt_text(body, path) # Extract only the last user message — agent responses and reasoning traces
if not prompt_text.strip(): # are noise for both cue scanning and concept discovery.
user_text = _last_user_message_text(body, path)
if not user_text.strip():
return body return body
# 1. Scan for explicit relationship cues (bypass threshold) # 1. Scan user message for explicit relationship cues (fast, inline, bypasses LLM).
cues = scan_cues(prompt_text) for cue in scan_cues(user_text):
for cue in cues:
await enqueue_cue(cue) await enqueue_cue(cue)
# 2. Tokenise + update saliency # 2. Tokenise the recent context (last user + last assistant) for the read path.
# Novel words from the user turn are also collected as LLM candidates.
prompt_text = extract_prompt_text(body, path)
tokens = tokenize(prompt_text) tokens = tokenize(prompt_text)
salient_for_read: list[int] = [] salient_for_read: list[int] = []
# Novel domain words found in this turn — not in the standard dictionary. # Candidate novel tokens from the USER message only — structural tokens
# Capped to avoid flooding on unexpectedly large turns. # (paths, versions, numbers) are filtered out. Capped to avoid flooding
MAX_NOVEL_PER_PROMPT = 8 # on very long messages.
novel_this_prompt: list[str] = [] MAX_NOVEL_PER_TURN = 8
novel_candidates: list[str] = []
# Only collect candidates from user-side tokens
user_tokens = set(tokenize(user_text))
for token in tokens: for token in tokens:
soas_row = cache.soas_by_token.get(token) soas_row = cache.soas_by_token.get(token)
if soas_row is None: if soas_row is None:
# Token absent from dictionary — candidate novel domain word. # Token absent from cache entirely — candidate domain word.
# Skip structural tokens (paths, versions, numbers). # Restrict to user-side tokens so we don't mine agent responses.
if not _is_structural_token(token) and len(novel_this_prompt) < MAX_NOVEL_PER_PROMPT: if (
novel_this_prompt.append(token) token in user_tokens
and not _is_structural_token(token)
and len(novel_candidates) < MAX_NOVEL_PER_TURN
):
novel_candidates.append(token)
continue continue
if soas_row.saliency == 0.0 and soas_row.novelty == 0.0: if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
# Common English word pre-seeded from the dictionary — not interesting. # Common English word — skip.
continue continue
cache.record_encounter(soas_row.id) cache.record_encounter(soas_row.id)
# Only surface in recollection if saliency is above threshold.
# Unconfirmed novel words (saliency=NOVEL_INITIAL_SALIENCY=0.1) are
# deliberately kept below the threshold until the LLM confirms them.
if soas_row.saliency >= read_threshold: if soas_row.saliency >= read_threshold:
salient_for_read.append(soas_row.id) salient_for_read.append(soas_row.id)
# Create SOAS entries for novel words and add them to the read list. # Register novel candidates in SOAS at low saliency (below read threshold).
# Capture first-seen context so zero-hit recollection can include a hint. # They become recollection attractors only after the LLM confirms them.
for token in novel_this_prompt: for token in novel_candidates:
ctx = _sentence_containing(prompt_text, token) ctx = _sentence_containing(user_text, token)
soas_row = await create_novel_soas(pool, token, context=ctx) await create_novel_soas(pool, token, context=ctx)
salient_for_read.append(soas_row.id) # Do NOT add to salient_for_read — no zero-hit recollection until confirmed.
# Enqueue context-aware LLM extraction for all novel words found this turn. # 3. Enqueue for LLM-driven discovery if there are candidates to evaluate.
# The LLM reads the actual conversation text and extracts relationships from if novel_candidates and len(user_text) >= 20:
# evidence — one call per turn, not one per concept. await enqueue_context_discover(user_text, novel_candidates)
if novel_this_prompt:
await enqueue_context_extract(novel_this_prompt, prompt_text)
if not salient_for_read: if not salient_for_read:
return body return body
# 3. Build recollection block # 4. Build recollection block
block = build_recollection_block(salient_for_read, conf_floor, recency_days) block = build_recollection_block(salient_for_read, conf_floor, recency_days)
if not block: if not block:
return body return body
# 4. Inject into messages # 5. Inject into messages
if path == "/api/chat" or path == "/v1/chat/completions": if path == "/api/chat" or path == "/v1/chat/completions":
body = dict(body) body = dict(body)
body["messages"] = inject_recollection(body.get("messages", []), block) body["messages"] = inject_recollection(body.get("messages", []), block)
+68 -45
View File
@@ -15,10 +15,10 @@ import asyncpg
from .cache import SoasRow from .cache import SoasRow
from .cue_scanner import CueTriple from .cue_scanner import CueTriple
from .db import get_or_create_soas, get_config from .db import get_or_create_soas, get_config, create_novel_soas, NOVEL_CONFIRMED_SALIENCY
from .llm_client import ( from .llm_client import (
ModelConfig, get_model_config, call_llm, ModelConfig, get_model_config, call_llm,
CONTEXT_EXTRACT_PROMPT_TEMPLATE, parse_context_triples, CONTEXT_DISCOVER_PROMPT_TEMPLATE, parse_context_triples,
) )
from .urd_writer import InsertRequest, insert_urd_edge from .urd_writer import InsertRequest, insert_urd_edge
from . import cache from . import cache
@@ -27,14 +27,16 @@ log = logging.getLogger("festinger.write_queue")
@dataclass @dataclass
class ContextExtractRequest: class ContextDiscoverRequest:
""" """
Novel concepts seen in a single conversation turn, with the turn text as context. User message text plus candidate tokens (words absent from the English
The LLM reads the context and extracts relationship triples from evidence in the text — dictionary) submitted for LLM-driven concept discovery.
much more accurate than asking about concepts in isolation.
candidate_tokens are hints for the LLM — it decides which are real domain
concepts vs typos/noise, and extracts relationship triples from the text.
""" """
concept_tokens: list[str] user_text: str
context_text: str candidate_tokens: list[str]
@dataclass @dataclass
@@ -52,12 +54,15 @@ _LLM_CONCURRENCY = 2
_llm_semaphore: asyncio.Semaphore | None = None _llm_semaphore: asyncio.Semaphore | None = None
async def enqueue_context_extract(tokens: list[str], context: str) -> None: async def enqueue_context_discover(user_text: str, candidate_tokens: list[str]) -> None:
"""Enqueue a context-aware extraction request for a batch of novel concepts.""" """Enqueue a user message for LLM-driven concept discovery and relation extraction."""
try: try:
_queue.put_nowait(ContextExtractRequest(concept_tokens=tokens, context_text=context)) _queue.put_nowait(ContextDiscoverRequest(
user_text=user_text,
candidate_tokens=candidate_tokens,
))
except asyncio.QueueFull: except asyncio.QueueFull:
log.warning("write queue full — dropping context extract for: %s", tokens) log.warning("write queue full — dropping context discover")
async def enqueue_cue(triple: CueTriple) -> None: async def enqueue_cue(triple: CueTriple) -> None:
@@ -94,11 +99,11 @@ async def _worker(pool: asyncpg.Pool) -> None:
if isinstance(item, CueWriteRequest): if isinstance(item, CueWriteRequest):
# Fast path: no LLM involved, process inline. # Fast path: no LLM involved, process inline.
await _process_cue(pool, item.triple) await _process_cue(pool, item.triple)
elif isinstance(item, ContextExtractRequest): elif isinstance(item, ContextDiscoverRequest):
# Slow path: fire off without awaiting so the worker stays free. # Slow path: fire off without awaiting so the worker stays free.
asyncio.create_task( asyncio.create_task(
_process_context_extract_guarded( _process_context_discover_guarded(
pool, item.concept_tokens, item.context_text pool, item.user_text, item.candidate_tokens
) )
) )
except Exception as e: except Exception as e:
@@ -107,18 +112,18 @@ async def _worker(pool: asyncpg.Pool) -> None:
_queue.task_done() _queue.task_done()
async def _process_context_extract_guarded( async def _process_context_discover_guarded(
pool: asyncpg.Pool, pool: asyncpg.Pool,
concept_tokens: list[str], user_text: str,
context_text: str, candidate_tokens: list[str],
) -> None: ) -> None:
"""Wrapper that acquires the LLM semaphore before context extraction.""" """Wrapper that acquires the LLM semaphore before concept discovery."""
assert _llm_semaphore is not None assert _llm_semaphore is not None
async with _llm_semaphore: async with _llm_semaphore:
try: try:
await _process_context_extract(pool, concept_tokens, context_text) await _process_context_discover(pool, user_text, candidate_tokens)
except Exception as e: except Exception as e:
log.exception("context extract task error for %s: %s", concept_tokens, e) log.exception("context discover task error: %s", e)
async def stop_worker() -> None: async def stop_worker() -> None:
@@ -149,22 +154,24 @@ async def _process_cue(pool: asyncpg.Pool, triple: CueTriple) -> None:
log.info("cue triple collision: %s", collision) log.info("cue triple collision: %s", collision)
async def _process_context_extract( async def _process_context_discover(
pool: asyncpg.Pool, pool: asyncpg.Pool,
concept_tokens: list[str], user_text: str,
context_text: str, candidate_tokens: list[str],
) -> None: ) -> None:
""" """
Ask the local LLM to extract relationships for novel concepts from the Ask the local LLM to evaluate candidate tokens (dictionary misses) and extract
actual conversation context. One LLM call per prompt turn, not per concept. relationship triples for those it judges to be real domain concepts.
This is fundamentally better than asking about concepts in isolation: Saliency feedback loop:
the model reads what was actually said about e.g. 'gnommoweb' and asserts - Confirmed concepts (triples inserted) → saliency raised to NOVEL_CONFIRMED_SALIENCY
only what the text supports — no hallucination, evidence-grounded confidence. so they surface as recollection hits on subsequent turns.
- Rejected concepts (no triples) → saliency stays low (NOVEL_INITIAL_SALIENCY),
effectively hiding typos and noise from the recollection engine.
""" """
write_model_id = await get_config(pool, "write_model_id") write_model_id = await get_config(pool, "write_model_id")
if not write_model_id: if not write_model_id:
log.debug("no write_model_id configured — skipping context extract") log.debug("no write_model_id configured — skipping context discover")
return return
model = await get_model_config(pool, write_model_id) model = await get_model_config(pool, write_model_id)
@@ -173,31 +180,35 @@ async def _process_context_extract(
return return
seed_dims = ["type", "membership", "runs-on", "tech", "owned-by", "geography"] seed_dims = ["type", "membership", "runs-on", "tech", "owned-by", "geography"]
prompt = CONTEXT_EXTRACT_PROMPT_TEMPLATE.format( prompt = CONTEXT_DISCOVER_PROMPT_TEMPLATE.format(
concepts=", ".join(concept_tokens), candidates=", ".join(candidate_tokens),
context=context_text[:3000], # cap to avoid exceeding model context context=user_text[:2000],
dimensions=", ".join(seed_dims), dimensions=", ".join(seed_dims),
) )
try: try:
response = await call_llm(model, prompt) response = await call_llm(model, prompt)
except Exception as e: except Exception as e:
log.warning("LLM call failed for context extract %s: %s", concept_tokens, e) log.warning("LLM call failed for context discover: %s", e)
return return
triples = parse_context_triples(response) triples = parse_context_triples(response)
if not triples: if not triples:
log.info("LLM returned no context triples for: %s", concept_tokens) log.debug("LLM found no domain concepts among candidates: %s", candidate_tokens)
return return
concept_set = set(concept_tokens) MIN_CONFIDENCE = 0.6
inserted = 0 candidate_set = set(candidate_tokens)
confirmed: set[str] = set() # candidates confirmed as real concepts
for t in triples: for t in triples:
# Reject any concept the LLM invented that wasn't in our list if not t.concept or not t.parent or not t.dimension:
if not t.concept or t.concept not in concept_set:
log.debug("context extract: ignoring hallucinated concept %r", t.concept)
continue continue
if not t.parent or not t.dimension: if t.concept not in candidate_set:
log.debug("context discover: ignoring concept not in candidate list: %r", t.concept)
continue
if t.confidence < MIN_CONFIDENCE:
log.debug("context discover: low confidence triple skipped: %s", t)
continue continue
subj_row = await get_or_create_soas(pool, t.concept) subj_row = await get_or_create_soas(pool, t.concept)
@@ -214,9 +225,21 @@ async def _process_context_extract(
) )
collision = await insert_urd_edge(pool, req) collision = await insert_urd_edge(pool, req)
if not collision: if not collision:
inserted += 1 confirmed.add(t.concept)
log.info( # Raise saliency for confirmed concepts so recollection fires on next encounter.
"context extract complete: concepts=%s%d triples → %d inserted", for concept in confirmed:
concept_tokens, len(triples), inserted, async with pool.acquire() as conn:
await conn.execute(
"UPDATE soas SET saliency = $1 WHERE token = $2 AND saliency < $1",
NOVEL_CONFIRMED_SALIENCY, concept,
)
row = cache.soas_by_token.get(concept)
if row and row.saliency < NOVEL_CONFIRMED_SALIENCY:
row.saliency = NOVEL_CONFIRMED_SALIENCY
rejected = candidate_set - confirmed
log.info(
"context discover: candidates=%s confirmed=%s rejected=%s",
candidate_tokens, sorted(confirmed), sorted(rejected),
) )