Adding sanity to the recollection decider

This commit is contained in:
2026-04-21 19:09:25 +02:00
parent 128dd653e7
commit ccbb5b2d45
4 changed files with 158 additions and 90 deletions
+23 -5
View File
@@ -250,10 +250,16 @@ async def get_or_create_soas(pool: asyncpg.Pool, token: str) -> SoasRow:
# Novel-word bootstrap
# ---------------------------------------------------------------------------
# Saliency assigned to a word seen for the first time that is not in the
# standard English dictionary. Must be above saliency_read_threshold (0.5)
# so the recollection engine immediately picks it up.
NOVEL_INITIAL_SALIENCY = 2.0
# Saliency for a word first seen that is not in the standard English dictionary.
# Kept BELOW saliency_read_threshold (default 0.5) so the concept doesn't
# surface in recollection until the LLM has confirmed it is meaningful.
# Once the LLM inserts a URD edge, saliency is raised to NOVEL_CONFIRMED_SALIENCY.
NOVEL_INITIAL_SALIENCY = 0.1
# Saliency set after the LLM confirms a concept by generating a URD triple.
# Must be well above saliency_read_threshold so the concept becomes a
# recollection attractor on subsequent turns.
NOVEL_CONFIRMED_SALIENCY = 2.0
async def create_novel_soas(pool: asyncpg.Pool, token: str, context: str = "") -> SoasRow:
@@ -375,7 +381,19 @@ async def flush_encounter_deltas(pool: asyncpg.Pool) -> None:
new_count = (row.encounter_count if row else 0)
# novelty = 0 for common English words (pre-seeded)
is_common = (row.novelty == 0.0 and row.saliency == 0.0) if row else False
new_saliency = recalculate_saliency(new_count, is_common)
# Unconfirmed novel concepts (novelty > 0, no URD edges yet) must not
# be promoted above the read threshold by encounter-count alone.
# Their saliency is raised explicitly when the LLM confirms them.
is_unconfirmed_novel = (
row is not None
and row.novelty > 0.0
and not cache.urd_by_concept.get(soas_id)
)
if is_unconfirmed_novel:
new_saliency = row.saliency # preserve low saliency until LLM confirms
else:
new_saliency = recalculate_saliency(new_count, is_common)
await conn.execute(
"""
+12 -11
View File
@@ -84,24 +84,25 @@ async def _call_openai(model: ModelConfig, prompt: str) -> str:
# Structured prompts
# ---------------------------------------------------------------------------
CONTEXT_EXTRACT_PROMPT_TEMPLATE = """You are a knowledge extraction assistant helping build a semantic memory graph.
CONTEXT_DISCOVER_PROMPT_TEMPLATE = """You are building a semantic memory graph for an AI agent.
The following words appear in the conversation below and are NOT in the standard English dictionary — they are likely domain-specific terms that should be remembered:
{concepts}
The following words appeared in the user message and are NOT in the standard English dictionary. They may be domain-specific terms worth remembering, or they may be typos/noise:
{candidates}
Read the conversation excerpt and, for each concept above, generate relationship triples IF the excerpt provides enough context to make a confident assertion. Only assert what is clearly evidenced in the text — do not guess or hallucinate.
Read the user message and evaluate each candidate. For candidates that ARE real domain concepts (project names, system names, proper nouns, technical identifiers), generate a relationship triple ONLY IF the message provides clear evidence. For typos, noise, or common words written oddly, return nothing.
CONVERSATION EXCERPT:
USER MESSAGE:
{context}
Return a JSON array of triples. Each triple must have:
- "concept": one of the domain words listed above (exactly as spelled)
- "parent": the broader category or container (e.g. "software-repository", "glitch-university")
Return a JSON array. Each item must have:
- "concept": one of the candidate words above (use the exact spelling, lowercase)
- "parent": what it is or belongs to (e.g. "software-repository", "glitch-university")
- "dimension": one of [{dimensions}] or a new specific one if none fit
- "is_isa": true if concept IS A type of parent (classification), false if concept BELONGS TO parent (containment)
- "confidence": 0.01.0 reflecting how clearly the excerpt supports this assertion
- "confidence": 0.01.0 reflecting how clearly the message supports this
Return ONLY the JSON array. No explanation. If no confident assertions can be made, return [].
Only include candidates you are confident are real domain concepts. Return [] if none qualify.
Return ONLY the JSON array. No explanation.
Example:
[
{{"concept": "gnommoweb", "parent": "software-repository", "dimension": "type", "is_isa": true, "confidence": 0.9}},
@@ -120,7 +121,7 @@ class ContextTriple:
def parse_context_triples(response: str) -> list[ContextTriple]:
"""Parse JSON array of context-aware triples from LLM response."""
"""Parse JSON array of discovered triples from LLM response."""
try:
start = response.find("[")
end = response.rfind("]") + 1
+56 -30
View File
@@ -44,7 +44,7 @@ from .cue_scanner import scan_cues
from .recollection import build_recollection_block, inject_recollection
from .resolution_job import run_resolution_job, last_run_timestamp
from .tokenizer import tokenize
from .write_queue import enqueue_context_extract, enqueue_cue, start_worker, stop_worker
from .write_queue import enqueue_context_discover, enqueue_cue, start_worker, stop_worker
from .urd_writer import InsertRequest, insert_urd_edge
from .wordnet import import_wordnet, CITATION as WORDNET_CITATION
from .test_scenarios import SCENARIOS, seed_scenario, reset_scenario
@@ -484,6 +484,21 @@ def extract_prompt_text(body: dict, path: str) -> str:
return body.get("prompt", "")
def _last_user_message_text(body: dict, path: str) -> str:
"""
Extract only the last user message for the write path.
Agent responses, thinking traces, and system prompts are excluded —
they are noise for concept discovery.
"""
if path in ("/api/chat", "/v1/chat/completions", "/v1/messages"):
messages = body.get("messages", [])
last_user = next((m for m in reversed(messages) if m.get("role") == "user"), None)
if last_user:
return " ".join(_extract_text_strings(last_user.get("content", "")))
return ""
return body.get("prompt", "")
def inject_recollection_anthropic(body: dict, block: str) -> dict:
"""
Inject a recollection block into an Anthropic Messages API request.
@@ -512,69 +527,80 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
Returns a (possibly modified) body dict with the recollection block injected.
"""
read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5"))
write_threshold = float(await get_config(pool, "saliency_write_threshold", "1.2"))
conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
prompt_text = extract_prompt_text(body, path)
if not prompt_text.strip():
# Extract only the last user message — agent responses and reasoning traces
# are noise for both cue scanning and concept discovery.
user_text = _last_user_message_text(body, path)
if not user_text.strip():
return body
# 1. Scan for explicit relationship cues (bypass threshold)
cues = scan_cues(prompt_text)
for cue in cues:
# 1. Scan user message for explicit relationship cues (fast, inline, bypasses LLM).
for cue in scan_cues(user_text):
await enqueue_cue(cue)
# 2. Tokenise + update saliency
# 2. Tokenise the recent context (last user + last assistant) for the read path.
# Novel words from the user turn are also collected as LLM candidates.
prompt_text = extract_prompt_text(body, path)
tokens = tokenize(prompt_text)
salient_for_read: list[int] = []
# Novel domain words found in this turn — not in the standard dictionary.
# Capped to avoid flooding on unexpectedly large turns.
MAX_NOVEL_PER_PROMPT = 8
novel_this_prompt: list[str] = []
# Candidate novel tokens from the USER message only — structural tokens
# (paths, versions, numbers) are filtered out. Capped to avoid flooding
# on very long messages.
MAX_NOVEL_PER_TURN = 8
novel_candidates: list[str] = []
# Only collect candidates from user-side tokens
user_tokens = set(tokenize(user_text))
for token in tokens:
soas_row = cache.soas_by_token.get(token)
if soas_row is None:
# Token absent from dictionary candidate novel domain word.
# Skip structural tokens (paths, versions, numbers).
if not _is_structural_token(token) and len(novel_this_prompt) < MAX_NOVEL_PER_PROMPT:
novel_this_prompt.append(token)
# Token absent from cache entirely candidate domain word.
# Restrict to user-side tokens so we don't mine agent responses.
if (
token in user_tokens
and not _is_structural_token(token)
and len(novel_candidates) < MAX_NOVEL_PER_TURN
):
novel_candidates.append(token)
continue
if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
# Common English word pre-seeded from the dictionary — not interesting.
# Common English word — skip.
continue
cache.record_encounter(soas_row.id)
# Only surface in recollection if saliency is above threshold.
# Unconfirmed novel words (saliency=NOVEL_INITIAL_SALIENCY=0.1) are
# deliberately kept below the threshold until the LLM confirms them.
if soas_row.saliency >= read_threshold:
salient_for_read.append(soas_row.id)
# Create SOAS entries for novel words and add them to the read list.
# Capture first-seen context so zero-hit recollection can include a hint.
for token in novel_this_prompt:
ctx = _sentence_containing(prompt_text, token)
soas_row = await create_novel_soas(pool, token, context=ctx)
salient_for_read.append(soas_row.id)
# Register novel candidates in SOAS at low saliency (below read threshold).
# They become recollection attractors only after the LLM confirms them.
for token in novel_candidates:
ctx = _sentence_containing(user_text, token)
await create_novel_soas(pool, token, context=ctx)
# Do NOT add to salient_for_read — no zero-hit recollection until confirmed.
# Enqueue context-aware LLM extraction for all novel words found this turn.
# The LLM reads the actual conversation text and extracts relationships from
# evidence — one call per turn, not one per concept.
if novel_this_prompt:
await enqueue_context_extract(novel_this_prompt, prompt_text)
# 3. Enqueue for LLM-driven discovery if there are candidates to evaluate.
if novel_candidates and len(user_text) >= 20:
await enqueue_context_discover(user_text, novel_candidates)
if not salient_for_read:
return body
# 3. Build recollection block
# 5. Build recollection block
block = build_recollection_block(salient_for_read, conf_floor, recency_days)
if not block:
return body
# 4. Inject into messages
# 6. Inject into messages
if path == "/api/chat" or path == "/v1/chat/completions":
body = dict(body)
body["messages"] = inject_recollection(body.get("messages", []), block)
+67 -44
View File
@@ -15,10 +15,10 @@ import asyncpg
from .cache import SoasRow
from .cue_scanner import CueTriple
from .db import get_or_create_soas, get_config
from .db import get_or_create_soas, get_config, create_novel_soas, NOVEL_CONFIRMED_SALIENCY
from .llm_client import (
ModelConfig, get_model_config, call_llm,
CONTEXT_EXTRACT_PROMPT_TEMPLATE, parse_context_triples,
CONTEXT_DISCOVER_PROMPT_TEMPLATE, parse_context_triples,
)
from .urd_writer import InsertRequest, insert_urd_edge
from . import cache
@@ -27,14 +27,16 @@ log = logging.getLogger("festinger.write_queue")
@dataclass
class ContextExtractRequest:
class ContextDiscoverRequest:
"""
Novel concepts seen in a single conversation turn, with the turn text as context.
The LLM reads the context and extracts relationship triples from evidence in the text —
much more accurate than asking about concepts in isolation.
User message text plus candidate tokens (words absent from the English
dictionary) submitted for LLM-driven concept discovery.
candidate_tokens are hints for the LLM — it decides which are real domain
concepts vs typos/noise, and extracts relationship triples from the text.
"""
concept_tokens: list[str]
context_text: str
user_text: str
candidate_tokens: list[str]
@dataclass
@@ -52,12 +54,15 @@ _LLM_CONCURRENCY = 2
_llm_semaphore: asyncio.Semaphore | None = None
async def enqueue_context_extract(tokens: list[str], context: str) -> None:
"""Enqueue a context-aware extraction request for a batch of novel concepts."""
async def enqueue_context_discover(user_text: str, candidate_tokens: list[str]) -> None:
"""Enqueue a user message for LLM-driven concept discovery and relation extraction."""
try:
_queue.put_nowait(ContextExtractRequest(concept_tokens=tokens, context_text=context))
_queue.put_nowait(ContextDiscoverRequest(
user_text=user_text,
candidate_tokens=candidate_tokens,
))
except asyncio.QueueFull:
log.warning("write queue full — dropping context extract for: %s", tokens)
log.warning("write queue full — dropping context discover")
async def enqueue_cue(triple: CueTriple) -> None:
@@ -94,11 +99,11 @@ async def _worker(pool: asyncpg.Pool) -> None:
if isinstance(item, CueWriteRequest):
# Fast path: no LLM involved, process inline.
await _process_cue(pool, item.triple)
elif isinstance(item, ContextExtractRequest):
elif isinstance(item, ContextDiscoverRequest):
# Slow path: fire off without awaiting so the worker stays free.
asyncio.create_task(
_process_context_extract_guarded(
pool, item.concept_tokens, item.context_text
_process_context_discover_guarded(
pool, item.user_text, item.candidate_tokens
)
)
except Exception as e:
@@ -107,18 +112,18 @@ async def _worker(pool: asyncpg.Pool) -> None:
_queue.task_done()
async def _process_context_extract_guarded(
async def _process_context_discover_guarded(
pool: asyncpg.Pool,
concept_tokens: list[str],
context_text: str,
user_text: str,
candidate_tokens: list[str],
) -> None:
"""Wrapper that acquires the LLM semaphore before context extraction."""
"""Wrapper that acquires the LLM semaphore before concept discovery."""
assert _llm_semaphore is not None
async with _llm_semaphore:
try:
await _process_context_extract(pool, concept_tokens, context_text)
await _process_context_discover(pool, user_text, candidate_tokens)
except Exception as e:
log.exception("context extract task error for %s: %s", concept_tokens, e)
log.exception("context discover task error: %s", e)
async def stop_worker() -> None:
@@ -149,22 +154,24 @@ async def _process_cue(pool: asyncpg.Pool, triple: CueTriple) -> None:
log.info("cue triple collision: %s", collision)
async def _process_context_extract(
async def _process_context_discover(
pool: asyncpg.Pool,
concept_tokens: list[str],
context_text: str,
user_text: str,
candidate_tokens: list[str],
) -> None:
"""
Ask the local LLM to extract relationships for novel concepts from the
actual conversation context. One LLM call per prompt turn, not per concept.
Ask the local LLM to evaluate candidate tokens (dictionary misses) and extract
relationship triples for those it judges to be real domain concepts.
This is fundamentally better than asking about concepts in isolation:
the model reads what was actually said about e.g. 'gnommoweb' and asserts
only what the text supports — no hallucination, evidence-grounded confidence.
Saliency feedback loop:
- Confirmed concepts (triples inserted) → saliency raised to NOVEL_CONFIRMED_SALIENCY
so they surface as recollection hits on subsequent turns.
- Rejected concepts (no triples) → saliency stays low (NOVEL_INITIAL_SALIENCY),
effectively hiding typos and noise from the recollection engine.
"""
write_model_id = await get_config(pool, "write_model_id")
if not write_model_id:
log.debug("no write_model_id configured — skipping context extract")
log.debug("no write_model_id configured — skipping context discover")
return
model = await get_model_config(pool, write_model_id)
@@ -173,31 +180,35 @@ async def _process_context_extract(
return
seed_dims = ["type", "membership", "runs-on", "tech", "owned-by", "geography"]
prompt = CONTEXT_EXTRACT_PROMPT_TEMPLATE.format(
concepts=", ".join(concept_tokens),
context=context_text[:3000], # cap to avoid exceeding model context
prompt = CONTEXT_DISCOVER_PROMPT_TEMPLATE.format(
candidates=", ".join(candidate_tokens),
context=user_text[:2000],
dimensions=", ".join(seed_dims),
)
try:
response = await call_llm(model, prompt)
except Exception as e:
log.warning("LLM call failed for context extract %s: %s", concept_tokens, e)
log.warning("LLM call failed for context discover: %s", e)
return
triples = parse_context_triples(response)
if not triples:
log.info("LLM returned no context triples for: %s", concept_tokens)
log.debug("LLM found no domain concepts among candidates: %s", candidate_tokens)
return
concept_set = set(concept_tokens)
inserted = 0
MIN_CONFIDENCE = 0.6
candidate_set = set(candidate_tokens)
confirmed: set[str] = set() # candidates confirmed as real concepts
for t in triples:
# Reject any concept the LLM invented that wasn't in our list
if not t.concept or t.concept not in concept_set:
log.debug("context extract: ignoring hallucinated concept %r", t.concept)
if not t.concept or not t.parent or not t.dimension:
continue
if not t.parent or not t.dimension:
if t.concept not in candidate_set:
log.debug("context discover: ignoring concept not in candidate list: %r", t.concept)
continue
if t.confidence < MIN_CONFIDENCE:
log.debug("context discover: low confidence triple skipped: %s", t)
continue
subj_row = await get_or_create_soas(pool, t.concept)
@@ -214,9 +225,21 @@ async def _process_context_extract(
)
collision = await insert_urd_edge(pool, req)
if not collision:
inserted += 1
confirmed.add(t.concept)
# Raise saliency for confirmed concepts so recollection fires on next encounter.
for concept in confirmed:
async with pool.acquire() as conn:
await conn.execute(
"UPDATE soas SET saliency = $1 WHERE token = $2 AND saliency < $1",
NOVEL_CONFIRMED_SALIENCY, concept,
)
row = cache.soas_by_token.get(concept)
if row and row.saliency < NOVEL_CONFIRMED_SALIENCY:
row.saliency = NOVEL_CONFIRMED_SALIENCY
rejected = candidate_set - confirmed
log.info(
"context extract complete: concepts=%s%d triples → %d inserted",
concept_tokens, len(triples), inserted,
"context discover: candidates=%s confirmed=%s rejected=%s",
candidate_tokens, sorted(confirmed), sorted(rejected),
)