Add LLM-based context extraction for novel words (one call per turn, replacing per-concept enqueueing)

This commit is contained in:
2026-04-21 18:32:21 +02:00
parent 314f145740
commit 128dd653e7
3 changed files with 125 additions and 70 deletions
+10 -18
View File
@@ -44,7 +44,7 @@ from .cue_scanner import scan_cues
from .recollection import build_recollection_block, inject_recollection
from .resolution_job import run_resolution_job, last_run_timestamp
from .tokenizer import tokenize
from .write_queue import enqueue_concept, enqueue_cue, start_worker, stop_worker
from .write_queue import enqueue_context_extract, enqueue_cue, start_worker, stop_worker
from .urd_writer import InsertRequest, insert_urd_edge
from .wordnet import import_wordnet, CITATION as WORDNET_CITATION
from .test_scenarios import SCENARIOS, seed_scenario, reset_scenario
@@ -528,11 +528,10 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
# 2. Tokenise + update saliency
tokens = tokenize(prompt_text)
salient_for_read: list[int] = []
salient_for_write: list[str] = []
# Novel words found this prompt that aren't in the cache yet.
# We cap at MAX_NOVEL_PER_PROMPT to avoid flooding on large system prompts.
MAX_NOVEL_PER_PROMPT = 5
# Novel domain words found in this turn — not in the standard dictionary.
# Capped to avoid flooding on unexpectedly large turns.
MAX_NOVEL_PER_PROMPT = 8
novel_this_prompt: list[str] = []
for token in tokens:
@@ -540,7 +539,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
if soas_row is None:
# Token absent from dictionary → candidate novel domain word.
# Skip structural tokens (paths, versions, numbers) and apply a per-prompt cap.
# Skip structural tokens (paths, versions, numbers).
if not _is_structural_token(token) and len(novel_this_prompt) < MAX_NOVEL_PER_PROMPT:
novel_this_prompt.append(token)
continue
@@ -554,16 +553,6 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
if soas_row.saliency >= read_threshold:
salient_for_read.append(soas_row.id)
# Only enqueue for LLM write if the concept already has URD edges —
# i.e. we know *something* about it and may want to expand that knowledge.
# Never enqueue freshly-novel words: let the conversation teach us instead.
if (
soas_row.saliency >= write_threshold
and soas_row.novelty > 0.0
and cache.urd_by_concept.get(soas_row.id)
):
salient_for_write.append(token)
# Create SOAS entries for novel words and add them to the read list.
# Capture first-seen context so zero-hit recollection can include a hint.
for token in novel_this_prompt:
@@ -571,8 +560,11 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
soas_row = await create_novel_soas(pool, token, context=ctx)
salient_for_read.append(soas_row.id)
for token in salient_for_write:
await enqueue_concept(token)
# Enqueue context-aware LLM extraction for all novel words found this turn.
# The LLM reads the actual conversation text and extracts relationships from
# evidence — one call per turn, not one per concept.
if novel_this_prompt:
await enqueue_context_extract(novel_this_prompt, prompt_text)
if not salient_for_read:
return body