Add context-aware LLM extraction for novel words
This commit is contained in:
@@ -44,7 +44,7 @@ from .cue_scanner import scan_cues
|
||||
from .recollection import build_recollection_block, inject_recollection
|
||||
from .resolution_job import run_resolution_job, last_run_timestamp
|
||||
from .tokenizer import tokenize
|
||||
from .write_queue import enqueue_concept, enqueue_cue, start_worker, stop_worker
|
||||
from .write_queue import enqueue_context_extract, enqueue_cue, start_worker, stop_worker
|
||||
from .urd_writer import InsertRequest, insert_urd_edge
|
||||
from .wordnet import import_wordnet, CITATION as WORDNET_CITATION
|
||||
from .test_scenarios import SCENARIOS, seed_scenario, reset_scenario
|
||||
@@ -528,11 +528,10 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
|
||||
# 2. Tokenise + update saliency
|
||||
tokens = tokenize(prompt_text)
|
||||
salient_for_read: list[int] = []
|
||||
salient_for_write: list[str] = []
|
||||
|
||||
# Novel words found this prompt that aren't in the cache yet.
|
||||
# We cap at MAX_NOVEL_PER_PROMPT to avoid flooding on large system prompts.
|
||||
MAX_NOVEL_PER_PROMPT = 5
|
||||
# Novel domain words found in this turn — not in the standard dictionary.
|
||||
# Capped to avoid flooding on unexpectedly large turns.
|
||||
MAX_NOVEL_PER_PROMPT = 8
|
||||
novel_this_prompt: list[str] = []
|
||||
|
||||
for token in tokens:
|
||||
@@ -540,7 +539,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
|
||||
|
||||
if soas_row is None:
|
||||
# Token absent from dictionary → candidate novel domain word.
|
||||
# Skip structural tokens (paths, versions, numbers) and apply a per-prompt cap.
|
||||
# Skip structural tokens (paths, versions, numbers).
|
||||
if not _is_structural_token(token) and len(novel_this_prompt) < MAX_NOVEL_PER_PROMPT:
|
||||
novel_this_prompt.append(token)
|
||||
continue
|
||||
@@ -554,16 +553,6 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
|
||||
if soas_row.saliency >= read_threshold:
|
||||
salient_for_read.append(soas_row.id)
|
||||
|
||||
# Only enqueue for LLM write if the concept already has URD edges —
|
||||
# i.e. we know *something* about it and may want to expand that knowledge.
|
||||
# Never enqueue freshly-novel words: let the conversation teach us instead.
|
||||
if (
|
||||
soas_row.saliency >= write_threshold
|
||||
and soas_row.novelty > 0.0
|
||||
and cache.urd_by_concept.get(soas_row.id)
|
||||
):
|
||||
salient_for_write.append(token)
|
||||
|
||||
# Create SOAS entries for novel words and add them to the read list.
|
||||
# Capture first-seen context so zero-hit recollection can include a hint.
|
||||
for token in novel_this_prompt:
|
||||
@@ -571,8 +560,11 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
|
||||
soas_row = await create_novel_soas(pool, token, context=ctx)
|
||||
salient_for_read.append(soas_row.id)
|
||||
|
||||
for token in salient_for_write:
|
||||
await enqueue_concept(token)
|
||||
# Enqueue context-aware LLM extraction for all novel words found this turn.
|
||||
# The LLM reads the actual conversation text and extracts relationships from
|
||||
# evidence — one call per turn, not one per concept.
|
||||
if novel_this_prompt:
|
||||
await enqueue_context_extract(novel_this_prompt, prompt_text)
|
||||
|
||||
if not salient_for_read:
|
||||
return body
|
||||
|
||||
Reference in New Issue
Block a user