Adding llm to do this

This commit is contained in:
2026-04-21 18:32:21 +02:00
parent 314f145740
commit 128dd653e7
3 changed files with 125 additions and 70 deletions
+53 -14
View File
@@ -84,27 +84,67 @@ async def _call_openai(model: ModelConfig, prompt: str) -> str:
# Structured prompts # Structured prompts
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
WRITE_PROMPT_TEMPLATE = """You are a knowledge extraction assistant. CONTEXT_EXTRACT_PROMPT_TEMPLATE = """You are a knowledge extraction assistant helping build a semantic memory graph.
Given the concept: "{concept}" The following words appear in the conversation below and are NOT in the standard English dictionary — they are likely domain-specific terms that should be remembered:
{concepts}
And these known dimensions: {dimensions} Read the conversation excerpt and, for each concept above, generate relationship triples IF the excerpt provides enough context to make a confident assertion. Only assert what is clearly evidenced in the text — do not guess or hallucinate.
Return a JSON array of triples describing what you know about this concept. CONVERSATION EXCERPT:
Each triple must have these fields: {context}
- "parent": the containing concept (string)
- "dimension": one of the known dimensions above, or a new specific one if none fit
- "is_isa": true if this is a classification (ISA), false if membership/containment (ISPART)
- "confidence": 0.0 to 1.0
Return ONLY the JSON array. No explanation. Example: Return a JSON array of triples. Each triple must have:
- "concept": one of the domain words listed above (exactly as spelled)
- "parent": the broader category or container (e.g. "software-repository", "glitch-university")
- "dimension": one of [{dimensions}] or a new specific one if none fit
- "is_isa": true if concept IS A type of parent (classification), false if concept BELONGS TO parent (containment)
- "confidence": 0.01.0 reflecting how clearly the excerpt supports this assertion
Return ONLY the JSON array. No explanation. If no confident assertions can be made, return [].
Example:
[ [
{{"parent": "software-repository", "dimension": "type", "is_isa": true, "confidence": 0.9}}, {{"concept": "gnommoweb", "parent": "software-repository", "dimension": "type", "is_isa": true, "confidence": 0.9}},
{{"parent": "glitch-university", "dimension": "membership", "is_isa": false, "confidence": 0.85}} {{"concept": "gnommoweb", "parent": "glitch-university", "dimension": "membership", "is_isa": false, "confidence": 0.85}}
] ]
""" """
@dataclass
class ContextTriple:
concept: str
parent: str
dimension: str
is_isa: bool
confidence: float
def parse_context_triples(response: str) -> list[ContextTriple]:
"""Parse JSON array of context-aware triples from LLM response."""
try:
start = response.find("[")
end = response.rfind("]") + 1
if start == -1 or end == 0:
return []
data = json.loads(response[start:end])
triples = []
for item in data:
if not isinstance(item, dict):
continue
triples.append(ContextTriple(
concept=str(item.get("concept", "")).strip().lower(),
parent=str(item.get("parent", "")).strip().lower(),
dimension=str(item.get("dimension", "type")).strip().lower(),
is_isa=bool(item.get("is_isa", True)),
confidence=float(item.get("confidence", 0.7)),
))
return triples
except (json.JSONDecodeError, ValueError) as e:
log.warning("failed to parse context triples: %s", e)
return []
# Kept for backwards-compatibility with the resolution job prompts below.
@dataclass @dataclass
class LLMTriple: class LLMTriple:
parent: str parent: str
@@ -114,9 +154,8 @@ class LLMTriple:
def parse_llm_triples(response: str) -> list[LLMTriple]: def parse_llm_triples(response: str) -> list[LLMTriple]:
"""Parse JSON array of triples from LLM response.""" """Parse JSON array of single-concept triples (used by resolution job only)."""
try: try:
# Find the JSON array in the response
start = response.find("[") start = response.find("[")
end = response.rfind("]") + 1 end = response.rfind("]") + 1
if start == -1 or end == 0: if start == -1 or end == 0:
+10 -18
View File
@@ -44,7 +44,7 @@ from .cue_scanner import scan_cues
from .recollection import build_recollection_block, inject_recollection from .recollection import build_recollection_block, inject_recollection
from .resolution_job import run_resolution_job, last_run_timestamp from .resolution_job import run_resolution_job, last_run_timestamp
from .tokenizer import tokenize from .tokenizer import tokenize
from .write_queue import enqueue_concept, enqueue_cue, start_worker, stop_worker from .write_queue import enqueue_context_extract, enqueue_cue, start_worker, stop_worker
from .urd_writer import InsertRequest, insert_urd_edge from .urd_writer import InsertRequest, insert_urd_edge
from .wordnet import import_wordnet, CITATION as WORDNET_CITATION from .wordnet import import_wordnet, CITATION as WORDNET_CITATION
from .test_scenarios import SCENARIOS, seed_scenario, reset_scenario from .test_scenarios import SCENARIOS, seed_scenario, reset_scenario
@@ -528,11 +528,10 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
# 2. Tokenise + update saliency # 2. Tokenise + update saliency
tokens = tokenize(prompt_text) tokens = tokenize(prompt_text)
salient_for_read: list[int] = [] salient_for_read: list[int] = []
salient_for_write: list[str] = []
# Novel words found this prompt that aren't in the cache yet. # Novel domain words found in this turn — not in the standard dictionary.
# We cap at MAX_NOVEL_PER_PROMPT to avoid flooding on large system prompts. # Capped to avoid flooding on unexpectedly large turns.
MAX_NOVEL_PER_PROMPT = 5 MAX_NOVEL_PER_PROMPT = 8
novel_this_prompt: list[str] = [] novel_this_prompt: list[str] = []
for token in tokens: for token in tokens:
@@ -540,7 +539,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
if soas_row is None: if soas_row is None:
# Token absent from dictionary → candidate novel domain word. # Token absent from dictionary → candidate novel domain word.
# Skip structural tokens (paths, versions, numbers) and apply a per-prompt cap. # Skip structural tokens (paths, versions, numbers).
if not _is_structural_token(token) and len(novel_this_prompt) < MAX_NOVEL_PER_PROMPT: if not _is_structural_token(token) and len(novel_this_prompt) < MAX_NOVEL_PER_PROMPT:
novel_this_prompt.append(token) novel_this_prompt.append(token)
continue continue
@@ -554,16 +553,6 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
if soas_row.saliency >= read_threshold: if soas_row.saliency >= read_threshold:
salient_for_read.append(soas_row.id) salient_for_read.append(soas_row.id)
# Only enqueue for LLM write if the concept already has URD edges —
# i.e. we know *something* about it and may want to expand that knowledge.
# Never enqueue freshly-novel words: let the conversation teach us instead.
if (
soas_row.saliency >= write_threshold
and soas_row.novelty > 0.0
and cache.urd_by_concept.get(soas_row.id)
):
salient_for_write.append(token)
# Create SOAS entries for novel words and add them to the read list. # Create SOAS entries for novel words and add them to the read list.
# Capture first-seen context so zero-hit recollection can include a hint. # Capture first-seen context so zero-hit recollection can include a hint.
for token in novel_this_prompt: for token in novel_this_prompt:
@@ -571,8 +560,11 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
soas_row = await create_novel_soas(pool, token, context=ctx) soas_row = await create_novel_soas(pool, token, context=ctx)
salient_for_read.append(soas_row.id) salient_for_read.append(soas_row.id)
for token in salient_for_write: # Enqueue context-aware LLM extraction for all novel words found this turn.
await enqueue_concept(token) # The LLM reads the actual conversation text and extracts relationships from
# evidence — one call per turn, not one per concept.
if novel_this_prompt:
await enqueue_context_extract(novel_this_prompt, prompt_text)
if not salient_for_read: if not salient_for_read:
return body return body
+62 -38
View File
@@ -18,7 +18,7 @@ from .cue_scanner import CueTriple
from .db import get_or_create_soas, get_config from .db import get_or_create_soas, get_config
from .llm_client import ( from .llm_client import (
ModelConfig, get_model_config, call_llm, ModelConfig, get_model_config, call_llm,
WRITE_PROMPT_TEMPLATE, parse_llm_triples, CONTEXT_EXTRACT_PROMPT_TEMPLATE, parse_context_triples,
) )
from .urd_writer import InsertRequest, insert_urd_edge from .urd_writer import InsertRequest, insert_urd_edge
from . import cache from . import cache
@@ -27,10 +27,14 @@ log = logging.getLogger("festinger.write_queue")
@dataclass @dataclass
class WriteRequest: class ContextExtractRequest:
"""A concept that crossed the write threshold — needs LLM-assisted classification.""" """
concept_token: str Novel concepts seen in a single conversation turn, with the turn text as context.
trigger: str = "saliency" # 'saliency' | 'cue' The LLM reads the context and extracts relationship triples from evidence in the text —
much more accurate than asking about concepts in isolation.
"""
concept_tokens: list[str]
context_text: str
@dataclass @dataclass
@@ -48,11 +52,12 @@ _LLM_CONCURRENCY = 2
_llm_semaphore: asyncio.Semaphore | None = None _llm_semaphore: asyncio.Semaphore | None = None
async def enqueue_concept(token: str) -> None: async def enqueue_context_extract(tokens: list[str], context: str) -> None:
"""Enqueue a context-aware extraction request for a batch of novel concepts."""
try: try:
_queue.put_nowait(WriteRequest(concept_token=token)) _queue.put_nowait(ContextExtractRequest(concept_tokens=tokens, context_text=context))
except asyncio.QueueFull: except asyncio.QueueFull:
log.warning("write queue full — dropping concept: %s", token) log.warning("write queue full — dropping context extract for: %s", tokens)
async def enqueue_cue(triple: CueTriple) -> None: async def enqueue_cue(triple: CueTriple) -> None:
@@ -89,10 +94,12 @@ async def _worker(pool: asyncpg.Pool) -> None:
if isinstance(item, CueWriteRequest): if isinstance(item, CueWriteRequest):
# Fast path: no LLM involved, process inline. # Fast path: no LLM involved, process inline.
await _process_cue(pool, item.triple) await _process_cue(pool, item.triple)
elif isinstance(item, WriteRequest): elif isinstance(item, ContextExtractRequest):
# Slow path: fire off without awaiting so the worker stays free. # Slow path: fire off without awaiting so the worker stays free.
asyncio.create_task( asyncio.create_task(
_process_concept_guarded(pool, item.concept_token) _process_context_extract_guarded(
pool, item.concept_tokens, item.context_text
)
) )
except Exception as e: except Exception as e:
log.exception("write queue worker error: %s", e) log.exception("write queue worker error: %s", e)
@@ -100,14 +107,18 @@ async def _worker(pool: asyncpg.Pool) -> None:
_queue.task_done() _queue.task_done()
async def _process_concept_guarded(pool: asyncpg.Pool, concept_token: str) -> None: async def _process_context_extract_guarded(
"""Wrapper that acquires the LLM semaphore before calling the model.""" pool: asyncpg.Pool,
concept_tokens: list[str],
context_text: str,
) -> None:
"""Wrapper that acquires the LLM semaphore before context extraction."""
assert _llm_semaphore is not None assert _llm_semaphore is not None
async with _llm_semaphore: async with _llm_semaphore:
try: try:
await _process_concept(pool, concept_token) await _process_context_extract(pool, concept_tokens, context_text)
except Exception as e: except Exception as e:
log.exception("concept LLM task error for %s: %s", concept_token, e) log.exception("context extract task error for %s: %s", concept_tokens, e)
async def stop_worker() -> None: async def stop_worker() -> None:
@@ -138,11 +149,22 @@ async def _process_cue(pool: asyncpg.Pool, triple: CueTriple) -> None:
log.info("cue triple collision: %s", collision) log.info("cue triple collision: %s", collision)
async def _process_concept(pool: asyncpg.Pool, concept_token: str) -> None: async def _process_context_extract(
"""Call cloud LLM to classify the concept, then insert all returned triples.""" pool: asyncpg.Pool,
concept_tokens: list[str],
context_text: str,
) -> None:
"""
Ask the local LLM to extract relationships for novel concepts from the
actual conversation context. One LLM call per prompt turn, not per concept.
This is fundamentally better than asking about concepts in isolation:
the model reads what was actually said about e.g. 'gnommoweb' and asserts
only what the text supports — no hallucination, evidence-grounded confidence.
"""
write_model_id = await get_config(pool, "write_model_id") write_model_id = await get_config(pool, "write_model_id")
if not write_model_id: if not write_model_id:
log.debug("no write_model_id configured — skipping LLM write for %s", concept_token) log.debug("no write_model_id configured — skipping context extract")
return return
model = await get_model_config(pool, write_model_id) model = await get_model_config(pool, write_model_id)
@@ -150,32 +172,35 @@ async def _process_concept(pool: asyncpg.Pool, concept_token: str) -> None:
log.warning("write_model_id=%s not found in models table", write_model_id) log.warning("write_model_id=%s not found in models table", write_model_id)
return return
known_dims = list(cache.soas_by_token.keys())
# Keep only seed dimensions + short list for prompt brevity
seed_dims = ["type", "membership", "runs-on", "tech", "owned-by", "geography"] seed_dims = ["type", "membership", "runs-on", "tech", "owned-by", "geography"]
dimensions_str = ", ".join(seed_dims) prompt = CONTEXT_EXTRACT_PROMPT_TEMPLATE.format(
concepts=", ".join(concept_tokens),
prompt = WRITE_PROMPT_TEMPLATE.format( context=context_text[:3000], # cap to avoid exceeding model context
concept=concept_token, dimensions=", ".join(seed_dims),
dimensions=dimensions_str,
) )
try: try:
response = await call_llm(model, prompt) response = await call_llm(model, prompt)
except Exception as e: except Exception as e:
log.warning("LLM call failed for concept %s: %s", concept_token, e) log.warning("LLM call failed for context extract %s: %s", concept_tokens, e)
return return
triples = parse_llm_triples(response) triples = parse_context_triples(response)
if not triples: if not triples:
log.info("LLM returned no triples for concept: %s", concept_token) log.info("LLM returned no context triples for: %s", concept_tokens)
return return
subj_row = await get_or_create_soas(pool, concept_token) concept_set = set(concept_tokens)
inserted = 0
for t in triples: for t in triples:
# Reject any concept the LLM invented that wasn't in our list
if not t.concept or t.concept not in concept_set:
log.debug("context extract: ignoring hallucinated concept %r", t.concept)
continue
if not t.parent or not t.dimension: if not t.parent or not t.dimension:
continue continue
subj_row = await get_or_create_soas(pool, t.concept)
parent_row = await get_or_create_soas(pool, t.parent) parent_row = await get_or_create_soas(pool, t.parent)
dim_row = await get_or_create_soas(pool, t.dimension) dim_row = await get_or_create_soas(pool, t.dimension)
@@ -185,14 +210,13 @@ async def _process_concept(pool: asyncpg.Pool, concept_token: str) -> None:
dim_id=dim_row.id, dim_id=dim_row.id,
is_isa=t.is_isa, is_isa=t.is_isa,
confidence=t.confidence, confidence=t.confidence,
source="cloud_llm", source="context_llm",
) )
await insert_urd_edge(pool, req) collision = await insert_urd_edge(pool, req)
if not collision:
inserted += 1
# Mark concept as confirmed — set novelty=1.0 log.info(
async with pool.acquire() as conn: "context extract complete: concepts=%s%d triples → %d inserted",
await conn.execute( concept_tokens, len(triples), inserted,
"UPDATE soas SET novelty = 1.0 WHERE id = $1", subj_row.id )
)
if concept_token in cache.soas_by_token:
cache.soas_by_token[concept_token].novelty = 1.0