diff --git a/plugins/festinger/festinger/main.py b/plugins/festinger/festinger/main.py index af154a8..53a1759 100644 --- a/plugins/festinger/festinger/main.py +++ b/plugins/festinger/festinger/main.py @@ -38,7 +38,7 @@ from .db import ( close_pool, get_config, get_or_create_soas, get_pool, init_schema, bootstrap_dimensions, bootstrap_english_dictionary, warm_cache, reload_urd_cache, - flush_encounter_deltas, create_novel_soas, reset_graph, + flush_encounter_deltas, reset_graph, ) from .loop_detector import apply_mitigations, record_and_check, session_key from .cue_scanner import scan_cues @@ -46,7 +46,7 @@ from .recollection import build_recollection_block, inject_recollection from .resolution_job import run_resolution_job, last_run_timestamp from .tokenizer import tokenize from .llm_client import ModelConfig -from .write_queue import enqueue_context_discover, enqueue_cue, start_worker, stop_worker +from .write_queue import enqueue_cue, start_worker, stop_worker from .urd_writer import InsertRequest, insert_urd_edge from .wordnet import import_wordnet, CITATION as WORDNET_CITATION from .test_scenarios import SCENARIOS, seed_scenario, reset_scenario @@ -438,32 +438,18 @@ def _extract_text_strings(content) -> list[str]: return [] -# Tokens that look structural/technical — skip novel-word detection for these. -# Matches: paths (foo/bar), emails (a@b), file extensions (foo.py), dotted names (1.2.3), -# pure numbers, hex literals/colours. +# Tokens that look structural/technical — kept for potential future use. _STRUCTURAL_RE = re.compile( - r"[/@]" # URL-like separator or email @ - r"|\.\w" # dotted extension or namespace (e.g. foo.py, omega.13) - r"|^\d+$" # pure digits - r"|^\d[\d.]+\d$" # version string like 1.2 or 3.4.5 + r"[/@]" + r"|\.\w" + r"|^\d+$" + r"|^\d[\d.]+\d$" r"|^#[0-9a-f]{3,6}$" # hex colour r"|^0x[0-9a-f]+$", # hex literal re.IGNORECASE, ) -def _is_structural_token(token: str) -> bool: - """Return True if token looks like a path, version, number, or URL fragment.""" - return bool(_STRUCTURAL_RE.search(token)) - - -def _sentence_containing(text: str, token: str, max_chars: int = 80) -> str: - """Return a short excerpt of the first sentence containing token (case-insensitive).""" - for sentence in re.split(r"(?<=[.!?])\s+|\n+", text): - if token.lower() in sentence.lower(): - return sentence.strip()[:max_chars] - return "" - def extract_prompt_text(body: dict, path: str) -> str: """ @@ -501,6 +487,18 @@ def _last_user_message_text(body: dict, path: str) -> str: return body.get("prompt", "") +def _last_assistant_message_text(body: dict, path: str) -> str: + """Extract the last assistant message for cue scanning.""" + if path in ("/api/chat", "/v1/chat/completions", "/v1/messages"): + messages = body.get("messages", []) + last_assistant = next( + (m for m in reversed(messages) if m.get("role") == "assistant"), None + ) + if last_assistant: + return " ".join(_extract_text_strings(last_assistant.get("content", ""))) + return "" + + def inject_recollection_anthropic(body: dict, block: str) -> dict: """ Inject a recollection block into an Anthropic Messages API request. @@ -689,20 +687,6 @@ def _get_upstream_semaphore(base_url: str) -> asyncio.Semaphore: return _local_upstream_semaphores[key] -_last_discovery: dict[str, float] = {} -DISCOVERY_COOLDOWN_SECONDS = 60.0 - - -def _discovery_allowed(agent_name: str) -> bool: - """Rate-limit context discovery to at most once per cooldown period per agent.""" - key = agent_name or "_default" - now = time.time() - if now - _last_discovery.get(key, 0.0) < DISCOVERY_COOLDOWN_SECONDS: - log.debug("context discover throttled for agent=%s", key) - return False - _last_discovery[key] = now - return True - async def _get_agent_routing_model(pool, agent_name: str) -> ModelConfig | None: """ @@ -757,7 +741,6 @@ async def _route_agent_chat( # model if you want memory building alongside local inference.) body = await process_prompt( body, "/v1/chat/completions", pool, cfg, request_headers, - skip_discovery=is_local, ) sess = session_key(agent_model.model_name, body.get("messages", [])) @@ -862,84 +845,49 @@ async def process_prompt( pool, cfg: dict, request_headers: dict | None = None, - skip_discovery: bool = False, ) -> dict: """ Run the saliency + recollection pipeline over the prompt. Returns a (possibly modified) body dict with the recollection block injected. - - skip_discovery: when True, novel-word candidates are still registered in - SOAS (low saliency) but the LLM context-discover call is suppressed. - Used when the agent is routed to a local single-GPU model that cannot - handle simultaneous requests. """ read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5")) conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6")) recency_days = int(await get_config(pool, "recollection_recency_days", "90")) hdrs = request_headers or {} - request_model = _extract_request_model_config(path, body, hdrs, cfg) agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler - # Only the last user message — assistant responses and tool outputs are noise. + # Last user message — primary source for recollection reads. user_text = _last_user_message_text(body, path) if not user_text.strip(): return body - # 1. Scan user message for explicit relationship cues (fast, no LLM). + # 1. Scan user message for explicit relationship cues (fast, regex-only). for cue in scan_cues(user_text): await enqueue_cue(cue) - # 2. Single token loop over the user message only. - # The previous code also tokenised the last assistant message for the read - # path, but assistant output is large and full of technical terms that look - # novel — it was the main source of spurious discovery calls. - # Concepts the assistant mentioned were already processed when they first - # appeared in a user turn, so re-scanning is redundant. + # 2. Also scan the last assistant message for is-a / is-part-of assertions. + # Agents frequently state facts in their responses ("X is a Y", "X runs on Z"). + # No token-loop needed here — just cue extraction. + assistant_text = _last_assistant_message_text(body, path) + if assistant_text: + for cue in scan_cues(assistant_text): + await enqueue_cue(cue) + + # 3. Token loop over user message for saliency-triggered recollection. tokens = tokenize(user_text) salient_for_read: list[int] = [] - # Novel candidates: unknown words that look domain-specific. - # Cap at 3 — we want a targeted LLM call, not a flood. - MAX_NOVEL_PER_TURN = 3 - novel_candidates: list[str] = [] - for token in tokens: soas_row = cache.soas_by_token.get(token) - if soas_row is None: - if ( - not _is_structural_token(token) - and len(novel_candidates) < MAX_NOVEL_PER_TURN - ): - novel_candidates.append(token) continue - if soas_row.saliency == 0.0 and soas_row.novelty == 0.0: continue # common English — skip - cache.record_encounter(soas_row.id) - if soas_row.saliency >= read_threshold: salient_for_read.append(soas_row.id) - # Register novel candidates at low saliency (below read threshold). - for token in novel_candidates: - ctx = _sentence_containing(user_text, token) - await create_novel_soas(pool, token, context=ctx) - - # 3. Enqueue LLM-driven discovery — rate-limited per agent. - # At most one discovery call per DISCOVERY_COOLDOWN_SECONDS per agent, - # so a long conversation with many novel words doesn't queue a storm of - # concurrent LM Studio calls. - if novel_candidates and len(user_text) >= 20 and not skip_discovery: - if _discovery_allowed(agent_name): - await enqueue_context_discover( - user_text, novel_candidates, - agent_name=agent_name, - fallback_model=request_model, - ) - if not salient_for_read: return body @@ -1229,6 +1177,107 @@ async def iknowthat(request: Request) -> dict: } +# --------------------------------------------------------------------------- +# /concepts — browse and search domain concepts +# --------------------------------------------------------------------------- + +@app.get("/concepts") +async def list_concepts( + request: Request, + q: str = "", + limit: int = 50, + offset: int = 0, +) -> dict: + """ + Return domain concepts (saliency > 0 or novelty > 0) with their URD edges. + Optionally filter by token prefix/substring via ?q=. + """ + pool = request.app.state.pool + async with pool.acquire() as conn: + if q: + rows = await conn.fetch( + """ + SELECT id, token, saliency, novelty, encounter_count, first_seen_context + FROM soas + WHERE (saliency > 0 OR novelty > 0) AND token ILIKE $1 + ORDER BY saliency DESC, encounter_count DESC + LIMIT $2 OFFSET $3 + """, + f"%{q}%", limit, offset, + ) + total = await conn.fetchval( + "SELECT COUNT(*) FROM soas WHERE (saliency > 0 OR novelty > 0) AND token ILIKE $1", + f"%{q}%", + ) + else: + rows = await conn.fetch( + """ + SELECT id, token, saliency, novelty, encounter_count, first_seen_context + FROM soas + WHERE saliency > 0 OR novelty > 0 + ORDER BY saliency DESC, encounter_count DESC + LIMIT $1 OFFSET $2 + """, + limit, offset, + ) + total = await conn.fetchval( + "SELECT COUNT(*) FROM soas WHERE saliency > 0 OR novelty > 0" + ) + + result = [] + for r in rows: + cid = r["id"] + edges = cache.urd_by_concept.get(cid, []) + result.append({ + "id": cid, + "token": r["token"], + "saliency": round(r["saliency"], 3), + "novelty": round(r["novelty"], 3), + "encounter_count": r["encounter_count"], + "first_seen_context": r["first_seen_context"] or "", + "edges": [ + { + "dim": e.dim_token, + "parent": e.parent_token, + "is_isa": e.is_isa, + "confidence": round(e.confidence, 3), + "source": e.source, + } + for e in edges + ], + }) + + return {"total": total, "concepts": result} + + +@app.delete("/concepts/{token:path}") +async def delete_concept(token: str, request: Request) -> dict: + """Remove a concept and all its URD edges from SOAS.""" + pool = request.app.state.pool + token = token.lower().strip() + async with pool.acquire() as conn: + async with conn.transaction(): + await conn.execute("DELETE FROM kg_write_log WHERE concept_token = $1", token) + soas_id = await conn.fetchval("SELECT id FROM soas WHERE token = $1", token) + if soas_id is None: + return {"error": f"concept {token!r} not found"} + await conn.execute("DELETE FROM resolution_queue WHERE concept_id = $1", soas_id) + await conn.execute("DELETE FROM urd WHERE id = $1", soas_id) + await conn.execute("DELETE FROM soas WHERE id = $1", soas_id) + + # Evict from cache + row = cache.soas_by_token.pop(token, None) + if row: + cache.soas_by_id.pop(row.id, None) + edges = cache.urd_by_concept.pop(row.id, []) + for e in edges: + cache.urd_by_concept_dim.pop((row.id, e.dim_id), None) + cache.urd_by_parent.pop(row.id, None) + cache.pending_conflicts.discard(row.id) + + return {"status": "deleted", "token": token} + + # --------------------------------------------------------------------------- # /models — LLM model management # --------------------------------------------------------------------------- @@ -2283,6 +2332,11 @@ ADMIN_HTML = """ .log-nav {{ margin-top: 0.5em; display: flex; gap: 1em; align-items: center; font-size: 0.85em; }} footer {{ margin-top: 3em; padding-top: 1em; border-top: 1px solid #ddd; font-size: 0.78em; color: #888; }} footer a {{ color: #888; }} + .fact-form {{ display: flex; gap: 0.6em; flex-wrap: wrap; align-items: flex-end; margin-top: 0.6em; }} + .fact-form input, .fact-form select {{ font-family: monospace; padding: 5px 8px; border: 1px solid #ccc; border-radius: 3px; }} + .concept-edges {{ font-size: 0.78em; color: #555; }} + .search-row {{ display: flex; gap: 0.6em; align-items: center; margin-bottom: 0.8em; }} + .search-row input {{ font-family: monospace; padding: 5px 10px; border: 1px solid #ccc; border-radius: 3px; width: 220px; }}
@@ -2355,6 +2409,51 @@ ADMIN_HTML = """ ++ Domain concepts with saliency above zero. Add facts manually; the agent's + is-a / is-part-of statements are also captured automatically via cue scanning. +
+ +| Concept | Saliency | Encounters | ' + + 'Edges (dim → parent) | First seen | ' + + ' |
|---|---|---|---|---|---|
| ${{c.token}} | +${{c.saliency}} | +${{c.encounter_count}} | +${{edgeStr || 'none'}} | +${{ctx}} | ++ |