diff --git a/plugins/festinger/festinger/main.py b/plugins/festinger/festinger/main.py index 9c71677..e92c251 100644 --- a/plugins/festinger/festinger/main.py +++ b/plugins/festinger/festinger/main.py @@ -863,76 +863,67 @@ async def process_prompt( recency_days = int(await get_config(pool, "recollection_recency_days", "90")) hdrs = request_headers or {} - # Derive a ModelConfig from the intercepted request so context discovery can - # mirror Agent0's current model without a separate write_model_id config. request_model = _extract_request_model_config(path, body, hdrs, cfg) agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler - # Extract only the last user message — agent responses and reasoning traces - # are noise for both cue scanning and concept discovery. + # Only the last user message — assistant responses and tool outputs are noise. user_text = _last_user_message_text(body, path) if not user_text.strip(): return body - # 1. Scan user message for explicit relationship cues (fast, inline, bypasses LLM). + # 1. Scan user message for explicit relationship cues (fast, no LLM). for cue in scan_cues(user_text): await enqueue_cue(cue) - # 2. Tokenise the recent context (last user + last assistant) for the read path. - # Novel words from the user turn are also collected as LLM candidates. - prompt_text = extract_prompt_text(body, path) - tokens = tokenize(prompt_text) + # 2. Single token loop over the user message only. + # The previous code also tokenised the last assistant message for the read + # path, but assistant output is large and full of technical terms that look + # novel — it was the main source of spurious discovery calls. + # Concepts the assistant mentioned were already processed when they first + # appeared in a user turn, so re-scanning is redundant. + tokens = tokenize(user_text) salient_for_read: list[int] = [] - # Candidate novel tokens from the USER message only — structural tokens - # (paths, versions, numbers) are filtered out. Capped to avoid flooding - # on very long messages. - MAX_NOVEL_PER_TURN = 8 + # Novel candidates: unknown words that look domain-specific. + # Cap at 3 — we want a targeted LLM call, not a flood. + MAX_NOVEL_PER_TURN = 3 novel_candidates: list[str] = [] - # Only collect candidates from user-side tokens - user_tokens = set(tokenize(user_text)) - for token in tokens: soas_row = cache.soas_by_token.get(token) if soas_row is None: - # Token absent from cache entirely — candidate domain word. - # Restrict to user-side tokens so we don't mine agent responses. if ( - token in user_tokens - and not _is_structural_token(token) + not _is_structural_token(token) and len(novel_candidates) < MAX_NOVEL_PER_TURN ): novel_candidates.append(token) continue if soas_row.saliency == 0.0 and soas_row.novelty == 0.0: - # Common English word — skip. - continue + continue # common English — skip cache.record_encounter(soas_row.id) - # Only surface in recollection if saliency is above threshold. - # Unconfirmed novel words (saliency=NOVEL_INITIAL_SALIENCY=0.1) are - # deliberately kept below the threshold until the LLM confirms them. if soas_row.saliency >= read_threshold: salient_for_read.append(soas_row.id) - # Register novel candidates in SOAS at low saliency (below read threshold). - # They become recollection attractors only after the LLM confirms them. + # Register novel candidates at low saliency (below read threshold). for token in novel_candidates: ctx = _sentence_containing(user_text, token) await create_novel_soas(pool, token, context=ctx) - # Do NOT add to salient_for_read — no zero-hit recollection until confirmed. - # 3. Enqueue for LLM-driven discovery if there are candidates to evaluate. + # 3. Enqueue LLM-driven discovery — rate-limited per agent. + # At most one discovery call per DISCOVERY_COOLDOWN_SECONDS per agent, + # so a long conversation with many novel words doesn't queue a storm of + # concurrent LM Studio calls. if novel_candidates and len(user_text) >= 20 and not skip_discovery: - await enqueue_context_discover( - user_text, novel_candidates, - agent_name=agent_name, - fallback_model=request_model, - ) + if _discovery_allowed(agent_name): + await enqueue_context_discover( + user_text, novel_candidates, + agent_name=agent_name, + fallback_model=request_model, + ) if not salient_for_read: return body