diff --git a/plugins/festinger/festinger/main.py b/plugins/festinger/festinger/main.py
index 9c71677..e92c251 100644
--- a/plugins/festinger/festinger/main.py
+++ b/plugins/festinger/festinger/main.py
@@ -863,76 +863,67 @@ async def process_prompt(
     recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
 
     hdrs = request_headers or {}
-    # Derive a ModelConfig from the intercepted request so context discovery can
-    # mirror Agent0's current model without a separate write_model_id config.
     request_model = _extract_request_model_config(path, body, hdrs, cfg)
     agent_name, _ = _extract_agent_name(body, hdrs)  # body already cleaned by route handler
 
-    # Extract only the last user message — agent responses and reasoning traces
-    # are noise for both cue scanning and concept discovery.
+    # Only the last user message — assistant responses and tool outputs are noise.
     user_text = _last_user_message_text(body, path)
     if not user_text.strip():
         return body
 
-    # 1. Scan user message for explicit relationship cues (fast, inline, bypasses LLM).
+    # 1. Scan user message for explicit relationship cues (fast, no LLM).
     for cue in scan_cues(user_text):
         await enqueue_cue(cue)
 
-    # 2. Tokenise the recent context (last user + last assistant) for the read path.
-    #    Novel words from the user turn are also collected as LLM candidates.
-    prompt_text = extract_prompt_text(body, path)
-    tokens = tokenize(prompt_text)
+    # 2. Single token loop over the user message only; it feeds both the read
+    #    path (salient_for_read) and novel-candidate collection.  The previous
+    #    code also tokenised the last assistant message for the read path, but
+    #    assistant output is large and full of novel-looking technical terms —
+    #    the main source of spurious discovery calls.  Re-scanning is treated as
+    #    redundant, assuming such concepts first appeared in a user turn.
+    tokens = tokenize(user_text)
     salient_for_read: list[int] = []
 
-    # Candidate novel tokens from the USER message only — structural tokens
-    # (paths, versions, numbers) are filtered out.  Capped to avoid flooding
-    # on very long messages.
-    MAX_NOVEL_PER_TURN = 8
+    # Novel candidates: unknown words that look domain-specific.
+    # Cap at 3 — we want a targeted LLM call, not a flood.
+    MAX_NOVEL_PER_TURN = 3
     novel_candidates: list[str] = []
 
-    # Only collect candidates from user-side tokens
-    user_tokens = set(tokenize(user_text))
-
     for token in tokens:
         soas_row = cache.soas_by_token.get(token)
 
         if soas_row is None:
-            # Token absent from cache entirely — candidate domain word.
-            # Restrict to user-side tokens so we don't mine agent responses.
             if (
-                token in user_tokens
-                and not _is_structural_token(token)
+                not _is_structural_token(token)
                 and len(novel_candidates) < MAX_NOVEL_PER_TURN
             ):
                 novel_candidates.append(token)
             continue
 
         if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
-            # Common English word — skip.
-            continue
+            continue  # common English — skip
 
         cache.record_encounter(soas_row.id)
 
-        # Only surface in recollection if saliency is above threshold.
-        # Unconfirmed novel words (saliency=NOVEL_INITIAL_SALIENCY=0.1) are
-        # deliberately kept below the threshold until the LLM confirms them.
         if soas_row.saliency >= read_threshold:
             salient_for_read.append(soas_row.id)
 
-    # Register novel candidates in SOAS at low saliency (below read threshold).
-    # They become recollection attractors only after the LLM confirms them.
+    # Register novel candidates at low saliency (below read threshold).
     for token in novel_candidates:
         ctx = _sentence_containing(user_text, token)
         await create_novel_soas(pool, token, context=ctx)
-        # Do NOT add to salient_for_read — no zero-hit recollection until confirmed.
 
-    # 3. Enqueue for LLM-driven discovery if there are candidates to evaluate.
+    # 3. Enqueue LLM-driven discovery — rate-limited per agent.
+    #    At most one discovery call per DISCOVERY_COOLDOWN_SECONDS per agent,
+    #    so a long conversation with many novel words doesn't queue a storm of
+    #    concurrent LM Studio calls.
     if novel_candidates and len(user_text) >= 20 and not skip_discovery:
-        await enqueue_context_discover(
-            user_text, novel_candidates,
-            agent_name=agent_name,
-            fallback_model=request_model,
-        )
+        if _discovery_allowed(agent_name):
+            await enqueue_context_discover(
+                user_text, novel_candidates,
+                agent_name=agent_name,
+                fallback_model=request_model,
+            )
 
     if not salient_for_read:
         return body