Tuning concept discovery
This commit is contained in:
@@ -863,71 +863,62 @@ async def process_prompt(
|
|||||||
recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
|
recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
|
||||||
|
|
||||||
hdrs = request_headers or {}
|
hdrs = request_headers or {}
|
||||||
# Derive a ModelConfig from the intercepted request so context discovery can
|
|
||||||
# mirror Agent0's current model without a separate write_model_id config.
|
|
||||||
request_model = _extract_request_model_config(path, body, hdrs, cfg)
|
request_model = _extract_request_model_config(path, body, hdrs, cfg)
|
||||||
agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler
|
agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler
|
||||||
|
|
||||||
# Extract only the last user message — agent responses and reasoning traces
|
# Only the last user message — assistant responses and tool outputs are noise.
|
||||||
# are noise for both cue scanning and concept discovery.
|
|
||||||
user_text = _last_user_message_text(body, path)
|
user_text = _last_user_message_text(body, path)
|
||||||
if not user_text.strip():
|
if not user_text.strip():
|
||||||
return body
|
return body
|
||||||
|
|
||||||
# 1. Scan user message for explicit relationship cues (fast, inline, bypasses LLM).
|
# 1. Scan user message for explicit relationship cues (fast, no LLM).
|
||||||
for cue in scan_cues(user_text):
|
for cue in scan_cues(user_text):
|
||||||
await enqueue_cue(cue)
|
await enqueue_cue(cue)
|
||||||
|
|
||||||
# 2. Tokenise the recent context (last user + last assistant) for the read path.
|
# 2. Single token loop over the user message only.
|
||||||
# Novel words from the user turn are also collected as LLM candidates.
|
# The previous code also tokenised the last assistant message for the read
|
||||||
prompt_text = extract_prompt_text(body, path)
|
# path, but assistant output is large and full of technical terms that look
|
||||||
tokens = tokenize(prompt_text)
|
# novel — it was the main source of spurious discovery calls.
|
||||||
|
# Concepts the assistant mentioned were already processed when they first
|
||||||
|
# appeared in a user turn, so re-scanning is redundant.
|
||||||
|
tokens = tokenize(user_text)
|
||||||
salient_for_read: list[int] = []
|
salient_for_read: list[int] = []
|
||||||
|
|
||||||
# Candidate novel tokens from the USER message only — structural tokens
|
# Novel candidates: unknown words that look domain-specific.
|
||||||
# (paths, versions, numbers) are filtered out. Capped to avoid flooding
|
# Cap at 3 — we want a targeted LLM call, not a flood.
|
||||||
# on very long messages.
|
MAX_NOVEL_PER_TURN = 3
|
||||||
MAX_NOVEL_PER_TURN = 8
|
|
||||||
novel_candidates: list[str] = []
|
novel_candidates: list[str] = []
|
||||||
|
|
||||||
# Only collect candidates from user-side tokens
|
|
||||||
user_tokens = set(tokenize(user_text))
|
|
||||||
|
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
soas_row = cache.soas_by_token.get(token)
|
soas_row = cache.soas_by_token.get(token)
|
||||||
|
|
||||||
if soas_row is None:
|
if soas_row is None:
|
||||||
# Token absent from cache entirely — candidate domain word.
|
|
||||||
# Restrict to user-side tokens so we don't mine agent responses.
|
|
||||||
if (
|
if (
|
||||||
token in user_tokens
|
not _is_structural_token(token)
|
||||||
and not _is_structural_token(token)
|
|
||||||
and len(novel_candidates) < MAX_NOVEL_PER_TURN
|
and len(novel_candidates) < MAX_NOVEL_PER_TURN
|
||||||
):
|
):
|
||||||
novel_candidates.append(token)
|
novel_candidates.append(token)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
|
if soas_row.saliency == 0.0 and soas_row.novelty == 0.0:
|
||||||
# Common English word — skip.
|
continue # common English — skip
|
||||||
continue
|
|
||||||
|
|
||||||
cache.record_encounter(soas_row.id)
|
cache.record_encounter(soas_row.id)
|
||||||
|
|
||||||
# Only surface in recollection if saliency is above threshold.
|
|
||||||
# Unconfirmed novel words (saliency=NOVEL_INITIAL_SALIENCY=0.1) are
|
|
||||||
# deliberately kept below the threshold until the LLM confirms them.
|
|
||||||
if soas_row.saliency >= read_threshold:
|
if soas_row.saliency >= read_threshold:
|
||||||
salient_for_read.append(soas_row.id)
|
salient_for_read.append(soas_row.id)
|
||||||
|
|
||||||
# Register novel candidates in SOAS at low saliency (below read threshold).
|
# Register novel candidates at low saliency (below read threshold).
|
||||||
# They become recollection attractors only after the LLM confirms them.
|
|
||||||
for token in novel_candidates:
|
for token in novel_candidates:
|
||||||
ctx = _sentence_containing(user_text, token)
|
ctx = _sentence_containing(user_text, token)
|
||||||
await create_novel_soas(pool, token, context=ctx)
|
await create_novel_soas(pool, token, context=ctx)
|
||||||
# Do NOT add to salient_for_read — no zero-hit recollection until confirmed.
|
|
||||||
|
|
||||||
# 3. Enqueue for LLM-driven discovery if there are candidates to evaluate.
|
# 3. Enqueue LLM-driven discovery — rate-limited per agent.
|
||||||
|
# At most one discovery call per DISCOVERY_COOLDOWN_SECONDS per agent,
|
||||||
|
# so a long conversation with many novel words doesn't queue a storm of
|
||||||
|
# concurrent LM Studio calls.
|
||||||
if novel_candidates and len(user_text) >= 20 and not skip_discovery:
|
if novel_candidates and len(user_text) >= 20 and not skip_discovery:
|
||||||
|
if _discovery_allowed(agent_name):
|
||||||
await enqueue_context_discover(
|
await enqueue_context_discover(
|
||||||
user_text, novel_candidates,
|
user_text, novel_candidates,
|
||||||
agent_name=agent_name,
|
agent_name=agent_name,
|
||||||
|
|||||||
Reference in New Issue
Block a user