This commit is contained in:
2026-04-25 12:12:40 +02:00
parent f6d701b125
commit f30d029968
+78 -9
View File
@@ -477,19 +477,80 @@ def extract_prompt_text(body: dict, path: str) -> str:
return body.get("prompt", "") return body.get("prompt", "")
def _human_prefix(text: str, max_chars: int = 1000) -> str:
"""
Extract the human-written prefix of a user message, stopping before any
agent-framework context injection (EXTRAS, tool results, structured sections).
Agent Zero (and similar frameworks) append structured context to the user
message after a blank line followed by a section marker. We only want the
user's actual words — the first paragraph before any such marker.
Markers we stop at:
\\n\\n[ — Agent Zero [EXTRAS], [context], [solutions], …
\\n\\n< — XML-wrapped context blocks
\\n--- — horizontal rule separators
\\n# or \\n## — markdown headings injected by the framework
"""
# Structural section delimiters injected by agent frameworks
for delim in ("\n\n[", "\n\n<", "\n---", "\n[EXTRAS]", "\n[context]"):
idx = text.find(delim)
if 0 < idx < max_chars:
return text[:idx].strip()
return text[:max_chars].strip()
def _is_internal_subcall(user_text: str, sys_text: str = "") -> bool:
"""
Heuristic: returns True if this looks like an agent-internal sub-call
(memory extraction, consolidation, keyword search) rather than a main
user-facing turn. We skip recollection injection for these.
Signals checked in order:
1. User message is a JSON/array literal → memory operation payload
2. System message is very short → utility prompt, not a full agent
personality (Agent Zero's main system prompt is typically >5 000 chars;
memory sub-call prompts are usually <1 500 chars)
3. System message contains memory-task keywords in the first 200 chars
"""
# Signal 1: JSON payload
if user_text.strip().startswith(("{", "[")):
return True
if sys_text:
# Signal 2: system prompt suspiciously short for a real agent call
if len(sys_text) < 1500:
return True
# Signal 3: task-oriented system message (memory ops, keyword extraction…)
sys_head = sys_text[:200].lower()
task_markers = (
"extract keyword", "consolidat", "search for similar",
"rate the similarity", "memorize", "memory content",
)
if any(m in sys_head for m in task_markers):
return True
return False
def _last_user_message_text(body: dict, path: str) -> str:
    """
    Extract only the human-written prefix of the last user message.

    Agent Zero and similar frameworks append large context sections (EXTRAS,
    solutions, tool results) after the user's actual words. We stop at the
    first structural delimiter so that domain tokens in the injected context
    don't spuriously trigger recollection.

    Args:
        body: Parsed request body.
        path: Request path; selects between the chat-style ``messages`` list
            and the plain ``prompt`` field.

    Returns:
        The human-written prefix, or "" when there is no user message or the
        relevant field is missing, null, or not of the expected type.
    """
    if path in ("/api/chat", "/v1/chat/completions", "/v1/messages"):
        # A malformed body may carry ``"messages": null`` or a non-list
        # value; treat that as "no messages" instead of raising.
        messages = body.get("messages")
        if not isinstance(messages, list):
            messages = []
        last_user = next(
            (
                m for m in reversed(messages)
                if isinstance(m, dict) and m.get("role") == "user"
            ),
            None,
        )
        if last_user:
            raw = " ".join(_extract_text_strings(last_user.get("content", "")))
            return _human_prefix(raw)
        return ""

    # Only a string prompt can be prefix-scanned; null or non-string prompts
    # yield empty text rather than an AttributeError inside _human_prefix.
    prompt = body.get("prompt", "")
    raw = prompt if isinstance(prompt, str) else ""
    return _human_prefix(raw)
def _last_assistant_message_text(body: dict, path: str) -> str: def _last_assistant_message_text(body: dict, path: str) -> str:
@@ -891,11 +952,20 @@ async def process_prompt(
if not agent_name: if not agent_name:
agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler agent_name, _ = _extract_agent_name(body, hdrs) # body already cleaned by route handler
# Last user message — primary source for recollection reads. # Last user message — human-written prefix only (strips agent framework context).
user_text = _last_user_message_text(body, path) user_text = _last_user_message_text(body, path)
if not user_text.strip(): if not user_text.strip():
return body return body
# Skip injection for internal agent sub-calls (memory extraction,
# consolidation, keyword search). These are detected by:
# - user message being a JSON payload, OR
# - system message being very short (utility prompt, not a full agent), OR
# - system message starting with memory-task keywords.
raw_sys_text = _system_message_text(body, path)
if _is_internal_subcall(user_text, raw_sys_text):
return body
# 1. Scan user message for explicit relationship cues (fast, regex-only). # 1. Scan user message for explicit relationship cues (fast, regex-only).
for cue in scan_cues(user_text): for cue in scan_cues(user_text):
await enqueue_cue(cue) await enqueue_cue(cue)
@@ -912,9 +982,8 @@ async def process_prompt(
# Concepts grounding the agent's persona or project context rank higher # Concepts grounding the agent's persona or project context rank higher
# in the recollection block for the entire session. # in the recollection block for the entire session.
session_boost_ids: set[int] = set() session_boost_ids: set[int] = set()
sys_text = _system_message_text(body, path) if raw_sys_text:
if sys_text: for tok in tokenize(raw_sys_text):
for tok in tokenize(sys_text):
row = cache.soas_by_token.get(tok) row = cache.soas_by_token.get(tok)
if row and row.saliency > 0.0: if row and row.saliency > 0.0:
session_boost_ids.add(row.id) session_boost_ids.add(row.id)