Mirror request model for context discovery — no write_model_id needed

Festinger now extracts provider/model/api-key from every intercepted
request and passes them to the context-discover queue as a fallback_model.
_process_context_discover uses it when write_model_id is not configured,
so Agent0's current model (LM Studio, Ollama, Anthropic) is automatically
reused for utility LLM calls without any extra setup.

Priority: write_model_id (explicit override) > fallback_model (request mirror)

Also updates upstream_openai default in config.yaml to LM Studio's
local address (host.docker.internal:1234).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-21 19:17:51 +02:00
parent ccbb5b2d45
commit 7210fe2066
3 changed files with 111 additions and 20 deletions
+67 -6
View File
@@ -44,6 +44,7 @@ from .cue_scanner import scan_cues
from .recollection import build_recollection_block, inject_recollection
from .resolution_job import run_resolution_job, last_run_timestamp
from .tokenizer import tokenize
from .llm_client import ModelConfig
from .write_queue import enqueue_context_discover, enqueue_cue, start_worker, stop_worker
from .urd_writer import InsertRequest, insert_urd_edge
from .wordnet import import_wordnet, CITATION as WORDNET_CITATION
@@ -517,11 +518,67 @@ def inject_recollection_anthropic(body: dict, block: str) -> dict:
return body
# ---------------------------------------------------------------------------
# Request model mirroring
# ---------------------------------------------------------------------------
def _extract_request_model_config(
    path: str,
    body: dict,
    request_headers: dict,
    cfg: dict,
) -> ModelConfig | None:
    """
    Build a ModelConfig from the intercepted request so Festinger's utility
    LLM calls (context discovery) can use the same provider/model as Agent0 —
    no separate write_model_id configuration needed.

    Provider inference (by request path):
        /v1/messages              → provider "claude"; API key taken from
                                    the ``x-api-key`` header.
        /v1/chat/completions      → provider "lm-studio" (OpenAI-compatible);
                                    base_url from cfg["upstream_openai"];
                                    API key from the ``Authorization`` header
                                    (``Bearer`` prefix stripped), falling back
                                    to the placeholder "lm-studio".
        /api/chat, /api/generate  → provider "lm-studio"; base_url from
                                    cfg["upstream_ollama"]; API key is the
                                    placeholder "ollama".

    Args:
        path: Route the request was intercepted on.
        body: Parsed JSON request body; its "model" field is mirrored.
        request_headers: Headers of the intercepted request. Names are
            matched case-insensitively; ``None`` is treated as empty.
        cfg: Proxy configuration; only the upstream base URLs are read.

    Returns:
        A ModelConfig mirroring the request, or None when the body names no
        model or the path is not one of the recognised routes.
    """
    model_name = body.get("model", "")
    if not model_name:
        return None

    # HTTP header names are case-insensitive, and callers hand over plain
    # dicts whose key casing is not guaranteed, so normalise before lookup.
    headers = {str(name).lower(): value for name, value in (request_headers or {}).items()}

    if path == "/v1/messages":
        return ModelConfig(
            provider="claude",
            model_name=model_name,
            api_key=headers.get("x-api-key", ""),
        )

    if path == "/v1/chat/completions":
        auth = headers.get("authorization", "")
        # Strip a leading "Bearer " scheme (any casing); any other value is
        # passed through unchanged as the key.
        if auth.lower().startswith("bearer "):
            api_key = auth[len("Bearer "):].strip()
        else:
            api_key = auth
        return ModelConfig(
            provider="lm-studio",
            model_name=model_name,
            # Placeholder key when the request carried no credentials.
            api_key=api_key or "lm-studio",
            base_url=cfg.get("upstream_openai", ""),
        )

    if path in ("/api/chat", "/api/generate"):
        # Ollama exposes an OpenAI-compatible endpoint at the same base URL.
        return ModelConfig(
            provider="lm-studio",
            model_name=model_name,
            api_key="ollama",
            base_url=cfg.get("upstream_ollama", ""),
        )

    return None
# ---------------------------------------------------------------------------
# Saliency + recollection pipeline
# ---------------------------------------------------------------------------
async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
async def process_prompt(body: dict, path: str, pool, cfg: dict, request_headers: dict | None = None) -> dict:
"""
Run the saliency + recollection pipeline over the prompt.
Returns a (possibly modified) body dict with the recollection block injected.
@@ -530,6 +587,10 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
# Derive a ModelConfig from the intercepted request so context discovery can
# mirror Agent0's current model without a separate write_model_id config.
request_model = _extract_request_model_config(path, body, request_headers or {}, cfg)
# Extract only the last user message — agent responses and reasoning traces
# are noise for both cue scanning and concept discovery.
user_text = _last_user_message_text(body, path)
@@ -590,7 +651,7 @@ async def process_prompt(body: dict, path: str, pool, cfg: dict) -> dict:
# 3. Enqueue for LLM-driven discovery if there are candidates to evaluate.
if novel_candidates and len(user_text) >= 20:
await enqueue_context_discover(user_text, novel_candidates)
await enqueue_context_discover(user_text, novel_candidates, fallback_model=request_model)
if not salient_for_read:
return body
@@ -628,7 +689,7 @@ async def chat(request: Request) -> Response:
min_len = cfg["detection"]["min_length"]
log.info("chat route=/api/chat model=%s", model)
try:
body = await process_prompt(body, "/api/chat", pool, cfg)
body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers))
text, raw = await call_ollama("/api/chat", body, upstream)
sess = session_key(model, body.get("messages", []))
count = record_and_check(sess, text, min_len)
@@ -661,7 +722,7 @@ async def generate(request: Request) -> Response:
min_len = cfg["detection"]["min_length"]
log.info("chat route=/api/generate model=%s", model)
try:
body = await process_prompt(body, "/api/generate", pool, cfg)
body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers))
messages = [{"role": "user", "content": body.get("prompt", "")}]
sess = session_key(model, messages)
text, raw = await call_ollama("/api/generate", body, upstream)
@@ -709,7 +770,7 @@ async def anthropic_messages(request: Request) -> Response:
headers = _relay_headers(request, ANTHROPIC_RELAY_HEADERS)
if "anthropic-version" not in {k.lower() for k in headers}:
headers["anthropic-version"] = "2023-06-01"
body = await process_prompt(body, "/v1/messages", pool, cfg)
body = await process_prompt(body, "/v1/messages", pool, cfg, headers)
messages = body.get("messages", [])
sess = session_key(model, messages)
text, raw = await call_anthropic(body, upstream, headers)
@@ -761,7 +822,7 @@ async def openai_chat_completions(request: Request) -> Response:
log.info("chat route=/v1/chat/completions model=%s upstream=%s", model, upstream)
try:
headers = _relay_headers(request, OPENAI_RELAY_HEADERS)
body = await process_prompt(body, "/v1/chat/completions", pool, cfg)
body = await process_prompt(body, "/v1/chat/completions", pool, cfg, headers)
messages = body.get("messages", [])
sess = session_key(model, messages)
text, raw = await call_openai(body, upstream, headers)