Adding content to git
This commit is contained in:
@@ -10,7 +10,7 @@ upstream_anthropic: "https://api.anthropic.com"
|
||||
# Where the real OpenAI-compatible API is running (for /v1/chat/completions).
|
||||
# For LM Studio set this to its local address, e.g. "http://host.docker.internal:1234"
|
||||
# Override via UPSTREAM_OPENAI env var if needed
|
||||
upstream_openai: "http://host.docker.internal:1234"
|
||||
upstream_openai: "http://host.docker.internal:11434"
|
||||
|
||||
# Port this proxy listens on inside the container (exposed as 11434 on the docker network)
|
||||
proxy_port: 11434
|
||||
|
||||
@@ -70,6 +70,12 @@ def load_yaml_config() -> dict:
|
||||
return yaml.safe_load(f)
|
||||
|
||||
|
||||
async def _feature_enabled(pool, key: str, default: bool = True) -> bool:
    """Report whether the feature flag stored under *key* is switched on.

    The flag is looked up through ``get_config``; when the lookup falls back,
    *default* is passed along as the string "true" or "false". Any value other
    than an explicit "off" spelling counts as enabled.
    """
    fallback = "true" if default else "false"
    raw_value = await get_config(pool, key, fallback)
    normalized = raw_value.strip().lower()
    # Only these exact spellings turn a feature off; everything else is on.
    return normalized not in ("false", "0", "off", "no", "disabled")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lifespan — startup / shutdown
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -876,24 +882,24 @@ async def _route_agent_chat(
|
||||
return text, raw_o
|
||||
|
||||
text, raw = await _call(body)
|
||||
count = record_and_check(sess, text, min_len)
|
||||
|
||||
if count >= 2:
|
||||
log.warning(
|
||||
"loop_detected (agent routed) agent_model=%s session=%s count=%d",
|
||||
agent_model.model_name, sess[1], count,
|
||||
)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
raw["choices"] = [{
|
||||
"index": 0,
|
||||
"message": {"role": "assistant", "content": override},
|
||||
"finish_reason": "stop",
|
||||
}]
|
||||
raw["loop_detected"] = True
|
||||
else:
|
||||
text, raw = await _call(body)
|
||||
record_and_check(sess, text, min_len)
|
||||
if await _feature_enabled(pool, "feature_loop_detection"):
|
||||
count = record_and_check(sess, text, min_len)
|
||||
if count >= 2:
|
||||
log.warning(
|
||||
"loop_detected (agent routed) agent_model=%s session=%s count=%d",
|
||||
agent_model.model_name, sess[1], count,
|
||||
)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
raw["choices"] = [{
|
||||
"index": 0,
|
||||
"message": {"role": "assistant", "content": override},
|
||||
"finish_reason": "stop",
|
||||
}]
|
||||
raw["loop_detected"] = True
|
||||
else:
|
||||
text, raw = await _call(body)
|
||||
record_and_check(sess, text, min_len)
|
||||
|
||||
if original_stream:
|
||||
return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
|
||||
@@ -944,6 +950,9 @@ async def process_prompt(
|
||||
agent_name may be passed in directly (e.g. extracted from the URL path) to
|
||||
avoid re-parsing the body/headers; falls back to _extract_agent_name if empty.
|
||||
"""
|
||||
if not await _feature_enabled(pool, "feature_recollection"):
|
||||
return body
|
||||
|
||||
read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5"))
|
||||
conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
|
||||
recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
|
||||
@@ -1076,17 +1085,18 @@ async def _handle_ollama_chat(request: Request, agent_name: str = "") -> Respons
|
||||
try:
|
||||
body = await process_prompt(body, "/api/chat", pool, cfg, dict(request.headers), agent_name=agent_name)
|
||||
text, raw = await call_ollama("/api/chat", body, upstream)
|
||||
sess = session_key(model, body.get("messages", []))
|
||||
count = record_and_check(sess, text, min_len)
|
||||
if count >= 2:
|
||||
log.warning("loop_detected model=%s session=%s count=%d", model, sess[1], count)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
raw["message"] = {"role": "assistant", "content": override}
|
||||
raw["loop_detected"] = True
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
text, raw = await call_ollama("/api/chat", body, upstream)
|
||||
record_and_check(sess, text, min_len)
|
||||
if await _feature_enabled(pool, "feature_loop_detection"):
|
||||
sess = session_key(model, body.get("messages", []))
|
||||
count = record_and_check(sess, text, min_len)
|
||||
if count >= 2:
|
||||
log.warning("loop_detected model=%s session=%s count=%d", model, sess[1], count)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
raw["message"] = {"role": "assistant", "content": override}
|
||||
raw["loop_detected"] = True
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
text, raw = await call_ollama("/api/chat", body, upstream)
|
||||
record_and_check(sess, text, min_len)
|
||||
raw["message"] = {"role": "assistant", "content": text}
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
except UpstreamError as exc:
|
||||
@@ -1111,19 +1121,20 @@ async def _handle_ollama_generate(request: Request, agent_name: str = "") -> Res
|
||||
log.info("chat route=/api/generate model=%s agent=%s", model, agent_name or "—")
|
||||
try:
|
||||
body = await process_prompt(body, "/api/generate", pool, cfg, dict(request.headers), agent_name=agent_name)
|
||||
messages = [{"role": "user", "content": body.get("prompt", "")}]
|
||||
sess = session_key(model, messages)
|
||||
text, raw = await call_ollama("/api/generate", body, upstream)
|
||||
count = record_and_check(sess, text, min_len)
|
||||
if count >= 2:
|
||||
log.warning("loop_detected model=%s session=%s count=%d", model, sess[1], count)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
raw["response"] = override
|
||||
raw["loop_detected"] = True
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
text, raw = await call_ollama("/api/generate", body, upstream)
|
||||
record_and_check(sess, text, min_len)
|
||||
if await _feature_enabled(pool, "feature_loop_detection"):
|
||||
messages = [{"role": "user", "content": body.get("prompt", "")}]
|
||||
sess = session_key(model, messages)
|
||||
count = record_and_check(sess, text, min_len)
|
||||
if count >= 2:
|
||||
log.warning("loop_detected model=%s session=%s count=%d", model, sess[1], count)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
raw["response"] = override
|
||||
raw["loop_detected"] = True
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
text, raw = await call_ollama("/api/generate", body, upstream)
|
||||
record_and_check(sess, text, min_len)
|
||||
raw["response"] = text
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
except UpstreamError as exc:
|
||||
@@ -1172,21 +1183,22 @@ async def _handle_anthropic_messages(request: Request, agent_name: str = "") ->
|
||||
if "anthropic-version" not in {k.lower() for k in headers}:
|
||||
headers["anthropic-version"] = "2023-06-01"
|
||||
body = await process_prompt(body, "/v1/messages", pool, cfg, headers, agent_name=agent_name)
|
||||
messages = body.get("messages", [])
|
||||
sess = session_key(model, messages)
|
||||
text, raw = await call_anthropic(body, upstream, headers)
|
||||
count = record_and_check(sess, text, min_len)
|
||||
if count >= 2:
|
||||
log.warning("loop_detected model=%s session=%s count=%d", model, sess[1], count)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
raw["content"] = [{"type": "text", "text": override}]
|
||||
raw["loop_detected"] = True
|
||||
if original_stream:
|
||||
return Response(content=_anthropic_to_sse(raw), media_type="text/event-stream")
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
text, raw = await call_anthropic(body, upstream, headers)
|
||||
record_and_check(sess, text, min_len)
|
||||
if await _feature_enabled(pool, "feature_loop_detection"):
|
||||
messages = body.get("messages", [])
|
||||
sess = session_key(model, messages)
|
||||
count = record_and_check(sess, text, min_len)
|
||||
if count >= 2:
|
||||
log.warning("loop_detected model=%s session=%s count=%d", model, sess[1], count)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
raw["content"] = [{"type": "text", "text": override}]
|
||||
raw["loop_detected"] = True
|
||||
if original_stream:
|
||||
return Response(content=_anthropic_to_sse(raw), media_type="text/event-stream")
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
text, raw = await call_anthropic(body, upstream, headers)
|
||||
record_and_check(sess, text, min_len)
|
||||
if original_stream:
|
||||
sse_bytes = _anthropic_to_sse(raw)
|
||||
log.info(
|
||||
@@ -1250,22 +1262,23 @@ async def _handle_openai_chat(request: Request, agent_name: str = "") -> Respons
|
||||
# Standard path — forward to configured upstream unchanged
|
||||
headers = _relay_headers(request, OPENAI_RELAY_HEADERS)
|
||||
body = await process_prompt(body, "/v1/chat/completions", pool, cfg, hdrs, agent_name=agent_name)
|
||||
messages = body.get("messages", [])
|
||||
sess = session_key(model, messages)
|
||||
text, raw = await call_openai(body, upstream, headers)
|
||||
count = record_and_check(sess, text, min_len)
|
||||
if count >= 2:
|
||||
log.warning("loop_detected model=%s session=%s count=%d", model, sess[1], count)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
if raw.get("choices"):
|
||||
raw["choices"][0]["message"]["content"] = override
|
||||
raw["loop_detected"] = True
|
||||
if original_stream:
|
||||
return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
text, raw = await call_openai(body, upstream, headers)
|
||||
record_and_check(sess, text, min_len)
|
||||
if await _feature_enabled(pool, "feature_loop_detection"):
|
||||
messages = body.get("messages", [])
|
||||
sess = session_key(model, messages)
|
||||
count = record_and_check(sess, text, min_len)
|
||||
if count >= 2:
|
||||
log.warning("loop_detected model=%s session=%s count=%d", model, sess[1], count)
|
||||
body, override = apply_mitigations(body, count, cfg)
|
||||
if override is not None:
|
||||
if raw.get("choices"):
|
||||
raw["choices"][0]["message"]["content"] = override
|
||||
raw["loop_detected"] = True
|
||||
if original_stream:
|
||||
return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
text, raw = await call_openai(body, upstream, headers)
|
||||
record_and_check(sess, text, min_len)
|
||||
if original_stream:
|
||||
return Response(content=_openai_sse_from_response(raw), media_type="text/event-stream")
|
||||
return Response(content=json.dumps(raw), media_type="application/json")
|
||||
@@ -3245,6 +3258,22 @@ ADMIN_HTML = """<!DOCTYPE html>
|
||||
— <a href="/models-ui" style="color:#1a1a2e">Model Manager</a>
|
||||
</p>
|
||||
|
||||
<h2>Pipeline features</h2>
|
||||
<p style="font-size:0.83em;color:#666;margin-bottom:0.8em">
|
||||
Toggle enrichment steps on/off without restarting. Changes take effect immediately.
|
||||
</p>
|
||||
<div style="display:flex;flex-direction:column;gap:0.7em;margin-bottom:1em" id="features-section">
|
||||
<label style="display:flex;align-items:center;gap:0.6em;cursor:pointer;font-size:0.9em">
|
||||
<input type="checkbox" id="feat-recollection" onchange="setFeature('feature_recollection', this.checked)" style="width:16px;height:16px;cursor:pointer">
|
||||
<span><b>Recollection</b> — inject knowledge-graph context into prompts</span>
|
||||
</label>
|
||||
<label style="display:flex;align-items:center;gap:0.6em;cursor:pointer;font-size:0.9em">
|
||||
<input type="checkbox" id="feat-loop-detection" onchange="setFeature('feature_loop_detection', this.checked)" style="width:16px;height:16px;cursor:pointer">
|
||||
<span><b>Loop detection</b> — retry with mitigations on repeated identical responses</span>
|
||||
</label>
|
||||
</div>
|
||||
<div id="features-status" style="font-size:0.8em;color:#666;margin-bottom:1.5em"></div>
|
||||
|
||||
<h2>World model stats</h2>
|
||||
<div class="stats" id="stats">
|
||||
<div class="stat"><div class="stat-label">SOAS tokens</div><div class="stat-value" id="s-soas">…</div></div>
|
||||
@@ -3406,6 +3435,32 @@ ADMIN_HTML = """<!DOCTYPE html>
|
||||
: 'never';
|
||||
}}
|
||||
|
||||
async function loadFeatures() {{
  // Fetch the current config and sync the pipeline-feature checkboxes with it.
  const r = await fetch('/config');
  const cfg = (await r.json()).config;
  // A flag is enabled unless it is explicitly set to one of these spellings;
  // a missing key counts as enabled. The list mirrors the server-side
  // _feature_enabled check, including 'disabled'.
  const disabledValues = ['false', '0', 'off', 'no', 'disabled'];
  const isEnabled = key => !disabledValues.includes((cfg[key] || 'true').trim().toLowerCase());
  document.getElementById('feat-recollection').checked = isEnabled('feature_recollection');
  document.getElementById('feat-loop-detection').checked = isEnabled('feature_loop_detection');
}}
|
||||
|
||||
async function setFeature(key, enabled) {{
|
||||
const val = enabled ? 'true' : 'false';
|
||||
const r = await fetch('/config', {{method:'POST', headers:{{'Content-Type':'application/json'}}, body:JSON.stringify({{key, value:val}})}});
|
||||
const d = await r.json();
|
||||
const status = document.getElementById('features-status');
|
||||
if (d.status === 'ok') {{
|
||||
status.textContent = `${{key}} set to ${{val}}`;
|
||||
status.style.color = '#2a7a2a';
|
||||
}} else {{
|
||||
status.textContent = `Error: ${{JSON.stringify(d)}}`;
|
||||
status.style.color = '#b00';
|
||||
}}
|
||||
setTimeout(() => {{ status.textContent = ''; }}, 3000);
|
||||
}}
|
||||
|
||||
let _cfg = {{}};
|
||||
|
||||
async function loadModels() {{
|
||||
@@ -3790,6 +3845,7 @@ ADMIN_HTML = """<!DOCTYPE html>
|
||||
}}
|
||||
|
||||
loadStats();
|
||||
loadFeatures();
|
||||
loadConflicts();
|
||||
loadLog(0);
|
||||
loadModels();
|
||||
|
||||
Reference in New Issue
Block a user