Configure all agents for local inference via festinger

- All agents now use lm_studio provider → http://festinger:11434
- ctx_length set to 32768 for Omega13 (128GB RAM); reduce for smaller machines
- Model: qwen2.5-7b-instruct (update to larger model on Omega13)
- Each agent has a unique A0_PERSISTENT_RUNTIME_ID for stable mcp_server_token
- agent_profile=agent0 and mcp_server_enabled=true set in all settings.json
- agents/agent0/prompts/ placeholder created for pull-on-start persona override
- pull-agent-identity.py now writes to usr/agents/agent0/prompts/ (correct override path)
- festinger: agent_frameworks table auto-seeded on startup with all 5 agents
- festinger: num_ctx injection, agent_frameworks CRUD + admin UI, /chat endpoint
- festinger: removed debug system_prompt logging

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-03 13:00:07 +02:00
parent 4a2b682f6d
commit 8e97cbc97a
24 changed files with 609 additions and 101 deletions
+36
View File
@@ -88,6 +88,42 @@ async def init_schema(pool: asyncpg.Pool) -> None:
await conn.execute(
"CREATE INDEX IF NOT EXISTS rl_created_idx ON recollection_log (created_at DESC)"
)
# Migration: per-model context length (0 = don't inject num_ctx)
await conn.execute(
"ALTER TABLE models ADD COLUMN IF NOT EXISTS ctx_length INT NOT NULL DEFAULT 0"
)
# Migration: agent framework routing (agent_id → Agent Zero endpoint + key)
await conn.execute(
"""
CREATE TABLE IF NOT EXISTS agent_frameworks (
agent_id INTEGER PRIMARY KEY,
endpoint_url TEXT NOT NULL,
api_key TEXT NOT NULL DEFAULT '',
label TEXT NOT NULL DEFAULT '',
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
)
"""
)
# Seed default agent frameworks (INSERT ... ON CONFLICT DO NOTHING — never overwrites manual changes).
# API keys are mcp_server_token values derived from each agent's fixed
# A0_PERSISTENT_RUNTIME_ID + AUTH_LOGIN + AUTH_PASSWORD (see agents/<name>/.env).
# Endpoint URLs use Docker container names on the internal network.
for agent_id, label, endpoint_url, api_key in [
(1, "dobby", "http://dobby:80", "-d1yhCLT72cEFpiD"),
(2, "gemma", "http://gemma:80", "71I61Jd54p9wy20P"),
(3, "gunnar", "http://gunnar:80", "00oDLpLbWuS16IzE"),
(5, "rind", "http://rind:80", "3GRS5iP91Y2qQNLr"),
(6, "abyssinthia", "http://abyssinthia:80", "_XxQlg7qAxhmlyJh"),
]:
await conn.execute(
"""
INSERT INTO agent_frameworks (agent_id, label, endpoint_url, api_key)
VALUES ($1, $2, $3, $4)
ON CONFLICT (agent_id) DO NOTHING
""",
agent_id, label, endpoint_url, api_key,
)
log.info("schema applied")
+307 -15
View File
@@ -76,6 +76,23 @@ async def _feature_enabled(pool, key: str, default: bool = True) -> bool:
return val.strip().lower() not in ("false", "0", "off", "no", "disabled")
# ---------------------------------------------------------------------------
# Model ctx_length cache — model_name → num_ctx (0 = no injection)
# ---------------------------------------------------------------------------
_model_ctx_cache: dict[str, int] = {}


async def _reload_model_ctx_cache(pool) -> None:
    """Refresh the in-process model_name → ctx_length map from the models table.

    Only rows with ctx_length > 0 are selected, so a model that is absent
    from the cache has no context length configured.
    """
    query = "SELECT model_name, ctx_length FROM models WHERE ctx_length > 0"
    async with pool.acquire() as conn:
        records = await conn.fetch(query)
    # Empty and refill the existing dict in place rather than rebinding the name.
    _model_ctx_cache.clear()
    _model_ctx_cache.update(
        (rec["model_name"], rec["ctx_length"]) for rec in records
    )
    log.info("model_ctx_cache reloaded entries=%d", len(_model_ctx_cache))
# ---------------------------------------------------------------------------
# Lifespan — startup / shutdown
# ---------------------------------------------------------------------------
@@ -96,6 +113,7 @@ async def lifespan(app: FastAPI):
await bootstrap_dimensions(pool)
await bootstrap_english_dictionary(pool)
await warm_cache(pool)
await _reload_model_ctx_cache(pool)
await start_worker(pool)
# Schedule saliency flush every 30 s
@@ -397,11 +415,18 @@ async def call_anthropic(body: dict, upstream: str, headers: dict) -> tuple[str,
async def call_openai(body: dict, upstream: str, headers: dict) -> tuple[str, dict]:
"""
Forward a request to an OpenAI-compatible chat completions endpoint (non-streaming).
Injects num_ctx for Ollama if the model has a configured context length.
Returns (assistant_text, raw_response_dict).
"""
body = dict(body)
body["stream"] = False
model = body.get("model", "?")
# Inject num_ctx for Ollama if this model has a configured context length
ctx = _model_ctx_cache.get(model, 0)
if ctx > 0 and "num_ctx" not in body:
body["num_ctx"] = ctx
log.info("injecting num_ctx=%d for model=%s", ctx, model)
url = f"{upstream}/v1/chat/completions"
log.info("upstream_call provider=openai model=%s url=%s", model, url)
t0 = time.perf_counter()
@@ -1156,11 +1181,15 @@ async def gnommoweb_chat(request: Request) -> dict:
"""
Entry point for gnommoweb agent chat.
Looks up the agent's framework config (endpoint_url + api_key) from the
agent_frameworks table, forwards the message to Agent Zero's /api/api_message
endpoint, and returns the response.
Expected body:
{
"agent_id": <int>,
"conversation_id": <int|null>,
"context_id": <str|null>,
"context_id": <str|null>, # Agent Zero context id — pass back on subsequent turns
"user_id": <int>,
"message": <str>,
"history": [{"role": "user"|"assistant", "content": <str>}]
@@ -1176,24 +1205,128 @@ async def gnommoweb_chat(request: Request) -> dict:
data = await request.json()
agent_id = data.get("agent_id")
conversation_id = data.get("conversation_id")
context_id = data.get("context_id")
context_id = data.get("context_id") or ""
user_id = data.get("user_id")
message = data.get("message", "")
history = data.get("history", [])
log.info(
"gnommoweb_chat agent_id=%s conv=%s user=%s msg_len=%d hist=%d",
agent_id, conversation_id, user_id, len(message), len(history),
"gnommoweb_chat agent_id=%s conv=%s user=%s msg_len=%d ctx=%s",
agent_id, conversation_id, user_id, len(message), context_id or "(new)",
)
# TODO: route to agent framework (Agent Zero, etc.) based on agent config
# For now return a stub so gnommoweb has a working endpoint to call
return {
"message": f"[festinger stub] agent_id={agent_id} received: {message[:80]}",
"pose": "neutral",
pool = request.app.state.pool
# Look up agent framework config
async with pool.acquire() as conn:
row = await conn.fetchrow(
"SELECT endpoint_url, api_key, label FROM agent_frameworks WHERE agent_id = $1",
agent_id,
)
if not row:
log.warning("gnommoweb_chat no framework configured for agent_id=%s", agent_id)
return {
"message": f"[festinger] No Agent Zero endpoint configured for agent_id={agent_id}. Add it in the Festinger admin under Agent Frameworks.",
"pose": "neutral",
"context_id": context_id or None,
}
url = row["endpoint_url"].rstrip("/") + "/api/api_message"
headers = {
"Content-Type": "application/json",
"X-API-KEY": row["api_key"],
}
body = {
"message": message,
"context_id": context_id,
}
log.info("gnommoweb_chat forwarding to %s (agent=%s label=%s)", url, agent_id, row["label"])
try:
async with httpx.AsyncClient(timeout=120.0) as client:
r = await client.post(url, json=body, headers=headers)
if not r.is_success:
log.error("gnommoweb_chat agent_zero error %d: %s", r.status_code, r.text[:200])
return {
"message": f"[festinger] Agent Zero returned HTTP {r.status_code}: {r.text[:200]}",
"pose": "neutral",
"context_id": context_id or None,
}
resp = r.json()
new_context_id = resp.get("context_id") or context_id or None
reply = resp.get("response", "")
log.info("gnommoweb_chat reply len=%d new_ctx=%s", len(reply), new_context_id)
return {
"message": reply,
"pose": "neutral",
"context_id": new_context_id,
}
except httpx.TimeoutException:
log.error("gnommoweb_chat timeout forwarding to %s", url)
return {"message": "[festinger] Agent Zero timed out.", "pose": "neutral", "context_id": context_id or None}
except Exception as exc:
log.error("gnommoweb_chat error forwarding to %s: %s", url, exc)
return {"message": f"[festinger] Error: {exc}", "pose": "neutral", "context_id": context_id or None}
# ---------------------------------------------------------------------------
# /agent-frameworks — per-agent Agent Zero endpoint config
# ---------------------------------------------------------------------------
@app.get("/agent-frameworks")
async def list_agent_frameworks(request: Request) -> dict:
    """Return every agent_frameworks row, ordered by agent_id.

    The response is {"agent_frameworks": [...]}, one dict per row, with
    updated_at rendered through isoformat().
    """
    query = (
        "SELECT agent_id, endpoint_url, api_key, label, updated_at FROM agent_frameworks ORDER BY agent_id"
    )
    async with request.app.state.pool.acquire() as conn:
        records = await conn.fetch(query)

    frameworks = []
    for rec in records:
        frameworks.append(
            {
                "agent_id": rec["agent_id"],
                "endpoint_url": rec["endpoint_url"],
                # NOTE(review): the key is returned unmasked — confirm this
                # route is only reachable by trusted admin clients.
                "api_key": rec["api_key"],
                "label": rec["label"],
                "updated_at": rec["updated_at"].isoformat(),
            }
        )
    return {"agent_frameworks": frameworks}
@app.put("/agent-frameworks/{agent_id}")
async def upsert_agent_framework(agent_id: int, request: Request) -> dict:
    """Create or replace the framework config row for agent_id.

    Reads endpoint_url, api_key and label from the JSON body; a missing or
    falsy value becomes "" and every value is whitespace-stripped.

    Returns {"error": ...} when endpoint_url is empty, otherwise
    {"status": "ok", "agent_id": agent_id}.
    """
    payload = await request.json()
    endpoint_url, api_key, label = (
        (payload.get(field) or "").strip()
        for field in ("endpoint_url", "api_key", "label")
    )
    if endpoint_url == "":
        return {"error": "endpoint_url is required"}

    # Single-statement upsert: insert, or overwrite the existing row and bump updated_at.
    upsert_sql = """
        INSERT INTO agent_frameworks (agent_id, endpoint_url, api_key, label, updated_at)
        VALUES ($1, $2, $3, $4, now())
        ON CONFLICT (agent_id) DO UPDATE
        SET endpoint_url = $2, api_key = $3, label = $4, updated_at = now()
        """
    async with request.app.state.pool.acquire() as conn:
        await conn.execute(upsert_sql, agent_id, endpoint_url, api_key, label)

    log.info("agent_framework upserted agent_id=%d url=%s label=%s", agent_id, endpoint_url, label)
    return {"status": "ok", "agent_id": agent_id}
@app.delete("/agent-frameworks/{agent_id}")
async def delete_agent_framework(agent_id: int, request: Request) -> dict:
    """Delete the framework config row for agent_id.

    Returns {"error": ...} when no row was removed, otherwise
    {"status": "ok", "deleted": agent_id}.
    """
    async with request.app.state.pool.acquire() as conn:
        status = await conn.execute("DELETE FROM agent_frameworks WHERE agent_id=$1", agent_id)

    # The last whitespace-separated token of the status string is read as the
    # number of affected rows; an empty status counts as zero.
    if status:
        removed = int(status.split()[-1])
    else:
        removed = 0

    if removed == 0:
        return {"error": f"agent_id {agent_id} not found"}

    log.info("agent_framework deleted agent_id=%d", agent_id)
    return {"status": "ok", "deleted": agent_id}
@app.post("/api/chat")
async def chat(request: Request) -> Response:
@@ -1831,11 +1964,12 @@ async def list_models(request: Request) -> dict:
pool = request.app.state.pool
async with pool.acquire() as conn:
rows = await conn.fetch(
"SELECT id, provider, model_name, base_url, created_at FROM models ORDER BY id"
"SELECT id, provider, model_name, base_url, ctx_length, created_at FROM models ORDER BY id"
)
return {"models": [
{"id": r["id"], "provider": r["provider"], "model_name": r["model_name"],
"base_url": r["base_url"] or "", "created_at": r["created_at"].isoformat()}
"base_url": r["base_url"] or "", "ctx_length": r["ctx_length"],
"created_at": r["created_at"].isoformat()}
for r in rows
]}
@@ -1848,6 +1982,7 @@ async def create_model(request: Request) -> dict:
model_name = data.get("model_name", "").strip()
api_key = data.get("api_key", "").strip()
base_url = data.get("base_url", "").strip()
ctx_length = int(data.get("ctx_length") or 0)
if not provider or not model_name:
return {"error": "provider and model_name are required"}
if provider not in ("claude", "openai", "lm-studio"):
@@ -1856,13 +1991,33 @@ async def create_model(request: Request) -> dict:
return {"error": "api_key is required for claude provider"}
async with pool.acquire() as conn:
row = await conn.fetchrow(
"INSERT INTO models (provider, model_name, api_key, base_url) VALUES ($1,$2,$3,$4) RETURNING id",
provider, model_name, api_key, base_url,
"INSERT INTO models (provider, model_name, api_key, base_url, ctx_length) VALUES ($1,$2,$3,$4,$5) RETURNING id",
provider, model_name, api_key, base_url, ctx_length,
)
log.info("model created id=%d provider=%s model=%s base_url=%s", row["id"], provider, model_name, base_url)
await _reload_model_ctx_cache(pool)
log.info("model created id=%d provider=%s model=%s ctx_length=%d", row["id"], provider, model_name, ctx_length)
return {"status": "ok", "id": row["id"]}
@app.put("/models/{model_id}")
async def update_model(model_id: int, request: Request) -> dict:
    """Set the ctx_length of an existing model and refresh the ctx cache.

    Only ctx_length is updated by this route. The JSON body is expected to
    be an object such as {"ctx_length": 8192}; a missing, null, empty or zero
    value is stored as 0.

    Returns {"error": ...} when the body is invalid or the model id does not
    exist, otherwise {"status": "ok", "id": model_id, "ctx_length": ctx_length}.
    """
    pool = request.app.state.pool
    data = await request.json()
    if not isinstance(data, dict):
        return {"error": "request body must be a JSON object"}

    # Validate up front so malformed input yields the file's usual
    # {"error": ...} response instead of an unhandled exception.
    try:
        ctx_length = int(data.get("ctx_length") or 0)
    except (TypeError, ValueError):
        return {"error": "ctx_length must be an integer"}
    # models.ctx_length is a PostgreSQL INT column; keep the value inside its
    # range and reject negatives, which have no defined meaning.
    if not 0 <= ctx_length <= 2_147_483_647:
        return {"error": "ctx_length must be between 0 and 2147483647"}

    async with pool.acquire() as conn:
        result = await conn.execute(
            "UPDATE models SET ctx_length=$1 WHERE id=$2",
            ctx_length, model_id,
        )
    # The last token of the status string is read as the affected row count.
    updated = int(result.split()[-1]) if result else 0
    if not updated:
        return {"error": f"model {model_id} not found"}

    await _reload_model_ctx_cache(pool)
    log.info("model updated id=%d ctx_length=%d", model_id, ctx_length)
    return {"status": "ok", "id": model_id, "ctx_length": ctx_length}
@app.delete("/models/{model_id}")
async def delete_model(model_id: int, request: Request) -> dict:
pool = request.app.state.pool
@@ -1871,6 +2026,7 @@ async def delete_model(model_id: int, request: Request) -> dict:
deleted = int(result.split()[-1]) if result else 0
if not deleted:
return {"error": f"model {model_id} not found"}
await _reload_model_ctx_cache(pool)
log.info("model deleted id=%d", model_id)
return {"status": "ok", "deleted": model_id}
@@ -3489,6 +3645,48 @@ ADMIN_HTML = """<!DOCTYPE html>
&nbsp;&mdash;&nbsp;<a href="/models-ui" style="color:#1a1a2e">Model Manager</a>
</p>
<h2>Agent Frameworks</h2>
<p style="font-size:0.83em;color:#666;margin-bottom:0.8em">
Map each gnommoweb <code>agent_id</code> to an Agent Zero endpoint. Festinger forwards
<code>POST /chat</code> requests here. The API key is Agent Zero's <code>mcp_server_token</code>
(derived from runtime_id + AUTH_LOGIN + AUTH_PASSWORD — stable across restarts).
</p>
<table id="af-table" style="width:100%;border-collapse:collapse;font-size:0.85em;margin-bottom:0.8em">
<thead>
<tr style="text-align:left;border-bottom:1px solid #ccc">
<th style="padding:4px 8px">Agent ID</th>
<th style="padding:4px 8px">Label</th>
<th style="padding:4px 8px">Endpoint URL</th>
<th style="padding:4px 8px">API Key</th>
<th style="padding:4px 8px"></th>
</tr>
</thead>
<tbody id="af-rows"><tr><td colspan="5" style="padding:6px 8px;color:#999">Loading…</td></tr></tbody>
</table>
<details style="margin-bottom:1.5em">
<summary style="cursor:pointer;font-size:0.85em;color:#444;user-select:none">Add / edit framework</summary>
<div style="display:grid;grid-template-columns:80px 1fr 1fr 1fr auto;gap:6px;margin-top:0.6em;align-items:end">
<div>
<label style="font-size:0.8em;color:#555">Agent ID</label>
<input id="af-agent-id" type="number" min="1" style="width:100%;padding:4px 6px;font-family:monospace;border:1px solid #ccc;border-radius:3px">
</div>
<div>
<label style="font-size:0.8em;color:#555">Label (e.g. gunnar)</label>
<input id="af-label" type="text" style="width:100%;padding:4px 6px;font-family:monospace;border:1px solid #ccc;border-radius:3px" placeholder="gunnar">
</div>
<div>
<label style="font-size:0.8em;color:#555">Endpoint URL</label>
<input id="af-url" type="text" style="width:100%;padding:4px 6px;font-family:monospace;border:1px solid #ccc;border-radius:3px" placeholder="http://gunnar:80">
</div>
<div>
<label style="font-size:0.8em;color:#555">API Key (mcp_server_token)</label>
<input id="af-key" type="text" style="width:100%;padding:4px 6px;font-family:monospace;border:1px solid #ccc;border-radius:3px">
</div>
<button onclick="saveFramework(this)" class="primary" style="height:30px;white-space:nowrap;align-self:end">Save</button>
</div>
</details>
<div id="af-status" style="font-size:0.8em;color:#666;margin-bottom:1.5em"></div>
<h2>Pipeline features</h2>
<p style="font-size:0.83em;color:#666;margin-bottom:0.8em">
Toggle enrichment steps on/off without restarting. Changes take effect immediately.
@@ -3682,6 +3880,77 @@ ADMIN_HTML = """<!DOCTYPE html>
: 'never';
}}
async function loadAgentFrameworks() {{
const r = await fetch('/agent-frameworks');
const d = await r.json();
const tbody = document.getElementById('af-rows');
if (!d.agent_frameworks || !d.agent_frameworks.length) {{
tbody.innerHTML = '<tr><td colspan="5" style="padding:6px 8px;color:#999">No frameworks configured yet.</td></tr>';
return;
}}
tbody.innerHTML = d.agent_frameworks.map(f => `
<tr style="border-bottom:1px solid #eee">
<td style="padding:4px 8px;font-family:monospace">${{f.agent_id}}</td>
<td style="padding:4px 8px">${{f.label || ''}}</td>
<td style="padding:4px 8px;font-family:monospace">${{f.endpoint_url}}</td>
<td style="padding:4px 8px;font-family:monospace">${{f.api_key ? f.api_key.slice(0,8) + '' : ''}}</td>
<td style="padding:4px 8px">
<button onclick="editFramework(${{f.agent_id}}, '${{f.label}}', '${{f.endpoint_url}}', '${{f.api_key}}')" style="font-size:0.8em;padding:2px 6px">Edit</button>
<button onclick="deleteFramework(${{f.agent_id}}, this)" style="font-size:0.8em;padding:2px 6px;margin-left:4px;color:#c00">Delete</button>
</td>
</tr>
`).join('');
}}
// Copy one framework's values into the add/edit form inputs.
function editFramework(agentId, label, url, key) {{
  const assignments = [
    ['af-agent-id', agentId],
    ['af-label', label],
    ['af-url', url],
    ['af-key', key],
  ];
  for (const [inputId, value] of assignments) {{
    document.getElementById(inputId).value = value;
  }}
}}
async function saveFramework(btn) {{
const agentId = parseInt(document.getElementById('af-agent-id').value);
const label = document.getElementById('af-label').value.trim();
const url = document.getElementById('af-url').value.trim();
const key = document.getElementById('af-key').value.trim();
if (!agentId || !url) {{
document.getElementById('af-status').textContent = 'Agent ID and Endpoint URL are required.';
return;
}}
btn.disabled = true;
document.getElementById('af-status').textContent = 'Saving…';
try {{
const r = await fetch(`/agent-frameworks/${{agentId}}`, {{
method: 'PUT',
headers: {{'Content-Type': 'application/json'}},
body: JSON.stringify({{endpoint_url: url, api_key: key, label}})
}});
const d = await r.json();
document.getElementById('af-status').textContent = d.error ? 'Error: ' + d.error : 'Saved.';
await loadAgentFrameworks();
}} catch(e) {{
document.getElementById('af-status').textContent = 'Error: ' + e.message;
}} finally {{
btn.disabled = false;
}}
}}
async function deleteFramework(agentId, btn) {{
if (!confirm(`Delete framework for agent_id=${{agentId}}?`)) return;
btn.disabled = true;
try {{
const r = await fetch(`/agent-frameworks/${{agentId}}`, {{method: 'DELETE'}});
const d = await r.json();
document.getElementById('af-status').textContent = d.error ? 'Error: ' + d.error : `Deleted agent_id=${{agentId}}.`;
await loadAgentFrameworks();
}} catch(e) {{
document.getElementById('af-status').textContent = 'Error: ' + e.message;
}} finally {{
btn.disabled = false;
}}
}}
async function loadFeatures() {{
const r = await fetch('/config');
const cfg = (await r.json()).config;
@@ -4154,6 +4423,7 @@ ADMIN_HTML = """<!DOCTYPE html>
}}
loadStats();
loadAgentFrameworks();
loadFeatures();
loadTestChatModels();
loadConflicts();
@@ -4453,6 +4723,9 @@ function renderModels() {
const endpoint = m.base_url
? `<span style="color:#555;font-size:0.85em">${m.base_url}</span>`
: '<span style="color:#ccc">—</span>';
const ctx = m.ctx_length > 0
? `<span style="font-family:monospace;font-size:0.85em">${m.ctx_length.toLocaleString()}</span>`
: '<span style="color:#ccc">—</span>';
const rBtn = resolveId === sid
? `<button class="btn btn-sm btn-active" disabled>✓ resolve</button>`
: `<button class="btn btn-sm" onclick="setRole('resolve_model_id','${sid}')">set resolve</button>`;
@@ -4464,6 +4737,7 @@ function renderModels() {
<td>${providerPill(m.provider)}</td>
<td>${m.model_name}</td>
<td>${endpoint}</td>
<td>${ctx} <button class="btn btn-sm" onclick="editCtx(${m.id},${m.ctx_length},this)" title="Set context length">✎</button></td>
<td>${roleBadge(m.id)}</td>
<td style="display:flex;gap:6px;padding:6px 10px">
${rBtn} ${wBtn}
@@ -4499,6 +4773,24 @@ async function deleteModel(id, btn) {
await load();
}
// ─── Set context length ───────────────────────────────────────────────────────
// Prompt for a new context length for model `id` and PUT it to /models/<id>.
// Cancelling the prompt does nothing; empty or non-numeric input is sent as 0.
// The button is re-enabled in `finally`, and network or parse failures are
// shown as an error toast instead of leaving the button disabled.
async function editCtx(id, current, btn) {
  const val = prompt(`Context length for model #${id} (0 = don't inject num_ctx, e.g. 8192):`, current);
  if (val === null) return;
  // Explicit radix so input such as "0x10" is not read as hexadecimal.
  const ctx_length = parseInt(val, 10) || 0;
  if (ctx_length < 0) {
    toast('Error: context length must be 0 or greater', true);
    return;
  }
  btn.disabled = true;
  try {
    const r = await fetch('/models/' + id, {
      method: 'PUT',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({ctx_length}),
    });
    const d = await r.json();
    if (d.error) { toast('Error: ' + d.error, true); return; }
    toast(`Model #${id} context length set to ${ctx_length || 'auto'}`);
    await load();
  } catch (e) {
    toast('Error: ' + e.message, true);
  } finally {
    btn.disabled = false;
  }
}
// ─── LM Studio discover ───────────────────────────────────────────────────────
async function discoverModels(btn) {
const base = document.getElementById('lms-url').value.trim();