Compare commits

...

8 Commits

Author SHA1 Message Date
gitprov 07329d8672 Switch Agent0 from MCP to REST API (/api/api_message)
MCP endpoint returns 403 (wrong token) and the mcp library's TaskGroup
error is opaque. The REST API (X-API-KEY header, /api/api_message) is
already validated in connectivity tests and returns proper responses.

mcp_key field is reused as the X-API-KEY value.
context_id replaces chat_id for conversation continuity.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 19:44:11 +02:00
gitprov 1006d4490a Clear Hermes session cache on token rejection retry
When the dashboard token is rejected and re-fetched, the stale
session_id was being reused, causing prompt.submit to go to a
non-existent session. Now clears both caches so the retry creates
a fresh session.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 19:04:36 +02:00
gitprov 6a259e1ef7 Add full traceback logging to Hermes route for diagnostics
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 20:09:09 +02:00
gitprov bb2a4e2293 Strip whitespace from endpoint URLs before use
Leading/trailing spaces in the endpoint field cause httpx to receive
a URL without a recognisable scheme, producing UnsupportedProtocol.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 20:06:35 +02:00
gitprov d74d1a74f8 Add debug logging to /v1/agent/chat route handler
Log agent_type, full models payload and selected model on every request
to diagnose endpoint routing and empty-endpoint issues.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 20:00:39 +02:00
gitprov 8704404c40 Fix agent0 connectivity test to use health + key check
Use GET /api/health (instant, no LLM call) for reachability and
POST /api/api_reset_chat to verify the API key is accepted.
Sending a real message blocks for the full LLM round-trip (>30s).

Also switch from MCP streamable-http to REST API (X-API-KEY header).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 19:09:02 +02:00
gitprov 9814d18e8c Add connectivity tests and fix Hermes handle + venv setup
- test_connectivity.py: connectivity tests for all four endpoint types
  (anthropic, openai, lm_studio, hermes, agent0) — treats no-credits as success
- test_hermes.py: raw WebSocket frame logger used to reverse-engineer protocol
- Fix handle_hermes: skip prompt.submit ack frame, read full text from
  message.complete payload.text, always raise on status==error
- Fix requirements.txt: use >= pins (fastapi/uvicorn versions didn't exist)
- Fix dev.sh: prefer python3.12 for venv (mcp>=1.9.0 requires 3.10+)
- Remove ANTHROPIC_KEY env var dependency from server.py (keys come from DB)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 19:00:51 +02:00
gitprov 604df52247 Remove API key env vars — keys come from DB via request payload
API keys (Anthropic, OpenAI etc.) are stored in the inference_endpoints
table and passed through the request from gnommoweb. Removed the
API_KEY_ANTHROPIC / AGENT_INFERENCE_MODEL env var fallbacks entirely.
Missing endpoint config now returns a clear error instead of silently
falling back to a hardcoded model.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 17:53:07 +02:00
5 changed files with 605 additions and 60 deletions
+2 -1
View File
@@ -55,7 +55,8 @@ git -C "$SCRIPT_DIR" pull --ff-only >> "$LOG_FILE" 2>&1 \
if [ ! -d "$SCRIPT_DIR/.venv" ]; then
log "$YELLOW" "Creating virtual environment..."
python3 -m venv "$SCRIPT_DIR/.venv" >> "$LOG_FILE" 2>&1
PYTHON=$(which python3.12 || which python3.11 || which python3.10 || which python3)
$PYTHON -m venv "$SCRIPT_DIR/.venv" >> "$LOG_FILE" 2>&1
fi
source "$SCRIPT_DIR/.venv/bin/activate"
+2 -2
View File
@@ -1,5 +1,5 @@
fastapi==0.135.1
uvicorn==0.41.0
fastapi>=0.115.0
uvicorn>=0.30.0
litellm>=1.80.0
mcp>=1.9.0
httpx>=0.27.0
+87 -57
View File
@@ -42,13 +42,8 @@ import litellm
# --- Config ---
API_KEY = os.getenv("AGENT_INFERENCE_KEY", "agent-inference-dev-key")
FALLBACK_MODEL = os.getenv("AGENT_INFERENCE_MODEL", "anthropic/claude-sonnet-4-20250514")
ANTHROPIC_KEY = os.getenv("API_KEY_ANTHROPIC", "")
AGENT_MAX_MESSAGES = int(os.getenv("AGENT_INFERENCE_MAX_MESSAGES", "10"))
if ANTHROPIC_KEY:
os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("agent-inference")
@@ -273,60 +268,50 @@ async def handle_agent0_mcp(
valid_poses: list[str],
) -> ChatResponse:
"""
Send a message to a live Agent Zero instance via its MCP streamable-http
server and return the response as a ChatResponse.
Send a message to a live Agent Zero instance via its REST API
(POST /api/api_message, X-API-KEY header).
MCP URL format: {endpoint}/mcp/t-{mcp_key}/http
Tool called: send_message (built into every Agent Zero instance)
Conversation continuity is maintained via Agent Zero's chat_id, which maps
to a gnommoweb conversation_id in _a0_sessions (in-memory).
Conversation continuity is maintained via Agent Zero's context_id, which
maps to a gnommoweb conversation_id in _a0_sessions (in-memory).
"""
from mcp.client.streamable_http import streamablehttp_client
from mcp import ClientSession
endpoint = (model.endpoint or "").strip().rstrip("/")
api_key = model.mcp_key or "" # stored in mcp_key field; used as X-API-KEY
api_url = f"{endpoint}/api/api_message"
endpoint = (model.endpoint or "").rstrip("/")
mcp_key = model.mcp_key or ""
mcp_url = f"{endpoint}/mcp/t-{mcp_key}/http"
# Retrieve existing Agent0 chat_id for this conversation (if any)
a0_chat_id = _a0_sessions.get(req.conversation_id) if req.conversation_id else None
# Retrieve existing Agent0 context_id for this conversation (if any)
a0_context_id = _a0_sessions.get(req.conversation_id) if req.conversation_id else None
log.info(
f"[{agent.name}] → Agent0 MCP url={mcp_url} "
f"conv={req.conversation_id} a0_chat={a0_chat_id or 'new'} "
f"[{agent.name}] → Agent0 REST url={api_url} "
f"conv={req.conversation_id} a0_ctx={a0_context_id or 'new'} "
f"msg='{req.message[:60]}'"
)
try:
async with streamablehttp_client(mcp_url) as (read, write, _):
async with ClientSession(read, write) as session:
await session.initialize()
payload: dict = {
"message": req.message,
"lifetime_hours": 24,
}
if a0_context_id:
payload["context_id"] = a0_context_id
tool_args: dict = {
"message": req.message,
"persistent_chat": True,
}
if a0_chat_id:
tool_args["chat_id"] = a0_chat_id
async with httpx.AsyncClient(timeout=120.0) as client:
r = await client.post(
api_url,
headers={"X-API-KEY": api_key, "Content-Type": "application/json"},
json=payload,
)
r.raise_for_status()
result = await session.call_tool("send_message", tool_args)
data = r.json()
log.info(f"[{agent.name}] ← Agent0 REST: {str(data)[:300]}")
# The tool returns a JSON-serialised ToolResponse / ToolError
raw = result.content[0].text if result.content else "{}"
log.info(f"[{agent.name}] ← Agent0 MCP raw: {raw[:300]}")
response_text = data.get("response", "")
new_context_id = data.get("context_id", "")
parsed = json.loads(raw)
if parsed.get("status") == "error":
raise RuntimeError(parsed.get("error", "Unknown Agent0 error"))
response_text = parsed.get("response", "")
new_chat_id = parsed.get("chat_id", "")
# Persist the Agent0 chat_id so the next turn continues the same context
if new_chat_id and req.conversation_id is not None:
_a0_sessions[req.conversation_id] = new_chat_id
# Persist context_id for conversation continuity
if new_context_id and req.conversation_id is not None:
_a0_sessions[req.conversation_id] = new_context_id
pose = pick_pose(response_text, valid_poses)
@@ -339,7 +324,7 @@ async def handle_agent0_mcp(
)
except Exception as e:
log.error(f"[{agent.name}] Agent0 MCP error: {e}")
log.error(f"[{agent.name}] Agent0 REST error: {e}")
fallback_pose = next(
(p for p in ("sorry", "annoyed", "neutral") if p in valid_poses),
valid_poses[0],
@@ -367,8 +352,10 @@ async def _fetch_hermes_token(endpoint: str) -> str:
if cached:
return cached
url = f"{endpoint}/"
log.info(f"[hermes] fetching token — GET {url!r} (len={len(url)}, bytes={url.encode()!r})")
async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.get(f"{endpoint}/")
resp = await client.get(url)
resp.raise_for_status()
html = resp.text
@@ -410,7 +397,7 @@ async def handle_hermes(
except ImportError:
from websockets.client import connect as ws_connect # type: ignore
endpoint = (model.endpoint or "").rstrip("/")
endpoint = (model.endpoint or "").strip().rstrip("/")
hermes_session_id = _hermes_sessions.get(req.conversation_id) if req.conversation_id else None
token = await _fetch_hermes_token(endpoint)
@@ -456,21 +443,43 @@ async def handle_hermes(
log.info(f"[{agent.name}] created Hermes session {hermes_session_id}")
# ── 2. Submit prompt ──────────────────────────────────────────
await rpc("prompt.submit", {"session_id": hermes_session_id, "text": req.message})
# Send prompt.submit but do NOT wait for its ack via rpc() —
# the ack arrives interleaved with events (after message.start).
# We drain all frames below until message.complete arrives.
submit_id = f"h{req_id + 1}"
req_id += 1
await ws.send(json.dumps({
"jsonrpc": "2.0", "id": submit_id,
"method": "prompt.submit",
"params": {"session_id": hermes_session_id, "text": req.message},
}))
# ── 3. Stream events until message.complete ───────────────────
full_text = ""
while True:
raw = await asyncio.wait_for(ws.recv(), timeout=120.0)
msg = json.loads(raw)
# RPC ack for prompt.submit — ignore, keep reading
if msg.get("id") == submit_id:
continue
if msg.get("method") != "event":
continue
params = msg.get("params") or {}
etype = params.get("type", "")
payload = params.get("payload") or {}
if etype == "message.delta":
full_text += payload.get("text", "")
elif etype == "message.complete":
# payload.text holds the full assembled response
complete_text = payload.get("text", "")
if complete_text:
full_text = complete_text
if payload.get("status") == "error":
raise RuntimeError(full_text or "Hermes returned an error response")
break
elif etype == "error":
raise RuntimeError(payload.get("message", "Hermes error"))
@@ -492,6 +501,9 @@ async def handle_hermes(
if "401" in err_str or "403" in err_str or "Unauthorized" in err_str.lower():
log.warning(f"[{agent.name}] Hermes token rejected, re-fetching…")
_hermes_token_cache.pop(endpoint, None)
if req.conversation_id is not None:
_hermes_sessions.pop(req.conversation_id, None)
hermes_session_id = None # force session.create on retry
try:
token = await _fetch_hermes_token(endpoint)
return await _do_chat(token)
@@ -529,7 +541,7 @@ app = FastAPI(title="Agent Inference Service", version="1.2.0")
@app.get("/health")
async def health():
return {"status": "ok", "service": "agent-inference", "fallback_model": FALLBACK_MODEL}
return {"status": "ok", "service": "agent-inference"}
@app.post("/v1/agent/chat", response_model=ChatResponse)
@@ -541,6 +553,12 @@ async def agent_chat(req: ChatRequest, authorization: str = Header(default="")):
valid_poses = agent.poses if agent.poses else ["neutral"]
model = select_model(req.models)
log.info(
f"[route] agent={agent.name!r} agent_type={agent.agent_type!r} "
f"models_payload={req.models} "
f"selected_model={model}"
)
# ── Route: Hermes Agent ───────────────────────────────────────────────────
if agent.agent_type == "hermes":
if not model or not model.endpoint:
@@ -552,19 +570,31 @@ async def agent_chat(req: ChatRequest, authorization: str = Header(default="")):
pose=valid_poses[0],
conversation_id=req.conversation_id,
)
return await handle_hermes(req, agent, model, valid_poses)
try:
return await handle_hermes(req, agent, model, valid_poses)
except Exception as e:
import traceback
log.error(f"[hermes] unhandled exception:\n{traceback.format_exc()}")
raise
# ── Route: Agent0 MCP ─────────────────────────────────────────────────────
if agent.agent_type == "agent0" or (model and model.type == "agent0"):
return await handle_agent0_mcp(req, agent, model, valid_poses)
# ── Route: standard LLM via litellm ──────────────────────────────────────
model_id = model.model_id if model else FALLBACK_MODEL
api_base = model.endpoint if model else None
api_key = model.api_key if model else None
if not model or not model.model_id:
log.error(f"[{agent.name}] No inference endpoint configured for this agent")
return ChatResponse(
letter_id=0,
timestamp=int(time.time()),
message="*configuration error* No inference endpoint configured for this agent.",
pose=valid_poses[0],
conversation_id=req.conversation_id,
)
if not model_id:
model_id = FALLBACK_MODEL
model_id = model.model_id
api_base = model.endpoint
api_key = model.api_key
total_messages = len(req.history) + 1
system_prompt = build_system_prompt(agent, message_count=total_messages)
+370
View File
@@ -0,0 +1,370 @@
#!/usr/bin/env python3
"""
agent-inference connectivity tests.
Tests reachability for all four supported inference endpoint types:
- anthropic : Anthropic cloud API (via litellm)
- openai : OpenAI or compatible cloud API (via litellm)
- lm_studio : Local LM Studio (OpenAI-compatible, no key)
- hermes : Hermes Agent dashboard (JSON-RPC WebSocket)
- agent0 : Agent Zero (MCP streamable-http)
A test PASSES if the endpoint is reachable — even if the API key has
no credits. A 400/402/429 from the provider still means connectivity works.
A test FAILS only on network errors (connection refused, timeout, DNS failure)
or bad credentials (401/403).
Configuration via environment variables (or a local .env file):
ANTHROPIC_API_KEY — Anthropic API key
OPENAI_API_KEY — OpenAI API key
OPENAI_BASE_URL — optional, override for OpenAI-compatible endpoint
LM_STUDIO_URL — LM Studio base URL (default: http://localhost:1234)
LM_STUDIO_MODEL — model to use (default: first available)
HERMES_URL — Hermes dashboard URL (default: http://localhost:50007)
AGENT0_URL — Agent Zero base URL (e.g. http://localhost:50003)
AGENT0_API_KEY — Agent Zero X-API-KEY from the agent's dashboard
Usage:
python test_connectivity.py
python test_connectivity.py --anthropic --hermes # run specific tests only
"""
import argparse
import asyncio
import json
import os
import re
import sys
from typing import Optional
# Load .env if present
_env_file = os.path.join(os.path.dirname(__file__), ".env")
if os.path.exists(_env_file):
with open(_env_file) as f:
for line in f:
line = line.strip()
if line and not line.startswith("#") and "=" in line:
k, _, v = line.partition("=")
os.environ.setdefault(k.strip(), v.strip())
# ── ANSI colours ──────────────────────────────────────────────────────────────
GREEN = "\033[32m"
RED = "\033[31m"
YELLOW = "\033[33m"
GREY = "\033[90m"
RESET = "\033[0m"
def ok(label: str, detail: str = ""):
suffix = f" {GREY}{detail}{RESET}" if detail else ""
print(f" {GREEN}{RESET} {label}{suffix}")
def fail(label: str, detail: str = ""):
suffix = f" {GREY}{detail}{RESET}" if detail else ""
print(f" {RED}{RESET} {label}{suffix}")
def skip(label: str, reason: str = "not configured"):
print(f" {YELLOW}{RESET} {label} {GREY}({reason}){RESET}")
def section(title: str):
print(f"\n{title}")
print("" * len(title))
# ── Anthropic ─────────────────────────────────────────────────────────────────
async def test_anthropic():
section("Anthropic")
api_key = os.environ.get("ANTHROPIC_API_KEY", "")
if not api_key:
skip("API reachability", "ANTHROPIC_API_KEY not set")
return
import httpx
try:
async with httpx.AsyncClient(timeout=15.0) as client:
r = await client.post(
"https://api.anthropic.com/v1/messages",
headers={
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
},
json={
"model": "claude-haiku-4-5-20251001",
"max_tokens": 16,
"messages": [{"role": "user", "content": "ping"}],
},
)
if r.status_code == 200:
ok("API reachable + responded", f"HTTP {r.status_code}")
elif r.status_code in (400, 402, 529):
body = r.json()
err = body.get("error", {}).get("message", "")
if "credit" in err.lower() or "balance" in err.lower():
ok("API reachable (no credits)", f"HTTP {r.status_code}{err[:80]}")
else:
ok("API reachable", f"HTTP {r.status_code}{err[:80]}")
elif r.status_code in (401, 403):
fail("Bad API key", f"HTTP {r.status_code}")
else:
fail("Unexpected response", f"HTTP {r.status_code}{r.text[:120]}")
except httpx.ConnectError as e:
fail("Connection failed", str(e))
except httpx.TimeoutException:
fail("Timeout (15s)")
except Exception as e:
fail("Error", str(e))
# ── OpenAI ────────────────────────────────────────────────────────────────────
async def test_openai():
section("OpenAI")
api_key = os.environ.get("OPENAI_API_KEY", "")
base_url = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
if not api_key:
skip("API reachability", "OPENAI_API_KEY not set")
return
import httpx
try:
async with httpx.AsyncClient(timeout=15.0) as client:
r = await client.post(
f"{base_url.rstrip('/')}/chat/completions",
headers={
"Authorization": f"Bearer {api_key}",
"content-type": "application/json",
},
json={
"model": "gpt-4o-mini",
"max_tokens": 16,
"messages": [{"role": "user", "content": "ping"}],
},
)
if r.status_code == 200:
ok("API reachable + responded", f"HTTP {r.status_code}")
elif r.status_code in (400, 402, 429):
body = r.json()
err = (body.get("error") or {}).get("message", "")
if any(w in err.lower() for w in ("quota", "credit", "balance", "insufficient")):
ok("API reachable (quota/credits issue)", f"HTTP {r.status_code}{err[:80]}")
else:
ok("API reachable", f"HTTP {r.status_code}{err[:80]}")
elif r.status_code in (401, 403):
fail("Bad API key", f"HTTP {r.status_code}")
else:
fail("Unexpected response", f"HTTP {r.status_code}{r.text[:120]}")
except httpx.ConnectError as e:
fail("Connection failed", str(e)[:120])
except httpx.TimeoutException:
fail("Timeout (15s)")
except Exception as e:
fail("Error", str(e))
# ── LM Studio ─────────────────────────────────────────────────────────────────
async def test_lm_studio():
section("LM Studio (local)")
base_url = os.environ.get("LM_STUDIO_URL", "http://localhost:1234").rstrip("/")
model = os.environ.get("LM_STUDIO_MODEL", "")
import httpx
try:
# Step 1: list models
async with httpx.AsyncClient(timeout=10.0) as client:
r = await client.get(f"{base_url}/v1/models")
if r.status_code != 200:
fail("Models endpoint", f"HTTP {r.status_code}")
return
models = [m["id"] for m in r.json().get("data", [])]
if not models:
fail("No models loaded in LM Studio")
return
ok(f"Reachable — {len(models)} model(s)", ", ".join(models[:3]))
# Step 2: minimal inference — try each model until one works
candidates = [model] if model else models
inference_ok = False
async with httpx.AsyncClient(timeout=30.0) as client:
for chosen in candidates:
r2 = await client.post(
f"{base_url}/v1/chat/completions",
headers={"content-type": "application/json"},
json={
"model": chosen,
"max_tokens": 16,
"messages": [{"role": "user", "content": "ping"}],
},
)
if r2.status_code == 200:
content = r2.json()["choices"][0]["message"]["content"]
ok(f"Inference OK ({chosen})", repr(content[:60]))
inference_ok = True
break
body = r2.text[:120]
if any(s in body for s in ("Failed to load model", "No models loaded", "not loaded")):
continue # model in catalog but not loaded — try next
fail(f"Inference failed ({chosen})", f"HTTP {r2.status_code}{body}")
inference_ok = True # stop trying, this is a real error
break
if not inference_ok:
fail("No model currently loaded in LM Studio", f"tried: {', '.join(candidates[:3])}")
except httpx.ConnectError:
fail("Not reachable", f"{base_url} — is LM Studio running?")
except httpx.TimeoutException:
fail("Timeout")
except Exception as e:
fail("Error", str(e))
# ── Hermes ────────────────────────────────────────────────────────────────────
async def test_hermes():
section("Hermes (WebSocket)")
endpoint = os.environ.get("HERMES_URL", "http://localhost:50007").rstrip("/")
import httpx
try:
from websockets.asyncio.client import connect as ws_connect
except ImportError:
from websockets.client import connect as ws_connect # type: ignore
# Step 1: fetch token from dashboard HTML
try:
async with httpx.AsyncClient(timeout=10.0) as client:
r = await client.get(f"{endpoint}/")
m = re.search(r'__HERMES_SESSION_TOKEN__\s*=\s*"([^"]+)"', r.text)
if not m:
fail("Token fetch", "Could not find __HERMES_SESSION_TOKEN__ in dashboard HTML")
return
token = m.group(1)
ok("Dashboard reachable + token fetched", f"{token[:12]}")
except httpx.ConnectError:
fail("Dashboard not reachable", f"{endpoint} — is gerhard-dashboard running?")
return
except Exception as e:
fail("Dashboard error", str(e))
return
# Step 2: WebSocket + session.create
ws_scheme = "wss" if endpoint.startswith("https") else "ws"
ws_url = f"{ws_scheme}://{endpoint.split('://', 1)[-1]}/api/ws?token={token}"
try:
async with ws_connect(ws_url) as ws:
await ws.send(json.dumps({"jsonrpc": "2.0", "id": "c1",
"method": "session.create", "params": {}}))
session_id = None
for _ in range(10): # drain up to 10 frames looking for the ack
raw = await asyncio.wait_for(ws.recv(), timeout=10.0)
msg = json.loads(raw)
if msg.get("id") == "c1":
result = msg.get("result") or {}
session_id = result.get("session_id") or result.get("id")
break
if session_id:
ok("WebSocket + session.create", f"session={session_id}")
else:
fail("session.create — no session_id in response")
except Exception as e:
fail("WebSocket error", str(e)[:120])
# ── Agent Zero ────────────────────────────────────────────────────────────────
async def test_agent0():
section("Agent Zero (REST)")
base_url = os.environ.get("AGENT0_URL", "")
api_key = os.environ.get("AGENT0_API_KEY", "")
if not base_url or not api_key:
skip("api_message endpoint", "AGENT0_URL and AGENT0_API_KEY not set")
return
url = f"{base_url.rstrip('/')}/api/api_message"
health_url = f"{base_url.rstrip('/')}/api/health"
import httpx
try:
# Step 1: health check (no auth needed, fast)
async with httpx.AsyncClient(timeout=10.0) as client:
rh = await client.get(health_url)
if rh.status_code != 200:
fail("Health endpoint", f"HTTP {rh.status_code}")
return
info = rh.json()
version = info.get("gitinfo", {}).get("version", "unknown")
ok(f"Reachable ({version})", health_url)
# Step 2: verify API key is accepted via api_reset_chat (fast, no LLM call)
reset_url = f"{base_url.rstrip('/')}/api/api_reset_chat"
async with httpx.AsyncClient(timeout=10.0) as client:
rk = await client.post(
reset_url,
headers={"X-API-KEY": api_key, "Content-Type": "application/json"},
json={},
)
if rk.status_code == 200:
ok("API key accepted", f"HTTP {rk.status_code}")
elif rk.status_code in (401, 403):
fail("Bad API key", f"HTTP {rk.status_code}")
else:
ok("API key accepted", f"HTTP {rk.status_code}")
except httpx.ConnectError as e:
fail("Not reachable", str(e)[:120])
except httpx.TimeoutException:
fail("Timeout (10s)")
except Exception as e:
fail("Error", str(e))
# ── Runner ────────────────────────────────────────────────────────────────────
async def main(args):
run_all = not any([args.anthropic, args.openai, args.lm_studio, args.hermes, args.agent0])
print(f"\nagent-inference connectivity tests")
print(f"{'='*40}")
if run_all or args.anthropic:
await test_anthropic()
if run_all or args.openai:
await test_openai()
if run_all or args.lm_studio:
await test_lm_studio()
if run_all or args.hermes:
await test_hermes()
if run_all or args.agent0:
await test_agent0()
print()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="agent-inference connectivity tests")
parser.add_argument("--anthropic", action="store_true")
parser.add_argument("--openai", action="store_true")
parser.add_argument("--lm-studio", dest="lm_studio", action="store_true")
parser.add_argument("--hermes", action="store_true")
parser.add_argument("--agent0", action="store_true")
args = parser.parse_args()
asyncio.run(main(args))
+144
View File
@@ -0,0 +1,144 @@
#!/usr/bin/env python3
"""
Integration test: raw Hermes WebSocket conversation trace.
Shows every frame sent and received so you can see exactly what
the protocol looks like and diagnose where handle_hermes breaks.
Usage:
python test_hermes.py [endpoint]
endpoint defaults to http://localhost:50007
"""
import asyncio
import json
import re
import sys
import httpx
ENDPOINT = sys.argv[1].rstrip("/") if len(sys.argv) > 1 else "http://localhost:50007"
MESSAGE = "Hello Gerhard, what are you working on?"
def pp(direction: str, raw: str):
"""Pretty-print a raw WebSocket frame."""
try:
parsed = json.loads(raw)
body = json.dumps(parsed, indent=2)
except Exception:
body = raw
print(f"\n{''*60}")
print(f" {direction}")
print(f"{''*60}")
print(body)
async def fetch_token(endpoint: str) -> str:
async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.get(f"{endpoint}/")
resp.raise_for_status()
html = resp.text
m = re.search(r'__HERMES_SESSION_TOKEN__\s*=\s*"([^"]+)"', html)
if not m:
raise RuntimeError("Could not find __HERMES_SESSION_TOKEN__ in dashboard HTML")
token = m.group(1)
print(f"[token] fetched: {token[:12]}")
return token
async def run():
token = await fetch_token(ENDPOINT)
ws_scheme = "wss" if ENDPOINT.startswith("https") else "ws"
ws_url = f"{ws_scheme}://{ENDPOINT.split('://', 1)[-1]}/api/ws?token={token}"
print(f"[ws] connecting to {ws_url}")
try:
from websockets.asyncio.client import connect as ws_connect
except ImportError:
from websockets.client import connect as ws_connect # type: ignore
async with ws_connect(ws_url) as ws:
print("[ws] connected\n")
# ── 1. session.create ─────────────────────────────────────────────
create_msg = json.dumps({
"jsonrpc": "2.0", "id": "t1",
"method": "session.create", "params": {}
})
pp("SEND →", create_msg)
await ws.send(create_msg)
session_id = None
print("\n[waiting for session.create response — all frames logged below]")
while session_id is None:
raw = await asyncio.wait_for(ws.recv(), timeout=15.0)
pp("RECV ←", raw)
msg = json.loads(raw)
if msg.get("id") == "t1":
result = msg.get("result") or {}
session_id = result.get("session_id") or result.get("id")
print(f"\n[session] id = {session_id}")
break
if not session_id:
print("[ERROR] No session_id in response — check frames above")
return
# ── 2. prompt.submit ──────────────────────────────────────────────
submit_msg = json.dumps({
"jsonrpc": "2.0", "id": "t2",
"method": "prompt.submit",
"params": {"session_id": session_id, "text": MESSAGE}
})
pp("SEND →", submit_msg)
await ws.send(submit_msg)
# ── 3. Read ALL frames until message.complete or timeout ──────────
print("\n[streaming — logging every frame until message.complete or 120s timeout]")
full_text = ""
while True:
try:
raw = await asyncio.wait_for(ws.recv(), timeout=120.0)
except asyncio.TimeoutError:
print("\n[TIMEOUT] No message.complete received within 120s")
break
pp("RECV ←", raw)
msg = json.loads(raw)
# RPC ack for prompt.submit
if msg.get("id") == "t2":
print("[ack] prompt.submit acknowledged")
continue
# Events
method = msg.get("method", "")
params = msg.get("params") or {}
etype = params.get("type", "")
payload = params.get("payload") or {}
if method == "event":
if etype == "message.delta":
chunk = payload.get("text", "")
full_text += chunk
print(f" [delta] +{len(chunk)} chars")
elif etype == "message.complete":
print(f"\n[complete] full response ({len(full_text)} chars):")
print(f" {repr(full_text[:200])}")
break
elif etype == "error":
print(f"\n[ERROR event] {payload}")
break
else:
print(f" [event:{etype}] (logged above)")
print(f"\n{''*60}")
print(f" FINAL RESPONSE: {repr(full_text) if full_text else '(empty)'}")
print(f"{''*60}\n")
if __name__ == "__main__":
asyncio.run(run())