Add connectivity tests and fix Hermes handle + venv setup

- test_connectivity.py: connectivity tests for all five endpoint types
  (anthropic, openai, lm_studio, hermes, agent0) — treats no-credits as success
- test_hermes.py: raw WebSocket frame logger used to reverse-engineer protocol
- Fix handle_hermes: skip prompt.submit ack frame, read full text from
  message.complete payload.text, always raise on status==error
- Fix requirements.txt: use >= pins (fastapi/uvicorn versions didn't exist)
- Fix dev.sh: prefer python3.12 for venv (mcp>=1.9.0 requires 3.10+)
- Remove ANTHROPIC_KEY env var dependency from server.py (keys come from DB)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 19:00:51 +02:00
parent 604df52247
commit 9814d18e8c
5 changed files with 521 additions and 4 deletions
+350
View File
@@ -0,0 +1,350 @@
#!/usr/bin/env python3
"""
agent-inference connectivity tests.
Tests reachability for all five supported inference endpoint types:
- anthropic : Anthropic cloud API (via litellm)
- openai : OpenAI or compatible cloud API (via litellm)
- lm_studio : Local LM Studio (OpenAI-compatible, no key)
- hermes : Hermes Agent dashboard (JSON-RPC WebSocket)
- agent0 : Agent Zero (MCP streamable-http)
A test PASSES if the endpoint is reachable — even if the API key has
no credits. A 400/402/429 from the provider still means connectivity works.
A test FAILS only on network errors (connection refused, timeout, DNS failure)
or bad credentials (401/403).
Configuration via environment variables (or a local .env file):
ANTHROPIC_API_KEY — Anthropic API key
OPENAI_API_KEY — OpenAI API key
OPENAI_BASE_URL — optional, override for OpenAI-compatible endpoint
LM_STUDIO_URL — LM Studio base URL (default: http://localhost:1234)
LM_STUDIO_MODEL — model to use (default: first available)
HERMES_URL — Hermes dashboard URL (default: http://localhost:50007)
AGENT0_URL — Agent Zero base URL
AGENT0_MCP_KEY — Agent Zero MCP token
Usage:
python test_connectivity.py
python test_connectivity.py --anthropic --hermes # run specific tests only
"""
import argparse
import asyncio
import json
import os
import re
import sys
from typing import Optional
# Load .env if present
def _load_env_file(path: str) -> None:
    """Populate ``os.environ`` from a simple ``KEY=VALUE`` file, if it exists.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
    Variables that are already set in the environment are left untouched
    (``setdefault``), so real environment variables win over the file.
    One pair of matching surrounding quotes is removed from a value, so
    ``KEY="abc"`` yields ``abc`` rather than ``"abc"``.
    """
    if not os.path.exists(path):
        return
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(path, encoding="utf-8") as env_fh:
        for raw_line in env_fh:
            entry = raw_line.strip()
            if not entry or entry.startswith("#") or "=" not in entry:
                continue
            key, _, value = entry.partition("=")
            key = key.strip()
            value = value.strip()
            if not key:
                # "=value" has no usable name; os.environ rejects empty keys.
                continue
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            os.environ.setdefault(key, value)


_env_file = os.path.join(os.path.dirname(__file__), ".env")
_load_env_file(_env_file)
# ── ANSI colours ──────────────────────────────────────────────────────────────
# SGR escape sequences used to colour the status lines printed below.
GREEN = "\x1b[32m"   # passed
RED = "\x1b[31m"     # failed
YELLOW = "\x1b[33m"  # skipped
GREY = "\x1b[90m"    # secondary detail text
RESET = "\x1b[0m"    # back to the terminal's default attributes
def ok(label: str, detail: str = "") -> None:
    """Print a green "passed" line for *label*, with optional grey *detail*."""
    suffix = f" {GREY}{detail}{RESET}" if detail else ""
    # The coloured span needs a visible glyph; with nothing between the colour
    # code and the reset, pass and fail lines look the same.
    print(f" {GREEN}✓{RESET} {label}{suffix}")
def fail(label: str, detail: str = "") -> None:
    """Print a red "failed" line for *label*, with optional grey *detail*."""
    suffix = f" {GREY}{detail}{RESET}" if detail else ""
    # The coloured span needs a visible glyph; with nothing between the colour
    # code and the reset, pass and fail lines look the same.
    print(f" {RED}✗{RESET} {label}{suffix}")
def skip(label: str, reason: str = "not configured") -> None:
    """Print a yellow "skipped" line for *label* with the *reason* in grey."""
    # The coloured span needs a visible glyph to mark the line as skipped.
    print(f" {YELLOW}-{RESET} {label} {GREY}({reason}){RESET}")
def section(title: str) -> None:
    """Print *title* preceded by a blank line and underlined to its own width."""
    print(f"\n{title}")
    # An empty string times any length is still empty, so the rule needs a
    # real character to show up.
    print("─" * len(title))
# ── Anthropic ─────────────────────────────────────────────────────────────────
async def test_anthropic():
    """Send one tiny request to the Anthropic Messages API and report the outcome.

    Skipped when ``ANTHROPIC_API_KEY`` is unset. HTTP 200 and HTTP
    400/402/429/529 are reported as "reachable" (the provider answered, even
    if it refused the request); 401/403 are reported as a bad key; any other
    status and any transport error are reported as failures.
    """
    section("Anthropic")
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if not api_key:
        skip("API reachability", "ANTHROPIC_API_KEY not set")
        return

    import httpx

    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            r = await client.post(
                "https://api.anthropic.com/v1/messages",
                headers={
                    "x-api-key": api_key,
                    "anthropic-version": "2023-06-01",
                    "content-type": "application/json",
                },
                json={
                    "model": "claude-haiku-4-5-20251001",
                    "max_tokens": 16,
                    "messages": [{"role": "user", "content": "ping"}],
                },
            )
        status = r.status_code
        if status == 200:
            ok("API reachable + responded", f"HTTP {status}")
        elif status in (400, 402, 429, 529):
            # The provider answered, so connectivity works. The body is only
            # used for a nicer message and may not be JSON or may have an
            # unexpected shape, so read it defensively.
            try:
                body = r.json()
            except ValueError:
                body = None
            error = body.get("error") if isinstance(body, dict) else None
            err = error.get("message") if isinstance(error, dict) else None
            if not isinstance(err, str):
                err = ""
            detail = f"HTTP {status} — {err[:80]}" if err else f"HTTP {status}"
            if "credit" in err.lower() or "balance" in err.lower():
                ok("API reachable (no credits)", detail)
            else:
                ok("API reachable", detail)
        elif status in (401, 403):
            fail("Bad API key", f"HTTP {status}")
        else:
            fail("Unexpected response", f"HTTP {status} — {r.text[:120]}")
    except httpx.ConnectError as e:
        fail("Connection failed", str(e))
    except httpx.TimeoutException:
        fail("Timeout (15s)")
    except Exception as e:
        # Some exceptions have an empty message; fall back to the type name.
        fail("Error", str(e) or type(e).__name__)
# ── OpenAI ────────────────────────────────────────────────────────────────────
async def test_openai():
    """Send one tiny chat-completions request to OpenAI (or a compatible API).

    Skipped when ``OPENAI_API_KEY`` is unset. ``OPENAI_BASE_URL`` overrides
    the default ``https://api.openai.com/v1``. HTTP 200 and HTTP 400/402/429
    are reported as "reachable"; 401/403 are reported as a bad key; any other
    status and any transport error are reported as failures.
    """
    section("OpenAI")
    api_key = os.environ.get("OPENAI_API_KEY", "")
    base_url = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
    if not api_key:
        skip("API reachability", "OPENAI_API_KEY not set")
        return

    import httpx

    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            r = await client.post(
                f"{base_url.rstrip('/')}/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "content-type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "max_tokens": 16,
                    "messages": [{"role": "user", "content": "ping"}],
                },
            )
        status = r.status_code
        if status == 200:
            ok("API reachable + responded", f"HTTP {status}")
        elif status in (400, 402, 429):
            # The server answered, so connectivity works. Compatible endpoints
            # and proxies may return non-JSON bodies or a null/string "error",
            # so extract the message defensively.
            try:
                body = r.json()
            except ValueError:
                body = None
            error = body.get("error") if isinstance(body, dict) else None
            err = error.get("message") if isinstance(error, dict) else None
            if not isinstance(err, str):
                err = ""
            detail = f"HTTP {status} — {err[:80]}" if err else f"HTTP {status}"
            lowered = err.lower()
            if any(w in lowered for w in ("quota", "credit", "balance", "insufficient")):
                ok("API reachable (quota/credits issue)", detail)
            else:
                ok("API reachable", detail)
        elif status in (401, 403):
            fail("Bad API key", f"HTTP {status}")
        else:
            fail("Unexpected response", f"HTTP {status} — {r.text[:120]}")
    except httpx.ConnectError as e:
        fail("Connection failed", str(e)[:120])
    except httpx.TimeoutException:
        fail("Timeout (15s)")
    except Exception as e:
        # Some exceptions have an empty message; fall back to the type name.
        fail("Error", str(e) or type(e).__name__)
# ── LM Studio ─────────────────────────────────────────────────────────────────
async def test_lm_studio():
    """Check a local LM Studio server: list its models, then run one inference.

    ``LM_STUDIO_URL`` (default ``http://localhost:1234``) selects the server.
    When ``LM_STUDIO_MODEL`` is set only that model is tried; otherwise every
    listed model is tried in order until one answers. A model whose error
    text says it is not loaded is passed over; any other non-200 reply is
    reported as a failure and ends the loop.
    """
    section("LM Studio (local)")
    base_url = os.environ.get("LM_STUDIO_URL", "http://localhost:1234").rstrip("/")
    model = os.environ.get("LM_STUDIO_MODEL", "")

    import httpx

    try:
        # Step 1: list models
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(f"{base_url}/v1/models")
        if r.status_code != 200:
            fail("Models endpoint", f"HTTP {r.status_code}")
            return
        listing = r.json()
        entries = (listing.get("data") if isinstance(listing, dict) else None) or []
        # Ignore malformed entries instead of aborting the whole test.
        models = [m["id"] for m in entries if isinstance(m, dict) and m.get("id")]
        if not models:
            fail("No models loaded in LM Studio")
            return
        ok(f"Reachable — {len(models)} model(s)", ", ".join(models[:3]))

        # Step 2: minimal inference — try each model until one works
        candidates = [model] if model else models
        not_loaded_markers = ("Failed to load model", "No models loaded", "not loaded")
        async with httpx.AsyncClient(timeout=30.0) as client:
            for chosen in candidates:
                r2 = await client.post(
                    f"{base_url}/v1/chat/completions",
                    headers={"content-type": "application/json"},
                    json={
                        "model": chosen,
                        "max_tokens": 16,
                        "messages": [{"role": "user", "content": "ping"}],
                    },
                )
                if r2.status_code == 200:
                    content = r2.json()["choices"][0]["message"]["content"]
                    # content can be null in a valid reply; don't slice None.
                    shown = content if isinstance(content, str) else ""
                    ok(f"Inference OK ({chosen})", repr(shown[:60]))
                    break
                body = r2.text
                # Search the full body: the marker may lie past the 120
                # characters that are shown to the user.
                if any(s in body for s in not_loaded_markers):
                    continue  # model in catalog but not loaded — try next
                fail(f"Inference failed ({chosen})", f"HTTP {r2.status_code} — {body[:120]}")
                break  # stop trying, this is a real error
            else:
                # Loop ended without break: every candidate was "not loaded".
                fail("No model currently loaded in LM Studio", f"tried: {', '.join(candidates[:3])}")
    except httpx.ConnectError:
        fail("Not reachable", f"{base_url} — is LM Studio running?")
    except httpx.TimeoutException:
        fail("Timeout")
    except Exception as e:
        # Some exceptions have an empty message; fall back to the type name.
        fail("Error", str(e) or type(e).__name__)
# ── Hermes ────────────────────────────────────────────────────────────────────
async def test_hermes():
    """Check the Hermes dashboard: fetch its session token, then open a session.

    Step 1 downloads the dashboard page at ``HERMES_URL`` (default
    ``http://localhost:50007``) and extracts ``__HERMES_SESSION_TOKEN__``.
    Step 2 connects to ``/api/ws`` with that token, sends a JSON-RPC
    ``session.create`` request with id ``c1`` and reads up to 10 frames
    looking for the reply with the same id.
    """
    section("Hermes (WebSocket)")
    endpoint = os.environ.get("HERMES_URL", "http://localhost:50007").rstrip("/")

    from urllib.parse import quote

    import httpx

    try:
        from websockets.asyncio.client import connect as ws_connect
    except ImportError:
        try:
            from websockets.client import connect as ws_connect  # type: ignore
        except ImportError:
            # Report this in step 2 instead of crashing the whole run.
            ws_connect = None

    # Step 1: fetch token from dashboard HTML
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(f"{endpoint}/")
        m = re.search(r'__HERMES_SESSION_TOKEN__\s*=\s*"([^"]+)"', r.text)
        if not m:
            fail("Token fetch", "Could not find __HERMES_SESSION_TOKEN__ in dashboard HTML")
            return
        token = m.group(1)
        # Only a prefix is shown; the ellipsis marks the value as truncated.
        ok("Dashboard reachable + token fetched", f"{token[:12]}…")
    except httpx.ConnectError:
        fail("Dashboard not reachable", f"{endpoint} — is gerhard-dashboard running?")
        return
    except httpx.TimeoutException:
        fail("Dashboard timeout (10s)", endpoint)
        return
    except Exception as e:
        fail("Dashboard error", str(e) or type(e).__name__)
        return

    if ws_connect is None:
        fail("WebSocket + session.create", "the 'websockets' package is not installed")
        return

    # Step 2: WebSocket + session.create
    ws_scheme = "wss" if endpoint.startswith("https") else "ws"
    host_and_path = endpoint.split("://", 1)[-1]
    # Percent-encode the token so characters such as '+', '&' or '=' survive
    # the query string unchanged.
    ws_url = f"{ws_scheme}://{host_and_path}/api/ws?token={quote(token, safe='')}"
    try:
        async with ws_connect(ws_url) as ws:
            await ws.send(json.dumps({"jsonrpc": "2.0", "id": "c1",
                                      "method": "session.create", "params": {}}))
            session_id = None
            rpc_error = None
            for _ in range(10):  # drain up to 10 frames looking for the ack
                raw = await asyncio.wait_for(ws.recv(), timeout=10.0)
                try:
                    msg = json.loads(raw)
                except ValueError:
                    continue  # not a JSON frame — cannot be our reply
                if not isinstance(msg, dict) or msg.get("id") != "c1":
                    continue
                rpc_error = msg.get("error")
                result = msg.get("result") or {}
                if isinstance(result, dict):
                    session_id = result.get("session_id") or result.get("id")
                break
        if session_id:
            ok("WebSocket + session.create", f"session={session_id}")
        elif rpc_error:
            # Surface the server's own error instead of a generic message.
            fail("session.create — server returned an error", str(rpc_error)[:120])
        else:
            fail("session.create — no session_id in response")
    except asyncio.TimeoutError:
        # Timeout errors often have an empty message, so name the condition.
        fail("WebSocket timeout", "timed out waiting for the server")
    except Exception as e:
        fail("WebSocket error", (str(e) or type(e).__name__)[:120])
# ── Agent Zero ────────────────────────────────────────────────────────────────
async def test_agent0():
    """Check that the Agent Zero MCP endpoint answers an HTTP GET.

    Skipped unless both ``AGENT0_URL`` and ``AGENT0_MCP_KEY`` are set. The
    probed URL is ``<AGENT0_URL>/mcp/t-<AGENT0_MCP_KEY>/http``. HTTP 401/403
    are reported as a bad key; every other HTTP status counts as reachable;
    transport errors are reported as failures.
    """
    section("Agent Zero (MCP)")
    base_url = os.environ.get("AGENT0_URL", "")
    mcp_key = os.environ.get("AGENT0_MCP_KEY", "")
    if not base_url or not mcp_key:
        skip("MCP endpoint", "AGENT0_URL and AGENT0_MCP_KEY not set")
        return

    root = base_url.rstrip("/")
    mcp_url = f"{root}/mcp/t-{mcp_key}/http"
    # The key is part of the URL path, so print a masked form of the URL.
    shown_url = f"{root}/mcp/t-***/http"

    import httpx

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(mcp_url, headers={"Accept": "application/json"})
        if r.status_code in (401, 403):
            # Both statuses mean rejected credentials, as in the other tests.
            fail("Bad MCP key", f"HTTP {r.status_code}")
        elif r.status_code in (200, 405):
            ok("MCP endpoint reachable", f"HTTP {r.status_code} at {shown_url}")
        else:
            ok("MCP endpoint reachable", f"HTTP {r.status_code}")
    except httpx.ConnectError as e:
        fail("Not reachable", str(e).replace(mcp_key, "***")[:120])
    except httpx.TimeoutException:
        fail("Timeout (10s)")
    except Exception as e:
        fail("Error", (str(e) or type(e).__name__).replace(mcp_key, "***"))
# ── Runner ────────────────────────────────────────────────────────────────────
async def main(args):
    """Run the selected connectivity tests in a fixed order.

    *args* is the parsed command line; its boolean attributes ``anthropic``,
    ``openai``, ``lm_studio``, ``hermes`` and ``agent0`` select individual
    tests. When none of them is set, every test runs.
    """
    suites = (
        (args.anthropic, test_anthropic),
        (args.openai, test_openai),
        (args.lm_studio, test_lm_studio),
        (args.hermes, test_hermes),
        (args.agent0, test_agent0),
    )
    nothing_selected = all(not wanted for wanted, _ in suites)

    print("\nagent-inference connectivity tests")
    print("=" * 40)

    # Tests run one after another so their output does not interleave.
    for wanted, run_test in suites:
        if nothing_selected or wanted:
            await run_test()

    print()
if __name__ == "__main__":
    # One opt-in switch per endpoint type, registered from a table.
    parser = argparse.ArgumentParser(description="agent-inference connectivity tests")
    for flag, dest in (
        ("--anthropic", "anthropic"),
        ("--openai", "openai"),
        ("--lm-studio", "lm_studio"),
        ("--hermes", "hermes"),
        ("--agent0", "agent0"),
    ):
        parser.add_argument(flag, dest=dest, action="store_true")
    args = parser.parse_args()
    asyncio.run(main(args))