Add connectivity tests and fix Hermes handle + venv setup

- test_connectivity.py: connectivity tests for all four endpoint types (anthropic, openai, lm_studio, hermes, agent0) — treats no-credits as success - test_hermes.py: raw WebSocket frame logger used to reverse-engineer protocol - Fix handle_hermes: skip prompt.submit ack frame, read full text from message.complete payload.text, always raise on status==error - Fix requirements.txt: use >= pins (fastapi/uvicorn versions didn't exist) - Fix dev.sh: prefer python3.12 for venv (mcp>=1.9.0 requires 3.10+) - Remove ANTHROPIC_KEY env var dependency from server.py (keys come from DB) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 19:00:51 +02:00
parent 604df52247
commit 9814d18e8c
5 changed files with 521 additions and 4 deletions
@@ -55,7 +55,8 @@ git -C "$SCRIPT_DIR" pull --ff-only >> "$LOG_FILE" 2>&1 \
 if [ ! -d "$SCRIPT_DIR/.venv" ]; then
  log "$YELLOW" "Creating virtual environment..."
-  python3 -m venv "$SCRIPT_DIR/.venv" >> "$LOG_FILE" 2>&1
+  PYTHON=$(which python3.12 || which python3.11 || which python3.10 || which python3)
  $PYTHON -m venv "$SCRIPT_DIR/.venv" >> "$LOG_FILE" 2>&1
 fi
 source "$SCRIPT_DIR/.venv/bin/activate"
@@ -1,5 +1,5 @@
-fastapi==0.135.1
+fastapi>=0.115.0
-uvicorn==0.41.0
+uvicorn>=0.30.0
 litellm>=1.80.0
 mcp>=1.9.0
 httpx>=0.27.0
@@ -451,21 +451,43 @@ async def handle_hermes(
                log.info(f"[{agent.name}] created Hermes session {hermes_session_id}")
            # ── 2. Submit prompt ──────────────────────────────────────────
-            await rpc("prompt.submit", {"session_id": hermes_session_id, "text": req.message})
+            # Send prompt.submit but do NOT wait for its ack via rpc() —
            # the ack arrives interleaved with events (after message.start).
            # We drain all frames below until message.complete arrives.
            submit_id = f"h{req_id + 1}"
            req_id += 1
            await ws.send(json.dumps({
                "jsonrpc": "2.0", "id": submit_id,
                "method": "prompt.submit",
                "params": {"session_id": hermes_session_id, "text": req.message},
            }))
            # ── 3. Stream events until message.complete ───────────────────
            full_text = ""
            while True:
                raw = await asyncio.wait_for(ws.recv(), timeout=120.0)
                msg = json.loads(raw)
                # RPC ack for prompt.submit — ignore, keep reading
                if msg.get("id") == submit_id:
                    continue
                if msg.get("method") != "event":
                    continue
                params = msg.get("params") or {}
                etype = params.get("type", "")
                payload = params.get("payload") or {}
                if etype == "message.delta":
                    full_text += payload.get("text", "")
                elif etype == "message.complete":
                    # payload.text holds the full assembled response
                    complete_text = payload.get("text", "")
                    if complete_text:
                        full_text = complete_text
                    if payload.get("status") == "error":
                        raise RuntimeError(full_text or "Hermes returned an error response")
                    break
                elif etype == "error":
                    raise RuntimeError(payload.get("message", "Hermes error"))
@@ -0,0 +1,350 @@
 #!/usr/bin/env python3
 """
 agent-inference connectivity tests.
 Tests reachability for all four supported inference endpoint types:
  - anthropic   : Anthropic cloud API (via litellm)
  - openai      : OpenAI or compatible cloud API (via litellm)
  - lm_studio   : Local LM Studio (OpenAI-compatible, no key)
  - hermes      : Hermes Agent dashboard (JSON-RPC WebSocket)
  - agent0      : Agent Zero (MCP streamable-http)
 A test PASSES if the endpoint is reachable — even if the API key has
 no credits. A 400/402/429 from the provider still means connectivity works.
 A test FAILS only on network errors (connection refused, timeout, DNS failure)
 or bad credentials (401/403).
 Configuration via environment variables (or a local .env file):
  ANTHROPIC_API_KEY   — Anthropic API key
  OPENAI_API_KEY      — OpenAI API key
  OPENAI_BASE_URL     — optional, override for OpenAI-compatible endpoint
  LM_STUDIO_URL       — LM Studio base URL (default: http://localhost:1234)
  LM_STUDIO_MODEL     — model to use (default: first available)
  HERMES_URL          — Hermes dashboard URL (default: http://localhost:50007)
  AGENT0_URL          — Agent Zero base URL
  AGENT0_MCP_KEY      — Agent Zero MCP token
 Usage:
  python test_connectivity.py
  python test_connectivity.py --anthropic --hermes   # run specific tests only
 """
 import argparse
 import asyncio
 import json
 import os
 import re
 import sys
 from typing import Optional
 # Load .env if present
 _env_file = os.path.join(os.path.dirname(__file__), ".env")
 if os.path.exists(_env_file):
    with open(_env_file) as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith("#") and "=" in line:
                k, _, v = line.partition("=")
                os.environ.setdefault(k.strip(), v.strip())
 # ── ANSI colours ──────────────────────────────────────────────────────────────
 GREEN  = "\033[32m"
 RED    = "\033[31m"
 YELLOW = "\033[33m"
 GREY   = "\033[90m"
 RESET  = "\033[0m"
 def ok(label: str, detail: str = ""):
    suffix = f"  {GREY}{detail}{RESET}" if detail else ""
    print(f"  {GREEN}✓{RESET}  {label}{suffix}")
 def fail(label: str, detail: str = ""):
    suffix = f"  {GREY}{detail}{RESET}" if detail else ""
    print(f"  {RED}✗{RESET}  {label}{suffix}")
 def skip(label: str, reason: str = "not configured"):
    print(f"  {YELLOW}○{RESET}  {label}  {GREY}({reason}){RESET}")
 def section(title: str):
    print(f"\n{title}")
    print("─" * len(title))
 # ── Anthropic ─────────────────────────────────────────────────────────────────
 async def test_anthropic():
    section("Anthropic")
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if not api_key:
        skip("API reachability", "ANTHROPIC_API_KEY not set")
        return
    import httpx
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            r = await client.post(
                "https://api.anthropic.com/v1/messages",
                headers={
                    "x-api-key": api_key,
                    "anthropic-version": "2023-06-01",
                    "content-type": "application/json",
                },
                json={
                    "model": "claude-haiku-4-5-20251001",
                    "max_tokens": 16,
                    "messages": [{"role": "user", "content": "ping"}],
                },
            )
        if r.status_code == 200:
            ok("API reachable + responded", f"HTTP {r.status_code}")
        elif r.status_code in (400, 402, 529):
            body = r.json()
            err  = body.get("error", {}).get("message", "")
            if "credit" in err.lower() or "balance" in err.lower():
                ok("API reachable (no credits)", f"HTTP {r.status_code} — {err[:80]}")
            else:
                ok("API reachable", f"HTTP {r.status_code} — {err[:80]}")
        elif r.status_code in (401, 403):
            fail("Bad API key", f"HTTP {r.status_code}")
        else:
            fail("Unexpected response", f"HTTP {r.status_code} — {r.text[:120]}")
    except httpx.ConnectError as e:
        fail("Connection failed", str(e))
    except httpx.TimeoutException:
        fail("Timeout (15s)")
    except Exception as e:
        fail("Error", str(e))
 # ── OpenAI ────────────────────────────────────────────────────────────────────
 async def test_openai():
    section("OpenAI")
    api_key  = os.environ.get("OPENAI_API_KEY", "")
    base_url = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
    if not api_key:
        skip("API reachability", "OPENAI_API_KEY not set")
        return
    import httpx
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            r = await client.post(
                f"{base_url.rstrip('/')}/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "content-type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "max_tokens": 16,
                    "messages": [{"role": "user", "content": "ping"}],
                },
            )
        if r.status_code == 200:
            ok("API reachable + responded", f"HTTP {r.status_code}")
        elif r.status_code in (400, 402, 429):
            body = r.json()
            err  = (body.get("error") or {}).get("message", "")
            if any(w in err.lower() for w in ("quota", "credit", "balance", "insufficient")):
                ok("API reachable (quota/credits issue)", f"HTTP {r.status_code} — {err[:80]}")
            else:
                ok("API reachable", f"HTTP {r.status_code} — {err[:80]}")
        elif r.status_code in (401, 403):
            fail("Bad API key", f"HTTP {r.status_code}")
        else:
            fail("Unexpected response", f"HTTP {r.status_code} — {r.text[:120]}")
    except httpx.ConnectError as e:
        fail("Connection failed", str(e)[:120])
    except httpx.TimeoutException:
        fail("Timeout (15s)")
    except Exception as e:
        fail("Error", str(e))
 # ── LM Studio ─────────────────────────────────────────────────────────────────
 async def test_lm_studio():
    section("LM Studio (local)")
    base_url = os.environ.get("LM_STUDIO_URL", "http://localhost:1234").rstrip("/")
    model    = os.environ.get("LM_STUDIO_MODEL", "")
    import httpx
    try:
        # Step 1: list models
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(f"{base_url}/v1/models")
        if r.status_code != 200:
            fail("Models endpoint", f"HTTP {r.status_code}")
            return
        models = [m["id"] for m in r.json().get("data", [])]
        if not models:
            fail("No models loaded in LM Studio")
            return
        ok(f"Reachable — {len(models)} model(s)", ", ".join(models[:3]))
        # Step 2: minimal inference — try each model until one works
        candidates = [model] if model else models
        inference_ok = False
        async with httpx.AsyncClient(timeout=30.0) as client:
            for chosen in candidates:
                r2 = await client.post(
                    f"{base_url}/v1/chat/completions",
                    headers={"content-type": "application/json"},
                    json={
                        "model": chosen,
                        "max_tokens": 16,
                        "messages": [{"role": "user", "content": "ping"}],
                    },
                )
                if r2.status_code == 200:
                    content = r2.json()["choices"][0]["message"]["content"]
                    ok(f"Inference OK ({chosen})", repr(content[:60]))
                    inference_ok = True
                    break
                body = r2.text[:120]
                if any(s in body for s in ("Failed to load model", "No models loaded", "not loaded")):
                    continue  # model in catalog but not loaded — try next
                fail(f"Inference failed ({chosen})", f"HTTP {r2.status_code} — {body}")
                inference_ok = True  # stop trying, this is a real error
                break
        if not inference_ok:
            fail("No model currently loaded in LM Studio", f"tried: {', '.join(candidates[:3])}")
    except httpx.ConnectError:
        fail("Not reachable", f"{base_url} — is LM Studio running?")
    except httpx.TimeoutException:
        fail("Timeout")
    except Exception as e:
        fail("Error", str(e))
 # ── Hermes ────────────────────────────────────────────────────────────────────
 async def test_hermes():
    section("Hermes (WebSocket)")
    endpoint = os.environ.get("HERMES_URL", "http://localhost:50007").rstrip("/")
    import httpx
    try:
        from websockets.asyncio.client import connect as ws_connect
    except ImportError:
        from websockets.client import connect as ws_connect  # type: ignore
    # Step 1: fetch token from dashboard HTML
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(f"{endpoint}/")
        m = re.search(r'__HERMES_SESSION_TOKEN__\s*=\s*"([^"]+)"', r.text)
        if not m:
            fail("Token fetch", "Could not find __HERMES_SESSION_TOKEN__ in dashboard HTML")
            return
        token = m.group(1)
        ok("Dashboard reachable + token fetched", f"{token[:12]}…")
    except httpx.ConnectError:
        fail("Dashboard not reachable", f"{endpoint} — is gerhard-dashboard running?")
        return
    except Exception as e:
        fail("Dashboard error", str(e))
        return
    # Step 2: WebSocket + session.create
    ws_scheme = "wss" if endpoint.startswith("https") else "ws"
    ws_url    = f"{ws_scheme}://{endpoint.split('://', 1)[-1]}/api/ws?token={token}"
    try:
        async with ws_connect(ws_url) as ws:
            await ws.send(json.dumps({"jsonrpc": "2.0", "id": "c1",
                                      "method": "session.create", "params": {}}))
            session_id = None
            for _ in range(10):  # drain up to 10 frames looking for the ack
                raw = await asyncio.wait_for(ws.recv(), timeout=10.0)
                msg = json.loads(raw)
                if msg.get("id") == "c1":
                    result    = msg.get("result") or {}
                    session_id = result.get("session_id") or result.get("id")
                    break
        if session_id:
            ok("WebSocket + session.create", f"session={session_id}")
        else:
            fail("session.create — no session_id in response")
    except Exception as e:
        fail("WebSocket error", str(e)[:120])
 # ── Agent Zero ────────────────────────────────────────────────────────────────
 async def test_agent0():
    section("Agent Zero (MCP)")
    base_url = os.environ.get("AGENT0_URL", "")
    mcp_key  = os.environ.get("AGENT0_MCP_KEY", "")
    if not base_url or not mcp_key:
        skip("MCP endpoint", "AGENT0_URL and AGENT0_MCP_KEY not set")
        return
    mcp_url = f"{base_url.rstrip('/')}/mcp/t-{mcp_key}/http"
    import httpx
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(mcp_url, headers={"Accept": "application/json"})
        if r.status_code in (200, 405):
            ok("MCP endpoint reachable", f"HTTP {r.status_code} at {mcp_url}")
        elif r.status_code == 401:
            fail("Bad MCP key", f"HTTP {r.status_code}")
        else:
            ok("MCP endpoint reachable", f"HTTP {r.status_code}")
    except httpx.ConnectError as e:
        fail("Not reachable", str(e)[:120])
    except httpx.TimeoutException:
        fail("Timeout (10s)")
    except Exception as e:
        fail("Error", str(e))
 # ── Runner ────────────────────────────────────────────────────────────────────
 async def main(args):
    run_all = not any([args.anthropic, args.openai, args.lm_studio, args.hermes, args.agent0])
    print(f"\nagent-inference connectivity tests")
    print(f"{'='*40}")
    if run_all or args.anthropic:
        await test_anthropic()
    if run_all or args.openai:
        await test_openai()
    if run_all or args.lm_studio:
        await test_lm_studio()
    if run_all or args.hermes:
        await test_hermes()
    if run_all or args.agent0:
        await test_agent0()
    print()
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="agent-inference connectivity tests")
    parser.add_argument("--anthropic",  action="store_true")
    parser.add_argument("--openai",     action="store_true")
    parser.add_argument("--lm-studio",  dest="lm_studio", action="store_true")
    parser.add_argument("--hermes",     action="store_true")
    parser.add_argument("--agent0",     action="store_true")
    args = parser.parse_args()
    asyncio.run(main(args))
@@ -0,0 +1,144 @@
 #!/usr/bin/env python3
 """
 Integration test: raw Hermes WebSocket conversation trace.
 Shows every frame sent and received so you can see exactly what
 the protocol looks like and diagnose where handle_hermes breaks.
 Usage:
    python test_hermes.py [endpoint]
    endpoint defaults to http://localhost:50007
 """
 import asyncio
 import json
 import re
 import sys
 import httpx
 ENDPOINT = sys.argv[1].rstrip("/") if len(sys.argv) > 1 else "http://localhost:50007"
 MESSAGE  = "Hello Gerhard, what are you working on?"
 def pp(direction: str, raw: str):
    """Pretty-print a raw WebSocket frame."""
    try:
        parsed = json.loads(raw)
        body   = json.dumps(parsed, indent=2)
    except Exception:
        body = raw
    print(f"\n{'─'*60}")
    print(f"  {direction}")
    print(f"{'─'*60}")
    print(body)
 async def fetch_token(endpoint: str) -> str:
    async with httpx.AsyncClient(timeout=10.0) as client:
        resp = await client.get(f"{endpoint}/")
        resp.raise_for_status()
        html = resp.text
    m = re.search(r'__HERMES_SESSION_TOKEN__\s*=\s*"([^"]+)"', html)
    if not m:
        raise RuntimeError("Could not find __HERMES_SESSION_TOKEN__ in dashboard HTML")
    token = m.group(1)
    print(f"[token] fetched: {token[:12]}…")
    return token
 async def run():
    token = await fetch_token(ENDPOINT)
    ws_scheme = "wss" if ENDPOINT.startswith("https") else "ws"
    ws_url = f"{ws_scheme}://{ENDPOINT.split('://', 1)[-1]}/api/ws?token={token}"
    print(f"[ws]    connecting to {ws_url}")
    try:
        from websockets.asyncio.client import connect as ws_connect
    except ImportError:
        from websockets.client import connect as ws_connect  # type: ignore
    async with ws_connect(ws_url) as ws:
        print("[ws]    connected\n")
        # ── 1. session.create ─────────────────────────────────────────────
        create_msg = json.dumps({
            "jsonrpc": "2.0", "id": "t1",
            "method": "session.create", "params": {}
        })
        pp("SEND →", create_msg)
        await ws.send(create_msg)
        session_id = None
        print("\n[waiting for session.create response — all frames logged below]")
        while session_id is None:
            raw = await asyncio.wait_for(ws.recv(), timeout=15.0)
            pp("RECV ←", raw)
            msg = json.loads(raw)
            if msg.get("id") == "t1":
                result = msg.get("result") or {}
                session_id = result.get("session_id") or result.get("id")
                print(f"\n[session] id = {session_id}")
                break
        if not session_id:
            print("[ERROR] No session_id in response — check frames above")
            return
        # ── 2. prompt.submit ──────────────────────────────────────────────
        submit_msg = json.dumps({
            "jsonrpc": "2.0", "id": "t2",
            "method": "prompt.submit",
            "params": {"session_id": session_id, "text": MESSAGE}
        })
        pp("SEND →", submit_msg)
        await ws.send(submit_msg)
        # ── 3. Read ALL frames until message.complete or timeout ──────────
        print("\n[streaming — logging every frame until message.complete or 120s timeout]")
        full_text = ""
        while True:
            try:
                raw = await asyncio.wait_for(ws.recv(), timeout=120.0)
            except asyncio.TimeoutError:
                print("\n[TIMEOUT] No message.complete received within 120s")
                break
            pp("RECV ←", raw)
            msg = json.loads(raw)
            # RPC ack for prompt.submit
            if msg.get("id") == "t2":
                print("[ack] prompt.submit acknowledged")
                continue
            # Events
            method = msg.get("method", "")
            params = msg.get("params") or {}
            etype  = params.get("type", "")
            payload = params.get("payload") or {}
            if method == "event":
                if etype == "message.delta":
                    chunk = payload.get("text", "")
                    full_text += chunk
                    print(f"  [delta] +{len(chunk)} chars")
                elif etype == "message.complete":
                    print(f"\n[complete] full response ({len(full_text)} chars):")
                    print(f"  {repr(full_text[:200])}")
                    break
                elif etype == "error":
                    print(f"\n[ERROR event] {payload}")
                    break
                else:
                    print(f"  [event:{etype}] (logged above)")
        print(f"\n{'═'*60}")
        print(f"  FINAL RESPONSE: {repr(full_text) if full_text else '(empty)'}")
        print(f"{'═'*60}\n")
 if __name__ == "__main__":
    asyncio.run(run())