diff --git a/dev.sh b/dev.sh index 756a358..9080293 100755 --- a/dev.sh +++ b/dev.sh @@ -55,7 +55,8 @@ git -C "$SCRIPT_DIR" pull --ff-only >> "$LOG_FILE" 2>&1 \ if [ ! -d "$SCRIPT_DIR/.venv" ]; then log "$YELLOW" "Creating virtual environment..." - python3 -m venv "$SCRIPT_DIR/.venv" >> "$LOG_FILE" 2>&1 + PYTHON=$(which python3.12 || which python3.11 || which python3.10 || which python3) + $PYTHON -m venv "$SCRIPT_DIR/.venv" >> "$LOG_FILE" 2>&1 fi source "$SCRIPT_DIR/.venv/bin/activate" diff --git a/requirements.txt b/requirements.txt index 8852145..1dfd80d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -fastapi==0.135.1 -uvicorn==0.41.0 +fastapi>=0.115.0 +uvicorn>=0.30.0 litellm>=1.80.0 mcp>=1.9.0 httpx>=0.27.0 diff --git a/server.py b/server.py index c5f85fa..4ab03e1 100644 --- a/server.py +++ b/server.py @@ -451,21 +451,43 @@ async def handle_hermes( log.info(f"[{agent.name}] created Hermes session {hermes_session_id}") # ── 2. Submit prompt ────────────────────────────────────────── - await rpc("prompt.submit", {"session_id": hermes_session_id, "text": req.message}) + # Send prompt.submit but do NOT wait for its ack via rpc() — + # the ack arrives interleaved with events (after message.start). + # We drain all frames below until message.complete arrives. + submit_id = f"h{req_id + 1}" + req_id += 1 + await ws.send(json.dumps({ + "jsonrpc": "2.0", "id": submit_id, + "method": "prompt.submit", + "params": {"session_id": hermes_session_id, "text": req.message}, + })) # ── 3. Stream events until message.complete ─────────────────── full_text = "" while True: raw = await asyncio.wait_for(ws.recv(), timeout=120.0) msg = json.loads(raw) + + # RPC ack for prompt.submit — ignore, keep reading + if msg.get("id") == submit_id: + continue + if msg.get("method") != "event": continue + params = msg.get("params") or {} etype = params.get("type", "") payload = params.get("payload") or {} + if etype == "message.delta": full_text += payload.get("text", "") elif etype == "message.complete": + # payload.text holds the full assembled response + complete_text = payload.get("text", "") + if complete_text: + full_text = complete_text + if payload.get("status") == "error": + raise RuntimeError(full_text or "Hermes returned an error response") break elif etype == "error": raise RuntimeError(payload.get("message", "Hermes error")) diff --git a/test_connectivity.py b/test_connectivity.py new file mode 100644 index 0000000..cc22de5 --- /dev/null +++ b/test_connectivity.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python3 +""" +agent-inference connectivity tests. + +Tests reachability for all four supported inference endpoint types: + - anthropic : Anthropic cloud API (via litellm) + - openai : OpenAI or compatible cloud API (via litellm) + - lm_studio : Local LM Studio (OpenAI-compatible, no key) + - hermes : Hermes Agent dashboard (JSON-RPC WebSocket) + - agent0 : Agent Zero (MCP streamable-http) + +A test PASSES if the endpoint is reachable — even if the API key has +no credits. A 400/402/429 from the provider still means connectivity works. +A test FAILS only on network errors (connection refused, timeout, DNS failure) +or bad credentials (401/403). + +Configuration via environment variables (or a local .env file): + ANTHROPIC_API_KEY — Anthropic API key + OPENAI_API_KEY — OpenAI API key + OPENAI_BASE_URL — optional, override for OpenAI-compatible endpoint + LM_STUDIO_URL — LM Studio base URL (default: http://localhost:1234) + LM_STUDIO_MODEL — model to use (default: first available) + HERMES_URL — Hermes dashboard URL (default: http://localhost:50007) + AGENT0_URL — Agent Zero base URL + AGENT0_MCP_KEY — Agent Zero MCP token + +Usage: + python test_connectivity.py + python test_connectivity.py --anthropic --hermes # run specific tests only +""" + +import argparse +import asyncio +import json +import os +import re +import sys +from typing import Optional + +# Load .env if present +_env_file = os.path.join(os.path.dirname(__file__), ".env") +if os.path.exists(_env_file): + with open(_env_file) as f: + for line in f: + line = line.strip() + if line and not line.startswith("#") and "=" in line: + k, _, v = line.partition("=") + os.environ.setdefault(k.strip(), v.strip()) + + +# ── ANSI colours ────────────────────────────────────────────────────────────── + +GREEN = "\033[32m" +RED = "\033[31m" +YELLOW = "\033[33m" +GREY = "\033[90m" +RESET = "\033[0m" + +def ok(label: str, detail: str = ""): + suffix = f" {GREY}{detail}{RESET}" if detail else "" + print(f" {GREEN}✓{RESET} {label}{suffix}") + +def fail(label: str, detail: str = ""): + suffix = f" {GREY}{detail}{RESET}" if detail else "" + print(f" {RED}✗{RESET} {label}{suffix}") + +def skip(label: str, reason: str = "not configured"): + print(f" {YELLOW}○{RESET} {label} {GREY}({reason}){RESET}") + +def section(title: str): + print(f"\n{title}") + print("─" * len(title)) + + +# ── Anthropic ───────────────────────────────────────────────────────────────── + +async def test_anthropic(): + section("Anthropic") + api_key = os.environ.get("ANTHROPIC_API_KEY", "") + if not api_key: + skip("API reachability", "ANTHROPIC_API_KEY not set") + return + + import httpx + try: + async with httpx.AsyncClient(timeout=15.0) as client: + r = await client.post( + "https://api.anthropic.com/v1/messages", + headers={ + "x-api-key": api_key, + "anthropic-version": "2023-06-01", + "content-type": "application/json", + }, + json={ + "model": "claude-haiku-4-5-20251001", + "max_tokens": 16, + "messages": [{"role": "user", "content": "ping"}], + }, + ) + + if r.status_code == 200: + ok("API reachable + responded", f"HTTP {r.status_code}") + elif r.status_code in (400, 402, 529): + body = r.json() + err = body.get("error", {}).get("message", "") + if "credit" in err.lower() or "balance" in err.lower(): + ok("API reachable (no credits)", f"HTTP {r.status_code} — {err[:80]}") + else: + ok("API reachable", f"HTTP {r.status_code} — {err[:80]}") + elif r.status_code in (401, 403): + fail("Bad API key", f"HTTP {r.status_code}") + else: + fail("Unexpected response", f"HTTP {r.status_code} — {r.text[:120]}") + + except httpx.ConnectError as e: + fail("Connection failed", str(e)) + except httpx.TimeoutException: + fail("Timeout (15s)") + except Exception as e: + fail("Error", str(e)) + + +# ── OpenAI ──────────────────────────────────────────────────────────────────── + +async def test_openai(): + section("OpenAI") + api_key = os.environ.get("OPENAI_API_KEY", "") + base_url = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1") + if not api_key: + skip("API reachability", "OPENAI_API_KEY not set") + return + + import httpx + try: + async with httpx.AsyncClient(timeout=15.0) as client: + r = await client.post( + f"{base_url.rstrip('/')}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "content-type": "application/json", + }, + json={ + "model": "gpt-4o-mini", + "max_tokens": 16, + "messages": [{"role": "user", "content": "ping"}], + }, + ) + + if r.status_code == 200: + ok("API reachable + responded", f"HTTP {r.status_code}") + elif r.status_code in (400, 402, 429): + body = r.json() + err = (body.get("error") or {}).get("message", "") + if any(w in err.lower() for w in ("quota", "credit", "balance", "insufficient")): + ok("API reachable (quota/credits issue)", f"HTTP {r.status_code} — {err[:80]}") + else: + ok("API reachable", f"HTTP {r.status_code} — {err[:80]}") + elif r.status_code in (401, 403): + fail("Bad API key", f"HTTP {r.status_code}") + else: + fail("Unexpected response", f"HTTP {r.status_code} — {r.text[:120]}") + + except httpx.ConnectError as e: + fail("Connection failed", str(e)[:120]) + except httpx.TimeoutException: + fail("Timeout (15s)") + except Exception as e: + fail("Error", str(e)) + + +# ── LM Studio ───────────────────────────────────────────────────────────────── + +async def test_lm_studio(): + section("LM Studio (local)") + base_url = os.environ.get("LM_STUDIO_URL", "http://localhost:1234").rstrip("/") + model = os.environ.get("LM_STUDIO_MODEL", "") + + import httpx + try: + # Step 1: list models + async with httpx.AsyncClient(timeout=10.0) as client: + r = await client.get(f"{base_url}/v1/models") + + if r.status_code != 200: + fail("Models endpoint", f"HTTP {r.status_code}") + return + + models = [m["id"] for m in r.json().get("data", [])] + if not models: + fail("No models loaded in LM Studio") + return + + ok(f"Reachable — {len(models)} model(s)", ", ".join(models[:3])) + + # Step 2: minimal inference — try each model until one works + candidates = [model] if model else models + inference_ok = False + async with httpx.AsyncClient(timeout=30.0) as client: + for chosen in candidates: + r2 = await client.post( + f"{base_url}/v1/chat/completions", + headers={"content-type": "application/json"}, + json={ + "model": chosen, + "max_tokens": 16, + "messages": [{"role": "user", "content": "ping"}], + }, + ) + if r2.status_code == 200: + content = r2.json()["choices"][0]["message"]["content"] + ok(f"Inference OK ({chosen})", repr(content[:60])) + inference_ok = True + break + body = r2.text[:120] + if any(s in body for s in ("Failed to load model", "No models loaded", "not loaded")): + continue # model in catalog but not loaded — try next + fail(f"Inference failed ({chosen})", f"HTTP {r2.status_code} — {body}") + inference_ok = True # stop trying, this is a real error + break + + if not inference_ok: + fail("No model currently loaded in LM Studio", f"tried: {', '.join(candidates[:3])}") + + except httpx.ConnectError: + fail("Not reachable", f"{base_url} — is LM Studio running?") + except httpx.TimeoutException: + fail("Timeout") + except Exception as e: + fail("Error", str(e)) + + +# ── Hermes ──────────────────────────────────────────────────────────────────── + +async def test_hermes(): + section("Hermes (WebSocket)") + endpoint = os.environ.get("HERMES_URL", "http://localhost:50007").rstrip("/") + + import httpx + try: + from websockets.asyncio.client import connect as ws_connect + except ImportError: + from websockets.client import connect as ws_connect # type: ignore + + # Step 1: fetch token from dashboard HTML + try: + async with httpx.AsyncClient(timeout=10.0) as client: + r = await client.get(f"{endpoint}/") + m = re.search(r'__HERMES_SESSION_TOKEN__\s*=\s*"([^"]+)"', r.text) + if not m: + fail("Token fetch", "Could not find __HERMES_SESSION_TOKEN__ in dashboard HTML") + return + token = m.group(1) + ok("Dashboard reachable + token fetched", f"{token[:12]}…") + except httpx.ConnectError: + fail("Dashboard not reachable", f"{endpoint} — is gerhard-dashboard running?") + return + except Exception as e: + fail("Dashboard error", str(e)) + return + + # Step 2: WebSocket + session.create + ws_scheme = "wss" if endpoint.startswith("https") else "ws" + ws_url = f"{ws_scheme}://{endpoint.split('://', 1)[-1]}/api/ws?token={token}" + + try: + async with ws_connect(ws_url) as ws: + await ws.send(json.dumps({"jsonrpc": "2.0", "id": "c1", + "method": "session.create", "params": {}})) + session_id = None + for _ in range(10): # drain up to 10 frames looking for the ack + raw = await asyncio.wait_for(ws.recv(), timeout=10.0) + msg = json.loads(raw) + if msg.get("id") == "c1": + result = msg.get("result") or {} + session_id = result.get("session_id") or result.get("id") + break + + if session_id: + ok("WebSocket + session.create", f"session={session_id}") + else: + fail("session.create — no session_id in response") + + except Exception as e: + fail("WebSocket error", str(e)[:120]) + + +# ── Agent Zero ──────────────────────────────────────────────────────────────── + +async def test_agent0(): + section("Agent Zero (MCP)") + base_url = os.environ.get("AGENT0_URL", "") + mcp_key = os.environ.get("AGENT0_MCP_KEY", "") + + if not base_url or not mcp_key: + skip("MCP endpoint", "AGENT0_URL and AGENT0_MCP_KEY not set") + return + + mcp_url = f"{base_url.rstrip('/')}/mcp/t-{mcp_key}/http" + + import httpx + try: + async with httpx.AsyncClient(timeout=10.0) as client: + r = await client.get(mcp_url, headers={"Accept": "application/json"}) + + if r.status_code in (200, 405): + ok("MCP endpoint reachable", f"HTTP {r.status_code} at {mcp_url}") + elif r.status_code == 401: + fail("Bad MCP key", f"HTTP {r.status_code}") + else: + ok("MCP endpoint reachable", f"HTTP {r.status_code}") + + except httpx.ConnectError as e: + fail("Not reachable", str(e)[:120]) + except httpx.TimeoutException: + fail("Timeout (10s)") + except Exception as e: + fail("Error", str(e)) + + +# ── Runner ──────────────────────────────────────────────────────────────────── + +async def main(args): + run_all = not any([args.anthropic, args.openai, args.lm_studio, args.hermes, args.agent0]) + + print(f"\nagent-inference connectivity tests") + print(f"{'='*40}") + + if run_all or args.anthropic: + await test_anthropic() + if run_all or args.openai: + await test_openai() + if run_all or args.lm_studio: + await test_lm_studio() + if run_all or args.hermes: + await test_hermes() + if run_all or args.agent0: + await test_agent0() + + print() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="agent-inference connectivity tests") + parser.add_argument("--anthropic", action="store_true") + parser.add_argument("--openai", action="store_true") + parser.add_argument("--lm-studio", dest="lm_studio", action="store_true") + parser.add_argument("--hermes", action="store_true") + parser.add_argument("--agent0", action="store_true") + args = parser.parse_args() + asyncio.run(main(args)) diff --git a/test_hermes.py b/test_hermes.py new file mode 100644 index 0000000..0dbe1d1 --- /dev/null +++ b/test_hermes.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +""" +Integration test: raw Hermes WebSocket conversation trace. + +Shows every frame sent and received so you can see exactly what +the protocol looks like and diagnose where handle_hermes breaks. + +Usage: + python test_hermes.py [endpoint] + + endpoint defaults to http://localhost:50007 +""" + +import asyncio +import json +import re +import sys +import httpx + +ENDPOINT = sys.argv[1].rstrip("/") if len(sys.argv) > 1 else "http://localhost:50007" +MESSAGE = "Hello Gerhard, what are you working on?" + + +def pp(direction: str, raw: str): + """Pretty-print a raw WebSocket frame.""" + try: + parsed = json.loads(raw) + body = json.dumps(parsed, indent=2) + except Exception: + body = raw + print(f"\n{'─'*60}") + print(f" {direction}") + print(f"{'─'*60}") + print(body) + + +async def fetch_token(endpoint: str) -> str: + async with httpx.AsyncClient(timeout=10.0) as client: + resp = await client.get(f"{endpoint}/") + resp.raise_for_status() + html = resp.text + + m = re.search(r'__HERMES_SESSION_TOKEN__\s*=\s*"([^"]+)"', html) + if not m: + raise RuntimeError("Could not find __HERMES_SESSION_TOKEN__ in dashboard HTML") + token = m.group(1) + print(f"[token] fetched: {token[:12]}…") + return token + + +async def run(): + token = await fetch_token(ENDPOINT) + + ws_scheme = "wss" if ENDPOINT.startswith("https") else "ws" + ws_url = f"{ws_scheme}://{ENDPOINT.split('://', 1)[-1]}/api/ws?token={token}" + print(f"[ws] connecting to {ws_url}") + + try: + from websockets.asyncio.client import connect as ws_connect + except ImportError: + from websockets.client import connect as ws_connect # type: ignore + + async with ws_connect(ws_url) as ws: + print("[ws] connected\n") + + # ── 1. session.create ───────────────────────────────────────────── + create_msg = json.dumps({ + "jsonrpc": "2.0", "id": "t1", + "method": "session.create", "params": {} + }) + pp("SEND →", create_msg) + await ws.send(create_msg) + + session_id = None + print("\n[waiting for session.create response — all frames logged below]") + while session_id is None: + raw = await asyncio.wait_for(ws.recv(), timeout=15.0) + pp("RECV ←", raw) + msg = json.loads(raw) + if msg.get("id") == "t1": + result = msg.get("result") or {} + session_id = result.get("session_id") or result.get("id") + print(f"\n[session] id = {session_id}") + break + + if not session_id: + print("[ERROR] No session_id in response — check frames above") + return + + # ── 2. prompt.submit ────────────────────────────────────────────── + submit_msg = json.dumps({ + "jsonrpc": "2.0", "id": "t2", + "method": "prompt.submit", + "params": {"session_id": session_id, "text": MESSAGE} + }) + pp("SEND →", submit_msg) + await ws.send(submit_msg) + + # ── 3. Read ALL frames until message.complete or timeout ────────── + print("\n[streaming — logging every frame until message.complete or 120s timeout]") + full_text = "" + while True: + try: + raw = await asyncio.wait_for(ws.recv(), timeout=120.0) + except asyncio.TimeoutError: + print("\n[TIMEOUT] No message.complete received within 120s") + break + + pp("RECV ←", raw) + msg = json.loads(raw) + + # RPC ack for prompt.submit + if msg.get("id") == "t2": + print("[ack] prompt.submit acknowledged") + continue + + # Events + method = msg.get("method", "") + params = msg.get("params") or {} + etype = params.get("type", "") + payload = params.get("payload") or {} + + if method == "event": + if etype == "message.delta": + chunk = payload.get("text", "") + full_text += chunk + print(f" [delta] +{len(chunk)} chars") + elif etype == "message.complete": + print(f"\n[complete] full response ({len(full_text)} chars):") + print(f" {repr(full_text[:200])}") + break + elif etype == "error": + print(f"\n[ERROR event] {payload}") + break + else: + print(f" [event:{etype}] (logged above)") + + print(f"\n{'═'*60}") + print(f" FINAL RESPONSE: {repr(full_text) if full_text else '(empty)'}") + print(f"{'═'*60}\n") + + +if __name__ == "__main__": + asyncio.run(run())