Files
agent-inference/test_hermes.py
T
gitprov 9814d18e8c Add connectivity tests and fix Hermes handle + venv setup
- test_connectivity.py: connectivity tests for all five endpoint types
  (anthropic, openai, lm_studio, hermes, agent0) — treats no-credits as success
- test_hermes.py: raw WebSocket frame logger used to reverse-engineer protocol
- Fix handle_hermes: skip prompt.submit ack frame, read full text from
  message.complete payload.text, always raise on status==error
- Fix requirements.txt: use >= pins (fastapi/uvicorn versions didn't exist)
- Fix dev.sh: prefer python3.12 for venv (mcp>=1.9.0 requires 3.10+)
- Remove ANTHROPIC_KEY env var dependency from server.py (keys come from DB)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 19:00:51 +02:00

145 lines
4.9 KiB
Python

#!/usr/bin/env python3
"""
Integration test: raw Hermes WebSocket conversation trace.
Shows every frame sent and received so you can see exactly what
the protocol looks like and diagnose where handle_hermes breaks.
Usage:
python test_hermes.py [endpoint]
endpoint defaults to http://localhost:50007
"""
import asyncio
import json
import re
import sys
import httpx
# Base URL of the Hermes dashboard: the first command-line argument with any
# trailing "/" stripped, or the local default when no argument is given.
ENDPOINT = sys.argv[1].rstrip("/") if len(sys.argv) > 1 else "http://localhost:50007"
# Prompt text sent as the "text" parameter of the prompt.submit request.
MESSAGE = "Hello Gerhard, what are you working on?"
def pp(direction: str, raw: str):
    """Pretty-print a raw WebSocket frame.

    The frame is printed under a banner made of a horizontal rule, the
    *direction* label, and a second rule. When *raw* parses as JSON it is
    re-serialised with two-space indentation; otherwise it is printed
    unchanged.

    Args:
        direction: Label shown in the banner (e.g. "SEND →" / "RECV ←").
        raw: Frame payload exactly as sent or received.
    """
    try:
        body = json.dumps(json.loads(raw), indent=2)
    except (ValueError, TypeError):
        # Not JSON (JSONDecodeError is a ValueError) or not a str/bytes
        # payload: show the frame verbatim rather than failing the trace.
        body = raw
    # A visible rule; multiplying an empty string would draw nothing.
    rule = "─" * 60
    print(f"\n{rule}")
    print(f" {direction}")
    print(rule)
    print(body)
async def fetch_token(endpoint: str) -> str:
    """Scrape the Hermes session token from the dashboard landing page.

    Sends a GET request to ``{endpoint}/`` and searches the returned HTML
    for a ``__HERMES_SESSION_TOKEN__ = "..."`` assignment.

    Args:
        endpoint: Base URL of the dashboard, without a trailing slash.

    Returns:
        The token string found between the double quotes.

    Raises:
        RuntimeError: If the page contains no token assignment.
        Any error raised by ``raise_for_status()`` for an error response
        propagates unchanged.
    """
    async with httpx.AsyncClient(timeout=10.0) as http:
        response = await http.get(f"{endpoint}/")
        response.raise_for_status()
        page = response.text

    found = re.search(r'__HERMES_SESSION_TOKEN__\s*=\s*"([^"]+)"', page)
    if found is None:
        raise RuntimeError("Could not find __HERMES_SESSION_TOKEN__ in dashboard HTML")

    session_token = found.group(1)
    # Only the first 12 characters are echoed to the console.
    print(f"[token] fetched: {session_token[:12]}")
    return session_token
def _parse_frame(raw):
    """Decode one received frame into a JSON-RPC message dict, or None.

    Returns None when *raw* is not valid JSON or its top level is not an
    object, so the caller can skip the frame instead of crashing on it.
    """
    try:
        decoded = json.loads(raw)
    except (ValueError, TypeError):
        return None
    return decoded if isinstance(decoded, dict) else None


async def run():
    """Trace one Hermes conversation over WebSocket, printing every frame.

    Steps: fetch the session token, connect to ``/api/ws?token=...``, send
    ``session.create``, send ``prompt.submit`` with MESSAGE, then log frames
    until a ``message.complete`` or ``error`` event arrives or no frame is
    received for 120 seconds. Ends by printing the collected reply text.

    Frames that are not JSON objects are still logged by ``pp`` but are
    otherwise skipped, so an unexpected frame cannot abort the trace.

    Raises:
        asyncio.TimeoutError: If no frame arrives within 15 seconds while
            waiting for the ``session.create`` response.
    """
    rule = "─" * 60
    token = await fetch_token(ENDPOINT)
    ws_scheme = "wss" if ENDPOINT.startswith("https") else "ws"
    ws_url = f"{ws_scheme}://{ENDPOINT.split('://', 1)[-1]}/api/ws?token={token}"
    print(f"[ws] connecting to {ws_url}")
    # Try the websockets.asyncio.client module path first and fall back to
    # websockets.client when that import is unavailable.
    try:
        from websockets.asyncio.client import connect as ws_connect
    except ImportError:
        from websockets.client import connect as ws_connect  # type: ignore

    async with ws_connect(ws_url) as ws:
        print("[ws] connected\n")

        # ── 1. session.create ─────────────────────────────────────────────
        create_msg = json.dumps({
            "jsonrpc": "2.0", "id": "t1",
            "method": "session.create", "params": {}
        })
        pp("SEND →", create_msg)
        await ws.send(create_msg)

        session_id = None
        print("\n[waiting for session.create response — all frames logged below]")
        while True:
            raw = await asyncio.wait_for(ws.recv(), timeout=15.0)
            pp("RECV ←", raw)
            msg = _parse_frame(raw)
            if msg is None:
                print(" [skip] frame is not a JSON object (logged above)")
                continue
            if msg.get("id") == "t1":
                result = msg.get("result")
                if isinstance(result, dict):
                    session_id = result.get("session_id") or result.get("id")
                print(f"\n[session] id = {session_id}")
                break
        if not session_id:
            print("[ERROR] No session_id in response — check frames above")
            return

        # ── 2. prompt.submit ──────────────────────────────────────────────
        submit_msg = json.dumps({
            "jsonrpc": "2.0", "id": "t2",
            "method": "prompt.submit",
            "params": {"session_id": session_id, "text": MESSAGE}
        })
        pp("SEND →", submit_msg)
        await ws.send(submit_msg)

        # ── 3. Read ALL frames until message.complete or timeout ──────────
        print("\n[streaming — logging every frame until message.complete or 120s timeout]")
        full_text = ""
        while True:
            try:
                raw = await asyncio.wait_for(ws.recv(), timeout=120.0)
            except asyncio.TimeoutError:
                print("\n[TIMEOUT] No message.complete received within 120s")
                break
            pp("RECV ←", raw)
            msg = _parse_frame(raw)
            if msg is None:
                print(" [skip] frame is not a JSON object (logged above)")
                continue

            # RPC ack for prompt.submit
            if msg.get("id") == "t2":
                print("[ack] prompt.submit acknowledged")
                continue

            # Events
            if msg.get("method", "") != "event":
                continue
            params = msg.get("params") or {}
            etype = params.get("type", "")
            payload = params.get("payload") or {}

            if etype == "message.delta":
                # A null/missing text counts as an empty chunk.
                chunk = payload.get("text") or ""
                full_text += chunk
                print(f" [delta] +{len(chunk)} chars")
            elif etype == "message.complete":
                # NOTE(review): message.complete is understood to carry the
                # whole reply in payload.text; prefer it when present and
                # fall back to the accumulated deltas — confirm against a
                # live trace.
                complete_text = payload.get("text")
                if isinstance(complete_text, str) and complete_text:
                    full_text = complete_text
                print(f"\n[complete] full response ({len(full_text)} chars):")
                print(f" {repr(full_text[:200])}")
                break
            elif etype == "error":
                print(f"\n[ERROR event] {payload}")
                break
            else:
                print(f" [event:{etype}] (logged above)")

        print(f"\n{rule}")
        print(f" FINAL RESPONSE: {repr(full_text) if full_text else '(empty)'}")
        print(f"{rule}\n")
# Script entry point: run the trace only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(run())