Add Festinger upstream health status

2026-05-03 09:43:27 +02:00
parent e8301fb2bf
commit 86ef632ac6
2 changed files with 202 additions and 1 deletions
@@ -2008,12 +2008,111 @@ async def reset(request: Request) -> dict:
 # /health
 # ---------------------------------------------------------------------------

+def _join_upstream_url(base_url: str, probe_path: str) -> str:
+    """Join an upstream base URL and probe path without producing /v1/v1.
+
+    Args:
+        base_url: Upstream root, with or without trailing slashes or a
+            trailing ``/v1`` segment. ``None`` or ``""`` is treated as empty.
+        probe_path: Path to probe, with or without a leading slash. ``None``
+            is treated as empty.
+
+    Returns:
+        The base (trailing slashes stripped) followed by the probe path
+        (leading slash guaranteed). When the base already ends in ``/v1``,
+        a leading ``/v1`` segment of the probe path is dropped.
+    """
+    base = (base_url or "").rstrip("/")
+    path = probe_path or ""
+    if not path.startswith("/"):
+        path = f"/{path}"
+    # Match the bare "/v1" segment as well as "/v1/...": checking only the
+    # "/v1/" prefix lets base ".../v1" + path "/v1" through as ".../v1/v1".
+    # Whole-segment matching also leaves look-alikes such as "/v1beta" alone.
+    if base.endswith("/v1") and (path == "/v1" or path.startswith("/v1/")):
+        path = path[len("/v1"):]
+    return f"{base}{path}"
+
+
+def _extract_upstream_models(payload: dict) -> list[str]:
+    """Collect model names from an OpenAI-style or Ollama-style listing.
+
+    Two payload layouts are understood, and both are read when both are present:
+
+    * ``{"data": [...]}`` (OpenAI-compatible ``/v1/models``): each dict entry
+      contributes its ``id``, falling back to ``name``.
+    * ``{"models": [...]}`` (Ollama ``/api/tags``): each dict entry
+      contributes its ``name``, falling back to ``model`` and then ``id``.
+
+    Entries that are not dicts, and entries with no truthy identifier, are
+    skipped. Identifiers are converted with ``str``. The result lists ``data``
+    entries first, then ``models`` entries, each in payload order.
+    """
+    # (payload key, identifier fields in priority order)
+    layouts = (
+        ("data", ("id", "name")),
+        ("models", ("name", "model", "id")),
+    )
+    found: list[str] = []
+    for section, id_fields in layouts:
+        entries = payload.get(section)
+        if not isinstance(entries, list):
+            continue
+        for entry in entries:
+            if not isinstance(entry, dict):
+                continue
+            # First truthy candidate wins; None means "no usable identifier".
+            identifier = next(
+                (value for value in map(entry.get, id_fields) if value),
+                None,
+            )
+            if identifier:
+                found.append(str(identifier))
+    return found
+
+
+def _upstream_status_block(
+    name: str,
+    url: str,
+    status: str,
+    status_message: str,
+    *,
+    status_code=None,
+    latency_ms: int = 0,
+    models=None,
+) -> dict:
+    """Build the status dict shared by every probe outcome; ``ok`` is true only for status "ok"."""
+    return {
+        "name": name,
+        "ok": status == "ok",
+        "status": status,
+        "url": url,
+        "status_code": status_code,
+        "latency_ms": latency_ms,
+        "models": models or [],
+        "status_message": status_message,
+    }
+
+
+async def _probe_upstream_connection(name: str, base_url: str, probe_path: str) -> dict:
+    """
+    Probe a local inference upstream and return a state-machine-friendly status block.
+
+    gnommoweb can poll /health and display status_messages without needing to know
+    whether Festinger is configured for LM Studio's OpenAI-compatible API or
+    Ollama-compatible endpoints.
+
+    Args:
+        name: Label for the upstream; echoed in the result and its message.
+        base_url: Upstream root URL. Empty or missing means "not configured".
+        probe_path: Path requested with GET to test the upstream.
+
+    Returns:
+        A dict with the keys ``name``, ``ok``, ``status``, ``url``,
+        ``status_code``, ``latency_ms``, ``models`` and ``status_message``.
+        ``status`` is ``"ok"`` for a 2xx response, ``"error"`` for any other
+        HTTP response, and ``"unreachable"`` when no response was obtained
+        (connection failure, timeout, bad URL, or no URL configured).
+
+    Connection-level failures are reported in the returned block rather than raised.
+    """
+    url = _join_upstream_url(base_url, probe_path)
+    if not (base_url or "").strip():
+        # Nothing to contact. Requesting the bare probe path would only fail
+        # on the missing scheme and surface a confusing protocol error.
+        return _upstream_status_block(
+            name,
+            url,
+            "unreachable",
+            f"{name} unreachable: no upstream URL configured",
+        )
+
+    started = time.perf_counter()
+    try:
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            response = await client.get(url)
+    except (httpx.RequestError, httpx.InvalidURL) as exc:
+        # InvalidURL is listed separately: in httpx's exception hierarchy it
+        # does not derive from RequestError, so a malformed configured URL
+        # would otherwise escape and fail the whole /health request.
+        return _upstream_status_block(
+            name,
+            url,
+            "unreachable",
+            f"{name} unreachable: {type(exc).__name__}: {exc}",
+            latency_ms=round((time.perf_counter() - started) * 1000),
+        )
+    latency_ms = round((time.perf_counter() - started) * 1000)
+
+    if not response.is_success:
+        # Flatten and cap the body so the message stays a short single line.
+        body = (response.text or "").strip().replace("\n", " ")[:200]
+        suffix = f" — {body}" if body else ""
+        return _upstream_status_block(
+            name,
+            url,
+            "error",
+            f"{name} returned HTTP {response.status_code}{suffix}",
+            status_code=response.status_code,
+            latency_ms=latency_ms,
+        )
+
+    try:
+        payload = response.json()
+    except ValueError:
+        # A 2xx answer with a non-JSON body still counts as reachable.
+        payload = {}
+    models = _extract_upstream_models(payload) if isinstance(payload, dict) else []
+    detail = f": {len(models)} model(s) available" if models else ""
+    return _upstream_status_block(
+        name,
+        url,
+        "ok",
+        f"{name} reachable{detail}",
+        status_code=response.status_code,
+        latency_ms=latency_ms,
+        models=models,
+    )
+
+
@app.get("/health")
 async def health(request: Request) -> dict:
    cfg = request.app.state.yaml_config
+    lm_studio = await _probe_upstream_connection(
+        "lm_studio",
+        cfg.get("upstream_openai", ""),
+        "/v1/models",
+    )
+    ollama = await _probe_upstream_connection(
+        "ollama",
+        cfg.get("upstream_ollama", ""),
+        "/api/tags",
+    )
+    connections = {"lm_studio": lm_studio, "ollama": ollama}
+    status = "ok" if any(conn["ok"] for conn in connections.values()) else "error"
    return {
-        "status": "ok",
+        "status": status,
        "upstream": cfg["upstream_ollama"],
+        "upstream_openai": cfg.get("upstream_openai"),
+        "connections": connections,
+        "status_messages": [conn["status_message"] for conn in connections.values()],
        "active_loop_sessions": 0,  # loop detector is stateful in-process
        "soas_tokens": len(cache.soas_by_token),
        "urd_edges": len(cache.urd_by_concept_dim),
@@ -0,0 +1,102 @@
+"""Tests for Festinger health reporting used by gnommoweb state-machine checks."""
+import asyncio
+from types import SimpleNamespace
+
+import pytest
+
+from festinger import main
+
+
+class DummyResponse:
+    """Minimal stand-in for an HTTP response, exposing only what the probe reads.
+
+    Args:
+        status_code: HTTP status to report.
+        payload: Value returned by ``json()``; ``None`` means an empty dict.
+        text: Raw body text.
+    """
+
+    def __init__(self, status_code=200, payload=None, text=""):
+        self.status_code = status_code
+        # Substitute the default only when no payload was given: ``payload or {}``
+        # would also replace falsy-but-valid JSON bodies such as ``[]``.
+        self._payload = {} if payload is None else payload
+        self.text = text
+
+    @property
+    def is_success(self):
+        """True for 2xx status codes."""
+        return 200 <= self.status_code < 300
+
+    def json(self):
+        """Return the payload given at construction."""
+        return self._payload
+
+
+class FakeAsyncClient:
+    """Async-context-manager double that records GET URLs and serves a fixed model list."""
+
+    # Class-level on purpose: every instance appends to the same list, so a
+    # test can inspect the URLs afterwards. Tests reset it by reassigning.
+    requested_urls = []
+
+    def __init__(self, *, timeout):
+        self.timeout = timeout
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb):
+        # Falsy return: exceptions raised inside the ``async with`` propagate.
+        return False
+
+    async def get(self, url):
+        """Record ``url`` and answer 200 with a one-model OpenAI-style listing."""
+        self.requested_urls.append(url)
+        model_entry = {"id": "local-model", "object": "model"}
+        listing = {"object": "list", "data": [model_entry]}
+        return DummyResponse(200, listing)
+
+
+def test_probe_openai_compatible_upstream_reports_models_without_double_v1(monkeypatch):
+    """A base URL ending in /v1 plus a /v1/models probe yields one /v1 and lists the model."""
+    expected_url = "http://lmstudio.local:1234/v1/models"
+    FakeAsyncClient.requested_urls = []
+    monkeypatch.setattr(main.httpx, "AsyncClient", FakeAsyncClient)
+
+    probe = main._probe_upstream_connection(
+        name="lm_studio",
+        base_url="http://lmstudio.local:1234/v1",
+        probe_path="/v1/models",
+    )
+    outcome = asyncio.run(probe)
+
+    # Both the reported URL and the URL actually requested must be de-duplicated.
+    assert outcome["url"] == expected_url
+    assert FakeAsyncClient.requested_urls == [expected_url]
+    assert outcome["ok"] is True
+    assert outcome["status"] == "ok"
+    assert outcome["models"] == ["local-model"]
+    assert outcome["status_message"] == "lm_studio reachable: 1 model(s) available"
+
+
+def test_health_endpoint_includes_connection_status_messages(monkeypatch):
+    """/health exposes each probe's block under "connections" and its message in order."""
+
+    async def fake_probe(name, base_url, probe_path):
+        # Always-healthy probe; only lm_studio reports a model.
+        reported_models = ["local-model"] if name == "lm_studio" else []
+        return {
+            "name": name,
+            "ok": True,
+            "status": "ok",
+            "url": f"{base_url.rstrip('/')}{probe_path}",
+            "status_code": 200,
+            "latency_ms": 12,
+            "models": reported_models,
+            "status_message": f"{name} reachable",
+        }
+
+    monkeypatch.setattr(main, "_probe_upstream_connection", fake_probe)
+
+    # Mimic just the attribute chain the handler reads: request.app.state.yaml_config.
+    yaml_config = {
+        "upstream_ollama": "http://ollama.local:11434",
+        "upstream_openai": "http://lmstudio.local:1234",
+    }
+    app_state = SimpleNamespace(yaml_config=yaml_config)
+    request = SimpleNamespace(app=SimpleNamespace(state=app_state))
+
+    body = asyncio.run(main.health(request))
+    connections = body["connections"]
+
+    assert body["status"] == "ok"
+    assert connections["lm_studio"]["ok"] is True
+    assert connections["ollama"]["ok"] is True
+    assert body["status_messages"] == ["lm_studio reachable", "ollama reachable"]