Add Festinger upstream health status

2026-05-03 09:43:27 +02:00
parent e8301fb2bf
commit 86ef632ac6
2 changed files with 202 additions and 1 deletions
@@ -2008,12 +2008,111 @@ async def reset(request: Request) -> dict:
 # /health
 # ---------------------------------------------------------------------------
 def _join_upstream_url(base_url: str, probe_path: str) -> str:
    """Join an upstream base URL and probe path without producing /v1/v1."""
    base = (base_url or "").rstrip("/")
    path = probe_path if probe_path.startswith("/") else f"/{probe_path}"
    if path.startswith("/v1/") and base.endswith("/v1"):
        path = path[3:]
    return f"{base}{path}"
 def _extract_upstream_models(payload: dict) -> list[str]:
    """Extract model names from OpenAI-compatible (/v1/models) or Ollama (/api/tags) payloads."""
    models: list[str] = []
    if isinstance(payload.get("data"), list):
        for item in payload["data"]:
            if isinstance(item, dict):
                model_id = item.get("id") or item.get("name")
                if model_id:
                    models.append(str(model_id))
    if isinstance(payload.get("models"), list):
        for item in payload["models"]:
            if isinstance(item, dict):
                model_id = item.get("name") or item.get("model") or item.get("id")
                if model_id:
                    models.append(str(model_id))
    return models
 async def _probe_upstream_connection(name: str, base_url: str, probe_path: str) -> dict:
     """
     Probe a local inference upstream and return a state-machine-friendly status block.

     gnommoweb can poll /health and display status_messages without needing to know
     whether Festinger is configured for LM Studio's OpenAI-compatible API or
     Ollama-compatible endpoints.

     Args:
         name: Label for the upstream; echoed in the block and its message.
         base_url: Upstream base URL as configured (may be empty).
         probe_path: Path of the model-listing endpoint to GET.

     Returns:
         A dict with the keys ``name``, ``ok``, ``status`` (``"ok"``,
         ``"error"`` or ``"unreachable"``), ``url``, ``status_code``,
         ``latency_ms``, ``models`` and ``status_message``.

     Transport failures and URLs httpx rejects as invalid are reported as an
     ``"unreachable"`` block rather than raised.
     """
     url = _join_upstream_url(base_url, probe_path)

     def status_block(ok, status, status_code, latency_ms, models, message) -> dict:
         # Single place that fixes the key set and order of every result.
         return {
             "name": name,
             "ok": ok,
             "status": status,
             "url": url,
             "status_code": status_code,
             "latency_ms": latency_ms,
             "models": models,
             "status_message": message,
         }

     started = time.perf_counter()
     try:
         async with httpx.AsyncClient(timeout=5.0) as client:
             response = await client.get(url)
     except (httpx.RequestError, httpx.InvalidURL) as exc:
         # httpx.InvalidURL is not a RequestError subclass; without it a
         # malformed configured URL would escape the probe as an exception.
         latency_ms = round((time.perf_counter() - started) * 1000)
         return status_block(
             False,
             "unreachable",
             None,
             latency_ms,
             [],
             f"{name} unreachable: {type(exc).__name__}: {exc}",
         )

     # Measured before body parsing so it reflects the request itself.
     latency_ms = round((time.perf_counter() - started) * 1000)

     if not response.is_success:
         # Flatten and cap the body so the message stays on one short line.
         body = (response.text or "").strip().replace("\n", " ")[:200]
         suffix = f" — {body}" if body else ""
         return status_block(
             False,
             "error",
             response.status_code,
             latency_ms,
             [],
             f"{name} returned HTTP {response.status_code}{suffix}",
         )

     try:
         payload = response.json()
     except ValueError:
         # A successful response with a non-JSON body still counts as reachable.
         payload = {}
     models = _extract_upstream_models(payload) if isinstance(payload, dict) else []
     detail = f": {len(models)} model(s) available" if models else ""
     return status_block(
         True,
         "ok",
         response.status_code,
         latency_ms,
         models,
         f"{name} reachable{detail}",
     )
@app.get("/health")
 async def health(request: Request) -> dict:
    cfg = request.app.state.yaml_config
    lm_studio = await _probe_upstream_connection(
        "lm_studio",
        cfg.get("upstream_openai", ""),
        "/v1/models",
    )
    ollama = await _probe_upstream_connection(
        "ollama",
        cfg.get("upstream_ollama", ""),
        "/api/tags",
    )
    connections = {"lm_studio": lm_studio, "ollama": ollama}
    status = "ok" if any(conn["ok"] for conn in connections.values()) else "error"
    return {
-        "status": "ok",
+        "status": status,
        "upstream": cfg["upstream_ollama"],
        "upstream_openai": cfg.get("upstream_openai"),
        "connections": connections,
        "status_messages": [conn["status_message"] for conn in connections.values()],
        "active_loop_sessions": 0,  # loop detector is stateful in-process
        "soas_tokens": len(cache.soas_by_token),
        "urd_edges": len(cache.urd_by_concept_dim),
@@ -0,0 +1,102 @@
 """Tests for Festinger health reporting used by gnommoweb state-machine checks."""
 import asyncio
 from types import SimpleNamespace
 import pytest
 from festinger import main
 class DummyResponse:
     """Minimal response double exposing what the probe code reads.

     Provides ``status_code``, ``text``, ``is_success`` and ``json()``.
     """

     def __init__(self, status_code=200, payload=None, text=""):
         self.text = text
         self.status_code = status_code
         # Any falsy payload (None, {}, []) collapses to an empty dict.
         self._payload = payload if payload else {}

     @property
     def is_success(self):
         """Whether the status code lies in the 2xx range."""
         code = self.status_code
         return code >= 200 and code < 300

     def json(self):
         """Return the canned payload given at construction time."""
         return self._payload
 class FakeAsyncClient:
     """Async client double that records requested URLs.

     Every ``get`` answers with a canned one-model listing in the
     OpenAI-compatible ``data`` layout.
     """

     # Class-level so a test can inspect requests made by instances that
     # the code under test creates internally.
     requested_urls = []

     def __init__(self, *, timeout):
         self.timeout = timeout

     async def __aenter__(self):
         return self

     async def __aexit__(self, exc_type, exc, tb):
         # Falsy return: exceptions raised inside the ``async with`` propagate.
         return False

     async def get(self, url):
         self.requested_urls.append(url)
         canned_listing = {
             "object": "list",
             "data": [
                 {"id": "local-model", "object": "model"},
             ],
         }
         return DummyResponse(200, canned_listing)
 def test_probe_openai_compatible_upstream_reports_models_without_double_v1(monkeypatch):
     """A base URL ending in /v1 plus a /v1/models probe hits /v1/models once."""
     expected_url = "http://lmstudio.local:1234/v1/models"

     # Reset the shared log so earlier tests cannot leak requests into this one.
     FakeAsyncClient.requested_urls = []
     monkeypatch.setattr(main.httpx, "AsyncClient", FakeAsyncClient)

     probe = main._probe_upstream_connection(
         name="lm_studio",
         base_url="http://lmstudio.local:1234/v1",
         probe_path="/v1/models",
     )
     outcome = asyncio.run(probe)

     assert outcome["ok"] is True
     assert outcome["status"] == "ok"
     assert outcome["url"] == expected_url
     assert FakeAsyncClient.requested_urls == [expected_url]
     assert outcome["models"] == ["local-model"]
     assert outcome["status_message"] == "lm_studio reachable: 1 model(s) available"
 def test_health_endpoint_includes_connection_status_messages(monkeypatch):
     """/health exposes per-connection blocks and their status messages."""

     async def fake_probe(name, base_url, probe_path):
         # Only the LM Studio connection reports a model in this scenario.
         if name == "lm_studio":
             advertised = ["local-model"]
         else:
             advertised = []
         return {
             "name": name,
             "ok": True,
             "status": "ok",
             "url": f"{base_url.rstrip('/')}{probe_path}",
             "status_code": 200,
             "latency_ms": 12,
             "models": advertised,
             "status_message": f"{name} reachable",
         }

     monkeypatch.setattr(main, "_probe_upstream_connection", fake_probe)

     # Bare-bones stand-in for the request: only app.state.yaml_config is set.
     yaml_config = {
         "upstream_ollama": "http://ollama.local:11434",
         "upstream_openai": "http://lmstudio.local:1234",
     }
     app_state = SimpleNamespace(yaml_config=yaml_config)
     request = SimpleNamespace(app=SimpleNamespace(state=app_state))

     report = asyncio.run(main.health(request))

     assert report["status"] == "ok"
     connections = report["connections"]
     assert connections["lm_studio"]["ok"] is True
     assert connections["ollama"]["ok"] is True
     assert report["status_messages"] == [
         "lm_studio reachable",
         "ollama reachable",
     ]