Add Festinger upstream health status
This commit is contained in:
@@ -2008,12 +2008,111 @@ async def reset(request: Request) -> dict:
|
||||
# /health
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _join_upstream_url(base_url: str, probe_path: str) -> str:
|
||||
"""Join an upstream base URL and probe path without producing /v1/v1."""
|
||||
base = (base_url or "").rstrip("/")
|
||||
path = probe_path if probe_path.startswith("/") else f"/{probe_path}"
|
||||
if path.startswith("/v1/") and base.endswith("/v1"):
|
||||
path = path[3:]
|
||||
return f"{base}{path}"
|
||||
|
||||
|
||||
def _extract_upstream_models(payload: dict) -> list[str]:
|
||||
"""Extract model names from OpenAI-compatible (/v1/models) or Ollama (/api/tags) payloads."""
|
||||
models: list[str] = []
|
||||
if isinstance(payload.get("data"), list):
|
||||
for item in payload["data"]:
|
||||
if isinstance(item, dict):
|
||||
model_id = item.get("id") or item.get("name")
|
||||
if model_id:
|
||||
models.append(str(model_id))
|
||||
if isinstance(payload.get("models"), list):
|
||||
for item in payload["models"]:
|
||||
if isinstance(item, dict):
|
||||
model_id = item.get("name") or item.get("model") or item.get("id")
|
||||
if model_id:
|
||||
models.append(str(model_id))
|
||||
return models
|
||||
|
||||
|
||||
async def _probe_upstream_connection(name: str, base_url: str, probe_path: str) -> dict:
    """
    Probe a local inference upstream and return a state-machine-friendly status block.

    gnommoweb can poll /health and display status_messages without needing to know
    whether Festinger is configured for LM Studio's OpenAI-compatible API or
    Ollama-compatible endpoints.

    Args:
        name: Label for the upstream; echoed in "name" and used in the message.
        base_url: Upstream base URL, joined with probe_path via _join_upstream_url.
        probe_path: Path requested with a single GET (5 second timeout).

    Returns:
        A dict with the keys name, ok, status, url, status_code, latency_ms,
        models and status_message. status is "ok" when response.is_success is
        true, "error" for any other response, and "unreachable" when the
        request could not be sent or the URL could not be parsed.
    """
    url = _join_upstream_url(base_url, probe_path)
    started = time.perf_counter()

    def elapsed_ms() -> int:
        return round((time.perf_counter() - started) * 1000)

    def status_block(ok, status, status_code, latency_ms, models, message) -> dict:
        # Every outcome shares this shape and key order.
        return {
            "name": name,
            "ok": ok,
            "status": status,
            "url": url,
            "status_code": status_code,
            "latency_ms": latency_ms,
            "models": models,
            "status_message": message,
        }

    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(url)
            latency_ms = elapsed_ms()
    except (httpx.RequestError, httpx.InvalidURL) as exc:
        # InvalidURL does not derive from RequestError; without it a malformed
        # configured URL would escape and fail the whole health check.
        return status_block(
            False,
            "unreachable",
            None,
            elapsed_ms(),
            [],
            f"{name} unreachable: {type(exc).__name__}: {exc}",
        )

    if not response.is_success:
        # Flatten and cap the body so the message stays on one short line.
        body = (response.text or "").strip().replace("\n", " ")[:200]
        suffix = f" — {body}" if body else ""
        return status_block(
            False,
            "error",
            response.status_code,
            latency_ms,
            [],
            f"{name} returned HTTP {response.status_code}{suffix}",
        )

    try:
        payload = response.json()
    except ValueError:
        # A reachable upstream with a non-JSON body still counts as healthy.
        payload = {}
    models = _extract_upstream_models(payload) if isinstance(payload, dict) else []
    detail = f": {len(models)} model(s) available" if models else ""
    return status_block(
        True,
        "ok",
        response.status_code,
        latency_ms,
        models,
        f"{name} reachable{detail}",
    )
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health(request: Request) -> dict:
|
||||
cfg = request.app.state.yaml_config
|
||||
lm_studio = await _probe_upstream_connection(
|
||||
"lm_studio",
|
||||
cfg.get("upstream_openai", ""),
|
||||
"/v1/models",
|
||||
)
|
||||
ollama = await _probe_upstream_connection(
|
||||
"ollama",
|
||||
cfg.get("upstream_ollama", ""),
|
||||
"/api/tags",
|
||||
)
|
||||
connections = {"lm_studio": lm_studio, "ollama": ollama}
|
||||
status = "ok" if any(conn["ok"] for conn in connections.values()) else "error"
|
||||
return {
|
||||
"status": "ok",
|
||||
"status": status,
|
||||
"upstream": cfg["upstream_ollama"],
|
||||
"upstream_openai": cfg.get("upstream_openai"),
|
||||
"connections": connections,
|
||||
"status_messages": [conn["status_message"] for conn in connections.values()],
|
||||
"active_loop_sessions": 0, # loop detector is stateful in-process
|
||||
"soas_tokens": len(cache.soas_by_token),
|
||||
"urd_edges": len(cache.urd_by_concept_dim),
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
"""Tests for Festinger health reporting used by gnommoweb state-machine checks."""
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from festinger import main
|
||||
|
||||
|
||||
class DummyResponse:
    """Minimal response double exposing status_code, text, is_success and json()."""

    def __init__(self, status_code=200, payload=None, text=""):
        self.status_code = status_code
        self.text = text
        # Any falsy payload (None, {}, []) is stored as a fresh empty dict.
        self._payload = payload if payload else {}

    @property
    def is_success(self):
        """True when the status code lies in the 200-299 range."""
        code = self.status_code
        return code >= 200 and code < 300

    def json(self):
        """Return the stored payload unchanged."""
        return self._payload
|
||||
|
||||
|
||||
class FakeAsyncClient:
    """Async-context-manager double that records every URL passed to get()."""

    # Class-level log: shared by all instances unless rebound to a new list.
    requested_urls = []

    def __init__(self, *, timeout):
        self.timeout = timeout

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        # Falsy result: exceptions raised inside the `async with` body propagate.
        return False

    async def get(self, url):
        """Record the URL and answer with a canned 200 model listing."""
        self.requested_urls.append(url)
        canned_listing = {
            "object": "list",
            "data": [
                {"id": "local-model", "object": "model"},
            ],
        }
        return DummyResponse(200, canned_listing)
|
||||
|
||||
|
||||
def test_probe_openai_compatible_upstream_reports_models_without_double_v1(monkeypatch):
    """A base URL ending in /v1 plus a /v1/models probe must request /v1/models once."""
    expected_url = "http://lmstudio.local:1234/v1/models"

    # Start from an empty URL log and route the probe through the fake client.
    FakeAsyncClient.requested_urls = []
    monkeypatch.setattr(main.httpx, "AsyncClient", FakeAsyncClient)

    probe = main._probe_upstream_connection(
        name="lm_studio",
        base_url="http://lmstudio.local:1234/v1",
        probe_path="/v1/models",
    )
    report = asyncio.run(probe)

    assert report["ok"] is True
    assert report["status"] == "ok"
    assert report["url"] == expected_url
    assert FakeAsyncClient.requested_urls == [expected_url]
    assert report["models"] == ["local-model"]
    assert report["status_message"] == "lm_studio reachable: 1 model(s) available"
|
||||
|
||||
|
||||
def test_health_endpoint_includes_connection_status_messages(monkeypatch):
    """health() should expose each upstream's probe result and its status message."""

    async def fake_probe(name, base_url, probe_path):
        # Always healthy; only the lm_studio upstream reports a model.
        models = ["local-model"] if name == "lm_studio" else []
        base = base_url.rstrip("/")
        return {
            "name": name,
            "ok": True,
            "status": "ok",
            "url": f"{base}{probe_path}",
            "status_code": 200,
            "latency_ms": 12,
            "models": models,
            "status_message": f"{name} reachable",
        }

    monkeypatch.setattr(main, "_probe_upstream_connection", fake_probe)

    # Only request.app.state.yaml_config is supplied on the stand-in request.
    yaml_config = {
        "upstream_ollama": "http://ollama.local:11434",
        "upstream_openai": "http://lmstudio.local:1234",
    }
    state = SimpleNamespace(yaml_config=yaml_config)
    request = SimpleNamespace(app=SimpleNamespace(state=state))

    report = asyncio.run(main.health(request))

    assert report["status"] == "ok"
    connections = report["connections"]
    assert connections["lm_studio"]["ok"] is True
    assert connections["ollama"]["ok"] is True
    assert report["status_messages"] == [
        "lm_studio reachable",
        "ollama reachable",
    ]
|
||||
Reference in New Issue
Block a user