Add Festinger upstream health status
This commit is contained in:
@@ -2008,12 +2008,111 @@ async def reset(request: Request) -> dict:
|
|||||||
# /health
|
# /health
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _join_upstream_url(base_url: str, probe_path: str) -> str:
|
||||||
|
"""Join an upstream base URL and probe path without producing /v1/v1."""
|
||||||
|
base = (base_url or "").rstrip("/")
|
||||||
|
path = probe_path if probe_path.startswith("/") else f"/{probe_path}"
|
||||||
|
if path.startswith("/v1/") and base.endswith("/v1"):
|
||||||
|
path = path[3:]
|
||||||
|
return f"{base}{path}"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_upstream_models(payload: dict) -> list[str]:
|
||||||
|
"""Extract model names from OpenAI-compatible (/v1/models) or Ollama (/api/tags) payloads."""
|
||||||
|
models: list[str] = []
|
||||||
|
if isinstance(payload.get("data"), list):
|
||||||
|
for item in payload["data"]:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
model_id = item.get("id") or item.get("name")
|
||||||
|
if model_id:
|
||||||
|
models.append(str(model_id))
|
||||||
|
if isinstance(payload.get("models"), list):
|
||||||
|
for item in payload["models"]:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
model_id = item.get("name") or item.get("model") or item.get("id")
|
||||||
|
if model_id:
|
||||||
|
models.append(str(model_id))
|
||||||
|
return models
|
||||||
|
|
||||||
|
|
||||||
|
async def _probe_upstream_connection(name: str, base_url: str, probe_path: str) -> dict:
    """
    Issue one GET against an inference upstream and summarise the outcome.

    The result is a flat status block intended for the /health response, so a
    poller such as gnommoweb can show ``status_message`` without knowing
    whether the upstream speaks LM Studio's OpenAI-compatible API or the
    Ollama-compatible endpoints.

    Args:
        name: Label copied into the result and used in the status message.
        base_url: Upstream base URL; joined with ``probe_path``.
        probe_path: Listing endpoint to request on that upstream.

    Returns:
        A dict with keys ``name``, ``ok``, ``status``, ``url``,
        ``status_code``, ``latency_ms``, ``models`` and ``status_message``.
        ``status`` is ``"ok"`` for a successful response, ``"error"`` for a
        non-success HTTP status, and ``"unreachable"`` when the request
        raised ``httpx.RequestError``.
    """
    url = _join_upstream_url(base_url, probe_path)

    def _report(ok, status, status_code, latency_ms, models, message):
        # Single place that fixes the key set and order of the status block.
        return {
            "name": name,
            "ok": ok,
            "status": status,
            "url": url,
            "status_code": status_code,
            "latency_ms": latency_ms,
            "models": models,
            "status_message": message,
        }

    started = time.perf_counter()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(url)
        # Latency is taken after the client context has exited.
        latency_ms = round((time.perf_counter() - started) * 1000)

        if not response.is_success:
            # Flatten and truncate the body so the message stays one short line.
            body = (response.text or "").strip().replace("\n", " ")[:200]
            suffix = f" — {body}" if body else ""
            return _report(
                False,
                "error",
                response.status_code,
                latency_ms,
                [],
                f"{name} returned HTTP {response.status_code}{suffix}",
            )

        try:
            payload = response.json()
        except ValueError:
            # A successful response with a non-JSON body still counts as reachable.
            payload = {}
        if isinstance(payload, dict):
            models = _extract_upstream_models(payload)
        else:
            models = []
        detail = f": {len(models)} model(s) available" if models else ""
        return _report(
            True,
            "ok",
            response.status_code,
            latency_ms,
            models,
            f"{name} reachable{detail}",
        )
    except httpx.RequestError as exc:
        latency_ms = round((time.perf_counter() - started) * 1000)
        return _report(
            False,
            "unreachable",
            None,
            latency_ms,
            [],
            f"{name} unreachable: {type(exc).__name__}: {exc}",
        )
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
async def health(request: Request) -> dict:
|
async def health(request: Request) -> dict:
|
||||||
cfg = request.app.state.yaml_config
|
cfg = request.app.state.yaml_config
|
||||||
|
lm_studio = await _probe_upstream_connection(
|
||||||
|
"lm_studio",
|
||||||
|
cfg.get("upstream_openai", ""),
|
||||||
|
"/v1/models",
|
||||||
|
)
|
||||||
|
ollama = await _probe_upstream_connection(
|
||||||
|
"ollama",
|
||||||
|
cfg.get("upstream_ollama", ""),
|
||||||
|
"/api/tags",
|
||||||
|
)
|
||||||
|
connections = {"lm_studio": lm_studio, "ollama": ollama}
|
||||||
|
status = "ok" if any(conn["ok"] for conn in connections.values()) else "error"
|
||||||
return {
|
return {
|
||||||
"status": "ok",
|
"status": status,
|
||||||
"upstream": cfg["upstream_ollama"],
|
"upstream": cfg["upstream_ollama"],
|
||||||
|
"upstream_openai": cfg.get("upstream_openai"),
|
||||||
|
"connections": connections,
|
||||||
|
"status_messages": [conn["status_message"] for conn in connections.values()],
|
||||||
"active_loop_sessions": 0, # loop detector is stateful in-process
|
"active_loop_sessions": 0, # loop detector is stateful in-process
|
||||||
"soas_tokens": len(cache.soas_by_token),
|
"soas_tokens": len(cache.soas_by_token),
|
||||||
"urd_edges": len(cache.urd_by_concept_dim),
|
"urd_edges": len(cache.urd_by_concept_dim),
|
||||||
|
|||||||
@@ -0,0 +1,102 @@
|
|||||||
|
"""Tests for Festinger health reporting used by gnommoweb state-machine checks."""
|
||||||
|
import asyncio
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from festinger import main
|
||||||
|
|
||||||
|
|
||||||
|
class DummyResponse:
    """Minimal response double exposing status_code, text, is_success and json()."""

    def __init__(self, status_code=200, payload=None, text=""):
        self.text = text
        self.status_code = status_code
        # Any falsy payload (None, {}, []) is stored as an empty dict.
        self._payload = payload if payload else {}

    @property
    def is_success(self):
        """Return True when the status code is in the 2xx range."""
        code = self.status_code
        return code >= 200 and code < 300

    def json(self):
        """Return the payload given at construction time."""
        return self._payload
|
||||||
|
|
||||||
|
|
||||||
|
class FakeAsyncClient:
    """Async-context-manager double that records requested URLs.

    Every ``get`` call answers with a 200 ``DummyResponse`` carrying an
    OpenAI-style model listing that contains the single model
    ``local-model``.
    """

    # Class-level list: tests reset it and read it back through the class.
    requested_urls = []

    def __init__(self, *, timeout):
        self.timeout = timeout

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        # Returning False means exceptions raised inside the block propagate.
        return False

    async def get(self, url):
        """Record ``url`` and return the canned model-listing response."""
        self.requested_urls.append(url)
        listing = {
            "object": "list",
            "data": [
                {"id": "local-model", "object": "model"},
            ],
        }
        return DummyResponse(200, listing)
|
||||||
|
|
||||||
|
|
||||||
|
def test_probe_openai_compatible_upstream_reports_models_without_double_v1(monkeypatch):
    """A base URL ending in /v1 probed at /v1/models must hit a single /v1 segment."""
    # Start from a clean recording, then route the probe through the fake client.
    FakeAsyncClient.requested_urls = []
    monkeypatch.setattr(main.httpx, "AsyncClient", FakeAsyncClient)

    probe = main._probe_upstream_connection(
        name="lm_studio",
        base_url="http://lmstudio.local:1234/v1",
        probe_path="/v1/models",
    )
    result = asyncio.run(probe)

    assert result["ok"] is True
    assert result["status"] == "ok"
    assert result["url"] == "http://lmstudio.local:1234/v1/models"
    assert FakeAsyncClient.requested_urls == ["http://lmstudio.local:1234/v1/models"]
    assert result["models"] == ["local-model"]
    assert result["status_message"] == "lm_studio reachable: 1 model(s) available"
|
||||||
|
|
||||||
|
|
||||||
|
def test_health_endpoint_includes_connection_status_messages(monkeypatch):
    """/health exposes per-upstream connection blocks and their status messages."""

    async def fake_probe(name, base_url, probe_path):
        # Always-healthy probe; only lm_studio reports a model.
        models = ["local-model"] if name == "lm_studio" else []
        return {
            "name": name,
            "ok": True,
            "status": "ok",
            "url": f"{base_url.rstrip('/')}{probe_path}",
            "status_code": 200,
            "latency_ms": 12,
            "models": models,
            "status_message": f"{name} reachable",
        }

    monkeypatch.setattr(main, "_probe_upstream_connection", fake_probe)

    # Bare stand-in for the request: only app.state.yaml_config is populated.
    yaml_config = {
        "upstream_ollama": "http://ollama.local:11434",
        "upstream_openai": "http://lmstudio.local:1234",
    }
    state = SimpleNamespace(yaml_config=yaml_config)
    request = SimpleNamespace(app=SimpleNamespace(state=state))

    result = asyncio.run(main.health(request))

    assert result["status"] == "ok"
    assert result["connections"]["lm_studio"]["ok"] is True
    assert result["connections"]["ollama"]["ok"] is True
    assert result["status_messages"] == [
        "lm_studio reachable",
        "ollama reachable",
    ]
|
||||||
Reference in New Issue
Block a user