From de907fd29a9b87a2cbe711eea9de97384e141a5e Mon Sep 17 00:00:00 2001 From: jenstandstad Date: Mon, 20 Apr 2026 16:58:39 +0200 Subject: [PATCH] Throw an out-of-funds exception (HTTP 400) when Anthropic stops at max_tokens --- plugins/festinger/festinger/main.py | 34 +++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/plugins/festinger/festinger/main.py b/plugins/festinger/festinger/main.py index 4ac31a9..797f6a1 100644 --- a/plugins/festinger/festinger/main.py +++ b/plugins/festinger/festinger/main.py @@ -257,6 +257,40 @@ async def call_anthropic(body: dict, upstream: str, headers: dict) -> tuple[str, for block in data.get("content", []): if block.get("type") == "text": text += block.get("text", "") + + stop_reason = data.get("stop_reason", "unknown") + usage = data.get("usage", {}) + in_tok = usage.get("input_tokens", "?") + out_tok = usage.get("output_tokens", "?") + preview = text[:120].replace("\n", " ") if text else "(empty)" + log.info( + "upstream_response provider=anthropic model=%s stop_reason=%s in_tokens=%s out_tokens=%s text=%.120s", + model, stop_reason, in_tok, out_tok, preview, + ) + + if stop_reason == "max_tokens": + # Output was cut off at the token limit. The truncated response is + # always identical, so Festinger's loop detector will fire immediately + # and Agent0 will retry forever. Convert to a 400 so litellm raises + # ContextWindowExceededError and the agent can handle it gracefully. + log.error( + "upstream_max_tokens provider=anthropic model=%s in_tokens=%s out_tokens=%s" + " — response truncated, converting to 400 to break loop", + model, in_tok, out_tok, + ) + error_body = json.dumps({ + "type": "error", + "error": { + "type": "invalid_request_error", + "message": ( + f"max_tokens reached: response was truncated after {out_tok} output tokens " + f"({in_tok} input tokens used). " + "Reduce the prompt length or raise the max_tokens limit."
+ ), + }, + }).encode() + raise UpstreamError(400, error_body, "application/json", "anthropic") + return text, data