From de907fd29a9b87a2cbe711eea9de97384e141a5e Mon Sep 17 00:00:00 2001
From: jenstandstad <jens.tandstad@gmail.com>
Date: Mon, 20 Apr 2026 16:58:39 +0200
Subject: [PATCH] Raise UpstreamError when Anthropic response is truncated at max_tokens

---
 plugins/festinger/festinger/main.py | 34 +++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/plugins/festinger/festinger/main.py b/plugins/festinger/festinger/main.py
index 4ac31a9..797f6a1 100644
--- a/plugins/festinger/festinger/main.py
+++ b/plugins/festinger/festinger/main.py
@@ -257,6 +257,40 @@ async def call_anthropic(body: dict, upstream: str, headers: dict) -> tuple[str,
     for block in data.get("content", []):
         if block.get("type") == "text":
             text += block.get("text", "")
+
+    stop_reason = data.get("stop_reason", "unknown")
+    usage = data.get("usage", {})
+    in_tok = usage.get("input_tokens", "?")
+    out_tok = usage.get("output_tokens", "?")
+    preview = text[:120].replace("\n", " ") if text else "(empty)"
+    log.info(
+        "upstream_response  provider=anthropic model=%s stop_reason=%s in_tokens=%s out_tokens=%s  text=%.120s",
+        model, stop_reason, in_tok, out_tok, preview,
+    )
+
+    if stop_reason == "max_tokens":
+        # The output was cut off at the token limit.  The truncated response
+        # is always identical, so Festinger's loop detector fires immediately
+        # and Agent0 retries forever.  Raise a 400 instead so that litellm
+        # raises ContextWindowExceededError and the agent can handle it gracefully.
+        log.error(
+            "upstream_max_tokens  provider=anthropic model=%s in_tokens=%s out_tokens=%s"
+            " — response truncated, converting to 400 to break loop",
+            model, in_tok, out_tok,
+        )
+        error_body = json.dumps({
+            "type": "error",
+            "error": {
+                "type": "invalid_request_error",
+                "message": (
+                    f"max_tokens reached: response was truncated after {out_tok} output tokens "
+                    f"({in_tok} input tokens used). "
+                    "Reduce the prompt length or raise the max_tokens limit."
+                ),
+            },
+        }).encode()
+        raise UpstreamError(400, error_body, "application/json", "anthropic")
+
     return text, data