Add raising of an out-of-funds exception

2026-04-20 16:58:39 +02:00
parent 8f14b86e68
commit de907fd29a
1 changed files with 34 additions and 0 deletions
@@ -257,6 +257,40 @@ async def call_anthropic(body: dict, upstream: str, headers: dict) -> tuple[str,
    for block in data.get("content", []):
        if block.get("type") == "text":
            text += block.get("text", "")
    stop_reason = data.get("stop_reason", "unknown")
    usage = data.get("usage", {})
    in_tok = usage.get("input_tokens", "?")
    out_tok = usage.get("output_tokens", "?")
    preview = text[:120].replace("\n", " ") if text else "(empty)"
    log.info(
        "upstream_response  provider=anthropic model=%s stop_reason=%s in_tokens=%s out_tokens=%s  text=%.120s",
        model, stop_reason, in_tok, out_tok, preview,
    )
    if stop_reason == "max_tokens":
        # Output was cut off at the token limit.  The truncated response is
        # always identical, so Festinger's loop detector will fire immediately
        # and Agent0 will retry forever.  Convert to a 400 so litellm raises
        # ContextWindowExceededError and the agent can handle it gracefully.
        log.error(
            "upstream_max_tokens  provider=anthropic model=%s in_tokens=%s out_tokens=%s"
            " — response truncated, converting to 400 to break loop",
            model, in_tok, out_tok,
        )
        error_body = json.dumps({
            "type": "error",
            "error": {
                "type": "invalid_request_error",
                "message": (
                    f"max_tokens reached: response was truncated after {out_tok} output tokens "
                    f"({in_tok} input tokens used). "
                    "Reduce the prompt length or raise the max_tokens limit."
                ),
            },
        }).encode()
        raise UpstreamError(400, error_body, "application/json", "anthropic")
    return text, data