Fix model config context/token limits and inject gutask orientation into agent identity

- Chat model max_tokens: 32000 → 4096 (was inflating context budget calculations)
- Utility model ctx_length: 8192 → 14000 (matches GLM flash actual loaded context)
- Utility model max_tokens: explicit 2048 cap (utility tasks need short responses)
- Utility model name: use the full zai-org/glm-4.7-flash identifier to target the correct LM Studio instance
- pull-agent-identity.py: include agent_id and the gutask orientation block in the system prompt
- gutask.md: expand lore command docs; note that `done` auto-creates a review record

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-10 23:32:58 +02:00
parent 9acb4d86dc
commit e3e4f9d0ec
4 changed files with 30 additions and 10 deletions
@@ -11,7 +11,7 @@
    "rl_input": 0,
    "rl_output": 0,
    "kwargs": {
-      "max_tokens": 32000,
+      "max_tokens": 4096,
      "agent_id": 3
    },
    "max_embeds": 10
@@ -20,12 +20,14 @@
    "provider": "lm_studio",
    "name": "zai-org/glm-4.7-flash",
    "api_base": "http://host.docker.internal:1234/v1",
-    "ctx_length": 8192,
+    "ctx_length": 14000,
    "ctx_input": 0.7,
    "rl_requests": 0,
    "rl_input": 0,
    "rl_output": 0,
-    "kwargs": {}
+    "kwargs": {
+      "max_tokens": 2048
+    }
  },
  "embedding_model": {
    "provider": "huggingface",
@@ -11,7 +11,7 @@
    "rl_input": 0,
    "rl_output": 0,
    "kwargs": {
-      "max_tokens": 32000,
+      "max_tokens": 4096,
      "agent_id": 8
    },
    "max_embeds": 10
@@ -20,12 +20,14 @@
    "provider": "lm_studio",
    "name": "zai-org/glm-4.7-flash",
    "api_base": "http://host.docker.internal:1234/v1",
-    "ctx_length": 8192,
+    "ctx_length": 14000,
    "ctx_input": 0.7,
    "rl_requests": 0,
    "rl_input": 0,
    "rl_output": 0,
-    "kwargs": {}
+    "kwargs": {
+      "max_tokens": 2048
+    }
  },
  "embedding_model": {
    "provider": "huggingface",