Adding changes to docker compose

2026-04-25 13:15:15 +02:00
parent 14f74c372b
commit f1fe41dac7
10 changed files with 308 additions and 6 deletions
@@ -1327,6 +1327,231 @@ async def ollama_generate_with_agent_id(agent_id: str, request: Request) -> Resp
    return await _handle_ollama_generate(request, agent_name=agent_id.lower())


+# ---------------------------------------------------------------------------
+# /scan — gutask integration: scan task / letter text and return recollection
+# ---------------------------------------------------------------------------
+
+# Saliency encounter weight per context type, keyed by the lowercased "context" request field.
+# Facts stated in tasks are stronger signals than chat overheard in passing.
+_CONTEXT_WEIGHT: dict[str, float] = {
+    "task":   2.0,
+    "letter": 1.5,  # /scan applies round() to the weight, so 1.5 currently yields 2 increments
+    "chat":   1.0,  # 1.0 is also the fallback /scan uses for unrecognised context types
+}
+
+
+@app.post("/scan")
+async def scan_text(request: Request) -> dict:
+    """
+    Scan a block of plain text for domain concepts and return a recollection block.
+
+    Called by gutask when an agent reads a task or a letter, so that the agent
+    sees relevant knowledge-graph context alongside the task/letter content.
+
+    Body:
+        text    (str)  — the task description, letter body, or any free text
+        agent   (str)  — agent name (for logging; optional)
+        context (str)  — "task" | "letter" | "chat"  (default: "task")
+
+    Returns:
+        recollection_block  (str | null)  — the <recollection>…</recollection> block,
+                                            or null if nothing salient was found
+        salient_tokens      (list[str])   — concepts that triggered the block
+        cues_found          (int)         — number of URD cues extracted from the text
+    """
+    pool = request.app.state.pool
+    data = await request.json()
+
+    text: str = data.get("text", "").strip()
+    agent_name: str = data.get("agent", "").strip().lower()
+    context_type: str = data.get("context", "task").strip().lower()
+    weight: float = _CONTEXT_WEIGHT.get(context_type, 1.0)
+
+    if not text:
+        return {"recollection_block": None, "salient_tokens": [], "cues_found": 0}
+
+    read_threshold = float(await get_config(pool, "saliency_read_threshold", "0.5"))
+    conf_floor = float(await get_config(pool, "recollection_confidence_floor", "0.6"))
+    recency_days = int(await get_config(pool, "recollection_recency_days", "90"))
+
+    # 1. Cue scanner — extract explicit relationship assertions and enqueue them.
+    cues = list(scan_cues(text))
+    cues_found = len(cues)
+    for cue in cues:
+        await enqueue_cue(cue)
+
+    # 2. Token loop — find salient concepts and record weighted encounters.
+    tokens = tokenize(text)
+    salient_ids: list[int] = []
+
+    for token in tokens:
+        row = cache.soas_by_token.get(token)
+        if row is None or row.saliency == 0.0:
+            continue
+        # Weight the encounter: task/letter mentions count more than chat.
+        # We stage fractional deltas; flush_encounter_deltas rounds to int,
+        # so accumulate weight as repeated single increments for simplicity.
+        increments = max(1, round(weight))
+        for _ in range(increments):
+            cache.record_encounter(row.id)
+        if row.saliency >= read_threshold:
+            salient_ids.append(row.id)
+
+    if not salient_ids:
+        log.debug("scan | agent=%s context=%s cues=%d → no salient concepts",
+                  agent_name or "(none)", context_type, cues_found)
+        return {"recollection_block": None, "salient_tokens": [], "cues_found": cues_found}
+
+    # 3. Build recollection block (session boost not applicable here — no system message).
+    block = build_recollection_block(salient_ids, conf_floor, recency_days)
+
+    salient_tokens = [cache.soas_by_id.get(cid, str(cid)) for cid in salient_ids]
+    log.info(
+        "scan | agent=%s context=%s cues=%d salient=%s\n%s",
+        agent_name or "(none)", context_type, cues_found, salient_tokens,
+        block or "(no block)",
+    )
+
+    return {
+        "recollection_block": block,
+        "salient_tokens": salient_tokens,
+        "cues_found": cues_found,
+    }
+
+
+# ---------------------------------------------------------------------------
+# /recall/{concept} — gutask recall backend
+# ---------------------------------------------------------------------------
+
+@app.get("/recall/{concept:path}")
+async def recall_concept(
+    concept: str,
+    request: Request,
+    depth: str = "brief",
+) -> dict:
+    """
+    Return what Festinger knows about a concept, formatted for agent display.
+
+    depth:
+      brief      — URD edges only (same as recollection block, more readable)
+      detailed   — edges + saliency stats + related concepts in same dimensions
+      everything — detailed + full write log history
+    """
+    pool = request.app.state.pool
+    concept = concept.lower().strip()
+
+    row = cache.soas_by_token.get(concept)
+    if row is None:
+        # Try DB in case cache is stale
+        async with pool.acquire() as conn:
+            db_row = await conn.fetchrow(
+                "SELECT id, token, saliency, novelty, encounter_count, "
+                "first_seen_context, last_seen FROM soas WHERE token = $1",
+                concept,
+            )
+        if not db_row:
+            return {"concept": concept, "found": False, "text": f"No knowledge about '{concept}' in Festinger."}
+        from .cache import SoasRow
+        row = SoasRow(
+            id=db_row["id"], token=db_row["token"],
+            saliency=db_row["saliency"], novelty=db_row["novelty"],
+            encounter_count=db_row["encounter_count"],
+            first_seen_context=db_row["first_seen_context"] or "",
+            last_seen=db_row["last_seen"],
+        )
+
+    edges = cache.urd_by_concept.get(row.id, [])
+    reverse_edges = cache.urd_by_parent.get(row.id, [])
+
+    lines = [f"── {concept} {'─' * max(0, 50 - len(concept))}"]
+
+    if depth in ("detailed", "everything"):
+        from .recollection import recency_decay, centrality_bonus, effective_score
+        decay = recency_decay(row)
+        score = effective_score(row.id)
+        last_seen_str = (
+            row.last_seen.strftime("%Y-%m-%d") if row.last_seen else "never"
+        )
+        lines.append(f"  saliency: {row.saliency:.2f}  encounters: {row.encounter_count}"
+                     f"  score: {score:.2f}  last seen: {last_seen_str}")
+        if row.first_seen_context:
+            lines.append(f"  first seen: \"{row.first_seen_context[:80]}\"")
+        if row.id in cache.pending_conflicts:
+            lines.append("  ⚠ has pending conflict in resolution queue")
+        lines.append("")
+
+    if edges:
+        lines.append("  Relationships (outgoing):")
+        for e in edges:
+            conf_str = f"  conf={e.confidence:.2f}" if e.confidence < 1.0 else ""
+            lines.append(f"    [{e.dim_token}] → {e.parent_token}{conf_str}")
+    else:
+        lines.append("  No outgoing relationships stored.")
+
+    if reverse_edges:
+        lines.append("")
+        lines.append("  Referenced by:")
+        for e in reverse_edges[:10]:
+            child_token = cache.soas_by_id.get(e.concept_id, str(e.concept_id))
+            lines.append(f"    [{e.dim_token}] ← {child_token}")
+        if len(reverse_edges) > 10:
+            lines.append(f"    … and {len(reverse_edges) - 10} more")
+
+    if depth in ("detailed", "everything") and edges:
+        # Siblings: other concepts sharing the same parent in the same dimension
+        siblings: dict[str, list[str]] = {}
+        for e in edges:
+            peer_edges = cache.urd_by_parent.get(e.parent_id, [])
+            peers = [
+                cache.soas_by_id.get(pe.concept_id, str(pe.concept_id))
+                for pe in peer_edges
+                if pe.concept_id != row.id and pe.dim_id == e.dim_id
+            ][:5]
+            if peers:
+                siblings[f"{e.dim_token}/{e.parent_token}"] = peers
+        if siblings:
+            lines.append("")
+            lines.append("  Siblings (same parent/dimension):")
+            for label, peers in siblings.items():
+                lines.append(f"    {label}: {', '.join(peers)}")
+
+    if depth == "everything":
+        async with pool.acquire() as conn:
+            log_rows = await conn.fetch(
+                """
+                SELECT op, parent_token, dim_token, is_isa, source, created_at
+                FROM kg_write_log WHERE concept_id = $1
+                ORDER BY created_at DESC LIMIT 20
+                """,
+                row.id,
+            )
+        if log_rows:
+            lines.append("")
+            lines.append("  Write history:")
+            for lr in log_rows:
+                ts = lr["created_at"].strftime("%Y-%m-%d")
+                isa = "is-a" if lr["is_isa"] else "is-part-of"
+                lines.append(
+                    f"    {ts}  {lr['op']:<10} [{lr['dim_token']}] {isa} {lr['parent_token']}"
+                    f"  ({lr['source']})"
+                )
+
+    text = "\n".join(lines)
+    return {
+        "concept": concept,
+        "found": True,
+        "depth": depth,
+        "saliency": row.saliency,
+        "encounter_count": row.encounter_count,
+        "edges": [
+            {"dim": e.dim_token, "parent": e.parent_token,
+             "is_isa": e.is_isa, "confidence": e.confidence}
+            for e in edges
+        ],
+        "text": text,
+    }
+
+
 # ---------------------------------------------------------------------------
 # /iknowthat — manual write path
 # ---------------------------------------------------------------------------
@@ -178,7 +178,25 @@ def build_recollection_block(
    if not lines:
        return None

-    return f"<recollection>\n" + "\n".join(lines) + "\n</recollection>"
+    # Footer: list the concepts that have actual URD data so the agent knows
+    # it can dig deeper via gutask recall.
+    hit_tokens = [
+        cache.soas_by_id.get(cid, str(cid))
+        for _, cid in scored
+        if query_edges(cid, confidence_floor) or cache.urd_by_parent.get(cid)
+    ][:6]
+    footer_lines = []
+    if hit_tokens:
+        footer_lines.append(
+            "To recall more: gutask recall <concept> brief|detailed|everything"
+        )
+        footer_lines.append("  concepts: " + ", ".join(hit_tokens))
+
+    body = "\n".join(lines)
+    if footer_lines:
+        body += "\n" + "\n".join(footer_lines)
+
+    return "<recollection>\n" + body + "\n</recollection>"


 # ---------------------------------------------------------------------------