Adding Festinger with wordnet
This commit is contained in:
@@ -0,0 +1,53 @@
|
||||
"""
|
||||
Test helpers — populate the in-memory cache directly without touching Postgres.
|
||||
|
||||
All collision detection, recollection rendering, and queue routing logic operates
|
||||
entirely on the module-level dicts in festinger.cache, so unit tests can inject
|
||||
state there directly and assert outcomes without a live database.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from festinger import cache
|
||||
from festinger.cache import SoasRow, UrdEdge
|
||||
|
||||
|
||||
def reset_cache() -> None:
    """Empty every module-level cache container. Call at the start of each test."""
    stores = (
        cache.soas_by_token,
        cache.soas_by_id,
        cache.urd_by_concept,
        cache.urd_by_concept_dim,
        cache.pending_conflicts,
        cache._encounter_deltas,
    )
    # Clear in place so any other module holding a reference sees the reset too.
    for store in stores:
        store.clear()
|
||||
|
||||
|
||||
def add_soas(id: int, token: str, saliency: float = 0.0, novelty: float = 0.0) -> SoasRow:
    """Create a SOAS row and register it in both cache indexes.

    ``cache.soas_by_token`` maps the token to the row itself, while
    ``cache.soas_by_id`` maps the id to the token string only.

    NOTE: the ``id`` parameter shadows the builtin; the name is kept because
    callers may pass it by keyword.

    Returns the newly created row.
    """
    entry = SoasRow(id=id, token=token, saliency=saliency, novelty=novelty)
    cache.soas_by_id[id] = token
    cache.soas_by_token[token] = entry
    return entry
|
||||
|
||||
|
||||
def add_urd(
    concept_id: int,
    parent_id: int,
    dim_id: int,
    is_isa: bool,
    confidence: float = 0.9,
    source: str = "test",
) -> UrdEdge:
    """Build a URD edge and store it in both URD cache indexes.

    Parent and dimension tokens are looked up in ``cache.soas_by_id``; an id
    with no registered token falls back to its decimal string.

    The edge is appended to the per-concept list in ``cache.urd_by_concept``
    and assigned to ``cache.urd_by_concept_dim[(concept_id, dim_id)]``. That
    second index holds one edge per key, so a repeated (concept, dim) pair
    replaces the earlier entry there while the list keeps both.

    Returns the newly created edge.
    """

    def _label(soas_id):
        # Resolve an id to its token, or stringify the id when unknown.
        return cache.soas_by_id.get(soas_id, str(soas_id))

    new_edge = UrdEdge(
        concept_id=concept_id,
        parent_id=parent_id,
        dim_id=dim_id,
        is_isa=is_isa,
        confidence=confidence,
        source=source,
        parent_token=_label(parent_id),
        dim_token=_label(dim_id),
    )
    bucket = cache.urd_by_concept.setdefault(concept_id, [])
    bucket.append(new_edge)
    cache.urd_by_concept_dim[(concept_id, dim_id)] = new_edge
    return new_edge
|
||||
@@ -0,0 +1,284 @@
|
||||
"""
|
||||
Collision detection tests — exercises the in-memory URD insert pipeline.
|
||||
|
||||
All tests inject state directly into the cache module dicts and call
|
||||
insert_urd_edge() with a mock asyncpg pool that records calls but never
|
||||
touches a real database.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from festinger import cache
|
||||
from festinger.urd_writer import InsertRequest, insert_urd_edge, CollisionInfo
|
||||
from tests.helpers import reset_cache, add_soas, add_urd
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mock pool — captures INSERT attempts, never hits Postgres
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_mock_pool():
    """Build a stand-in asyncpg pool whose connection records calls and always succeeds.

    Returns a ``(pool, conn)`` pair. ``pool.acquire()`` hands back ``conn``,
    which doubles as its own async context manager, so
    ``async with pool.acquire() as c`` yields ``conn``. Awaiting
    ``conn.execute(...)`` gives ``"INSERT 0 1"`` and awaiting
    ``conn.fetchrow(...)`` gives ``None``.
    """
    fake_conn = AsyncMock()
    fake_conn.fetchrow = AsyncMock(return_value=None)
    fake_conn.execute = AsyncMock(return_value="INSERT 0 1")
    # Entering the context yields the connection itself; exiting returns False.
    fake_conn.__aexit__ = AsyncMock(return_value=False)
    fake_conn.__aenter__ = AsyncMock(return_value=fake_conn)

    fake_pool = MagicMock()
    fake_pool.acquire = MagicMock(return_value=fake_conn)
    return fake_pool, fake_conn
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario A — misclassification (ISPART existing, ISA incoming, same dim)
|
||||
#
|
||||
# Pre-state: michigan ISPART usa in dim:usa (parent_id = dim_id — degenerate edge)
|
||||
# Trigger: michigan ISA state in dim:usa
|
||||
# Expected: misclassification collision, no URD modification
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestScenarioA:
    """Scenario A: an incoming ISA edge hits a dimension that already holds an ISPART edge.

    Seed state is the degenerate edge michigan ISPART usa in dim:usa
    (parent_id == dim_id == 102). The trigger is michigan ISA state in dim:usa.
    """

    def setup_method(self):
        """Rebuild the cache with three SOAS rows and the degenerate seed edge."""
        reset_cache()
        # SOAS entries
        self.michigan = add_soas(101, "michigan", saliency=1.5)
        self.usa = add_soas(102, "usa", saliency=0.8)
        self.state = add_soas(103, "state", saliency=0.7)

        # Degenerate seed edge: michigan ISPART usa, dim=usa (parent_id = dim_id)
        self.existing = add_urd(
            concept_id=101, parent_id=102, dim_id=102,  # dim_id = parent_id = usa
            is_isa=False, confidence=0.85, source="test",
        )

    @staticmethod
    def _incoming_isa_request():
        """Build the triggering request shared by every collision test here."""
        return InsertRequest(
            concept_id=101,   # michigan
            parent_id=103,    # state
            dim_id=102,       # usa (same dim as existing ISPART)
            is_isa=True,
            confidence=0.85,
            source="gutask",
        )

    def test_degenerate_edge_stored(self):
        """Confirm the seed has parent_id == dim_id."""
        edge = cache.urd_by_concept_dim.get((101, 102))
        assert edge is not None
        assert edge.parent_id == edge.dim_id == 102

    @pytest.mark.asyncio
    async def test_misclassification_detected(self):
        """Incoming ISA in the same dim as existing ISPART → misclassification."""
        pool, _ = make_mock_pool()

        # Patch _queue_collision so we can inspect it without hitting Postgres
        queued: list[CollisionInfo] = []

        async def fake_queue(pool, col, priority):
            queued.append(col)
            cache.pending_conflicts.add(col.concept_id)

        with patch("festinger.urd_writer._queue_collision", side_effect=fake_queue):
            collision = await insert_urd_edge(pool, self._incoming_isa_request())

        assert collision is not None
        assert collision.collision_type == "misclassification"
        assert collision.existing_is_isa is False
        assert collision.incoming_is_isa is True
        # The recorder exists to be inspected: the collision must be queued once.
        assert len(queued) == 1

    @pytest.mark.asyncio
    async def test_urd_not_modified_on_collision(self):
        """URD in-memory cache is unchanged after a collision."""
        pool, _ = make_mock_pool()

        with patch("festinger.urd_writer._queue_collision", new_callable=AsyncMock):
            await insert_urd_edge(pool, self._incoming_isa_request())

        # Original edge still in place
        edge = cache.urd_by_concept_dim.get((101, 102))
        assert edge is not None
        assert edge.parent_id == 102  # usa, unchanged
        assert edge.is_isa is False

    @pytest.mark.asyncio
    async def test_postgres_not_written_on_collision(self):
        """No INSERT to Postgres is attempted when collision is detected in cache."""
        pool, conn = make_mock_pool()

        with patch("festinger.urd_writer._queue_collision", new_callable=AsyncMock):
            await insert_urd_edge(pool, self._incoming_isa_request())

        conn.execute.assert_not_called()

    @pytest.mark.asyncio
    async def test_pending_conflicts_marked(self):
        """concept_id is added to pending_conflicts after collision."""
        pool, _ = make_mock_pool()

        async def fake_queue(pool, col, priority):
            cache.pending_conflicts.add(col.concept_id)

        with patch("festinger.urd_writer._queue_collision", side_effect=fake_queue):
            await insert_urd_edge(pool, self._incoming_isa_request())

        assert 101 in cache.pending_conflicts
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario B — ISA + ISA collision (dimension too coarse → decompose)
|
||||
#
|
||||
# Pre-state: gnommoweb ISA container in dim:type
|
||||
# Trigger: gnommoweb ISA repo in dim:type
|
||||
# Expected: isa_isa collision
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestScenarioB:
    """Scenario B: two ISA edges for the same concept compete in one dimension.

    Seed state is gnommoweb ISA container in dim:type; the trigger is
    gnommoweb ISA repo in dim:type.
    """

    def setup_method(self):
        """Rebuild the cache with the SOAS rows and the existing ISA edge."""
        reset_cache()
        self.gnommoweb = add_soas(201, "gnommoweb", saliency=1.5)
        self.container = add_soas(202, "container", saliency=0.9)
        self.repo = add_soas(203, "repo", saliency=0.8)
        self.type_dim = add_soas(1, "type", saliency=0.0)

        self.existing = add_urd(
            concept_id=201, parent_id=202, dim_id=1,
            is_isa=True, confidence=0.9, source="cloud_llm",
        )

    @staticmethod
    def _incoming_repo_request():
        """Build the triggering request: gnommoweb ISA repo in dim:type."""
        return InsertRequest(
            concept_id=201, parent_id=203, dim_id=1,
            is_isa=True, confidence=0.85, source="gutask",
        )

    @pytest.mark.asyncio
    async def test_isa_isa_collision_type(self):
        """The collision is typed isa_isa and queued once with both parent ids."""
        pool, _ = make_mock_pool()
        queued: list[CollisionInfo] = []

        async def fake_queue(pool, col, priority):
            queued.append(col)
            cache.pending_conflicts.add(col.concept_id)

        with patch("festinger.urd_writer._queue_collision", side_effect=fake_queue):
            collision = await insert_urd_edge(pool, self._incoming_repo_request())

        assert collision is not None
        assert collision.collision_type == "isa_isa"
        assert len(queued) == 1
        assert queued[0].existing_parent_id == 202  # container
        assert queued[0].incoming_parent_id == 203  # repo

    @pytest.mark.asyncio
    async def test_existing_edge_unchanged_after_isa_isa(self):
        """The cached edge still points at the original parent after the collision."""
        pool, _ = make_mock_pool()

        with patch("festinger.urd_writer._queue_collision", new_callable=AsyncMock):
            await insert_urd_edge(pool, self._incoming_repo_request())

        edge = cache.urd_by_concept_dim.get((201, 1))
        # Guard first so a missing edge fails as an assertion, not an AttributeError.
        assert edge is not None
        assert edge.parent_id == 202  # still container
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario C — ISPART + ISPART contradiction (host migration)
|
||||
#
|
||||
# Pre-state: dobby ISPART docker_host_1 in dim:runs-on
|
||||
# Trigger: dobby ISPART docker_host_2 in dim:runs-on
|
||||
# Expected: ispart_ispart collision
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestScenarioC:
    """Scenario C: two ISPART edges for the same concept contradict in one dimension.

    Seed state is dobby ISPART docker_host_1 in dim:runs-on; the trigger is
    dobby ISPART docker_host_2 in dim:runs-on.
    """

    def setup_method(self):
        """Rebuild the cache with the SOAS rows and the existing ISPART edge."""
        reset_cache()
        self.dobby = add_soas(301, "dobby", saliency=1.5)
        self.host1 = add_soas(302, "docker_host_1", saliency=0.5)
        self.host2 = add_soas(303, "docker_host_2", saliency=0.5)
        self.runs_on_dim = add_soas(4, "runs-on", saliency=0.0)

        self.existing = add_urd(
            concept_id=301, parent_id=302, dim_id=4,
            is_isa=False, confidence=0.9, source="cloud_llm",
        )

    @pytest.mark.asyncio
    async def test_ispart_ispart_collision_type(self):
        """The collision is typed ispart_ispart and carries both parent ids."""
        pool, _ = make_mock_pool()
        queued: list[CollisionInfo] = []

        async def fake_queue(pool, col, priority):
            queued.append(col)

        with patch("festinger.urd_writer._queue_collision", side_effect=fake_queue):
            req = InsertRequest(
                concept_id=301, parent_id=303, dim_id=4,
                is_isa=False, confidence=1.0, source="gutask",
            )
            collision = await insert_urd_edge(pool, req)

        assert collision is not None
        assert collision.collision_type == "ispart_ispart"
        # Check the queue before indexing so a missing entry fails as an
        # assertion rather than an IndexError (mirrors Scenario B).
        assert len(queued) == 1
        assert queued[0].existing_parent_id == 302  # docker_host_1
        assert queued[0].incoming_parent_id == 303  # docker_host_2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Clean insert (no collision)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCleanInsert:
    """Inserting an edge for a (concept, dim) pair that has no existing edge."""

    def setup_method(self):
        """Rebuild the cache with SOAS rows only; no URD edges are seeded."""
        reset_cache()
        add_soas(401, "festinger", saliency=1.2)
        add_soas(402, "middleware", saliency=0.8)
        add_soas(1, "type", saliency=0.0)

    @staticmethod
    def _request():
        """Build the request used by both tests: festinger ISA middleware in dim:type."""
        return InsertRequest(
            concept_id=401,
            parent_id=402,
            dim_id=1,
            is_isa=True,
            confidence=0.9,
            source="test",
        )

    @pytest.mark.asyncio
    async def test_successful_insert_updates_cache(self):
        """A collision-free insert returns None and the edge shows up in the cache."""
        pool, _ = make_mock_pool()

        outcome = await insert_urd_edge(pool, self._request())

        assert outcome is None
        stored = cache.urd_by_concept_dim.get((401, 1))
        assert stored is not None
        assert stored.parent_id == 402
        assert stored.is_isa is True

    @pytest.mark.asyncio
    async def test_successful_insert_calls_postgres(self):
        """A collision-free insert issues exactly one execute() with an INSERT statement."""
        pool, conn = make_mock_pool()

        await insert_urd_edge(pool, self._request())

        conn.execute.assert_called_once()
        positional = conn.execute.call_args.args
        assert "INSERT INTO urd" in positional[0]
|
||||
@@ -0,0 +1,115 @@
|
||||
"""Tests for the relationship cue scanner — ISA/ISPART patterns and of-Z modifier."""
|
||||
import pytest
|
||||
from festinger.cue_scanner import scan_cues
|
||||
|
||||
|
||||
def _find(triples, subj, parent, dim=None, is_isa=None):
|
||||
for t in triples:
|
||||
if t.subject != subj or t.parent != parent:
|
||||
continue
|
||||
if dim is not None and t.dimension != dim:
|
||||
continue
|
||||
if is_isa is not None and t.is_isa != is_isa:
|
||||
continue
|
||||
return t
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ISA patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_is_a_pattern():
    """'X is a Y' yields an ISA triple whose dimension is 'type'."""
    found = _find(scan_cues("gnommoweb is a repo"), "gnommoweb", "repo", is_isa=True)
    assert found is not None
    assert found.dimension == "type"
|
||||
|
||||
|
||||
def test_is_an_pattern():
    """'X is an Y' yields an ISA triple."""
    found = _find(scan_cues("gnommoweb is an api"), "gnommoweb", "api", is_isa=True)
    assert found is not None
|
||||
|
||||
|
||||
def test_isa_explicit():
    """The literal 'ISA' keyword yields an ISA triple with confidence of at least 0.9."""
    found = _find(scan_cues("gnommoweb ISA repo"), "gnommoweb", "repo", is_isa=True)
    assert found is not None
    assert found.confidence >= 0.9
|
||||
|
||||
|
||||
def test_of_z_dimension_modifier():
    """A trailing 'of Z' sets the triple's dimension to Z."""
    # "is a repo of Glitch University" → dim = glitch_university
    sentence = "gnommoweb is a repo of Glitch University"
    found = _find(scan_cues(sentence), "gnommoweb", "repo", is_isa=True)
    assert found is not None
    assert found.dimension == "glitch_university"
|
||||
|
||||
|
||||
def test_is_a_state_of_usa():
    """The Scenario A trigger sentence yields michigan ISA state in dimension 'usa'."""
    # Core scenario A trigger pattern
    sentence = "michigan is a state of USA"
    found = _find(scan_cues(sentence), "michigan", "state", is_isa=True)
    assert found is not None
    assert found.dimension == "usa"
|
||||
|
||||
|
||||
def test_is_a_kind_of():
    """'X is a kind of Y' yields an ISA triple with parent Y."""
    sentence = "gnommoweb is a kind of service"
    found = _find(scan_cues(sentence), "gnommoweb", "service", is_isa=True)
    assert found is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ISPART patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_is_part_of():
    """'X is part of Y' yields a non-ISA triple with the compound parent token."""
    sentence = "gnommoweb is part of Glitch University"
    found = _find(scan_cues(sentence), "gnommoweb", "glitch_university", is_isa=False)
    assert found is not None
|
||||
|
||||
|
||||
def test_runs_on():
    """'X runs on Y' yields a non-ISA triple in the 'runs-on' dimension."""
    found = _find(scan_cues("gnommoweb runs on Docker"), "gnommoweb", "docker", is_isa=False)
    assert found is not None
    assert found.dimension == "runs-on"
|
||||
|
||||
|
||||
def test_belongs_to():
    """'X belongs to Y' yields a non-ISA triple with parent Y."""
    found = _find(scan_cues("gnommoweb belongs to Agent0"), "gnommoweb", "agent0", is_isa=False)
    assert found is not None
|
||||
|
||||
|
||||
def test_is_owned_by():
    """'X is owned by Y' yields a non-ISA triple in the 'owned-by' dimension."""
    sentence = "gnommoweb is owned by jenstandstad"
    found = _find(scan_cues(sentence), "gnommoweb", "jenstandstad", is_isa=False)
    assert found is not None
    assert found.dimension == "owned-by"
|
||||
|
||||
|
||||
def test_deployed_on():
    """'X deployed on Y' yields a non-ISA triple in the 'runs-on' dimension."""
    sentence = "dobby deployed on docker_host_2"
    found = _find(scan_cues(sentence), "dobby", "docker_host_2", is_isa=False)
    assert found is not None
    assert found.dimension == "runs-on"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# No false positives
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_no_match_plain_sentence():
    """A sentence with no relationship cue produces an empty list."""
    result = scan_cues("please update the configuration file")
    assert result == []
|
||||
|
||||
|
||||
def test_deduplication():
    """Overlapping patterns must not multiply the same subject/parent triple.

    The sentence is the one used by ``test_is_a_pattern``, so at least one
    match is expected. The lower bound stops this test from passing vacuously
    when the scanner returns nothing.
    """
    # Same triple from multiple overlapping patterns should appear once
    triples = scan_cues("gnommoweb is a repo")
    matches = [t for t in triples if t.subject == "gnommoweb" and t.parent == "repo"]
    # May match both "is a" and "is an" patterns but should deduplicate
    assert 1 <= len(matches) <= 2  # at most one per distinct pattern type
|
||||
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Recollection rendering tests — hit path, zero-hit path, pending-conflict marker,
|
||||
and prompt injection position.
|
||||
"""
|
||||
import pytest
|
||||
from festinger import cache
|
||||
from festinger.recollection import (
|
||||
query_edges, render_hit, render_zero_hit,
|
||||
build_recollection_block, inject_recollection,
|
||||
)
|
||||
from tests.helpers import reset_cache, add_soas, add_urd
|
||||
|
||||
|
||||
class TestHitPath:
    """Recollection rendering when the queried concept has stored edges."""

    def setup_method(self):
        """Seed michigan with an ISPART edge in dim:geography and an ISA edge in dim:type."""
        reset_cache()
        self.michigan = add_soas(101, "michigan", saliency=1.5)
        self.usa = add_soas(102, "usa", saliency=0.8)
        self.geography = add_soas(5, "geography", saliency=0.0)
        self.state = add_soas(103, "state", saliency=0.7)
        self.type_dim = add_soas(1, "type", saliency=0.0)

        add_urd(concept_id=101, parent_id=102, dim_id=5, is_isa=False, confidence=0.9)
        add_urd(concept_id=101, parent_id=103, dim_id=1, is_isa=True, confidence=0.9)

    @staticmethod
    def _michigan_edges(floor=0.5):
        """Query michigan's edges with the given confidence floor and a 90-day window."""
        return query_edges(101, confidence_floor=floor, recency_days=90)

    def test_query_returns_both_edges(self):
        """Both seeded edges pass a 0.5 confidence floor."""
        assert len(self._michigan_edges()) == 2

    def test_render_hit_format(self):
        """The rendered line starts with the token and lists '[dim] parent' pairs."""
        rendered = render_hit("michigan", self._michigan_edges(), concept_id=101)
        assert rendered.startswith("michigan:")
        assert "[geography] usa" in rendered
        assert "[type] state" in rendered

    def test_pending_conflict_adds_question_mark(self):
        """A pending conflict on the concept puts a '?' inside a dimension label."""
        cache.pending_conflicts.add(101)
        rendered = render_hit("michigan", self._michigan_edges(), concept_id=101)
        # Passes when at least one of the two dim labels carries the '?' marker.
        assert "[geography?]" in rendered or "[type?]" in rendered

    def test_confidence_floor_filters_edges(self):
        """Edges below the confidence floor are left out of the query result."""
        # Add a low-confidence edge
        add_urd(concept_id=101, parent_id=5, dim_id=5, is_isa=False, confidence=0.2)
        surviving = self._michigan_edges(floor=0.6)
        # Should not include the 0.2 confidence edge
        assert not any(e.confidence < 0.6 for e in surviving)

    def test_block_contains_recollection_tags(self):
        """The built block is wrapped in <recollection> ... </recollection>."""
        block = build_recollection_block([101], confidence_floor=0.5, recency_days=90)
        assert block is not None
        assert block.startswith("<recollection>")
        assert block.endswith("</recollection>")
|
||||
|
||||
|
||||
class TestZeroHitPath:
    """Recollection rendering when the queried concept has no stored edges."""

    def setup_method(self):
        """Seed a single SOAS row and deliberately no URD edges."""
        reset_cache()
        add_soas(201, "ramanujan", saliency=1.5)
        # No URD edges for ramanujan

    def test_zero_hit_render(self):
        """The zero-hit line names the token and carries the expected hint text."""
        rendered = render_zero_hit("ramanujan")
        for fragment in ("ramanujan", "no recollection", "gutask iknowthat"):
            assert fragment in rendered

    def test_zero_hit_in_block(self):
        """A concept without edges still produces a block containing its zero-hit line."""
        block = build_recollection_block([201], confidence_floor=0.5, recency_days=90)
        assert block is not None
        assert "? ramanujan" in block
        assert "gutask iknowthat" in block

    def test_block_is_none_when_no_salient_concepts(self):
        """An empty concept list yields no block at all."""
        assert build_recollection_block([], confidence_floor=0.5, recency_days=90) is None
|
||||
|
||||
|
||||
class TestPromptInjection:
    """Where inject_recollection places the block and what it leaves untouched."""

    def setup_method(self):
        """Seed michigan ISPART usa in dim:geography so a non-empty block can be built."""
        reset_cache()
        add_soas(101, "michigan", saliency=1.5)
        add_soas(102, "usa", saliency=0.8)
        add_soas(5, "geography", saliency=0.0)
        add_urd(concept_id=101, parent_id=102, dim_id=5, is_isa=False, confidence=0.9)

    def _block(self):
        """Build the recollection block for michigan."""
        return build_recollection_block([101], confidence_floor=0.5, recency_days=90)

    def test_injected_into_existing_system_message(self):
        """With a system message present, the block is prepended to its content."""
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Tell me about michigan."},
        ]
        block = self._block()
        result = inject_recollection(messages, block)

        system = next(m for m in result if m["role"] == "system")
        assert system["content"].startswith("<recollection>")
        assert "You are a helpful assistant." in system["content"]

    def test_injected_at_position_0_when_no_system_message(self):
        """Without a system message, a new one holding the block is inserted first."""
        messages = [{"role": "user", "content": "Tell me about michigan."}]
        block = self._block()
        result = inject_recollection(messages, block)

        assert result[0]["role"] == "system"
        assert "<recollection>" in result[0]["content"]
        assert result[1]["role"] == "user"

    def test_original_messages_not_mutated(self):
        """The caller's list and every dict in it are identical after injection."""
        original = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "michigan?"},
        ]
        inject_recollection(original, self._block())
        # Original list and dicts must be unchanged. Compare against a fresh
        # literal so length, order, and every field are checked, not just one
        # content string.
        assert original == [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "michigan?"},
        ]

    def test_user_message_preserved_after_injection(self):
        """The user message content passes through unchanged."""
        messages = [
            {"role": "system", "content": "System prompt."},
            {"role": "user", "content": "michigan?"},
        ]
        result = inject_recollection(messages, self._block())
        user_msgs = [m for m in result if m["role"] == "user"]
        assert user_msgs[0]["content"] == "michigan?"
|
||||
@@ -0,0 +1,201 @@
|
||||
"""
|
||||
Scenario A — full integration walk-through (in-memory only, no DB required).
|
||||
|
||||
Demonstrates the degenerate parent_id=dim_id pattern and the misclassification
|
||||
collision pipeline from initial seed through recollection rendering.
|
||||
|
||||
Story:
|
||||
1. World model receives coarse early knowledge: "michigan is in usa"
|
||||
→ stored as michigan ISPART usa in dim:usa (parent_id = dim_id)
|
||||
2. Agent sends prompt: "michigan is a state of USA"
|
||||
→ cue scanner extracts michigan ISA state in dim:usa
|
||||
3. Collision detected: existing ISPART vs incoming ISA in dim:usa → misclassification
|
||||
4. Recollection rendered with [usa?] marker while conflict is pending
|
||||
5. After resolution (simulated): fact moves to correct dimension
|
||||
→ michigan ISA state in dim:type
|
||||
→ michigan ISPART usa in dim:geography
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from festinger import cache
|
||||
from festinger.cache import SoasRow, UrdEdge
|
||||
from festinger.cue_scanner import scan_cues
|
||||
from festinger.recollection import (
|
||||
build_recollection_block, inject_recollection, render_hit, query_edges
|
||||
)
|
||||
from festinger.urd_writer import InsertRequest, insert_urd_edge, CollisionInfo
|
||||
from tests.helpers import reset_cache, add_soas, add_urd
|
||||
|
||||
|
||||
def make_mock_pool():
    """Return ``(pool, conn)`` mocks standing in for an asyncpg pool and connection.

    ``pool.acquire()`` returns ``conn``; ``conn`` is also its own async
    context manager. ``conn.execute`` resolves to ``"INSERT 0 1"`` and
    ``conn.fetchrow`` resolves to ``None``.
    """
    stub_conn = AsyncMock()
    stub_conn.fetchrow = AsyncMock(return_value=None)
    stub_conn.execute = AsyncMock(return_value="INSERT 0 1")
    # Context-manager protocol: enter yields the connection, exit returns False.
    stub_conn.__aexit__ = AsyncMock(return_value=False)
    stub_conn.__aenter__ = AsyncMock(return_value=stub_conn)

    stub_pool = MagicMock()
    stub_pool.acquire = MagicMock(return_value=stub_conn)
    return stub_pool, stub_conn
|
||||
|
||||
|
||||
class TestScenarioAIntegration:
    """Walk Scenario A end to end against the in-memory cache only.

    Covers the degenerate seed edge, the cue-scanner trigger, the
    misclassification collision, the pending-conflict marker, and the
    post-resolution rendering.
    """

    def setup_method(self):
        """Rebuild the cache with the SOAS vocabulary and the degenerate seed edge."""
        reset_cache()
        # SOAS vocabulary
        self.michigan = add_soas(101, "michigan", saliency=1.5, novelty=1.0)
        self.usa = add_soas(102, "usa", saliency=0.8, novelty=0.8)
        self.state = add_soas(103, "state", saliency=0.6, novelty=0.5)
        self.type_dim = add_soas(1, "type", saliency=0.0)
        self.geo_dim = add_soas(5, "geography", saliency=0.0)

        # Degenerate seed edge: michigan ISPART usa, dim=usa (parent_id = dim_id = 102)
        self.seed_edge = add_urd(
            concept_id=101,
            parent_id=102,  # usa
            dim_id=102,     # usa — same as parent_id
            is_isa=False,
            confidence=0.85,
            source="test",
        )

    @staticmethod
    def _apply_simulated_resolution():
        """Put the cache into the post-resolution state shared by the step 5 tests.

        Removes the degenerate edge from both indexes, adds michigan ISPART usa
        in dim:geography and michigan ISA state in dim:type, and clears the
        pending-conflict flag.
        """
        # Remove the degenerate edge
        del cache.urd_by_concept_dim[(101, 102)]
        cache.urd_by_concept[101] = []

        # Insert two correctly-dimensioned edges
        add_urd(
            concept_id=101, parent_id=102, dim_id=5,  # geography dim
            is_isa=False, confidence=0.9, source="festinger",
        )
        add_urd(
            concept_id=101, parent_id=103, dim_id=1,  # type dim
            is_isa=True, confidence=0.9, source="festinger",
        )
        cache.pending_conflicts.discard(101)

    # ------------------------------------------------------------------
    # Step 1: Verify the degenerate seed state
    # ------------------------------------------------------------------

    def test_seed_edge_has_parent_id_equals_dim_id(self):
        """The seeded edge is cached and has parent_id == dim_id with is_isa False."""
        edge = cache.urd_by_concept_dim.get((101, 102))
        assert edge is not None, "seed edge must be in cache"
        assert edge.parent_id == edge.dim_id, (
            "degenerate edge requires parent_id == dim_id, "
            f"got parent_id={edge.parent_id}, dim_id={edge.dim_id}"
        )
        assert edge.is_isa is False

    def test_seed_recollection_renders_correctly(self):
        """The degenerate edge renders with 'usa' as both dimension and parent."""
        edges = query_edges(101, confidence_floor=0.5, recency_days=90)
        assert len(edges) == 1
        line = render_hit("michigan", edges, concept_id=101)
        # dim token = "usa", parent token = "usa"
        assert "[usa] usa" in line

    # ------------------------------------------------------------------
    # Step 2: Cue scanner extracts the incoming ISA triple
    # ------------------------------------------------------------------

    def test_cue_scanner_extracts_michigan_isa_state(self):
        """The trigger sentence yields michigan ISA state in dimension 'usa'."""
        prompt = "michigan is a state of USA"
        triples = scan_cues(prompt)
        match = next(
            (t for t in triples if t.subject == "michigan" and t.parent == "state"),
            None,
        )
        assert match is not None, "cue scanner must extract michigan ISA state"
        assert match.is_isa is True
        assert match.dimension == "usa"  # from "of USA" modifier

    # ------------------------------------------------------------------
    # Step 3: Incoming ISA in dim:usa collides with existing ISPART in dim:usa
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_collision_classified_as_misclassification(self):
        """The insert returns a misclassification collision and queues it once."""
        pool, _ = make_mock_pool()
        captured: list[CollisionInfo] = []

        async def fake_queue(pool, col, priority):
            captured.append(col)
            cache.pending_conflicts.add(col.concept_id)

        with patch("festinger.urd_writer._queue_collision", side_effect=fake_queue):
            req = InsertRequest(
                concept_id=101,   # michigan
                parent_id=103,    # state
                dim_id=102,       # usa — same dim as existing ISPART
                is_isa=True,
                confidence=0.85,
                source="gutask",
            )
            collision = await insert_urd_edge(pool, req)

        assert collision is not None
        assert collision.collision_type == "misclassification", (
            f"expected misclassification, got {collision.collision_type!r}. "
            f"existing is_isa={self.seed_edge.is_isa}, incoming is_isa=True"
        )
        # The recorder is only useful if it is checked.
        assert len(captured) == 1

    # ------------------------------------------------------------------
    # Step 4: Recollection renders the [usa?] pending-conflict marker
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_recollection_shows_pending_marker_after_collision(self):
        """Once the concept is flagged pending, its block shows the '[usa?]' label."""
        pool, _ = make_mock_pool()

        async def fake_queue(pool, col, priority):
            cache.pending_conflicts.add(col.concept_id)

        with patch("festinger.urd_writer._queue_collision", side_effect=fake_queue):
            req = InsertRequest(
                concept_id=101, parent_id=103, dim_id=102,
                is_isa=True, confidence=0.85, source="gutask",
            )
            await insert_urd_edge(pool, req)

        assert 101 in cache.pending_conflicts
        block = build_recollection_block([101], confidence_floor=0.5, recency_days=90)
        assert block is not None
        assert "[usa?]" in block, (
            f"recollection must show [usa?] pending marker. Got:\n{block}"
        )

    # ------------------------------------------------------------------
    # Step 5: Simulate resolution — move facts to correct dimensions
    # After nightly job:
    #   michigan ISA state in dim:type        (the ISA fact)
    #   michigan ISPART usa in dim:geography  (the ISPART fact, moved off degenerate dim)
    # ------------------------------------------------------------------

    def test_recollection_clean_after_simulated_resolution(self):
        """After resolution, both edges render with no '?' marker."""
        self._apply_simulated_resolution()

        edges = query_edges(101, confidence_floor=0.5, recency_days=90)
        assert len(edges) == 2

        line = render_hit("michigan", edges, concept_id=101)
        assert "[geography] usa" in line
        assert "[type] state" in line
        assert "?" not in line, "no pending marker after resolution"

    def test_full_recollection_block_after_resolution(self):
        """After resolution, the full block lists both edges and has no '?'."""
        # Simulate post-resolution state
        self._apply_simulated_resolution()

        block = build_recollection_block([101], confidence_floor=0.5, recency_days=90)
        assert block is not None
        assert "<recollection>" in block
        assert "michigan:" in block
        assert "[geography] usa" in block
        assert "[type] state" in block
        # No pending markers
        assert "?" not in block
|
||||
@@ -0,0 +1,70 @@
|
||||
"""Tests for the tokeniser — compound token rule, punctuation stripping, length filter."""
|
||||
import pytest
|
||||
from festinger.tokenizer import tokenize, tokenize_all
|
||||
|
||||
|
||||
def test_simple_tokens():
    """Long words survive tokenisation; words under five characters do not."""
    # "repo" is 4 chars — filtered by the ≥5 rule. Use a longer word.
    tokens = tokenize("gnommoweb is a repository")
    assert "gnommoweb" in tokens
    assert "repository" in tokens
    assert "repo" not in tokens  # 4 chars — below threshold
    # "repo" never appears in the input, so the check above cannot fail by
    # itself. Also check a short word that is really in the sentence.
    assert "is" not in tokens
|
||||
|
||||
|
||||
def test_compound_token_rule():
    """Adjacent capitalised words merge into one underscore-joined token."""
    result = tokenize("Glitch University runs on Docker")
    for expected in ("glitch_university", "docker"):
        assert expected in result
    # Individual parts should NOT appear as separate tokens
    for fragment in ("glitch", "university"):
        assert fragment not in result
|
||||
|
||||
|
||||
def test_multi_word_compound():
    """A run of three capitalised words merges into one token."""
    result = tokenize("New York City is a place")
    assert "new_york_city" in result
|
||||
|
||||
|
||||
def test_lowercase_breaks_compound_run():
    """Lowercase neighbours bound the compound run without joining it."""
    # "the" breaks the run — "Glitch University" still merges
    result = tokenize("the Glitch University system")
    assert "glitch_university" in result
    assert "system" in result
    assert "glitch" not in result
|
||||
|
||||
|
||||
def test_length_filter():
    """Only tokens of five or more characters are kept."""
    # Tokens < 5 chars are dropped
    kept = tokenize("cat dog bird eagle")
    assert "eagle" in kept
    for short_word in ("bird", "cat", "dog"):
        assert short_word not in kept
|
||||
|
||||
|
||||
def test_punctuation_stripped():
    """Punctuation is removed from tokens and ends any compound run."""
    # Trailing punctuation (period, colon) breaks the compound run.
    # "FastAPI." ends a run immediately; "Docker:" starts and ends a fresh run.
    result = tokenize("gnommoweb, FastAPI. Docker:")
    assert "gnommoweb" in result
    assert "fastapi" in result  # from "FastAPI." — flushed as solo compound
    assert "docker" in result   # from "Docker:" — flushed as solo compound
    # Must NOT merge across sentence boundaries
    assert "fastapi_docker" not in result
|
||||
|
||||
|
||||
def test_deduplication():
    """A token repeated in the input appears once in the output."""
    result = tokenize("gnommoweb gnommoweb gnommoweb")
    occurrences = result.count("gnommoweb")
    assert occurrences == 1
|
||||
|
||||
|
||||
def test_empty_string():
    """Empty input yields an empty list."""
    result = tokenize("")
    assert result == []
|
||||
|
||||
|
||||
def test_tokenize_all_no_length_filter():
    """tokenize_all keeps the short words asserted below."""
    # tokenize_all keeps short tokens
    result = tokenize_all("is a part of")
    for word in ("is", "of", "part"):
        assert word in result
|
||||
Reference in New Issue
Block a user