116 lines
3.6 KiB
Python
116 lines
3.6 KiB
Python
|
|
"""Tests for the relationship cue scanner — ISA/ISPART patterns and of-Z modifier."""
|
||
|
|
import pytest
|
||
|
|
from festinger.cue_scanner import scan_cues
|
||
|
|
|
||
|
|
|
||
|
|
def _find(triples, subj, parent, dim=None, is_isa=None):
    """Return the first triple matching the given fields, or ``None``.

    Args:
        triples: Iterable of objects exposing ``subject`` and ``parent``
            attributes (and ``dimension`` / ``is_isa`` when those filters
            are used).
        subj: Required value of the triple's ``subject``.
        parent: Required value of the triple's ``parent``.
        dim: When not ``None``, required value of ``dimension``.
        is_isa: When not ``None``, required value of ``is_isa``.

    Returns:
        The first element of ``triples`` that satisfies every active
        filter, in iteration order, or ``None`` when nothing matches.
    """
    for t in triples:
        # Subject and parent are always compared.
        if t.subject != subj or t.parent != parent:
            continue
        # Optional filters are only applied when the caller supplied a value,
        # so ``None`` means "don't care" rather than "must be None".
        if dim is not None and t.dimension != dim:
            continue
        if is_isa is not None and t.is_isa != is_isa:
            continue
        return t
    return None
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
# ISA patterns
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
def test_is_a_pattern():
    """The ``X is a Y`` phrasing yields an ISA triple with dimension ``type``."""
    hit = _find(scan_cues("gnommoweb is a repo"), "gnommoweb", "repo", is_isa=True)
    assert hit is not None
    assert hit.dimension == "type"
|
||
|
|
|
||
|
|
|
||
|
|
def test_is_an_pattern():
    """The ``X is an Y`` phrasing yields an ISA triple from X to Y."""
    found = scan_cues("gnommoweb is an api")
    hit = _find(found, "gnommoweb", "api", is_isa=True)
    assert hit is not None
|
||
|
|
|
||
|
|
|
||
|
|
def test_isa_explicit():
    """The explicit ``ISA`` keyword yields an ISA triple with confidence >= 0.9."""
    hit = _find(scan_cues("gnommoweb ISA repo"), "gnommoweb", "repo", is_isa=True)
    assert hit is not None
    assert hit.confidence >= 0.9
|
||
|
|
|
||
|
|
|
||
|
|
def test_of_z_dimension_modifier():
    """A trailing ``of Z`` sets the ISA triple's dimension to Z."""
    # "is a repo of Glitch University" → dim = glitch_university
    sentence = "gnommoweb is a repo of Glitch University"
    hit = _find(scan_cues(sentence), "gnommoweb", "repo", is_isa=True)
    assert hit is not None
    assert hit.dimension == "glitch_university"
|
||
|
|
|
||
|
|
|
||
|
|
def test_is_a_state_of_usa():
    """``michigan is a state of USA`` gives an ISA triple with dimension ``usa``."""
    # Core scenario A trigger pattern
    found = scan_cues("michigan is a state of USA")
    hit = _find(found, "michigan", "state", is_isa=True)
    assert hit is not None
    assert hit.dimension == "usa"
|
||
|
|
|
||
|
|
|
||
|
|
def test_is_a_kind_of():
    """The ``X is a kind of Y`` phrasing yields an ISA triple from X to Y."""
    hit = _find(
        scan_cues("gnommoweb is a kind of service"),
        "gnommoweb",
        "service",
        is_isa=True,
    )
    assert hit is not None
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
# ISPART patterns
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
def test_is_part_of():
    """The ``X is part of Y`` phrasing yields a non-ISA triple from X to Y."""
    found = scan_cues("gnommoweb is part of Glitch University")
    hit = _find(found, "gnommoweb", "glitch_university", is_isa=False)
    assert hit is not None
|
||
|
|
|
||
|
|
|
||
|
|
def test_runs_on():
    """The ``X runs on Y`` phrasing yields a non-ISA triple with dimension ``runs-on``."""
    hit = _find(scan_cues("gnommoweb runs on Docker"), "gnommoweb", "docker", is_isa=False)
    assert hit is not None
    assert hit.dimension == "runs-on"
|
||
|
|
|
||
|
|
|
||
|
|
def test_belongs_to():
    """The ``X belongs to Y`` phrasing yields a non-ISA triple from X to Y."""
    found = scan_cues("gnommoweb belongs to Agent0")
    hit = _find(found, "gnommoweb", "agent0", is_isa=False)
    assert hit is not None
|
||
|
|
|
||
|
|
|
||
|
|
def test_is_owned_by():
    """The ``X is owned by Y`` phrasing yields a non-ISA triple with dimension ``owned-by``."""
    found = scan_cues("gnommoweb is owned by jenstandstad")
    hit = _find(found, "gnommoweb", "jenstandstad", is_isa=False)
    assert hit is not None
    assert hit.dimension == "owned-by"
|
||
|
|
|
||
|
|
|
||
|
|
def test_deployed_on():
    """The ``X deployed on Y`` phrasing yields a non-ISA triple with dimension ``runs-on``."""
    hit = _find(
        scan_cues("dobby deployed on docker_host_2"),
        "dobby",
        "docker_host_2",
        is_isa=False,
    )
    assert hit is not None
    assert hit.dimension == "runs-on"
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
# No false positives
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
def test_no_match_plain_sentence():
    """A sentence with no relationship cue produces an empty result list."""
    assert scan_cues("please update the configuration file") == []
|
||
|
|
|
||
|
|
|
||
|
|
def test_deduplication():
    """Overlapping patterns must not multiply the same (subject, parent) triple.

    The input is the same sentence used by ``test_is_a_pattern``, so at least
    one gnommoweb/repo triple is expected; the upper bound checks that
    overlapping patterns did not emit unbounded duplicates.
    """
    # Same triple from multiple overlapping patterns should appear once
    triples = scan_cues("gnommoweb is a repo")
    matches = [t for t in triples if t.subject == "gnommoweb" and t.parent == "repo"]
    # Lower bound guards against a vacuous pass: an empty result would
    # otherwise satisfy the upper bound without exercising deduplication.
    assert len(matches) >= 1
    # May match both "is a" and "is an" patterns but should deduplicate
    assert len(matches) <= 2  # at most one per distinct pattern type
|