Improve cue scanner and remove false positives
This commit is contained in:
@@ -7,6 +7,11 @@ bypasses the saliency write threshold and goes directly to the write queue.
|
|||||||
|
|
||||||
The `of {Z}` modifier after an ISA pattern names the dimension explicitly.
|
The `of {Z}` modifier after an ISA pattern names the dimension explicitly.
|
||||||
Without it, ISA defaults to dimension 'type'; ISPART defaults to 'membership'.
|
Without it, ISA defaults to dimension 'type'; ISPART defaults to 'membership'.
|
||||||
|
|
||||||
|
Noise filters applied to every extracted subject/parent:
|
||||||
|
- Must be at least MIN_TOKEN_LEN characters
|
||||||
|
- Must not be in the stopword list (pronouns, articles, common adj/adv/aux verbs)
|
||||||
|
- Must not be a bare number
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -15,6 +20,86 @@ from dataclasses import dataclass
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Noise filters
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
MIN_TOKEN_LEN = 3 # "it", "be", "do", "a", "an" all filtered by length alone
|
||||||
|
|
||||||
|
# Words that are structural glue in English and never meaningful KG concepts.
|
||||||
|
# Covers: pronouns, articles, demonstratives, common aux/modal verbs,
|
||||||
|
# common adjectives, common adverbs, prepositions, conjunctions.
|
||||||
|
_STOPWORDS: frozenset[str] = frozenset({
|
||||||
|
# Personal pronouns
|
||||||
|
"i", "me", "my", "myself", "we", "us", "our", "ours", "ourselves",
|
||||||
|
"you", "your", "yours", "yourself", "yourselves",
|
||||||
|
"he", "him", "his", "himself",
|
||||||
|
"she", "her", "hers", "herself",
|
||||||
|
"it", "its", "itself",
|
||||||
|
"they", "them", "their", "theirs", "themselves",
|
||||||
|
# Demonstrative / relative / interrogative pronouns
|
||||||
|
"this", "that", "these", "those",
|
||||||
|
"which", "who", "whom", "whose", "what", "where", "when", "how", "why",
|
||||||
|
"whoever", "whatever", "whichever",
|
||||||
|
# Articles
|
||||||
|
"a", "an", "the",
|
||||||
|
# Auxiliary / modal verbs
|
||||||
|
"be", "been", "being", "am", "are", "was", "were",
|
||||||
|
"have", "has", "had", "having",
|
||||||
|
"do", "does", "did", "doing",
|
||||||
|
"will", "would", "shall", "should",
|
||||||
|
"may", "might", "must", "can", "could",
|
||||||
|
"ought", "need", "dare", "used",
|
||||||
|
# Common linking / existential
|
||||||
|
"there", "here",
|
||||||
|
# Prepositions / conjunctions / particles
|
||||||
|
"in", "on", "at", "to", "for", "of", "with", "by", "from", "as",
|
||||||
|
"into", "onto", "upon", "about", "above", "below", "between",
|
||||||
|
"through", "during", "before", "after", "over", "under", "within",
|
||||||
|
"without", "against", "along", "across", "behind", "beyond",
|
||||||
|
"and", "or", "but", "nor", "so", "yet", "both", "either", "neither",
|
||||||
|
"not", "no", "nor",
|
||||||
|
"if", "then", "else", "although", "though", "while", "whereas",
|
||||||
|
"because", "since", "unless", "until", "when", "than",
|
||||||
|
# Common adjectives that are never useful KG concepts
|
||||||
|
"new", "old", "good", "bad", "big", "small", "large", "little",
|
||||||
|
"long", "short", "high", "low", "right", "left", "next", "last",
|
||||||
|
"first", "second", "other", "same", "different", "few", "many",
|
||||||
|
"much", "more", "most", "less", "least", "some", "any", "all",
|
||||||
|
"every", "each", "both", "own", "such", "only", "just", "very",
|
||||||
|
"too", "also", "again", "once", "now", "then", "still", "already",
|
||||||
|
"always", "never", "often", "well", "back", "even", "way", "out",
|
||||||
|
# Common verbs that appear as bare tokens
|
||||||
|
"get", "got", "let", "put", "set", "go", "goes", "went",
|
||||||
|
"make", "made", "take", "took", "come", "came", "know", "knew",
|
||||||
|
"think", "thought", "see", "saw", "look", "use", "used",
|
||||||
|
"want", "try", "ask", "work", "seem", "feel", "call", "keep",
|
||||||
|
"give", "show", "run", "move", "live", "stand", "turn", "start",
|
||||||
|
"play", "follow", "create", "include", "continue", "add", "become",
|
||||||
|
# Boolean / value words
|
||||||
|
"true", "false", "none", "null", "yes",
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def _is_meaningful(token: str) -> bool:
|
||||||
|
"""
|
||||||
|
Return True only if *token* could be a useful KG concept:
|
||||||
|
- long enough
|
||||||
|
- not a stopword
|
||||||
|
- not a bare integer or float
|
||||||
|
"""
|
||||||
|
if len(token) < MIN_TOKEN_LEN:
|
||||||
|
return False
|
||||||
|
if token in _STOPWORDS:
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
float(token)
|
||||||
|
return False # bare number
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class CueTriple:
|
class CueTriple:
|
||||||
subject: str # canonical token (lowercase, compound rule applied)
|
subject: str # canonical token (lowercase, compound rule applied)
|
||||||
@@ -127,7 +212,7 @@ def scan_cues(text: str) -> list[CueTriple]:
|
|||||||
# Extend parent into compound if followed by more capital words
|
# Extend parent into compound if followed by more capital words
|
||||||
parent = _extend_compound(text, m.end("parent"), raw_parent)
|
parent = _extend_compound(text, m.end("parent"), raw_parent)
|
||||||
|
|
||||||
if not subj or not parent:
|
if not _is_meaningful(subj) or not _is_meaningful(parent):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check for "of {Z}" dimension modifier immediately after the match
|
# Check for "of {Z}" dimension modifier immediately after the match
|
||||||
@@ -143,6 +228,11 @@ def scan_cues(text: str) -> list[CueTriple]:
|
|||||||
else:
|
else:
|
||||||
dimension = _infer_ispart_dimension(m.re.pattern)
|
dimension = _infer_ispart_dimension(m.re.pattern)
|
||||||
|
|
||||||
|
# Reject if dimension was extracted but is a stopword (fall back to default)
|
||||||
|
if dimension not in ("type", "membership", "runs-on", "owned-by", "geography", "tech"):
|
||||||
|
if not _is_meaningful(dimension):
|
||||||
|
dimension = "type" if is_isa else "membership"
|
||||||
|
|
||||||
key = (subj, parent, dimension, is_isa)
|
key = (subj, parent, dimension, is_isa)
|
||||||
if key not in seen:
|
if key not in seen:
|
||||||
seen.add(key)
|
seen.add(key)
|
||||||
|
|||||||
@@ -770,6 +770,81 @@ async def iknowthat(request: Request) -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# /models — LLM model management
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@app.get("/models")
async def list_models(request: Request) -> dict:
    """
    Return every row of the models table, ordered by id.

    The response is {"models": [...]}, each entry carrying id, provider,
    model_name and an ISO-formatted created_at. The api_key column is not
    selected and is never returned.
    """
    async with request.app.state.pool.acquire() as conn:
        records = await conn.fetch(
            "SELECT id, provider, model_name, created_at FROM models ORDER BY id"
        )

    models = []
    for rec in records:
        models.append({
            "id": rec["id"],
            "provider": rec["provider"],
            "model_name": rec["model_name"],
            "created_at": rec["created_at"].isoformat(),
        })
    return {"models": models}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/models")
async def create_model(request: Request) -> dict:
    """
    Insert a new row into the models table from a JSON request body.

    The body must be a JSON object with non-empty string fields "provider"
    ('claude' or 'openai'), "model_name" and "api_key". Returns
    {"status": "ok", "id": <new id>} on success, or {"error": ...} when
    validation fails.
    """
    pool = request.app.state.pool
    data = await request.json()

    def _field(name: str) -> str:
        # Treat a non-object body or a non-string value as missing, so the
        # caller gets the validation error rather than an AttributeError.
        raw = data.get(name, "") if isinstance(data, dict) else ""
        return raw.strip() if isinstance(raw, str) else ""

    provider = _field("provider")
    model_name = _field("model_name")
    api_key = _field("api_key")
    if not provider or not model_name or not api_key:
        return {"error": "provider, model_name, and api_key are required"}
    if provider not in ("claude", "openai"):
        return {"error": "provider must be 'claude' or 'openai'"}
    # NOTE(review): api_key is inserted exactly as received -- confirm that
    # storing it unencrypted is intended.
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            "INSERT INTO models (provider, model_name, api_key) VALUES ($1,$2,$3) RETURNING id",
            provider, model_name, api_key,
        )
    # The api_key is deliberately left out of the log line.
    log.info("model created id=%d provider=%s model=%s", row["id"], provider, model_name)
    return {"status": "ok", "id": row["id"]}
|
||||||
|
|
||||||
|
|
||||||
|
@app.delete("/models/{model_id}")
async def delete_model(model_id: int, request: Request) -> dict:
    """
    Delete the models row whose id equals *model_id*.

    Returns {"status": "ok", "deleted": model_id} when a row was removed,
    otherwise {"error": ...}.
    """
    async with request.app.state.pool.acquire() as conn:
        status = await conn.execute("DELETE FROM models WHERE id=$1", model_id)

    # The last whitespace-separated field of the status is read as the
    # number of rows removed; an empty status counts as zero.
    removed = int(status.split()[-1]) if status else 0
    if removed:
        log.info("model deleted id=%d", model_id)
        return {"status": "ok", "deleted": model_id}
    return {"error": f"model {model_id} not found"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/config")
async def get_all_config(request: Request) -> dict:
    """
    Return all config rows as a flat key -> value mapping.

    The response is {"config": {...}}. updated_at is selected by the query
    but is not included in the response.
    """
    async with request.app.state.pool.acquire() as conn:
        records = await conn.fetch("SELECT key, value, updated_at FROM config ORDER BY key")

    settings = {}
    for rec in records:
        settings[rec["key"]] = rec["value"]
    return {"config": settings}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/config")
async def update_config(request: Request) -> dict:
    """
    Overwrite the value of an existing config row.

    Expects a JSON object body with "key" and "value"; the value is
    stringified and both are stripped. Returns
    {"status": "ok", "key": ..., "value": ...} when a row was updated, or
    {"error": ...} when the key is missing or matches no existing row.
    """
    pool = request.app.state.pool
    data = await request.json()
    # Coerce through str() so a null or non-string key yields the validation
    # error instead of an AttributeError on .strip().
    key = str(data.get("key") or "").strip()
    value = str(data.get("value", "")).strip()
    if not key:
        return {"error": "key is required"}
    async with pool.acquire() as conn:
        result = await conn.execute(
            "UPDATE config SET value=$1, updated_at=now() WHERE key=$2",
            value, key,
        )
    # The statement only updates existing rows. Read the trailing row count
    # from the command status so an unknown key is not reported as success.
    updated = int(result.split()[-1]) if result else 0
    if not updated:
        return {"error": f"config key {key!r} not found"}
    log.info("config updated key=%s value=%s", key, value)
    return {"status": "ok", "key": key, "value": value}
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# /resolve/run — manually trigger resolution job
|
# /resolve/run — manually trigger resolution job
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -897,15 +972,19 @@ async def kg_log(request: Request, limit: int = 100, offset: int = 0, op: str =
|
|||||||
count_query = "SELECT COUNT(*) FROM kg_write_log {where}"
|
count_query = "SELECT COUNT(*) FROM kg_write_log {where}"
|
||||||
|
|
||||||
if op:
|
if op:
|
||||||
where = "WHERE op = $3"
|
|
||||||
async with pool.acquire() as conn:
|
async with pool.acquire() as conn:
|
||||||
rows = await conn.fetch(query.format(where=where), limit, offset, op)
|
rows = await conn.fetch(
|
||||||
total = await conn.fetchval(count_query.format(where=where), op)
|
query.format(where="WHERE op = $3"),
|
||||||
|
limit, offset, op,
|
||||||
|
)
|
||||||
|
total = await conn.fetchval(
|
||||||
|
"SELECT COUNT(*) FROM kg_write_log WHERE op = $1",
|
||||||
|
op,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
where = ""
|
|
||||||
async with pool.acquire() as conn:
|
async with pool.acquire() as conn:
|
||||||
rows = await conn.fetch(query.format(where=where), limit, offset)
|
rows = await conn.fetch(query.format(where=""), limit, offset)
|
||||||
total = await conn.fetchval(count_query.format(where=where))
|
total = await conn.fetchval("SELECT COUNT(*) FROM kg_write_log")
|
||||||
|
|
||||||
def fmt(r):
|
def fmt(r):
|
||||||
return {
|
return {
|
||||||
@@ -1642,6 +1721,34 @@ ADMIN_HTML = """<!DOCTYPE html>
|
|||||||
<div class="stat"><div class="stat-label">Last resolution</div><div class="stat-value" style="font-size:0.85em" id="s-lastrun">…</div></div>
|
<div class="stat"><div class="stat-label">Last resolution</div><div class="stat-value" style="font-size:0.85em" id="s-lastrun">…</div></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<h2>Resolution model</h2>
|
||||||
|
<div id="models-section">
|
||||||
|
<table id="models-table" style="margin-bottom:0.8em">
|
||||||
|
<thead><tr><th>ID</th><th>Provider</th><th>Model name</th><th>resolve?</th><th>write?</th><th></th></tr></thead>
|
||||||
|
<tbody id="models-tbody"><tr><td colspan="6">Loading…</td></tr></tbody>
|
||||||
|
</table>
|
||||||
|
<details style="margin-bottom:1em">
|
||||||
|
<summary style="cursor:pointer;font-size:0.9em;color:#555">Add model…</summary>
|
||||||
|
<div style="margin-top:0.6em;display:flex;gap:0.7em;flex-wrap:wrap;align-items:flex-end">
|
||||||
|
<label style="font-size:0.85em">Provider
|
||||||
|
<select id="m-provider" style="font-family:monospace;padding:4px 8px;display:block;margin-top:2px">
|
||||||
|
<option value="claude">claude</option>
|
||||||
|
<option value="openai">openai</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
<label style="font-size:0.85em">Model name
|
||||||
|
<input id="m-name" type="text" value="claude-opus-4-6"
|
||||||
|
style="font-family:monospace;padding:5px 8px;border:1px solid #ccc;border-radius:3px;display:block;margin-top:2px;width:200px">
|
||||||
|
</label>
|
||||||
|
<label style="font-size:0.85em">API key
|
||||||
|
<input id="m-key" type="password" placeholder="sk-ant-…"
|
||||||
|
style="font-family:monospace;padding:5px 8px;border:1px solid #ccc;border-radius:3px;display:block;margin-top:2px;width:260px">
|
||||||
|
</label>
|
||||||
|
<button onclick="addModel(this)" style="height:32px">Add</button>
|
||||||
|
</div>
|
||||||
|
</details>
|
||||||
|
</div>
|
||||||
|
|
||||||
<h2>Actions</h2>
|
<h2>Actions</h2>
|
||||||
<div class="actions">
|
<div class="actions">
|
||||||
<button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
|
<button class="primary" onclick="runResolution(this)">Run conflict resolution now</button>
|
||||||
@@ -1694,6 +1801,70 @@ ADMIN_HTML = """<!DOCTYPE html>
|
|||||||
: 'never';
|
: 'never';
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
let _cfg = {{}};
|
||||||
|
|
||||||
|
async function loadModels() {{
|
||||||
|
const [mr, cr] = await Promise.all([fetch('/models'), fetch('/config')]);
|
||||||
|
const md = await mr.json();
|
||||||
|
_cfg = (await cr.json()).config;
|
||||||
|
const resolveId = _cfg['resolve_model_id'] || '';
|
||||||
|
const writeId = _cfg['write_model_id'] || '';
|
||||||
|
|
||||||
|
const tbody = document.getElementById('models-tbody');
|
||||||
|
if (!md.models.length) {{
|
||||||
|
tbody.innerHTML = '<tr><td colspan="6" style="color:#999">No models yet — add one below.</td></tr>';
|
||||||
|
return;
|
||||||
|
}}
|
||||||
|
tbody.innerHTML = md.models.map(m => `
|
||||||
|
<tr>
|
||||||
|
<td>${{m.id}}</td>
|
||||||
|
<td>${{m.provider}}</td>
|
||||||
|
<td>${{m.model_name}}</td>
|
||||||
|
<td><button onclick="setConfig('resolve_model_id','${{m.id}}')" style="padding:2px 8px;font-size:0.8em;${{resolveId==String(m.id)?'background:#2a7a2a;color:#fff;border-color:#2a7a2a':''}}">${{resolveId==String(m.id)?'✓ active':'set'}}</button></td>
|
||||||
|
<td><button onclick="setConfig('write_model_id','${{m.id}}')" style="padding:2px 8px;font-size:0.8em;${{writeId==String(m.id)?'background:#2a7a2a;color:#fff;border-color:#2a7a2a':''}}">${{writeId==String(m.id)?'✓ active':'set'}}</button></td>
|
||||||
|
<td><button onclick="deleteModel(${{m.id}},this)" style="padding:2px 8px;font-size:0.8em;color:#b00;border-color:#b00">✕</button></td>
|
||||||
|
</tr>`).join('');
|
||||||
|
}}
|
||||||
|
|
||||||
|
async function addModel(btn) {{
|
||||||
|
const provider = document.getElementById('m-provider').value;
|
||||||
|
const model_name = document.getElementById('m-name').value.trim();
|
||||||
|
const api_key = document.getElementById('m-key').value.trim();
|
||||||
|
if (!model_name || !api_key) {{ alert('Model name and API key are required.'); return; }}
|
||||||
|
btn.disabled = true;
|
||||||
|
try {{
|
||||||
|
const r = await fetch('/models', {{method:'POST', headers:{{'Content-Type':'application/json'}},
|
||||||
|
body: JSON.stringify({{provider, model_name, api_key}})}});
|
||||||
|
const d = await r.json();
|
||||||
|
if (d.error) {{ showResult('Error: ' + d.error, false); return; }}
|
||||||
|
showResult('Model added (id=' + d.id + '). You can now set it as the resolve model.', true);
|
||||||
|
document.getElementById('m-key').value = '';
|
||||||
|
await loadModels();
|
||||||
|
}} catch(e) {{ showResult('Error: ' + e.message, false); }}
|
||||||
|
finally {{ btn.disabled = false; }}
|
||||||
|
}}
|
||||||
|
|
||||||
|
async function deleteModel(id, btn) {{
|
||||||
|
if (!confirm('Delete model ' + id + '?')) return;
|
||||||
|
btn.disabled = true;
|
||||||
|
try {{
|
||||||
|
const r = await fetch('/models/' + id, {{method:'DELETE'}});
|
||||||
|
const d = await r.json();
|
||||||
|
if (d.error) {{ showResult('Error: ' + d.error, false); return; }}
|
||||||
|
await loadModels();
|
||||||
|
}} catch(e) {{ showResult('Error: ' + e.message, false); }}
|
||||||
|
finally {{ btn.disabled = false; }}
|
||||||
|
}}
|
||||||
|
|
||||||
|
async function setConfig(key, value) {{
|
||||||
|
const r = await fetch('/config', {{method:'POST', headers:{{'Content-Type':'application/json'}},
|
||||||
|
body: JSON.stringify({{key, value}})}});
|
||||||
|
const d = await r.json();
|
||||||
|
if (d.error) {{ showResult('Error: ' + d.error, false); return; }}
|
||||||
|
showResult('Config updated: ' + key + ' = ' + value, true);
|
||||||
|
await loadModels();
|
||||||
|
}}
|
||||||
|
|
||||||
async function loadConflicts() {{
|
async function loadConflicts() {{
|
||||||
const r = await fetch('/conflicts');
|
const r = await fetch('/conflicts');
|
||||||
const d = await r.json();
|
const d = await r.json();
|
||||||
@@ -1819,6 +1990,7 @@ ADMIN_HTML = """<!DOCTYPE html>
|
|||||||
loadStats();
|
loadStats();
|
||||||
loadConflicts();
|
loadConflicts();
|
||||||
loadLog(0);
|
loadLog(0);
|
||||||
|
loadModels();
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
Reference in New Issue
Block a user