diff --git a/plugins/festinger/festinger/cue_scanner.py b/plugins/festinger/festinger/cue_scanner.py index e3e837a..a684b22 100644 --- a/plugins/festinger/festinger/cue_scanner.py +++ b/plugins/festinger/festinger/cue_scanner.py @@ -7,6 +7,11 @@ bypasses the saliency write threshold and goes directly to the write queue. The `of {Z}` modifier after an ISA pattern names the dimension explicitly. Without it, ISA defaults to dimension 'type'; ISPART defaults to 'membership'. + +Noise filters applied to every extracted subject/parent: + - Must be at least MIN_TOKEN_LEN characters + - Must not be in the stopword list (pronouns, articles, common adj/adv/aux verbs) + - Must not be a bare number """ from __future__ import annotations @@ -15,6 +20,86 @@ from dataclasses import dataclass from typing import Optional +# --------------------------------------------------------------------------- +# Noise filters +# --------------------------------------------------------------------------- + +MIN_TOKEN_LEN = 3 # "it", "be", "do", "a", "an" all filtered by length alone + +# Words that are structural glue in English and never meaningful KG concepts. +# Covers: pronouns, articles, demonstratives, common aux/modal verbs, +# common adjectives, common adverbs, prepositions, conjunctions. 
_STOPWORDS: frozenset[str] = frozenset({
    # Personal pronouns
    "i", "me", "my", "myself", "we", "us", "our", "ours", "ourselves",
    "you", "your", "yours", "yourself", "yourselves",
    "he", "him", "his", "himself",
    "she", "her", "hers", "herself",
    "it", "its", "itself",
    "they", "them", "their", "theirs", "themselves",
    # Demonstrative / relative / interrogative pronouns
    "this", "that", "these", "those",
    "which", "who", "whom", "whose", "what", "where", "when", "how", "why",
    "whoever", "whatever", "whichever",
    # Articles
    "a", "an", "the",
    # Auxiliary / modal verbs
    "be", "been", "being", "am", "are", "was", "were",
    "have", "has", "had", "having",
    "do", "does", "did", "doing",
    "will", "would", "shall", "should",
    "may", "might", "must", "can", "could",
    "ought", "need", "dare", "used",
    # Common linking / existential
    "there", "here",
    # Prepositions / conjunctions / particles
    "in", "on", "at", "to", "for", "of", "with", "by", "from", "as",
    "into", "onto", "upon", "about", "above", "below", "between",
    "through", "during", "before", "after", "over", "under", "within",
    "without", "against", "along", "across", "behind", "beyond",
    "and", "or", "but", "nor", "so", "yet", "both", "either", "neither",
    "not", "no",
    "if", "then", "else", "although", "though", "while", "whereas",
    "because", "since", "unless", "until", "than",
    # Common adjectives / adverbs that are never useful KG concepts
    "new", "old", "good", "bad", "big", "small", "large", "little",
    "long", "short", "high", "low", "right", "left", "next", "last",
    "first", "second", "other", "same", "different", "few", "many",
    "much", "more", "most", "less", "least", "some", "any", "all",
    "every", "each", "own", "such", "only", "just", "very",
    "too", "also", "again", "once", "now", "still", "already",
    "always", "never", "often", "well", "back", "even", "way", "out",
    # Common verbs that appear as bare tokens
    "get", "got", "let", "put", "set", "go", "goes", "went",
    "make", "made", "take", "took", "come", "came", "know", "knew",
    "think", "thought", "see", "saw", "look", "use",
    "want", "try", "ask", "work", "seem", "feel", "call", "keep",
    "give", "show", "run", "move", "live", "stand", "turn", "start",
    "play", "follow", "create", "include", "continue", "add", "become",
    # Boolean / value words
    "true", "false", "none", "null", "yes",
})


def _is_meaningful(token: str) -> bool:
    """
    Return True only if *token* could be a useful KG concept.

    A token is rejected when it is:
      - shorter than MIN_TOKEN_LEN characters
      - a member of _STOPWORDS (exact, case-sensitive match)
      - a bare integer or float literal
    """
    if len(token) < MIN_TOKEN_LEN:
        return False
    if token in _STOPWORDS:
        return False
    # float() also parses the words "nan", "inf" and "infinity"; those are
    # ordinary words, not numerals, so only attempt the numeric parse when
    # the token actually contains a digit.
    if any(ch.isdigit() for ch in token):
        try:
            float(token)
        except ValueError:
            pass
        else:
            return False  # bare number
    return True
# ---------------------------------------------------------------------------
# /models — LLM model management
# ---------------------------------------------------------------------------

def _clean_str(value) -> str:
    """Return *value* stripped of surrounding whitespace, or "" if it is not a string."""
    return value.strip() if isinstance(value, str) else ""


def _rows_affected(status) -> int:
    """
    Extract the trailing row count from a command status string.

    NOTE(review): assumes conn.execute() returns a tag such as "DELETE 1" or
    "UPDATE 0" (asyncpg-style), as the original delete handler's parsing implied.
    Returns 0 when the status is empty or does not end in an integer.
    """
    parts = status.split() if status else []
    if not parts or not parts[-1].isdigit():
        return 0
    return int(parts[-1])


async def _read_json_object(request: Request):
    """
    Return the request body as a dict, or None when it is not a JSON object.

    NOTE(review): assumes request.json() raises a ValueError subclass
    (json.JSONDecodeError) on malformed input — confirm for the framework in use.
    """
    try:
        payload = await request.json()
    except ValueError:
        return None
    return payload if isinstance(payload, dict) else None


@app.get("/models")
async def list_models(request: Request) -> dict:
    """List every row of the models table (id, provider, model_name, created_at)."""
    pool = request.app.state.pool
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            "SELECT id, provider, model_name, created_at FROM models ORDER BY id"
        )
    return {"models": [
        {
            "id": r["id"],
            "provider": r["provider"],
            "model_name": r["model_name"],
            # Guard against a NULL timestamp instead of raising AttributeError.
            "created_at": (
                r["created_at"].isoformat() if r["created_at"] is not None else None
            ),
        }
        for r in rows
    ]}


@app.post("/models")
async def create_model(request: Request) -> dict:
    """
    Insert a model row from a JSON body with provider, model_name and api_key.

    Returns {"status": "ok", "id": ...} on success, or {"error": ...} when the
    body is not a JSON object, a field is missing/blank/non-string, or the
    provider is not one of 'claude' / 'openai'.
    """
    pool = request.app.state.pool
    data = await _read_json_object(request)
    if data is None:
        return {"error": "provider, model_name, and api_key are required"}
    # Non-string values (null, numbers, ...) are treated as missing rather
    # than crashing on .strip().
    provider = _clean_str(data.get("provider"))
    model_name = _clean_str(data.get("model_name"))
    api_key = _clean_str(data.get("api_key"))
    if not provider or not model_name or not api_key:
        return {"error": "provider, model_name, and api_key are required"}
    if provider not in ("claude", "openai"):
        return {"error": "provider must be 'claude' or 'openai'"}
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            "INSERT INTO models (provider, model_name, api_key) VALUES ($1,$2,$3) RETURNING id",
            provider, model_name, api_key,
        )
    # The api_key is deliberately kept out of the log line.
    log.info("model created id=%d provider=%s model=%s", row["id"], provider, model_name)
    return {"status": "ok", "id": row["id"]}


@app.delete("/models/{model_id}")
async def delete_model(model_id: int, request: Request) -> dict:
    """Delete the model with *model_id*; return an error dict if no row matched."""
    pool = request.app.state.pool
    async with pool.acquire() as conn:
        result = await conn.execute("DELETE FROM models WHERE id=$1", model_id)
    if not _rows_affected(result):
        return {"error": f"model {model_id} not found"}
    log.info("model deleted id=%d", model_id)
    return {"status": "ok", "deleted": model_id}


@app.get("/config")
async def get_all_config(request: Request) -> dict:
    """Return all config rows as a {key: value} mapping, ordered by key."""
    pool = request.app.state.pool
    async with pool.acquire() as conn:
        rows = await conn.fetch("SELECT key, value FROM config ORDER BY key")
    return {"config": {r["key"]: r["value"] for r in rows}}


@app.post("/config")
async def update_config(request: Request) -> dict:
    """
    Update the value of an existing config key from a JSON body {key, value}.

    The value is stored as its stripped string form. Returns an error dict
    when the key is missing/blank or when no config row has that key (the
    statement is UPDATE-only, so unknown keys are never created).
    """
    pool = request.app.state.pool
    data = await _read_json_object(request)
    if data is None:
        return {"error": "key is required"}
    key = _clean_str(data.get("key"))
    value = str(data.get("value", "")).strip()
    if not key:
        return {"error": "key is required"}
    async with pool.acquire() as conn:
        result = await conn.execute(
            "UPDATE config SET value=$1, updated_at=now() WHERE key=$2",
            value, key,
        )
    # Report a no-op update instead of claiming success for an unknown key.
    if not _rows_affected(result):
        return {"error": f"config key {key!r} not found"}
    log.info("config updated key=%s value=%s", key, value)
    return {"status": "ok", "key": key, "value": value}
| ID | Provider | Model name | resolve? | write? | |
|---|---|---|---|---|---|
| Loading… | |||||