This commit is contained in:
Lars 2025-12-18 13:15:58 +01:00
parent 9a18f3cc8b
commit 3eac646cb6
4 changed files with 131 additions and 226 deletions

View File

@ -162,7 +162,7 @@ class IngestionService:
# --- WP-22: Content Lifecycle Gate --- # --- WP-22: Content Lifecycle Gate ---
status = fm.get("status", "draft").lower().strip() status = fm.get("status", "draft").lower().strip()
# Hard Skip für System-Dateien # Hard Skip für System-Dateien (Teil A)
if status in ["system", "template", "archive", "hidden"]: if status in ["system", "template", "archive", "hidden"]:
logger.info(f"Skipping file {file_path} (Status: {status})") logger.info(f"Skipping file {file_path} (Status: {status})")
return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"} return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
@ -265,7 +265,7 @@ class IngestionService:
except TypeError: except TypeError:
raw_edges = build_edges_for_note(note_id, chunk_pls) raw_edges = build_edges_for_note(note_id, chunk_pls)
# --- WP-22: Edge Registry Validation --- # --- WP-22: Edge Registry Validation (Teil B) ---
edges = [] edges = []
if raw_edges: if raw_edges:
for edge in raw_edges: for edge in raw_edges:

View File

@ -98,7 +98,7 @@ def _semantic_hits(
results.append((str(pid), float(score), dict(payload or {}))) results.append((str(pid), float(score), dict(payload or {})))
return results return results
# --- WP-22 Helper: Lifecycle Multipliers --- # --- WP-22 Helper: Lifecycle Multipliers (Teil A) ---
def _get_status_multiplier(payload: Dict[str, Any]) -> float: def _get_status_multiplier(payload: Dict[str, Any]) -> float:
""" """
WP-22: Drafts werden bestraft, Stable Notes belohnt. WP-22: Drafts werden bestraft, Stable Notes belohnt.
@ -106,10 +106,11 @@ def _get_status_multiplier(payload: Dict[str, Any]) -> float:
status = str(payload.get("status", "draft")).lower() status = str(payload.get("status", "draft")).lower()
if status == "stable": return 1.2 if status == "stable": return 1.2
if status == "active": return 1.0 if status == "active": return 1.0
if status == "draft": return 0.8 # Malus für Entwürfe if status == "draft": return 0.5 # Malus für Entwürfe
# Fallback für andere oder leere Status # Fallback für andere oder leere Status
return 1.0 return 1.0
# --- WP-22: Dynamic Scoring Formula (Teil C) ---
def _compute_total_score( def _compute_total_score(
semantic_score: float, semantic_score: float,
payload: Dict[str, Any], payload: Dict[str, Any],
@ -118,8 +119,8 @@ def _compute_total_score(
dynamic_edge_boosts: Dict[str, float] = None dynamic_edge_boosts: Dict[str, float] = None
) -> Tuple[float, float, float]: ) -> Tuple[float, float, float]:
""" """
Berechnet total_score. Berechnet total_score nach WP-22 Formel.
WP-22 Update: Integration von Status-Bonus und Dynamic Edge Boosts. Score = (Sem * Type * Status) + (Weighted_Edge + Cent)
""" """
raw_weight = payload.get("retriever_weight", 1.0) raw_weight = payload.get("retriever_weight", 1.0)
try: try:
@ -132,13 +133,13 @@ def _compute_total_score(
sem_w, edge_w, cent_w = _get_scoring_weights() sem_w, edge_w, cent_w = _get_scoring_weights()
status_mult = _get_status_multiplier(payload) status_mult = _get_status_multiplier(payload)
# Dynamic Edge Boosting # Dynamic Edge Boosting (Teil C)
# Wenn dynamische Boosts aktiv sind, erhöhen wir den Einfluss des Graphen # Wenn dynamische Boosts aktiv sind (durch den Router), verstärken wir den Graph-Bonus global.
# Dies ist eine Vereinfachung, da der echte Boost im Subgraph passiert sein sollte. # Der konkrete kanten-spezifische Boost passiert bereits im Subgraph (hybrid_retrieve).
final_edge_score = edge_w * edge_bonus final_edge_score = edge_w * edge_bonus
if dynamic_edge_boosts and edge_bonus > 0: if dynamic_edge_boosts and edge_bonus > 0:
# Globaler Boost für Graph-Signale bei spezifischen Intents # Globaler Boost-Faktor falls Intention (z.B. WHY) vorliegt
final_edge_score *= 1.2 final_edge_score *= 1.5
total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus) total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus)
return float(total), float(edge_bonus), float(cent_bonus) return float(total), float(edge_bonus), float(cent_bonus)
@ -154,9 +155,8 @@ def _build_explanation(
subgraph: Optional[ga.Subgraph], subgraph: Optional[ga.Subgraph],
node_key: Optional[str] node_key: Optional[str]
) -> Explanation: ) -> Explanation:
"""Erstellt ein Explanation-Objekt.""" """Erstellt ein Explanation-Objekt (WP-04b)."""
sem_w, _edge_w, _cent_w = _get_scoring_weights() sem_w, _edge_w, _cent_w = _get_scoring_weights()
# Scoring weights erneut laden für Reason-Details
_, edge_w_cfg, cent_w_cfg = _get_scoring_weights() _, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
try: try:
@ -167,6 +167,7 @@ def _build_explanation(
status_mult = _get_status_multiplier(payload) status_mult = _get_status_multiplier(payload)
note_type = payload.get("type", "unknown") note_type = payload.get("type", "unknown")
# Breakdown Berechnung (muss mit _compute_total_score korrelieren)
breakdown = ScoreBreakdown( breakdown = ScoreBreakdown(
semantic_contribution=(sem_w * semantic_score * type_weight * status_mult), semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
edge_contribution=(edge_w_cfg * edge_bonus), edge_contribution=(edge_w_cfg * edge_bonus),
@ -180,6 +181,7 @@ def _build_explanation(
reasons: List[Reason] = [] reasons: List[Reason] = []
edges_dto: List[EdgeDTO] = [] edges_dto: List[EdgeDTO] = []
# Reason Generation Logik (WP-04b)
if semantic_score > 0.85: if semantic_score > 0.85:
reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution)) reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution))
elif semantic_score > 0.70: elif semantic_score > 0.70:
@ -189,11 +191,13 @@ def _build_explanation(
msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet" msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0)))) reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))
# NEU: WP-22 Status Reason
if status_mult != 1.0: if status_mult != 1.0:
msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus" msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0)) reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0))
if subgraph and node_key and edge_bonus > 0: if subgraph and node_key and edge_bonus > 0:
# Extrahiere Top-Kanten für die Erklärung
if hasattr(subgraph, "get_outgoing_edges"): if hasattr(subgraph, "get_outgoing_edges"):
outgoing = subgraph.get_outgoing_edges(node_key) outgoing = subgraph.get_outgoing_edges(node_key)
for edge in outgoing: for edge in outgoing:
@ -226,7 +230,7 @@ def _build_explanation(
def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]: def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
"""Extrahiert depth und edge_types.""" """Extrahiert depth und edge_types für Graph-Expansion."""
expand = getattr(req, "expand", None) expand = getattr(req, "expand", None)
if not expand: if not expand:
return 0, None return 0, None
@ -259,7 +263,7 @@ def _build_hits_from_semantic(
explain: bool = False, explain: bool = False,
dynamic_edge_boosts: Dict[str, float] = None dynamic_edge_boosts: Dict[str, float] = None
) -> QueryResponse: ) -> QueryResponse:
"""Baut strukturierte QueryHits.""" """Baut strukturierte QueryHits basierend auf Scoring (WP-22 & WP-04b)."""
t0 = time.time() t0 = time.time()
enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = [] enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []
@ -278,27 +282,28 @@ def _build_hits_from_semantic(
except Exception: except Exception:
cent_bonus = 0.0 cent_bonus = 0.0
total, edge_bonus, cent_bonus = _compute_total_score( total, eb, cb = _compute_total_score(
semantic_score, semantic_score,
payload, payload,
edge_bonus=edge_bonus, edge_bonus=edge_bonus,
cent_bonus=cent_bonus, cent_bonus=cent_bonus,
dynamic_edge_boosts=dynamic_edge_boosts dynamic_edge_boosts=dynamic_edge_boosts
) )
enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus)) enriched.append((pid, float(semantic_score), payload, total, eb, cb))
# Sort & Limit
enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True) enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
limited = enriched_sorted[: max(1, top_k)] limited = enriched_sorted[: max(1, top_k)]
results: List[QueryHit] = [] results: List[QueryHit] = []
for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited: for pid, semantic_score, payload, total, eb, cb in limited:
explanation_obj = None explanation_obj = None
if explain: if explain:
explanation_obj = _build_explanation( explanation_obj = _build_explanation(
semantic_score=float(semantic_score), semantic_score=float(semantic_score),
payload=payload, payload=payload,
edge_bonus=edge_bonus, edge_bonus=eb,
cent_bonus=cent_bonus, cent_bonus=cb,
subgraph=subgraph, subgraph=subgraph,
node_key=payload.get("chunk_id") or payload.get("note_id") node_key=payload.get("chunk_id") or payload.get("note_id")
) )
@ -307,10 +312,10 @@ def _build_hits_from_semantic(
results.append(QueryHit( results.append(QueryHit(
node_id=str(pid), node_id=str(pid),
note_id=payload.get("note_id"), note_id=payload.get("note_id", "unknown"),
semantic_score=float(semantic_score), semantic_score=float(semantic_score),
edge_bonus=edge_bonus, edge_bonus=eb,
centrality_bonus=cent_bonus, centrality_bonus=cb,
total_score=total, total_score=total,
paths=None, paths=None,
source={ source={
@ -327,7 +332,7 @@ def _build_hits_from_semantic(
def semantic_retrieve(req: QueryRequest) -> QueryResponse: def semantic_retrieve(req: QueryRequest) -> QueryResponse:
"""Reiner semantischer Retriever.""" """Reiner semantischer Retriever (WP-02)."""
client, prefix = _get_client_and_prefix() client, prefix = _get_client_and_prefix()
vector = _get_query_vector(req) vector = _get_query_vector(req)
top_k = req.top_k or get_settings().RETRIEVER_TOP_K top_k = req.top_k or get_settings().RETRIEVER_TOP_K
@ -337,44 +342,44 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse:
def hybrid_retrieve(req: QueryRequest) -> QueryResponse: def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
"""Hybrid-Retriever: semantische Suche + optionale Edge-Expansion.""" """Hybrid-Retriever: semantische Suche + optionale Edge-Expansion (WP-04a)."""
client, prefix = _get_client_and_prefix() client, prefix = _get_client_and_prefix()
if req.query_vector:
vector = list(req.query_vector)
else:
vector = _get_query_vector(req)
# 1. Semantische Suche
vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
top_k = req.top_k or get_settings().RETRIEVER_TOP_K top_k = req.top_k or get_settings().RETRIEVER_TOP_K
hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters) hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)
# 2. Graph Expansion & Custom Boosting (WP-22 Teil C)
depth, edge_types = _extract_expand_options(req) depth, edge_types = _extract_expand_options(req)
# WP-22: Dynamic Boosts aus dem Request (vom Router)
boost_edges = getattr(req, "boost_edges", {}) boost_edges = getattr(req, "boost_edges", {})
subgraph: ga.Subgraph | None = None subgraph: ga.Subgraph | None = None
if depth and depth > 0: if depth and depth > 0:
seed_ids: List[str] = [] seed_ids: List[str] = []
for _pid, _score, payload in hits: for _pid, _score, payload in hits:
key = payload.get("chunk_id") or payload.get("note_id") key = payload.get("note_id")
if key and key not in seed_ids: if key and key not in seed_ids:
seed_ids.append(key) seed_ids.append(key)
if seed_ids: if seed_ids:
try: try:
# Hier könnten wir boost_edges auch an expand übergeben, wenn ga.expand es unterstützt # Subgraph laden
subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types) subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types)
# Manuelles Boosten der Kantengewichte im Graphen falls aktiv # --- WP-22: Kanten-Boosts im RAM-Graphen anwenden ---
# Dies manipuliert die Gewichte im Graphen, bevor der 'edge_bonus' berechnet wird.
if boost_edges and subgraph and hasattr(subgraph, "graph"): if boost_edges and subgraph and hasattr(subgraph, "graph"):
for u, v, data in subgraph.graph.edges(data=True): for u, v, data in subgraph.graph.edges(data=True):
k = data.get("kind") k = data.get("kind")
if k in boost_edges: if k in boost_edges:
# Gewicht erhöhen für diesen Query-Kontext # Gewicht multiplizieren (z.B. caused_by * 3.0)
data["weight"] = data.get("weight", 1.0) * boost_edges[k] data["weight"] = data.get("weight", 1.0) * boost_edges[k]
except Exception: except Exception:
subgraph = None subgraph = None
# 3. Scoring & Re-Ranking
return _build_hits_from_semantic( return _build_hits_from_semantic(
hits, hits,
top_k=top_k, top_k=top_k,
@ -386,11 +391,6 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
class Retriever: class Retriever:
""" """Wrapper-Klasse für Suchoperationen."""
Wrapper-Klasse für WP-05 (Chat).
"""
def __init__(self):
pass
async def search(self, request: QueryRequest) -> QueryResponse: async def search(self, request: QueryRequest) -> QueryResponse:
return hybrid_retrieve(request) return hybrid_retrieve(request)

View File

@ -2,7 +2,7 @@
FILE: app/services/edge_registry.py FILE: app/services/edge_registry.py
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'. DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
WP-22 Teil B: Registry & Validation. WP-22 Teil B: Registry & Validation.
FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt. Beachtet den dynamischen Vault-Root aus ENV oder Parameter.
""" """
import re import re
import os import os
@ -25,15 +25,11 @@ class EdgeRegistry:
if self.initialized: if self.initialized:
return return
# Priorität 1: Übergebener Parameter (z.B. für Tests) # Priorität: 1. Parameter -> 2. ENV -> 3. Default
# Priorität 2: Environment Variable (z.B. Production ./vault_master)
# Priorität 3: Default Fallback (./vault)
self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault") self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
# Der relative Pfad ist laut Spezifikation fest definiert
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md") self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
self.unknown_log_path = "data/logs/unknown_edges.jsonl" self.unknown_log_path = "data/logs/unknown_edges.jsonl"
self.canonical_map: Dict[str, str] = {} self.canonical_map: Dict[str, str] = {}
self.valid_types: Set[str] = set() self.valid_types: Set[str] = set()
@ -42,15 +38,13 @@ class EdgeRegistry:
def _load_vocabulary(self): def _load_vocabulary(self):
"""Parst die Markdown-Tabelle im Vault.""" """Parst die Markdown-Tabelle im Vault."""
# Absoluten Pfad auflösen, um Verwirrung mit cwd zu vermeiden
full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path)) full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
if not os.path.exists(full_path): if not os.path.exists(full_path):
# Wir loggen den vollen Pfad, damit Debugging einfacher ist
logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.") logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
return return
# Regex: | **canonical** | alias, alias | # Regex für Markdown Tabellen: | **canonical** | Aliases | ...
pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|") pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")
try: try:
@ -67,7 +61,7 @@ class EdgeRegistry:
if aliases_str and "Kein Alias" not in aliases_str: if aliases_str and "Kein Alias" not in aliases_str:
aliases = [a.strip() for a in aliases_str.split(",") if a.strip()] aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
for alias in aliases: for alias in aliases:
clean_alias = alias.replace("`", "") clean_alias = alias.replace("`", "").lower().strip()
self.canonical_map[clean_alias] = canonical self.canonical_map[clean_alias] = canonical
logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.") logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
@ -76,6 +70,7 @@ class EdgeRegistry:
logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}") logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
def resolve(self, edge_type: str) -> str: def resolve(self, edge_type: str) -> str:
"""Normalisiert Kanten-Typen via Registry oder loggt Unbekannte."""
if not edge_type: return "related_to" if not edge_type: return "related_to"
clean_type = edge_type.lower().strip().replace(" ", "_") clean_type = edge_type.lower().strip().replace(" ", "_")
@ -86,6 +81,7 @@ class EdgeRegistry:
return clean_type return clean_type
def _log_unknown(self, edge_type: str): def _log_unknown(self, edge_type: str):
"""Schreibt unbekannte Typen für Review in ein Log."""
try: try:
os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True) os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
entry = {"unknown_type": edge_type, "status": "new"} entry = {"unknown_type": edge_type, "status": "new"}
@ -94,5 +90,5 @@ class EdgeRegistry:
except Exception: except Exception:
pass pass
# Default Instanz # Singleton Instanz
registry = EdgeRegistry() registry = EdgeRegistry()

View File

@ -1,188 +1,97 @@
""" """
FILE: tests/test_WP22_integration.py FILE: app/services/edge_registry.py
DESCRIPTION: Integrationstest für WP-22 (Graph Intelligence). DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
FIXES: Pydantic Validation & Config Caching Issues. WP-22 Teil B: Registry & Validation.
FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt.
""" """
import unittest import re
import os import os
import shutil
import json import json
import yaml import logging
import asyncio from typing import Dict, Optional, Set
from unittest.mock import MagicMock, patch, AsyncMock
# Wir importieren das Modul direkt, um auf den Cache zuzugreifen logger = logging.getLogger(__name__)
import app.routers.chat
# DTOs und Logik class EdgeRegistry:
from app.models.dto import ChatRequest, QueryRequest, QueryHit _instance = None
from app.services.edge_registry import EdgeRegistry
from app.core.retriever import _compute_total_score, _get_status_multiplier
from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint
class TestWP22Integration(unittest.IsolatedAsyncioTestCase): def __new__(cls, vault_root: Optional[str] = None):
if cls._instance is None:
cls._instance = super(EdgeRegistry, cls).__new__(cls)
cls._instance.initialized = False
return cls._instance
def setUp(self): def __init__(self, vault_root: Optional[str] = None):
"""Bereitet eine isolierte Test-Umgebung vor.""" if self.initialized:
self.test_dir = "tests/temp_integration" return
# 1. Environment Patching # Priorität 1: Übergebener Parameter (z.B. für Tests)
self.os_env_patch = patch.dict(os.environ, { # Priorität 2: Environment Variable (z.B. Production ./vault_master)
"MINDNET_VAULT_ROOT": self.test_dir, # Priorität 3: Default Fallback (./vault)
"MINDNET_DECISION_CONFIG": os.path.join(self.test_dir, "config", "decision_engine.yaml"), self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
"MINDNET_TYPES_FILE": os.path.join(self.test_dir, "config", "types.yaml")
})
self.os_env_patch.start()
# 2. Verzeichnisse erstellen # Der relative Pfad ist laut Spezifikation fest definiert
os.makedirs(os.path.join(self.test_dir, "config"), exist_ok=True) self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
os.makedirs(os.path.join(self.test_dir, "01_User_Manual"), exist_ok=True)
os.makedirs(os.path.join(self.test_dir, "data", "logs"), exist_ok=True)
# 3. Config: decision_engine.yaml schreiben (Test-Definition) self.unknown_log_path = "data/logs/unknown_edges.jsonl"
self.decision_config = { self.canonical_map: Dict[str, str] = {}
"strategies": { self.valid_types: Set[str] = set()
"FACT": {
"trigger_keywords": ["was ist"],
"edge_boosts": {"part_of": 2.0} # Kein 'caused_by' hier!
},
"CAUSAL": {
"trigger_keywords": ["warum", "weshalb"],
"edge_boosts": {"caused_by": 3.0, "related_to": 0.5}
}
}
}
with open(os.environ["MINDNET_DECISION_CONFIG"], "w") as f:
yaml.dump(self.decision_config, f)
# 4. Config: Edge Vocabulary schreiben self._load_vocabulary()
vocab_path = os.path.join(self.test_dir, "01_User_Manual", "01_edge_vocabulary.md") self.initialized = True
with open(vocab_path, "w") as f:
f.write("| **caused_by** | ursache_ist, wegen |\n| **part_of** | teil_von |")
# 5. CACHE RESET (WICHTIG!) def _load_vocabulary(self):
# Damit der Router die oben geschriebene YAML auch wirklich liest: """Parst die Markdown-Tabelle im Vault."""
app.routers.chat._DECISION_CONFIG_CACHE = None # Absoluten Pfad auflösen, um Verwirrung mit cwd zu vermeiden
EdgeRegistry._instance = None full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
# Registry neu init if not os.path.exists(full_path):
self.registry = EdgeRegistry(vault_root=self.test_dir) logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
return
def tearDown(self): # Regex: | **canonical** | alias, alias |
self.os_env_patch.stop() pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")
if os.path.exists(self.test_dir):
shutil.rmtree(self.test_dir)
EdgeRegistry._instance = None
app.routers.chat._DECISION_CONFIG_CACHE = None
# ------------------------------------------------------------------------ try:
# TEST 1: Edge Registry & Validation with open(full_path, "r", encoding="utf-8") as f:
# ------------------------------------------------------------------------ for line in f:
def test_edge_registry_aliases(self): match = pattern.search(line)
print("\n🔵 TEST 1: Edge Registry Resolution") if match:
resolved = self.registry.resolve("ursache_ist") canonical = match.group(1).strip()
self.assertEqual(resolved, "caused_by") aliases_str = match.group(2).strip()
unknown = self.registry.resolve("foobar_link") self.valid_types.add(canonical)
self.assertEqual(unknown, "foobar_link") self.canonical_map[canonical] = canonical
log_path = self.registry.unknown_log_path if aliases_str and "Kein Alias" not in aliases_str:
self.assertTrue(os.path.exists(log_path)) aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
print("✅ Registry funktioniert.") for alias in aliases:
clean_alias = alias.replace("`", "")
self.canonical_map[clean_alias] = canonical
# ------------------------------------------------------------------------ logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
# TEST 2: Lifecycle Scoring
# ------------------------------------------------------------------------
def test_lifecycle_scoring_logic(self):
print("\n🔵 TEST 2: Lifecycle Scoring")
with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
base_sem = 0.9
payload_draft = {"status": "draft", "retriever_weight": 1.0} except Exception as e:
mult_draft = _get_status_multiplier(payload_draft) logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
self.assertEqual(mult_draft, 0.8)
payload_stable = {"status": "stable", "retriever_weight": 1.0} def resolve(self, edge_type: str) -> str:
mult_stable = _get_status_multiplier(payload_stable) if not edge_type: return "related_to"
self.assertEqual(mult_stable, 1.2) clean_type = edge_type.lower().strip().replace(" ", "_")
print("✅ Lifecycle Scoring korrekt.")
# ------------------------------------------------------------------------ if clean_type in self.canonical_map:
# TEST 3: Semantic Router & Boosting return self.canonical_map[clean_type]
# ------------------------------------------------------------------------
async def test_router_integration(self):
print("\n🔵 TEST 3: Semantic Router Integration")
mock_llm = MagicMock() self._log_unknown(clean_type)
mock_llm.prompts = {} return clean_type
# Da der Cache im setUp gelöscht wurde, sollte er jetzt CAUSAL finden def _log_unknown(self, edge_type: str):
query_causal = "Warum ist das Projekt gescheitert?" try:
intent, source = await _classify_intent(query_causal, mock_llm) os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
entry = {"unknown_type": edge_type, "status": "new"}
with open(self.unknown_log_path, "a", encoding="utf-8") as f:
f.write(json.dumps(entry) + "\n")
except Exception:
pass
self.assertEqual(intent, "CAUSAL", f"Erwartete CAUSAL, bekam {intent} via {source}") # Default Instanz
registry = EdgeRegistry()
strategy = get_decision_strategy(intent)
boosts = strategy.get("edge_boosts", {})
self.assertEqual(boosts.get("caused_by"), 3.0)
print("✅ Router lädt Config korrekt.")
# ------------------------------------------------------------------------
# TEST 4: Full Pipeline
# ------------------------------------------------------------------------
async def test_full_pipeline_flow(self):
print("\n🔵 TEST 4: Full Chat Pipeline")
mock_llm = AsyncMock()
mock_llm.prompts = {}
mock_llm.generate_raw_response.return_value = "Antwort."
mock_retriever = AsyncMock()
# FIX: note_id hinzugefügt für Pydantic
mock_hit = QueryHit(
node_id="123",
note_id="test_note_123", # <--- WICHTIG
semantic_score=0.9,
edge_bonus=0.5,
centrality_bonus=0.0,
total_score=1.0,
source={"text": "Inhalt"},
payload={"type": "concept"}
)
mock_retriever.search.return_value.results = [mock_hit]
req = ChatRequest(message="Warum ist das passiert?", top_k=3)
response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)
called_query_req = mock_retriever.search.call_args[0][0]
self.assertEqual(called_query_req.boost_edges.get("caused_by"), 3.0)
self.assertEqual(response.intent, "CAUSAL")
print("✅ Pipeline reicht Boosts weiter.")
# ------------------------------------------------------------------------
# TEST 5: Regression Check
# ------------------------------------------------------------------------
async def test_regression_standard_query(self):
print("\n🔵 TEST 5: Regression")
mock_llm = AsyncMock()
mock_llm.prompts = {}
mock_llm.generate_raw_response.return_value = "Antwort."
mock_retriever = AsyncMock()
mock_retriever.search.return_value.results = []
req = ChatRequest(message="Was ist das?", top_k=3)
response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)
called_query_req = mock_retriever.search.call_args[0][0]
# FACT strategy hat in unserem Test Setup NUR 'part_of', KEIN 'caused_by'
self.assertEqual(response.intent, "FACT")
self.assertNotIn("caused_by", called_query_req.boost_edges or {})
print("✅ Regression Test bestanden.")
if __name__ == '__main__':
unittest.main()