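bug fix: WP-22 retriever scoring and edge registry (draft multiplier 0.8 -> 0.5, dynamic edge boost 1.2 -> 1.5, graph seeds by note_id only, edge aliases lowercased)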
This commit is contained in:
parent
9a18f3cc8b
commit
3eac646cb6
|
|
@ -162,7 +162,7 @@ class IngestionService:
|
|||
# --- WP-22: Content Lifecycle Gate ---
|
||||
status = fm.get("status", "draft").lower().strip()
|
||||
|
||||
# Hard Skip für System-Dateien
|
||||
# Hard Skip für System-Dateien (Teil A)
|
||||
if status in ["system", "template", "archive", "hidden"]:
|
||||
logger.info(f"Skipping file {file_path} (Status: {status})")
|
||||
return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
|
||||
|
|
@ -265,7 +265,7 @@ class IngestionService:
|
|||
except TypeError:
|
||||
raw_edges = build_edges_for_note(note_id, chunk_pls)
|
||||
|
||||
# --- WP-22: Edge Registry Validation ---
|
||||
# --- WP-22: Edge Registry Validation (Teil B) ---
|
||||
edges = []
|
||||
if raw_edges:
|
||||
for edge in raw_edges:
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ def _semantic_hits(
|
|||
results.append((str(pid), float(score), dict(payload or {})))
|
||||
return results
|
||||
|
||||
# --- WP-22 Helper: Lifecycle Multipliers ---
|
||||
# --- WP-22 Helper: Lifecycle Multipliers (Teil A) ---
|
||||
def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
||||
"""
|
||||
WP-22: Drafts werden bestraft, Stable Notes belohnt.
|
||||
|
|
@ -106,10 +106,11 @@ def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
|||
status = str(payload.get("status", "draft")).lower()
|
||||
if status == "stable": return 1.2
|
||||
if status == "active": return 1.0
|
||||
if status == "draft": return 0.8 # Malus für Entwürfe
|
||||
if status == "draft": return 0.5 # Malus für Entwürfe
|
||||
# Fallback für andere oder leere Status
|
||||
return 1.0
|
||||
|
||||
# --- WP-22: Dynamic Scoring Formula (Teil C) ---
|
||||
def _compute_total_score(
|
||||
semantic_score: float,
|
||||
payload: Dict[str, Any],
|
||||
|
|
@ -118,8 +119,8 @@ def _compute_total_score(
|
|||
dynamic_edge_boosts: Dict[str, float] = None
|
||||
) -> Tuple[float, float, float]:
|
||||
"""
|
||||
Berechnet total_score.
|
||||
WP-22 Update: Integration von Status-Bonus und Dynamic Edge Boosts.
|
||||
Berechnet total_score nach WP-22 Formel.
|
||||
Score = (Sem * Type * Status) + (Weighted_Edge + Cent)
|
||||
"""
|
||||
raw_weight = payload.get("retriever_weight", 1.0)
|
||||
try:
|
||||
|
|
@ -132,13 +133,13 @@ def _compute_total_score(
|
|||
sem_w, edge_w, cent_w = _get_scoring_weights()
|
||||
status_mult = _get_status_multiplier(payload)
|
||||
|
||||
# Dynamic Edge Boosting
|
||||
# Wenn dynamische Boosts aktiv sind, erhöhen wir den Einfluss des Graphen
|
||||
# Dies ist eine Vereinfachung, da der echte Boost im Subgraph passiert sein sollte.
|
||||
# Dynamic Edge Boosting (Teil C)
|
||||
# Wenn dynamische Boosts aktiv sind (durch den Router), verstärken wir den Graph-Bonus global.
|
||||
# Der konkrete kanten-spezifische Boost passiert bereits im Subgraph (hybrid_retrieve).
|
||||
final_edge_score = edge_w * edge_bonus
|
||||
if dynamic_edge_boosts and edge_bonus > 0:
|
||||
# Globaler Boost für Graph-Signale bei spezifischen Intents
|
||||
final_edge_score *= 1.2
|
||||
# Globaler Boost-Faktor falls Intention (z.B. WHY) vorliegt
|
||||
final_edge_score *= 1.5
|
||||
|
||||
total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus)
|
||||
return float(total), float(edge_bonus), float(cent_bonus)
|
||||
|
|
@ -154,9 +155,8 @@ def _build_explanation(
|
|||
subgraph: Optional[ga.Subgraph],
|
||||
node_key: Optional[str]
|
||||
) -> Explanation:
|
||||
"""Erstellt ein Explanation-Objekt."""
|
||||
"""Erstellt ein Explanation-Objekt (WP-04b)."""
|
||||
sem_w, _edge_w, _cent_w = _get_scoring_weights()
|
||||
# Scoring weights erneut laden für Reason-Details
|
||||
_, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
|
||||
|
||||
try:
|
||||
|
|
@ -167,6 +167,7 @@ def _build_explanation(
|
|||
status_mult = _get_status_multiplier(payload)
|
||||
note_type = payload.get("type", "unknown")
|
||||
|
||||
# Breakdown Berechnung (muss mit _compute_total_score korrelieren)
|
||||
breakdown = ScoreBreakdown(
|
||||
semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
|
||||
edge_contribution=(edge_w_cfg * edge_bonus),
|
||||
|
|
@ -180,6 +181,7 @@ def _build_explanation(
|
|||
reasons: List[Reason] = []
|
||||
edges_dto: List[EdgeDTO] = []
|
||||
|
||||
# Reason Generation Logik (WP-04b)
|
||||
if semantic_score > 0.85:
|
||||
reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution))
|
||||
elif semantic_score > 0.70:
|
||||
|
|
@ -189,11 +191,13 @@ def _build_explanation(
|
|||
msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
|
||||
reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))
|
||||
|
||||
# NEU: WP-22 Status Reason
|
||||
if status_mult != 1.0:
|
||||
msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
|
||||
reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0))
|
||||
|
||||
if subgraph and node_key and edge_bonus > 0:
|
||||
# Extrahiere Top-Kanten für die Erklärung
|
||||
if hasattr(subgraph, "get_outgoing_edges"):
|
||||
outgoing = subgraph.get_outgoing_edges(node_key)
|
||||
for edge in outgoing:
|
||||
|
|
@ -226,7 +230,7 @@ def _build_explanation(
|
|||
|
||||
|
||||
def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
|
||||
"""Extrahiert depth und edge_types."""
|
||||
"""Extrahiert depth und edge_types für Graph-Expansion."""
|
||||
expand = getattr(req, "expand", None)
|
||||
if not expand:
|
||||
return 0, None
|
||||
|
|
@ -259,7 +263,7 @@ def _build_hits_from_semantic(
|
|||
explain: bool = False,
|
||||
dynamic_edge_boosts: Dict[str, float] = None
|
||||
) -> QueryResponse:
|
||||
"""Baut strukturierte QueryHits."""
|
||||
"""Baut strukturierte QueryHits basierend auf Scoring (WP-22 & WP-04b)."""
|
||||
t0 = time.time()
|
||||
enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []
|
||||
|
||||
|
|
@ -278,27 +282,28 @@ def _build_hits_from_semantic(
|
|||
except Exception:
|
||||
cent_bonus = 0.0
|
||||
|
||||
total, edge_bonus, cent_bonus = _compute_total_score(
|
||||
total, eb, cb = _compute_total_score(
|
||||
semantic_score,
|
||||
payload,
|
||||
edge_bonus=edge_bonus,
|
||||
cent_bonus=cent_bonus,
|
||||
dynamic_edge_boosts=dynamic_edge_boosts
|
||||
)
|
||||
enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus))
|
||||
enriched.append((pid, float(semantic_score), payload, total, eb, cb))
|
||||
|
||||
# Sort & Limit
|
||||
enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
|
||||
limited = enriched_sorted[: max(1, top_k)]
|
||||
|
||||
results: List[QueryHit] = []
|
||||
for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited:
|
||||
for pid, semantic_score, payload, total, eb, cb in limited:
|
||||
explanation_obj = None
|
||||
if explain:
|
||||
explanation_obj = _build_explanation(
|
||||
semantic_score=float(semantic_score),
|
||||
payload=payload,
|
||||
edge_bonus=edge_bonus,
|
||||
cent_bonus=cent_bonus,
|
||||
edge_bonus=eb,
|
||||
cent_bonus=cb,
|
||||
subgraph=subgraph,
|
||||
node_key=payload.get("chunk_id") or payload.get("note_id")
|
||||
)
|
||||
|
|
@ -307,10 +312,10 @@ def _build_hits_from_semantic(
|
|||
|
||||
results.append(QueryHit(
|
||||
node_id=str(pid),
|
||||
note_id=payload.get("note_id"),
|
||||
note_id=payload.get("note_id", "unknown"),
|
||||
semantic_score=float(semantic_score),
|
||||
edge_bonus=edge_bonus,
|
||||
centrality_bonus=cent_bonus,
|
||||
edge_bonus=eb,
|
||||
centrality_bonus=cb,
|
||||
total_score=total,
|
||||
paths=None,
|
||||
source={
|
||||
|
|
@ -327,7 +332,7 @@ def _build_hits_from_semantic(
|
|||
|
||||
|
||||
def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
||||
"""Reiner semantischer Retriever."""
|
||||
"""Reiner semantischer Retriever (WP-02)."""
|
||||
client, prefix = _get_client_and_prefix()
|
||||
vector = _get_query_vector(req)
|
||||
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
||||
|
|
@ -337,44 +342,44 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
|||
|
||||
|
||||
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||
"""Hybrid-Retriever: semantische Suche + optionale Edge-Expansion."""
|
||||
"""Hybrid-Retriever: semantische Suche + optionale Edge-Expansion (WP-04a)."""
|
||||
client, prefix = _get_client_and_prefix()
|
||||
if req.query_vector:
|
||||
vector = list(req.query_vector)
|
||||
else:
|
||||
vector = _get_query_vector(req)
|
||||
|
||||
|
||||
# 1. Semantische Suche
|
||||
vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
|
||||
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
||||
hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)
|
||||
|
||||
# 2. Graph Expansion & Custom Boosting (WP-22 Teil C)
|
||||
depth, edge_types = _extract_expand_options(req)
|
||||
|
||||
# WP-22: Dynamic Boosts aus dem Request (vom Router)
|
||||
boost_edges = getattr(req, "boost_edges", {})
|
||||
|
||||
subgraph: ga.Subgraph | None = None
|
||||
if depth and depth > 0:
|
||||
seed_ids: List[str] = []
|
||||
for _pid, _score, payload in hits:
|
||||
key = payload.get("chunk_id") or payload.get("note_id")
|
||||
key = payload.get("note_id")
|
||||
if key and key not in seed_ids:
|
||||
seed_ids.append(key)
|
||||
|
||||
if seed_ids:
|
||||
try:
|
||||
# Hier könnten wir boost_edges auch an expand übergeben, wenn ga.expand es unterstützt
|
||||
# Subgraph laden
|
||||
subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types)
|
||||
|
||||
# Manuelles Boosten der Kantengewichte im Graphen falls aktiv
|
||||
# --- WP-22: Kanten-Boosts im RAM-Graphen anwenden ---
|
||||
# Dies manipuliert die Gewichte im Graphen, bevor der 'edge_bonus' berechnet wird.
|
||||
if boost_edges and subgraph and hasattr(subgraph, "graph"):
|
||||
for u, v, data in subgraph.graph.edges(data=True):
|
||||
k = data.get("kind")
|
||||
if k in boost_edges:
|
||||
# Gewicht erhöhen für diesen Query-Kontext
|
||||
# Gewicht multiplizieren (z.B. caused_by * 3.0)
|
||||
data["weight"] = data.get("weight", 1.0) * boost_edges[k]
|
||||
|
||||
except Exception:
|
||||
subgraph = None
|
||||
|
||||
# 3. Scoring & Re-Ranking
|
||||
return _build_hits_from_semantic(
|
||||
hits,
|
||||
top_k=top_k,
|
||||
|
|
@ -386,11 +391,6 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
|||
|
||||
|
||||
class Retriever:
|
||||
"""
|
||||
Wrapper-Klasse für WP-05 (Chat).
|
||||
"""
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
"""Wrapper-Klasse für Suchoperationen."""
|
||||
async def search(self, request: QueryRequest) -> QueryResponse:
|
||||
return hybrid_retrieve(request)
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
FILE: app/services/edge_registry.py
|
||||
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
||||
WP-22 Teil B: Registry & Validation.
|
||||
FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt.
|
||||
Beachtet den dynamischen Vault-Root aus ENV oder Parameter.
|
||||
"""
|
||||
import re
|
||||
import os
|
||||
|
|
@ -25,15 +25,11 @@ class EdgeRegistry:
|
|||
if self.initialized:
|
||||
return
|
||||
|
||||
# Priorität 1: Übergebener Parameter (z.B. für Tests)
|
||||
# Priorität 2: Environment Variable (z.B. Production ./vault_master)
|
||||
# Priorität 3: Default Fallback (./vault)
|
||||
# Priorität: 1. Parameter -> 2. ENV -> 3. Default
|
||||
self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
||||
|
||||
# Der relative Pfad ist laut Spezifikation fest definiert
|
||||
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
||||
|
||||
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
||||
|
||||
self.canonical_map: Dict[str, str] = {}
|
||||
self.valid_types: Set[str] = set()
|
||||
|
||||
|
|
@ -42,15 +38,13 @@ class EdgeRegistry:
|
|||
|
||||
def _load_vocabulary(self):
|
||||
"""Parst die Markdown-Tabelle im Vault."""
|
||||
# Absoluten Pfad auflösen, um Verwirrung mit cwd zu vermeiden
|
||||
full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
# Wir loggen den vollen Pfad, damit Debugging einfacher ist
|
||||
logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
|
||||
return
|
||||
|
||||
# Regex: | **canonical** | alias, alias |
|
||||
# Regex für Markdown Tabellen: | **canonical** | Aliases | ...
|
||||
pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")
|
||||
|
||||
try:
|
||||
|
|
@ -67,7 +61,7 @@ class EdgeRegistry:
|
|||
if aliases_str and "Kein Alias" not in aliases_str:
|
||||
aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
|
||||
for alias in aliases:
|
||||
clean_alias = alias.replace("`", "")
|
||||
clean_alias = alias.replace("`", "").lower().strip()
|
||||
self.canonical_map[clean_alias] = canonical
|
||||
|
||||
logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
|
||||
|
|
@ -76,6 +70,7 @@ class EdgeRegistry:
|
|||
logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
|
||||
|
||||
def resolve(self, edge_type: str) -> str:
|
||||
"""Normalisiert Kanten-Typen via Registry oder loggt Unbekannte."""
|
||||
if not edge_type: return "related_to"
|
||||
clean_type = edge_type.lower().strip().replace(" ", "_")
|
||||
|
||||
|
|
@ -86,6 +81,7 @@ class EdgeRegistry:
|
|||
return clean_type
|
||||
|
||||
def _log_unknown(self, edge_type: str):
|
||||
"""Schreibt unbekannte Typen für Review in ein Log."""
|
||||
try:
|
||||
os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
|
||||
entry = {"unknown_type": edge_type, "status": "new"}
|
||||
|
|
@ -94,5 +90,5 @@ class EdgeRegistry:
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Default Instanz
|
||||
# Singleton Instanz
|
||||
registry = EdgeRegistry()
|
||||
|
|
@ -1,188 +1,97 @@
|
|||
"""
|
||||
FILE: tests/test_WP22_integration.py
|
||||
DESCRIPTION: Integrationstest für WP-22 (Graph Intelligence).
|
||||
FIXES: Pydantic Validation & Config Caching Issues.
|
||||
FILE: app/services/edge_registry.py
|
||||
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
||||
WP-22 Teil B: Registry & Validation.
|
||||
FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt.
|
||||
"""
|
||||
import unittest
|
||||
import re
|
||||
import os
|
||||
import shutil
|
||||
import json
|
||||
import yaml
|
||||
import asyncio
|
||||
from unittest.mock import MagicMock, patch, AsyncMock
|
||||
import logging
|
||||
from typing import Dict, Optional, Set
|
||||
|
||||
# Wir importieren das Modul direkt, um auf den Cache zuzugreifen
|
||||
import app.routers.chat
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# DTOs und Logik
|
||||
from app.models.dto import ChatRequest, QueryRequest, QueryHit
|
||||
from app.services.edge_registry import EdgeRegistry
|
||||
from app.core.retriever import _compute_total_score, _get_status_multiplier
|
||||
from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint
|
||||
class EdgeRegistry:
|
||||
_instance = None
|
||||
|
||||
class TestWP22Integration(unittest.IsolatedAsyncioTestCase):
|
||||
def __new__(cls, vault_root: Optional[str] = None):
|
||||
if cls._instance is None:
|
||||
cls._instance = super(EdgeRegistry, cls).__new__(cls)
|
||||
cls._instance.initialized = False
|
||||
return cls._instance
|
||||
|
||||
def setUp(self):
|
||||
"""Bereitet eine isolierte Test-Umgebung vor."""
|
||||
self.test_dir = "tests/temp_integration"
|
||||
|
||||
# 1. Environment Patching
|
||||
self.os_env_patch = patch.dict(os.environ, {
|
||||
"MINDNET_VAULT_ROOT": self.test_dir,
|
||||
"MINDNET_DECISION_CONFIG": os.path.join(self.test_dir, "config", "decision_engine.yaml"),
|
||||
"MINDNET_TYPES_FILE": os.path.join(self.test_dir, "config", "types.yaml")
|
||||
})
|
||||
self.os_env_patch.start()
|
||||
|
||||
# 2. Verzeichnisse erstellen
|
||||
os.makedirs(os.path.join(self.test_dir, "config"), exist_ok=True)
|
||||
os.makedirs(os.path.join(self.test_dir, "01_User_Manual"), exist_ok=True)
|
||||
os.makedirs(os.path.join(self.test_dir, "data", "logs"), exist_ok=True)
|
||||
|
||||
# 3. Config: decision_engine.yaml schreiben (Test-Definition)
|
||||
self.decision_config = {
|
||||
"strategies": {
|
||||
"FACT": {
|
||||
"trigger_keywords": ["was ist"],
|
||||
"edge_boosts": {"part_of": 2.0} # Kein 'caused_by' hier!
|
||||
},
|
||||
"CAUSAL": {
|
||||
"trigger_keywords": ["warum", "weshalb"],
|
||||
"edge_boosts": {"caused_by": 3.0, "related_to": 0.5}
|
||||
}
|
||||
}
|
||||
}
|
||||
with open(os.environ["MINDNET_DECISION_CONFIG"], "w") as f:
|
||||
yaml.dump(self.decision_config, f)
|
||||
|
||||
# 4. Config: Edge Vocabulary schreiben
|
||||
vocab_path = os.path.join(self.test_dir, "01_User_Manual", "01_edge_vocabulary.md")
|
||||
with open(vocab_path, "w") as f:
|
||||
f.write("| **caused_by** | ursache_ist, wegen |\n| **part_of** | teil_von |")
|
||||
|
||||
# 5. CACHE RESET (WICHTIG!)
|
||||
# Damit der Router die oben geschriebene YAML auch wirklich liest:
|
||||
app.routers.chat._DECISION_CONFIG_CACHE = None
|
||||
EdgeRegistry._instance = None
|
||||
|
||||
# Registry neu init
|
||||
self.registry = EdgeRegistry(vault_root=self.test_dir)
|
||||
|
||||
def tearDown(self):
|
||||
self.os_env_patch.stop()
|
||||
if os.path.exists(self.test_dir):
|
||||
shutil.rmtree(self.test_dir)
|
||||
EdgeRegistry._instance = None
|
||||
app.routers.chat._DECISION_CONFIG_CACHE = None
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# TEST 1: Edge Registry & Validation
|
||||
# ------------------------------------------------------------------------
|
||||
def test_edge_registry_aliases(self):
|
||||
print("\n🔵 TEST 1: Edge Registry Resolution")
|
||||
resolved = self.registry.resolve("ursache_ist")
|
||||
self.assertEqual(resolved, "caused_by")
|
||||
|
||||
unknown = self.registry.resolve("foobar_link")
|
||||
self.assertEqual(unknown, "foobar_link")
|
||||
|
||||
log_path = self.registry.unknown_log_path
|
||||
self.assertTrue(os.path.exists(log_path))
|
||||
print("✅ Registry funktioniert.")
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# TEST 2: Lifecycle Scoring
|
||||
# ------------------------------------------------------------------------
|
||||
def test_lifecycle_scoring_logic(self):
|
||||
print("\n🔵 TEST 2: Lifecycle Scoring")
|
||||
with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
|
||||
base_sem = 0.9
|
||||
def __init__(self, vault_root: Optional[str] = None):
|
||||
if self.initialized:
|
||||
return
|
||||
|
||||
payload_draft = {"status": "draft", "retriever_weight": 1.0}
|
||||
mult_draft = _get_status_multiplier(payload_draft)
|
||||
self.assertEqual(mult_draft, 0.8)
|
||||
# Priorität 1: Übergebener Parameter (z.B. für Tests)
|
||||
# Priorität 2: Environment Variable (z.B. Production ./vault_master)
|
||||
# Priorität 3: Default Fallback (./vault)
|
||||
self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
||||
|
||||
# Der relative Pfad ist laut Spezifikation fest definiert
|
||||
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
||||
|
||||
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
||||
self.canonical_map: Dict[str, str] = {}
|
||||
self.valid_types: Set[str] = set()
|
||||
|
||||
self._load_vocabulary()
|
||||
self.initialized = True
|
||||
|
||||
def _load_vocabulary(self):
|
||||
"""Parst die Markdown-Tabelle im Vault."""
|
||||
# Absoluten Pfad auflösen, um Verwirrung mit cwd zu vermeiden
|
||||
full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
|
||||
return
|
||||
|
||||
# Regex: | **canonical** | alias, alias |
|
||||
pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")
|
||||
|
||||
try:
|
||||
with open(full_path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
match = pattern.search(line)
|
||||
if match:
|
||||
canonical = match.group(1).strip()
|
||||
aliases_str = match.group(2).strip()
|
||||
|
||||
self.valid_types.add(canonical)
|
||||
self.canonical_map[canonical] = canonical
|
||||
|
||||
if aliases_str and "Kein Alias" not in aliases_str:
|
||||
aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
|
||||
for alias in aliases:
|
||||
clean_alias = alias.replace("`", "")
|
||||
self.canonical_map[clean_alias] = canonical
|
||||
|
||||
payload_stable = {"status": "stable", "retriever_weight": 1.0}
|
||||
mult_stable = _get_status_multiplier(payload_stable)
|
||||
self.assertEqual(mult_stable, 1.2)
|
||||
print("✅ Lifecycle Scoring korrekt.")
|
||||
logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# TEST 3: Semantic Router & Boosting
|
||||
# ------------------------------------------------------------------------
|
||||
async def test_router_integration(self):
|
||||
print("\n🔵 TEST 3: Semantic Router Integration")
|
||||
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.prompts = {}
|
||||
|
||||
# Da der Cache im setUp gelöscht wurde, sollte er jetzt CAUSAL finden
|
||||
query_causal = "Warum ist das Projekt gescheitert?"
|
||||
intent, source = await _classify_intent(query_causal, mock_llm)
|
||||
|
||||
self.assertEqual(intent, "CAUSAL", f"Erwartete CAUSAL, bekam {intent} via {source}")
|
||||
|
||||
strategy = get_decision_strategy(intent)
|
||||
boosts = strategy.get("edge_boosts", {})
|
||||
self.assertEqual(boosts.get("caused_by"), 3.0)
|
||||
print("✅ Router lädt Config korrekt.")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# TEST 4: Full Pipeline
|
||||
# ------------------------------------------------------------------------
|
||||
async def test_full_pipeline_flow(self):
|
||||
print("\n🔵 TEST 4: Full Chat Pipeline")
|
||||
def resolve(self, edge_type: str) -> str:
|
||||
if not edge_type: return "related_to"
|
||||
clean_type = edge_type.lower().strip().replace(" ", "_")
|
||||
|
||||
mock_llm = AsyncMock()
|
||||
mock_llm.prompts = {}
|
||||
mock_llm.generate_raw_response.return_value = "Antwort."
|
||||
if clean_type in self.canonical_map:
|
||||
return self.canonical_map[clean_type]
|
||||
|
||||
mock_retriever = AsyncMock()
|
||||
# FIX: note_id hinzugefügt für Pydantic
|
||||
mock_hit = QueryHit(
|
||||
node_id="123",
|
||||
note_id="test_note_123", # <--- WICHTIG
|
||||
semantic_score=0.9,
|
||||
edge_bonus=0.5,
|
||||
centrality_bonus=0.0,
|
||||
total_score=1.0,
|
||||
source={"text": "Inhalt"},
|
||||
payload={"type": "concept"}
|
||||
)
|
||||
mock_retriever.search.return_value.results = [mock_hit]
|
||||
self._log_unknown(clean_type)
|
||||
return clean_type
|
||||
|
||||
req = ChatRequest(message="Warum ist das passiert?", top_k=3)
|
||||
|
||||
response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)
|
||||
|
||||
called_query_req = mock_retriever.search.call_args[0][0]
|
||||
self.assertEqual(called_query_req.boost_edges.get("caused_by"), 3.0)
|
||||
self.assertEqual(response.intent, "CAUSAL")
|
||||
print("✅ Pipeline reicht Boosts weiter.")
|
||||
def _log_unknown(self, edge_type: str):
|
||||
try:
|
||||
os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
|
||||
entry = {"unknown_type": edge_type, "status": "new"}
|
||||
with open(self.unknown_log_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(entry) + "\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# TEST 5: Regression Check
|
||||
# ------------------------------------------------------------------------
|
||||
async def test_regression_standard_query(self):
|
||||
print("\n🔵 TEST 5: Regression")
|
||||
|
||||
mock_llm = AsyncMock()
|
||||
mock_llm.prompts = {}
|
||||
mock_llm.generate_raw_response.return_value = "Antwort."
|
||||
|
||||
mock_retriever = AsyncMock()
|
||||
mock_retriever.search.return_value.results = []
|
||||
|
||||
req = ChatRequest(message="Was ist das?", top_k=3)
|
||||
|
||||
response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)
|
||||
|
||||
called_query_req = mock_retriever.search.call_args[0][0]
|
||||
|
||||
# FACT strategy hat in unserem Test Setup NUR 'part_of', KEIN 'caused_by'
|
||||
self.assertEqual(response.intent, "FACT")
|
||||
self.assertNotIn("caused_by", called_query_req.boost_edges or {})
|
||||
print("✅ Regression Test bestanden.")
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
# Default Instanz
|
||||
registry = EdgeRegistry()
|
||||
Loading…
Reference in New Issue
Block a user