Script update

2025-12-18 12:10:15 +01:00 · 2025-12-18 12:10:15 +01:00 · 20b219d86c
commit 20b219d86c
parent 0ff39d7b14
1 changed files with 84 additions and 122 deletions
--- a/tests/test_WP22_intelligence.py
+++ b/tests/test_WP22_intelligence.py
@ -1,133 +1,95 @@
-import unittest
+"""
+FILE: app/services/edge_registry.py
+DESCRIPTION: Single source of truth for edge types. Parses '01_User_Manual/01_edge_vocabulary.md'.
+             Path logic fixed: uses MINDNET_VAULT_ROOT or the constructor parameter.
+"""
+import re
 import os
-import shutil
 import json
-from unittest.mock import MagicMock, patch
+import logging
+from typing import Dict, Optional, Set

-# Importiere die neuen Module
-from app.services.edge_registry import EdgeRegistry
-from app.core.retriever import _compute_total_score_v2, _get_status_multiplier
+logger = logging.getLogger(__name__)

-# Wir mocken Teile, um DB-Abhängigkeit zu vermeiden
-class TestWP22Intelligence(unittest.TestCase):
+class EdgeRegistry:
+    _instance = None

-    def setUp(self):
-        # 1. Setup Dummy Vocabulary
-        self.test_vocab_path = "tests/fixtures/01_edge_vocabulary.md"
-        self.test_log_path = "tests/logs/unknown_edges.jsonl"
-        os.makedirs("tests/fixtures", exist_ok=True)
-        os.makedirs("tests/logs", exist_ok=True)
-        
-        with open(self.test_vocab_path, "w") as f:
-            f.write("""
-| **canonical** | Aliases |
-| :--- | :--- |
-| **caused_by** | ursache_ist, wegen |
-| **next** | danach, folgt |
-            """)
+    def __new__(cls, vault_root: Optional[str] = None):
+        if cls._instance is None:  # first construction: create the one shared instance
+            cls._instance = super(EdgeRegistry, cls).__new__(cls)
+            cls._instance.initialized = False  # __init__ checks this flag before doing any setup
+        return cls._instance  # every later construction returns the same object
+
+    def __init__(self, vault_root: Optional[str] = None):
+        if self.initialized:  # shared instance is already set up; vault_root is ignored in that case
+            return
            
-        # Reset Registry Singleton for Test
-        EdgeRegistry._instance = None
-        self.registry = EdgeRegistry()
-        self.registry.vocab_path = self.test_vocab_path
-        self.registry.unknown_log_path = self.test_log_path
-        self.registry._load_vocabulary()
+        # Priority: 1. argument -> 2. MINDNET_VAULT_ROOT environment variable -> 3. default "./vault"
+        self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
+        
+        # Fixed vault-relative path of the vocabulary file, as laid down in the spec
+        self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
+        # JSONL file that _log_unknown appends to; relative, so it depends on the working directory
+        self.unknown_log_path = "data/logs/unknown_edges.jsonl"
+        self.canonical_map: Dict[str, str] = {}  # canonical name or alias -> canonical name
+        self.valid_types: Set[str] = set()  # canonical names found in the vocabulary table
+        # Fill both lookups, then mark the instance ready so later constructions return early
+        self._load_vocabulary()
+        self.initialized = True

-    def tearDown(self):
-        # Cleanup
-        if os.path.exists("tests/fixtures"):
-            shutil.rmtree("tests/fixtures")
-        if os.path.exists("tests/logs"):
-            shutil.rmtree("tests/logs")
+    def _load_vocabulary(self) -> None:
+        """Parse the vault's Markdown vocabulary table into valid_types and canonical_map."""
+        # Resolve the absolute path of the vocabulary file
+        full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
+        
+        if not os.path.exists(full_path):
+            logger.warning(f"Edge Vocabulary not found at: {full_path}. Registry empty.")
+            # Deliberately no longer guessing alternative paths, to keep behavior consistent.
+            return

-    # --- TEIL A: EDGE REGISTRY ---
-    def test_registry_resolution(self):
-        print("\n--- Test A: Registry & Alias Resolution ---")
-        
-        # 1. Canonical Check
-        self.assertEqual(self.registry.resolve("caused_by"), "caused_by")
-        
-        # 2. Alias Check
-        resolved = self.registry.resolve("ursache_ist")
-        print(f"Resolving 'ursache_ist' -> '{resolved}'")
-        self.assertEqual(resolved, "caused_by")
-        
-        # 3. Unknown Check & Logging
-        unknown = self.registry.resolve("mystery_link")
-        print(f"Resolving 'mystery_link' -> '{unknown}' (sollte durchgereicht werden)")
-        self.assertEqual(unknown, "mystery_link")
-        
-        # Check Logfile
-        with open(self.test_log_path, "r") as f:
-            log_content = f.read()
-            self.assertIn("mystery_link", log_content)
-            print("✅ Unknown edge correctly logged.")
+        # Matches table rows of the form: | **canonical** | alias, alias |
+        pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")

-    # --- TEIL B: LIFECYCLE SCORING ---
-    def test_lifecycle_scoring(self):
-        print("\n--- Test B: Lifecycle Scoring Math ---")
-        
-        # Baseline: Semantic Score 0.9, keine Edges
-        base_sem = 0.9
-        
-        payload_draft = {"status": "draft", "retriever_weight": 1.0}
-        payload_stable = {"status": "stable", "retriever_weight": 1.0}
-        
-        # Mock Settings
-        with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
-             # Achtung: Hier rufen wir die Logik auf, die wir im Retriever implementiert haben
-             # Da wir die Funktion _compute_total_score_v2 im Chat-Prompt definiert haben,
-             # nutzen wir hier die Logik aus der _get_status_multiplier Helper Funktion
-             
-             mult_draft = _get_status_multiplier(payload_draft)
-             mult_stable = _get_status_multiplier(payload_stable)
-             
-             score_draft = base_sem * mult_draft
-             score_stable = base_sem * mult_stable
-             
-             print(f"Score Draft (0.8x): {score_draft:.2f}")
-             print(f"Score Stable (1.2x): {score_stable:.2f}")
-             
-             self.assertLess(score_draft, base_sem)
-             self.assertGreater(score_stable, base_sem)
-             print("✅ Stable notes scored higher than drafts.")
-
-    # --- TEIL C: DYNAMIC EDGE BOOSTING ---
-    def test_dynamic_boosting(self):
-        print("\n--- Test C: Dynamic Edge Boosting ---")
-        
-        # Szenario: Wir simulieren, dass der Graph-Adapter einen Edge-Bonus von 1.0 berechnet hat
-        # Wir wollen prüfen, ob der Intent "WHY" diesen Bonus verstärkt.
-        
-        semantic_score = 0.5
-        raw_edge_bonus = 1.0 # Stark vernetzt
-        
-        with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 1.0, 0.0)):
-            # Fall 1: Normale Suche (Kein Boost)
-            # Formel ca: (1.0 * 0.5) + (1.0 * 1.0) = 1.5
-            from app.core.retriever import _compute_total_score
+        try:
+            with open(full_path, "r", encoding="utf-8") as f:
+                for line in f:
+                    match = pattern.search(line)
+                    if match:
+                        canonical = match.group(1).strip()
+                        aliases_str = match.group(2).strip()
+                        # Every canonical name also resolves to itself.
+                        self.valid_types.add(canonical)
+                        self.canonical_map[canonical] = canonical
+                        
+                        if aliases_str and "Kein Alias" not in aliases_str:
+                            # Normalize aliases the same way resolve() normalizes its input, so aliases
+                            # written with capitals, spaces or backticks still match; drop empty ones.
+                            cleaned = (a.replace("`", "").strip().lower().replace(" ", "_") for a in aliases_str.split(","))
+                            self.canonical_map.update({a: canonical for a in cleaned if a})
            
-            score_normal, _, _ = _compute_total_score(
-                semantic_score, 
-                {"status": "active"}, 
-                edge_bonus=raw_edge_bonus, 
-                dynamic_edge_boosts=None
-            )
-            
-            # Fall 2: "WHY" Frage (Boost auf caused_by -> simuliert im Request)
-            boost_map = {"caused_by": 2.0}
-            score_boosted, _, _ = _compute_total_score(
-                semantic_score, 
-                {"status": "active"}, 
-                edge_bonus=raw_edge_bonus, 
-                dynamic_edge_boosts=boost_map
-            )
-            
-            print(f"Normal Score: {score_normal}")
-            print(f"Boosted Score: {score_boosted}")
-            
-            self.assertGreater(score_boosted, score_normal)
-            print("✅ Dynamic Boosting increased score successfully.")
+            logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")

-if __name__ == '__main__':
-    unittest.main()
+        except Exception as e:
+            logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
+
+    def resolve(self, edge_type: str) -> str:
+        """Return the canonical type for a name or alias; unknown names are logged and passed through."""
+        clean_type = (edge_type or "").lower().strip().replace(" ", "_")
+        if not clean_type:  # None, "" and whitespace-only input all fall back to the default type
+            return "related_to"
+        if clean_type not in self.canonical_map:
+            self._log_unknown(clean_type)  # the normalized name is returned, not rejected
+            return clean_type
+        return self.canonical_map[clean_type]
+
+    def _log_unknown(self, edge_type: str) -> None:
+        """Append one JSON line for an unknown edge type to unknown_log_path (best-effort)."""
+        try:
+            os.makedirs(os.path.dirname(self.unknown_log_path) or ".", exist_ok=True)  # "" for bare file names
+            with open(self.unknown_log_path, "a", encoding="utf-8") as f:
+                f.write(json.dumps({"unknown_type": edge_type, "status": "new"}) + "\n")
+        except Exception as e:  # must not break resolve(); report instead of swallowing silently
+            logger.debug(f"Could not record unknown edge type {edge_type!r}: {e}")
+
+# Default instance (uses the MINDNET_VAULT_ROOT environment variable or ./vault)
+registry = EdgeRegistry()