"""
FILE: app/services/edge_registry.py
DESCRIPTION: Single source of truth for edge types.
    Parses '01_User_Manual/01_edge_vocabulary.md'.
    Path logic: uses the ``vault_root`` argument or MINDNET_VAULT_ROOT.
"""
import re
import os
import json
import logging
from typing import Dict, Optional, Set

logger = logging.getLogger(__name__)


class EdgeRegistry:
    """Process-wide registry mapping edge-type aliases to canonical names.

    The class is a singleton: every call to ``EdgeRegistry(...)`` returns the
    same object, and only the first call performs initialisation. The
    vocabulary is read from a Markdown table whose rows look like
    ``| **canonical** | alias, alias |``.
    """

    _instance = None

    # Table row: | **canonical** | alias, alias |
    _ROW_PATTERN = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")

    def __new__(cls, vault_root: Optional[str] = None):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance.initialized = False
        return cls._instance

    def __init__(self, vault_root: Optional[str] = None):
        """Initialise the singleton on first construction.

        Args:
            vault_root: Directory containing the vault. Falls back to the
                ``MINDNET_VAULT_ROOT`` environment variable, then ``./vault``.
                Ignored (with a warning) once the singleton is initialised.
        """
        if self.initialized:
            # The singleton keeps its first configuration; make it visible
            # when a caller asks for a different root instead of dropping
            # the argument silently.
            if vault_root is not None and vault_root != self.vault_root:
                logger.warning(
                    "EdgeRegistry already initialised with vault_root=%s; "
                    "ignoring vault_root=%s",
                    self.vault_root,
                    vault_root,
                )
            return

        # Priority: 1. argument -> 2. environment -> 3. default
        self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")

        # Fixed relative path of the vocabulary file inside the vault.
        self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")

        self.unknown_log_path = "data/logs/unknown_edges.jsonl"
        self.canonical_map: Dict[str, str] = {}
        self.valid_types: Set[str] = set()

        self._load_vocabulary()
        self.initialized = True

    @staticmethod
    def _normalize(name: str) -> str:
        """Return *name* lower-cased, stripped, with spaces as underscores."""
        return name.lower().strip().replace(" ", "_")

    def _load_vocabulary(self):
        """Parse the Markdown vocabulary table found in the vault.

        Fills ``valid_types`` with the canonical names and ``canonical_map``
        with ``name -> canonical`` entries for canonicals and their aliases.
        A missing or unreadable file is logged and leaves whatever was loaded
        so far in place; no exception is propagated.
        """
        full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))

        try:
            with open(full_path, "r", encoding="utf-8") as handle:
                for line in handle:
                    self._register_row(line)
        except FileNotFoundError:
            # No guessing of alternative locations, to keep lookups consistent.
            logger.warning("Edge Vocabulary not found at: %s. Registry empty.", full_path)
            return
        except (OSError, UnicodeDecodeError) as exc:
            logger.error("Failed to parse Edge Vocabulary at %s: %s", full_path, exc)
            return

        logger.info(
            "EdgeRegistry loaded from %s: %d types.", full_path, len(self.valid_types)
        )

    def _register_row(self, line: str) -> None:
        """Register the canonical type and aliases of one table row, if any."""
        match = self._ROW_PATTERN.search(line)
        if match is None:
            return

        canonical = match.group(1).strip()
        aliases_str = match.group(2).strip()

        self.valid_types.add(canonical)
        self.canonical_map[canonical] = canonical

        # "Kein Alias" is the table's marker for "no alias".
        if not aliases_str or "Kein Alias" in aliases_str:
            return

        for raw_alias in aliases_str.split(","):
            # Aliases are normalised exactly like lookups in resolve(), so
            # that entries such as "Wegen Dem" or "`wegen`" are reachable.
            alias = self._normalize(raw_alias.replace("`", ""))
            if alias:
                self.canonical_map[alias] = canonical

    def resolve(self, edge_type: str) -> str:
        """Map *edge_type* to its canonical name.

        Args:
            edge_type: Canonical name or alias, in any case, optionally with
                spaces instead of underscores.

        Returns:
            The canonical name when known; ``"related_to"`` for empty or
            whitespace-only input; otherwise the normalised input, which is
            also appended to the unknown-edge log.
        """
        if not edge_type:
            return "related_to"

        clean_type = self._normalize(edge_type)
        if not clean_type:
            return "related_to"

        if clean_type in self.canonical_map:
            return self.canonical_map[clean_type]

        self._log_unknown(clean_type)
        return clean_type

    def _log_unknown(self, edge_type: str):
        """Append *edge_type* as one JSON line to ``unknown_log_path``.

        Best effort: I/O failures are logged at debug level and never raised.
        """
        entry = {"unknown_type": edge_type, "status": "new"}
        try:
            log_dir = os.path.dirname(self.unknown_log_path)
            # A bare filename has no directory part; makedirs("") would raise.
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
            with open(self.unknown_log_path, "a", encoding="utf-8") as handle:
                handle.write(json.dumps(entry) + "\n")
        except OSError as exc:
            logger.debug("Could not record unknown edge type %r: %s", edge_type, exc)


# Default instance (uses the environment variable or ./vault)
registry = EdgeRegistry()