Script update
This commit is contained in:
parent
0ff39d7b14
commit
20b219d86c
|
|
@ -1,133 +1,95 @@
|
|||
import unittest
|
||||
"""
|
||||
FILE: app/services/edge_registry.py
|
||||
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
||||
Pfad-Logik gefixed: Nutzt MINDNET_VAULT_ROOT oder Parameter.
|
||||
"""
|
||||
import re
|
||||
import os
|
||||
import shutil
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
import logging
|
||||
from typing import Dict, Optional, Set
|
||||
|
||||
# Importiere die neuen Module
|
||||
from app.services.edge_registry import EdgeRegistry
|
||||
from app.core.retriever import _compute_total_score_v2, _get_status_multiplier
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Wir mocken Teile, um DB-Abhängigkeit zu vermeiden
|
||||
class TestWP22Intelligence(unittest.TestCase):
|
||||
class EdgeRegistry:
|
||||
_instance = None
|
||||
|
||||
def setUp(self):
|
||||
# 1. Setup Dummy Vocabulary
|
||||
self.test_vocab_path = "tests/fixtures/01_edge_vocabulary.md"
|
||||
self.test_log_path = "tests/logs/unknown_edges.jsonl"
|
||||
os.makedirs("tests/fixtures", exist_ok=True)
|
||||
os.makedirs("tests/logs", exist_ok=True)
|
||||
|
||||
with open(self.test_vocab_path, "w") as f:
|
||||
f.write("""
|
||||
| **canonical** | Aliases |
|
||||
| :--- | :--- |
|
||||
| **caused_by** | ursache_ist, wegen |
|
||||
| **next** | danach, folgt |
|
||||
""")
|
||||
def __new__(cls, vault_root: Optional[str] = None):
|
||||
if cls._instance is None:
|
||||
cls._instance = super(EdgeRegistry, cls).__new__(cls)
|
||||
cls._instance.initialized = False
|
||||
return cls._instance
|
||||
|
||||
def __init__(self, vault_root: Optional[str] = None):
|
||||
if self.initialized:
|
||||
return
|
||||
|
||||
# Reset Registry Singleton for Test
|
||||
EdgeRegistry._instance = None
|
||||
self.registry = EdgeRegistry()
|
||||
self.registry.vocab_path = self.test_vocab_path
|
||||
self.registry.unknown_log_path = self.test_log_path
|
||||
self.registry._load_vocabulary()
|
||||
# Priorität: 1. Argument -> 2. ENV -> 3. Default
|
||||
self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
||||
|
||||
# Fester relativer Pfad laut Spec
|
||||
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
||||
|
||||
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
||||
self.canonical_map: Dict[str, str] = {}
|
||||
self.valid_types: Set[str] = set()
|
||||
|
||||
self._load_vocabulary()
|
||||
self.initialized = True
|
||||
|
||||
def tearDown(self):
|
||||
# Cleanup
|
||||
if os.path.exists("tests/fixtures"):
|
||||
shutil.rmtree("tests/fixtures")
|
||||
if os.path.exists("tests/logs"):
|
||||
shutil.rmtree("tests/logs")
|
||||
def _load_vocabulary(self):
|
||||
"""Parst die Markdown-Tabelle im Vault."""
|
||||
# Absoluten Pfad auflösen
|
||||
full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
logger.warning(f"Edge Vocabulary not found at: {full_path}. Registry empty.")
|
||||
# Wir versuchen NICHT mehr diverse Pfade zu raten, um Konsistenz zu wahren.
|
||||
return
|
||||
|
||||
# --- TEIL A: EDGE REGISTRY ---
|
||||
def test_registry_resolution(self):
|
||||
print("\n--- Test A: Registry & Alias Resolution ---")
|
||||
|
||||
# 1. Canonical Check
|
||||
self.assertEqual(self.registry.resolve("caused_by"), "caused_by")
|
||||
|
||||
# 2. Alias Check
|
||||
resolved = self.registry.resolve("ursache_ist")
|
||||
print(f"Resolving 'ursache_ist' -> '{resolved}'")
|
||||
self.assertEqual(resolved, "caused_by")
|
||||
|
||||
# 3. Unknown Check & Logging
|
||||
unknown = self.registry.resolve("mystery_link")
|
||||
print(f"Resolving 'mystery_link' -> '{unknown}' (sollte durchgereicht werden)")
|
||||
self.assertEqual(unknown, "mystery_link")
|
||||
|
||||
# Check Logfile
|
||||
with open(self.test_log_path, "r") as f:
|
||||
log_content = f.read()
|
||||
self.assertIn("mystery_link", log_content)
|
||||
print("✅ Unknown edge correctly logged.")
|
||||
# Regex: | **canonical** | alias, alias |
|
||||
pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")
|
||||
|
||||
# --- TEIL B: LIFECYCLE SCORING ---
|
||||
def test_lifecycle_scoring(self):
|
||||
print("\n--- Test B: Lifecycle Scoring Math ---")
|
||||
|
||||
# Baseline: Semantic Score 0.9, keine Edges
|
||||
base_sem = 0.9
|
||||
|
||||
payload_draft = {"status": "draft", "retriever_weight": 1.0}
|
||||
payload_stable = {"status": "stable", "retriever_weight": 1.0}
|
||||
|
||||
# Mock Settings
|
||||
with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
|
||||
# Achtung: Hier rufen wir die Logik auf, die wir im Retriever implementiert haben
|
||||
# Da wir die Funktion _compute_total_score_v2 im Chat-Prompt definiert haben,
|
||||
# nutzen wir hier die Logik aus der _get_status_multiplier Helper Funktion
|
||||
|
||||
mult_draft = _get_status_multiplier(payload_draft)
|
||||
mult_stable = _get_status_multiplier(payload_stable)
|
||||
|
||||
score_draft = base_sem * mult_draft
|
||||
score_stable = base_sem * mult_stable
|
||||
|
||||
print(f"Score Draft (0.8x): {score_draft:.2f}")
|
||||
print(f"Score Stable (1.2x): {score_stable:.2f}")
|
||||
|
||||
self.assertLess(score_draft, base_sem)
|
||||
self.assertGreater(score_stable, base_sem)
|
||||
print("✅ Stable notes scored higher than drafts.")
|
||||
|
||||
# --- TEIL C: DYNAMIC EDGE BOOSTING ---
|
||||
def test_dynamic_boosting(self):
|
||||
print("\n--- Test C: Dynamic Edge Boosting ---")
|
||||
|
||||
# Szenario: Wir simulieren, dass der Graph-Adapter einen Edge-Bonus von 1.0 berechnet hat
|
||||
# Wir wollen prüfen, ob der Intent "WHY" diesen Bonus verstärkt.
|
||||
|
||||
semantic_score = 0.5
|
||||
raw_edge_bonus = 1.0 # Stark vernetzt
|
||||
|
||||
with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 1.0, 0.0)):
|
||||
# Fall 1: Normale Suche (Kein Boost)
|
||||
# Formel ca: (1.0 * 0.5) + (1.0 * 1.0) = 1.5
|
||||
from app.core.retriever import _compute_total_score
|
||||
try:
|
||||
with open(full_path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
match = pattern.search(line)
|
||||
if match:
|
||||
canonical = match.group(1).strip()
|
||||
aliases_str = match.group(2).strip()
|
||||
|
||||
self.valid_types.add(canonical)
|
||||
self.canonical_map[canonical] = canonical
|
||||
|
||||
if aliases_str and "Kein Alias" not in aliases_str:
|
||||
aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
|
||||
for alias in aliases:
|
||||
clean_alias = alias.replace("`", "")
|
||||
self.canonical_map[clean_alias] = canonical
|
||||
|
||||
score_normal, _, _ = _compute_total_score(
|
||||
semantic_score,
|
||||
{"status": "active"},
|
||||
edge_bonus=raw_edge_bonus,
|
||||
dynamic_edge_boosts=None
|
||||
)
|
||||
|
||||
# Fall 2: "WHY" Frage (Boost auf caused_by -> simuliert im Request)
|
||||
boost_map = {"caused_by": 2.0}
|
||||
score_boosted, _, _ = _compute_total_score(
|
||||
semantic_score,
|
||||
{"status": "active"},
|
||||
edge_bonus=raw_edge_bonus,
|
||||
dynamic_edge_boosts=boost_map
|
||||
)
|
||||
|
||||
print(f"Normal Score: {score_normal}")
|
||||
print(f"Boosted Score: {score_boosted}")
|
||||
|
||||
self.assertGreater(score_boosted, score_normal)
|
||||
print("✅ Dynamic Boosting increased score successfully.")
|
||||
logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
|
||||
|
||||
def resolve(self, edge_type: str) -> str:
|
||||
if not edge_type: return "related_to"
|
||||
clean_type = edge_type.lower().strip().replace(" ", "_")
|
||||
|
||||
if clean_type in self.canonical_map:
|
||||
return self.canonical_map[clean_type]
|
||||
|
||||
self._log_unknown(clean_type)
|
||||
return clean_type
|
||||
|
||||
def _log_unknown(self, edge_type: str):
|
||||
try:
|
||||
os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
|
||||
entry = {"unknown_type": edge_type, "status": "new"}
|
||||
with open(self.unknown_log_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(entry) + "\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Default Instanz (nutzt ENV oder ./vault)
|
||||
registry = EdgeRegistry()
|
||||
Loading…
Reference in New Issue
Block a user