bug fix
This commit is contained in:
parent
9a18f3cc8b
commit
3eac646cb6
|
|
@ -162,7 +162,7 @@ class IngestionService:
|
||||||
# --- WP-22: Content Lifecycle Gate ---
|
# --- WP-22: Content Lifecycle Gate ---
|
||||||
status = fm.get("status", "draft").lower().strip()
|
status = fm.get("status", "draft").lower().strip()
|
||||||
|
|
||||||
# Hard Skip für System-Dateien
|
# Hard Skip für System-Dateien (Teil A)
|
||||||
if status in ["system", "template", "archive", "hidden"]:
|
if status in ["system", "template", "archive", "hidden"]:
|
||||||
logger.info(f"Skipping file {file_path} (Status: {status})")
|
logger.info(f"Skipping file {file_path} (Status: {status})")
|
||||||
return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
|
return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
|
||||||
|
|
@ -265,7 +265,7 @@ class IngestionService:
|
||||||
except TypeError:
|
except TypeError:
|
||||||
raw_edges = build_edges_for_note(note_id, chunk_pls)
|
raw_edges = build_edges_for_note(note_id, chunk_pls)
|
||||||
|
|
||||||
# --- WP-22: Edge Registry Validation ---
|
# --- WP-22: Edge Registry Validation (Teil B) ---
|
||||||
edges = []
|
edges = []
|
||||||
if raw_edges:
|
if raw_edges:
|
||||||
for edge in raw_edges:
|
for edge in raw_edges:
|
||||||
|
|
|
||||||
|
|
@ -98,7 +98,7 @@ def _semantic_hits(
|
||||||
results.append((str(pid), float(score), dict(payload or {})))
|
results.append((str(pid), float(score), dict(payload or {})))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
# --- WP-22 Helper: Lifecycle Multipliers ---
|
# --- WP-22 Helper: Lifecycle Multipliers (Teil A) ---
|
||||||
def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
||||||
"""
|
"""
|
||||||
WP-22: Drafts werden bestraft, Stable Notes belohnt.
|
WP-22: Drafts werden bestraft, Stable Notes belohnt.
|
||||||
|
|
@ -106,10 +106,11 @@ def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
||||||
status = str(payload.get("status", "draft")).lower()
|
status = str(payload.get("status", "draft")).lower()
|
||||||
if status == "stable": return 1.2
|
if status == "stable": return 1.2
|
||||||
if status == "active": return 1.0
|
if status == "active": return 1.0
|
||||||
if status == "draft": return 0.8 # Malus für Entwürfe
|
if status == "draft": return 0.5 # Malus für Entwürfe
|
||||||
# Fallback für andere oder leere Status
|
# Fallback für andere oder leere Status
|
||||||
return 1.0
|
return 1.0
|
||||||
|
|
||||||
|
# --- WP-22: Dynamic Scoring Formula (Teil C) ---
|
||||||
def _compute_total_score(
|
def _compute_total_score(
|
||||||
semantic_score: float,
|
semantic_score: float,
|
||||||
payload: Dict[str, Any],
|
payload: Dict[str, Any],
|
||||||
|
|
@ -118,8 +119,8 @@ def _compute_total_score(
|
||||||
dynamic_edge_boosts: Dict[str, float] = None
|
dynamic_edge_boosts: Dict[str, float] = None
|
||||||
) -> Tuple[float, float, float]:
|
) -> Tuple[float, float, float]:
|
||||||
"""
|
"""
|
||||||
Berechnet total_score.
|
Berechnet total_score nach WP-22 Formel.
|
||||||
WP-22 Update: Integration von Status-Bonus und Dynamic Edge Boosts.
|
Score = (Sem * Type * Status) + (Weighted_Edge + Cent)
|
||||||
"""
|
"""
|
||||||
raw_weight = payload.get("retriever_weight", 1.0)
|
raw_weight = payload.get("retriever_weight", 1.0)
|
||||||
try:
|
try:
|
||||||
|
|
@ -132,13 +133,13 @@ def _compute_total_score(
|
||||||
sem_w, edge_w, cent_w = _get_scoring_weights()
|
sem_w, edge_w, cent_w = _get_scoring_weights()
|
||||||
status_mult = _get_status_multiplier(payload)
|
status_mult = _get_status_multiplier(payload)
|
||||||
|
|
||||||
# Dynamic Edge Boosting
|
# Dynamic Edge Boosting (Teil C)
|
||||||
# Wenn dynamische Boosts aktiv sind, erhöhen wir den Einfluss des Graphen
|
# Wenn dynamische Boosts aktiv sind (durch den Router), verstärken wir den Graph-Bonus global.
|
||||||
# Dies ist eine Vereinfachung, da der echte Boost im Subgraph passiert sein sollte.
|
# Der konkrete kanten-spezifische Boost passiert bereits im Subgraph (hybrid_retrieve).
|
||||||
final_edge_score = edge_w * edge_bonus
|
final_edge_score = edge_w * edge_bonus
|
||||||
if dynamic_edge_boosts and edge_bonus > 0:
|
if dynamic_edge_boosts and edge_bonus > 0:
|
||||||
# Globaler Boost für Graph-Signale bei spezifischen Intents
|
# Globaler Boost-Faktor falls Intention (z.B. WHY) vorliegt
|
||||||
final_edge_score *= 1.2
|
final_edge_score *= 1.5
|
||||||
|
|
||||||
total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus)
|
total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus)
|
||||||
return float(total), float(edge_bonus), float(cent_bonus)
|
return float(total), float(edge_bonus), float(cent_bonus)
|
||||||
|
|
@ -154,9 +155,8 @@ def _build_explanation(
|
||||||
subgraph: Optional[ga.Subgraph],
|
subgraph: Optional[ga.Subgraph],
|
||||||
node_key: Optional[str]
|
node_key: Optional[str]
|
||||||
) -> Explanation:
|
) -> Explanation:
|
||||||
"""Erstellt ein Explanation-Objekt."""
|
"""Erstellt ein Explanation-Objekt (WP-04b)."""
|
||||||
sem_w, _edge_w, _cent_w = _get_scoring_weights()
|
sem_w, _edge_w, _cent_w = _get_scoring_weights()
|
||||||
# Scoring weights erneut laden für Reason-Details
|
|
||||||
_, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
|
_, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -167,6 +167,7 @@ def _build_explanation(
|
||||||
status_mult = _get_status_multiplier(payload)
|
status_mult = _get_status_multiplier(payload)
|
||||||
note_type = payload.get("type", "unknown")
|
note_type = payload.get("type", "unknown")
|
||||||
|
|
||||||
|
# Breakdown Berechnung (muss mit _compute_total_score korrelieren)
|
||||||
breakdown = ScoreBreakdown(
|
breakdown = ScoreBreakdown(
|
||||||
semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
|
semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
|
||||||
edge_contribution=(edge_w_cfg * edge_bonus),
|
edge_contribution=(edge_w_cfg * edge_bonus),
|
||||||
|
|
@ -180,6 +181,7 @@ def _build_explanation(
|
||||||
reasons: List[Reason] = []
|
reasons: List[Reason] = []
|
||||||
edges_dto: List[EdgeDTO] = []
|
edges_dto: List[EdgeDTO] = []
|
||||||
|
|
||||||
|
# Reason Generation Logik (WP-04b)
|
||||||
if semantic_score > 0.85:
|
if semantic_score > 0.85:
|
||||||
reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution))
|
reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution))
|
||||||
elif semantic_score > 0.70:
|
elif semantic_score > 0.70:
|
||||||
|
|
@ -189,11 +191,13 @@ def _build_explanation(
|
||||||
msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
|
msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
|
||||||
reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))
|
reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))
|
||||||
|
|
||||||
|
# NEU: WP-22 Status Reason
|
||||||
if status_mult != 1.0:
|
if status_mult != 1.0:
|
||||||
msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
|
msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
|
||||||
reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0))
|
reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0))
|
||||||
|
|
||||||
if subgraph and node_key and edge_bonus > 0:
|
if subgraph and node_key and edge_bonus > 0:
|
||||||
|
# Extrahiere Top-Kanten für die Erklärung
|
||||||
if hasattr(subgraph, "get_outgoing_edges"):
|
if hasattr(subgraph, "get_outgoing_edges"):
|
||||||
outgoing = subgraph.get_outgoing_edges(node_key)
|
outgoing = subgraph.get_outgoing_edges(node_key)
|
||||||
for edge in outgoing:
|
for edge in outgoing:
|
||||||
|
|
@ -226,7 +230,7 @@ def _build_explanation(
|
||||||
|
|
||||||
|
|
||||||
def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
|
def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
|
||||||
"""Extrahiert depth und edge_types."""
|
"""Extrahiert depth und edge_types für Graph-Expansion."""
|
||||||
expand = getattr(req, "expand", None)
|
expand = getattr(req, "expand", None)
|
||||||
if not expand:
|
if not expand:
|
||||||
return 0, None
|
return 0, None
|
||||||
|
|
@ -259,7 +263,7 @@ def _build_hits_from_semantic(
|
||||||
explain: bool = False,
|
explain: bool = False,
|
||||||
dynamic_edge_boosts: Dict[str, float] = None
|
dynamic_edge_boosts: Dict[str, float] = None
|
||||||
) -> QueryResponse:
|
) -> QueryResponse:
|
||||||
"""Baut strukturierte QueryHits."""
|
"""Baut strukturierte QueryHits basierend auf Scoring (WP-22 & WP-04b)."""
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []
|
enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []
|
||||||
|
|
||||||
|
|
@ -278,27 +282,28 @@ def _build_hits_from_semantic(
|
||||||
except Exception:
|
except Exception:
|
||||||
cent_bonus = 0.0
|
cent_bonus = 0.0
|
||||||
|
|
||||||
total, edge_bonus, cent_bonus = _compute_total_score(
|
total, eb, cb = _compute_total_score(
|
||||||
semantic_score,
|
semantic_score,
|
||||||
payload,
|
payload,
|
||||||
edge_bonus=edge_bonus,
|
edge_bonus=edge_bonus,
|
||||||
cent_bonus=cent_bonus,
|
cent_bonus=cent_bonus,
|
||||||
dynamic_edge_boosts=dynamic_edge_boosts
|
dynamic_edge_boosts=dynamic_edge_boosts
|
||||||
)
|
)
|
||||||
enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus))
|
enriched.append((pid, float(semantic_score), payload, total, eb, cb))
|
||||||
|
|
||||||
|
# Sort & Limit
|
||||||
enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
|
enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
|
||||||
limited = enriched_sorted[: max(1, top_k)]
|
limited = enriched_sorted[: max(1, top_k)]
|
||||||
|
|
||||||
results: List[QueryHit] = []
|
results: List[QueryHit] = []
|
||||||
for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited:
|
for pid, semantic_score, payload, total, eb, cb in limited:
|
||||||
explanation_obj = None
|
explanation_obj = None
|
||||||
if explain:
|
if explain:
|
||||||
explanation_obj = _build_explanation(
|
explanation_obj = _build_explanation(
|
||||||
semantic_score=float(semantic_score),
|
semantic_score=float(semantic_score),
|
||||||
payload=payload,
|
payload=payload,
|
||||||
edge_bonus=edge_bonus,
|
edge_bonus=eb,
|
||||||
cent_bonus=cent_bonus,
|
cent_bonus=cb,
|
||||||
subgraph=subgraph,
|
subgraph=subgraph,
|
||||||
node_key=payload.get("chunk_id") or payload.get("note_id")
|
node_key=payload.get("chunk_id") or payload.get("note_id")
|
||||||
)
|
)
|
||||||
|
|
@ -307,10 +312,10 @@ def _build_hits_from_semantic(
|
||||||
|
|
||||||
results.append(QueryHit(
|
results.append(QueryHit(
|
||||||
node_id=str(pid),
|
node_id=str(pid),
|
||||||
note_id=payload.get("note_id"),
|
note_id=payload.get("note_id", "unknown"),
|
||||||
semantic_score=float(semantic_score),
|
semantic_score=float(semantic_score),
|
||||||
edge_bonus=edge_bonus,
|
edge_bonus=eb,
|
||||||
centrality_bonus=cent_bonus,
|
centrality_bonus=cb,
|
||||||
total_score=total,
|
total_score=total,
|
||||||
paths=None,
|
paths=None,
|
||||||
source={
|
source={
|
||||||
|
|
@ -327,7 +332,7 @@ def _build_hits_from_semantic(
|
||||||
|
|
||||||
|
|
||||||
def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
"""Reiner semantischer Retriever."""
|
"""Reiner semantischer Retriever (WP-02)."""
|
||||||
client, prefix = _get_client_and_prefix()
|
client, prefix = _get_client_and_prefix()
|
||||||
vector = _get_query_vector(req)
|
vector = _get_query_vector(req)
|
||||||
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
||||||
|
|
@ -337,44 +342,44 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
|
|
||||||
|
|
||||||
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
"""Hybrid-Retriever: semantische Suche + optionale Edge-Expansion."""
|
"""Hybrid-Retriever: semantische Suche + optionale Edge-Expansion (WP-04a)."""
|
||||||
client, prefix = _get_client_and_prefix()
|
client, prefix = _get_client_and_prefix()
|
||||||
if req.query_vector:
|
|
||||||
vector = list(req.query_vector)
|
|
||||||
else:
|
|
||||||
vector = _get_query_vector(req)
|
|
||||||
|
|
||||||
|
# 1. Semantische Suche
|
||||||
|
vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
|
||||||
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
||||||
hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)
|
hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)
|
||||||
|
|
||||||
|
# 2. Graph Expansion & Custom Boosting (WP-22 Teil C)
|
||||||
depth, edge_types = _extract_expand_options(req)
|
depth, edge_types = _extract_expand_options(req)
|
||||||
|
|
||||||
# WP-22: Dynamic Boosts aus dem Request (vom Router)
|
|
||||||
boost_edges = getattr(req, "boost_edges", {})
|
boost_edges = getattr(req, "boost_edges", {})
|
||||||
|
|
||||||
subgraph: ga.Subgraph | None = None
|
subgraph: ga.Subgraph | None = None
|
||||||
if depth and depth > 0:
|
if depth and depth > 0:
|
||||||
seed_ids: List[str] = []
|
seed_ids: List[str] = []
|
||||||
for _pid, _score, payload in hits:
|
for _pid, _score, payload in hits:
|
||||||
key = payload.get("chunk_id") or payload.get("note_id")
|
key = payload.get("note_id")
|
||||||
if key and key not in seed_ids:
|
if key and key not in seed_ids:
|
||||||
seed_ids.append(key)
|
seed_ids.append(key)
|
||||||
|
|
||||||
if seed_ids:
|
if seed_ids:
|
||||||
try:
|
try:
|
||||||
# Hier könnten wir boost_edges auch an expand übergeben, wenn ga.expand es unterstützt
|
# Subgraph laden
|
||||||
subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types)
|
subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types)
|
||||||
|
|
||||||
# Manuelles Boosten der Kantengewichte im Graphen falls aktiv
|
# --- WP-22: Kanten-Boosts im RAM-Graphen anwenden ---
|
||||||
|
# Dies manipuliert die Gewichte im Graphen, bevor der 'edge_bonus' berechnet wird.
|
||||||
if boost_edges and subgraph and hasattr(subgraph, "graph"):
|
if boost_edges and subgraph and hasattr(subgraph, "graph"):
|
||||||
for u, v, data in subgraph.graph.edges(data=True):
|
for u, v, data in subgraph.graph.edges(data=True):
|
||||||
k = data.get("kind")
|
k = data.get("kind")
|
||||||
if k in boost_edges:
|
if k in boost_edges:
|
||||||
# Gewicht erhöhen für diesen Query-Kontext
|
# Gewicht multiplizieren (z.B. caused_by * 3.0)
|
||||||
data["weight"] = data.get("weight", 1.0) * boost_edges[k]
|
data["weight"] = data.get("weight", 1.0) * boost_edges[k]
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
subgraph = None
|
subgraph = None
|
||||||
|
|
||||||
|
# 3. Scoring & Re-Ranking
|
||||||
return _build_hits_from_semantic(
|
return _build_hits_from_semantic(
|
||||||
hits,
|
hits,
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
|
|
@ -386,11 +391,6 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
|
|
||||||
|
|
||||||
class Retriever:
|
class Retriever:
|
||||||
"""
|
"""Wrapper-Klasse für Suchoperationen."""
|
||||||
Wrapper-Klasse für WP-05 (Chat).
|
|
||||||
"""
|
|
||||||
def __init__(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def search(self, request: QueryRequest) -> QueryResponse:
|
async def search(self, request: QueryRequest) -> QueryResponse:
|
||||||
return hybrid_retrieve(request)
|
return hybrid_retrieve(request)
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
FILE: app/services/edge_registry.py
|
FILE: app/services/edge_registry.py
|
||||||
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
||||||
WP-22 Teil B: Registry & Validation.
|
WP-22 Teil B: Registry & Validation.
|
||||||
FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt.
|
Beachtet den dynamischen Vault-Root aus ENV oder Parameter.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
|
@ -25,15 +25,11 @@ class EdgeRegistry:
|
||||||
if self.initialized:
|
if self.initialized:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Priorität 1: Übergebener Parameter (z.B. für Tests)
|
# Priorität: 1. Parameter -> 2. ENV -> 3. Default
|
||||||
# Priorität 2: Environment Variable (z.B. Production ./vault_master)
|
|
||||||
# Priorität 3: Default Fallback (./vault)
|
|
||||||
self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
||||||
|
|
||||||
# Der relative Pfad ist laut Spezifikation fest definiert
|
|
||||||
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
||||||
|
|
||||||
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
||||||
|
|
||||||
self.canonical_map: Dict[str, str] = {}
|
self.canonical_map: Dict[str, str] = {}
|
||||||
self.valid_types: Set[str] = set()
|
self.valid_types: Set[str] = set()
|
||||||
|
|
||||||
|
|
@ -42,15 +38,13 @@ class EdgeRegistry:
|
||||||
|
|
||||||
def _load_vocabulary(self):
|
def _load_vocabulary(self):
|
||||||
"""Parst die Markdown-Tabelle im Vault."""
|
"""Parst die Markdown-Tabelle im Vault."""
|
||||||
# Absoluten Pfad auflösen, um Verwirrung mit cwd zu vermeiden
|
|
||||||
full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
|
full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
|
||||||
|
|
||||||
if not os.path.exists(full_path):
|
if not os.path.exists(full_path):
|
||||||
# Wir loggen den vollen Pfad, damit Debugging einfacher ist
|
|
||||||
logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
|
logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Regex: | **canonical** | alias, alias |
|
# Regex für Markdown Tabellen: | **canonical** | Aliases | ...
|
||||||
pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")
|
pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -67,7 +61,7 @@ class EdgeRegistry:
|
||||||
if aliases_str and "Kein Alias" not in aliases_str:
|
if aliases_str and "Kein Alias" not in aliases_str:
|
||||||
aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
|
aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
|
||||||
for alias in aliases:
|
for alias in aliases:
|
||||||
clean_alias = alias.replace("`", "")
|
clean_alias = alias.replace("`", "").lower().strip()
|
||||||
self.canonical_map[clean_alias] = canonical
|
self.canonical_map[clean_alias] = canonical
|
||||||
|
|
||||||
logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
|
logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
|
||||||
|
|
@ -76,6 +70,7 @@ class EdgeRegistry:
|
||||||
logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
|
logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
|
||||||
|
|
||||||
def resolve(self, edge_type: str) -> str:
|
def resolve(self, edge_type: str) -> str:
|
||||||
|
"""Normalisiert Kanten-Typen via Registry oder loggt Unbekannte."""
|
||||||
if not edge_type: return "related_to"
|
if not edge_type: return "related_to"
|
||||||
clean_type = edge_type.lower().strip().replace(" ", "_")
|
clean_type = edge_type.lower().strip().replace(" ", "_")
|
||||||
|
|
||||||
|
|
@ -86,6 +81,7 @@ class EdgeRegistry:
|
||||||
return clean_type
|
return clean_type
|
||||||
|
|
||||||
def _log_unknown(self, edge_type: str):
|
def _log_unknown(self, edge_type: str):
|
||||||
|
"""Schreibt unbekannte Typen für Review in ein Log."""
|
||||||
try:
|
try:
|
||||||
os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
|
os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
|
||||||
entry = {"unknown_type": edge_type, "status": "new"}
|
entry = {"unknown_type": edge_type, "status": "new"}
|
||||||
|
|
@ -94,5 +90,5 @@ class EdgeRegistry:
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Default Instanz
|
# Singleton Instanz
|
||||||
registry = EdgeRegistry()
|
registry = EdgeRegistry()
|
||||||
|
|
@ -1,188 +1,97 @@
|
||||||
"""
|
"""
|
||||||
FILE: tests/test_WP22_integration.py
|
FILE: app/services/edge_registry.py
|
||||||
DESCRIPTION: Integrationstest für WP-22 (Graph Intelligence).
|
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
||||||
FIXES: Pydantic Validation & Config Caching Issues.
|
WP-22 Teil B: Registry & Validation.
|
||||||
|
FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt.
|
||||||
"""
|
"""
|
||||||
import unittest
|
import re
|
||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
import json
|
import json
|
||||||
import yaml
|
import logging
|
||||||
import asyncio
|
from typing import Dict, Optional, Set
|
||||||
from unittest.mock import MagicMock, patch, AsyncMock
|
|
||||||
|
|
||||||
# Wir importieren das Modul direkt, um auf den Cache zuzugreifen
|
logger = logging.getLogger(__name__)
|
||||||
import app.routers.chat
|
|
||||||
|
|
||||||
# DTOs und Logik
|
class EdgeRegistry:
|
||||||
from app.models.dto import ChatRequest, QueryRequest, QueryHit
|
_instance = None
|
||||||
from app.services.edge_registry import EdgeRegistry
|
|
||||||
from app.core.retriever import _compute_total_score, _get_status_multiplier
|
|
||||||
from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint
|
|
||||||
|
|
||||||
class TestWP22Integration(unittest.IsolatedAsyncioTestCase):
|
def __new__(cls, vault_root: Optional[str] = None):
|
||||||
|
if cls._instance is None:
|
||||||
|
cls._instance = super(EdgeRegistry, cls).__new__(cls)
|
||||||
|
cls._instance.initialized = False
|
||||||
|
return cls._instance
|
||||||
|
|
||||||
def setUp(self):
|
def __init__(self, vault_root: Optional[str] = None):
|
||||||
"""Bereitet eine isolierte Test-Umgebung vor."""
|
if self.initialized:
|
||||||
self.test_dir = "tests/temp_integration"
|
return
|
||||||
|
|
||||||
# 1. Environment Patching
|
# Priorität 1: Übergebener Parameter (z.B. für Tests)
|
||||||
self.os_env_patch = patch.dict(os.environ, {
|
# Priorität 2: Environment Variable (z.B. Production ./vault_master)
|
||||||
"MINDNET_VAULT_ROOT": self.test_dir,
|
# Priorität 3: Default Fallback (./vault)
|
||||||
"MINDNET_DECISION_CONFIG": os.path.join(self.test_dir, "config", "decision_engine.yaml"),
|
self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
||||||
"MINDNET_TYPES_FILE": os.path.join(self.test_dir, "config", "types.yaml")
|
|
||||||
})
|
|
||||||
self.os_env_patch.start()
|
|
||||||
|
|
||||||
# 2. Verzeichnisse erstellen
|
# Der relative Pfad ist laut Spezifikation fest definiert
|
||||||
os.makedirs(os.path.join(self.test_dir, "config"), exist_ok=True)
|
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
||||||
os.makedirs(os.path.join(self.test_dir, "01_User_Manual"), exist_ok=True)
|
|
||||||
os.makedirs(os.path.join(self.test_dir, "data", "logs"), exist_ok=True)
|
|
||||||
|
|
||||||
# 3. Config: decision_engine.yaml schreiben (Test-Definition)
|
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
||||||
self.decision_config = {
|
self.canonical_map: Dict[str, str] = {}
|
||||||
"strategies": {
|
self.valid_types: Set[str] = set()
|
||||||
"FACT": {
|
|
||||||
"trigger_keywords": ["was ist"],
|
|
||||||
"edge_boosts": {"part_of": 2.0} # Kein 'caused_by' hier!
|
|
||||||
},
|
|
||||||
"CAUSAL": {
|
|
||||||
"trigger_keywords": ["warum", "weshalb"],
|
|
||||||
"edge_boosts": {"caused_by": 3.0, "related_to": 0.5}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
with open(os.environ["MINDNET_DECISION_CONFIG"], "w") as f:
|
|
||||||
yaml.dump(self.decision_config, f)
|
|
||||||
|
|
||||||
# 4. Config: Edge Vocabulary schreiben
|
self._load_vocabulary()
|
||||||
vocab_path = os.path.join(self.test_dir, "01_User_Manual", "01_edge_vocabulary.md")
|
self.initialized = True
|
||||||
with open(vocab_path, "w") as f:
|
|
||||||
f.write("| **caused_by** | ursache_ist, wegen |\n| **part_of** | teil_von |")
|
|
||||||
|
|
||||||
# 5. CACHE RESET (WICHTIG!)
|
def _load_vocabulary(self):
|
||||||
# Damit der Router die oben geschriebene YAML auch wirklich liest:
|
"""Parst die Markdown-Tabelle im Vault."""
|
||||||
app.routers.chat._DECISION_CONFIG_CACHE = None
|
# Absoluten Pfad auflösen, um Verwirrung mit cwd zu vermeiden
|
||||||
EdgeRegistry._instance = None
|
full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
|
||||||
|
|
||||||
# Registry neu init
|
if not os.path.exists(full_path):
|
||||||
self.registry = EdgeRegistry(vault_root=self.test_dir)
|
logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
|
||||||
|
return
|
||||||
|
|
||||||
def tearDown(self):
|
# Regex: | **canonical** | alias, alias |
|
||||||
self.os_env_patch.stop()
|
pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")
|
||||||
if os.path.exists(self.test_dir):
|
|
||||||
shutil.rmtree(self.test_dir)
|
|
||||||
EdgeRegistry._instance = None
|
|
||||||
app.routers.chat._DECISION_CONFIG_CACHE = None
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------
|
try:
|
||||||
# TEST 1: Edge Registry & Validation
|
with open(full_path, "r", encoding="utf-8") as f:
|
||||||
# ------------------------------------------------------------------------
|
for line in f:
|
||||||
def test_edge_registry_aliases(self):
|
match = pattern.search(line)
|
||||||
print("\n🔵 TEST 1: Edge Registry Resolution")
|
if match:
|
||||||
resolved = self.registry.resolve("ursache_ist")
|
canonical = match.group(1).strip()
|
||||||
self.assertEqual(resolved, "caused_by")
|
aliases_str = match.group(2).strip()
|
||||||
|
|
||||||
unknown = self.registry.resolve("foobar_link")
|
self.valid_types.add(canonical)
|
||||||
self.assertEqual(unknown, "foobar_link")
|
self.canonical_map[canonical] = canonical
|
||||||
|
|
||||||
log_path = self.registry.unknown_log_path
|
if aliases_str and "Kein Alias" not in aliases_str:
|
||||||
self.assertTrue(os.path.exists(log_path))
|
aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
|
||||||
print("✅ Registry funktioniert.")
|
for alias in aliases:
|
||||||
|
clean_alias = alias.replace("`", "")
|
||||||
|
self.canonical_map[clean_alias] = canonical
|
||||||
|
|
||||||
# ------------------------------------------------------------------------
|
logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
|
||||||
# TEST 2: Lifecycle Scoring
|
|
||||||
# ------------------------------------------------------------------------
|
|
||||||
def test_lifecycle_scoring_logic(self):
|
|
||||||
print("\n🔵 TEST 2: Lifecycle Scoring")
|
|
||||||
with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
|
|
||||||
base_sem = 0.9
|
|
||||||
|
|
||||||
payload_draft = {"status": "draft", "retriever_weight": 1.0}
|
except Exception as e:
|
||||||
mult_draft = _get_status_multiplier(payload_draft)
|
logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
|
||||||
self.assertEqual(mult_draft, 0.8)
|
|
||||||
|
|
||||||
payload_stable = {"status": "stable", "retriever_weight": 1.0}
|
def resolve(self, edge_type: str) -> str:
|
||||||
mult_stable = _get_status_multiplier(payload_stable)
|
if not edge_type: return "related_to"
|
||||||
self.assertEqual(mult_stable, 1.2)
|
clean_type = edge_type.lower().strip().replace(" ", "_")
|
||||||
print("✅ Lifecycle Scoring korrekt.")
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------
|
if clean_type in self.canonical_map:
|
||||||
# TEST 3: Semantic Router & Boosting
|
return self.canonical_map[clean_type]
|
||||||
# ------------------------------------------------------------------------
|
|
||||||
async def test_router_integration(self):
|
|
||||||
print("\n🔵 TEST 3: Semantic Router Integration")
|
|
||||||
|
|
||||||
mock_llm = MagicMock()
|
self._log_unknown(clean_type)
|
||||||
mock_llm.prompts = {}
|
return clean_type
|
||||||
|
|
||||||
# Da der Cache im setUp gelöscht wurde, sollte er jetzt CAUSAL finden
|
def _log_unknown(self, edge_type: str):
|
||||||
query_causal = "Warum ist das Projekt gescheitert?"
|
try:
|
||||||
intent, source = await _classify_intent(query_causal, mock_llm)
|
os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
|
||||||
|
entry = {"unknown_type": edge_type, "status": "new"}
|
||||||
|
with open(self.unknown_log_path, "a", encoding="utf-8") as f:
|
||||||
|
f.write(json.dumps(entry) + "\n")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
self.assertEqual(intent, "CAUSAL", f"Erwartete CAUSAL, bekam {intent} via {source}")
|
# Default Instanz
|
||||||
|
registry = EdgeRegistry()
|
||||||
strategy = get_decision_strategy(intent)
|
|
||||||
boosts = strategy.get("edge_boosts", {})
|
|
||||||
self.assertEqual(boosts.get("caused_by"), 3.0)
|
|
||||||
print("✅ Router lädt Config korrekt.")
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------
|
|
||||||
# TEST 4: Full Pipeline
|
|
||||||
# ------------------------------------------------------------------------
|
|
||||||
async def test_full_pipeline_flow(self):
|
|
||||||
print("\n🔵 TEST 4: Full Chat Pipeline")
|
|
||||||
|
|
||||||
mock_llm = AsyncMock()
|
|
||||||
mock_llm.prompts = {}
|
|
||||||
mock_llm.generate_raw_response.return_value = "Antwort."
|
|
||||||
|
|
||||||
mock_retriever = AsyncMock()
|
|
||||||
# FIX: note_id hinzugefügt für Pydantic
|
|
||||||
mock_hit = QueryHit(
|
|
||||||
node_id="123",
|
|
||||||
note_id="test_note_123", # <--- WICHTIG
|
|
||||||
semantic_score=0.9,
|
|
||||||
edge_bonus=0.5,
|
|
||||||
centrality_bonus=0.0,
|
|
||||||
total_score=1.0,
|
|
||||||
source={"text": "Inhalt"},
|
|
||||||
payload={"type": "concept"}
|
|
||||||
)
|
|
||||||
mock_retriever.search.return_value.results = [mock_hit]
|
|
||||||
|
|
||||||
req = ChatRequest(message="Warum ist das passiert?", top_k=3)
|
|
||||||
|
|
||||||
response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)
|
|
||||||
|
|
||||||
called_query_req = mock_retriever.search.call_args[0][0]
|
|
||||||
self.assertEqual(called_query_req.boost_edges.get("caused_by"), 3.0)
|
|
||||||
self.assertEqual(response.intent, "CAUSAL")
|
|
||||||
print("✅ Pipeline reicht Boosts weiter.")
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------
|
|
||||||
# TEST 5: Regression Check
|
|
||||||
# ------------------------------------------------------------------------
|
|
||||||
async def test_regression_standard_query(self):
|
|
||||||
print("\n🔵 TEST 5: Regression")
|
|
||||||
|
|
||||||
mock_llm = AsyncMock()
|
|
||||||
mock_llm.prompts = {}
|
|
||||||
mock_llm.generate_raw_response.return_value = "Antwort."
|
|
||||||
|
|
||||||
mock_retriever = AsyncMock()
|
|
||||||
mock_retriever.search.return_value.results = []
|
|
||||||
|
|
||||||
req = ChatRequest(message="Was ist das?", top_k=3)
|
|
||||||
|
|
||||||
response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)
|
|
||||||
|
|
||||||
called_query_req = mock_retriever.search.call_args[0][0]
|
|
||||||
|
|
||||||
# FACT strategy hat in unserem Test Setup NUR 'part_of', KEIN 'caused_by'
|
|
||||||
self.assertEqual(response.intent, "FACT")
|
|
||||||
self.assertNotIn("caused_by", called_query_req.boost_edges or {})
|
|
||||||
print("✅ Regression Test bestanden.")
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
||||||
Loading…
Reference in New Issue
Block a user