neue version
This commit is contained in:
parent
2c3ee8efd6
commit
136c3bb43f
|
|
@ -4,7 +4,7 @@ DESCRIPTION: Haupt-Ingestion-Logik.
|
||||||
FIX: Korrekte Priorisierung von Frontmatter für chunk_profile und retriever_weight.
|
FIX: Korrekte Priorisierung von Frontmatter für chunk_profile und retriever_weight.
|
||||||
Lade Chunk-Config basierend auf dem effektiven Profil, nicht nur dem Notiz-Typ.
|
Lade Chunk-Config basierend auf dem effektiven Profil, nicht nur dem Notiz-Typ.
|
||||||
WP-22: Integration von Content Lifecycle (Status) und Edge Registry.
|
WP-22: Integration von Content Lifecycle (Status) und Edge Registry.
|
||||||
VERSION: 2.8.1 (WP-22 Lifecycle & Registry)
|
VERSION: 2.8.5 (WP-22 Lifecycle & Registry)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.core.derive_edges, app.core.qdrant*, app.services.embeddings_client, app.services.edge_registry
|
DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.core.derive_edges, app.core.qdrant*, app.services.embeddings_client, app.services.edge_registry
|
||||||
EXTERNAL_CONFIG: config/types.yaml
|
EXTERNAL_CONFIG: config/types.yaml
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
FILE: app/core/retriever.py
|
FILE: app/core/retriever.py
|
||||||
DESCRIPTION: Implementiert die Hybrid-Suche (Vektor + Graph-Expansion) und das Scoring-Modell (Explainability).
|
DESCRIPTION: Implementiert die Hybrid-Suche (Vektor + Graph-Expansion) und das Scoring-Modell (Explainability).
|
||||||
WP-22 Update: Dynamic Edge Boosting & Lifecycle Scoring.
|
WP-22 Update: Dynamic Edge Boosting & Lifecycle Scoring.
|
||||||
VERSION: 0.6.1 (WP-22 Dynamic Scoring)
|
VERSION: 0.6.5 (WP-22 Scoring Formula)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.config, app.models.dto, app.core.qdrant*, app.services.embeddings_client, app.core.graph_adapter
|
DEPENDENCIES: app.config, app.models.dto, app.core.qdrant*, app.services.embeddings_client, app.core.graph_adapter
|
||||||
LAST_ANALYSIS: 2025-12-18
|
LAST_ANALYSIS: 2025-12-18
|
||||||
|
|
@ -101,45 +101,48 @@ def _semantic_hits(
|
||||||
# --- WP-22 Helper: Lifecycle Multipliers (Teil A) ---
|
# --- WP-22 Helper: Lifecycle Multipliers (Teil A) ---
|
||||||
def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
||||||
"""
|
"""
|
||||||
WP-22: stable (1.2), active (1.0), draft (0.5).
|
WP-22: stable (1.2), active/default (1.0), draft (0.5).
|
||||||
"""
|
"""
|
||||||
status = str(payload.get("status", "active")).lower()
|
status = str(payload.get("status", "active")).lower()
|
||||||
if status == "stable": return 1.2
|
if status == "stable": return 1.2
|
||||||
if status == "active": return 1.0
|
|
||||||
if status == "draft": return 0.5
|
if status == "draft": return 0.5
|
||||||
return 1.0
|
return 1.0
|
||||||
|
|
||||||
|
# --- WP-22: Dynamic Scoring Formula (Teil C) ---
|
||||||
def _compute_total_score(
    semantic_score: float,
    payload: Dict[str, Any],
    edge_bonus_raw: float = 0.0,
    cent_bonus_raw: float = 0.0,
    dynamic_edge_boosts: Optional[Dict[str, float]] = None,
) -> Tuple[float, float, float]:
    """
    Compute the total score of a hit using the WP-22 scoring formula.

        Score = BaseScore * (1 + ConfigWeight + DynamicBoost)

    where

    - BaseScore    = semantic_score * status multiplier (lifecycle)
    - ConfigWeight = payload["retriever_weight"] - 1.0 (1.0 is neutral)
    - DynamicBoost = (edge_weight * edge_bonus_raw)
                     + (centrality_weight * cent_bonus_raw)

    If ``dynamic_edge_boosts`` is non-empty and at least one of the raw
    bonuses is positive, DynamicBoost is amplified by a factor of 1.5.

    Args:
        semantic_score: Raw semantic similarity of the hit.
        payload: Hit payload; ``status`` and ``retriever_weight`` are read.
        edge_bonus_raw: Unweighted edge bonus from the graph expansion.
        cent_bonus_raw: Unweighted centrality bonus.
        dynamic_edge_boosts: Intent-specific edge boosts; only its
            truthiness is evaluated here.

    Returns:
        ``(total_score, edge_bonus_raw, cent_bonus_raw)`` as floats; the two
        bonuses are passed through unchanged.
    """
    # 1. Base score (semantics * lifecycle)
    status_mult = _get_status_multiplier(payload)
    base_score = float(semantic_score) * status_mult

    # 2. Config weight (static type boost).
    # A missing, null or non-numeric retriever_weight must not abort scoring;
    # fall back to the neutral weight. Negative weights are clamped to 0 so
    # the type boost alone can never flip the sign of the score.
    try:
        type_weight = float(payload.get("retriever_weight", 1.0))
    except (TypeError, ValueError):
        type_weight = 1.0
    if type_weight < 0.0:
        type_weight = 0.0
    config_weight = type_weight - 1.0  # 1.0 is neutral

    # 3. Dynamic boost (graph signals); the semantic weight is not part of
    # this formula and is intentionally unused.
    _sem_w, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
    dynamic_boost = (edge_w_cfg * edge_bonus_raw) + (cent_w_cfg * cent_bonus_raw)

    # Amplify the graph signals when intent-specific boosts are present.
    if dynamic_edge_boosts and (edge_bonus_raw > 0 or cent_bonus_raw > 0):
        dynamic_boost *= 1.5

    total = base_score * (1.0 + config_weight + dynamic_boost)
    return float(total), float(edge_bonus_raw), float(cent_bonus_raw)
|
||||||
|
|
||||||
|
|
||||||
# --- WP-04b Explanation Logic ---
|
# --- WP-04b Explanation Logic ---
|
||||||
|
|
@ -153,22 +156,21 @@ def _build_explanation(
|
||||||
node_key: Optional[str]
|
node_key: Optional[str]
|
||||||
) -> Explanation:
|
) -> Explanation:
|
||||||
"""Erstellt ein Explanation-Objekt (WP-04b)."""
|
"""Erstellt ein Explanation-Objekt (WP-04b)."""
|
||||||
sem_w, _edge_w, _cent_w = _get_scoring_weights()
|
|
||||||
# Scoring weights erneut laden für Reason-Details
|
|
||||||
_, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
|
_, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
|
||||||
|
|
||||||
try:
|
type_weight = float(payload.get("retriever_weight", 1.0))
|
||||||
type_weight = float(payload.get("retriever_weight", 1.0))
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
type_weight = 1.0
|
|
||||||
|
|
||||||
status_mult = _get_status_multiplier(payload)
|
status_mult = _get_status_multiplier(payload)
|
||||||
note_type = payload.get("type", "unknown")
|
note_type = payload.get("type", "unknown")
|
||||||
|
|
||||||
|
# Breakdown für Explanation (Muss die Scoring Formel spiegeln)
|
||||||
|
config_w_impact = type_weight - 1.0
|
||||||
|
dynamic_b_impact = (edge_w_cfg * edge_bonus) + (cent_w_cfg * cent_bonus)
|
||||||
|
base_val = semantic_score * status_mult
|
||||||
|
|
||||||
breakdown = ScoreBreakdown(
|
breakdown = ScoreBreakdown(
|
||||||
semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
|
semantic_contribution=base_val,
|
||||||
edge_contribution=(edge_w_cfg * edge_bonus),
|
edge_contribution=base_val * dynamic_b_impact,
|
||||||
centrality_contribution=(cent_w_cfg * cent_bonus),
|
centrality_contribution=0.0, # In dynamic_b_impact enthalten
|
||||||
raw_semantic=semantic_score,
|
raw_semantic=semantic_score,
|
||||||
raw_edge_bonus=edge_bonus,
|
raw_edge_bonus=edge_bonus,
|
||||||
raw_centrality=cent_bonus,
|
raw_centrality=cent_bonus,
|
||||||
|
|
@ -185,9 +187,8 @@ def _build_explanation(
|
||||||
|
|
||||||
if type_weight != 1.0:
|
if type_weight != 1.0:
|
||||||
msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
|
msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
|
||||||
reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))
|
reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=base_val * config_w_impact))
|
||||||
|
|
||||||
# WP-22: Status Grund hinzufügen
|
|
||||||
if status_mult != 1.0:
|
if status_mult != 1.0:
|
||||||
msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
|
msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
|
||||||
reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status', 'unknown')}).", score_impact=0.0))
|
reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status', 'unknown')}).", score_impact=0.0))
|
||||||
|
|
@ -219,13 +220,13 @@ def _build_explanation(
|
||||||
reasons.append(Reason(kind="edge", message=f"{dir_txt} '{tgt_txt}' via '{top_edge.kind}'", score_impact=impact, details={"kind": top_edge.kind}))
|
reasons.append(Reason(kind="edge", message=f"{dir_txt} '{tgt_txt}' via '{top_edge.kind}'", score_impact=impact, details={"kind": top_edge.kind}))
|
||||||
|
|
||||||
if cent_bonus > 0.01:
|
if cent_bonus > 0.01:
|
||||||
reasons.append(Reason(kind="centrality", message="Knoten liegt zentral im Kontext.", score_impact=breakdown.centrality_contribution))
|
reasons.append(Reason(kind="centrality", message="Knoten liegt zentral im Kontext.", score_impact=cent_w_cfg * cent_bonus))
|
||||||
|
|
||||||
return Explanation(breakdown=breakdown, reasons=reasons, related_edges=edges_dto if edges_dto else None)
|
return Explanation(breakdown=breakdown, reasons=reasons, related_edges=edges_dto if edges_dto else None)
|
||||||
|
|
||||||
|
|
||||||
def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
|
def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
|
||||||
"""Extrahiert depth und edge_types für die Graph-Expansion."""
|
"""Extrahiert depth und edge_types für die Expansion."""
|
||||||
expand = getattr(req, "expand", None)
|
expand = getattr(req, "expand", None)
|
||||||
if not expand:
|
if not expand:
|
||||||
return 0, None
|
return 0, None
|
||||||
|
|
@ -258,7 +259,7 @@ def _build_hits_from_semantic(
|
||||||
explain: bool = False,
|
explain: bool = False,
|
||||||
dynamic_edge_boosts: Dict[str, float] = None
|
dynamic_edge_boosts: Dict[str, float] = None
|
||||||
) -> QueryResponse:
|
) -> QueryResponse:
|
||||||
"""Baut strukturierte QueryHits basierend auf den berechneten Scores."""
|
"""Baut strukturierte QueryHits basierend auf Hybrid-Scoring."""
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []
|
enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []
|
||||||
|
|
||||||
|
|
@ -277,27 +278,27 @@ def _build_hits_from_semantic(
|
||||||
except Exception:
|
except Exception:
|
||||||
cent_bonus = 0.0
|
cent_bonus = 0.0
|
||||||
|
|
||||||
total, edge_bonus, cent_bonus = _compute_total_score(
|
total, eb, cb = _compute_total_score(
|
||||||
semantic_score,
|
semantic_score,
|
||||||
payload,
|
payload,
|
||||||
edge_bonus=edge_bonus,
|
edge_bonus_raw=edge_bonus,
|
||||||
cent_bonus=cent_bonus,
|
cent_bonus_raw=cent_bonus,
|
||||||
dynamic_edge_boosts=dynamic_edge_boosts
|
dynamic_edge_boosts=dynamic_edge_boosts
|
||||||
)
|
)
|
||||||
enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus))
|
enriched.append((pid, float(semantic_score), payload, total, eb, cb))
|
||||||
|
|
||||||
enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
|
enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
|
||||||
limited = enriched_sorted[: max(1, top_k)]
|
limited = enriched_sorted[: max(1, top_k)]
|
||||||
|
|
||||||
results: List[QueryHit] = []
|
results: List[QueryHit] = []
|
||||||
for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited:
|
for pid, semantic_score, payload, total, eb, cb in limited:
|
||||||
explanation_obj = None
|
explanation_obj = None
|
||||||
if explain:
|
if explain:
|
||||||
explanation_obj = _build_explanation(
|
explanation_obj = _build_explanation(
|
||||||
semantic_score=float(semantic_score),
|
semantic_score=float(semantic_score),
|
||||||
payload=payload,
|
payload=payload,
|
||||||
edge_bonus=edge_bonus,
|
edge_bonus=eb,
|
||||||
cent_bonus=cent_bonus,
|
cent_bonus=cb,
|
||||||
subgraph=subgraph,
|
subgraph=subgraph,
|
||||||
node_key=payload.get("chunk_id") or payload.get("note_id")
|
node_key=payload.get("chunk_id") or payload.get("note_id")
|
||||||
)
|
)
|
||||||
|
|
@ -308,8 +309,8 @@ def _build_hits_from_semantic(
|
||||||
node_id=str(pid),
|
node_id=str(pid),
|
||||||
note_id=payload.get("note_id", "unknown"),
|
note_id=payload.get("note_id", "unknown"),
|
||||||
semantic_score=float(semantic_score),
|
semantic_score=float(semantic_score),
|
||||||
edge_bonus=edge_bonus,
|
edge_bonus=eb,
|
||||||
centrality_bonus=cent_bonus,
|
centrality_bonus=cb,
|
||||||
total_score=total,
|
total_score=total,
|
||||||
paths=None,
|
paths=None,
|
||||||
source={
|
source={
|
||||||
|
|
@ -348,7 +349,7 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
|
|
||||||
depth, edge_types = _extract_expand_options(req)
|
depth, edge_types = _extract_expand_options(req)
|
||||||
|
|
||||||
# WP-22: Dynamic Boosts aus dem Request (vom Router) (Teil C)
|
# WP-22: Dynamic Boosts aus dem Request (vom Router)
|
||||||
boost_edges = getattr(req, "boost_edges", {})
|
boost_edges = getattr(req, "boost_edges", {})
|
||||||
|
|
||||||
subgraph: ga.Subgraph | None = None
|
subgraph: ga.Subgraph | None = None
|
||||||
|
|
|
||||||
|
|
@ -286,7 +286,7 @@ async def chat_endpoint(
|
||||||
mode="hybrid",
|
mode="hybrid",
|
||||||
top_k=request.top_k,
|
top_k=request.top_k,
|
||||||
explain=request.explain,
|
explain=request.explain,
|
||||||
# WP-22: Boosts weitergeben
|
# WP-22: Boosts an den Retriever weitergeben
|
||||||
boost_edges=edge_boosts
|
boost_edges=edge_boosts
|
||||||
)
|
)
|
||||||
retrieve_result = await retriever.search(query_req)
|
retrieve_result = await retriever.search(query_req)
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
FILE: app/services/edge_registry.py
|
FILE: app/services/edge_registry.py
|
||||||
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
||||||
WP-22 Teil B: Registry & Validation.
|
WP-22 Teil B: Registry & Validation.
|
||||||
Beachtet den dynamischen Vault-Root aus ENV oder Parameter.
|
FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
|
@ -27,8 +27,8 @@ class EdgeRegistry:
|
||||||
if self.initialized:
|
if self.initialized:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Priorität: 1. Parameter (Test) -> 2. ENV -> 3. Default
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
# Priorität: 1. Parameter (Test) -> 2. Config (.env) -> 3. Default
|
|
||||||
self.vault_root = vault_root or getattr(settings, "MINDNET_VAULT_ROOT", "./vault")
|
self.vault_root = vault_root or getattr(settings, "MINDNET_VAULT_ROOT", "./vault")
|
||||||
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
||||||
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
||||||
|
|
@ -67,35 +67,31 @@ class EdgeRegistry:
|
||||||
clean_alias = alias.replace("`", "").lower().strip()
|
clean_alias = alias.replace("`", "").lower().strip()
|
||||||
self.canonical_map[clean_alias] = canonical
|
self.canonical_map[clean_alias] = canonical
|
||||||
|
|
||||||
logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} canonical types.")
|
logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
|
logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")
|
||||||
|
|
||||||
def resolve(self, edge_type: str) -> str:
    """
    Normalise an edge type via the registry, or log it as unknown.

    The input is lower-cased, stripped and its spaces are replaced by
    underscores. If the normalised name is a key of ``self.canonical_map``
    (canonical name or alias), the mapped canonical type is returned.
    Otherwise the normalised name is passed to ``self._log_unknown`` and
    returned unchanged, so unknown types are kept rather than discarded.

    Args:
        edge_type: Raw edge type as found in a note.

    Returns:
        The canonical edge type, the normalised unknown type, or
        ``"related_to"`` when the input is empty or whitespace-only.
    """
    if not edge_type:
        return "related_to"

    clean_type = edge_type.lower().strip().replace(" ", "_")
    if not clean_type:
        # Whitespace-only input: treat like an empty type instead of logging
        # and returning an empty string as edge type.
        return "related_to"

    if clean_type in self.canonical_map:
        return self.canonical_map[clean_type]

    self._log_unknown(clean_type)
    return clean_type
|
||||||
|
|
||||||
def _log_unknown(self, edge_type: str):
|
def _log_unknown(self, edge_type: str):
|
||||||
"""Schreibt unbekannte Typen für späteres Review in ein Log-File."""
|
"""Schreibt unbekannte Typen für Review in ein Log."""
|
||||||
try:
|
try:
|
||||||
os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
|
os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
|
||||||
entry = {"unknown_type": edge_type, "status": "new"}
|
entry = {"unknown_type": edge_type, "status": "new"}
|
||||||
with open(self.unknown_log_path, "a", encoding="utf-8") as f:
|
with open(self.unknown_log_path, "a", encoding="utf-8") as f:
|
||||||
f.write(json.dumps(entry) + "\n")
|
f.write(json.dumps(entry) + "\n")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Globale Singleton Instanz
|
# Singleton Instanz
|
||||||
registry = EdgeRegistry()
|
registry = EdgeRegistry()
|
||||||
|
|
@ -42,7 +42,7 @@ strategies:
|
||||||
part_of: 2.0
|
part_of: 2.0
|
||||||
composed_of: 2.0
|
composed_of: 2.0
|
||||||
similar_to: 1.5
|
similar_to: 1.5
|
||||||
caused_by: 0.5 # Kausalität ist hier oft Rauschen
|
caused_by: 0.5
|
||||||
prompt_template: "rag_template"
|
prompt_template: "rag_template"
|
||||||
prepend_instruction: null
|
prepend_instruction: null
|
||||||
|
|
||||||
|
|
@ -61,8 +61,8 @@ strategies:
|
||||||
inject_types: ["value", "principle", "goal", "risk"]
|
inject_types: ["value", "principle", "goal", "risk"]
|
||||||
# WP-22: Risiken und Konsequenzen hervorheben
|
# WP-22: Risiken und Konsequenzen hervorheben
|
||||||
edge_boosts:
|
edge_boosts:
|
||||||
blocks: 2.5 # Blocker/Risiken sind kritisch
|
blocks: 2.5
|
||||||
solves: 2.0 # Lösungen sind relevant
|
solves: 2.0
|
||||||
depends_on: 1.5
|
depends_on: 1.5
|
||||||
risk_of: 2.5
|
risk_of: 2.5
|
||||||
prompt_template: "decision_template"
|
prompt_template: "decision_template"
|
||||||
|
|
@ -85,10 +85,10 @@ strategies:
|
||||||
inject_types: ["experience", "belief", "profile"]
|
inject_types: ["experience", "belief", "profile"]
|
||||||
# WP-22: Weiche Assoziationen & Erfahrungen stärken
|
# WP-22: Weiche Assoziationen & Erfahrungen stärken
|
||||||
edge_boosts:
|
edge_boosts:
|
||||||
based_on: 2.0 # Werte-Bezug
|
based_on: 2.0
|
||||||
related_to: 2.0 # Assoziatives Denken
|
related_to: 2.0
|
||||||
experienced_in: 2.5
|
experienced_in: 2.5
|
||||||
blocks: 0.1 # Stressoren ausblenden
|
blocks: 0.1
|
||||||
prompt_template: "empathy_template"
|
prompt_template: "empathy_template"
|
||||||
prepend_instruction: null
|
prepend_instruction: null
|
||||||
|
|
||||||
|
|
@ -108,14 +108,14 @@ strategies:
|
||||||
inject_types: ["snippet", "reference", "source"]
|
inject_types: ["snippet", "reference", "source"]
|
||||||
# WP-22: Technische Abhängigkeiten
|
# WP-22: Technische Abhängigkeiten
|
||||||
edge_boosts:
|
edge_boosts:
|
||||||
uses: 2.5 # Tool-Nutzung
|
uses: 2.5
|
||||||
depends_on: 2.0
|
depends_on: 2.0
|
||||||
implemented_in: 3.0
|
implemented_in: 3.0
|
||||||
prompt_template: "technical_template"
|
prompt_template: "technical_template"
|
||||||
prepend_instruction: null
|
prepend_instruction: null
|
||||||
|
|
||||||
# 5. Interview / Datenerfassung
|
# 5. Interview / Datenerfassung
|
||||||
# HINWEIS: Spezifische Typen (Projekt, Ziel etc.) werden automatisch
|
# HINWEIS: Spezifische Typen (Projekt, Ziel etc.) werden automatisch
|
||||||
# über die types.yaml erkannt. Hier stehen nur generische Trigger.
|
# über die types.yaml erkannt. Hier stehen nur generische Trigger.
|
||||||
INTERVIEW:
|
INTERVIEW:
|
||||||
description: "Der User möchte Wissen erfassen."
|
description: "Der User möchte Wissen erfassen."
|
||||||
|
|
@ -131,10 +131,9 @@ strategies:
|
||||||
- "idee speichern"
|
- "idee speichern"
|
||||||
- "draft"
|
- "draft"
|
||||||
inject_types: []
|
inject_types: []
|
||||||
edge_boosts: {} # Kein Retrieval im Interview Modus
|
edge_boosts: {}
|
||||||
prompt_template: "interview_template"
|
prompt_template: "interview_template"
|
||||||
prepend_instruction: null
|
prepend_instruction: null
|
||||||
|
|
||||||
# Schemas: Hier nur der Fallback.
|
# Schemas: Hier nur der Fallback.
|
||||||
# Spezifische Schemas (Project, Experience) kommen jetzt aus types.yaml!
|
# Spezifische Schemas (Project, Experience) kommen jetzt aus types.yaml!
|
||||||
schemas:
|
schemas:
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
"""
|
"""
|
||||||
FILE: tests/test_WP22_intelligence.py
|
FILE: tests/test_WP22_intelligence.py
|
||||||
DESCRIPTION: Integrationstest für WP-22.
|
DESCRIPTION: Integrationstest für WP-22.
|
||||||
FIX: Erzwingt Pfad-Synchronisation für Registry & Router.
|
FIX: Erzwingt Pfad-Synchronisation für Registry & Router. Behebt Pydantic Validation Errors.
|
||||||
"""
|
"""
|
||||||
import unittest
|
import unittest
|
||||||
import os
|
import os
|
||||||
|
|
@ -10,88 +10,136 @@ import yaml
|
||||||
import asyncio
|
import asyncio
|
||||||
from unittest.mock import MagicMock, patch, AsyncMock
|
from unittest.mock import MagicMock, patch, AsyncMock
|
||||||
|
|
||||||
|
# --- Modul-Caching Fix: Wir müssen Caches leeren ---
|
||||||
import app.routers.chat
|
import app.routers.chat
|
||||||
from app.models.dto import ChatRequest, QueryHit, QueryRequest
|
from app.models.dto import ChatRequest, QueryHit, QueryRequest
|
||||||
from app.services.edge_registry import EdgeRegistry
|
from app.services.edge_registry import EdgeRegistry
|
||||||
from app.core.retriever import _compute_total_score, _get_status_multiplier
|
from app.core.retriever import _compute_total_score, _get_status_multiplier
|
||||||
from app.routers.chat import _classify_intent, chat_endpoint
|
from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint
|
||||||
|
|
||||||
class TestWP22Integration(unittest.IsolatedAsyncioTestCase):
    """
    Integration tests for WP-22 (edge registry, lifecycle scoring, routing).

    Every resource acquired in ``asyncSetUp`` (temp directory, settings
    patches, global caches) is released through ``addCleanup``. Unlike a
    tear-down method, cleanups also run when set-up fails half-way, so the
    patched ``get_settings`` never leaks into other tests.
    """

    async def asyncSetUp(self):
        """Prepare an isolated test environment."""
        # 'vault_master' is simulated as a directory below the test root.
        self.test_root = os.path.abspath("tests/temp_wp22")
        self.test_vault = os.path.join(self.test_root, "vault_master")
        self.test_config_dir = os.path.join(self.test_root, "config")

        # 1. Create paths (register removal right away; cleanups run LIFO,
        # so the directory is removed last).
        self.addCleanup(shutil.rmtree, self.test_root, ignore_errors=True)
        os.makedirs(os.path.join(self.test_vault, "01_User_Manual"), exist_ok=True)
        os.makedirs(self.test_config_dir, exist_ok=True)
        os.makedirs(os.path.join(self.test_root, "data/logs"), exist_ok=True)

        # 2. Write config files (mock config)
        self.decision_path = os.path.join(self.test_config_dir, "decision_engine.yaml")
        self.decision_config = {
            "strategies": {
                "FACT": {
                    "trigger_keywords": ["was ist"],
                    "edge_boosts": {"part_of": 2.0},
                },
                "CAUSAL": {
                    "trigger_keywords": ["warum"],
                    "edge_boosts": {"caused_by": 3.0},
                },
            }
        }
        with open(self.decision_path, "w", encoding="utf-8") as f:
            yaml.dump(self.decision_config, f)

        # 3. Vocabulary file at the expected location relative to test_vault
        self.vocab_path = os.path.join(self.test_vault, "01_User_Manual/01_edge_vocabulary.md")
        with open(self.vocab_path, "w", encoding="utf-8") as f:
            f.write("| System-Typ | Aliases |\n| :--- | :--- |\n| **caused_by** | ursache_ist |\n| **part_of** | teil_von |")

        # 4. Mocking / resetting global state:
        # force get_settings to return the test paths.
        self.mock_settings = MagicMock()
        self.mock_settings.DECISION_CONFIG_PATH = self.decision_path
        self.mock_settings.MINDNET_VAULT_ROOT = self.test_vault
        self.mock_settings.RETRIEVER_TOP_K = 5
        self.mock_settings.MODEL_NAME = "test-model"

        # Patch get_settings in all relevant modules.
        self.patch_settings_chat = patch('app.routers.chat.get_settings', return_value=self.mock_settings)
        self.patch_settings_registry = patch('app.services.edge_registry.get_settings', return_value=self.mock_settings)

        self.patch_settings_chat.start()
        self.addCleanup(self.patch_settings_chat.stop)
        self.patch_settings_registry.start()
        self.addCleanup(self.patch_settings_registry.stop)

        # Caches must be cleared before and after every test.
        self.addCleanup(self._reset_global_state)
        self._reset_global_state()

        # Force registry initialisation with the test path.
        self.registry = EdgeRegistry(vault_root=self.test_vault)
        self.registry.unknown_log_path = os.path.join(self.test_root, "data/logs/unknown.jsonl")

    @staticmethod
    def _reset_global_state():
        """Reset the registry singleton and the decision-config cache."""
        EdgeRegistry._instance = None
        app.routers.chat._DECISION_CONFIG_CACHE = None

    def test_registry_resolution(self):
        print("\n🔵 TEST 1: Registry Pfad & Alias Resolution")
        # Check that the vocabulary file was found.
        self.assertTrue(len(self.registry.valid_types) > 0, f"Registry leer! Root: {self.registry.vault_root}")
        self.assertEqual(self.registry.resolve("ursache_ist"), "caused_by")
        print("✅ Registry OK.")

    def test_scoring_math(self):
        print("\n🔵 TEST 2: Scoring Math (Lifecycle)")
        with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 1.0, 0.0)):
            # Stable (1.2)
            self.assertEqual(_get_status_multiplier({"status": "stable"}), 1.2)
            # Draft (0.5)
            self.assertEqual(_get_status_multiplier({"status": "draft"}), 0.5)

            # Scoring formula: BaseScore * (1 + ConfigWeight + DynamicBoost)
            # BaseScore    = 0.5 (sem) * 1.2 (stable) = 0.6
            # ConfigWeight = 1.0 (neutral) - 1.0 = 0.0
            # DynamicBoost = (1.0 * 0.5) = 0.5
            # Total        = 0.6 * (1 + 0 + 0.5) = 0.9
            total, _, _ = _compute_total_score(0.5, {"status": "stable", "retriever_weight": 1.0}, edge_bonus_raw=0.5)
            self.assertAlmostEqual(total, 0.9)
        print("✅ Scoring OK.")

    async def test_router_intent(self):
        print("\n🔵 TEST 3: Intent Classification")
        mock_llm = MagicMock()
        intent, _ = await _classify_intent("Warum ist das so?", mock_llm)
        self.assertEqual(intent, "CAUSAL")
        print("✅ Routing OK.")

    async def test_full_flow(self):
        print("\n🔵 TEST 4: End-to-End Pipeline & Dynamic Boosting")
        mock_llm = AsyncMock()
        mock_llm.prompts = {}
        mock_llm.generate_raw_response.return_value = "Test Antwort"

        mock_retriever = AsyncMock()
        # note_id chosen so that it passes Pydantic validation.
        mock_hit = QueryHit(
            node_id="c1", note_id="test_note_n1", semantic_score=0.8, edge_bonus=0.0,
            centrality_bonus=0.0, total_score=0.8, source={"text": "t"},
            payload={"status": "active", "type": "concept"}
        )
        mock_retriever.search.return_value.results = [mock_hit]

        req = ChatRequest(message="Warum ist das passiert?", top_k=1)
        resp = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)

        # Verify intent
        self.assertEqual(resp.intent, "CAUSAL")

        # Verify that the boosts reached the retriever
        called_req = mock_retriever.search.call_args[0][0]
        self.assertEqual(called_req.boost_edges.get("caused_by"), 3.0)
        print("✅ Full Flow & Boosting OK.")
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
Loading…
Reference in New Issue
Block a user