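Bug fix: soften WP-22 draft penalty (0.5 → 0.8) and dynamic edge boost (1.5 → 1.2), seed graph expansion with chunk_id (falling back to note_id), drop the "unknown" default for note_id in query hits, and tidy WP-22 comments/docstrings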
This commit is contained in:
parent
3eac646cb6
commit
48729e6f5d
|
|
@ -162,7 +162,7 @@ class IngestionService:
|
||||||
# --- WP-22: Content Lifecycle Gate ---
|
# --- WP-22: Content Lifecycle Gate ---
|
||||||
status = fm.get("status", "draft").lower().strip()
|
status = fm.get("status", "draft").lower().strip()
|
||||||
|
|
||||||
# Hard Skip für System-Dateien (Teil A)
|
# Hard Skip für System-Dateien
|
||||||
if status in ["system", "template", "archive", "hidden"]:
|
if status in ["system", "template", "archive", "hidden"]:
|
||||||
logger.info(f"Skipping file {file_path} (Status: {status})")
|
logger.info(f"Skipping file {file_path} (Status: {status})")
|
||||||
return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
|
return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
|
||||||
|
|
@ -265,7 +265,7 @@ class IngestionService:
|
||||||
except TypeError:
|
except TypeError:
|
||||||
raw_edges = build_edges_for_note(note_id, chunk_pls)
|
raw_edges = build_edges_for_note(note_id, chunk_pls)
|
||||||
|
|
||||||
# --- WP-22: Edge Registry Validation (Teil B) ---
|
# --- WP-22: Edge Registry Validation ---
|
||||||
edges = []
|
edges = []
|
||||||
if raw_edges:
|
if raw_edges:
|
||||||
for edge in raw_edges:
|
for edge in raw_edges:
|
||||||
|
|
|
||||||
|
|
@ -98,7 +98,7 @@ def _semantic_hits(
|
||||||
results.append((str(pid), float(score), dict(payload or {})))
|
results.append((str(pid), float(score), dict(payload or {})))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
# --- WP-22 Helper: Lifecycle Multipliers (Teil A) ---
|
# --- WP-22 Helper: Lifecycle Multipliers ---
|
||||||
def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
||||||
"""
|
"""
|
||||||
WP-22: Drafts werden bestraft, Stable Notes belohnt.
|
WP-22: Drafts werden bestraft, Stable Notes belohnt.
|
||||||
|
|
@ -106,11 +106,10 @@ def _get_status_multiplier(payload: Dict[str, Any]) -> float:
|
||||||
status = str(payload.get("status", "draft")).lower()
|
status = str(payload.get("status", "draft")).lower()
|
||||||
if status == "stable": return 1.2
|
if status == "stable": return 1.2
|
||||||
if status == "active": return 1.0
|
if status == "active": return 1.0
|
||||||
if status == "draft": return 0.5 # Malus für Entwürfe
|
if status == "draft": return 0.8 # Malus für Entwürfe
|
||||||
# Fallback für andere oder leere Status
|
# Fallback für andere oder leere Status
|
||||||
return 1.0
|
return 1.0
|
||||||
|
|
||||||
# --- WP-22: Dynamic Scoring Formula (Teil C) ---
|
|
||||||
def _compute_total_score(
|
def _compute_total_score(
|
||||||
semantic_score: float,
|
semantic_score: float,
|
||||||
payload: Dict[str, Any],
|
payload: Dict[str, Any],
|
||||||
|
|
@ -119,8 +118,8 @@ def _compute_total_score(
|
||||||
dynamic_edge_boosts: Dict[str, float] = None
|
dynamic_edge_boosts: Dict[str, float] = None
|
||||||
) -> Tuple[float, float, float]:
|
) -> Tuple[float, float, float]:
|
||||||
"""
|
"""
|
||||||
Berechnet total_score nach WP-22 Formel.
|
Berechnet total_score.
|
||||||
Score = (Sem * Type * Status) + (Weighted_Edge + Cent)
|
WP-22 Update: Integration von Status-Bonus und Dynamic Edge Boosts.
|
||||||
"""
|
"""
|
||||||
raw_weight = payload.get("retriever_weight", 1.0)
|
raw_weight = payload.get("retriever_weight", 1.0)
|
||||||
try:
|
try:
|
||||||
|
|
@ -133,13 +132,13 @@ def _compute_total_score(
|
||||||
sem_w, edge_w, cent_w = _get_scoring_weights()
|
sem_w, edge_w, cent_w = _get_scoring_weights()
|
||||||
status_mult = _get_status_multiplier(payload)
|
status_mult = _get_status_multiplier(payload)
|
||||||
|
|
||||||
# Dynamic Edge Boosting (Teil C)
|
# Dynamic Edge Boosting
|
||||||
# Wenn dynamische Boosts aktiv sind (durch den Router), verstärken wir den Graph-Bonus global.
|
# Wenn dynamische Boosts aktiv sind, erhöhen wir den Einfluss des Graphen
|
||||||
# Der konkrete kanten-spezifische Boost passiert bereits im Subgraph (hybrid_retrieve).
|
# Dies ist eine Vereinfachung, da der echte Boost im Subgraph passiert sein sollte.
|
||||||
final_edge_score = edge_w * edge_bonus
|
final_edge_score = edge_w * edge_bonus
|
||||||
if dynamic_edge_boosts and edge_bonus > 0:
|
if dynamic_edge_boosts and edge_bonus > 0:
|
||||||
# Globaler Boost-Faktor falls Intention (z.B. WHY) vorliegt
|
# Globaler Boost für Graph-Signale bei spezifischen Intents
|
||||||
final_edge_score *= 1.5
|
final_edge_score *= 1.2
|
||||||
|
|
||||||
total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus)
|
total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus)
|
||||||
return float(total), float(edge_bonus), float(cent_bonus)
|
return float(total), float(edge_bonus), float(cent_bonus)
|
||||||
|
|
@ -155,8 +154,9 @@ def _build_explanation(
|
||||||
subgraph: Optional[ga.Subgraph],
|
subgraph: Optional[ga.Subgraph],
|
||||||
node_key: Optional[str]
|
node_key: Optional[str]
|
||||||
) -> Explanation:
|
) -> Explanation:
|
||||||
"""Erstellt ein Explanation-Objekt (WP-04b)."""
|
"""Erstellt ein Explanation-Objekt."""
|
||||||
sem_w, _edge_w, _cent_w = _get_scoring_weights()
|
sem_w, _edge_w, _cent_w = _get_scoring_weights()
|
||||||
|
# Scoring weights erneut laden für Reason-Details
|
||||||
_, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
|
_, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -167,7 +167,6 @@ def _build_explanation(
|
||||||
status_mult = _get_status_multiplier(payload)
|
status_mult = _get_status_multiplier(payload)
|
||||||
note_type = payload.get("type", "unknown")
|
note_type = payload.get("type", "unknown")
|
||||||
|
|
||||||
# Breakdown Berechnung (muss mit _compute_total_score korrelieren)
|
|
||||||
breakdown = ScoreBreakdown(
|
breakdown = ScoreBreakdown(
|
||||||
semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
|
semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
|
||||||
edge_contribution=(edge_w_cfg * edge_bonus),
|
edge_contribution=(edge_w_cfg * edge_bonus),
|
||||||
|
|
@ -181,7 +180,6 @@ def _build_explanation(
|
||||||
reasons: List[Reason] = []
|
reasons: List[Reason] = []
|
||||||
edges_dto: List[EdgeDTO] = []
|
edges_dto: List[EdgeDTO] = []
|
||||||
|
|
||||||
# Reason Generation Logik (WP-04b)
|
|
||||||
if semantic_score > 0.85:
|
if semantic_score > 0.85:
|
||||||
reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution))
|
reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution))
|
||||||
elif semantic_score > 0.70:
|
elif semantic_score > 0.70:
|
||||||
|
|
@ -191,13 +189,11 @@ def _build_explanation(
|
||||||
msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
|
msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
|
||||||
reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))
|
reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))
|
||||||
|
|
||||||
# NEU: WP-22 Status Reason
|
|
||||||
if status_mult != 1.0:
|
if status_mult != 1.0:
|
||||||
msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
|
msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
|
||||||
reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0))
|
reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0))
|
||||||
|
|
||||||
if subgraph and node_key and edge_bonus > 0:
|
if subgraph and node_key and edge_bonus > 0:
|
||||||
# Extrahiere Top-Kanten für die Erklärung
|
|
||||||
if hasattr(subgraph, "get_outgoing_edges"):
|
if hasattr(subgraph, "get_outgoing_edges"):
|
||||||
outgoing = subgraph.get_outgoing_edges(node_key)
|
outgoing = subgraph.get_outgoing_edges(node_key)
|
||||||
for edge in outgoing:
|
for edge in outgoing:
|
||||||
|
|
@ -230,7 +226,7 @@ def _build_explanation(
|
||||||
|
|
||||||
|
|
||||||
def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
|
def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
|
||||||
"""Extrahiert depth und edge_types für Graph-Expansion."""
|
"""Extrahiert depth und edge_types."""
|
||||||
expand = getattr(req, "expand", None)
|
expand = getattr(req, "expand", None)
|
||||||
if not expand:
|
if not expand:
|
||||||
return 0, None
|
return 0, None
|
||||||
|
|
@ -263,7 +259,7 @@ def _build_hits_from_semantic(
|
||||||
explain: bool = False,
|
explain: bool = False,
|
||||||
dynamic_edge_boosts: Dict[str, float] = None
|
dynamic_edge_boosts: Dict[str, float] = None
|
||||||
) -> QueryResponse:
|
) -> QueryResponse:
|
||||||
"""Baut strukturierte QueryHits basierend auf Scoring (WP-22 & WP-04b)."""
|
"""Baut strukturierte QueryHits."""
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []
|
enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []
|
||||||
|
|
||||||
|
|
@ -282,28 +278,27 @@ def _build_hits_from_semantic(
|
||||||
except Exception:
|
except Exception:
|
||||||
cent_bonus = 0.0
|
cent_bonus = 0.0
|
||||||
|
|
||||||
total, eb, cb = _compute_total_score(
|
total, edge_bonus, cent_bonus = _compute_total_score(
|
||||||
semantic_score,
|
semantic_score,
|
||||||
payload,
|
payload,
|
||||||
edge_bonus=edge_bonus,
|
edge_bonus=edge_bonus,
|
||||||
cent_bonus=cent_bonus,
|
cent_bonus=cent_bonus,
|
||||||
dynamic_edge_boosts=dynamic_edge_boosts
|
dynamic_edge_boosts=dynamic_edge_boosts
|
||||||
)
|
)
|
||||||
enriched.append((pid, float(semantic_score), payload, total, eb, cb))
|
enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus))
|
||||||
|
|
||||||
# Sort & Limit
|
|
||||||
enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
|
enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
|
||||||
limited = enriched_sorted[: max(1, top_k)]
|
limited = enriched_sorted[: max(1, top_k)]
|
||||||
|
|
||||||
results: List[QueryHit] = []
|
results: List[QueryHit] = []
|
||||||
for pid, semantic_score, payload, total, eb, cb in limited:
|
for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited:
|
||||||
explanation_obj = None
|
explanation_obj = None
|
||||||
if explain:
|
if explain:
|
||||||
explanation_obj = _build_explanation(
|
explanation_obj = _build_explanation(
|
||||||
semantic_score=float(semantic_score),
|
semantic_score=float(semantic_score),
|
||||||
payload=payload,
|
payload=payload,
|
||||||
edge_bonus=eb,
|
edge_bonus=edge_bonus,
|
||||||
cent_bonus=cb,
|
cent_bonus=cent_bonus,
|
||||||
subgraph=subgraph,
|
subgraph=subgraph,
|
||||||
node_key=payload.get("chunk_id") or payload.get("note_id")
|
node_key=payload.get("chunk_id") or payload.get("note_id")
|
||||||
)
|
)
|
||||||
|
|
@ -312,10 +307,10 @@ def _build_hits_from_semantic(
|
||||||
|
|
||||||
results.append(QueryHit(
|
results.append(QueryHit(
|
||||||
node_id=str(pid),
|
node_id=str(pid),
|
||||||
note_id=payload.get("note_id", "unknown"),
|
note_id=payload.get("note_id"),
|
||||||
semantic_score=float(semantic_score),
|
semantic_score=float(semantic_score),
|
||||||
edge_bonus=eb,
|
edge_bonus=edge_bonus,
|
||||||
centrality_bonus=cb,
|
centrality_bonus=cent_bonus,
|
||||||
total_score=total,
|
total_score=total,
|
||||||
paths=None,
|
paths=None,
|
||||||
source={
|
source={
|
||||||
|
|
@ -332,7 +327,7 @@ def _build_hits_from_semantic(
|
||||||
|
|
||||||
|
|
||||||
def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
"""Reiner semantischer Retriever (WP-02)."""
|
"""Reiner semantischer Retriever."""
|
||||||
client, prefix = _get_client_and_prefix()
|
client, prefix = _get_client_and_prefix()
|
||||||
vector = _get_query_vector(req)
|
vector = _get_query_vector(req)
|
||||||
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
||||||
|
|
@ -342,44 +337,44 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
|
|
||||||
|
|
||||||
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
"""Hybrid-Retriever: semantische Suche + optionale Edge-Expansion (WP-04a)."""
|
"""Hybrid-Retriever: semantische Suche + optionale Edge-Expansion."""
|
||||||
client, prefix = _get_client_and_prefix()
|
client, prefix = _get_client_and_prefix()
|
||||||
|
if req.query_vector:
|
||||||
# 1. Semantische Suche
|
vector = list(req.query_vector)
|
||||||
vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
|
else:
|
||||||
|
vector = _get_query_vector(req)
|
||||||
|
|
||||||
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
top_k = req.top_k or get_settings().RETRIEVER_TOP_K
|
||||||
hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)
|
hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)
|
||||||
|
|
||||||
# 2. Graph Expansion & Custom Boosting (WP-22 Teil C)
|
|
||||||
depth, edge_types = _extract_expand_options(req)
|
depth, edge_types = _extract_expand_options(req)
|
||||||
|
|
||||||
|
# WP-22: Dynamic Boosts aus dem Request (vom Router)
|
||||||
boost_edges = getattr(req, "boost_edges", {})
|
boost_edges = getattr(req, "boost_edges", {})
|
||||||
|
|
||||||
subgraph: ga.Subgraph | None = None
|
subgraph: ga.Subgraph | None = None
|
||||||
if depth and depth > 0:
|
if depth and depth > 0:
|
||||||
seed_ids: List[str] = []
|
seed_ids: List[str] = []
|
||||||
for _pid, _score, payload in hits:
|
for _pid, _score, payload in hits:
|
||||||
key = payload.get("note_id")
|
key = payload.get("chunk_id") or payload.get("note_id")
|
||||||
if key and key not in seed_ids:
|
if key and key not in seed_ids:
|
||||||
seed_ids.append(key)
|
seed_ids.append(key)
|
||||||
|
|
||||||
if seed_ids:
|
if seed_ids:
|
||||||
try:
|
try:
|
||||||
# Subgraph laden
|
# Hier könnten wir boost_edges auch an expand übergeben, wenn ga.expand es unterstützt
|
||||||
subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types)
|
subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types)
|
||||||
|
|
||||||
# --- WP-22: Kanten-Boosts im RAM-Graphen anwenden ---
|
# Manuelles Boosten der Kantengewichte im Graphen falls aktiv
|
||||||
# Dies manipuliert die Gewichte im Graphen, bevor der 'edge_bonus' berechnet wird.
|
|
||||||
if boost_edges and subgraph and hasattr(subgraph, "graph"):
|
if boost_edges and subgraph and hasattr(subgraph, "graph"):
|
||||||
for u, v, data in subgraph.graph.edges(data=True):
|
for u, v, data in subgraph.graph.edges(data=True):
|
||||||
k = data.get("kind")
|
k = data.get("kind")
|
||||||
if k in boost_edges:
|
if k in boost_edges:
|
||||||
# Gewicht multiplizieren (z.B. caused_by * 3.0)
|
# Gewicht erhöhen für diesen Query-Kontext
|
||||||
data["weight"] = data.get("weight", 1.0) * boost_edges[k]
|
data["weight"] = data.get("weight", 1.0) * boost_edges[k]
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
subgraph = None
|
subgraph = None
|
||||||
|
|
||||||
# 3. Scoring & Re-Ranking
|
|
||||||
return _build_hits_from_semantic(
|
return _build_hits_from_semantic(
|
||||||
hits,
|
hits,
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
|
|
@ -391,6 +386,11 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||||
|
|
||||||
|
|
||||||
class Retriever:
|
class Retriever:
|
||||||
"""Wrapper-Klasse für Suchoperationen."""
|
"""
|
||||||
|
Wrapper-Klasse für WP-05 (Chat).
|
||||||
|
"""
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
async def search(self, request: QueryRequest) -> QueryResponse:
|
async def search(self, request: QueryRequest) -> QueryResponse:
|
||||||
return hybrid_retrieve(request)
|
return hybrid_retrieve(request)
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
FILE: app/services/edge_registry.py
|
FILE: app/services/edge_registry.py
|
||||||
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
|
||||||
WP-22 Teil B: Registry & Validation.
|
WP-22 Teil B: Registry & Validation.
|
||||||
Beachtet den dynamischen Vault-Root aus ENV oder Parameter.
|
FIX: Dynamische Pfad-Auflösung basierend auf MINDNET_VAULT_ROOT.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
|
@ -25,7 +25,7 @@ class EdgeRegistry:
|
||||||
if self.initialized:
|
if self.initialized:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Priorität: 1. Parameter -> 2. ENV -> 3. Default
|
# Priorität: 1. Parameter (Test) -> 2. ENV (dotenv) -> 3. Default
|
||||||
self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
||||||
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
|
||||||
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
self.unknown_log_path = "data/logs/unknown_edges.jsonl"
|
||||||
|
|
@ -38,9 +38,11 @@ class EdgeRegistry:
|
||||||
|
|
||||||
def _load_vocabulary(self):
|
def _load_vocabulary(self):
|
||||||
"""Parst die Markdown-Tabelle im Vault."""
|
"""Parst die Markdown-Tabelle im Vault."""
|
||||||
|
# Absoluten Pfad auflösen, um Verwechslungen im venv zu vermeiden
|
||||||
full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
|
full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
|
||||||
|
|
||||||
if not os.path.exists(full_path):
|
if not os.path.exists(full_path):
|
||||||
|
# Debug-Info: Zeige wo genau gesucht wurde
|
||||||
logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
|
logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user