WP24c - Agentic Edge Validation & Chunk-Aware Multigraph-System (v4.5.8) #22
|
|
@ -1,11 +1,12 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/core/graph/graph_utils.py
|
FILE: app/core/graph/graph_utils.py
|
||||||
DESCRIPTION: Basale Werkzeuge, ID-Generierung und Provenance-Konfiguration für den Graphen.
|
DESCRIPTION: Basale Werkzeuge, ID-Generierung und Provenance-Konfiguration für den Graphen.
|
||||||
AUDIT v1.6.0:
|
AUDIT v1.6.1:
|
||||||
- Erweitert um parse_link_target für sauberes Section-Splitting.
|
- Wiederherstellung der Funktion '_edge' (Fix für ImportError).
|
||||||
- Einführung einer gehärteten, deterministischen ID-Berechnung für Kanten (WP-24c).
|
- Rückkehr zu UUIDv5 für Qdrant-Kompatibilität (Fix für Pydantic-Crash).
|
||||||
- Integration der .env-gesteuerten Pfadauflösung für Schema und Vokabular.
|
- Beibehaltung der Section-Logik (variant) in der ID-Generierung.
|
||||||
VERSION: 1.6.0 (WP-24c: Identity & Path Enforcement)
|
- Integration der .env Pfad-Auflösung.
|
||||||
|
VERSION: 1.6.1 (WP-24c: Circular Dependency & Identity Fix)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
|
@ -18,7 +19,7 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
yaml = None
|
yaml = None
|
||||||
|
|
||||||
# WP-15b: Prioritäten-Ranking für die De-Duplizierung von Kanten unterschiedlicher Herkunft
|
# WP-15b: Prioritäten-Ranking für die De-Duplizierung
|
||||||
PROVENANCE_PRIORITY = {
|
PROVENANCE_PRIORITY = {
|
||||||
"explicit:wikilink": 1.00,
|
"explicit:wikilink": 1.00,
|
||||||
"inline:rel": 0.95,
|
"inline:rel": 0.95,
|
||||||
|
|
@ -28,7 +29,7 @@ PROVENANCE_PRIORITY = {
|
||||||
"structure:order": 0.95, # next/prev
|
"structure:order": 0.95, # next/prev
|
||||||
"explicit:note_scope": 1.00,
|
"explicit:note_scope": 1.00,
|
||||||
"derived:backlink": 0.90,
|
"derived:backlink": 0.90,
|
||||||
"edge_defaults": 0.70 # Heuristik basierend auf types.yaml
|
"edge_defaults": 0.70 # Heuristik (types.yaml)
|
||||||
}
|
}
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -48,24 +49,58 @@ def get_schema_path() -> str:
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _get(d: dict, *keys, default=None):
|
def _get(d: dict, *keys, default=None):
|
||||||
"""Sicherer Zugriff auf tief verschachtelte Dictionary-Keys."""
|
"""Sicherer Zugriff auf verschachtelte Keys."""
|
||||||
for k in keys:
|
for k in keys:
|
||||||
if isinstance(d, dict) and k in d and d[k] is not None:
|
if isinstance(d, dict) and k in d and d[k] is not None:
|
||||||
return d[k]
|
return d[k]
|
||||||
return default
|
return default
|
||||||
|
|
||||||
def _dedupe_seq(seq: Iterable[str]) -> List[str]:
|
def _dedupe_seq(seq: Iterable[str]) -> List[str]:
|
||||||
"""Dedupliziert eine Sequenz von Strings unter Beibehaltung der Reihenfolge."""
|
"""Dedupliziert Strings unter Beibehaltung der Reihenfolge."""
|
||||||
seen = set()
|
seen: Set[str] = set()
|
||||||
return [x for x in seq if not (x in seen or seen.add(x))]
|
out: List[str] = []
|
||||||
|
for s in seq:
|
||||||
|
if s not in seen:
|
||||||
|
seen.add(s); out.append(s)
|
||||||
|
return out
|
||||||
|
|
||||||
|
def _mk_edge_id(kind: str, s: str, t: str, scope: str, rule_id: Optional[str] = None, variant: Optional[str] = None) -> str:
|
||||||
|
"""
|
||||||
|
Erzeugt eine deterministische UUIDv5.
|
||||||
|
|
||||||
|
WP-Fix: Wir nutzen UUIDv5 statt BLAKE2s-Hex, um 100% kompatibel zu den
|
||||||
|
Pydantic-Erwartungen von Qdrant (Step 1) zu bleiben.
|
||||||
|
"""
|
||||||
|
# Basis-String für den deterministischen Hash
|
||||||
|
base = f"edge:{kind}:{s}->{t}#{scope}"
|
||||||
|
if rule_id:
|
||||||
|
base += f"|{rule_id}"
|
||||||
|
if variant:
|
||||||
|
base += f"|{variant}" # Ermöglicht eindeutige IDs für verschiedene Abschnitte
|
||||||
|
|
||||||
|
# Nutzt den URL-Namespace für deterministische UUIDs
|
||||||
|
return str(uuid.uuid5(uuid.NAMESPACE_URL, base))
|
||||||
|
|
||||||
|
def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
|
||||||
|
"""
|
||||||
|
Konstruiert ein Kanten-Payload für Qdrant.
|
||||||
|
Wiederhergestellt v1.6.1 (Erforderlich für graph_derive_edges.py).
|
||||||
|
"""
|
||||||
|
pl = {
|
||||||
|
"kind": kind,
|
||||||
|
"relation": kind,
|
||||||
|
"scope": scope,
|
||||||
|
"source_id": source_id,
|
||||||
|
"target_id": target_id,
|
||||||
|
"note_id": note_id,
|
||||||
|
}
|
||||||
|
if extra: pl.update(extra)
|
||||||
|
return pl
|
||||||
|
|
||||||
def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[str, Optional[str]]:
|
def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[str, Optional[str]]:
|
||||||
"""
|
"""
|
||||||
Trennt einen Obsidian-Link [[Target#Section]] in seine Bestandteile Target und Section.
|
Trennt [[Target#Section]] in Target und Section.
|
||||||
Behandelt Self-Links (z.B. [[#Ziele]]), indem die aktuelle note_id eingesetzt wird.
|
Behandelt Self-Links ('#Section'), indem current_note_id eingesetzt wird.
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple (target_id, target_section)
|
|
||||||
"""
|
"""
|
||||||
if not raw:
|
if not raw:
|
||||||
return "", None
|
return "", None
|
||||||
|
|
@ -74,64 +109,35 @@ def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[
|
||||||
target = parts[0].strip()
|
target = parts[0].strip()
|
||||||
section = parts[1].strip() if len(parts) > 1 else None
|
section = parts[1].strip() if len(parts) > 1 else None
|
||||||
|
|
||||||
# Spezialfall: Self-Link innerhalb derselben Datei
|
|
||||||
if not target and section and current_note_id:
|
if not target and section and current_note_id:
|
||||||
target = current_note_id
|
target = current_note_id
|
||||||
|
|
||||||
return target, section
|
return target, section
|
||||||
|
|
||||||
def _mk_edge_id(kind: str, source_id: str, target_id: str, scope: str = "note") -> str:
|
|
||||||
"""
|
|
||||||
WP-24c: Erzeugt eine deterministische UUIDv5 für eine Kante.
|
|
||||||
Garantiert, dass explizite Links und systemgenerierte Symmetrien dieselbe Point-ID
|
|
||||||
erzeugen, sofern Quelle und Ziel identisch aufgelöst wurden.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
kind: Typ der Relation (z.B. 'references')
|
|
||||||
source_id: Kanonische ID der Quell-Note
|
|
||||||
target_id: Kanonische ID der Ziel-Note
|
|
||||||
scope: Granularität (z.B. 'note' oder 'chunk')
|
|
||||||
"""
|
|
||||||
# Hard-Guard gegen None-Werte zur Vermeidung von Pydantic-Validierungsfehlern
|
|
||||||
if not all([kind, source_id, target_id]):
|
|
||||||
raise ValueError(f"Incomplete data for edge ID: kind={kind}, src={source_id}, tgt={target_id}")
|
|
||||||
|
|
||||||
# Stabiler Schlüssel für die Kollisions-Strategie (Authority-First)
|
|
||||||
stable_key = f"edge:{kind}:{source_id}:{target_id}:{scope}"
|
|
||||||
|
|
||||||
# Nutzt den URL-Namespace für deterministische Reproduzierbarkeit
|
|
||||||
return str(uuid.uuid5(uuid.NAMESPACE_URL, stable_key))
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Registry Operations
|
# Registry Operations
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def load_types_registry() -> dict:
|
def load_types_registry() -> dict:
|
||||||
"""
|
"""Lädt die YAML-Registry."""
|
||||||
Lädt die zentrale YAML-Registry (types.yaml).
|
|
||||||
Pfad wird über die Umgebungsvariable MINDNET_TYPES_FILE gesteuert.
|
|
||||||
"""
|
|
||||||
p = os.getenv("MINDNET_TYPES_FILE", "./config/types.yaml")
|
p = os.getenv("MINDNET_TYPES_FILE", "./config/types.yaml")
|
||||||
if not os.path.isfile(p) or yaml is None:
|
if not os.path.isfile(p) or yaml is None:
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
with open(p, "r", encoding="utf-8") as f:
|
with open(p, "r", encoding="utf-8") as f:
|
||||||
data = yaml.safe_load(f)
|
return yaml.safe_load(f) or {}
|
||||||
return data if data is not None else {}
|
except Exception:
|
||||||
except Exception:
|
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def get_edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
|
def get_edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
|
||||||
"""
|
"""Ermittelt Standard-Kanten für einen Typ."""
|
||||||
Ermittelt die konfigurierten Standard-Kanten für einen Note-Typ.
|
|
||||||
Greift bei Bedarf auf die globalen ingestion_settings zurück.
|
|
||||||
"""
|
|
||||||
types_map = reg.get("types", reg) if isinstance(reg, dict) else {}
|
types_map = reg.get("types", reg) if isinstance(reg, dict) else {}
|
||||||
if note_type and isinstance(types_map, dict):
|
if note_type and isinstance(types_map, dict):
|
||||||
t_cfg = types_map.get(note_type)
|
t = types_map.get(note_type)
|
||||||
if isinstance(t_cfg, dict) and isinstance(t_cfg.get("edge_defaults"), list):
|
if isinstance(t, dict) and isinstance(t.get("edge_defaults"), list):
|
||||||
return [str(x) for x in t_cfg["edge_defaults"]]
|
return [str(x) for x in t["edge_defaults"] if isinstance(x, str)]
|
||||||
|
for key in ("defaults", "default", "global"):
|
||||||
# Fallback auf die globalen Standardwerte der Ingestion
|
v = reg.get(key)
|
||||||
cfg_def = reg.get("ingestion_settings", {})
|
if isinstance(v, dict) and isinstance(v.get("edge_defaults"), list):
|
||||||
return cfg_def.get("edge_defaults", [])
|
return [str(x) for x in v["edge_defaults"] if isinstance(x, str)]
|
||||||
|
return []
|
||||||
Loading…
Reference in New Issue
Block a user