Update graph_utils.py to version 1.6.1: Restore '_edge' function to address ImportError, revert to UUIDv5 for Qdrant compatibility, and maintain section logic in ID generation. Enhance documentation for clarity and refine edge ID generation process.
This commit is contained in:
Parent: 7cc823e2f4
Commit: c33b1c644a
|
|
@ -1,11 +1,12 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/core/graph/graph_utils.py
|
FILE: app/core/graph/graph_utils.py
|
||||||
DESCRIPTION: Basale Werkzeuge, ID-Generierung und Provenance-Konfiguration für den Graphen.
|
DESCRIPTION: Basale Werkzeuge, ID-Generierung und Provenance-Konfiguration für den Graphen.
|
||||||
AUDIT v1.6.0:
|
AUDIT v1.6.1:
|
||||||
- Erweitert um parse_link_target für sauberes Section-Splitting.
|
- Wiederherstellung der Funktion '_edge' (Fix für ImportError).
|
||||||
- Einführung einer gehärteten, deterministischen ID-Berechnung für Kanten (WP-24c).
|
- Rückkehr zu UUIDv5 für Qdrant-Kompatibilität (Fix für Pydantic-Crash).
|
||||||
- Integration der .env-gesteuerten Pfadauflösung für Schema und Vokabular.
|
- Beibehaltung der Section-Logik (variant) in der ID-Generierung.
|
||||||
VERSION: 1.6.0 (WP-24c: Identity & Path Enforcement)
|
- Integration der .env Pfad-Auflösung.
|
||||||
|
VERSION: 1.6.1 (WP-24c: Circular Dependency & Identity Fix)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
|
@ -18,7 +19,7 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
yaml = None
|
yaml = None
|
||||||
|
|
||||||
# WP-15b: Prioritäten-Ranking für die De-Duplizierung von Kanten unterschiedlicher Herkunft
|
# WP-15b: Prioritäten-Ranking für die De-Duplizierung
|
||||||
PROVENANCE_PRIORITY = {
|
PROVENANCE_PRIORITY = {
|
||||||
"explicit:wikilink": 1.00,
|
"explicit:wikilink": 1.00,
|
||||||
"inline:rel": 0.95,
|
"inline:rel": 0.95,
|
||||||
|
|
@ -28,7 +29,7 @@ PROVENANCE_PRIORITY = {
|
||||||
"structure:order": 0.95, # next/prev
|
"structure:order": 0.95, # next/prev
|
||||||
"explicit:note_scope": 1.00,
|
"explicit:note_scope": 1.00,
|
||||||
"derived:backlink": 0.90,
|
"derived:backlink": 0.90,
|
||||||
"edge_defaults": 0.70 # Heuristik basierend auf types.yaml
|
"edge_defaults": 0.70 # Heuristik (types.yaml)
|
||||||
}
|
}
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -48,24 +49,58 @@ def get_schema_path() -> str:
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _get(d: dict, *keys, default=None):
|
def _get(d: dict, *keys, default=None):
|
||||||
"""Sicherer Zugriff auf tief verschachtelte Dictionary-Keys."""
|
"""Sicherer Zugriff auf verschachtelte Keys."""
|
||||||
for k in keys:
|
for k in keys:
|
||||||
if isinstance(d, dict) and k in d and d[k] is not None:
|
if isinstance(d, dict) and k in d and d[k] is not None:
|
||||||
return d[k]
|
return d[k]
|
||||||
return default
|
return default
|
||||||
|
|
||||||
def _dedupe_seq(seq: Iterable[str]) -> List[str]:
|
def _dedupe_seq(seq: Iterable[str]) -> List[str]:
|
||||||
"""Dedupliziert eine Sequenz von Strings unter Beibehaltung der Reihenfolge."""
|
"""Dedupliziert Strings unter Beibehaltung der Reihenfolge."""
|
||||||
seen = set()
|
seen: Set[str] = set()
|
||||||
return [x for x in seq if not (x in seen or seen.add(x))]
|
out: List[str] = []
|
||||||
|
for s in seq:
|
||||||
|
if s not in seen:
|
||||||
|
seen.add(s); out.append(s)
|
||||||
|
return out
|
||||||
|
|
||||||
|
def _mk_edge_id(kind: str, s: str, t: str, scope: str, rule_id: Optional[str] = None, variant: Optional[str] = None) -> str:
|
||||||
|
"""
|
||||||
|
Erzeugt eine deterministische UUIDv5.
|
||||||
|
|
||||||
|
WP-Fix: Wir nutzen UUIDv5 statt BLAKE2s-Hex, um 100% kompatibel zu den
|
||||||
|
Pydantic-Erwartungen von Qdrant (Step 1) zu bleiben.
|
||||||
|
"""
|
||||||
|
# Basis-String für den deterministischen Hash
|
||||||
|
base = f"edge:{kind}:{s}->{t}#{scope}"
|
||||||
|
if rule_id:
|
||||||
|
base += f"|{rule_id}"
|
||||||
|
if variant:
|
||||||
|
base += f"|{variant}" # Ermöglicht eindeutige IDs für verschiedene Abschnitte
|
||||||
|
|
||||||
|
# Nutzt den URL-Namespace für deterministische UUIDs
|
||||||
|
return str(uuid.uuid5(uuid.NAMESPACE_URL, base))
|
||||||
|
|
||||||
|
def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
|
||||||
|
"""
|
||||||
|
Konstruiert ein Kanten-Payload für Qdrant.
|
||||||
|
Wiederhergestellt v1.6.1 (Erforderlich für graph_derive_edges.py).
|
||||||
|
"""
|
||||||
|
pl = {
|
||||||
|
"kind": kind,
|
||||||
|
"relation": kind,
|
||||||
|
"scope": scope,
|
||||||
|
"source_id": source_id,
|
||||||
|
"target_id": target_id,
|
||||||
|
"note_id": note_id,
|
||||||
|
}
|
||||||
|
if extra: pl.update(extra)
|
||||||
|
return pl
|
||||||
|
|
||||||
def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[str, Optional[str]]:
|
def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[str, Optional[str]]:
|
||||||
"""
|
"""
|
||||||
Trennt einen Obsidian-Link [[Target#Section]] in seine Bestandteile Target und Section.
|
Trennt [[Target#Section]] in Target und Section.
|
||||||
Behandelt Self-Links (z.B. [[#Ziele]]), indem die aktuelle note_id eingesetzt wird.
|
Behandelt Self-Links ('#Section'), indem current_note_id eingesetzt wird.
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple (target_id, target_section)
|
|
||||||
"""
|
"""
|
||||||
if not raw:
|
if not raw:
|
||||||
return "", None
|
return "", None
|
||||||
|
|
@ -74,64 +109,35 @@ def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[
|
||||||
target = parts[0].strip()
|
target = parts[0].strip()
|
||||||
section = parts[1].strip() if len(parts) > 1 else None
|
section = parts[1].strip() if len(parts) > 1 else None
|
||||||
|
|
||||||
# Spezialfall: Self-Link innerhalb derselben Datei
|
|
||||||
if not target and section and current_note_id:
|
if not target and section and current_note_id:
|
||||||
target = current_note_id
|
target = current_note_id
|
||||||
|
|
||||||
return target, section
|
return target, section
|
||||||
|
|
||||||
def _mk_edge_id(kind: str, source_id: str, target_id: str, scope: str = "note") -> str:
|
|
||||||
"""
|
|
||||||
WP-24c: Erzeugt eine deterministische UUIDv5 für eine Kante.
|
|
||||||
Garantiert, dass explizite Links und systemgenerierte Symmetrien dieselbe Point-ID
|
|
||||||
erzeugen, sofern Quelle und Ziel identisch aufgelöst wurden.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
kind: Typ der Relation (z.B. 'references')
|
|
||||||
source_id: Kanonische ID der Quell-Note
|
|
||||||
target_id: Kanonische ID der Ziel-Note
|
|
||||||
scope: Granularität (z.B. 'note' oder 'chunk')
|
|
||||||
"""
|
|
||||||
# Hard-Guard gegen None-Werte zur Vermeidung von Pydantic-Validierungsfehlern
|
|
||||||
if not all([kind, source_id, target_id]):
|
|
||||||
raise ValueError(f"Incomplete data for edge ID: kind={kind}, src={source_id}, tgt={target_id}")
|
|
||||||
|
|
||||||
# Stabiler Schlüssel für die Kollisions-Strategie (Authority-First)
|
|
||||||
stable_key = f"edge:{kind}:{source_id}:{target_id}:{scope}"
|
|
||||||
|
|
||||||
# Nutzt den URL-Namespace für deterministische Reproduzierbarkeit
|
|
||||||
return str(uuid.uuid5(uuid.NAMESPACE_URL, stable_key))
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Registry Operations
|
# Registry Operations
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def load_types_registry() -> dict:
|
def load_types_registry() -> dict:
|
||||||
"""
|
"""Lädt die YAML-Registry."""
|
||||||
Lädt die zentrale YAML-Registry (types.yaml).
|
|
||||||
Pfad wird über die Umgebungsvariable MINDNET_TYPES_FILE gesteuert.
|
|
||||||
"""
|
|
||||||
p = os.getenv("MINDNET_TYPES_FILE", "./config/types.yaml")
|
p = os.getenv("MINDNET_TYPES_FILE", "./config/types.yaml")
|
||||||
if not os.path.isfile(p) or yaml is None:
|
if not os.path.isfile(p) or yaml is None:
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
with open(p, "r", encoding="utf-8") as f:
|
with open(p, "r", encoding="utf-8") as f:
|
||||||
data = yaml.safe_load(f)
|
return yaml.safe_load(f) or {}
|
||||||
return data if data is not None else {}
|
|
||||||
except Exception:
|
except Exception:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def get_edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
|
def get_edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
|
||||||
"""
|
"""Ermittelt Standard-Kanten für einen Typ."""
|
||||||
Ermittelt die konfigurierten Standard-Kanten für einen Note-Typ.
|
|
||||||
Greift bei Bedarf auf die globalen ingestion_settings zurück.
|
|
||||||
"""
|
|
||||||
types_map = reg.get("types", reg) if isinstance(reg, dict) else {}
|
types_map = reg.get("types", reg) if isinstance(reg, dict) else {}
|
||||||
if note_type and isinstance(types_map, dict):
|
if note_type and isinstance(types_map, dict):
|
||||||
t_cfg = types_map.get(note_type)
|
t = types_map.get(note_type)
|
||||||
if isinstance(t_cfg, dict) and isinstance(t_cfg.get("edge_defaults"), list):
|
if isinstance(t, dict) and isinstance(t.get("edge_defaults"), list):
|
||||||
return [str(x) for x in t_cfg["edge_defaults"]]
|
return [str(x) for x in t["edge_defaults"] if isinstance(x, str)]
|
||||||
|
for key in ("defaults", "default", "global"):
|
||||||
# Fallback auf die globalen Standardwerte der Ingestion
|
v = reg.get(key)
|
||||||
cfg_def = reg.get("ingestion_settings", {})
|
if isinstance(v, dict) and isinstance(v.get("edge_defaults"), list):
|
||||||
return cfg_def.get("edge_defaults", [])
|
return [str(x) for x in v["edge_defaults"] if isinstance(x, str)]
|
||||||
|
return []
|
||||||
Loading…
Reference in New Issue
Block a user