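Bug fix: extend [!edge] callout parsing (header types, multi-line lists), default collection prefix from config, skip nodes without note_id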

2025-12-29 11:00:00 +01:00 · 2025-12-29 11:00:00 +01:00 · 857ba953e3
commit 857ba953e3
parent e180018c99
2 changed files with 91 additions and 22 deletions
--- a/app/core/graph/graph_extractors.py
+++ b/app/core/graph/graph_extractors.py
@@ -1,13 +1,14 @@
 """
 FILE: app/core/graph/graph_extractors.py
 DESCRIPTION: Regex-basierte Extraktion von Relationen aus Text.
-             AUDIT: Regex für Wikilinks liberalisiert (Umlaute, Sonderzeichen Support).
+             AUDIT: 
+             - Regex für Wikilinks liberalisiert (Umlaute, Sonderzeichen).
+             - Callout-Parser erweitert für Multi-Line-Listen und Header-Typen.
 """
 import re
 from typing import List, Tuple

-# Fix: Erlaube alle Zeichen außer ']' im Target, statt nur a-z0-9.
-# Das fängt Umlaute, Emojis, '&', '#' und Leerzeichen ab.
+# Erlaube alle Zeichen außer ']' im Target (fängt Umlaute, Emojis, '&', '#' ab)
 _WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([^\]]+)\]\]")

 _REL_PIPE  = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s*\|\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
@@ -15,8 +16,10 @@ _REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\
 _REL_TEXT  = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)

 _CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE)
+# Erkennt "kind: targets..."
 _REL_LINE      = re.compile(r"^(?P<kind>[a-z_]+)\s*:\s*(?P<targets>.+?)\s*$", re.IGNORECASE)
-_WIKILINKS_IN_LINE = re.compile(r"\[\[([^\]]+)\]\]")
+# Erkennt reine Typen (z.B. "depends_on" im Header)
+_SIMPLE_KIND   = re.compile(r"^[a-z_]+$", re.IGNORECASE)

 def extract_typed_relations(text: str) -> Tuple[List[Tuple[str, str]], str]:
    """
@@ -35,28 +38,90 @@ def extract_typed_relations(text: str) -> Tuple[List[Tuple[str, str]], str]:
    return pairs, text

 def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
-    """Verarbeitet Obsidian [!edge]-Callouts."""
+    """
+    Verarbeitet Obsidian [!edge]-Callouts.
+    Unterstützt zwei Formate:
+    1. Explizit: "kind: [[Target]]"
+    2. Implizit (Header): "> [!edge] kind" gefolgt von "[[Target]]" Zeilen
+    """
    if not text: return [], text
-    lines = text.splitlines(); out_pairs, keep_lines, i = [], [], 0
+    lines = text.splitlines()
+    out_pairs = []
+    keep_lines = []
+    i = 0
+    
    while i < len(lines):
-        m = _CALLOUT_START.match(lines[i])
+        line = lines[i]
+        m = _CALLOUT_START.match(line)
        if not m:
-            keep_lines.append(lines[i]); i += 1; continue
-        block_lines = [m.group(1)] if m.group(1).strip() else []
+            keep_lines.append(line)
+            i += 1
+            continue
+        
+        # Callout-Block gefunden. Wir sammeln alle relevanten Zeilen.
+        block_lines = []
+        
+        # Header Content prüfen (z.B. "type" aus "> [!edge] type")
+        header_raw = m.group(1).strip()
+        if header_raw:
+            block_lines.append(header_raw)
+            
        i += 1
        while i < len(lines) and lines[i].lstrip().startswith('>'):
-            block_lines.append(lines[i].lstrip()[1:].lstrip()); i += 1
+            # Entferne '>' und führende Leerzeichen
+            content = lines[i].lstrip()[1:].lstrip()
+            if content:
+                block_lines.append(content)
+            i += 1
+            
+        # Verarbeitung des Blocks
+        current_kind = None
+        
+        # Heuristik: Ist die allererste Zeile (meist aus dem Header) ein reiner Typ?
+        # Dann setzen wir diesen als Default für den Block.
+        if block_lines:
+            first = block_lines[0]
+            # Wenn es NICHT wie "Key: Value" aussieht, aber wie ein Wort:
+            if not _REL_LINE.match(first) and _SIMPLE_KIND.match(first):
+                current_kind = first.lower()
+                
        for bl in block_lines:
+            # 1. Prüfen auf explizites "Kind: Targets" (überschreibt Header-Typ für diese Zeile)
            mrel = _REL_LINE.match(bl)
-            if not mrel: continue
-            kind, targets = mrel.group("kind").strip().lower(), mrel.group("targets") or ""
-            found = _WIKILINKS_IN_LINE.findall(targets)
+            if mrel:
+                line_kind = mrel.group("kind").strip().lower()
+                targets = mrel.group("targets")
+                
+                # Links extrahieren
+                found = _WIKILINK_RE.findall(targets)
+                if found:
+                    for t in found: out_pairs.append((line_kind, t.strip()))
+                else:
+                    # Fallback für kommagetrennten Plaintext
+                    for raw in re.split(r"[,;]", targets):
+                        if raw.strip(): out_pairs.append((line_kind, raw.strip()))
+                
+                # Design-Entscheidung (lokaler Scope): Eine explizite "kind: ..."-Zeile
+                # gilt nur für ihre eigenen Targets und ändert current_kind NICHT.
+                # Der Header setzt den Default, eine Zeile überschreibt ihn nur lokal.
+                # Nachfolgende reine Link-Zeilen nutzen daher weiterhin den Header-Typ
+                # (bzw. werden ohne Header-Typ ignoriert).
+                continue
+            
+            # 2. Kein Key:Value Muster -> Prüfen auf Links, die den current_kind nutzen
+            found = _WIKILINK_RE.findall(bl)
            if found:
-                for t in found: out_pairs.append((kind, t.strip()))
+                if current_kind:
+                    for t in found: out_pairs.append((current_kind, t.strip()))
+                else:
+                    # Link ohne Typ und ohne Header-Typ.
+                    # Wird ignoriert oder könnte als 'related_to' fallback dienen.
+                    # Aktuell: Ignorieren, um False Positives zu vermeiden.
+                    pass
+
    return out_pairs, "\n".join(keep_lines)

 def extract_wikilinks(text: str) -> List[str]:
    """Findet Standard-Wikilinks [[Target]] oder [[Alias|Target]]."""
    if not text: return []
-    # match.group(1) ist jetzt das Target (dank des fixierten Regex)
    return [m.strip() for m in _WIKILINK_RE.findall(text) if m.strip()]
--- a/app/frontend/ui_graph_service.py
+++ b/app/frontend/ui_graph_service.py
@@ -10,15 +10,19 @@ LAST_ANALYSIS: 2025-12-15
 import re
 from qdrant_client import QdrantClient, models
 from streamlit_agraph import Node, Edge
-from ui_config import GRAPH_COLORS, get_edge_color, SYSTEM_EDGES
+from ui_config import COLLECTION_PREFIX, GRAPH_COLORS, get_edge_color, SYSTEM_EDGES

 class GraphExplorerService:
-    def __init__(self, url, api_key=None, prefix="mindnet"):
+    def __init__(self, url, api_key=None, prefix=None):
+        """
+        Initialisiert den Service. Nutzt COLLECTION_PREFIX aus der Config, 
+        sofern kein spezifischer Prefix übergeben wurde.
+        """
        self.client = QdrantClient(url=url, api_key=api_key)
-        self.prefix = prefix
-        self.notes_col = f"{prefix}_notes"
-        self.chunks_col = f"{prefix}_chunks"
-        self.edges_col = f"{prefix}_edges"
+        self.prefix = prefix if prefix else COLLECTION_PREFIX
+        self.notes_col = f"{self.prefix}_notes"
+        self.chunks_col = f"{self.prefix}_chunks"
+        self.edges_col = f"{self.prefix}_edges"
        self._note_cache = {} 

    def get_note_with_full_content(self, note_id):
@@ -421,7 +425,7 @@ class GraphExplorerService:

    def _add_node_to_dict(self, node_dict, note_payload, level=1):
        nid = note_payload.get("note_id")
-        if nid in node_dict: return
+        if not nid or nid in node_dict: return
        
        ntype = note_payload.get("type", "default")
        color = GRAPH_COLORS.get(ntype, GRAPH_COLORS["default"])