From 53058d150454560c2ca1cb65f0124a97b2bdb355 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Sun, 28 Dec 2025 11:49:41 +0100
Subject: [PATCH] Normalize note titles for fuzzy incoming-edge matching

---
 app/frontend/ui_graph_service.py | 51 +++++++++++++++-----------
 tests/debug_edge_search.py       | 62 ++++++++++++++++++++++++--------
 2 files changed, 78 insertions(+), 35 deletions(-)

diff --git a/app/frontend/ui_graph_service.py b/app/frontend/ui_graph_service.py
index 1d38a77..061cca7 100644
--- a/app/frontend/ui_graph_service.py
+++ b/app/frontend/ui_graph_service.py
@@ -311,31 +311,40 @@ class GraphExplorerService:
                 # ZUSÄTZLICH: Fuzzy-Matching für ähnliche Titel
                 # PROBLEM: Wikilinks können andere Titel verwenden als der gespeicherte Note-Titel
                 # Beispiel: Note-Titel = "Persönliches Leitbild (2025–2029)", aber Wikilink = "Mein Persönliches Leitbild 2025"
-                # Strategie: Prüfe, ob target_id mit dem Kern-Titel beginnt (ohne Jahreszahl/Varianten)
+                # Strategie: Normalisiere beide Titel und prüfe, ob sie ähnlich sind
                 if not any(tgt_id == t or tgt_id.startswith(t + "#") for t in note_titles_to_search):
-                    # Extrahiere Kern-Titel (ohne Jahreszahl, ohne Klammern)
+                    # Normalisiere target_id (entferne #Abschnitt)
+                    tgt_base = tgt_id.split("#")[0].strip()
+                    
+                    # Normalisiere jeden Titel und prüfe auf Ähnlichkeit
                     for title in note_titles_to_search:
-                        # Entferne Jahreszahlen und Klammern für Vergleich
-                        core_title = re.sub(r'\s*\([^)]*\)', '', title)  # Entferne (2025–2029)
-                        core_title = re.sub(r'\s+\d{4}', '', core_title)  # Entferne Jahreszahlen
-                        core_title = core_title.strip()
+                        # Normalise a title for fuzzy comparison. NOTE(review): re-created on every loop iteration; consider hoisting.
+                        def normalize_title(t):
+                            if not t:
+                                return ""
+                            # Drop parenthesised parts together with their leading whitespace, e.g. "(2025–2029)"
+                            t = re.sub(r'\s*\([^)]*\)', '', t)
+                            # Drop 4-digit years and year ranges, e.g. "2025", "2025–2029", "2025-2029"
+                            # NOTE(review): whitespace after the year is consumed too, so "A 2025 B" becomes "AB" - confirm intended
+                            t = re.sub(r'\s*\d{4}[\s–\-]*\d{0,4}', '', t)
+                            # Drop a leading "Mein"/"Meine"; anchored at position 0, so leading whitespace left above prevents a match
+                            t = re.sub(r'^(Mein|Meine)\s+', '', t, flags=re.IGNORECASE)
+                            # Collapse whitespace runs into single spaces and trim both ends
+                            t = re.sub(r'\s+', ' ', t).strip()
+                            return t.lower()  # lowercase so the comparison is case-insensitive
                         
-                        # Entferne auch "Mein/Meine" Präfixe für Vergleich
-                        core_title_clean = re.sub(r'^(Mein|Meine)\s+', '', core_title, flags=re.IGNORECASE).strip()
+                        title_norm = normalize_title(title)
+                        tgt_norm = normalize_title(tgt_base)
                         
-                        # Prüfe, ob target_id mit Kern-Titel beginnt
-                        tgt_core = re.sub(r'\s*\([^)]*\)', '', tgt_id.split("#")[0])  # Entferne Klammern aus target_id
-                        tgt_core = re.sub(r'\s+\d{4}', '', tgt_core).strip()
-                        tgt_core_clean = re.sub(r'^(Mein|Meine)\s+', '', tgt_core, flags=re.IGNORECASE).strip()
-                        
-                        # Vergleich: Wenn Kern-Titel ähnlich ist, akzeptiere
-                        if (core_title_clean and tgt_core_clean and 
-                            (tgt_core_clean.startswith(core_title_clean) or core_title_clean.startswith(tgt_core_clean)) and
-                            len(core_title_clean) > 5):  # Mindestlänge, um False Positives zu vermeiden
-                            results.append(edge)
-                            existing_edge_ids.add(edge.id)
-                            matched_count += 1
-                            break
+                        # Prüfe auf Ähnlichkeit: Entweder exakt gleich oder einer beginnt mit dem anderen
+                        if title_norm and tgt_norm and len(title_norm) > 5:
+                            if (title_norm == tgt_norm or 
+                                title_norm.startswith(tgt_norm) or 
+                                tgt_norm.startswith(title_norm)):
+                                results.append(edge)
+                                existing_edge_ids.add(edge.id)
+                                matched_count += 1
+                                break
         
         return results
 
diff --git a/tests/debug_edge_search.py b/tests/debug_edge_search.py
index c06ea5a..3c54b12 100644
--- a/tests/debug_edge_search.py
+++ b/tests/debug_edge_search.py
@@ -76,9 +76,10 @@ def find_edges_for_note(note_id: str, prefix: str = "mindnet"):
         else:
             print()
     
-    # 4. Eingehende Kanten - Titel#Abschnitt Varianten
-    print("🔍 EINGEHENDE KANTEN (target_id beginnt mit 'Titel#'):")
+    # 4. Eingehende Kanten - Titel#Abschnitt Varianten mit Fuzzy-Matching
+    print("🔍 EINGEHENDE KANTEN (target_id beginnt mit 'Titel#' + Fuzzy-Matching):")
     if note_title:
+        import re
         # Lade alle Kanten und filtere clientseitig (wie Case D in ui_graph_service.py)
         all_filter = rest.Filter(
             must=[rest.FieldCondition(key="kind", match=rest.MatchExcept(**{"except": ["prev", "next", "belongs_to"]}))]
@@ -86,17 +87,47 @@ def find_edges_for_note(note_id: str, prefix: str = "mindnet"):
         all_edges, _ = client.scroll(edges_col, scroll_filter=all_filter, limit=10000, with_payload=True)
         print(f"   Gesamt geladen: {len(all_edges)} Kanten aus der Datenbank")
         
-        # Clientseitige Filterung
-        matched = []
+        # Title normalisation helper (same steps as the one in ui_graph_service.py)
+        def normalize_title(t):
+            if not t:
+                return ""
+            t = re.sub(r'\s*\([^)]*\)', '', t)  # drop parenthesised parts, e.g. "(2025–2029)"
+            t = re.sub(r'\s*\d{4}[\s–\-]*\d{0,4}', '', t)  # drop years/ranges; NOTE(review): also eats whitespace after the year ("A 2025 B" -> "AB")
+            t = re.sub(r'^(Mein|Meine)\s+', '', t, flags=re.IGNORECASE)  # drop a leading "Mein"/"Meine" (only matches at position 0)
+            t = re.sub(r'\s+', ' ', t).strip()  # collapse whitespace runs and trim both ends
+            return t.lower()  # lowercase so the comparison is case-insensitive
+        
+        note_title_norm = normalize_title(note_title)
+        print(f"   Normalisierter Note-Titel: '{note_title_norm}'")
+        
+        # Clientseitige Filterung: Exakte Matches
+        matched_exact = []
         for e in all_edges:
             tgt_id = e.payload.get("target_id", "")
             if tgt_id and (tgt_id == note_title or tgt_id.startswith(note_title + "#")):
-                matched.append(e)
+                matched_exact.append(e)
         
-        print(f"   Gefunden: {len(matched)} Kanten (mit Titel#Abschnitt Varianten)")
+        # Clientseitige Filterung: Fuzzy-Matches
+        matched_fuzzy = []
+        for e in all_edges:
+            tgt_id = e.payload.get("target_id", "")
+            if not tgt_id or e in matched_exact:
+                continue
+            tgt_base = tgt_id.split("#")[0].strip()
+            tgt_norm = normalize_title(tgt_base)
+            if tgt_norm and note_title_norm and len(note_title_norm) > 5:
+                if (tgt_norm == note_title_norm or 
+                    tgt_norm.startswith(note_title_norm) or 
+                    note_title_norm.startswith(tgt_norm)):
+                    matched_fuzzy.append((e, tgt_norm))
+        
+        matched = matched_exact + [e for e, _ in matched_fuzzy]
+        print(f"   Gefunden: {len(matched_exact)} exakte Matches, {len(matched_fuzzy)} Fuzzy-Matches")
+        print(f"   Gesamt: {len(matched)} Kanten")
         for i, e in enumerate(matched[:10], 1):
             pl = e.payload
-            print(f"   {i}. {pl.get('kind')}: {pl.get('source_id')} -> {pl.get('target_id')}")
+            match_type = "EXAKT" if e in matched_exact else "FUZZY"
+            print(f"   {i}. [{match_type}] {pl.get('kind')}: {pl.get('source_id')} -> {pl.get('target_id')}")
         if len(matched) > 10:
             print(f"   ... und {len(matched) - 10} weitere\n")
         else:
@@ -105,14 +136,17 @@ def find_edges_for_note(note_id: str, prefix: str = "mindnet"):
         # Zeige auch einige Beispiele von target_ids, die NICHT matchen
         print("   🔍 DEBUG: Beispiel target_ids die NICHT matchen (erste 10):")
         non_matched = []
-        for e in all_edges[:100]:  # Nur erste 100 prüfen
+        for e in all_edges[:200]:  # Erste 200 prüfen
             tgt_id = e.payload.get("target_id", "")
-            if tgt_id and tgt_id != note_title and not tgt_id.startswith(note_title + "#"):
-                non_matched.append(tgt_id)
-                if len(non_matched) >= 10:
-                    break
-        for i, tgt in enumerate(non_matched, 1):
-            print(f"      {i}. '{tgt}'")
+            if not tgt_id or e in matched:
+                continue
+            tgt_base = tgt_id.split("#")[0].strip()
+            tgt_norm = normalize_title(tgt_base)
+            non_matched.append((tgt_id, tgt_norm))
+            if len(non_matched) >= 10:
+                break
+        for i, (tgt, tgt_norm) in enumerate(non_matched, 1):
+            print(f"      {i}. '{tgt}' (normalisiert: '{tgt_norm}')")
         print()
     
     # 5. Zeige Beispiel target_ids aus der Datenbank