From 7fc316d2841aabadd324f9466c78c6ae62fd8197 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Fri, 12 Dec 2025 12:04:31 +0100
Subject: [PATCH] chunker: collect smart-edge candidates via a full-text dummy chunk and fix build_edges_for_note call

---
 app/core/chunker.py | 64 ++++++++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 21 deletions(-)

diff --git a/app/core/chunker.py b/app/core/chunker.py
index e93bceb..88966dd 100644
--- a/app/core/chunker.py
+++ b/app/core/chunker.py
@@ -17,8 +17,8 @@ from app.services.semantic_analyzer import get_semantic_analyzer
 try:
     from app.core.derive_edges import build_edges_for_note
 except ImportError:
-    # Mock für Tests: Signatur muss mit dem Aufruf übereinstimmen
-    def build_edges_for_note(text, note_id, note_type, chunks=[], references=[]): return []
+    # Mock for tests: the signature must match the call site
+    def build_edges_for_note(note_id, chunks, note_level_references=None, include_note_scope_refs=False): return []
 
 logger = logging.getLogger(__name__)
 
@@ -105,7 +105,6 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
     
     fm, text_without_fm = extract_frontmatter_from_text(md_text)
     
-    # H1 suchen
     h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
     if h1_match: 
         h1_title = h1_match.group(1).strip()
@@ -218,7 +217,7 @@ def _strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id
     return _strategy_sliding_window(blocks, config, note_id, doc_title, context_prefix=f"# {doc_title}")
 
 # ==========================================
-# 4. ORCHESTRATION (ASYNC)
+# 4. ORCHESTRATION (ASYNC) - WP-15 CORE
 # ==========================================
 
 async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Optional[Dict] = None) -> List[Chunk]:
@@ -246,6 +245,7 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
         return []
 
     if enable_smart_edges:
+        # Run the smart edge allocation on the assembled chunks
         chunks = await _run_smart_edge_allocation(chunks, md_text, note_id, note_type)
 
     for i, ch in enumerate(chunks):
@@ -254,36 +254,57 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
 
     return chunks
 
+def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> List[str]:
+    """
+    Return all non-structural edges of a note as "kind:target_id" strings (deduplicated via a set, order unspecified).
+    Wraps the whole Markdown text in one dummy chunk and runs build_edges_for_note over it.
+    """
+    # 1. Build a dummy chunk that contains the entire text.
+    # This is necessary because build_edges_for_note only extracts edges from chunks.
+    dummy_chunk = {
+        "chunk_id": f"{note_id}#full",
+        "text": md_text, # The parser reads 'text' (or 'window', 'content') - per the original author; verify in derive_edges.py
+        "type": note_type
+    }
+    
+    # 2. Call the parser with the dummy chunk.
+    # IMPORTANT: follow the argument order from derive_edges.py:
+    # note_id, chunks, note_level_references=None, include_note_scope_refs=False
+    raw_edges = build_edges_for_note(
+        note_id, 
+        [dummy_chunk], 
+        note_level_references=None,
+        include_note_scope_refs=False
+    )
+    
+    # 3. Extract the edges and format them as "kind:target_id"
+    all_candidates = set()
+    for e in raw_edges:
+        # Skip structural edges (belongs_to/next/prev) that were generated for the dummy chunk
+        kind = e.get("kind")
+        target = e.get("target_id")
+        if target and kind not in ["belongs_to", "next", "prev"]:
+            all_candidates.add(f"{kind}:{target}")
+            
+    return list(all_candidates)
+
 async def _run_smart_edge_allocation(chunks: List[Chunk], full_text: str, note_id: str, note_type: str) -> List[Chunk]:
     analyzer = get_semantic_analyzer()
     
-    # FIX: Nutzung von positional arguments für die ersten 3 Parameter
-    # Dies verhindert den "multiple values for argument" Fehler
-    raw_edges = build_edges_for_note(
-        full_text,
-        note_id,
-        note_type,
-        chunks=[], 
-        references=[] 
-    )
-    
-    all_candidates = set()
-    if raw_edges:
-        for e in raw_edges:
-            if e.get("target_id") and e.get("kind") not in ["next", "prev", "belongs_to"]:
-                all_candidates.add(f"{e['kind']}:{e['target_id']}")
-    
-    candidate_list = list(all_candidates)
+    # A. Collect all potential edges of the note (via the dummy-chunk trick)
+    candidate_list = _extract_all_edges_from_md(full_text, note_id, note_type)
     
     if not candidate_list:
         return chunks
 
+    # B. LLM filtering per chunk (in parallel)
     tasks = []
     for chunk in chunks:
         tasks.append(analyzer.assign_edges_to_chunk(chunk.text, candidate_list, note_type))
     
     results_per_chunk = await asyncio.gather(*tasks)
     
+    # C. Injection & Fallback
     assigned_edges_global = set()
     
     for i, confirmed_edges in enumerate(results_per_chunk):
@@ -296,6 +317,7 @@ async def _run_smart_edge_allocation(chunks: List[Chunk], full_text: str, note_i
             chunk.text += injection_str
             chunk.window += injection_str
 
+    # D. Fallback: attach unassigned edges to every chunk
     unassigned = set(candidate_list) - assigned_edges_global
     if unassigned:
         fallback_str = "\n" + " ".join([f"[[rel:{e.split(':')[0]}|{e.split(':')[1]}]]" for e in unassigned if ':' in e])