From 74cac7e16c0403f97defeb03e10786619d890da5 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Mon, 22 Dec 2025 05:56:36 +0100
Subject: [PATCH] debug

---
 scripts/debug_edge_loss.py | 69 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 scripts/debug_edge_loss.py

diff --git a/scripts/debug_edge_loss.py b/scripts/debug_edge_loss.py
new file mode 100644
index 0000000..e88d2f3
--- /dev/null
+++ b/scripts/debug_edge_loss.py
@@ -0,0 +1,69 @@
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+# Path setup: put the current working directory first on sys.path (for the `app` imports below).
+sys.path.insert(0, os.path.abspath("."))
+
+from app.core.chunker import assemble_chunks, _extract_all_edges_from_md
+from app.core.derive_edges import build_edges_for_note
+
+# Mock value for the settings, if needed. NOTE(review): set after the app imports above.
+os.environ["MINDNET_LLM_MODEL"] = "phi3:mini" 
+
+async def analyze_file(file_path: str):
+    """Print an edge-loss diagnosis for the Markdown note at *file_path*.
+
+    Shows the pre-scan candidates for the whole text, then, per chunk, the
+    edges whose rule id is 'callout:edge' or 'inline:rel'.
+    """
+    print(f"\n=== ANALYSE: {file_path} ===")
+    text = Path(file_path).read_text(encoding="utf-8")
+    note_id = Path(file_path).stem
+
+    # 1. Global candidates: the same pre-scan call the chunker is said to make.
+    candidates = _extract_all_edges_from_md(text, note_id, "concept")
+    print("\n[1] Globale Kandidaten (Pre-Scan):")
+    for c in candidates:
+        print(f"  - {c}")
+
+    # 2. Chunk with smart edge allocation off to check physical integrity first.
+    config = {
+        "strategy": "sliding_window",
+        "target": 400,
+        "max": 600,
+        "overlap": 50,
+        "enable_smart_edge_allocation": False,
+    }
+    chunks = await assemble_chunks(note_id, text, "concept", config=config)
+    explicit_rules = ("callout:edge", "inline:rel")
+    print("\n[2] Chunk-Struktur & Physische Kanten:")
+    for i, chunk in enumerate(chunks):
+        print(f"\n--- Chunk {i} (Section: {chunk.section_path}) ---")
+        print(f"Snippet: {chunk.text[:50]}...")
+        # Minimal payload dict passed to build_edges_for_note for this raw chunk.
+        chunk_pl = {"text": chunk.text, "window": chunk.window, "chunk_id": chunk.id}
+        edges = build_edges_for_note(note_id, [chunk_pl])
+        # .get(): an edge lacking 'rule_id' or 'kind' must not abort the report.
+        found_explicitly = [
+            f"{e.get('kind')}:{e.get('target_id')}"
+            for e in edges
+            if e.get("rule_id") in explicit_rules
+        ]
+        if found_explicitly:
+            print(f"  ✅ Gefundene Explizite Kanten: {found_explicitly}")
+        else:
+            print("  ❌ Keine expliziten Kanten gefunden (trotz Callout im Text?)")
+        # Independent of the result above: note when the callout marker is present.
+        if "> [!edge]" in chunk.text:
+            print("  ℹ️  HINWEIS: '> [!edge]' String ist im Text vorhanden!")
+
+if __name__ == "__main__":
+    # Fallback path: adjust it to your problematic note, or pass a path as argv[1].
+    default_file = "./vault_master/Dein_Problematisches_File.md"
+
+    # A command-line argument, when given, takes precedence over the fallback.
+    cli_args = sys.argv[1:]
+    target_file = cli_args[0] if cli_args else default_file
+    asyncio.run(analyze_file(target_file))
\ No newline at end of file