From 74cac7e16c0403f97defeb03e10786619d890da5 Mon Sep 17 00:00:00 2001
From: Lars
Date: Mon, 22 Dec 2025 05:56:36 +0100
Subject: [PATCH] debug

---
 scripts/debug_edge_loss.py | 101 +++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 scripts/debug_edge_loss.py

diff --git a/scripts/debug_edge_loss.py b/scripts/debug_edge_loss.py
new file mode 100644
index 0000000..e88d2f3
--- /dev/null
+++ b/scripts/debug_edge_loss.py
@@ -0,0 +1,101 @@
+"""Debug helper for tracking down explicit edges that go missing per chunk.
+
+Prints the edge candidates of the pre-scan for a Markdown note, then chunks
+the note and prints, per chunk, the explicit edges that edge derivation finds.
+
+Usage:
+    python scripts/debug_edge_loss.py [path/to/note.md]
+"""
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+# Path setup: make the current working directory importable so that the
+# `app` imports below can resolve relative to it.
+sys.path.insert(0, os.path.abspath("."))
+
+from app.core.chunker import assemble_chunks, _extract_all_edges_from_md
+from app.core.derive_edges import build_edges_for_note
+
+# Mock for settings, if needed.
+# NOTE(review): this runs after the app imports above; if those modules read
+# the variable at import time the override comes too late - confirm.
+os.environ["MINDNET_LLM_MODEL"] = "phi3:mini"
+
+# rule_id values of the edges this script reports as "explicit".
+EXPLICIT_RULE_IDS = ("callout:edge", "inline:rel")
+
+
+async def analyze_file(file_path: str) -> None:
+    """Print pre-scan edge candidates and per-chunk explicit edges of a note.
+
+    Args:
+        file_path: Path to the Markdown file to analyse; its stem is used as
+            the note id.
+
+    Raises:
+        OSError: If the file cannot be opened or read.
+    """
+    print(f"\n=== ANALYSE: {file_path} ===")
+
+    with open(file_path, "r", encoding="utf-8") as f:
+        text = f.read()
+
+    # 1. Global candidates (what does the pre-scan see?)
+    # Simulate the call the chunker makes.
+    note_id = Path(file_path).stem
+    candidates = _extract_all_edges_from_md(text, note_id, "concept")
+    print("\n[1] Globale Kandidaten (Pre-Scan):")
+    for c in candidates:
+        print(f" - {c}")
+
+    # 2. Chunking (without smart edges for now, to check physical integrity).
+    # Use a profile that matches the user setup.
+    config = {
+        "strategy": "sliding_window",
+        "target": 400,
+        "max": 600,
+        "overlap": 50,
+        "enable_smart_edge_allocation": False,  # off for now
+    }
+
+    chunks = await assemble_chunks(note_id, text, "concept", config=config)
+
+    print("\n[2] Chunk-Struktur & Physische Kanten:")
+    for i, chunk in enumerate(chunks):
+        print(f"\n--- Chunk {i} (Section: {chunk.section_path}) ---")
+        print(f"Snippet: {chunk.text[:50]}...")
+
+        # What does derive_edges find in this raw chunk?
+        # Simulate the payload dict that derive_edges expects.
+        chunk_pl = {"text": chunk.text, "window": chunk.window, "chunk_id": chunk.id}
+        edges = build_edges_for_note(note_id, [chunk_pl])
+
+        # .get() so that an edge without a rule_id is skipped instead of
+        # aborting the whole report with a KeyError.
+        found_explicitly = [
+            f"{e['kind']}:{e.get('target_id')}"
+            for e in edges
+            if e.get("rule_id") in EXPLICIT_RULE_IDS
+        ]
+
+        if found_explicitly:
+            print(f" ✅ Gefundene Explizite Kanten: {found_explicitly}")
+        else:
+            print(" ❌ Keine expliziten Kanten gefunden (trotz Callout im Text?)")
+
+        # Check for a callout in the text.
+        if "> [!edge]" in chunk.text:
+            print(" ℹ️ HINWEIS: '> [!edge]' String ist im Text vorhanden!")
+
+
+if __name__ == "__main__":
+    # Adjust the path to your problematic file here!
+    target_file = "./vault_master/Dein_Problematisches_File.md"
+
+    if len(sys.argv) > 1:
+        target_file = sys.argv[1]
+
+    asyncio.run(analyze_file(target_file))