This commit is contained in:
parent
0c2dc61cb5
commit
74cac7e16c
69
scripts/debug_edge_loss.py
Normal file
69
scripts/debug_edge_loss.py
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Path setup: put the current working directory first on the import path
|
||||||
|
sys.path.insert(0, os.path.abspath("."))
|
||||||
|
|
||||||
|
from app.core.chunker import assemble_chunks, _extract_all_edges_from_md
|
||||||
|
from app.core.derive_edges import build_edges_for_note
|
||||||
|
|
||||||
|
# Mock for settings, if needed
|
||||||
|
os.environ["MINDNET_LLM_MODEL"] = "phi3:mini"
|
||||||
|
|
||||||
|
async def analyze_file(file_path: str) -> None:
    """Print a step-by-step edge diagnosis for a single markdown note.

    The report has two parts:

    1. The candidates returned by ``_extract_all_edges_from_md`` for the
       whole note text.
    2. For every chunk returned by ``assemble_chunks`` (with smart edge
       allocation switched off), the edges that ``build_edges_for_note``
       yields when given only that chunk, restricted to the rule ids
       ``callout:edge`` and ``inline:rel``.

    Args:
        file_path: Path of the markdown file to analyse. The file is read
            as UTF-8 and its stem is used as the note id.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    print(f"\n=== ANALYSE: {file_path} ===")

    source = Path(file_path)
    text = source.read_text(encoding="utf-8")

    # 1. Global candidates: what does the pre-scan see?
    # Simulates the call the chunker makes.
    note_id = source.stem
    candidates = _extract_all_edges_from_md(text, note_id, "concept")
    print("\n[1] Globale Kandidaten (Pre-Scan):")
    for candidate in candidates:
        print(f" - {candidate}")

    # 2. Chunking, without smart edges for now, to check physical integrity.
    # The profile is meant to mirror the user's setup.
    config = {
        "strategy": "sliding_window",
        "target": 400,
        "max": 600,
        "overlap": 50,
        "enable_smart_edge_allocation": False,  # off for now
    }

    chunks = await assemble_chunks(note_id, text, "concept", config=config)

    # Rule ids that count as explicitly authored edges in this report.
    explicit_rule_ids = ("callout:edge", "inline:rel")

    print("\n[2] Chunk-Struktur & Physische Kanten:")
    for i, chunk in enumerate(chunks):
        print(f"\n--- Chunk {i} (Section: {chunk.section_path}) ---")
        print(f"Snippet: {chunk.text[:50]}...")

        # What does derive_edges find in this raw chunk?
        # Build the payload dict handed to build_edges_for_note.
        chunk_pl = {"text": chunk.text, "window": chunk.window, "chunk_id": chunk.id}
        edges = build_edges_for_note(note_id, [chunk_pl])

        # Tolerant lookups: an edge dict lacking 'rule_id' or 'kind' must not
        # abort the diagnosis with a KeyError.
        found_explicitly = [
            f"{e.get('kind')}:{e.get('target_id')}"
            for e in edges
            if e.get("rule_id") in explicit_rule_ids
        ]

        if found_explicitly:
            print(f" ✅ Gefundene Explizite Kanten: {found_explicitly}")
        else:
            print(" ❌ Keine expliziten Kanten gefunden (trotz Callout im Text?)")

            # Check whether the callout marker is present in the chunk text.
            if "> [!edge]" in chunk.text:
                print(" ℹ️ HINWEIS: '> [!edge]' String ist im Text vorhanden!")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # The default is a placeholder for the problematic note; pass the real
    # file as the first command-line argument or adjust the path here.
    cli_args = sys.argv[1:]
    target_file = cli_args[0] if cli_args else "./vault_master/Dein_Problematisches_File.md"

    asyncio.run(analyze_file(target_file))
|
||||||
Loading…
Reference in New Issue
Block a user