From 2b3e5546b8a8d66e7a64740b05c0a972b35b3e0a Mon Sep 17 00:00:00 2001
From: Lars
Date: Fri, 5 Sep 2025 07:26:18 +0200
Subject: [PATCH] app/core/edges.py aktualisiert

---
 app/core/edges.py | 127 ++++++++++++++++++++++++++++++++-------
 1 file changed, 88 insertions(+), 39 deletions(-)

diff --git a/app/core/edges.py b/app/core/edges.py
index 0cc0544..e928994 100644
--- a/app/core/edges.py
+++ b/app/core/edges.py
@@ -1,45 +1,94 @@
+#!/usr/bin/env python3
+"""
+app/core/edges.py — Version 1.0.0 (2025-09-05)
+
+Zweck
+-----
+Zentrale Edge-Ableitung für den Import:
+- Baut einen Note-Index (by_id/by_slug/by_file_slug) über ALLE Notizen im Vault.
+- Leitet Kanten aus Volltext (references/backlink) und aus Chunk-Texten (references_at) ab.
+
+Aufruf / Verwendung
+-------------------
+from app.core.edges import build_note_index_for_vault, deriv_edges_for_note
+
+# Einmal pro Import:
+index = build_note_index_for_vault(vault_root, exclude_dirs=[...])
+
+# Pro Note:
+edges = deriv_edges_for_note(
+    note_stub={"note_id": fm["id"], "title": fm["title"], "path": rel_path, "fulltext": parsed.body},
+    chunks_for_links=[{"chunk_id": chunk_pl["chunk_id"], "text": chunks[i].text} ...],
+    note_index=index
+)
+
+Parameter
+---------
+- vault_root: Pfad zum Obsidian Vault
+- exclude_dirs: Liste relativer Teilpfade, die ignoriert werden
+- note_stub: dict mit mind. {"note_id","title","path","fulltext"}
+- chunks_for_links: Liste dicts mit {"chunk_id","text"} (Text MUSS enthalten sein)
+- note_index: Tuple (by_id, by_slug, by_file_slug)
+
+Änderungen
+----------
+- Neu: Erst-Release. Konsolidiert die Edge-Ableitung an einer Stelle.
+"""
+
 from __future__ import annotations
-from typing import List, Dict
+import os, glob
+from typing import List, Tuple, Dict
 
-def deriv_edges_for_note(note_meta: Dict, chunk_payloads: List[Dict]) -> List[Dict]:
-    edges: List[Dict] = []
+from app.core.parser import read_markdown
+from app.core.derive_edges import build_note_index, derive_wikilink_edges
 
-    # Chunk → Note (belongs_to) + prev/next
-    for idx, ch in enumerate(chunk_payloads):
-        edges.append({
-            "src_id": ch["id"], "dst_id": note_meta["id"],
-            "edge_type": "belongs_to", "scope": "chunk"
+
+def _rel_from_abs(abs_path: str, vault_root: str) -> str:
+    rel = os.path.relpath(abs_path, vault_root).replace("\\", "/")
+    return rel
+
+
+def build_note_index_for_vault(vault_root: str, exclude_dirs: List[str] | None = None):
+    """
+    Liest alle Markdown-Dateien im Vault ein und baut einen Note-Index, der
+    Titel/Dateinamen/IDs robust auflöst.
+
+    Rückgabe: (by_id, by_slug, by_file_slug)
+    """
+    exclude_dirs = exclude_dirs or ["/.obsidian/", "/_backup_frontmatter/", "/_imported/"]
+    files = [p for p in glob.glob(os.path.join(vault_root, "**", "*.md"), recursive=True)]
+    notes_payloads: List[dict] = []
+    for abs_path in files:
+        pn = abs_path.replace("\\", "/")
+        if any(ex in pn for ex in exclude_dirs):
+            continue
+        parsed = read_markdown(abs_path)
+        fm = parsed.frontmatter or {}
+        note_id = fm.get("id") or fm.get("note_id")
+        if not note_id:
+            continue
+        title = fm.get("title") or os.path.basename(abs_path).rsplit(".", 1)[0]
+        notes_payloads.append({
+            "note_id": note_id,
+            "title": title,
+            "path": _rel_from_abs(abs_path, vault_root),
         })
-        prev_id = ch.get("neighbors",{}).get("prev")
-        next_id = ch.get("neighbors",{}).get("next")
-        if prev_id:
-            edges.append({"src_id": ch["id"], "dst_id": prev_id, "edge_type": "next", "scope": "chunk"})
-            edges.append({"src_id": prev_id, "dst_id": ch["id"], "edge_type": "prev", "scope": "chunk"})
-        if next_id:
-            edges.append({"src_id": ch["id"], "dst_id": next_id, "edge_type": "next", "scope": "chunk"})
-            edges.append({"src_id": next_id, "dst_id": ch["id"], "edge_type": "prev", "scope": "chunk"})
+    return build_note_index(notes_payloads)
 
-        # references aus wikilinks (Chunk-Scope)
-        for ref in ch.get("references", []):
-            tid = ref.get("target_id")
-            if not tid: continue
-            edges.append({"src_id": ch["id"], "dst_id": tid, "edge_type": "references", "scope": "chunk"})
-            edges.append({"src_id": tid, "dst_id": ch["id"], "edge_type": "backlink", "scope": "chunk"})
 
-    # depends_on / assigned_to (Note-Scope, aus Frontmatter)
-    for dep in note_meta.get("depends_on", []) or []:
-        edges.append({"src_id": note_meta["id"], "dst_id": dep, "edge_type": "depends_on", "scope": "note"})
-    for ass in note_meta.get("assigned_to", []) or []:
-        edges.append({"src_id": note_meta["id"], "dst_id": ass, "edge_type": "assigned_to", "scope": "note"})
-
-    # Note-Level references (optional: falls du im Note-Payload `references` sammelst)
-    for tid in note_meta.get("references", []) or []:
-        edges.append({"src_id": note_meta["id"], "dst_id": tid, "edge_type": "references", "scope": "note"})
-        edges.append({"src_id": tid, "dst_id": note_meta["id"], "edge_type": "backlink", "scope": "note"})
-
-    # Dedupe
-    uniq = {}
-    for e in edges:
-        k = (e["src_id"], e["edge_type"], e["dst_id"], e.get("scope","note"))
-        uniq[k] = e
-    return list(uniq.values())
+def deriv_edges_for_note(
+    note_stub: dict,
+    chunks_for_links: List[dict],
+    note_index: Tuple[Dict[str,dict], Dict[str,dict], Dict[str,dict]],
+) -> List[dict]:
+    """
+    Ableitung der Edges für EINE Notiz aus Volltext + Chunk-Texten.
+    Erwartet:
+    - note_stub enthält "note_id" und "fulltext"
+    - chunks_for_links: [{"chunk_id": "...", "text": "..."}]
+    """
+    if not note_stub.get("note_id"):
+        return []
+    # Die eigentliche Ableitung (inkl. references_at) liegt in derive_wikilink_edges:
+    edges = derive_wikilink_edges(note_stub, chunks_for_links, note_index)
+    return edges