mindnet/app/core/edges.py
Lars 2b3e5546b8
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 1s
app/core/edges.py aktualisiert
2025-09-05 07:26:18 +02:00

95 lines
3.2 KiB
Python

#!/usr/bin/env python3
"""
app/core/edges.py — Version 1.0.0 (2025-09-05)
Zweck
-----
Zentrale Edge-Ableitung für den Import:
- Baut einen Note-Index (by_id/by_slug/by_file_slug) über ALLE Notizen im Vault.
- Leitet Kanten aus Volltext (references/backlink) und aus Chunk-Texten (references_at) ab.
Aufruf / Verwendung
-------------------
from app.core.edges import build_note_index_for_vault, deriv_edges_for_note
# Einmal pro Import:
index = build_note_index_for_vault(vault_root, exclude_dirs=[...])
# Pro Note:
edges = deriv_edges_for_note(
note_stub={"note_id": fm["id"], "title": fm["title"], "path": rel_path, "fulltext": parsed.body},
chunks_for_links=[{"chunk_id": chunk_pl["chunk_id"], "text": chunks[i].text} ...],
note_index=index
)
Parameter
---------
- vault_root: Pfad zum Obsidian Vault
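- exclude_dirs: Liste von Teilpfad-Mustern (z. B. "/_imported/"), die per Teilstring-Vergleich gegen den Dateipfad geprüft werden; fehlt die Liste oder ist sie leer, gelten die Standard-Ausschlüsse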
- note_stub: dict mit mind. {"note_id","title","path","fulltext"}
- chunks_for_links: Liste dicts mit {"chunk_id","text"} (Text MUSS enthalten sein)
- note_index: Tuple (by_id, by_slug, by_file_slug)
Änderungen
----------
- Neu: Erst-Release. Konsolidiert die Edge-Ableitung an einer Stelle.
"""
from __future__ import annotations
import os, glob
from typing import List, Tuple, Dict
from app.core.parser import read_markdown
from app.core.derive_edges import build_note_index, derive_wikilink_edges
def _rel_from_abs(abs_path: str, vault_root: str) -> str:
rel = os.path.relpath(abs_path, vault_root).replace("\\", "/")
return rel
def build_note_index_for_vault(vault_root: str, exclude_dirs: List[str] | None = None):
    """
    Read all Markdown files below ``vault_root`` and build the note index.

    Files are discovered recursively with the pattern ``**/*.md``. A file is
    skipped when its vault-relative path (forward slashes, prefixed with "/")
    contains any of the ``exclude_dirs`` substrings, or when its frontmatter
    carries neither an ``id`` nor a ``note_id`` value.

    Parameters
    ----------
    vault_root:
        Root directory of the vault to scan.
    exclude_dirs:
        Substrings that mark paths to ignore. ``None`` or an empty list
        selects the defaults ``/.obsidian/``, ``/_backup_frontmatter/`` and
        ``/_imported/``.

    Returns
    -------
    The result of ``build_note_index`` for the collected note payloads
    (described by this module as ``(by_id, by_slug, by_file_slug)``).
    """
    exclude_dirs = exclude_dirs or ["/.obsidian/", "/_backup_frontmatter/", "/_imported/"]

    # glob gives no ordering guarantee; sorting keeps the payload order (and
    # anything downstream that depends on it) reproducible between runs.
    files = sorted(glob.glob(os.path.join(vault_root, "**", "*.md"), recursive=True))

    notes_payloads: List[dict] = []
    for abs_path in files:
        rel_path = _rel_from_abs(abs_path, vault_root)

        # Match exclusions against the vault-relative path only. Matching the
        # absolute path would drop every note when the vault's own location
        # contains an excluded segment, and would miss top-level folders when
        # vault_root is given as a relative path without a leading separator.
        probe = "/" + rel_path
        if any(ex in probe for ex in exclude_dirs):
            continue

        parsed = read_markdown(abs_path)
        fm = parsed.frontmatter or {}

        # Notes without an identifier cannot be link targets; leave them out.
        note_id = fm.get("id") or fm.get("note_id")
        if not note_id:
            continue

        # Fall back to the file name without its extension when no title is set.
        title = fm.get("title") or os.path.basename(abs_path).rsplit(".", 1)[0]
        notes_payloads.append({
            "note_id": note_id,
            "title": title,
            "path": rel_path,
        })
    return build_note_index(notes_payloads)
def deriv_edges_for_note(
    note_stub: dict,
    chunks_for_links: List[dict],
    note_index: Tuple[Dict[str, dict], Dict[str, dict], Dict[str, dict]],
) -> List[dict]:
    """
    Derive the edges for ONE note from its full text and its chunk texts.

    Expects:
    - note_stub to contain "note_id" and "fulltext"
    - chunks_for_links shaped like [{"chunk_id": "...", "text": "..."}]

    Returns an empty list when the stub has no (truthy) "note_id"; otherwise
    the result of ``derive_wikilink_edges`` is passed through unchanged.
    """
    has_note_id = bool(note_stub.get("note_id"))
    if has_note_id:
        # The actual derivation (incl. references_at) lives in derive_wikilink_edges.
        return derive_wikilink_edges(note_stub, chunks_for_links, note_index)
    return []