app/core/edges.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 1s
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 1s
This commit is contained in:
parent
40d915c4c4
commit
2b3e5546b8
|
|
@ -1,45 +1,94 @@
|
||||||
|
#!/usr/bin/env python3
"""
app/core/edges.py — Version 1.0.0 (2025-09-05)

Zweck
-----
Zentrale Edge-Ableitung für den Import:
- Baut einen Note-Index (by_id/by_slug/by_file_slug) über ALLE Notizen im Vault.
- Leitet Kanten aus Volltext (references/backlink) und aus Chunk-Texten (references_at) ab.

Aufruf / Verwendung
-------------------
from app.core.edges import build_note_index_for_vault, deriv_edges_for_note

# Einmal pro Import:
index = build_note_index_for_vault(vault_root, exclude_dirs=[...])

# Pro Note:
edges = deriv_edges_for_note(
    note_stub={"note_id": fm["id"], "title": fm["title"], "path": rel_path, "fulltext": parsed.body},
    chunks_for_links=[{"chunk_id": chunk_pl["chunk_id"], "text": chunks[i].text} ...],
    note_index=index
)

Parameter
---------
- vault_root: Pfad zum Obsidian Vault
- exclude_dirs: Liste relativer Teilpfade, die ignoriert werden
- note_stub: dict mit mind. {"note_id","title","path","fulltext"}
- chunks_for_links: Liste dicts mit {"chunk_id","text"} (Text MUSS enthalten sein)
- note_index: Tuple (by_id, by_slug, by_file_slug)

Änderungen
----------
- Neu: Erst-Release. Konsolidiert die Edge-Ableitung an einer Stelle.
"""
from __future__ import annotations

import glob
import os
from typing import Dict, List, Tuple

from app.core.derive_edges import build_note_index, derive_wikilink_edges
from app.core.parser import read_markdown
def _rel_from_abs(abs_path: str, vault_root: str) -> str:
    """Return *abs_path* relative to *vault_root* with forward slashes.

    Backslashes are normalized so vault-relative paths are stable,
    platform-independent note locators (same form on Windows and POSIX).
    """
    return os.path.relpath(abs_path, vault_root).replace("\\", "/")
def build_note_index_for_vault(vault_root: str, exclude_dirs: List[str] | None = None):
    """Scan all Markdown files in the vault and build a note index.

    Walks ``vault_root`` recursively for ``*.md`` files, skips any path
    containing one of *exclude_dirs*, reads each file's frontmatter via
    ``read_markdown`` and collects minimal note payloads. The payloads are
    handed to ``build_note_index``, which resolves titles/file names/ids.

    Parameters
    ----------
    vault_root : str
        Path to the Obsidian vault.
    exclude_dirs : list[str] | None
        Relative path fragments to ignore. Defaults to the Obsidian config
        directory and the backup/import working directories.

    Returns
    -------
    Tuple ``(by_id, by_slug, by_file_slug)`` as produced by ``build_note_index``.
    """
    exclude_dirs = exclude_dirs or ["/.obsidian/", "/_backup_frontmatter/", "/_imported/"]
    notes_payloads: List[dict] = []
    for abs_path in glob.glob(os.path.join(vault_root, "**", "*.md"), recursive=True):
        # Normalize separators so the substring-based exclude check also
        # matches on Windows paths.
        pn = abs_path.replace("\\", "/")
        if any(ex in pn for ex in exclude_dirs):
            continue
        parsed = read_markdown(abs_path)
        fm = parsed.frontmatter or {}
        note_id = fm.get("id") or fm.get("note_id")
        if not note_id:
            # A note without a stable id cannot be an edge endpoint; skip it.
            continue
        title = fm.get("title") or os.path.basename(abs_path).rsplit(".", 1)[0]
        notes_payloads.append({
            "note_id": note_id,
            "title": title,
            "path": _rel_from_abs(abs_path, vault_root),
        })
    return build_note_index(notes_payloads)
def deriv_edges_for_note(
    note_stub: dict,
    chunks_for_links: List[dict],
    note_index: Tuple[Dict[str, dict], Dict[str, dict], Dict[str, dict]],
) -> List[dict]:
    """Derive the edges for ONE note from its full text plus chunk texts.

    Expects:
    - note_stub with at least ``"note_id"`` and ``"fulltext"``
    - chunks_for_links: ``[{"chunk_id": "...", "text": "..."}]`` (text required)
    - note_index: the ``(by_id, by_slug, by_file_slug)`` tuple returned by
      ``build_note_index_for_vault``

    Returns a list of edge dicts; an empty list when the note has no id.
    """
    if not note_stub.get("note_id"):
        # Without a source id no edge can be anchored to this note.
        return []
    # The actual derivation (incl. references_at) lives in derive_wikilink_edges.
    return derive_wikilink_edges(note_stub, chunks_for_links, note_index)
||||||
Loading…
Reference in New Issue
Block a user