app/core/note_payload.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
This commit is contained in:
parent
4637091125
commit
9ef2e8d397
|
|
@ -1,56 +1,48 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
# Modul: app/core/note_payload.py
|
||||||
Modul: app/core/note_payload.py
|
# Version: 1.6.1
|
||||||
Version: 1.5.1
|
# Datum: 2025-09-09
|
||||||
Datum: 2025-09-09
|
#
|
||||||
|
# Kurzbeschreibung
|
||||||
|
# ----------------
|
||||||
|
# Erzeugt den Qdrant-Payload für Notes inkl. robuster Hash-Bildung zur
|
||||||
|
# Änderungserkennung. Der vollständige Body wird unter "fulltext" persistiert;
|
||||||
|
# der Pfad ist relativ (für verlustfreien Export).
|
||||||
|
#
|
||||||
|
# Wichtige Punkte
|
||||||
|
# ---------------
|
||||||
|
# - Nur Inhalte fließen in den Hash ein (keine FS-Zeitstempel).
|
||||||
|
# - Vergleichsarten:
|
||||||
|
# Body -> nur Body
|
||||||
|
# Frontmatter -> nur Frontmatter
|
||||||
|
# Full -> Body + Frontmatter
|
||||||
|
# Steuerbar per CLI/ENV:
|
||||||
|
# --hash-mode body|frontmatter|full
|
||||||
|
# MINDNET_HASH_MODE / MINDNET_HASH_COMPARE (Body|Frontmatter|Full)
|
||||||
|
# - Hash-Quelle:
|
||||||
|
# parsed (Default) -> Parser-Body
|
||||||
|
# raw -> Rohdatei-Body (Frontmatter via Regex entfernt)
|
||||||
|
# Steuerbar per:
|
||||||
|
# --hash-source parsed|raw
|
||||||
|
# MINDNET_HASH_SOURCE
|
||||||
|
# - Normalisierung:
|
||||||
|
# canonical (Default) -> \r\n->\n, trailing spaces pro Zeile entfernt
|
||||||
|
# none -> keine Normalisierung (jede Kleinigkeit zählt)
|
||||||
|
# Steuerbar per:
|
||||||
|
# --hash-normalize canonical|none
|
||||||
|
# MINDNET_HASH_NORMALIZE
|
||||||
|
#
|
||||||
|
# Neu in v1.6.x
|
||||||
|
# -------------
|
||||||
|
# - "hash_signature" im Payload, z. B. "body:raw:none:<hex>".
|
||||||
|
# - Optional (ENV MINDNET_HASH_RECORD_ALL=true): zusätzliches Hash-Set für Debug:
|
||||||
|
# payload["hashes"] = {
|
||||||
|
# "body_parsed": "...", "body_raw": "...",
|
||||||
|
# "frontmatter": "...",
|
||||||
|
# "full_parsed": "...", "full_raw": "..."
|
||||||
|
# }
|
||||||
|
|
||||||
Kurzbeschreibung
|
|
||||||
----------------
|
|
||||||
Erzeugt den Qdrant-Payload für **Notes**. Neben stabiler Hash-Bildung zur
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Modul: app/core/note_payload.py
|
|
||||||
Version: 1.6.0
|
|
||||||
Datum: 2025-09-09
|
|
||||||
|
|
||||||
Kurzbeschreibung
|
|
||||||
----------------
|
|
||||||
Erzeugt den Qdrant-Payload für **Notes** inkl. robuster Hash-Bildung
|
|
||||||
zur Änderungserkennung. Der vollständige Body wird unter ``fulltext`` persistiert;
|
|
||||||
der Pfad ist relativ (für verlustfreien Export).
|
|
||||||
|
|
||||||
Wichtig
|
|
||||||
-------
|
|
||||||
- **Nur Inhalte** fließen in den Hash ein (keine FS-Zeitstempel).
|
|
||||||
- Vergleichsarten:
|
|
||||||
* Body → nur Body
|
|
||||||
* Frontmatter → nur Frontmatter
|
|
||||||
* Full → Body + Frontmatter
|
|
||||||
per CLI/ENV: ``--hash-mode`` oder ENV ``MINDNET_HASH_MODE``/``MINDNET_HASH_COMPARE``.
|
|
||||||
- Hash-Quelle:
|
|
||||||
* parsed (Default) → nutzt den vom Parser gelieferten Body
|
|
||||||
* raw → liest Rohdatei und entfernt Frontmatter via Regex
|
|
||||||
per CLI/ENV: ``--hash-source`` oder ENV ``MINDNET_HASH_SOURCE``.
|
|
||||||
- Normalisierung:
|
|
||||||
* canonical (Default) → \r\n→\n, Zeilenend-Whitespace entfernt
|
|
||||||
* none → keine Normalisierung (erkennt jede Kleinigkeit)
|
|
||||||
per CLI/ENV: ``--hash-normalize`` oder ENV ``MINDNET_HASH_NORMALIZE``.
|
|
||||||
|
|
||||||
Neu in v1.6.0
|
|
||||||
-------------
|
|
||||||
- ``hash_signature`` im Payload, z. B. "body:raw:none:<hex>".
|
|
||||||
- Optionales Mitspeichern eines "Hash-Sets", wenn ENV ``MINDNET_HASH_RECORD_ALL=true``:
|
|
||||||
payload["hashes"] = {
|
|
||||||
"body_parsed": "...", "body_raw": "...",
|
|
||||||
"frontmatter": "...",
|
|
||||||
"full_parsed": "...", "full_raw": "..."
|
|
||||||
}
|
|
||||||
|
|
||||||
CLI (Sichtprüfung)
|
|
||||||
------------------
|
|
||||||
python3 -m app.core.note_payload --from-file ./vault/demo.md --vault-root ./vault --print --hash-mode full --hash-source raw --hash-normalize none
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
@ -70,14 +62,13 @@ except Exception: # pragma: no cover
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _canon_frontmatter(fm: Dict[str, Any]) -> str:
|
def _canon_frontmatter(fm: Dict[str, Any]) -> str:
|
||||||
"""Kanonische, stabile JSON-Serialisierung der Frontmatter für Hashbildung."""
|
"""Kanonische JSON-Serialisierung der Frontmatter für Hashbildung."""
|
||||||
return json.dumps(fm or {}, ensure_ascii=False, separators=(",", ":"), sort_keys=True)
|
return json.dumps(fm or {}, ensure_ascii=False, separators=(",", ":"), sort_keys=True)
|
||||||
|
|
||||||
def _normalize_body(body: str, mode: str) -> str:
|
def _normalize_body(body: str, mode: str) -> str:
|
||||||
"""Normalisiert den Body für reproduzierbare Hashes (oder nicht)."""
|
"""Normalisiert den Body für reproduzierbare Hashes (oder nicht)."""
|
||||||
if mode == "none":
|
if mode == "none":
|
||||||
return body if body is not None else ""
|
return body if body is not None else ""
|
||||||
# canonical: \r\n→\n, trailing spaces am Zeilenende entfernen
|
|
||||||
text = (body or "").replace("\r\n", "\n").replace("\r", "\n")
|
text = (body or "").replace("\r\n", "\n").replace("\r", "\n")
|
||||||
text = "\n".join(line.rstrip() for line in text.split("\n"))
|
text = "\n".join(line.rstrip() for line in text.split("\n"))
|
||||||
return text
|
return text
|
||||||
|
|
@ -86,13 +77,15 @@ def _resolve_hash_mode(explicit: Optional[str]) -> str:
|
||||||
"""
|
"""
|
||||||
Normalisiert den Hash-Modus auf:
|
Normalisiert den Hash-Modus auf:
|
||||||
'body' | 'frontmatter' | 'body+frontmatter'
|
'body' | 'frontmatter' | 'body+frontmatter'
|
||||||
Akzeptiert auch 'full' (Alias).
|
Akzeptiert 'full' als Alias.
|
||||||
Beachtet zusätzlich ENV: MINDNET_HASH_MODE oder MINDNET_HASH_COMPARE.
|
Berücksichtigt ENV: MINDNET_HASH_MODE oder MINDNET_HASH_COMPARE.
|
||||||
"""
|
"""
|
||||||
if explicit:
|
if explicit:
|
||||||
val = explicit.strip().lower()
|
val = explicit.strip().lower()
|
||||||
else:
|
else:
|
||||||
val = (os.environ.get("MINDNET_HASH_MODE") or os.environ.get("MINDNET_HASH_COMPARE") or "body").strip().lower()
|
val = (os.environ.get("MINDNET_HASH_MODE")
|
||||||
|
or os.environ.get("MINDNET_HASH_COMPARE")
|
||||||
|
or "body").strip().lower()
|
||||||
if val in ("full", "fulltext", "body+frontmatter", "bodyplusfrontmatter"):
|
if val in ("full", "fulltext", "body+frontmatter", "bodyplusfrontmatter"):
|
||||||
return "body+frontmatter"
|
return "body+frontmatter"
|
||||||
if val in ("frontmatter", "fm"):
|
if val in ("frontmatter", "fm"):
|
||||||
|
|
@ -137,10 +130,16 @@ def _sha256(s: str) -> str:
|
||||||
h.update(s.encode("utf-8"))
|
h.update(s.encode("utf-8"))
|
||||||
return h.hexdigest()
|
return h.hexdigest()
|
||||||
|
|
||||||
def compute_hash(*, body: Optional[str], frontmatter: Optional[Dict[str, Any]],
|
def compute_hash(
|
||||||
mode: Optional[str] = None, normalize: Optional[str] = None) -> str:
|
*,
|
||||||
|
body: Optional[str],
|
||||||
|
frontmatter: Optional[Dict[str, Any]],
|
||||||
|
mode: Optional[str] = None,
|
||||||
|
normalize: Optional[str] = None,
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Berechnet einen Hex-Hash gemäß ``mode`` und ``normalize``.
|
Berechnet einen Hex-Hash gemäß 'mode' und 'normalize'.
|
||||||
|
|
||||||
mode: "body" | "frontmatter" | "body+frontmatter"
|
mode: "body" | "frontmatter" | "body+frontmatter"
|
||||||
normalize: "canonical" | "none"
|
normalize: "canonical" | "none"
|
||||||
"""
|
"""
|
||||||
|
|
@ -189,7 +188,7 @@ def make_note_payload(
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
parsed : Any
|
parsed : Any
|
||||||
Objekt mit Attributen/Keys ``frontmatter``, ``body``, ``path``.
|
Objekt mit Attributen/Keys 'frontmatter', 'body', 'path'.
|
||||||
vault_root : Optional[str]
|
vault_root : Optional[str]
|
||||||
Vault-Wurzel (für Pfad-Relativierung).
|
Vault-Wurzel (für Pfad-Relativierung).
|
||||||
hash_mode : Optional[str]
|
hash_mode : Optional[str]
|
||||||
|
|
@ -199,14 +198,14 @@ def make_note_payload(
|
||||||
hash_source : Optional[str]
|
hash_source : Optional[str]
|
||||||
"parsed" (Default) oder "raw". Wenn "raw", wird der Body aus der Rohdatei gelesen.
|
"parsed" (Default) oder "raw". Wenn "raw", wird der Body aus der Rohdatei gelesen.
|
||||||
file_path : Optional[str]
|
file_path : Optional[str]
|
||||||
Pfad zur Markdown-Datei, erforderlich für ``hash_source=raw``.
|
Pfad zur Markdown-Datei, erforderlich für 'hash_source=raw'.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
Dict[str, Any]
|
Dict[str, Any]
|
||||||
Qdrant-Payload für die Notes-Collection.
|
Qdrant-Payload für die Notes-Collection.
|
||||||
"""
|
"""
|
||||||
# "Duck typing": dict oder Objekt akzeptieren
|
# dict oder Objekt akzeptieren
|
||||||
if isinstance(parsed, dict):
|
if isinstance(parsed, dict):
|
||||||
fm = parsed.get("frontmatter") or {}
|
fm = parsed.get("frontmatter") or {}
|
||||||
body_parsed = parsed.get("body") or ""
|
body_parsed = parsed.get("body") or ""
|
||||||
|
|
@ -246,351 +245,4 @@ def make_note_payload(
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Note-Level-Wikilinks (Fallback, wenn Chunks nicht geliefert werden)
|
# Note-Level-Wikilinks
|
||||||
note_level_refs = list(dict.fromkeys(extract_wikilinks(body_parsed))) if body_parsed else []
|
|
||||||
|
|
||||||
payload: Dict[str, Any] = {
|
|
||||||
"note_id": fm.get("id") or fm.get("note_id"),
|
|
||||||
"title": fm.get("title"),
|
|
||||||
"type": fm.get("type"),
|
|
||||||
"status": fm.get("status"),
|
|
||||||
"created": fm.get("created"),
|
|
||||||
"updated": fm.get("updated"),
|
|
||||||
"path": rel_path or fm.get("path"),
|
|
||||||
"tags": fm.get("tags"),
|
|
||||||
# Primärer Hash + Signatur (für Vergleich)
|
|
||||||
"hash_fulltext": primary_hash,
|
|
||||||
"hash_signature": hash_signature,
|
|
||||||
# Volltext persistieren (verlustfreie Rekonstruktion) – parsed Body
|
|
||||||
"fulltext": body_parsed,
|
|
||||||
# Fallback-Refs auf Note-Ebene
|
|
||||||
"references": note_level_refs,
|
|
||||||
}
|
|
||||||
|
|
||||||
for k in ("area", "project", "source", "lang", "slug", "aliases"):
|
|
||||||
if k in fm:
|
|
||||||
payload[k] = fm[k]
|
|
||||||
|
|
||||||
# Optional: gesamtes Hash-Set persistieren (Debug/Monitoring)
|
|
||||||
if os.environ.get("MINDNET_HASH_RECORD_ALL", "false").strip().lower() == "true":
|
|
||||||
payload["hashes"] = compute_hash_set(body_parsed=body_parsed, body_raw=raw_body, fm=fm, normalize=normalize)
|
|
||||||
|
|
||||||
# Optional: Roh-Body-Hash separat (historische Kompatibilität)
|
|
||||||
if os.environ.get("MINDNET_HASH_STORE_RAW", "false").strip().lower() == "true" and src == "raw":
|
|
||||||
try:
|
|
||||||
payload["hash_raw_body"] = compute_hash(body=raw_body, frontmatter=fm, mode="body", normalize="none")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return payload
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# CLI – Sichtprüfung
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _cli() -> None:
|
|
||||||
ap = argparse.ArgumentParser(description="Note-Payload aus Markdown erzeugen und anzeigen")
|
|
||||||
ap.add_argument("--from-file", dest="src", required=True, help="Pfad zur Markdown-Datei")
|
|
||||||
ap.add_argument("--vault-root", dest="vault_root", default=None, help="Vault-Wurzel zur Pfad-Relativierung")
|
|
||||||
ap.add_argument("--print", dest="do_print", action="store_true", help="Payload auf stdout ausgeben")
|
|
||||||
ap.add_argument("--hash-mode", choices=["body", "frontmatter", "body+frontmatter", "full"], default=None)
|
|
||||||
ap.add_argument("--hash-normalize", choices=["canonical", "none"], default=None)
|
|
||||||
ap.add_argument("--hash-source", choices=["parsed", "raw"], default=None)
|
|
||||||
args = ap.parse_args()
|
|
||||||
|
|
||||||
parsed = read_markdown(args.src)
|
|
||||||
payload = make_note_payload(parsed, vault_root=args.vault_root,
|
|
||||||
hash_mode=args.hash_mode, hash_normalize=args.hash_normalize,
|
|
||||||
hash_source=args.hash_source, file_path=args.src)
|
|
||||||
|
|
||||||
if args.do_print:
|
|
||||||
print(json.dumps(payload, ensure_ascii=False, indent=2))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__": # pragma: no cover
|
|
||||||
_cli()
|
|
||||||
|
|
||||||
Idempotenz wird der vollständige Body unter ``fulltext`` persistiert und
|
|
||||||
der Dateipfad relativ zum Vault gespeichert. Das erlaubt eine verlustfreie
|
|
||||||
Rekonstruktion im Export (erst ``fulltext``, sonst Chunks).
|
|
||||||
|
|
||||||
Wichtig
|
|
||||||
-------
|
|
||||||
- **Nur Inhalte** gehen in den Hash: Weder Dateisystem-Zeitstempel (mtime/ctime)
|
|
||||||
noch sonstige FS-Metadaten werden berücksichtigt.
|
|
||||||
- Hash-Quelle: Parser-Body (Default) oder Rohdatei-Body (Frontmatter via Regex entfernt).
|
|
||||||
|
|
||||||
Änderungen in v1.5.1
|
|
||||||
--------------------
|
|
||||||
- Neue Env-Var **MINDNET_HASH_COMPARE** als Synonym zu MINDNET_HASH_MODE.
|
|
||||||
- Akzeptiert komfortable Werte (case-insensitive): **Body**, **Frontmatter**, **Full**.
|
|
||||||
*Full* wird intern zu ``body+frontmatter`` normalisiert.
|
|
||||||
- CLI akzeptiert weiterhin ``--hash-mode``; Importer reicht diese Einstellung durch.
|
|
||||||
|
|
||||||
Hash-Steuerung
|
|
||||||
--------------
|
|
||||||
- Modus (welche Teile in den Hash einfließen):
|
|
||||||
* ``body`` (Default)
|
|
||||||
* ``frontmatter``
|
|
||||||
* ``body+frontmatter`` (Synonym CLI/ENV: ``full``)
|
|
||||||
Quelle:
|
|
||||||
* Funktionsparameter ``hash_mode`` (höchste Priorität)
|
|
||||||
* Env ``MINDNET_HASH_MODE`` oder **``MINDNET_HASH_COMPARE``** (Fallback)
|
|
||||||
Normalisierung:
|
|
||||||
* ``canonical`` (Default) | ``none`` — via Param ``hash_normalize`` oder Env ``MINDNET_HASH_NORMALIZE``
|
|
||||||
Quelle des Body-Textes:
|
|
||||||
* ``parsed`` (Default) | ``raw`` — via Param ``hash_source`` oder Env ``MINDNET_HASH_SOURCE``
|
|
||||||
|
|
||||||
CLI (Sichtprüfung)
|
|
||||||
------------------
|
|
||||||
python3 -m app.core.note_payload --from-file ./vault/demo.md --vault-root ./vault --print --hash-source raw
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
import argparse
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
from typing import Any, Dict, Optional, Tuple
|
|
||||||
|
|
||||||
try:
|
|
||||||
from app.core.parser import read_markdown, extract_wikilinks, FRONTMATTER_RE
|
|
||||||
except Exception: # pragma: no cover
|
|
||||||
from .parser import read_markdown, extract_wikilinks, FRONTMATTER_RE # type: ignore
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Helpers
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _canon_frontmatter(fm: Dict[str, Any]) -> str:
|
|
||||||
"""Kanonische, stabile JSON-Serialisierung der Frontmatter für Hashbildung."""
|
|
||||||
return json.dumps(fm or {}, ensure_ascii=False, separators=(",", ":"), sort_keys=True)
|
|
||||||
|
|
||||||
def _normalize_body(body: str, mode: str) -> str:
|
|
||||||
"""Normalisiert den Body für reproduzierbare Hashes (oder nicht)."""
|
|
||||||
if mode == "none":
|
|
||||||
return body if body is not None else ""
|
|
||||||
# canonical: \r\n→\n, trailing spaces entfernen
|
|
||||||
text = (body or "").replace("\r\n", "\n").replace("\r", "\n")
|
|
||||||
text = "\n".join(line.rstrip() for line in text.split("\n"))
|
|
||||||
return text
|
|
||||||
|
|
||||||
def _resolve_hash_mode(explicit: Optional[str]) -> str:
|
|
||||||
"""
|
|
||||||
Normalisiert den Hash-Modus auf einen der Werte:
|
|
||||||
'body' | 'frontmatter' | 'body+frontmatter'
|
|
||||||
Akzeptiert auch 'full' als Alias für 'body+frontmatter'.
|
|
||||||
Beachtet zusätzlich die Env-Variablen MINDNET_HASH_MODE und MINDNET_HASH_COMPARE.
|
|
||||||
"""
|
|
||||||
if explicit:
|
|
||||||
val = explicit.strip().lower()
|
|
||||||
else:
|
|
||||||
val = (os.environ.get("MINDNET_HASH_MODE") or os.environ.get("MINDNET_HASH_COMPARE") or "body").strip().lower()
|
|
||||||
if val in ("full", "fulltext", "body+frontmatter", "bodyplusfrontmatter"):
|
|
||||||
return "body+frontmatter"
|
|
||||||
if val in ("frontmatter", "fm"):
|
|
||||||
return "frontmatter"
|
|
||||||
# default & fallbacks
|
|
||||||
return "body"
|
|
||||||
|
|
||||||
def _read_raw_body_from_file(file_path: Optional[str]) -> Tuple[str, Dict[str, Any]]:
|
|
||||||
"""Liest die Rohdatei und extrahiert Body & Frontmatter ohne Parser-Logik.
|
|
||||||
|
|
||||||
Rückgabe:
|
|
||||||
(body_text, frontmatter_dict)
|
|
||||||
"""
|
|
||||||
if not file_path or not os.path.exists(file_path):
|
|
||||||
return "", {}
|
|
||||||
try:
|
|
||||||
with open(file_path, "r", encoding="utf-8") as f:
|
|
||||||
raw = f.read()
|
|
||||||
except Exception:
|
|
||||||
return "", {}
|
|
||||||
# Frontmatter per Regex entfernen
|
|
||||||
m = FRONTMATTER_RE.match(raw)
|
|
||||||
fm = {}
|
|
||||||
if m:
|
|
||||||
fm_txt = m.group(1)
|
|
||||||
try:
|
|
||||||
import yaml # lazy
|
|
||||||
fm = yaml.safe_load(fm_txt) or {}
|
|
||||||
except Exception:
|
|
||||||
fm = {}
|
|
||||||
body = raw[m.end():]
|
|
||||||
else:
|
|
||||||
body = raw
|
|
||||||
return body, fm
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Hashing
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def compute_hash(
    *,
    body: Optional[str],
    frontmatter: Optional[Dict[str, Any]],
    mode: Optional[str] = None,
    normalize: Optional[str] = None,
) -> str:
    """
    Compute a hex digest according to ``mode`` and ``normalize``.

    mode:
        - "body" (default)
        - "frontmatter"
        - "body+frontmatter"
    normalize:
        - "canonical" (default)
        - "none"
    """
    resolved_mode = _resolve_hash_mode(mode)
    resolved_norm = (normalize or os.environ.get("MINDNET_HASH_NORMALIZE", "canonical")).strip().lower()

    body_text = _normalize_body(body or "", resolved_norm)
    fm_json = _canon_frontmatter(frontmatter or {})

    # Assemble the byte chunks that feed the digest; "\n--FM--\n" separates
    # body and frontmatter in combined mode so the concatenation is unambiguous.
    if resolved_mode == "frontmatter":
        chunks = [fm_json.encode("utf-8")]
    elif resolved_mode == "body+frontmatter":
        chunks = [body_text.encode("utf-8"), b"\n--FM--\n", fm_json.encode("utf-8")]
    else:  # "body"
        chunks = [body_text.encode("utf-8")]

    digest = hashlib.sha256()
    for chunk in chunks:
        digest.update(chunk)
    return digest.hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Kernfunktion
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def make_note_payload(
    parsed: Any,
    vault_root: Optional[str] = None,
    *,
    hash_mode: Optional[str] = None,
    hash_normalize: Optional[str] = None,
    hash_source: Optional[str] = None,
    file_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Build the payload for a parsed note.

    Parameters
    ----------
    parsed : Any
        Object with attributes/keys ``frontmatter``, ``body``, ``path``.
    vault_root : Optional[str]
        Vault root (for path relativization). If ``None``, ``path`` is taken unchanged.
    hash_mode : Optional[str]
        "body" | "frontmatter" | "body+frontmatter" | "full" (alias; overrides ENV).
    hash_normalize : Optional[str]
        "canonical" | "none" (overrides ENV).
    hash_source : Optional[str]
        "parsed" (default) or "raw". If "raw", the body is read from the raw file.
    file_path : Optional[str]
        Path to the Markdown file, required for ``hash_source=raw``.

    Returns
    -------
    Dict[str, Any]
        Qdrant payload for the notes collection.
    """
    # "Duck typing": accept either a dict or an object with attributes.
    if isinstance(parsed, dict):
        fm = parsed.get("frontmatter") or {}
        body = parsed.get("body") or ""
        path = parsed.get("path") or ""
    else:
        fm = getattr(parsed, "frontmatter", {}) or {}
        body = getattr(parsed, "body", "") or ""
        path = getattr(parsed, "path", "") or ""

    # Determine the hash source (parameter wins over ENV; default "parsed").
    src = (hash_source or os.environ.get("MINDNET_HASH_SOURCE", "parsed")).strip().lower()
    raw_body, raw_fm = ("", {})
    if src == "raw":
        raw_body, raw_fm = _read_raw_body_from_file(file_path or path)
        # Merge raw frontmatter in (fill gaps only — never overwrite parsed keys).
        if isinstance(raw_fm, dict) and raw_fm:
            merged_fm = dict(fm)
            for k, v in raw_fm.items():
                merged_fm.setdefault(k, v)
            fm = merged_fm

    # Compute the hash according to mode/source.
    body_for_hash = raw_body if src == "raw" else body
    hash_fulltext = compute_hash(body=body_for_hash, frontmatter=fm, mode=hash_mode, normalize=hash_normalize)

    # Relativize the path against the vault root, best-effort.
    rel_path = path
    try:
        if vault_root:
            rel = os.path.relpath(path, vault_root)
            rel = rel.replace("\\", "/").lstrip("/")  # normalize to forward slashes
            rel_path = rel
    except Exception:
        pass  # path is not critical for hash/ID

    # Note-level wikilinks (fallback when chunks are not supplied);
    # dict.fromkeys de-duplicates while preserving first-seen order.
    note_level_refs = list(dict.fromkeys(extract_wikilinks(body))) if body else []

    payload: Dict[str, Any] = {
        "note_id": fm.get("id") or fm.get("note_id"),
        "title": fm.get("title"),
        "type": fm.get("type"),
        "status": fm.get("status"),
        "created": fm.get("created"),
        "updated": fm.get("updated"),
        "path": rel_path or fm.get("path"),
        "tags": fm.get("tags"),
        "hash_fulltext": hash_fulltext,
        # Persist the full text (lossless reconstruction) — from the PARSED body
        "fulltext": body,
        # Fallback refs at note level
        "references": note_level_refs,
    }

    # Pass through optional frontmatter keys verbatim when present.
    for k in ("area", "project", "source", "lang", "slug", "aliases"):
        if k in fm:
            payload[k] = fm[k]

    # Optional: additionally persist a raw-body hash (only meaningful for src == "raw").
    if os.environ.get("MINDNET_HASH_STORE_RAW", "false").strip().lower() == "true" and src == "raw":
        try:
            payload["hash_raw_body"] = compute_hash(body=raw_body, frontmatter=fm, mode="body", normalize="none")
        except Exception:
            pass

    return payload
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# CLI – Sichtprüfung
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _cli() -> None:
    """CLI entry point: build a note payload from a Markdown file and optionally print it."""
    parser = argparse.ArgumentParser(description="Note-Payload aus Markdown erzeugen und anzeigen")
    parser.add_argument("--from-file", dest="src", required=True, help="Pfad zur Markdown-Datei")
    parser.add_argument("--vault-root", dest="vault_root", default=None, help="Vault-Wurzel zur Pfad-Relativierung")
    parser.add_argument("--print", dest="do_print", action="store_true", help="Payload auf stdout ausgeben")
    parser.add_argument("--hash-mode", choices=["body", "frontmatter", "body+frontmatter", "full"], default=None)
    parser.add_argument("--hash-normalize", choices=["canonical", "none"], default=None)
    parser.add_argument("--hash-source", choices=["parsed", "raw"], default=None)
    args = parser.parse_args()

    note = read_markdown(args.src)
    payload = make_note_payload(
        note,
        vault_root=args.vault_root,
        hash_mode=args.hash_mode,
        hash_normalize=args.hash_normalize,
        hash_source=args.hash_source,
        file_path=args.src,
    )

    if args.do_print:
        print(json.dumps(payload, ensure_ascii=False, indent=2))


if __name__ == "__main__":  # pragma: no cover
    _cli()
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user