app/core/note_payload.py updated
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
This commit is contained in:
parent 81c1400ef4
commit 4872374a6e
@@ -1,36 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # Module: app/core/note_payload.py
-# Version: 1.6.3
+# Version: 1.7.0
 # Date: 2025-09-09
-#
-# Purpose
-# -------
-# Builds the Qdrant payload for notes, including robust hash computation for
-# change detection. The full (parsed) body is persisted under "fulltext";
-# the path is relative (for lossless export).
-#
-# Controls (CLI/ENV, passed through by the importer)
-# --------------------------------------------------
-# - Comparison mode:
-#     --hash-mode body|frontmatter|full
-#     ENV: MINDNET_HASH_MODE or MINDNET_HASH_COMPARE (Body|Frontmatter|Full)
-#     "full" is an alias for "body+frontmatter".
-# - Hash source:
-#     --hash-source parsed|raw (ENV: MINDNET_HASH_SOURCE)
-# - Normalization:
-#     --hash-normalize canonical|none (ENV: MINDNET_HASH_NORMALIZE)
-#
-# Payload fields (excerpt)
-# ------------------------
-# note_id, title, type, status, created, updated, path, tags,
-# fulltext, references (note-level wikilinks),
-# hash_fulltext (primary hash), hash_signature (e.g. "body:raw:none:<hex>")
-#
-# Notes
-# -----
-# - No dependency on filesystem timestamps; only content feeds the hash.
-# - Backwards compatible: field names stay stable; extra fields do no harm.
 
 from __future__ import annotations
 
@@ -45,17 +17,14 @@ try:
 except Exception:  # pragma: no cover
     from .parser import read_markdown, extract_wikilinks, FRONTMATTER_RE  # type: ignore
 
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 
 def _canon_frontmatter(fm: Dict[str, Any]) -> str:
-    """Canonical JSON serialization of the frontmatter for hashing."""
     return json.dumps(fm or {}, ensure_ascii=False, separators=(",", ":"), sort_keys=True)
 
 def _normalize_body(body: str, mode: str) -> str:
-    """Normalizes the body for reproducible hashes (or not)."""
     if mode == "none":
         return body if body is not None else ""
     text = (body or "").replace("\r\n", "\n").replace("\r", "\n")
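Aside: the docstrings dropped above described behavior that is unchanged. _canon_frontmatter is what keeps the frontmatter hash independent of key order; a standalone sketch of that property (sample data is hypothetical, not from the repo):

import hashlib
import json

def canon(fm: dict) -> str:
    # mirrors _canon_frontmatter: compact separators, sorted keys
    return json.dumps(fm or {}, ensure_ascii=False, separators=(",", ":"), sort_keys=True)

a = {"title": "Demo", "tags": ["x", "y"]}
b = {"tags": ["x", "y"], "title": "Demo"}
assert canon(a) == canon(b)  # same canonical form, hence the same digest
assert (hashlib.sha256(canon(a).encode("utf-8")).hexdigest()
        == hashlib.sha256(canon(b).encode("utf-8")).hexdigest())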
@@ -63,12 +32,6 @@ def _normalize_body(body: str, mode: str) -> str:
     return text
 
 def _resolve_hash_mode(explicit: Optional[str]) -> str:
-    """
-    Normalizes the hash mode to:
-        'body' | 'frontmatter' | 'body+frontmatter'
-    Accepts 'full' as an alias.
-    Honors ENV: MINDNET_HASH_MODE or MINDNET_HASH_COMPARE.
-    """
     if explicit:
         val = explicit.strip().lower()
     else:
@@ -76,17 +39,12 @@ def _resolve_hash_mode(explicit: Optional[str]) -> str:
                or os.environ.get("MINDNET_HASH_COMPARE")
                or "body").strip().lower()
     if val in ("full", "fulltext", "body+frontmatter", "bodyplusfrontmatter"):
-        return "body+frontmatter"
+        return "full"
     if val in ("frontmatter", "fm"):
         return "frontmatter"
     return "body"
 
 def _read_raw_body_from_file(file_path: Optional[str]) -> Tuple[str, Dict[str, Any]]:
-    """Reads the raw file and extracts body & frontmatter without parser logic.
-
-    Returns:
-        (body_text, frontmatter_dict)
-    """
     if not file_path or not os.path.exists(file_path):
         return "", {}
     try:
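Note: the normalized mode token changes here from "body+frontmatter" to "full", so any caller that matched on the old token needs updating. A quick sanity check of the new mapping (assumes the module is importable as app.core.note_payload):

from app.core.note_payload import _resolve_hash_mode

assert _resolve_hash_mode("full") == "full"
assert _resolve_hash_mode("body+frontmatter") == "full"  # old spelling folds into the alias target
assert _resolve_hash_mode("fm") == "frontmatter"
assert _resolve_hash_mode("anything-else") == "body"     # fallback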
@@ -94,7 +52,6 @@ def _read_raw_body_from_file(file_path: Optional[str]) -> Tuple[str, Dict[str, A
             raw = f.read()
     except Exception:
         return "", {}
-    # Strip frontmatter via regex
     m = FRONTMATTER_RE.match(raw)
     fm = {}
     if m:
@@ -109,55 +66,21 @@ def _read_raw_body_from_file(file_path: Optional[str]) -> Tuple[str, Dict[str, A
         body = raw
     return body, fm
 
-
-# ---------------------------------------------------------------------------
-# Hashing
-# ---------------------------------------------------------------------------
-
 def _sha256(s: str) -> str:
     h = hashlib.sha256()
     h.update(s.encode("utf-8"))
     return h.hexdigest()
-
-def compute_hash(
-    *,
-    body: Optional[str],
-    frontmatter: Optional[Dict[str, Any]],
-    mode: Optional[str] = None,
-    normalize: Optional[str] = None,
-) -> str:
-    """
-    Computes a hex hash according to 'mode' and 'normalize'.
-
-    mode: "body" | "frontmatter" | "body+frontmatter"
-    normalize: "canonical" | "none"
-    """
-    mode = _resolve_hash_mode(mode)
-    normalize = (normalize or os.environ.get("MINDNET_HASH_NORMALIZE", "canonical")).strip().lower()
+def _hash_for(mode: str, *, body: str, fm: Dict[str, Any], normalize: str) -> str:
     body_n = _normalize_body(body or "", normalize)
-    fm_s = _canon_frontmatter(frontmatter or {})
+    fm_s = _canon_frontmatter(fm or {})
 
     if mode == "frontmatter":
         return _sha256(fm_s)
-    if mode == "body+frontmatter":
+    if mode == "full":
         return _sha256(body_n + "\n--FM--\n" + fm_s)
     # default: body
     return _sha256(body_n)
-
-def compute_hash_set(*, body_parsed: str, body_raw: str, fm: Dict[str, Any], normalize: str) -> Dict[str, str]:
-    """Computes a set of hashes for monitoring/debugging."""
-    fm_s = _canon_frontmatter(fm or {})
-    bp = _normalize_body(body_parsed or "", normalize)
-    br = _normalize_body(body_raw or "", normalize)
-    return {
-        "frontmatter": _sha256(fm_s),
-        "body_parsed": _sha256(bp),
-        "body_raw": _sha256(br),
-        "full_parsed": _sha256(bp + "\n--FM--\n" + fm_s),
-        "full_raw": _sha256(br + "\n--FM--\n" + fm_s),
-    }
-
 
 # ---------------------------------------------------------------------------
 # Core function
 # ---------------------------------------------------------------------------
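The new _hash_for collapses compute_hash and compute_hash_set into one mode-dispatching helper that the caller invokes once per mode. A usage sketch (sample values hypothetical; assumes the module is importable as app.core.note_payload):

from app.core.note_payload import _hash_for

fm = {"id": "n1", "title": "Demo"}
body = "Hello [[World]]\n"

h_body = _hash_for("body", body=body, fm=fm, normalize="canonical")
h_fm = _hash_for("frontmatter", body=body, fm=fm, normalize="canonical")
# "full" hashes the normalized body and the canonical frontmatter joined
# by the "\n--FM--\n" separator, so it differs from both single hashes.
h_full = _hash_for("full", body=body, fm=fm, normalize="canonical")
assert len({h_body, h_fm, h_full}) == 3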
@@ -172,27 +95,13 @@ def make_note_payload(
     file_path: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
-    Builds the payload for a parsed note.
-
-    Parameters
-    ----------
-    parsed : Any
-        Object with attributes/keys 'frontmatter', 'body', 'path'.
-    vault_root : Optional[str]
-        Vault root (for making the path relative).
-    hash_mode : Optional[str]
-        "body" | "frontmatter" | "body+frontmatter" | "full" (alias; overrides ENV).
-    hash_normalize : Optional[str]
-        "canonical" | "none" (overrides ENV).
-    hash_source : Optional[str]
-        "parsed" (default) or "raw". With "raw", the body is read from the raw file.
-    file_path : Optional[str]
-        Path to the Markdown file, required for 'hash_source=raw'.
-
-    Returns
-    -------
-    Dict[str, Any]
-        Qdrant payload for the notes collection.
+    Returns the note payload including multiple hashes.
+    - The three hashes for (body|frontmatter|full) are ALWAYS produced under
+      'parsed:canonical' (keys such as 'body:parsed:canonical').
+    - Additionally, if the current config (source/normalize) differs, the
+      three hashes are also produced under the corresponding keys,
+      e.g. 'frontmatter:raw:none'.
+    - 'hash_fulltext' and 'hash_signature' reflect the *current* mode.
     """
     # Accept dict or object
     if isinstance(parsed, dict):
@@ -204,38 +113,62 @@ def make_note_payload(
         body_parsed = getattr(parsed, "body", "") or ""
         path = getattr(parsed, "path", "") or ""
 
-    # Determine the hash source
-    src = (hash_source or os.environ.get("MINDNET_HASH_SOURCE", "parsed")).strip().lower()
+    # Make the target path relative
+    rel_path = path
+    try:
+        if vault_root:
+            rel = os.path.relpath(path, vault_root)
+            rel = rel.replace("\\", "/").lstrip("/")
+            rel_path = rel
+    except Exception:
+        pass
+
+    # Resolve configuration
+    mode_resolved = _resolve_hash_mode(hash_mode)  # body|frontmatter|full
+    src = (hash_source or os.environ.get("MINDNET_HASH_SOURCE", "parsed")).strip().lower()  # parsed|raw
+    norm = (hash_normalize or os.environ.get("MINDNET_HASH_NORMALIZE", "canonical")).strip().lower()  # canonical|none
+
+    # Load the body source
     raw_body, raw_fm = ("", {})
     if src == "raw":
         raw_body, raw_fm = _read_raw_body_from_file(file_path or path)
-        # Merge raw frontmatter in (do not overwrite)
         if isinstance(raw_fm, dict) and raw_fm:
             merged_fm = dict(fm)
             for k, v in raw_fm.items():
                 merged_fm.setdefault(k, v)
             fm = merged_fm
+        body_for_hash = raw_body
+    else:
+        body_for_hash = body_parsed
 
-    normalize = (hash_normalize or os.environ.get("MINDNET_HASH_NORMALIZE", "canonical")).strip().lower()
-    mode_resolved = _resolve_hash_mode(hash_mode)
+    # --- 1) Always produce the standard triple (parsed:canonical) ---
+    std_src = "parsed"
+    std_norm = "canonical"
+    std_hashes: Dict[str, str] = {}
+    for m in ("body", "frontmatter", "full"):
+        std_hashes[f"{m}:{std_src}:{std_norm}"] = _hash_for(
+            m, body=body_parsed, fm=fm, normalize=std_norm
+        )
 
-    # Build the hash according to mode/source
-    body_for_hash = raw_body if src == "raw" else body_parsed
-    primary_hash = compute_hash(body=body_for_hash, frontmatter=fm, mode=mode_resolved, normalize=normalize)
-    hash_signature = f"{'full' if mode_resolved=='body+frontmatter' else mode_resolved}:{src}:{normalize}:{primary_hash}"
+    # Convenience fields (for tooling)
+    hash_body = std_hashes["body:parsed:canonical"]
+    hash_frontmatter = std_hashes["frontmatter:parsed:canonical"]
+    hash_full = std_hashes["full:parsed:canonical"]
 
-    # Make the path relative
-    rel_path = path
-    try:
-        if vault_root:
-            rel = os.path.relpath(path, vault_root)
-            rel = rel.replace("\\", "/").lstrip("/")  # normalize
-            rel_path = rel
-    except Exception:
-        pass
+    # --- 2) Hashes for the *current* configuration (if it differs) ---
+    cur_hashes: Dict[str, str] = {}
+    if not (src == std_src and norm == std_norm):
+        for m in ("body", "frontmatter", "full"):
+            cur_hashes[f"{m}:{src}:{norm}"] = _hash_for(
+                m, body=body_for_hash, fm=fm, normalize=norm
+            )
 
-    # Note-level wikilinks (fallback when chunks are not provided)
-    note_level_refs = list(dict.fromkeys(extract_wikilinks(body_parsed))) if body_parsed else []
+    # --- 3) Current mode for backwards-compat fields ---
+    current_hash = _hash_for(mode_resolved, body=body_for_hash, fm=fm, normalize=norm)
+    hash_signature = f"{mode_resolved}:{src}:{norm}:{current_hash}"
+
+    # Wikilinks (note level)
+    refs = list(dict.fromkeys(extract_wikilinks(body_parsed))) if body_parsed else []
 
     payload: Dict[str, Any] = {
         "note_id": fm.get("id") or fm.get("note_id"),
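For reference, the hash fields for a note processed with hash_source=raw and hash_normalize=none would look roughly like this (digests shortened and hypothetical):

payload_excerpt = {
    "hashes": {
        # the standard triple, always present
        "body:parsed:canonical": "9f2c...",
        "frontmatter:parsed:canonical": "41aa...",
        "full:parsed:canonical": "c07d...",
        # added because source/normalize differ from parsed:canonical;
        # the frontmatter digest repeats, since normalization only affects the body
        "body:raw:none": "5be1...",
        "frontmatter:raw:none": "41aa...",
        "full:raw:none": "88f0...",
    },
    # backwards-compat fields reflect the current mode (here: body)
    "hash_fulltext": "5be1...",
    "hash_signature": "body:raw:none:5be1...",
}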
@@ -246,47 +179,36 @@ def make_note_payload(
         "updated": fm.get("updated"),
         "path": rel_path or fm.get("path"),
         "tags": fm.get("tags"),
-        # Primary hash + signature (for comparison)
-        "hash_fulltext": primary_hash,
-        "hash_signature": hash_signature,
-        # Persist the full text (lossless reconstruction): parsed body
+        # Full text for lossless export
         "fulltext": body_parsed,
-        # Fallback refs at note level
-        "references": note_level_refs,
+        # Backwards compat:
+        "hash_fulltext": current_hash,
+        "hash_signature": hash_signature,
+        # Option C: multiple hashes
+        "hashes": {**std_hashes, **cur_hashes},
+        "hash_body": hash_body,
+        "hash_frontmatter": hash_frontmatter,
+        "hash_full": hash_full,
+        # Fallback refs
+        "references": refs,
     }
 
     for k in ("area", "project", "source", "lang", "slug", "aliases"):
         if k in fm:
             payload[k] = fm[k]
 
-    # Optional: persist the entire hash set (debug/monitoring)
-    if os.environ.get("MINDNET_HASH_RECORD_ALL", "false").strip().lower() == "true":
-        payload["hashes"] = compute_hash_set(
-            body_parsed=body_parsed, body_raw=raw_body, fm=fm, normalize=normalize
-        )
-
-    # Optional: separate raw-body hash (historical compatibility)
-    if os.environ.get("MINDNET_HASH_STORE_RAW", "false").strip().lower() == "true" and src == "raw":
-        try:
-            payload["hash_raw_body"] = compute_hash(
-                body=raw_body, frontmatter=fm, mode="body", normalize="none"
-            )
-        except Exception:
-            pass
-
     return payload
 
-
 # ---------------------------------------------------------------------------
 # CLI: visual check
 # ---------------------------------------------------------------------------
 
 def _cli() -> None:
     ap = argparse.ArgumentParser(description="Create and display a note payload from Markdown")
-    ap.add_argument("--from-file", dest="src", required=True, help="Path to the Markdown file")
-    ap.add_argument("--vault-root", dest="vault_root", default=None, help="Vault root for path relativization")
-    ap.add_argument("--print", dest="do_print", action="store_true", help="Print the payload to stdout")
-    ap.add_argument("--hash-mode", choices=["body", "frontmatter", "body+frontmatter", "full"], default=None)
+    ap.add_argument("--from-file", dest="src", required=True)
+    ap.add_argument("--vault-root", dest="vault_root", default=None)
+    ap.add_argument("--print", dest="do_print", action="store_true")
+    ap.add_argument("--hash-mode", choices=["body", "frontmatter", "full"], default=None)
     ap.add_argument("--hash-normalize", choices=["canonical", "none"], default=None)
     ap.add_argument("--hash-source", choices=["parsed", "raw"], default=None)
     args = ap.parse_args()
@@ -300,10 +222,8 @@ def _cli() -> None:
         hash_source=args.hash_source,
         file_path=args.src,
     )
-
     if args.do_print:
         print(json.dumps(payload, ensure_ascii=False, indent=2))
-
 
 if __name__ == "__main__":  # pragma: no cover
     _cli()
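A minimal programmatic round-trip matching the CLI path above (paths are placeholders; read_markdown's exact signature is assumed from the import at the top of the module):

from app.core.note_payload import make_note_payload
from app.core.parser import read_markdown  # assumed import path

parsed = read_markdown("vault/notes/demo.md")
payload = make_note_payload(
    parsed=parsed,
    vault_root="vault",
    hash_mode="full",            # same choices as --hash-mode
    hash_normalize="canonical",
    hash_source="raw",
    file_path="vault/notes/demo.md",
)
print(payload["hash_signature"])  # e.g. "full:raw:canonical:<hex>"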