app/core/note_payload.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 1s
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 1s
This commit is contained in:
parent
f3b6166daa
commit
b394cadf73
|
|
@ -1,29 +1,23 @@
|
||||||
# app/core/note_payload.py
|
# app/core/note_payload.py
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# Name: note_payload.py
|
# Name: note_payload.py
|
||||||
# Version: 1.2.0 (2025-09-08)
|
# Version: 1.2.1 (2025-09-08)
|
||||||
# Zweck: Erzeugt den Qdrant-Payload für Notes, inkl. deterministischer
|
# Zweck: Erzeugt den Qdrant-Payload für Notes, inkl. deterministischer
|
||||||
# Hash-Bildung zur Idempotenz-Erkennung.
|
# Hash-Bildung zur Idempotenz-Erkennung.
|
||||||
#
|
#
|
||||||
# Neu in 1.2.0:
|
# Änderungen:
|
||||||
# - Konfigurierbare Hash-Strategie via Umgebungsvariable MINDNET_HASH_MODE
|
# 1.2.1: Akzeptiert jetzt sowohl dict-Input als auch Objekt-Input (z. B. ParsedNote)
|
||||||
# * 'body' (Default, rückwärtskompatibel): nur Body geht in den Hash
|
# mit Attributen .frontmatter, .body, .path. Dadurch kein AttributeError mehr.
|
||||||
# * 'body+frontmatter' : Body + Frontmatter gehen in den Hash
|
# 1.2.0: Konfigurierbare Hash-Strategie via ENV MINDNET_HASH_MODE
|
||||||
# * 'frontmatter' : nur Frontmatter geht in den Hash
|
# ('body' | 'body+frontmatter' | 'frontmatter'); kanonische FM-Serialisierung.
|
||||||
# - Kanonische Serialisierung des Frontmatter (sortierte Keys, stabile JSON-Encodierung)
|
|
||||||
#
|
#
|
||||||
# Aufrufparameter / Steuerung:
|
# Steuerung Hash-Strategie (unverändert):
|
||||||
# - Über Umgebungsvariable:
|
# export MINDNET_HASH_MODE=body+frontmatter
|
||||||
# export MINDNET_HASH_MODE=body+frontmatter
|
# MINDNET_HASH_MODE=frontmatter python3 -m scripts.import_markdown --vault ./vault --apply
|
||||||
# oder direkt am Befehl:
|
|
||||||
# MINDNET_HASH_MODE=frontmatter python3 -m scripts.import_markdown --vault ./vault --apply
|
|
||||||
#
|
#
|
||||||
# Hinweise:
|
# Hinweis:
|
||||||
# - Diese Datei ist rückwärtskompatibel: Wenn die Variable nicht gesetzt ist
|
# - Datei-Zeitstempel (mtime/ctime) werden NICHT verwendet.
|
||||||
# oder ein unbekannter Wert verwendet wird, fällt die Logik auf 'body' zurück.
|
# - Default-Strategie bleibt 'body' (rückwärtskompatibel).
|
||||||
# - Die Datei-Zeitstempel (mtime/ctime) werden NICHT verwendet.
|
|
||||||
#
|
|
||||||
# Lizenz: MIT
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -31,7 +25,7 @@ from __future__ import annotations
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from typing import Any, Dict, Optional, Tuple
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------#
|
# -----------------------------------------------------------------------------#
|
||||||
|
|
@ -47,26 +41,22 @@ def canonicalize_frontmatter(fm: Dict[str, Any]) -> str:
|
||||||
"""
|
"""
|
||||||
Serialisiert das Frontmatter deterministisch:
|
Serialisiert das Frontmatter deterministisch:
|
||||||
- JSON mit sortierten Keys
|
- JSON mit sortierten Keys
|
||||||
- Keine überflüssigen Whitespaces (kompakte Separatoren)
|
- kompakte Separatoren
|
||||||
- UTF-8, keine ASCII-Escapes
|
- UTF-8, keine ASCII-Escapes
|
||||||
Achtung: Datumswerte müssen (wie im Projekt vereinbart) Strings sein.
|
Achtung: Datumswerte müssen Strings sein (siehe Schema).
|
||||||
"""
|
"""
|
||||||
return json.dumps(
|
return json.dumps(
|
||||||
fm,
|
fm or {},
|
||||||
ensure_ascii=False,
|
ensure_ascii=False,
|
||||||
sort_keys=True,
|
sort_keys=True,
|
||||||
separators=(",", ":")
|
separators=(",", ":"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_hash_mode_from_env() -> str:
|
def get_hash_mode_from_env() -> str:
|
||||||
"""
|
"""
|
||||||
Liest die Hash-Strategie aus der Umgebungsvariable MINDNET_HASH_MODE.
|
Liest die Hash-Strategie aus ENV MINDNET_HASH_MODE.
|
||||||
Erlaubte Werte:
|
Zulässig: 'body' (Default), 'body+frontmatter', 'frontmatter'
|
||||||
- 'body' (Default)
|
|
||||||
- 'body+frontmatter'
|
|
||||||
- 'frontmatter'
|
|
||||||
Unbekannte Werte -> 'body'.
|
|
||||||
"""
|
"""
|
||||||
val = (os.environ.get("MINDNET_HASH_MODE") or "").strip().lower()
|
val = (os.environ.get("MINDNET_HASH_MODE") or "").strip().lower()
|
||||||
if val in ("body", "body+frontmatter", "frontmatter"):
|
if val in ("body", "body+frontmatter", "frontmatter"):
|
||||||
|
|
@ -76,17 +66,13 @@ def get_hash_mode_from_env() -> str:
|
||||||
|
|
||||||
def compute_hash(body: str, frontmatter: Dict[str, Any], mode: Optional[str] = None) -> str:
|
def compute_hash(body: str, frontmatter: Dict[str, Any], mode: Optional[str] = None) -> str:
|
||||||
"""
|
"""
|
||||||
Berechnet den Hash gemäß der gewünschten Strategie.
|
Berechnet den Hash gemäß Strategie.
|
||||||
Args:
|
- 'body': nur Body
|
||||||
body: Markdown-Body (ohne Frontmatter), bereits als Text
|
- 'body+frontmatter': Body + FM (kanonisch)
|
||||||
frontmatter: geparstes Frontmatter-Objekt (Dict)
|
- 'frontmatter': nur FM (kanonisch)
|
||||||
mode: 'body' | 'body+frontmatter' | 'frontmatter' | None (= aus ENV)
|
|
||||||
Returns:
|
|
||||||
Hex-String (sha256)
|
|
||||||
"""
|
"""
|
||||||
strategy = (mode or get_hash_mode_from_env()).lower()
|
strategy = (mode or get_hash_mode_from_env()).lower()
|
||||||
|
|
||||||
# Kanonische Strings bilden
|
|
||||||
body_str = (body or "").strip()
|
body_str = (body or "").strip()
|
||||||
fm_str = canonicalize_frontmatter(frontmatter or {})
|
fm_str = canonicalize_frontmatter(frontmatter or {})
|
||||||
|
|
||||||
|
|
@ -94,7 +80,6 @@ def compute_hash(body: str, frontmatter: Dict[str, Any], mode: Optional[str] = N
|
||||||
return sha256_text(fm_str)
|
return sha256_text(fm_str)
|
||||||
|
|
||||||
if strategy == "body+frontmatter":
|
if strategy == "body+frontmatter":
|
||||||
# Trennmarker, um Kollisionen (z.B. 'ab'+'c' vs 'a'+'bc') auszuschließen
|
|
||||||
combo = body_str + "\n\n---\n\n" + fm_str
|
combo = body_str + "\n\n---\n\n" + fm_str
|
||||||
return sha256_text(combo)
|
return sha256_text(combo)
|
||||||
|
|
||||||
|
|
@ -102,30 +87,52 @@ def compute_hash(body: str, frontmatter: Dict[str, Any], mode: Optional[str] = N
|
||||||
return sha256_text(body_str)
|
return sha256_text(body_str)
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------#
|
||||||
|
# Helfer: parsed -> (frontmatter, body, path)
|
||||||
|
# -----------------------------------------------------------------------------#
|
||||||
|
|
||||||
|
def _extract_parsed(parsed: Any) -> tuple[Dict[str, Any], str, Optional[str]]:
    """
    Normalize a parser result into a ``(frontmatter, body, path)`` triple.

    Accepts either a dict with the keys ``"frontmatter"``, ``"body"`` and
    (optionally) ``"path"``, or an object exposing the same names as
    attributes (e.g. a ParsedNote).

    Returns:
        A tuple ``(frontmatter, body, path)`` where

        * ``frontmatter`` is always a fresh ``dict`` (empty when the value is
          missing, ``None`` or otherwise falsy), so callers can rely on
          ``.get()`` and never mutate the parser's own data,
        * ``body`` is the body value, or ``""`` when missing or falsy,
        * ``path`` is whatever the parser delivered, or ``None``.

    Raises:
        TypeError: if a frontmatter value is present but cannot be converted
            into a dict.
    """
    if isinstance(parsed, dict):
        raw_fm = parsed.get("frontmatter")
        body = parsed.get("body") or ""
        path = parsed.get("path")
    else:
        # Attribute-based input; every attribute is optional.
        raw_fm = getattr(parsed, "frontmatter", None)
        body = getattr(parsed, "body", "") or ""
        path = getattr(parsed, "path", None)
    return _frontmatter_to_dict(raw_fm), body, path


def _frontmatter_to_dict(raw_fm: Any) -> Dict[str, Any]:
    """Convert a frontmatter value of unknown type into a fresh plain dict."""
    if not raw_fm:
        # Missing / None / empty frontmatter is treated as "no frontmatter".
        return {}
    try:
        # Covers dicts, other mappings, iterables of (key, value) pairs and
        # objects that iterate as such pairs.
        return dict(raw_fm)
    except (TypeError, ValueError):
        pass
    # Model-like objects: call their export method WITHOUT arguments.
    # (Passing such a bound method as json.dumps(default=...) does not work,
    # because `default` is invoked with the object as a positional argument.)
    # NOTE(review): "model_dump" / "dict" are assumed to be the export method
    # names of the model classes in use (pydantic-style) - confirm.
    for exporter_name in ("model_dump", "dict"):
        exporter = getattr(raw_fm, exporter_name, None)
        if callable(exporter):
            exported = exporter()
            if isinstance(exported, dict):
                return dict(exported)
    raise TypeError(
        f"frontmatter of type {type(raw_fm).__name__!r} cannot be converted to a dict"
    )
|
||||||
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------#
|
# -----------------------------------------------------------------------------#
|
||||||
# Hauptfunktion für Note-Payload
|
# Hauptfunktion für Note-Payload
|
||||||
# -----------------------------------------------------------------------------#
|
# -----------------------------------------------------------------------------#
|
||||||
|
|
||||||
def make_note_payload(parsed: Dict[str, Any], vault_root: Optional[str] = None) -> Dict[str, Any]:
|
def make_note_payload(parsed: Any, vault_root: Optional[str] = None) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Baut den Payload für eine Note auf Basis der geparsten Markdown-Datei.
|
Baut den Payload für eine Note auf Basis der geparsten Markdown-Datei.
|
||||||
|
|
||||||
Erwartete Struktur von `parsed` (wie vom Parser geliefert):
|
parsed: dict ODER Objekt mit Attributen .frontmatter, .body, optional .path
|
||||||
{
|
|
||||||
"frontmatter": {
|
|
||||||
"id": "...", # note_id (String, Pflicht im Schema)
|
|
||||||
"title": "...", # Titel (String)
|
|
||||||
"type": "...", # Notiztyp (String)
|
|
||||||
"status": "...", # Status (String)
|
|
||||||
"created": "...", # ISO-String, Pflicht im Schema
|
|
||||||
"updated": "...", # ISO-String (empfohlen)
|
|
||||||
"tags": [...], # optional
|
|
||||||
...
|
|
||||||
},
|
|
||||||
"body": "..." # Markdown-Inhalt ohne Frontmatter
|
|
||||||
}
|
|
||||||
|
|
||||||
Rückgabe-Payload (Beispielauszug, kompatibel mit mindnet_notes Schema):
|
Rückgabe-Payload (kompatibel mit mindnet_notes Schema):
|
||||||
{
|
{
|
||||||
"note_id": "...",
|
"note_id": "...",
|
||||||
"title": "...",
|
"title": "...",
|
||||||
|
|
@ -133,26 +140,17 @@ def make_note_payload(parsed: Dict[str, Any], vault_root: Optional[str] = None)
|
||||||
"status": "...",
|
"status": "...",
|
||||||
"created": "...",
|
"created": "...",
|
||||||
"updated": "...",
|
"updated": "...",
|
||||||
"path": "...", # falls vom Parser geliefert
|
"path": "...", # falls vorhanden
|
||||||
"tags": [...], # optional
|
"tags": [...], # optional
|
||||||
"hash_fulltext": "sha256...",
|
"hash_fulltext": "sha256...",
|
||||||
... (weitere, projektdefinierte Felder)
|
...
|
||||||
}
|
}
|
||||||
|
|
||||||
Hash-Bildung:
|
|
||||||
- Gesteuert über MINDNET_HASH_MODE (s. Kopf dieses Moduls).
|
|
||||||
- Datei-Zeitstempel werden NICHT verwendet.
|
|
||||||
|
|
||||||
Rückwärtskompatibilität:
|
|
||||||
- Standard bleibt 'body' (nur Body beeinflusst den Hash).
|
|
||||||
"""
|
"""
|
||||||
fm: Dict[str, Any] = dict(parsed.get("frontmatter") or {})
|
fm, body, path = _extract_parsed(parsed)
|
||||||
body: str = parsed.get("body") or ""
|
|
||||||
|
|
||||||
# Hash nach konfigurierter Strategie berechnen
|
# Hash nach konfigurierter Strategie berechnen
|
||||||
hash_fulltext = compute_hash(body=body, frontmatter=fm, mode=None)
|
hash_fulltext = compute_hash(body=body, frontmatter=fm, mode=None)
|
||||||
|
|
||||||
# Basis-Payload zusammenstellen
|
|
||||||
payload: Dict[str, Any] = {
|
payload: Dict[str, Any] = {
|
||||||
"note_id": fm.get("id") or fm.get("note_id"),
|
"note_id": fm.get("id") or fm.get("note_id"),
|
||||||
"title": fm.get("title"),
|
"title": fm.get("title"),
|
||||||
|
|
@ -160,16 +158,14 @@ def make_note_payload(parsed: Dict[str, Any], vault_root: Optional[str] = None)
|
||||||
"status": fm.get("status"),
|
"status": fm.get("status"),
|
||||||
"created": fm.get("created"),
|
"created": fm.get("created"),
|
||||||
"updated": fm.get("updated"),
|
"updated": fm.get("updated"),
|
||||||
"path": fm.get("path"), # optional, falls der Parser path im FM ablegt
|
"path": path or fm.get("path"),
|
||||||
"tags": fm.get("tags"), # optional
|
"tags": fm.get("tags"),
|
||||||
"hash_fulltext": hash_fulltext,
|
"hash_fulltext": hash_fulltext,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Weitere projekt-/parser-spezifische Felder durchreichen (falls vorhanden)
|
# Bekannte optionale FM-Felder transparent durchreichen (ohne Hash-Einfluss)
|
||||||
# Wichtig: keine nicht-deterministischen Felder in den Hash aufnehmen!
|
|
||||||
passthrough_keys = [
|
passthrough_keys = [
|
||||||
"area", "project", "source", "lang", "slug",
|
"area", "project", "source", "lang", "slug",
|
||||||
# ... hier ggf. weitere bekannte, harmlose FM-Felder zulassen
|
|
||||||
]
|
]
|
||||||
for k in passthrough_keys:
|
for k in passthrough_keys:
|
||||||
if k in fm:
|
if k in fm:
|
||||||
|
|
@ -179,20 +175,38 @@ def make_note_payload(parsed: Dict[str, Any], vault_root: Optional[str] = None)
|
||||||
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------#
|
# -----------------------------------------------------------------------------#
|
||||||
# Optional: kleines Self-Test-Snippet (nur lokal ausführen)
|
# Optional: Self-Test
|
||||||
# -----------------------------------------------------------------------------#
|
# -----------------------------------------------------------------------------#
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
    # Smoke test: push one attribute-style and one dict-style parser result
    # through make_note_payload() once per supported hash strategy and print
    # the resulting payloads.
    _STAMP = "2025-09-08T10:00:00+00:00"

    class _PN:
        """Minimal stand-in for an attribute-based parser result."""

        def __init__(self):
            self.frontmatter = {
                "id": "demo-123",
                "title": "Demo",
                "type": "note",
                "status": "active",
                "created": _STAMP,
                "updated": _STAMP,
                "tags": ["demo", "test"],
            }
            self.body = "# Überschrift\n\nText."
            self.path = "demo.md"

    dict_frontmatter = {
        "id": "demo-456",
        "title": "Demo2",
        "type": "note",
        "status": "active",
        "created": _STAMP,
        "updated": _STAMP,
    }
    parsed_dict = {
        "frontmatter": dict_frontmatter,
        "body": "Text2",
        "path": "demo2.md",
    }

    for strategy in ("body", "body+frontmatter", "frontmatter"):
        # The strategy is read from the environment when the hash is computed.
        os.environ["MINDNET_HASH_MODE"] = strategy
        print(f"\n-- MODE={strategy}")
        for sample in (_PN(), parsed_dict):
            print(make_note_payload(sample))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user