aufräumen und löschen von Alt-Scripten WP19b
This commit is contained in:
parent
f08a331bc6
commit
e3858e8bc3
|
|
@ -1,176 +0,0 @@
|
||||||
"""
|
|
||||||
FILE: app/core/chunk_payload.py
|
|
||||||
DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
|
|
||||||
FEATURES:
|
|
||||||
- Inkludiert Nachbarschafts-IDs (prev/next) und Titel.
|
|
||||||
- FIX 3: Robuste Erkennung des Inputs (Frontmatter-Dict vs. Note-Objekt), damit Overrides ankommen.
|
|
||||||
VERSION: 2.3.0
|
|
||||||
STATUS: Active
|
|
||||||
DEPENDENCIES: yaml, os
|
|
||||||
EXTERNAL_CONFIG: config/types.yaml
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
import os, yaml
|
|
||||||
|
|
||||||
def _env(n: str, d: Optional[str]=None) -> str:
|
|
||||||
v = os.getenv(n)
|
|
||||||
return v if v is not None else (d or "")
|
|
||||||
|
|
||||||
def _load_types() -> dict:
    """Load the type registry YAML named by MINDNET_TYPES_FILE.

    Defaults to ./config/types.yaml. Returns {} on any failure
    (missing file, parse error, empty document).
    """
    registry_path = _env("MINDNET_TYPES_FILE", "./config/types.yaml")
    try:
        with open(registry_path, "r", encoding="utf-8") as handle:
            data = yaml.safe_load(handle)
    except Exception:
        return {}
    return data or {}
|
|
||||||
|
|
||||||
def _get_types_map(reg: dict) -> dict:
|
|
||||||
if isinstance(reg, dict) and isinstance(reg.get("types"), dict):
|
|
||||||
return reg["types"]
|
|
||||||
return reg if isinstance(reg, dict) else {}
|
|
||||||
|
|
||||||
def _get_defaults(reg: dict) -> dict:
|
|
||||||
if isinstance(reg, dict) and isinstance(reg.get("defaults"), dict):
|
|
||||||
return reg["defaults"]
|
|
||||||
if isinstance(reg, dict) and isinstance(reg.get("global"), dict):
|
|
||||||
return reg["global"]
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def _as_float(x: Any):
|
|
||||||
try: return float(x)
|
|
||||||
except Exception: return None
|
|
||||||
|
|
||||||
def _resolve_chunk_profile_from_config(note_type: str, reg: dict) -> Optional[str]:
    """Resolve the chunking profile for *note_type* from the registry.

    Lookup order: the type-level entry first, then registry defaults.
    Both the current key 'chunking_profile' and the legacy 'chunk_profile'
    are honoured. Returns None when no non-empty string is configured.
    """
    types_map = _get_types_map(reg)
    type_entry = types_map.get(note_type, {}) if isinstance(types_map, dict) else {}
    for entry in (type_entry, _get_defaults(reg)):
        if not isinstance(entry, dict):
            continue
        profile = entry.get("chunking_profile") or entry.get("chunk_profile")
        if isinstance(profile, str) and profile:
            return profile
    return None
|
|
||||||
|
|
||||||
def _resolve_retriever_weight_from_config(note_type: str, reg: dict) -> float:
    """Resolve the retriever weight from config only (type level > defaults).

    Called when the frontmatter carries no explicit weight.
    Falls back to 1.0 when neither level provides a usable number.
    """
    types_map = _get_types_map(reg)
    type_entry = types_map.get(note_type, {}) if isinstance(types_map, dict) else {}
    for entry in (type_entry, _get_defaults(reg)):
        if isinstance(entry, dict) and entry.get("retriever_weight") is not None:
            weight = _as_float(entry.get("retriever_weight"))
            if weight is not None:
                return float(weight)
    return 1.0
|
|
||||||
|
|
||||||
def _as_list(x):
|
|
||||||
if x is None: return []
|
|
||||||
if isinstance(x, list): return x
|
|
||||||
return [x]
|
|
||||||
|
|
||||||
def make_chunk_payloads(note: Dict[str, Any],
                        note_path: str,
                        chunks_from_chunker: List[Any],
                        *,
                        note_text: str = "",
                        types_cfg: Optional[dict] = None,
                        file_path: Optional[str] = None) -> List[Dict[str, Any]]:
    """Build the payload dicts stored in 'mindnet_chunks', one per chunk.

    The 'note' argument can be either:
      A) a container object/dict with a "frontmatter" key (legacy / tests), or
      B) the frontmatter dictionary itself (how ingestion.py calls it).

    Args:
        note: Frontmatter dict or container holding one (see above).
        note_path: Logical note path stored under "path".
        chunks_from_chunker: Chunk objects or dicts produced by the chunker.
        note_text: Unused here; accepted for call-site compatibility.
        types_cfg: Pre-loaded type registry; loaded from disk when absent.
        file_path: Physical source path; falls back to note_path.

    Returns:
        List of payload dicts, one per input chunk, in input order.
    """
    # --- FIX 3: intelligent detection of the input shape ---
    # Is 'note' a container WITH 'frontmatter', or IS it the frontmatter?
    if isinstance(note, dict) and "frontmatter" in note and isinstance(note["frontmatter"], dict):
        # Case A: container — unwrap it
        fm = note["frontmatter"]
    else:
        # Case B: plain frontmatter dict (ingestion.py calls it this way)
        fm = note or {}

    note_type = fm.get("type") or note.get("type") or "concept"

    # Title extraction (fallback chain: frontmatter title > note title > id)
    title = fm.get("title") or note.get("title") or fm.get("id") or "Untitled"

    reg = types_cfg if isinstance(types_cfg, dict) else _load_types()

    # --- Profile resolution ---
    # 'fm' is now correct, so frontmatter overrides are honoured here:
    cp = fm.get("chunking_profile") or fm.get("chunk_profile")
    if not cp:
        cp = _resolve_chunk_profile_from_config(note_type, reg)
    if not cp:
        cp = "sliding_standard"

    # --- Retriever weight resolution (frontmatter > config > 1.0) ---
    rw = fm.get("retriever_weight")
    if rw is None:
        rw = _resolve_retriever_weight_from_config(note_type, reg)
    try:
        rw = float(rw)
    except Exception:
        rw = 1.0

    tags = fm.get("tags") or []
    if isinstance(tags, str):
        tags = [tags]

    out: List[Dict[str, Any]] = []
    for idx, ch in enumerate(chunks_from_chunker):
        # Extract attributes; chunks may be objects or plain dicts.
        # NOTE(review): these `or` fallbacks treat falsy values (0, "") as
        # missing — e.g. an object chunk whose index is 0 silently falls
        # back to the enumerate index. Confirm chunker never emits falsy
        # values that must be preserved.
        cid = getattr(ch, "id", None) or (ch.get("id") if isinstance(ch, dict) else None)
        nid = getattr(ch, "note_id", None) or (ch.get("note_id") if isinstance(ch, dict) else fm.get("id"))
        index = getattr(ch, "index", None) or (ch.get("index") if isinstance(ch, dict) else idx)
        text = getattr(ch, "text", None) or (ch.get("text") if isinstance(ch, dict) else "")
        window = getattr(ch, "window", None) or (ch.get("window") if isinstance(ch, dict) else text)
        prev_id = getattr(ch, "neighbors_prev", None) or (ch.get("neighbors_prev") if isinstance(ch, dict) else None)
        next_id = getattr(ch, "neighbors_next", None) or (ch.get("neighbors_next") if isinstance(ch, dict) else None)

        pl: Dict[str, Any] = {
            "note_id": nid,
            "chunk_id": cid,
            "title": title,
            "index": int(index),
            "ord": int(index) + 1,  # 1-based position for display
            "type": note_type,
            "tags": tags,
            "text": text,
            "window": window,
            "neighbors_prev": _as_list(prev_id),
            "neighbors_next": _as_list(next_id),
            "section": getattr(ch, "section", None) or (ch.get("section") if isinstance(ch, dict) else ""),
            "path": note_path,
            "source_path": file_path or note_path,
            "retriever_weight": float(rw),
            "chunk_profile": cp,  # finally carries the override value
        }

        # Cleanup: drop legacy alias keys (defensive; never set above)
        for alias in ("chunk_num", "Chunk_Number"):
            pl.pop(alias, None)

        out.append(pl)

    return out
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
"""
|
|
||||||
FILE: app/core/chunker.py
|
|
||||||
DESCRIPTION: Facade für das Chunking-Package. Stellt 100% Abwärtskompatibilität sicher.
|
|
||||||
VERSION: 3.3.0
|
|
||||||
"""
|
|
||||||
from .chunking.chunking_processor import assemble_chunks
|
|
||||||
from .chunking.chunking_utils import get_chunk_config, extract_frontmatter_from_text
|
|
||||||
from .chunking.chunking_models import Chunk
|
|
||||||
|
|
||||||
__all__ = ["assemble_chunks", "get_chunk_config", "extract_frontmatter_from_text", "Chunk"]
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
"""
|
|
||||||
FILE: app/core/ingestion.py
|
|
||||||
DESCRIPTION: Facade für das Ingestion-Package. Stellt 100% Abwärtskompatibilität sicher.
|
|
||||||
WP-14: Modularisierung der Ingestion-Pipeline abgeschlossen.
|
|
||||||
Nutzt interne Module mit 'ingestion_' Präfix für maximale Wartbarkeit.
|
|
||||||
VERSION: 2.13.0
|
|
||||||
STATUS: Active
|
|
||||||
"""
|
|
||||||
# Export der Hauptklasse für externe Module (z.B. scripts/import_markdown.py)
|
|
||||||
from .ingestion.ingestion_processor import IngestionService
|
|
||||||
|
|
||||||
# Export der Hilfsfunktionen für Abwärtskompatibilität
|
|
||||||
from .ingestion.ingestion_utils import extract_json_from_response, load_type_registry
|
|
||||||
|
|
||||||
__all__ = ["IngestionService", "extract_json_from_response", "load_type_registry"]
|
|
||||||
|
|
@ -18,7 +18,7 @@ from app.core.parser import (
|
||||||
read_markdown, pre_scan_markdown, normalize_frontmatter,
|
read_markdown, pre_scan_markdown, normalize_frontmatter,
|
||||||
validate_required_frontmatter, NoteContext
|
validate_required_frontmatter, NoteContext
|
||||||
)
|
)
|
||||||
from app.core.chunker import assemble_chunks
|
from app.core.chunking import assemble_chunks
|
||||||
from app.core.qdrant import QdrantConfig, get_client, ensure_collections, ensure_payload_indexes
|
from app.core.qdrant import QdrantConfig, get_client, ensure_collections, ensure_payload_indexes
|
||||||
from app.core.qdrant_points import points_for_chunks, points_for_note, points_for_edges, upsert_batch
|
from app.core.qdrant_points import points_for_chunks, points_for_note, points_for_edges, upsert_batch
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ def resolve_note_type(registry: dict, requested: Optional[str]) -> str:
|
||||||
|
|
||||||
def get_chunk_config_by_profile(registry: dict, profile_name: str, note_type: str) -> Dict[str, Any]:
|
def get_chunk_config_by_profile(registry: dict, profile_name: str, note_type: str) -> Dict[str, Any]:
|
||||||
"""Holt die Chunker-Parameter für ein spezifisches Profil aus der Registry."""
|
"""Holt die Chunker-Parameter für ein spezifisches Profil aus der Registry."""
|
||||||
from app.core.chunker import get_chunk_config
|
from app.core.chunking import get_chunk_config
|
||||||
profiles = registry.get("chunking_profiles", {})
|
profiles = registry.get("chunking_profiles", {})
|
||||||
if profile_name in profiles:
|
if profile_name in profiles:
|
||||||
cfg = profiles[profile_name].copy()
|
cfg = profiles[profile_name].copy()
|
||||||
|
|
|
||||||
|
|
@ -1,268 +0,0 @@
|
||||||
"""
|
|
||||||
FILE: app/core/note_payload.py
|
|
||||||
DESCRIPTION: Baut das JSON-Objekt.
|
|
||||||
FEATURES:
|
|
||||||
1. Multi-Hash: Berechnet immer 'body' AND 'full' Hashes für flexible Change Detection.
|
|
||||||
2. Config-Fix: Liest korrekt 'chunking_profile' aus types.yaml (statt Legacy 'chunk_profile').
|
|
||||||
VERSION: 2.3.0
|
|
||||||
STATUS: Active
|
|
||||||
DEPENDENCIES: yaml, os, json, pathlib, hashlib
|
|
||||||
EXTERNAL_CONFIG: config/types.yaml
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any, Dict, Tuple, Optional
|
|
||||||
import os
|
|
||||||
import json
|
|
||||||
import pathlib
|
|
||||||
import hashlib
|
|
||||||
|
|
||||||
try:
|
|
||||||
import yaml # type: ignore
|
|
||||||
except Exception:
|
|
||||||
yaml = None
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Helper
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _as_dict(x) -> Dict[str, Any]:
|
|
||||||
"""Versucht, ein ParsedMarkdown-ähnliches Objekt in ein Dict zu überführen."""
|
|
||||||
if isinstance(x, dict):
|
|
||||||
return dict(x)
|
|
||||||
|
|
||||||
out: Dict[str, Any] = {}
|
|
||||||
for attr in (
|
|
||||||
"frontmatter",
|
|
||||||
"body",
|
|
||||||
"id",
|
|
||||||
"note_id",
|
|
||||||
"title",
|
|
||||||
"path",
|
|
||||||
"tags",
|
|
||||||
"type",
|
|
||||||
"created",
|
|
||||||
"modified",
|
|
||||||
"date",
|
|
||||||
):
|
|
||||||
if hasattr(x, attr):
|
|
||||||
val = getattr(x, attr)
|
|
||||||
if val is not None:
|
|
||||||
out[attr] = val
|
|
||||||
|
|
||||||
if not out:
|
|
||||||
out["raw"] = str(x)
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
def _pick_args(*args, **kwargs) -> Tuple[Optional[str], Optional[dict]]:
|
|
||||||
path = kwargs.get("path") or (args[0] if args else None)
|
|
||||||
types_cfg = kwargs.get("types_cfg") or kwargs.get("types") or None
|
|
||||||
return path, types_cfg
|
|
||||||
|
|
||||||
|
|
||||||
def _env_float(name: str, default: float) -> float:
|
|
||||||
try:
|
|
||||||
return float(os.environ.get(name, default))
|
|
||||||
except Exception:
|
|
||||||
return default
|
|
||||||
|
|
||||||
|
|
||||||
def _ensure_list(x) -> list:
|
|
||||||
if x is None:
|
|
||||||
return []
|
|
||||||
if isinstance(x, list):
|
|
||||||
return [str(i) for i in x]
|
|
||||||
if isinstance(x, (set, tuple)):
|
|
||||||
return [str(i) for i in x]
|
|
||||||
return [str(x)]
|
|
||||||
|
|
||||||
# --- Hash Logic ---
|
|
||||||
def _compute_hash(content: str) -> str:
|
|
||||||
"""Berechnet einen SHA-256 Hash für den gegebenen String."""
|
|
||||||
if not content:
|
|
||||||
return ""
|
|
||||||
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
|
||||||
|
|
||||||
def _get_hash_source_content(n: Dict[str, Any], mode: str) -> str:
|
|
||||||
"""
|
|
||||||
Stellt den String zusammen, der gehasht werden soll.
|
|
||||||
"""
|
|
||||||
body = str(n.get("body") or "")
|
|
||||||
|
|
||||||
if mode == "body":
|
|
||||||
return body
|
|
||||||
|
|
||||||
if mode == "full":
|
|
||||||
fm = n.get("frontmatter") or {}
|
|
||||||
# Wichtig: Sortierte Keys für deterministisches Verhalten!
|
|
||||||
# Wir nehmen alle steuernden Metadaten auf
|
|
||||||
meta_parts = []
|
|
||||||
# Hier checken wir keys, die eine Neu-Indizierung rechtfertigen würden
|
|
||||||
for k in sorted(["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight"]):
|
|
||||||
val = fm.get(k)
|
|
||||||
if val is not None:
|
|
||||||
meta_parts.append(f"{k}:{val}")
|
|
||||||
|
|
||||||
meta_str = "|".join(meta_parts)
|
|
||||||
return f"{meta_str}||{body}"
|
|
||||||
|
|
||||||
return body
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Type-Registry laden
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _load_types_config(explicit_cfg: Optional[dict] = None) -> dict:
|
|
||||||
if explicit_cfg and isinstance(explicit_cfg, dict):
|
|
||||||
return explicit_cfg
|
|
||||||
|
|
||||||
path = os.getenv("MINDNET_TYPES_FILE") or "./config/types.yaml"
|
|
||||||
if not os.path.isfile(path) or yaml is None:
|
|
||||||
return {}
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(path, "r", encoding="utf-8") as f:
|
|
||||||
data = yaml.safe_load(f) or {}
|
|
||||||
return data if isinstance(data, dict) else {}
|
|
||||||
except Exception:
|
|
||||||
return {}
|
|
||||||
|
|
||||||
|
|
||||||
def _cfg_for_type(note_type: str, reg: dict) -> dict:
|
|
||||||
if not isinstance(reg, dict):
|
|
||||||
return {}
|
|
||||||
types = reg.get("types") if isinstance(reg.get("types"), dict) else reg
|
|
||||||
return types.get(note_type, {}) if isinstance(types, dict) else {}
|
|
||||||
|
|
||||||
|
|
||||||
def _cfg_defaults(reg: dict) -> dict:
|
|
||||||
if not isinstance(reg, dict):
|
|
||||||
return {}
|
|
||||||
for key in ("defaults", "default", "global"):
|
|
||||||
v = reg.get(key)
|
|
||||||
if isinstance(v, dict):
|
|
||||||
return v
|
|
||||||
return {}
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Haupt-API
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
    """Build the note payload for 'mindnet_notes'.

    Includes multi-strategy hash calculation (body & full) and the corrected
    config lookups ('chunking_profile' key from types.yaml).

    Args:
        note: Dict or ParsedMarkdown-like object (coerced via _as_dict).
        *args: Optional positional path.
        **kwargs: 'path', 'types_cfg'/'types', 'hash_source', 'hash_normalize'.

    Returns:
        JSON-serializable payload dict (verified via a json round-trip).
    """
    n = _as_dict(note)
    path_arg, types_cfg_explicit = _pick_args(*args, **kwargs)
    reg = _load_types_config(types_cfg_explicit)

    # Hash config (parameters for source/normalize; the mode itself is
    # hardcoded to "both" — see the multi-hash loop below)
    hash_source = kwargs.get("hash_source", "parsed")
    hash_normalize = kwargs.get("hash_normalize", "canonical")

    fm = n.get("frontmatter") or {}
    fm_type = fm.get("type") or n.get("type") or "concept"
    note_type = str(fm_type)

    cfg_type = _cfg_for_type(note_type, reg)
    cfg_def = _cfg_defaults(reg)

    # --- retriever_weight: frontmatter > type config > defaults > env default ---
    default_rw = _env_float("MINDNET_DEFAULT_RETRIEVER_WEIGHT", 1.0)
    retriever_weight = fm.get("retriever_weight")
    if retriever_weight is None:
        retriever_weight = cfg_type.get(
            "retriever_weight",
            cfg_def.get("retriever_weight", default_rw),
        )
    try:
        retriever_weight = float(retriever_weight)
    except Exception:
        retriever_weight = default_rw

    # --- chunk_profile (FIXED LOGIC) ---
    # 1. Frontmatter override (both spellings allowed)
    chunk_profile = fm.get("chunking_profile") or fm.get("chunk_profile")

    # 2. Type config (correct key 'chunking_profile' from types.yaml)
    if chunk_profile is None:
        chunk_profile = cfg_type.get("chunking_profile")

    # 3. Default config (fallback to sliding_standard instead of legacy 'medium')
    if chunk_profile is None:
        chunk_profile = cfg_def.get("chunking_profile", "sliding_standard")

    # 4. Safety fallback for non-string / empty values
    if not isinstance(chunk_profile, str) or not chunk_profile:
        chunk_profile = "sliding_standard"

    # --- edge_defaults: frontmatter > type config > defaults > [] ---
    edge_defaults = fm.get("edge_defaults")
    if edge_defaults is None:
        edge_defaults = cfg_type.get(
            "edge_defaults",
            cfg_def.get("edge_defaults", []),
        )
    edge_defaults = _ensure_list(edge_defaults)

    # --- Base metadata ---
    note_id = n.get("note_id") or n.get("id") or fm.get("id")
    title = n.get("title") or fm.get("title") or ""
    path = n.get("path") or path_arg
    if isinstance(path, pathlib.Path):
        path = str(path)

    payload: Dict[str, Any] = {
        "note_id": note_id,
        "title": title,
        "type": note_type,
        "path": path or "",
        "retriever_weight": retriever_weight,
        "chunk_profile": chunk_profile,
        "edge_defaults": edge_defaults,
        "hashes": {}  # initialised here, filled by the multi-hash loop below
    }

    # --- MULTI-HASH CALCULATION (strategy decoupling) ---
    # Always compute BOTH strategies and store them; ingestion.py then
    # decides via its ENV variable which one to compare.
    modes_to_calc = ["body", "full"]

    for mode in modes_to_calc:
        content_to_hash = _get_hash_source_content(n, mode)
        computed_hash = _compute_hash(content_to_hash)
        # Key schema: mode:source:normalize (e.g. "full:parsed:canonical")
        key = f"{mode}:{hash_source}:{hash_normalize}"
        payload["hashes"][key] = computed_hash

    # Tags / keywords (first non-empty source wins)
    tags = fm.get("tags") or fm.get("keywords") or n.get("tags")
    if tags:
        payload["tags"] = _ensure_list(tags)

    # Aliases
    aliases = fm.get("aliases")
    if aliases:
        payload["aliases"] = _ensure_list(aliases)

    # Timestamps (stringified; frontmatter takes precedence)
    for k in ("created", "modified", "date"):
        v = fm.get(k) or n.get(k)
        if v:
            payload[k] = str(v)

    # Fulltext
    if "body" in n and n["body"]:
        payload["fulltext"] = str(n["body"])

    # JSON round-trip as a cheap serializability check (raises on failure)
    json.loads(json.dumps(payload, ensure_ascii=False))

    return payload
|
|
||||||
|
|
@ -1,199 +0,0 @@
|
||||||
"""
|
|
||||||
FILE: app/services/semantic_analyzer.py
|
|
||||||
DESCRIPTION: KI-gestützte Kanten-Validierung. Nutzt LLM (Background-Priority), um Kanten präzise einem Chunk zuzuordnen.
|
|
||||||
WP-20 Fix: Volle Kompatibilität mit der provider-basierten Routing-Logik (OpenRouter Primary).
|
|
||||||
WP-22: Integration von valid_types zur Halluzinations-Vermeidung.
|
|
||||||
FIX: Mistral-sicheres JSON-Parsing (<s> & [OUT] Handling) und 100% Logik-Erhalt.
|
|
||||||
VERSION: 2.2.6
|
|
||||||
STATUS: Active
|
|
||||||
DEPENDENCIES: app.services.llm_service, app.services.edge_registry, json, logging, re
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
from typing import List, Optional, Any
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
# Importe
|
|
||||||
from app.services.llm_service import LLMService
|
|
||||||
# WP-22: Registry für Vokabular-Erzwingung
|
|
||||||
from app.services.edge_registry import registry as edge_registry
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class SemanticAnalyzer:
    """AI-assisted edge validation.

    Uses the LLM (background priority) to precisely assign candidate edges
    to a chunk. WP-20: compatible with the provider-based routing logic
    (OpenRouter primary). WP-22: integrates valid_types from the edge
    registry to avoid hallucinated relation kinds.
    """

    def __init__(self):
        self.llm = LLMService()

    def _is_valid_edge_string(self, edge_str: str) -> bool:
        """Return True if *edge_str* is a valid edge of the form 'kind:target'.

        Prevents LLM chatter from slipping through as an edge.
        """
        if not isinstance(edge_str, str) or ":" not in edge_str:
            return False

        parts = edge_str.split(":", 1)
        kind = parts[0].strip()
        target = parts[1].strip()

        # Rule 1: a 'kind' (relation type) must not contain spaces.
        if " " in kind:
            return False

        # Rule 2: plausible length for the kind (rejects whole sentences).
        if len(kind) > 40 or len(kind) < 2:
            return False

        # Rule 3: the target must not be empty.
        if not target:
            return False

        return True

    def _extract_json_safely(self, text: str) -> Any:
        """Extract JSON data from an LLM response, stripping control tokens.

        Handles Mistral/Llama markers (<s>, [OUT]) and fenced Markdown
        blocks, with two levels of bracket-based recovery for cloud
        providers. Returns [] when nothing parseable is found.
        """
        if not text:
            return []

        # 1. Strip Mistral/Llama control tokens and tags.
        clean = text.replace("<s>", "").replace("</s>", "")
        clean = clean.replace("[OUT]", "").replace("[/OUT]", "")
        clean = clean.strip()

        # 2. Prefer a fenced Markdown JSON block when present.
        match = re.search(r"```(?:json)?\s*(.*?)\s*```", clean, re.DOTALL)
        payload = match.group(1) if match else clean

        try:
            return json.loads(payload.strip())
        except json.JSONDecodeError:
            # 3. Recovery: substring from the first '[' to the last ']'.
            start = payload.find('[')
            end = payload.rfind(']') + 1
            if start != -1 and end > start:
                try:
                    return json.loads(payload[start:end])
                except json.JSONDecodeError:  # FIX: was a bug-hiding bare 'except'
                    pass

            # 4. Second recovery: from the first '{' to the last '}'.
            start_obj = payload.find('{')
            end_obj = payload.rfind('}') + 1
            if start_obj != -1 and end_obj > start_obj:
                try:
                    return json.loads(payload[start_obj:end_obj])
                except json.JSONDecodeError:  # FIX: was a bug-hiding bare 'except'
                    pass
            return []

    async def assign_edges_to_chunk(self, chunk_text: str, all_edges: List[str], note_type: str) -> List[str]:
        """Ask the LLM which of *all_edges* are relevant for *chunk_text*.

        WP-20: primarily uses the configured provider (e.g. OpenRouter).

        Returns:
            A (possibly empty) list of validated 'kind:target' strings.
            FIX: previously the method fell through and implicitly returned
            None when no candidate survived validation, violating the
            declared List[str] return type.
        """
        if not all_edges:
            return []

        # 1. Determine provider and model dynamically from settings.
        provider = self.llm.settings.MINDNET_LLM_PROVIDER
        model = self.llm.settings.OPENROUTER_MODEL if provider == "openrouter" else self.llm.settings.GEMINI_MODEL

        # 2. Load the provider-specific prompt template.
        prompt_template = self.llm.get_prompt("edge_allocation_template", provider)

        if not prompt_template or not isinstance(prompt_template, str):
            logger.warning("⚠️ [SemanticAnalyzer] Prompt 'edge_allocation_template' ungültig. Nutze Recovery-Template.")
            prompt_template = (
                "TASK: Wähle aus den Kandidaten die relevanten Kanten für den Text.\n"
                "TEXT: {chunk_text}\n"
                "KANDIDATEN: {edge_list}\n"
                "OUTPUT: JSON Liste von Strings [\"kind:target\"]."
            )

        # 3. Prepare template data (WP-22 vocabulary enforcement).
        edge_registry.ensure_latest()
        valid_types_str = ", ".join(sorted(list(edge_registry.valid_types)))
        edges_str = "\n".join([f"- {e}" for e in all_edges])

        logger.debug(f"🔍 [SemanticAnalyzer] Request: {len(chunk_text)} chars Text, {len(all_edges)} Candidates.")

        # 4. Fill the prompt with an explicit format check (no shortcut).
        try:
            # Cap the text at a sensible length for the context window.
            final_prompt = prompt_template.format(
                chunk_text=chunk_text[:6000],
                edge_list=edges_str,
                valid_types=valid_types_str
            )
        except Exception as format_err:
            logger.error(f"❌ [SemanticAnalyzer] Prompt Formatting failed: {format_err}")
            return []

        try:
            # 5. LLM call with background priority & semaphore control.
            response_json = await self.llm.generate_raw_response(
                prompt=final_prompt,
                force_json=True,
                max_retries=3,
                base_delay=2.0,
                priority="background",
                provider=provider,
                model_override=model
            )

            # 6. Mistral-safe JSON parsing via helper.
            data = self._extract_json_safely(response_json)
            if not data:
                return []

            # 7. Robust normalisation (list vs dict recovery).
            raw_candidates = []
            if isinstance(data, list):
                raw_candidates = data
            elif isinstance(data, dict):
                logger.info("ℹ️ [SemanticAnalyzer] LLM returned dict, trying recovery.")
                for key in ["edges", "results", "kanten", "matches"]:
                    if key in data and isinstance(data[key], list):
                        raw_candidates.extend(data[key])
                        break
                # Still empty: fall back to key/value pairs as makeshift edges.
                if not raw_candidates:
                    for k, v in data.items():
                        if isinstance(v, str):
                            raw_candidates.append(f"{k}:{v}")
                        elif isinstance(v, list):
                            for target in v:
                                if isinstance(target, str):
                                    raw_candidates.append(f"{k}:{target}")

            # 8. Strict validation against the edge format.
            valid_edges = []
            for e in raw_candidates:
                e_str = str(e).strip()
                if self._is_valid_edge_string(e_str):
                    valid_edges.append(e_str)
                else:
                    logger.debug(f" [SemanticAnalyzer] Rejected invalid edge format: '{e_str}'")

            if valid_edges:
                logger.info(f"✅ [SemanticAnalyzer] Assigned {len(valid_edges)} edges to chunk.")
            # FIX: always return the (possibly empty) list instead of
            # falling off the end and returning None.
            return valid_edges

        except Exception as e:
            logger.error(f"💥 [SemanticAnalyzer] Critical error during analysis: {e}", exc_info=True)
            return []

    async def close(self):
        """Release the underlying LLM client."""
        if self.llm:
            await self.llm.close()
|
|
||||||
|
|
||||||
# Module-level singleton: one shared SemanticAnalyzer per process,
# created lazily on first use.
_analyzer_instance = None

def get_semantic_analyzer():
    """Return the process-wide SemanticAnalyzer, instantiating it on first call."""
    global _analyzer_instance
    if _analyzer_instance is None:
        _analyzer_instance = SemanticAnalyzer()
    return _analyzer_instance
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import argparse, os, json, glob, statistics as stats
|
import argparse, os, json, glob, statistics as stats
|
||||||
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
||||||
from app.core.chunker import assemble_chunks
|
from app.core.chunking import assemble_chunks
|
||||||
|
|
||||||
def iter_md(root: str):
|
def iter_md(root: str):
|
||||||
for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True):
|
for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True):
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from pathlib import Path
|
||||||
# Pfad-Setup
|
# Pfad-Setup
|
||||||
sys.path.insert(0, os.path.abspath("."))
|
sys.path.insert(0, os.path.abspath("."))
|
||||||
|
|
||||||
from app.core.chunker import assemble_chunks, _extract_all_edges_from_md
|
from app.core.chunking import assemble_chunks, _extract_all_edges_from_md
|
||||||
from app.core.derive_edges import build_edges_for_note
|
from app.core.derive_edges import build_edges_for_note
|
||||||
|
|
||||||
# Mock für Settings, falls nötig
|
# Mock für Settings, falls nötig
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import argparse, os, glob
|
import argparse, os, glob
|
||||||
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
||||||
from app.core.chunker import assemble_chunks
|
from app.core.chunking import assemble_chunks
|
||||||
|
|
||||||
def iter_md(root: str):
|
def iter_md(root: str):
|
||||||
return [p for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True)]
|
return [p for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True)]
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ from slugify import slugify
|
||||||
from app.core.parser import read_markdown, normalize_frontmatter
|
from app.core.parser import read_markdown, normalize_frontmatter
|
||||||
from app.core.parser import FRONTMATTER_RE # für Re-Inject
|
from app.core.parser import FRONTMATTER_RE # für Re-Inject
|
||||||
from app.core.validate_note import validate_note_payload
|
from app.core.validate_note import validate_note_payload
|
||||||
from app.core.note_payload import make_note_payload
|
from app.core.ingestion.ingestion_note_payload import make_note_payload
|
||||||
|
|
||||||
DATE_IN_NAME = re.compile(r"(?P<y>\d{4})[-_\.]?(?P<m>\d{2})[-_\.]?(?P<d>\d{2})")
|
DATE_IN_NAME = re.compile(r"(?P<y>\d{4})[-_\.]?(?P<m>\d{2})[-_\.]?(?P<d>\d{2})")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,8 @@ from jsonschema import ValidationError
|
||||||
from app.core.parser import read_markdown, validate_required_frontmatter, normalize_frontmatter
|
from app.core.parser import read_markdown, validate_required_frontmatter, normalize_frontmatter
|
||||||
from app.core.note_payload import make_note_payload
|
from app.core.note_payload import make_note_payload
|
||||||
from app.core.validate_note import validate_note_payload
|
from app.core.validate_note import validate_note_payload
|
||||||
|
from app.core.ingestion.ingestion_note_payload import make_note_payload
|
||||||
|
|
||||||
|
|
||||||
def iter_md_files(root: str, include: str, exclude: list[str]) -> list[str]:
|
def iter_md_files(root: str, include: str, exclude: list[str]) -> list[str]:
|
||||||
# include z.B. "**/*.md"
|
# include z.B. "**/*.md"
|
||||||
|
|
|
||||||
|
|
@ -10,9 +10,9 @@ import argparse, os, json
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
||||||
from app.core.note_payload import make_note_payload
|
from app.core.chunking import assemble_chunks
|
||||||
from app.core.chunker import assemble_chunks
|
from app.core.ingestion.ingestion_note_payload import make_note_payload
|
||||||
from app.core.chunk_payload import make_chunk_payloads
|
from app.core.ingestion.ingestion_chunk_payload import make_chunk_payloads
|
||||||
try:
|
try:
|
||||||
from app.core.derive_edges import build_edges_for_note
|
from app.core.derive_edges import build_edges_for_note
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,10 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import argparse, os, glob, json
|
import argparse, os, glob, json
|
||||||
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
||||||
from app.core.chunker import assemble_chunks
|
from app.core.chunking import assemble_chunks
|
||||||
from app.core.chunk_payload import make_chunk_payloads
|
from app.core.ingestion.ingestion_note_payload import make_note_payload
|
||||||
from app.core.note_payload import make_note_payload
|
from app.core.ingestion.ingestion_chunk_payload import make_chunk_payloads
|
||||||
|
|
||||||
|
|
||||||
def iter_md(root: str) -> list[str]:
|
def iter_md(root: str) -> list[str]:
|
||||||
return [p for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True)]
|
return [p for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True)]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user