Compare commits
No commits in common. "SectionType" and "main" have entirely different histories.
SectionType
...
main
|
|
@ -1,17 +1,13 @@
|
|||
"""
|
||||
FILE: app/core/chunking/chunking_models.py
|
||||
DESCRIPTION: Datenklassen für das Chunking-System.
|
||||
WP-26 v1.0: Erweiterung um section_type für typ-spezifische Sektionen.
|
||||
"""
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Dict, Optional, Any
|
||||
|
||||
@dataclass
|
||||
class RawBlock:
|
||||
"""
|
||||
Repräsentiert einen logischen Block aus dem Markdown-Parsing.
|
||||
WP-26 v1.0: Erweitert um section_type für typ-spezifische Sektionen.
|
||||
"""
|
||||
"""Repräsentiert einen logischen Block aus dem Markdown-Parsing."""
|
||||
kind: str
|
||||
text: str
|
||||
level: Optional[int]
|
||||
|
|
@ -19,17 +15,10 @@ class RawBlock:
|
|||
section_title: Optional[str]
|
||||
exclude_from_chunking: bool = False # WP-24c v4.2.0: Flag für Edge-Zonen, die nicht gechunkt werden sollen
|
||||
is_meta_content: bool = False # WP-24c v4.2.6: Flag für Meta-Content (Callouts), der später entfernt wird
|
||||
# WP-26 v1.0: Section-Type für typ-spezifische Sektionen
|
||||
section_type: Optional[str] = None # z.B. "insight", "decision", "experience"
|
||||
# WP-26 v1.0: Block-ID für Intra-Note-Links (z.B. "^sit" aus "## Situation ^sit")
|
||||
block_id: Optional[str] = None
|
||||
|
||||
@dataclass
|
||||
class Chunk:
|
||||
"""
|
||||
Das finale Chunk-Objekt für Embedding und Graph-Speicherung.
|
||||
WP-26 v1.0: Erweitert um section_type für effektiven Typ.
|
||||
"""
|
||||
"""Das finale Chunk-Objekt für Embedding und Graph-Speicherung."""
|
||||
id: str
|
||||
note_id: str
|
||||
index: int
|
||||
|
|
@ -41,9 +30,4 @@ class Chunk:
|
|||
neighbors_prev: Optional[str]
|
||||
neighbors_next: Optional[str]
|
||||
candidate_pool: List[Dict[str, Any]] = field(default_factory=list)
|
||||
suggested_edges: Optional[List[str]] = None
|
||||
# WP-26 v1.0: Section-Type für typ-spezifische Sektionen
|
||||
# Wenn gesetzt, wird dieser als "effektiver Typ" verwendet statt note_type
|
||||
section_type: Optional[str] = None
|
||||
# WP-26 v1.0: Block-ID für Intra-Note-Links
|
||||
block_id: Optional[str] = None
|
||||
suggested_edges: Optional[List[str]] = None
|
||||
|
|
@ -5,28 +5,16 @@ DESCRIPTION: Zerlegt Markdown in logische Einheiten (RawBlocks).
|
|||
Stellt die Funktion parse_edges_robust zur Verfügung.
|
||||
WP-24c v4.2.0: Identifiziert Edge-Zonen und markiert sie für Chunking-Ausschluss.
|
||||
WP-24c v4.2.5: Callout-Exclusion - Callouts werden als separate RawBlocks identifiziert und ausgeschlossen.
|
||||
WP-26 v1.0: Section-Type-Erkennung via [!section]-Callouts und automatische Section-Erkennung.
|
||||
"""
|
||||
import re
|
||||
import os
|
||||
import logging
|
||||
from typing import List, Tuple, Set, Dict, Any, Optional
|
||||
from .chunking_models import RawBlock
|
||||
from .chunking_utils import extract_frontmatter_from_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_WS = re.compile(r'\s+')
|
||||
_SENT_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])')
|
||||
|
||||
# WP-26 v1.0: Pattern für [!section]-Callouts
|
||||
# Matches: > [!section] type-name
|
||||
_SECTION_CALLOUT_PATTERN = re.compile(r'^\s*>\s*\[!section\]\s*(\w+)', re.IGNORECASE)
|
||||
|
||||
# WP-26 v1.0: Pattern für Block-IDs in Überschriften
|
||||
# Matches: ## Titel ^block-id
|
||||
_BLOCK_ID_PATTERN = re.compile(r'\^([a-zA-Z0-9_-]+)\s*$')
|
||||
|
||||
def split_sentences(text: str) -> list[str]:
|
||||
"""Teilt Text in Sätze auf unter Berücksichtigung deutscher Interpunktion."""
|
||||
text = _WS.sub(' ', text.strip())
|
||||
|
|
@ -39,18 +27,12 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
|
|||
Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6.
|
||||
WP-24c v4.2.0: Identifiziert Edge-Zonen (LLM-Validierung & Note-Scope) und markiert sie für Chunking-Ausschluss.
|
||||
WP-24c v4.2.6: Callouts werden mit is_meta_content=True markiert (werden gechunkt, aber später entfernt).
|
||||
WP-26 v1.0: Section-Type-Erkennung via [!section]-Callouts und automatische Section-Erkennung.
|
||||
"""
|
||||
blocks = []
|
||||
h1_title = "Dokument"
|
||||
section_path = "/"
|
||||
current_section_title = None
|
||||
|
||||
# WP-26 v1.0: State-Machine für Section-Type-Tracking
|
||||
current_section_type: Optional[str] = None # Aktueller Section-Type (oder None für note_type Fallback)
|
||||
section_introduced_at_level: Optional[int] = None # Ebene, auf der erste Section eingeführt wurde
|
||||
current_block_id: Optional[str] = None # Block-ID der aktuellen Sektion
|
||||
|
||||
# Frontmatter entfernen
|
||||
fm, text_without_fm = extract_frontmatter_from_text(md_text)
|
||||
|
||||
|
|
@ -88,9 +70,8 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
|
|||
buffer = []
|
||||
|
||||
# WP-24c v4.2.5: Callout-Erkennung (auch verschachtelt: >>)
|
||||
# WP-26 v1.0: Erweitert um [!section]-Callouts
|
||||
# Regex für Callouts: >\s*[!edge], >\s*[!abstract], >\s*[!section] (auch mit mehreren >)
|
||||
callout_pattern = re.compile(r'^\s*>{1,}\s*\[!(edge|abstract|section)\]', re.IGNORECASE)
|
||||
# Regex für Callouts: >\s*[!edge] oder >\s*[!abstract] (auch mit mehreren >)
|
||||
callout_pattern = re.compile(r'^\s*>{1,}\s*\[!(edge|abstract)\]', re.IGNORECASE)
|
||||
|
||||
# WP-24c v4.2.5: Markiere verarbeitete Zeilen, um sie zu überspringen
|
||||
processed_indices = set()
|
||||
|
|
@ -105,39 +86,13 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
|
|||
# Prüfe, ob diese Zeile ein Callout startet
|
||||
callout_match = callout_pattern.match(line)
|
||||
if callout_match:
|
||||
callout_type = callout_match.group(1).lower() # "edge", "abstract", oder "section"
|
||||
|
||||
# WP-26 v1.0: [!section] Callout-Behandlung
|
||||
if callout_type == "section":
|
||||
# Extrahiere Section-Type aus dem Callout
|
||||
section_match = _SECTION_CALLOUT_PATTERN.match(line)
|
||||
if section_match:
|
||||
new_section_type = section_match.group(1).lower()
|
||||
current_section_type = new_section_type
|
||||
|
||||
# Tracke die Ebene, auf der die erste Section eingeführt wurde
|
||||
# Wir nehmen die Ebene der letzten Überschrift (section_path basiert)
|
||||
if section_introduced_at_level is None:
|
||||
# Bestimme Ebene aus section_path
|
||||
# "/" = H1, "/Title" = H2, "/Title/Sub" = H3, etc.
|
||||
path_depth = section_path.count('/') if section_path else 1
|
||||
section_introduced_at_level = max(1, path_depth + 1)
|
||||
|
||||
logger.debug(f"WP-26: Section-Type erkannt: '{new_section_type}' bei '{current_section_title}' (Level: {section_introduced_at_level})")
|
||||
|
||||
# [!section] Callout wird nicht als Block hinzugefügt (ist nur Metadaten)
|
||||
processed_indices.add(i)
|
||||
continue
|
||||
|
||||
# Vorherigen Text-Block abschließen
|
||||
if buffer:
|
||||
content = "\n".join(buffer).strip()
|
||||
if content:
|
||||
blocks.append(RawBlock(
|
||||
"paragraph", content, None, section_path, current_section_title,
|
||||
exclude_from_chunking=in_exclusion_zone,
|
||||
section_type=current_section_type,
|
||||
block_id=current_block_id
|
||||
exclude_from_chunking=in_exclusion_zone
|
||||
))
|
||||
buffer = []
|
||||
|
||||
|
|
@ -165,9 +120,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
|
|||
blocks.append(RawBlock(
|
||||
"callout", callout_content, None, section_path, current_section_title,
|
||||
exclude_from_chunking=in_exclusion_zone, # Nur Edge-Zonen werden ausgeschlossen
|
||||
is_meta_content=True, # WP-24c v4.2.6: Markierung für spätere Entfernung
|
||||
section_type=current_section_type,
|
||||
block_id=current_block_id
|
||||
is_meta_content=True # WP-24c v4.2.6: Markierung für spätere Entfernung
|
||||
))
|
||||
continue
|
||||
|
||||
|
|
@ -180,32 +133,13 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
|
|||
if content:
|
||||
blocks.append(RawBlock(
|
||||
"paragraph", content, None, section_path, current_section_title,
|
||||
exclude_from_chunking=in_exclusion_zone,
|
||||
section_type=current_section_type,
|
||||
block_id=current_block_id
|
||||
exclude_from_chunking=in_exclusion_zone
|
||||
))
|
||||
buffer = []
|
||||
|
||||
level = len(heading_match.group(1))
|
||||
title = heading_match.group(2).strip()
|
||||
|
||||
# WP-26 v1.0: Block-ID aus Überschrift extrahieren (z.B. "## Titel ^block-id")
|
||||
block_id_match = _BLOCK_ID_PATTERN.search(title)
|
||||
if block_id_match:
|
||||
current_block_id = block_id_match.group(1)
|
||||
# Entferne Block-ID aus dem Titel für saubere Anzeige
|
||||
title = _BLOCK_ID_PATTERN.sub('', title).strip()
|
||||
else:
|
||||
current_block_id = None
|
||||
|
||||
# WP-26 v1.0: Section-Type State-Machine
|
||||
# Wenn eine Section eingeführt wurde und wir auf gleicher oder höherer Ebene sind:
|
||||
# -> Automatisch neue Section erkennen (FA-02b)
|
||||
if section_introduced_at_level is not None and level <= section_introduced_at_level:
|
||||
# Neue Überschrift auf gleicher oder höherer Ebene -> Reset auf None (note_type Fallback)
|
||||
current_section_type = None
|
||||
logger.debug(f"WP-26: Neue Section erkannt bei H{level} '{title}' -> Reset auf note_type")
|
||||
|
||||
# WP-24c v4.2.0: Prüfe, ob dieser Header eine Edge-Zone startet
|
||||
is_llm_validation_zone = (
|
||||
level == llm_validation_level and
|
||||
|
|
@ -236,9 +170,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
|
|||
# Die Überschrift selbst als regulären Block hinzufügen (auch markiert, wenn in Zone)
|
||||
blocks.append(RawBlock(
|
||||
"heading", stripped, level, section_path, current_section_title,
|
||||
exclude_from_chunking=in_exclusion_zone,
|
||||
section_type=current_section_type,
|
||||
block_id=current_block_id
|
||||
exclude_from_chunking=in_exclusion_zone
|
||||
))
|
||||
continue
|
||||
|
||||
|
|
@ -249,17 +181,13 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
|
|||
if content:
|
||||
blocks.append(RawBlock(
|
||||
"paragraph", content, None, section_path, current_section_title,
|
||||
exclude_from_chunking=in_exclusion_zone,
|
||||
section_type=current_section_type,
|
||||
block_id=current_block_id
|
||||
exclude_from_chunking=in_exclusion_zone
|
||||
))
|
||||
buffer = []
|
||||
if stripped == "---":
|
||||
blocks.append(RawBlock(
|
||||
"separator", "---", None, section_path, current_section_title,
|
||||
exclude_from_chunking=in_exclusion_zone,
|
||||
section_type=current_section_type,
|
||||
block_id=current_block_id
|
||||
exclude_from_chunking=in_exclusion_zone
|
||||
))
|
||||
else:
|
||||
buffer.append(line)
|
||||
|
|
@ -269,70 +197,11 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
|
|||
if content:
|
||||
blocks.append(RawBlock(
|
||||
"paragraph", content, None, section_path, current_section_title,
|
||||
exclude_from_chunking=in_exclusion_zone,
|
||||
section_type=current_section_type,
|
||||
block_id=current_block_id
|
||||
exclude_from_chunking=in_exclusion_zone
|
||||
))
|
||||
|
||||
# WP-26 v1.3: Post-Processing - Section-Type rückwirkend setzen
|
||||
# Der [!section] Callout kann IRGENDWO im Abschnitt stehen und gilt rückwirkend
|
||||
# für die gesamte Heading-Sektion (vom Heading bis zum nächsten Heading gleicher/höherer Ebene)
|
||||
blocks = _propagate_section_type_backwards(blocks, split_level=2)
|
||||
|
||||
return blocks, h1_title
|
||||
|
||||
|
||||
def _propagate_section_type_backwards(blocks: List["RawBlock"], split_level: int = 2) -> List["RawBlock"]:
    """
    WP-26 v1.3: Propagate section_type backwards across heading sections.

    A section starts at every heading block whose level is <= split_level and
    runs up to (not including) the next such heading. Blocks that precede the
    first such heading form a section of their own. Within each section the
    first truthy section_type (in block order) is assigned to every block of
    that section, including the heading itself. Sections in which no block
    carries a section_type are left untouched.

    Args:
        blocks: RawBlock objects in document order. The objects are modified
            in place.
        split_level: Heading level that starts a new section (default: 2,
            i.e. H2).

    Returns:
        The same list object that was passed in.
    """
    if not blocks:
        return blocks

    # Group block indices into heading sections.
    sections: List[List[int]] = []
    current_section_indices: List[int] = []
    for idx, block in enumerate(blocks):
        starts_section = (
            block.kind == "heading"
            and block.level is not None
            and block.level <= split_level
        )
        # Close the running section before the new heading opens the next one.
        if starts_section and current_section_indices:
            sections.append(current_section_indices)
            current_section_indices = []
        current_section_indices.append(idx)

    # Add the trailing section.
    if current_section_indices:
        sections.append(current_section_indices)

    for section_indices in sections:
        # The first truthy section_type inside the section wins.
        section_type_found = next(
            (
                blocks[idx].section_type
                for idx in section_indices
                if blocks[idx].section_type
            ),
            None,
        )
        if section_type_found:
            for idx in section_indices:
                blocks[idx].section_type = section_type_found

    return blocks
|
||||
|
||||
def parse_edges_robust(text: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts.
|
||||
|
|
|
|||
|
|
@ -6,11 +6,6 @@ DESCRIPTION: Strategien für atomares Sektions-Chunking v3.9.9.
|
|||
- Strikte Einhaltung von Sektionsgrenzen via Look-Ahead.
|
||||
- Fix: Synchronisierung der Parameter mit dem Orchestrator (context_prefix).
|
||||
WP-24c v4.2.5: Strict-Mode ohne Carry-Over - Bei strict_heading_split wird nach jeder Sektion geflusht.
|
||||
WP-26 v1.0: section_type und block_id werden an Chunks weitergegeben.
|
||||
WP-26 v1.1: Section-Type-Wechsel erzwingt IMMER einen neuen Chunk (unabhängig vom Profil).
|
||||
WP-26 v1.3: Parser propagiert section_type rückwirkend für Heading-Sektionen.
|
||||
Der [!section] Callout kann irgendwo im Abschnitt stehen.
|
||||
Alle Blöcke einer Heading-Sektion haben den korrekten section_type.
|
||||
"""
|
||||
from typing import List, Dict, Any, Optional
|
||||
from .chunking_models import RawBlock, Chunk
|
||||
|
|
@ -41,60 +36,41 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
|
|||
|
||||
chunks: List[Chunk] = []
|
||||
|
||||
def _emit(txt, title, path, section_type=None, block_id=None):
|
||||
"""
|
||||
Schreibt den finalen Chunk ohne Text-Modifikationen.
|
||||
WP-26 v1.0: Erweitert um section_type und block_id.
|
||||
"""
|
||||
def _emit(txt, title, path):
|
||||
"""Schreibt den finalen Chunk ohne Text-Modifikationen."""
|
||||
idx = len(chunks)
|
||||
win = _create_win(context_prefix, title, txt)
|
||||
chunks.append(Chunk(
|
||||
id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
|
||||
text=txt, window=win, token_count=estimate_tokens(txt),
|
||||
section_title=title, section_path=path, neighbors_prev=None, neighbors_next=None,
|
||||
section_type=section_type, block_id=block_id
|
||||
section_title=title, section_path=path, neighbors_prev=None, neighbors_next=None
|
||||
))
|
||||
|
||||
# --- SCHRITT 1: Gruppierung in atomare Sektions-Einheiten ---
|
||||
# WP-26 v1.3: Der Parser propagiert section_type bereits rückwirkend für Heading-Sektionen.
|
||||
# Alle Blöcke einer Heading-Sektion (inkl. Heading selbst) haben bereits den korrekten section_type.
|
||||
sections: List[Dict[str, Any]] = []
|
||||
curr_blocks = []
|
||||
|
||||
def _flush_section():
|
||||
"""Hilfsfunktion zum Abschließen einer Sektion."""
|
||||
nonlocal curr_blocks
|
||||
if not curr_blocks:
|
||||
return
|
||||
# WP-26 v1.3: section_type wird vom Parser bereits korrekt gesetzt (rückwirkend)
|
||||
# Alle Blöcke einer Heading-Sektion haben denselben section_type
|
||||
effective_section_type = next((cb.section_type for cb in curr_blocks if cb.section_type), None)
|
||||
effective_block_id = next((cb.block_id for cb in curr_blocks if cb.block_id), None)
|
||||
|
||||
sections.append({
|
||||
"text": "\n\n".join([x.text for x in curr_blocks]),
|
||||
"meta": curr_blocks[0],
|
||||
"is_empty": len(curr_blocks) == 1 and curr_blocks[0].kind == "heading",
|
||||
"section_type": effective_section_type,
|
||||
"block_id": effective_block_id
|
||||
})
|
||||
curr_blocks = []
|
||||
|
||||
for b in blocks:
|
||||
if b.kind == "heading" and b.level <= split_level:
|
||||
# Heading-basierter Split
|
||||
_flush_section()
|
||||
if curr_blocks:
|
||||
sections.append({
|
||||
"text": "\n\n".join([x.text for x in curr_blocks]),
|
||||
"meta": curr_blocks[0],
|
||||
"is_empty": len(curr_blocks) == 1 and curr_blocks[0].kind == "heading"
|
||||
})
|
||||
curr_blocks = [b]
|
||||
else:
|
||||
curr_blocks.append(b)
|
||||
|
||||
_flush_section()
|
||||
if curr_blocks:
|
||||
sections.append({
|
||||
"text": "\n\n".join([x.text for x in curr_blocks]),
|
||||
"meta": curr_blocks[0],
|
||||
"is_empty": len(curr_blocks) == 1 and curr_blocks[0].kind == "heading"
|
||||
})
|
||||
|
||||
# --- SCHRITT 2: Verarbeitung der Queue ---
|
||||
queue = list(sections)
|
||||
current_chunk_text = ""
|
||||
# WP-26 v1.0: Erweitert um section_type und block_id
|
||||
current_meta = {"title": None, "path": "/", "section_type": None, "block_id": None}
|
||||
current_meta = {"title": None, "path": "/"}
|
||||
|
||||
# Bestimmung des Modus: Hard-Split wenn smart_edge=False ODER strict=True
|
||||
is_hard_split_mode = (not smart_edge) or (strict)
|
||||
|
|
@ -107,9 +83,6 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
|
|||
if not current_chunk_text:
|
||||
current_meta["title"] = item["meta"].section_title
|
||||
current_meta["path"] = item["meta"].section_path
|
||||
# WP-26 v1.0: section_type und block_id aus Item übernehmen
|
||||
current_meta["section_type"] = item.get("section_type")
|
||||
current_meta["block_id"] = item.get("block_id")
|
||||
|
||||
# FALL A: HARD SPLIT MODUS (WP-24c v4.2.5: Strict-Mode ohne Carry-Over)
|
||||
if is_hard_split_mode:
|
||||
|
|
@ -117,23 +90,18 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
|
|||
# Kein Carry-Over erlaubt, auch nicht für leere Überschriften
|
||||
if current_chunk_text:
|
||||
# Flushe vorherigen Chunk
|
||||
_emit(current_chunk_text, current_meta["title"], current_meta["path"],
|
||||
current_meta["section_type"], current_meta["block_id"])
|
||||
_emit(current_chunk_text, current_meta["title"], current_meta["path"])
|
||||
current_chunk_text = ""
|
||||
|
||||
# Neue Sektion: Initialisiere Meta
|
||||
current_meta["title"] = item["meta"].section_title
|
||||
current_meta["path"] = item["meta"].section_path
|
||||
# WP-26 v1.0: section_type und block_id aus Item übernehmen
|
||||
current_meta["section_type"] = item.get("section_type")
|
||||
current_meta["block_id"] = item.get("block_id")
|
||||
|
||||
# WP-24c v4.2.5: Auch leere Sektionen werden als separater Chunk erstellt
|
||||
# (nur Überschrift, kein Inhalt)
|
||||
if item.get("is_empty", False):
|
||||
# Leere Sektion: Nur Überschrift als Chunk
|
||||
_emit(item_text, current_meta["title"], current_meta["path"],
|
||||
current_meta["section_type"], current_meta["block_id"])
|
||||
_emit(item_text, current_meta["title"], current_meta["path"])
|
||||
else:
|
||||
# Normale Sektion: Prüfe auf Token-Limit
|
||||
if estimate_tokens(item_text) > max_tokens:
|
||||
|
|
@ -145,54 +113,21 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
|
|||
while sents:
|
||||
s = sents.pop(0); slen = estimate_tokens(s)
|
||||
if take_len + slen > target and take_sents:
|
||||
_emit(" ".join(take_sents), current_meta["title"], current_meta["path"],
|
||||
current_meta["section_type"], current_meta["block_id"])
|
||||
_emit(" ".join(take_sents), current_meta["title"], current_meta["path"])
|
||||
take_sents = [s]; take_len = slen
|
||||
else:
|
||||
take_sents.append(s); take_len += slen
|
||||
|
||||
if take_sents:
|
||||
_emit(" ".join(take_sents), current_meta["title"], current_meta["path"],
|
||||
current_meta["section_type"], current_meta["block_id"])
|
||||
_emit(" ".join(take_sents), current_meta["title"], current_meta["path"])
|
||||
else:
|
||||
# Sektion passt: Direkt als Chunk
|
||||
_emit(item_text, current_meta["title"], current_meta["path"],
|
||||
current_meta["section_type"], current_meta["block_id"])
|
||||
_emit(item_text, current_meta["title"], current_meta["path"])
|
||||
|
||||
current_chunk_text = ""
|
||||
continue
|
||||
|
||||
# FALL B: SMART MODE (Regel 1-3)
|
||||
# WP-26 v1.1: Prüfe auf Section-Type-Wechsel AUCH in Schritt 2
|
||||
# Wenn sich der section_type zwischen current_meta und item ändert, muss gesplittet werden
|
||||
item_section_type = item.get("section_type")
|
||||
current_section_type_meta = current_meta.get("section_type")
|
||||
|
||||
# Section-Type-Wechsel: Von None zu einem Typ ODER von einem Typ zu einem anderen
|
||||
is_section_type_change_step2 = (
|
||||
current_chunk_text and # Es gibt bereits Content
|
||||
(
|
||||
# Wechsel von None zu einem Typ
|
||||
(current_section_type_meta is None and item_section_type is not None) or
|
||||
# Wechsel von einem Typ zu None
|
||||
(current_section_type_meta is not None and item_section_type is None) or
|
||||
# Wechsel zwischen verschiedenen Typen
|
||||
(current_section_type_meta is not None and item_section_type is not None
|
||||
and current_section_type_meta != item_section_type)
|
||||
)
|
||||
)
|
||||
|
||||
if is_section_type_change_step2:
|
||||
# WP-26 v1.1: Section-Type-Wechsel erzwingt Split
|
||||
_emit(current_chunk_text, current_meta["title"], current_meta["path"],
|
||||
current_meta["section_type"], current_meta["block_id"])
|
||||
current_chunk_text = ""
|
||||
# Reset Meta für nächsten Chunk
|
||||
current_meta["title"] = item["meta"].section_title
|
||||
current_meta["path"] = item["meta"].section_path
|
||||
current_meta["section_type"] = item_section_type
|
||||
current_meta["block_id"] = item.get("block_id")
|
||||
|
||||
combined_text = (current_chunk_text + "\n\n" + item_text).strip() if current_chunk_text else item_text
|
||||
combined_est = estimate_tokens(combined_text)
|
||||
|
||||
|
|
@ -202,8 +137,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
|
|||
else:
|
||||
if current_chunk_text:
|
||||
# Regel 2: Flushen an Sektionsgrenze, Item zurücklegen
|
||||
_emit(current_chunk_text, current_meta["title"], current_meta["path"],
|
||||
current_meta["section_type"], current_meta["block_id"])
|
||||
_emit(current_chunk_text, current_meta["title"], current_meta["path"])
|
||||
current_chunk_text = ""
|
||||
queue.insert(0, item)
|
||||
else:
|
||||
|
|
@ -218,8 +152,7 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
|
|||
sents.insert(0, s); break
|
||||
take_sents.append(s); take_len += slen
|
||||
|
||||
_emit(" ".join(take_sents), current_meta["title"], current_meta["path"],
|
||||
current_meta["section_type"], current_meta["block_id"])
|
||||
_emit(" ".join(take_sents), current_meta["title"], current_meta["path"])
|
||||
|
||||
if sents:
|
||||
remainder = " ".join(sents)
|
||||
|
|
@ -227,69 +160,31 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
|
|||
if header_prefix and not remainder.startswith(header_prefix):
|
||||
remainder = header_prefix + "\n\n" + remainder
|
||||
# Carry-Over: Rest wird vorne in die Queue geschoben
|
||||
# WP-26 v1.0: section_type und block_id weitergeben
|
||||
queue.insert(0, {"text": remainder, "meta": item["meta"], "is_split": True,
|
||||
"section_type": item.get("section_type"), "block_id": item.get("block_id")})
|
||||
queue.insert(0, {"text": remainder, "meta": item["meta"], "is_split": True})
|
||||
|
||||
if current_chunk_text:
|
||||
_emit(current_chunk_text, current_meta["title"], current_meta["path"],
|
||||
current_meta["section_type"], current_meta["block_id"])
|
||||
_emit(current_chunk_text, current_meta["title"], current_meta["path"])
|
||||
|
||||
return chunks
|
||||
|
||||
def strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, context_prefix: str = "") -> List[Chunk]:
|
||||
"""
|
||||
Standard-Sliding-Window für flache Texte ohne Sektionsfokus.
|
||||
WP-26 v1.0: Erweitert um section_type und block_id Weitergabe.
|
||||
WP-26 v1.3: Parser propagiert section_type rückwirkend - vereinfachte Logik.
|
||||
"""
|
||||
"""Standard-Sliding-Window für flache Texte ohne Sektionsfokus."""
|
||||
target = config.get("target", 400); max_tokens = config.get("max", 600)
|
||||
chunks: List[Chunk] = []; buf: List[RawBlock] = []
|
||||
current_section_type = None # Tracking des aktuellen section_type
|
||||
|
||||
def _flush_buffer():
|
||||
"""Hilfsfunktion zum Flushen des Buffers."""
|
||||
nonlocal buf, current_section_type
|
||||
if not buf:
|
||||
return
|
||||
txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
|
||||
win = _create_win(context_prefix, buf[0].section_title, txt)
|
||||
# WP-26 v1.3: section_type wird bereits vom Parser rückwirkend propagiert
|
||||
effective_section_type = next((b.section_type for b in buf if b.section_type), None)
|
||||
effective_block_id = next((b.block_id for b in buf if b.block_id), None)
|
||||
chunks.append(Chunk(
|
||||
id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
|
||||
text=txt, window=win, token_count=sum(estimate_tokens(x.text) for x in buf),
|
||||
section_title=buf[0].section_title, section_path=buf[0].section_path,
|
||||
neighbors_prev=None, neighbors_next=None,
|
||||
section_type=effective_section_type, block_id=effective_block_id
|
||||
))
|
||||
buf = []
|
||||
current_section_type = effective_section_type
|
||||
|
||||
for b in blocks:
|
||||
b_tokens = estimate_tokens(b.text)
|
||||
curr_tokens = sum(estimate_tokens(x.text) for x in buf) if buf else 0
|
||||
block_section_type = b.section_type
|
||||
|
||||
# WP-26 v1.3: Prüfe auf Section-Type-Wechsel
|
||||
# Da der Parser section_type rückwirkend setzt, haben alle Blöcke einer
|
||||
# Heading-Sektion denselben section_type. Ein Wechsel bedeutet neue Sektion.
|
||||
is_section_type_change = (
|
||||
buf and # Es gibt bereits Blöcke im Buffer
|
||||
current_section_type != block_section_type # Typ hat sich geändert
|
||||
)
|
||||
|
||||
# Flush wenn: Token-Limit überschritten ODER Section-Type-Wechsel
|
||||
if (curr_tokens + b_tokens > max_tokens and buf) or is_section_type_change:
|
||||
_flush_buffer()
|
||||
|
||||
if curr_tokens + b_tokens > max_tokens and buf:
|
||||
txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
|
||||
win = _create_win(context_prefix, buf[0].section_title, txt)
|
||||
chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=curr_tokens, section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
|
||||
buf = []
|
||||
buf.append(b)
|
||||
# Update section_type
|
||||
if block_section_type:
|
||||
current_section_type = block_section_type
|
||||
|
||||
# Letzten Buffer flushen
|
||||
_flush_buffer()
|
||||
|
||||
if buf:
|
||||
txt = "\n\n".join([x.text for x in buf]); idx = len(chunks)
|
||||
win = _create_win(context_prefix, buf[0].section_title, txt)
|
||||
chunks.append(Chunk(id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx, text=txt, window=win, token_count=estimate_tokens(txt), section_title=buf[0].section_title, section_path=buf[0].section_path, neighbors_prev=None, neighbors_next=None))
|
||||
|
||||
return chunks
|
||||
|
|
@ -24,7 +24,7 @@ DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
|
|||
- Chunk-Scope gewinnt zwingend über Note-Scope (außer explicit:note_zone)
|
||||
- Confidence-Werte: candidate_pool explicit:callout = 1.0, globaler Scan = 0.7
|
||||
- Key-Generierung gehärtet für konsistente Deduplizierung
|
||||
VERSION: 4.4.0 (WP-26 v1.4: Automatische Backlinks für Intra-Note-Edges)
|
||||
VERSION: 4.3.1 (WP-24c: Präzisions-Priorität)
|
||||
STATUS: Active
|
||||
"""
|
||||
import re
|
||||
|
|
@ -32,14 +32,8 @@ import logging
|
|||
from typing import List, Optional, Dict, Tuple, Set
|
||||
from .graph_utils import (
|
||||
_get, _edge, _mk_edge_id, _dedupe_seq, parse_link_target,
|
||||
PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for,
|
||||
get_typical_edge_for # WP-26 v1.1: Für automatische Intra-Note-Edges
|
||||
PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for
|
||||
)
|
||||
# WP-26 v1.4: Für automatische Backlinks bei Intra-Note-Edges
|
||||
try:
|
||||
from app.services.edge_registry import registry as edge_registry
|
||||
except ImportError:
|
||||
edge_registry = None
|
||||
from .graph_extractors import (
|
||||
extract_typed_relations, extract_callout_relations, extract_wikilinks
|
||||
)
|
||||
|
|
@ -651,53 +645,6 @@ def build_edges_for_note(
|
|||
"edge_id": _mk_edge_id("prev", next_id, cid, "chunk"),
|
||||
"provenance": "structure", "rule_id": "structure:order", "confidence": PROVENANCE_PRIORITY["structure:order"]
|
||||
}))
|
||||
|
||||
# 1b) WP-26 v1.1: Automatische Intra-Note-Edges zwischen Sektionen mit unterschiedlichen Typen
|
||||
# Wenn sich der section_type zwischen aufeinanderfolgenden Chunks ändert,
|
||||
# wird eine semantische Kante basierend auf graph_schema.md erstellt.
|
||||
for idx, ch in enumerate(chunks):
|
||||
if idx >= len(chunks) - 1:
|
||||
continue # Kein nächster Chunk
|
||||
|
||||
cid = _get(ch, "chunk_id", "id")
|
||||
next_ch = chunks[idx + 1]
|
||||
next_id = _get(next_ch, "chunk_id", "id")
|
||||
|
||||
if not cid or not next_id:
|
||||
continue
|
||||
|
||||
# Hole die effective_types der Chunks
|
||||
# WP-26 v1.1: section_type oder note_type (effective_type)
|
||||
current_section_type = ch.get("section_type")
|
||||
next_section_type = next_ch.get("section_type")
|
||||
current_type = current_section_type or ch.get("type") or note_type
|
||||
next_type = next_section_type or next_ch.get("type") or note_type
|
||||
|
||||
# Prüfe, ob es einen Section-Type-Wechsel gibt
|
||||
# Nur wenn beide einen expliziten section_type haben oder sich die effective_types unterscheiden
|
||||
is_section_change = (
|
||||
(current_section_type is not None or next_section_type is not None) and
|
||||
current_type != next_type
|
||||
)
|
||||
|
||||
if is_section_change:
|
||||
# Ermittle den passenden Edge-Typ aus graph_schema.md
|
||||
edge_kind = get_typical_edge_for(current_type, next_type)
|
||||
|
||||
logger.debug(f"WP-26 Intra-Note-Edge: {current_type} -> {next_type} = {edge_kind}")
|
||||
|
||||
# Erstelle die automatische Edge (Forward-Richtung)
|
||||
edges.append(_edge(edge_kind, "chunk", cid, next_id, note_id, {
|
||||
"chunk_id": cid,
|
||||
"edge_id": _mk_edge_id(edge_kind, cid, next_id, "chunk"),
|
||||
"provenance": "rule",
|
||||
"rule_id": "inferred:section_transition",
|
||||
"source_hint": "schema_default",
|
||||
"confidence": PROVENANCE_PRIORITY.get("schema_default", 0.85),
|
||||
"is_internal": True, # Explizit als Intra-Note-Edge markieren
|
||||
"virtual": True, # WP-26 v1.4: Automatisch generierte Section-Transitions sind virtuell
|
||||
"section_transition": f"{current_type}->{next_type}" # Debug-Info
|
||||
}))
|
||||
|
||||
# 2) Inhaltliche Kanten (Explicit & Candidate Pool)
|
||||
reg = load_types_registry()
|
||||
|
|
@ -1058,62 +1005,4 @@ def build_edges_for_note(
|
|||
|
||||
final_edges.append(winner)
|
||||
|
||||
# WP-26 v1.4: Automatische Backlinks für Intra-Note-Edges (Chunk-Level)
|
||||
# Erstelle inverse Edges für alle Intra-Note-Edges, wenn noch nicht vorhanden
|
||||
if edge_registry:
|
||||
# Erstelle Set aller existierenden Edge-Keys für schnelle Lookup
|
||||
existing_edge_keys: Set[Tuple[str, str, str, Optional[str]]] = set()
|
||||
for e in final_edges:
|
||||
source = e.get("source_id", "")
|
||||
target = e.get("target_id", "")
|
||||
kind = e.get("kind", "")
|
||||
target_section = e.get("target_section")
|
||||
existing_edge_keys.add((source, target, kind, target_section))
|
||||
|
||||
# Durchlaufe alle Edges und erstelle Backlinks für Intra-Note-Edges
|
||||
backlinks_to_add: List[dict] = []
|
||||
for e in final_edges:
|
||||
is_internal = e.get("is_internal", False)
|
||||
scope = e.get("scope", "chunk")
|
||||
source_id = e.get("source_id", "")
|
||||
target_id = e.get("target_id", "")
|
||||
kind = e.get("kind", "")
|
||||
target_section = e.get("target_section")
|
||||
|
||||
# Nur Intra-Note-Edges auf Chunk-Level berücksichtigen
|
||||
if not is_internal or scope != "chunk":
|
||||
continue
|
||||
|
||||
# Prüfe, ob bereits ein inverser Edge existiert
|
||||
inv_kind = edge_registry.get_inverse(kind) if edge_registry else None
|
||||
if not inv_kind:
|
||||
continue # Kein inverser Edge-Type verfügbar
|
||||
|
||||
# Prüfe, ob inverser Edge bereits existiert
|
||||
inv_key = (target_id, source_id, inv_kind, None) # Backlink hat keine target_section
|
||||
if inv_key in existing_edge_keys:
|
||||
continue # Backlink bereits vorhanden
|
||||
|
||||
# Erstelle automatischen Backlink
|
||||
backlink_edge = _edge(inv_kind, "chunk", target_id, source_id, note_id, {
|
||||
"chunk_id": target_id, # Backlink geht vom Target-Chunk aus
|
||||
"edge_id": _mk_edge_id(inv_kind, target_id, source_id, "chunk"),
|
||||
"provenance": "rule",
|
||||
"rule_id": "derived:intra_note_backlink",
|
||||
"source_hint": "automatic_backlink",
|
||||
"confidence": PROVENANCE_PRIORITY.get("derived:backlink", 0.8),
|
||||
"is_internal": True,
|
||||
"virtual": True, # WP-26 v1.4: Automatisch generierte Backlinks sind virtuell
|
||||
"original_edge_kind": kind # Debug-Info: Welcher Edge-Type wurde invertiert
|
||||
})
|
||||
|
||||
backlinks_to_add.append(backlink_edge)
|
||||
existing_edge_keys.add(inv_key) # Verhindere Duplikate
|
||||
logger.debug(f"WP-26 Backlink erstellt: {target_id} --[{inv_kind}]--> {source_id} (Original: {kind})")
|
||||
|
||||
# Füge Backlinks zu final_edges hinzu
|
||||
if backlinks_to_add:
|
||||
final_edges.extend(backlinks_to_add)
|
||||
logger.info(f"WP-26: {len(backlinks_to_add)} automatische Backlinks für Intra-Note-Edges erstellt")
|
||||
|
||||
return final_edges
|
||||
|
|
@ -5,56 +5,18 @@ DESCRIPTION: In-Memory Repräsentation eines Graphen für Scoring und Analyse.
|
|||
WP-15c Update: Erhalt von Metadaten (target_section, provenance)
|
||||
für präzises Retrieval-Reasoning.
|
||||
WP-24c v4.1.0: Scope-Awareness und Section-Filtering Support.
|
||||
WP-26 v1.0: is_internal-Boost für Intra-Note-Edges.
|
||||
VERSION: 1.4.0 (WP-26: Intra-Note-Edge-Boost)
|
||||
VERSION: 1.3.0 (WP-24c: Gold-Standard v4.1.0)
|
||||
STATUS: Active
|
||||
"""
|
||||
import os
|
||||
import math
|
||||
from functools import lru_cache
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Optional, DefaultDict, Any, Set
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
# Lokale Paket-Imports
|
||||
from .graph_weights import EDGE_BASE_WEIGHTS, calculate_edge_weight
|
||||
from .graph_db_adapter import fetch_edges_from_qdrant
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@lru_cache
def get_edge_scoring_config() -> Dict[str, float]:
    """
    WP-26 v1.0: Load the edge-scoring multipliers from the retriever YAML config.

    The config path is taken from the MINDNET_RETRIEVER_CONFIG environment
    variable and falls back to "config/retriever.yaml". When PyYAML is not
    available or the file does not exist, the built-in defaults are returned.

    The result is memoized by ``lru_cache``; every caller receives the same
    dict object, so it must be treated as read-only.

    Returns:
        Dict with the keys "internal_edge_boost" and "external_edge_boost".
    """
    config = {
        "internal_edge_boost": 1.2,  # +20% boost for intra-note edges
        "external_edge_boost": 1.0,  # neutral weight for inter-note edges
    }

    config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
    if not (yaml and os.path.exists(config_path)):
        return config

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
        # A section that is present but empty ("edge_scoring:" with no
        # children) is parsed as None; treat it like a missing section
        # instead of failing with a misleading warning.
        edge_scoring = data.get("edge_scoring") or {}
        for key in ("internal_edge_boost", "external_edge_boost"):
            config[key] = float(edge_scoring.get(key, config[key]))
    except Exception as e:
        # Best effort at the config boundary: a broken file must never
        # prevent graph construction, so fall back to what we have so far.
        logger.warning("Edge-Scoring-Konfiguration konnte nicht geladen werden: %s", e)

    return config
|
||||
|
||||
class Subgraph:
|
||||
"""
|
||||
Leichtgewichtiger Subgraph mit Adjazenzlisten & Kennzahlen.
|
||||
|
|
@ -74,39 +36,24 @@ class Subgraph:
|
|||
"""
|
||||
Fügt eine Kante hinzu und aktualisiert Indizes.
|
||||
WP-15c: Speichert das vollständige Payload für den Explanation Layer.
|
||||
WP-26 v1.0: Wendet is_internal-Boost auf Intra-Note-Edges an.
|
||||
"""
|
||||
src = e.get("source")
|
||||
tgt = e.get("target")
|
||||
kind = e.get("kind")
|
||||
|
||||
# Basis-Gewicht aus Payload oder Edge-Weights
|
||||
base_weight = e.get("weight", EDGE_BASE_WEIGHTS.get(kind, 0.0))
|
||||
|
||||
# WP-26 v1.0: is_internal-Boost anwenden
|
||||
is_internal = e.get("is_internal", False)
|
||||
edge_scoring = get_edge_scoring_config()
|
||||
if is_internal:
|
||||
weight_multiplier = edge_scoring["internal_edge_boost"]
|
||||
else:
|
||||
weight_multiplier = edge_scoring["external_edge_boost"]
|
||||
|
||||
final_weight = base_weight * weight_multiplier
|
||||
|
||||
# Das gesamte Payload wird als Kanten-Objekt behalten
|
||||
# Wir stellen sicher, dass alle relevanten Metadaten vorhanden sind
|
||||
edge_data = {
|
||||
"source": src,
|
||||
"target": tgt,
|
||||
"kind": kind,
|
||||
"weight": final_weight,
|
||||
"weight": e.get("weight", EDGE_BASE_WEIGHTS.get(kind, 0.0)),
|
||||
"provenance": e.get("provenance", "rule"),
|
||||
"confidence": e.get("confidence", 1.0),
|
||||
"target_section": e.get("target_section"), # Essentiell für Präzision
|
||||
"is_super_edge": e.get("is_super_edge", False),
|
||||
"virtual": e.get("virtual", False), # WP-24c v4.1.0: Für Authority-Priorisierung
|
||||
"chunk_id": e.get("chunk_id"), # WP-24c v4.1.0: Für RAG-Kontext
|
||||
"is_internal": is_internal # WP-26 v1.0: Flag für Debugging
|
||||
"chunk_id": e.get("chunk_id") # WP-24c v4.1.0: Für RAG-Kontext
|
||||
}
|
||||
|
||||
owner = e.get("note_id")
|
||||
|
|
|
|||
|
|
@ -12,87 +12,28 @@ STATUS: Active
|
|||
import os
|
||||
import uuid
|
||||
import hashlib
|
||||
from typing import Dict, Iterable, List, Optional, Set, Any, Tuple
|
||||
from typing import Iterable, List, Optional, Set, Any, Tuple
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
# WP-26 v1.0: Provenance-Literale auf valide EdgeDTO-Werte reduziert
|
||||
# Legacy-Prioritäten für interne Verarbeitung (werden zu source_hint gemappt)
|
||||
# WP-15b: Prioritäten-Ranking für die De-Duplizierung von Kanten unterschiedlicher Herkunft
|
||||
PROVENANCE_PRIORITY = {
|
||||
# Explizite Kanten (provenance: "explicit")
|
||||
"explicit:wikilink": 1.00,
|
||||
"inline:rel": 0.95,
|
||||
"callout:edge": 0.90,
|
||||
"explicit:callout": 0.90,
|
||||
"explicit:note_scope": 1.00,
|
||||
"explicit:note_zone": 1.00,
|
||||
# Regel-basierte Kanten (provenance: "rule")
|
||||
"derived:backlink": 0.90,
|
||||
"edge_defaults": 0.70,
|
||||
"schema_default": 0.85,
|
||||
"inferred:section_transition": 0.85, # WP-26 v1.1: Automatische Section-Übergänge
|
||||
# Struktur-Kanten (provenance: "structure")
|
||||
"explicit:callout": 0.90, # WP-24c v4.2.7: Callout-Kanten aus candidate_pool
|
||||
"semantic_ai": 0.90, # Validierte KI-Kanten
|
||||
"structure:belongs_to": 1.00,
|
||||
"structure:order": 0.95,
|
||||
# KI-generierte Kanten (provenance: "smart")
|
||||
"semantic_ai": 0.90,
|
||||
"global_pool": 0.80,
|
||||
"structure:order": 0.95, # next/prev
|
||||
"explicit:note_scope": 1.00,
|
||||
"explicit:note_zone": 1.00, # WP-24c v4.2.0: Note-Scope Zonen (höchste Priorität)
|
||||
"derived:backlink": 0.90,
|
||||
"edge_defaults": 0.70 # Heuristik basierend auf types.yaml
|
||||
}
|
||||
|
||||
# WP-26 v1.0: Maps internal provenance values to EdgeDTO-compliant
# (provenance, source_hint) pairs.
PROVENANCE_TO_DTO = {
    # explicit
    "explicit:wikilink": ("explicit", "wikilink"),
    "explicit:callout": ("explicit", "callout"),
    "explicit:note_scope": ("explicit", "note_scope"),
    "explicit:note_zone": ("explicit", "note_zone"),
    "inline:rel": ("explicit", "inline_rel"),
    "callout:edge": ("explicit", "callout"),
    "explicit": ("explicit", None),
    # rule
    "derived:backlink": ("rule", "backlink"),
    "edge_defaults": ("rule", "edge_defaults"),
    "schema_default": ("rule", "schema_default"),
    "inferred:schema": ("rule", "schema_default"),
    "inferred:section_transition": ("rule", "schema_default"),  # WP-26 v1.1
    "rule": ("rule", None),
    # structure
    "structure:belongs_to": ("structure", "belongs_to"),
    "structure:order": ("structure", "order"),
    "structure": ("structure", None),
    # smart
    "semantic_ai": ("smart", None),
    "global_pool": ("smart", "global_pool"),
    "smart": ("smart", None),
}


def normalize_provenance(internal_provenance: str) -> Tuple[str, Optional[str]]:
    """
    WP-26 v1.0: Normalize an internal provenance value to EdgeDTO literals.

    Exact matches are resolved through PROVENANCE_TO_DTO. Unknown values fall
    back to prefix matching, where the text after the last ":" becomes the
    source hint. Anything that cannot be classified (including None, empty
    or non-string input) is reported as ("explicit", None).

    Args:
        internal_provenance: Internal provenance string (e.g. "explicit:callout").

    Returns:
        Tuple (provenance, source_hint) with valid EdgeDTO values.
    """
    # Payloads may carry a missing/None provenance; do not crash on it.
    if not isinstance(internal_provenance, str) or not internal_provenance:
        return ("explicit", None)

    mapped = PROVENANCE_TO_DTO.get(internal_provenance)
    if mapped is not None:
        return mapped

    # Fallback: prefix matching. "inferred" and "smart" are included so that
    # unknown members of those families keep the category the table assigns
    # to their known siblings instead of being promoted to "explicit".
    prefix_to_category = (
        ("explicit", "explicit"),
        ("structure", "structure"),
        ("rule", "rule"),
        ("derived", "rule"),
        ("inferred", "rule"),
        ("smart", "smart"),
    )
    _, separator, tail = internal_provenance.rpartition(":")
    source_hint = tail if separator else None
    for prefix, category in prefix_to_category:
        if internal_provenance.startswith(prefix):
            return (category, source_hint)

    # Default: explicit without a source hint
    return ("explicit", None)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pfad-Auflösung (Integration der .env Umgebungsvariablen)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -131,12 +72,6 @@ def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[
|
|||
Trennt einen Obsidian-Link [[Target#Section]] in seine Bestandteile Target und Section.
|
||||
Behandelt Self-Links (z.B. [[#Ziele]]), indem die aktuelle note_id eingesetzt wird.
|
||||
|
||||
WP-26 v1.1: Extrahiert Block-ID aus Section-Strings.
|
||||
- Wenn Section "^block-id" enthält, wird nur der Block-ID-Teil extrahiert
|
||||
- Beispiel: "📖 Diagnose: Glioblastom ^kontext" -> section = "kontext"
|
||||
- Beispiel: "^learning" -> section = "learning"
|
||||
- Beispiel: " ^sit" (nur Block-ID) -> section = "sit"
|
||||
|
||||
Returns:
|
||||
Tuple (target_id, target_section)
|
||||
"""
|
||||
|
|
@ -147,16 +82,6 @@ def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[
|
|||
target = parts[0].strip()
|
||||
section = parts[1].strip() if len(parts) > 1 else None
|
||||
|
||||
# WP-26 v1.1: Block-ID-Extraktion aus Section
|
||||
# Wenn die Section ein "^" enthält, extrahiere nur den Block-ID-Teil
|
||||
if section and "^" in section:
|
||||
# Finde den ^block-id Teil
|
||||
import re
|
||||
block_id_match = re.search(r'\^([a-zA-Z0-9_-]+)', section)
|
||||
if block_id_match:
|
||||
# Ersetze die gesamte Section durch nur die Block-ID
|
||||
section = block_id_match.group(1)
|
||||
|
||||
# Spezialfall: Self-Link innerhalb derselben Datei
|
||||
if not target and section and current_note_id:
|
||||
target = current_note_id
|
||||
|
|
@ -198,15 +123,7 @@ def _mk_edge_id(kind: str, s: str, t: str, scope: str, target_section: Optional[
|
|||
def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
|
||||
"""
|
||||
Konstruiert ein standardisiertes Kanten-Payload für Qdrant.
|
||||
WP-26 v1.0: Erweitert um is_internal Flag und Provenance-Normalisierung.
|
||||
|
||||
Args:
|
||||
kind: Kantentyp (z.B. "derives", "caused_by")
|
||||
scope: Granularität ("chunk" oder "note")
|
||||
source_id: ID der Quelle (Chunk oder Note)
|
||||
target_id: ID des Ziels (Chunk oder Note)
|
||||
note_id: ID der Note (für Kontext)
|
||||
extra: Zusätzliche Payload-Felder
|
||||
Wird von graph_derive_edges.py benötigt.
|
||||
"""
|
||||
pl = {
|
||||
"kind": kind,
|
||||
|
|
@ -217,24 +134,8 @@ def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, e
|
|||
"note_id": note_id,
|
||||
"virtual": False # Standardmäßig explizit, solange nicht anders in Phase 2 gesetzt
|
||||
}
|
||||
|
||||
# WP-26 v1.0: is_internal Flag berechnen
|
||||
# Intra-Note-Edge: Source und Target gehören zur gleichen Note
|
||||
source_note = source_id.split("#")[0] if "#" in source_id else source_id
|
||||
target_note = target_id.split("#")[0] if "#" in target_id else target_id
|
||||
pl["is_internal"] = (source_note == target_note) or (source_note == note_id and target_note == note_id)
|
||||
|
||||
if extra:
|
||||
pl.update(extra)
|
||||
|
||||
# WP-26 v1.0: Provenance normalisieren, falls vorhanden
|
||||
if "provenance" in extra:
|
||||
internal_prov = extra["provenance"]
|
||||
dto_prov, source_hint = normalize_provenance(internal_prov)
|
||||
pl["provenance"] = dto_prov
|
||||
if source_hint:
|
||||
pl["source_hint"] = source_hint
|
||||
|
||||
return pl
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -273,228 +174,4 @@ def get_edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
|
|||
if isinstance(v, dict) and isinstance(v.get("edge_defaults"), list):
|
||||
return [str(x) for x in v["edge_defaults"] if isinstance(x, str)]
|
||||
|
||||
return []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WP-26 v1.1: Graph-Schema Parser für automatische Edge-Typ-Ableitung
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Cache for the parsed schema (avoids parsing the file more than once)
_GRAPH_SCHEMA_CACHE: Optional[Dict[str, Dict[str, List[str]]]] = None
# WP-26 v1.3: Cache for the extended schema that also carries prohibited edges
_GRAPH_SCHEMA_FULL_CACHE: Optional[Dict[str, Dict[str, Dict[str, List[str]]]]] = None


def load_graph_schema() -> Dict[str, Dict[str, List[str]]]:
    """
    WP-26 v1.1: Return the "typical" edge types per (source, target) type pair.

    The data is derived from load_graph_schema_full(), which parses
    graph_schema.md. That file has the following format:
        ## Source: `experience`
        | Target-Note-type | Typical Edge-Types | Prohibited Edge-Types |
        | :--- | :--- | :--- |
        | `event` | `caused_by` | `consists_of` |

    The projection is computed once and then served from a module-level cache.

    Returns:
        Dict[source_type, Dict[target_type, List[typical_edges]]]
        Example: {"experience": {"event": ["caused_by"], "insight": ["resulted_in"]}}
    """
    global _GRAPH_SCHEMA_CACHE

    if _GRAPH_SCHEMA_CACHE is None:
        # Project the extended schema down to its "typical" lists only.
        _GRAPH_SCHEMA_CACHE = {
            source_type: {
                target_type: edge_info.get("typical", [])
                for target_type, edge_info in targets.items()
            }
            for source_type, targets in load_graph_schema_full().items()
        }

    return _GRAPH_SCHEMA_CACHE
|
||||
|
||||
|
||||
def load_graph_schema_full() -> Dict[str, Dict[str, Dict[str, List[str]]]]:
    """
    WP-26 v1.3: Parse graph_schema.md into typical and prohibited edge types.

    Candidate locations are tried in this order: the directory named by
    MINDNET_OBSIDIAN_DICTIONARY (if set), the path from get_schema_path(),
    "config/graph_schema.md" below the current working directory, and the
    "config" directory three levels above this module. The first existing
    file that can be read wins. If nothing readable (or only an empty file)
    is found, an empty schema is cached and returned.

    Returns:
        Dict[source_type, Dict[target_type, {"typical": [...], "prohibited": [...]}]]
        Example: {"experience": {"event": {"typical": ["caused_by"], "prohibited": ["consists_of"]}}}
    """
    global _GRAPH_SCHEMA_FULL_CACHE
    if _GRAPH_SCHEMA_FULL_CACHE is not None:
        return _GRAPH_SCHEMA_FULL_CACHE

    import re
    import logging
    logger = logging.getLogger(__name__)

    configured_path = get_schema_path()
    vault_dictionary = os.getenv("MINDNET_OBSIDIAN_DICTIONARY")

    # The vault dictionary (when configured) takes precedence over all others.
    candidates = [os.path.join(vault_dictionary, "graph_schema.md")] if vault_dictionary else []
    candidates += [
        configured_path,
        os.path.join(os.getcwd(), "config", "graph_schema.md"),
        os.path.join(os.path.dirname(__file__), "..", "..", "..", "config", "graph_schema.md"),
    ]

    text = None
    for candidate in candidates:
        if not os.path.isfile(candidate):
            continue
        try:
            with open(candidate, "r", encoding="utf-8") as fh:
                text = fh.read()
            logger.debug(f"Graph-Schema geladen von: {candidate}")
            break
        except Exception as e:
            # An unreadable candidate is not fatal; try the next location.
            logger.warning(f"Fehler beim Laden von {candidate}: {e}")

    if not text:
        logger.warning("Graph-Schema nicht gefunden. Fallback auf leeres Schema.")
        _GRAPH_SCHEMA_FULL_CACHE = {}
        return _GRAPH_SCHEMA_FULL_CACHE

    # Section header, e.g.:  ## Source: `experience`
    header_re = re.compile(r'^##\s+Source:\s*`(\w+)`', re.IGNORECASE)
    # Table row, e.g.:  | `event` | `caused_by` | `consists_of` |
    #            or:    | `event` | `caused_by`, `resulted_in` | - |
    row_re = re.compile(
        r'^\|\s*`(\w+)`\s*\|\s*([^|]+)\s*\|\s*([^|]*)\s*\|'
    )
    # Edge names are the back-ticked words inside a table cell.
    backtick_re = re.compile(r'`(\w+)`')

    parsed: Dict[str, Dict[str, Dict[str, List[str]]]] = {}
    active_source = None

    for raw_line in text.split('\n'):
        line = raw_line.strip()

        header = header_re.match(line)
        if header:
            active_source = header.group(1).lower()
            parsed.setdefault(active_source, {})
            continue

        # Table rows only count once a source section has been opened.
        if active_source is None:
            continue

        row = row_re.match(line)
        if row is None:
            continue

        target_cell, typical_cell, prohibited_cell = row.groups()
        parsed[active_source][target_cell.lower()] = {
            "typical": backtick_re.findall(typical_cell.strip()),
            "prohibited": backtick_re.findall(prohibited_cell.strip())
        }

    logger.info(f"Graph-Schema (full) geladen: {len(parsed)} Source-Types")
    _GRAPH_SCHEMA_FULL_CACHE = parsed
    return parsed
|
||||
|
||||
|
||||
def get_topology_info(source_type: str, target_type: str) -> Dict[str, List[str]]:
    """
    WP-26 v1.3: Look up typical and prohibited edge types for a type pair.

    Lookup order (first hit wins):
        1. exact (source, target)
        2. (source, "any")
        3. ("default", target)
        4. ("default", "any")
        5. built-in fallback that allows everything

    Type names are compared in lower case. A falsy source_type is treated as
    "default", a falsy target_type as "any".

    Args:
        source_type: Type of the source section (e.g. "experience")
        target_type: Type of the target section (e.g. "insight")

    Returns:
        Dict with the lists "typical" and "prohibited".
        Example: {"typical": ["resulted_in"], "prohibited": ["solves"]}
        On a schema hit this is the entry stored in the schema cache, so
        callers should not modify it.
    """
    schema = load_graph_schema_full()

    source_key = source_type.lower() if source_type else "default"
    target_key = target_type.lower() if target_type else "any"

    # Walk the precedence list instead of repeating the same lookup four times.
    for source_candidate in (source_key, "default"):
        if source_candidate not in schema:
            continue
        rules = schema[source_candidate]
        for target_candidate in (target_key, "any"):
            if target_candidate in rules:
                return rules[target_candidate]

    # Absolute fallback: nothing is prohibited
    return {"typical": ["related_to", "references"], "prohibited": []}
|
||||
|
||||
def get_typical_edge_for(source_type: str, target_type: str) -> Optional[str]:
    """
    WP-26 v1.1: Return the first "typical" edge type for a type pair.

    Candidates are checked in this order, skipping entries whose edge list
    is empty: (source, target), (source, "any"), ("default", target),
    ("default", "any"). If none of them yields an edge, "related_to" is
    returned.

    Args:
        source_type: Type of the source section (e.g. "experience")
        target_type: Type of the target section (e.g. "insight")

    Returns:
        The first typical edge type (e.g. "resulted_in"), or "related_to"
        when the schema has no usable entry.
    """
    schema = load_graph_schema()

    source_key = source_type.lower() if source_type else "default"
    target_key = target_type.lower() if target_type else "any"

    for source_candidate in (source_key, "default"):
        if source_candidate not in schema:
            continue
        rules = schema[source_candidate]
        for target_candidate in (target_key, "any"):
            if target_candidate not in rules:
                continue
            candidates = rules[target_candidate]
            if candidates:
                return candidates[0]

    # Absolute fallback
    return "related_to"
|
||||
|
||||
def clear_graph_schema_cache():
    """
    WP-26 v1.1: Reset the graph-schema cache.

    Useful for tests or whenever the schema should be parsed again.
    WP-26 v1.3: The extended (typical + prohibited) cache is reset as well.
    """
    global _GRAPH_SCHEMA_CACHE, _GRAPH_SCHEMA_FULL_CACHE
    _GRAPH_SCHEMA_CACHE = _GRAPH_SCHEMA_FULL_CACHE = None
|
||||
return []
|
||||
|
|
@ -3,8 +3,7 @@ FILE: app/core/ingestion/ingestion_chunk_payload.py
|
|||
DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
|
||||
Fix v2.4.3: Integration der zentralen Registry (WP-14) für konsistente Defaults.
|
||||
WP-24c v4.3.0: candidate_pool wird explizit übernommen für Chunk-Attribution.
|
||||
WP-26 v1.0: Erweiterung um effective_type (section_type || note_type) und note_type-Feld.
|
||||
VERSION: 2.5.0 (WP-26 v1.0)
|
||||
VERSION: 2.4.4 (WP-24c v4.3.0)
|
||||
STATUS: Active
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
|
@ -92,35 +91,14 @@ def make_chunk_payloads(note: Dict[str, Any], note_path: str, chunks_from_chunke
|
|||
section = getattr(ch, "section_title", "") if not is_dict else ch.get("section", "")
|
||||
# WP-24c v4.3.0: candidate_pool muss erhalten bleiben für Chunk-Attribution
|
||||
candidate_pool = getattr(ch, "candidate_pool", []) if not is_dict else ch.get("candidate_pool", [])
|
||||
|
||||
# WP-26 v1.0: Section-Type für typ-spezifische Sektionen
|
||||
section_type = getattr(ch, "section_type", None) if not is_dict else ch.get("section_type")
|
||||
# WP-26 v1.0: Block-ID für Intra-Note-Links
|
||||
block_id = getattr(ch, "block_id", None) if not is_dict else ch.get("block_id")
|
||||
|
||||
# WP-26 v1.0: Effektiver Typ = section_type || note_type (FA-03)
|
||||
effective_type = section_type if section_type else note_type
|
||||
|
||||
# WP-26 v1.0: retriever_weight basiert auf effektivem Typ (FA-09b)
|
||||
# Wenn section_type vorhanden, nutze dessen retriever_weight
|
||||
effective_rw = rw
|
||||
if section_type:
|
||||
effective_rw = _resolve_val(section_type, reg, "retriever_weight", rw)
|
||||
try:
|
||||
effective_rw = float(effective_rw)
|
||||
except:
|
||||
effective_rw = rw
|
||||
|
||||
pl: Dict[str, Any] = {
|
||||
"note_id": nid or fm.get("id"),
|
||||
"chunk_id": cid,
|
||||
"title": title,
|
||||
"index": int(index),
|
||||
"ord": int(index) + 1,
|
||||
# WP-26 v1.0: type enthält den effektiven Typ (section_type || note_type)
|
||||
"type": effective_type,
|
||||
# WP-26 v1.0: note_type ist immer der ursprüngliche Note-Typ (für Filterung)
|
||||
"note_type": note_type,
|
||||
"type": note_type,
|
||||
"tags": tags,
|
||||
"text": text,
|
||||
"window": window,
|
||||
|
|
@ -129,13 +107,9 @@ def make_chunk_payloads(note: Dict[str, Any], note_path: str, chunks_from_chunke
|
|||
"section": section,
|
||||
"path": note_path,
|
||||
"source_path": kwargs.get("file_path") or note_path,
|
||||
# WP-26 v1.0: retriever_weight basiert auf effektivem Typ
|
||||
"retriever_weight": effective_rw,
|
||||
"retriever_weight": rw,
|
||||
"chunk_profile": cp,
|
||||
"candidate_pool": candidate_pool, # WP-24c v4.3.0: Kritisch für Chunk-Attribution
|
||||
# WP-26 v1.0: Optionale Felder für Section-Type-Tracking
|
||||
"section_type": section_type, # Expliziter Section-Type (oder None)
|
||||
"block_id": block_id, # Block-ID für Intra-Note-Links (oder None)
|
||||
"candidate_pool": candidate_pool # WP-24c v4.3.0: Kritisch für Chunk-Attribution
|
||||
}
|
||||
|
||||
# Audit: Cleanup Pop (Vermeidung von redundanten Alias-Feldern)
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ from app.services.llm_service import LLMService
|
|||
# Package-Interne Imports (Refactoring WP-14)
|
||||
from .ingestion_utils import load_type_registry, resolve_note_type, get_chunk_config_by_profile
|
||||
from .ingestion_db import fetch_note_payload, artifacts_missing, purge_artifacts, is_explicit_edge_present
|
||||
from .ingestion_validation import validate_edge_candidate, validate_edge_against_schema
|
||||
from .ingestion_validation import validate_edge_candidate
|
||||
from .ingestion_note_payload import make_note_payload
|
||||
from .ingestion_chunk_payload import make_chunk_payloads
|
||||
|
||||
|
|
@ -621,31 +621,6 @@ class IngestionService:
|
|||
v_edge["target_section"] = target_section
|
||||
self.symmetry_buffer.append(v_edge)
|
||||
|
||||
# WP-26 v1.3: Schema-Validierung für Intra-Note-Edges (FA-12)
|
||||
# Prüfe is_internal Edges gegen graph_schema.md
|
||||
if explicit_edges:
|
||||
chunks_by_id = {c.get("chunk_id", c.get("id", "")): c for c in chunk_pls}
|
||||
schema_validated_edges = []
|
||||
schema_rejected_count = 0
|
||||
|
||||
for e in explicit_edges:
|
||||
is_valid, updated_edge = validate_edge_against_schema(
|
||||
edge=e,
|
||||
chunks_by_id=chunks_by_id,
|
||||
strict_mode=False # Im normalen Modus: atypische Edges erlaubt mit reduzierter Confidence
|
||||
)
|
||||
|
||||
if is_valid:
|
||||
schema_validated_edges.append(updated_edge)
|
||||
else:
|
||||
schema_rejected_count += 1
|
||||
logger.info(f"🚫 [SCHEMA-VALIDATION] Edge abgelehnt: {e.get('source_id')} -> {e.get('target_id')} ({e.get('kind')})")
|
||||
|
||||
if schema_rejected_count > 0:
|
||||
logger.info(f"📊 [SCHEMA-VALIDATION] {schema_rejected_count} Intra-Note-Edges aufgrund von Schema-Verletzungen abgelehnt")
|
||||
|
||||
explicit_edges = schema_validated_edges
|
||||
|
||||
# DB Upsert
|
||||
if purge_before and old_payload: purge_artifacts(self.client, self.prefix, note_id)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,145 +3,24 @@ FILE: app/core/ingestion/ingestion_validation.py
|
|||
DESCRIPTION: WP-15b semantische Validierung von Kanten gegen den LocalBatchCache.
|
||||
WP-24c: Erweiterung um automatische Symmetrie-Generierung (Inverse Kanten).
|
||||
WP-25b: Konsequente Lazy-Prompt-Orchestration (prompt_key + variables).
|
||||
WP-26 v1.3: Schema-Validierung für Intra-Note-Edges gegen graph_schema.md.
|
||||
VERSION: 3.1.0 (WP-26: Intra-Note-Edge Schema-Validation)
|
||||
VERSION: 3.0.0 (WP-24c: Symmetric Edge Management)
|
||||
STATUS: Active
|
||||
FIX:
|
||||
- WP-24c: Integration der EdgeRegistry zur dynamischen Inversions-Ermittlung.
|
||||
- WP-24c: Implementierung von validate_and_symmetrize für bidirektionale Graphen.
|
||||
- WP-25b: Beibehaltung der hierarchischen Prompt-Resolution und Modell-Spezi-Logik.
|
||||
- WP-26: FA-12 Schema-Validierung gegen effektiven Chunk-Typ.
|
||||
"""
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List, Tuple
|
||||
from typing import Dict, Any, Optional, List
|
||||
from app.core.parser import NoteContext
|
||||
|
||||
# Import der neutralen Bereinigungs-Logik zur Vermeidung von Circular Imports
|
||||
from app.core.registry import clean_llm_text
|
||||
# WP-24c: Zugriff auf das dynamische Vokabular
|
||||
from app.services.edge_registry import registry as edge_registry
|
||||
# WP-26 v1.3: Graph-Schema für Validierung
|
||||
from app.core.graph.graph_utils import get_topology_info
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# WP-26 v1.3: Schema-Validierung für Intra-Note-Edges (FA-12)
|
||||
# ==============================================================================
|
||||
|
||||
def validate_intra_note_edge(
    edge: Dict[str, Any],
    source_chunk: Dict[str, Any],
    target_chunk: Dict[str, Any],
    strict_mode: bool = False
) -> Tuple[bool, float, Optional[str]]:
    """
    WP-26 v1.3 (FA-12): Validate an intra-note edge against graph_schema.md.

    The type of each chunk is read from its "type" field, falling back to
    "note_type" and finally to "default". The edge kind defaults to
    "related_to" when the edge has no "kind" key.

    Args:
        edge: Edge dict; only "kind" is read here
        source_chunk: Chunk payload of the source
        target_chunk: Chunk payload of the target
        strict_mode: If True, atypical edges are rejected instead of downgraded

    Returns:
        Tuple (is_valid, confidence, reason)
        - prohibited edge:            (False, 0.0, reason)
        - typical edge:               (True, 1.0, None)
        - atypical edge, strict mode: (False, 0.0, reason)
        - atypical edge, otherwise:   (True, 0.7, reason)
    """
    def _chunk_type(chunk: Dict[str, Any]) -> str:
        return chunk.get("type") or chunk.get("note_type") or "default"

    source_type = _chunk_type(source_chunk)
    target_type = _chunk_type(target_chunk)
    edge_kind = edge.get("kind", "related_to")

    # Schema lookup
    topology = get_topology_info(source_type, target_type)
    typical = topology.get("typical", [])
    prohibited = topology.get("prohibited", [])

    # 1. Prohibited edges are always rejected.
    if edge_kind in prohibited:
        reason = f"Edge '{edge_kind}' von {source_type} → {target_type} ist verboten (prohibited)"
        logger.warning(f"🚫 [SCHEMA-VALIDATION] {reason}")
        return (False, 0.0, reason)

    # 2. Typical edges keep full confidence.
    if edge_kind in typical:
        logger.debug(f"✅ [SCHEMA-VALIDATION] Edge '{edge_kind}' von {source_type} → {target_type} ist typisch")
        return (True, 1.0, None)

    # 3. Neither typical nor prohibited: the edge is atypical.
    reason = f"Edge '{edge_kind}' von {source_type} → {target_type} ist atypisch (nicht in typical: {typical})"

    if not strict_mode:
        # Normal mode: keep the edge, but with reduced confidence (0.7)
        logger.info(f"ℹ️ [SCHEMA-VALIDATION] {reason} - erlaubt mit reduzierter Confidence")
        return (True, 0.7, reason)

    # Strict mode: atypical edges are rejected
    logger.warning(f"⚠️ [SCHEMA-VALIDATION] {reason} - ABGELEHNT (strict_mode)")
    return (False, 0.0, reason)
|
||||
|
||||
|
||||
def validate_edge_against_schema(
    edge: Dict[str, Any],
    chunks_by_id: Dict[str, Dict[str, Any]],
    strict_mode: bool = False
) -> Tuple[bool, Dict[str, Any]]:
    """
    WP-26 v1.3: Schema-validation wrapper that resolves the edge's chunks first.

    Only edges flagged with ``is_internal`` are validated. All other edges, and
    edges whose source or target chunk cannot be found in ``chunks_by_id``, are
    accepted unchanged (integrity before precision).

    Args:
        edge: The edge dict. Read keys: "source_id", "target_id",
            "is_internal" and "confidence".
        chunks_by_id: Mapping of chunk_id -> chunk payload.
        strict_mode: Passed through to ``validate_intra_note_edge``.

    Returns:
        Tuple (is_valid, updated_edge)
        - is_valid: True if the edge is allowed.
        - updated_edge: The original edge object when nothing had to be
          checked or the edge was rejected; otherwise a shallow copy whose
          "confidence" is capped at the schema confidence and which carries
          the reason under "schema_validation_note" when that confidence is
          below 1.0.
    """
    # Only intra-note edges are validated.
    if not edge.get("is_internal", False):
        return (True, edge)

    source_id = edge.get("source_id", "")
    target_id = edge.get("target_id", "")

    # Look up both chunk payloads.
    source_chunk = chunks_by_id.get(source_id, {})
    target_chunk = chunks_by_id.get(target_id, {})

    # Unknown (or empty) chunks: allow the edge rather than drop it.
    if not source_chunk or not target_chunk:
        logger.debug(f"[SCHEMA-VALIDATION] Chunks nicht gefunden für {source_id} / {target_id} - Edge erlaubt")
        return (True, edge)

    is_valid, confidence, reason = validate_intra_note_edge(
        edge=edge,
        source_chunk=source_chunk,
        target_chunk=target_chunk,
        strict_mode=strict_mode
    )

    if not is_valid:
        return (False, edge)

    # Work on a copy so the caller's edge dict is never mutated.
    updated_edge = edge.copy()
    if confidence < 1.0:
        original_confidence = edge.get("confidence")
        # An explicit None means "unset"; min(None, x) would raise a
        # TypeError, so treat it like a missing key.
        if original_confidence is None:
            original_confidence = 1.0
        # The schema may only lower the confidence, never raise it.
        updated_edge["confidence"] = min(original_confidence, confidence)
        updated_edge["schema_validation_note"] = reason

    return (True, updated_edge)
|
||||
|
||||
async def validate_edge_candidate(
|
||||
chunk_text: str,
|
||||
edge: Dict,
|
||||
|
|
|
|||
|
|
@ -3,8 +3,7 @@ FILE: app/core/retrieval/retriever.py
|
|||
DESCRIPTION: Haupt-Schnittstelle für die Suche. Orchestriert Vektorsuche und Graph-Expansion.
|
||||
WP-15c Update: Note-Level Diversity Pooling & Super-Edge Aggregation.
|
||||
WP-24c v4.1.0: Gold-Standard - Scope-Awareness, Section-Filtering, Authority-Priorisierung.
|
||||
WP-26 v1.0: Konfigurierbare Aggregation (note/chunk Level).
|
||||
VERSION: 0.9.0 (WP-26: Aggregation-Level)
|
||||
VERSION: 0.8.0 (WP-24c: Gold-Standard v4.1.0)
|
||||
STATUS: Active
|
||||
DEPENDENCIES: app.config, app.models.dto, app.core.database*, app.core.graph_adapter
|
||||
"""
|
||||
|
|
@ -35,39 +34,8 @@ from qdrant_client.http import models as rest
|
|||
# Mathematische Engine importieren
|
||||
from app.core.retrieval.retriever_scoring import get_weights, compute_wp22_score
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_aggregation_config() -> Dict[str, Any]:
    """
    WP-26 v1.0: Load the aggregation settings from retriever.yaml.

    The file path is read from the MINDNET_RETRIEVER_CONFIG environment
    variable (fallback: "config/retriever.yaml"). If PyYAML is unavailable,
    the file does not exist, or it cannot be read or parsed, the built-in
    defaults are returned. Each setting is validated on its own; an invalid
    value is logged and replaced by its default instead of leaking into the
    pooling logic.

    Returns:
        Dict with "level" ("note" or "chunk") and "max_chunks_per_note"
        (int >= 1).
    """
    config: Dict[str, Any] = {
        "level": "note",           # "note" (default) or "chunk"
        "max_chunks_per_note": 3   # limit used for "note" level
    }

    config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
    if not (yaml and os.path.exists(config_path)):
        return config

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except (OSError, ValueError, yaml.YAMLError) as e:
        logger.warning(f"Aggregation-Konfiguration konnte nicht geladen werden: {e}")
        return config

    # A non-mapping root or an empty "aggregation:" section (YAML null) is
    # not an error; it simply means "use the defaults".
    aggregation = data.get("aggregation") if isinstance(data, dict) else None
    if not isinstance(aggregation, dict):
        return config

    raw_level = aggregation.get("level")
    if raw_level is not None:
        level = str(raw_level).strip().lower()
        if level in ("note", "chunk"):
            config["level"] = level
        else:
            logger.warning(
                f"Aggregation-Konfiguration: ungültiger level '{raw_level}' - verwende '{config['level']}'"
            )

    raw_limit = aggregation.get("max_chunks_per_note")
    if raw_limit is not None:
        try:
            limit = int(raw_limit)
        except (TypeError, ValueError):
            limit = 0
        # A limit below 1 would make note-level pooling return no hits.
        if limit >= 1:
            config["max_chunks_per_note"] = limit
        else:
            logger.warning(
                f"Aggregation-Konfiguration: ungültiges max_chunks_per_note '{raw_limit}' - "
                f"verwende {config['max_chunks_per_note']}"
            )

    return config
|
||||
|
||||
# ==============================================================================
|
||||
# 1. CORE HELPERS & CONFIG LOADERS
|
||||
# ==============================================================================
|
||||
|
|
@ -348,33 +316,22 @@ def _build_hits_from_semantic(
|
|||
# 1. Sortierung nach finalem mathematischen Score
|
||||
enriched_sorted = sorted(enriched, key=lambda h: h[3]["total"], reverse=True)
|
||||
|
||||
# 2. WP-26 v1.0: Konfigurierbare Aggregation (Note-Level oder Chunk-Level)
|
||||
aggregation_config = _get_aggregation_config()
|
||||
aggregation_level = aggregation_config["level"]
|
||||
max_chunks_per_note = aggregation_config["max_chunks_per_note"]
|
||||
# 2. WP-15c: Note-Level Diversity Pooling
|
||||
# Wir behalten pro note_id nur den Hit mit dem höchsten total_score.
|
||||
# Dies verhindert, dass 10 Chunks derselben Note andere KeyNotes verdrängen.
|
||||
unique_note_hits = []
|
||||
seen_notes = set()
|
||||
|
||||
if aggregation_level == "chunk":
|
||||
# WP-26 v1.0: Chunk-Level - alle Chunks individuell ranken (keine Deduplizierung)
|
||||
logger.debug(f"📊 [AGGREGATION] Chunk-Level: Alle {len(enriched_sorted)} Chunks individuell")
|
||||
pooled_hits = enriched_sorted
|
||||
else:
|
||||
# WP-15c: Note-Level Diversity Pooling (Default)
|
||||
# Behalten pro note_id bis zu max_chunks_per_note Hits
|
||||
pooled_hits = []
|
||||
note_chunk_count: Dict[str, int] = defaultdict(int)
|
||||
for item in enriched_sorted:
|
||||
_, _, payload, _ = item
|
||||
note_id = str(payload.get("note_id", "unknown"))
|
||||
|
||||
for item in enriched_sorted:
|
||||
_, _, payload, _ = item
|
||||
note_id = str(payload.get("note_id", "unknown"))
|
||||
|
||||
if note_chunk_count[note_id] < max_chunks_per_note:
|
||||
pooled_hits.append(item)
|
||||
note_chunk_count[note_id] += 1
|
||||
|
||||
logger.debug(f"📊 [AGGREGATION] Note-Level: {len(pooled_hits)} Chunks (max {max_chunks_per_note}/Note)")
|
||||
if note_id not in seen_notes:
|
||||
unique_note_hits.append(item)
|
||||
seen_notes.add(note_id)
|
||||
|
||||
# 3. Begrenzung auf top_k nach dem Diversity-Pooling
|
||||
limited_hits = pooled_hits[: max(1, top_k)]
|
||||
limited_hits = unique_note_hits[: max(1, top_k)]
|
||||
|
||||
results: List[QueryHit] = []
|
||||
for pid, s_score, pl, dbg in limited_hits:
|
||||
|
|
|
|||
|
|
@ -46,18 +46,16 @@ class EdgeDTO(BaseModel):
|
|||
target: str
|
||||
weight: float
|
||||
direction: Literal["out", "in", "undirected"] = "out"
|
||||
# WP-26 v1.0: Provenance auf valide Literale reduziert (EdgeDTO-Constraint)
|
||||
# Detail-Informationen werden über source_hint transportiert
|
||||
provenance: Optional[Literal["explicit", "rule", "smart", "structure"]] = "explicit"
|
||||
# WP-26 v1.0: Neues Feld für Detail-Informationen zur Herkunft
|
||||
source_hint: Optional[Literal[
|
||||
"callout", "wikilink", "inline_rel", "schema_default", "note_scope",
|
||||
"note_zone", "belongs_to", "order", "backlink", "edge_defaults", "global_pool"
|
||||
]] = None
|
||||
# WP-24c v4.5.3: Erweiterte Provenance-Werte für Chunk-Aware Edges
|
||||
# Unterstützt alle tatsächlich verwendeten Provenance-Typen im System
|
||||
provenance: Optional[Literal[
|
||||
"explicit", "rule", "smart", "structure",
|
||||
"explicit:callout", "explicit:wikilink", "explicit:note_zone", "explicit:note_scope",
|
||||
"inline:rel", "callout:edge", "semantic_ai", "structure:belongs_to", "structure:order",
|
||||
"derived:backlink", "edge_defaults", "global_pool"
|
||||
]] = "explicit"
|
||||
confidence: float = 1.0
|
||||
target_section: Optional[str] = None
|
||||
# WP-26 v1.0: Flag für Intra-Note-Edges
|
||||
is_internal: Optional[bool] = None
|
||||
target_section: Optional[str] = None
|
||||
|
||||
|
||||
# --- Request Models ---
|
||||
|
|
|
|||
|
|
@ -1,16 +1,4 @@
|
|||
version: 1.3
|
||||
|
||||
# WP-26 Phase 2: Aggregation-Level für Retrieval
|
||||
# - note: Die besten Chunks pro Note, begrenzt durch max_chunks_per_note (Default)
|
||||
# - chunk: Alle Chunks individuell ranken
|
||||
aggregation:
|
||||
level: note # "note" (default) oder "chunk"
|
||||
max_chunks_per_note: 3 # Optional: Limit bei "note"-Level
|
||||
|
||||
# WP-26 Phase 2: Edge-Scoring mit Intra-Note-Boost
|
||||
edge_scoring:
|
||||
internal_edge_boost: 1.2 # +20% Boost für Intra-Note-Edges (is_internal=true)
|
||||
external_edge_boost: 1.0 # Standard für Inter-Note-Edges
|
||||
version: 1.2
|
||||
|
||||
scoring:
|
||||
# W_sem: skaliert den Term (semantic_score * retriever_weight)
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@ id: 01-authoring-guidelines
|
|||
title: Authoring Guidelines – Handbuch für den Digitalen Zwilling
|
||||
type: principle
|
||||
status: stable
|
||||
version: 1.4.0
|
||||
version: 1.3.0
|
||||
area: system_documentation
|
||||
tags: [handbuch, authoring, methodik, obsidian, mindnet, best-practice, section_types, wp26]
|
||||
tags: [handbuch, authoring, methodik, obsidian, mindnet, best-practice]
|
||||
retriever_weight: 2.0
|
||||
---
|
||||
|
||||
|
|
@ -18,7 +18,7 @@ Dieses Handbuch ist dein primäres Werkzeug, um Wissen so zu strukturieren, dass
|
|||
## ⚡ Die 6 Goldenen Regeln (TL;DR)
|
||||
|
||||
1. **Atomare Gedanken:** Eine Notiz = Ein Thema. Trenne z. B. „Meditation“ von „Mobility“.
|
||||
2. **Explizite Typen:** Nutze den `type` im Frontmatter (z. B. `insight`, `experience`, `value`), um die mathematische Gewichtung zu steuern. **Section Types (WP-26):** Innerhalb einer Note können Abschnitte eigene Types haben (`> [!section] insight`), um präzisere semantische Verarbeitung zu ermöglichen.
|
||||
2. **Explizite Typen:** Nutze den `type` im Frontmatter (z. B. `insight`, `experience`, `value`), um die mathematische Gewichtung zu steuern.
|
||||
3. **H3-Hub-Pairing (NEU):** Nutze H3-Überschriften in Hubs, um spezifische Links und ihre Bedeutung (Edges) in isolierten Chunks für die KI zu fixieren, ohne die Obsidian-Graphen-Logik zu brechen.
|
||||
4. **Werte & Ziele definieren:** Erstelle für jeden Kernwert eine eigene Notiz (`type: value`). Ohne explizite Maßstäbe kann die Decision Engine nicht in deinem Sinne abwägen.
|
||||
5. **Emotionales Bridging:** Nutze Begriffe wie „Druck“, „Faszination“ oder „Angst“, um die Empathie-Ebene der KI zu aktivieren.
|
||||
|
|
@ -81,13 +81,6 @@ Nutze das kanonische Vokabular in `[!edge]` Callouts innerhalb der H3-Sektionen:
|
|||
* **`part_of` / `gehört_zu`**: Bindet Details an einen übergeordneten Cluster oder Hub.
|
||||
* **`guides` / `steuert`**: Prinzipien oder Werte, die eine Sektion oder ein Vorhaben leiten.
|
||||
|
||||
**Intra-Note-Edges (WP-26):** Für Verbindungen innerhalb derselben Note nutze Block-References:
|
||||
```markdown
|
||||
> [!edge] derives
|
||||
> [[#^block-id]]
|
||||
```
|
||||
Dies erzeugt semantische Verbindungen zwischen Chunks derselben Note (`is_internal: true`).
|
||||
|
||||
### 4.2 Forward-Mapping (Strategische Lücken)
|
||||
Setze bewusst Links auf Dateien, die noch nicht existieren (z. B. `[[Die beste Version meiner selbst]]`). Die KI erkennt diese Lücken und stellt proaktiv Fragen, um diese Felder gemeinsam mit dir zu füllen.
|
||||
|
||||
|
|
@ -99,17 +92,6 @@ Setze bewusst Links auf Dateien, die noch nicht existieren (z. B. `[[Die beste V
|
|||
**Ziel:** Den „Spiegel“ (Empathy) mit deiner Biografie kalibrieren.
|
||||
* **Struktur:** Kontext (Was ist passiert?), Emotions-Check (Gefühle?), Lektion (Was gelernt?).
|
||||
* **Deep-Edge:** Verknüpfe es immer mit einer Rolle: `[[rel:supports Meine Rollenlandkarte 2025#Vater]]`.
|
||||
* **Section Types (WP-26):** Nutze unterschiedliche Section-Types für verschiedene Abschnitte:
|
||||
```markdown
|
||||
## Situation ^sit
|
||||
> [!section] experience
|
||||
|
||||
## Reflexion ^ref
|
||||
> [!section] insight
|
||||
> [!edge] derives
|
||||
> [[#^sit]]
|
||||
```
|
||||
Dies ermöglicht präzisere semantische Verarbeitung und Intra-Note-Verbindungen.
|
||||
|
||||
### 5.2 Eine Beobachtung festhalten (`type: insight`)
|
||||
**Ziel:** Den „Berater“ (Decision) mit Mustern versorgen.
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
---
|
||||
doc_type: user_manual
|
||||
audience: user, author
|
||||
scope: vault, markdown, schema, agentic_validation, note_scope, section_types, intra_note_edges
|
||||
scope: vault, markdown, schema, agentic_validation, note_scope
|
||||
status: active
|
||||
version: 4.6.0
|
||||
context: "Regelwerk für das Erstellen von Notizen im Vault. Die 'Source of Truth' für Autoren. Inkludiert WP-24c Phase 3 Agentic Edge Validation, automatische Spiegelkanten, Note-Scope Zonen und WP-26 Section Types mit Intra-Note-Edges."
|
||||
version: 4.5.8
|
||||
context: "Regelwerk für das Erstellen von Notizen im Vault. Die 'Source of Truth' für Autoren. Inkludiert WP-24c Phase 3 Agentic Edge Validation, automatische Spiegelkanten und Note-Scope Zonen."
|
||||
---
|
||||
|
||||
# Knowledge Design Manual
|
||||
|
|
@ -198,169 +198,6 @@ Damit dein System sauber bleibt, beachte diese Regeln:
|
|||
|
||||
---
|
||||
|
||||
## 4. Section Types & Intra-Note-Edges (WP-26) [NEU]
|
||||
|
||||
### 4.0 Übersicht: Section Types
|
||||
|
||||
**Section Types** ermöglichen es, innerhalb einer Note unterschiedliche Typen für verschiedene Abschnitte zu definieren. Dies erlaubt präzisere semantische Verarbeitung und Intra-Note-Verbindungen zwischen Chunks.
|
||||
|
||||
**Kernkonzept:**
|
||||
- Eine Note hat einen **Note-Type** (im Frontmatter: `type: experience`)
|
||||
- Abschnitte innerhalb der Note können einen eigenen **Section-Type** haben (z.B. `insight`, `decision`)
|
||||
- Der **effektive Typ** eines Chunks ist: `section_type` falls vorhanden, sonst `note_type`
|
||||
- Section Types ermöglichen **Intra-Note-Edges** – semantische Verbindungen zwischen Chunks derselben Note
|
||||
|
||||
### 4.0.1 Section-Type-Deklaration
|
||||
|
||||
**Syntax:**
|
||||
```markdown
|
||||
## Überschrift ^block-id
|
||||
> [!section] type-name
|
||||
```
|
||||
|
||||
**Beispiel:**
|
||||
```markdown
|
||||
---
|
||||
type: experience
|
||||
title: Konflikt im Team-Meeting
|
||||
---
|
||||
|
||||
# Konflikt im Team-Meeting
|
||||
|
||||
## Situation ^sit
|
||||
> [!section] experience
|
||||
|
||||
Am 15. Januar kam es zu einer Eskalation...
|
||||
|
||||
## Reflexion ^ref
|
||||
> [!section] insight
|
||||
|
||||
Diese Erfahrung zeigt mir, dass...
|
||||
|
||||
## Nächste Schritte ^next
|
||||
> [!section] decision
|
||||
|
||||
Ich werde in Zukunft früher eingreifen.
|
||||
```
|
||||
|
||||
**Regeln:**
|
||||
- Der Section-Type gilt ab der Überschrift bis zur nächsten Überschrift **gleicher oder höherer Ebene**
|
||||
- Das `[!section]`-Callout kann **an beliebiger Stelle** innerhalb des Abschnitts stehen (muss nicht direkt unter der Überschrift sein)
|
||||
- Das `[!section]`-Callout ist **unabhängig** von `[!edge]`-Callouts und kann separat platziert werden
|
||||
- Bei Fehlen eines `[!section]`-Callouts gilt der Note-Type als Fallback
|
||||
- Valide Section-Types müssen in `types.yaml` definiert sein
|
||||
|
||||
**Automatische Section-Erkennung:**
|
||||
Sobald eine Section auf einer bestimmten Überschriften-Ebene eingeführt wurde (z.B. H2), beginnt bei **jeder weiteren Überschrift auf dieser Ebene automatisch eine neue Section** – auch ohne explizites `[!section]`-Callout.
|
||||
|
||||
**Beispiel:**
|
||||
```markdown
|
||||
## Situation ^sit
|
||||
> [!section] experience
|
||||
Text A... → type = "experience" (explizit)
|
||||
|
||||
## Reflexion ^ref
|
||||
<!-- KEIN [!section] Callout -->
|
||||
Text B... → type = "experience" (note_type Fallback)
|
||||
→ ABER: Neue Section erkannt, neuer Chunk!
|
||||
|
||||
## Learnings ^learn
|
||||
> [!section] insight
|
||||
Text C... → type = "insight" (explizit)
|
||||
```
|
||||
|
||||
**Body-Section:**
|
||||
Textblöcke, die **vor dem ersten `[!section]`-Callout** stehen, erhalten:
|
||||
- `section: "body"`
|
||||
- `type: note_type` (Fallback)
|
||||
- `section_type: None`
|
||||
|
||||
### 4.0.2 Intra-Note-Edges (Verbindungen innerhalb einer Note)
|
||||
|
||||
**Block-References als Link-Format:**
|
||||
|
||||
Das bevorzugte Format für Intra-Note-Links:
|
||||
```markdown
|
||||
> [!edge] derives
|
||||
> [[#^block-id]]
|
||||
```
|
||||
|
||||
**Fallback (mit Einschränkungen):**
|
||||
```markdown
|
||||
> [!edge] derives
|
||||
> [[#Section-Name]]
|
||||
```
|
||||
|
||||
**Vollständiges Beispiel:**
|
||||
```markdown
|
||||
## Situation ^sit
|
||||
> [!section] experience
|
||||
|
||||
Die Geschichte...
|
||||
|
||||
## Reflexion ^ref
|
||||
> [!section] insight
|
||||
> [!edge] derives
|
||||
> [[#^sit]]
|
||||
|
||||
Was ich daraus lerne...
|
||||
```
|
||||
|
||||
**Ergebnis:**
|
||||
- Chunk der Reflexion (`#ref`) erhält eine `derives`-Kante zum Chunk der Situation (`#sit`)
|
||||
- Beide Chunks sind in derselben Note → `is_internal: true`
|
||||
- Scope ist `chunk` (nicht `note`)
|
||||
|
||||
**Automatische Backlinks:**
|
||||
Für alle Intra-Note-Edges werden automatisch inverse Backlinks erzeugt:
|
||||
- Forward-Edge: `#ref --[derives]--> #sit`
|
||||
- Backlink: `#sit --[derived_from]--> #ref` (automatisch)
|
||||
|
||||
**Default-Edges aus graph_schema.md:**
|
||||
Wenn keine expliziten Intra-Note-Edges definiert sind, aber Section-Types vorhanden:
|
||||
- System ermittelt Source-Type und Target-Type (benachbarte Sektionen)
|
||||
- Lookup in `graph_schema.md` via `get_topology_info(source_type, target_type)`
|
||||
- Erster Eintrag aus `typical` wird als Default-Edge-Type verwendet
|
||||
|
||||
**Beispiel:**
|
||||
```markdown
|
||||
## Situation ^sit
|
||||
> [!section] experience
|
||||
|
||||
## Reflexion ^ref
|
||||
> [!section] insight
|
||||
<!-- Kein expliziter [!edge] -->
|
||||
```
|
||||
|
||||
**Ergebnis:** Automatische Edge `experience --[resulted_in]--> insight` (aus Schema)
|
||||
|
||||
### 4.0.3 Effektiver Typ & Retrieval
|
||||
|
||||
**Kernregel:** Der **Section-Type (sofern vorhanden) hat immer Vorrang** vor dem `note_type` für:
|
||||
- Vektor-Embedding (Suche)
|
||||
- `retriever_weight` Lookup
|
||||
- Type-Filter in Queries
|
||||
- Graph-Expansion
|
||||
|
||||
**Beispiel:**
|
||||
Ein Chunk mit `type: "insight"` und `note_type: "experience"` erhält:
|
||||
- `retriever_weight: 1.20` (aus `types.yaml` für `insight`, nicht `experience`)
|
||||
- Wird bei `filter: {type: "insight"}` gefunden
|
||||
- Wird bei `filter: {note_type: "experience"}` ebenfalls gefunden
|
||||
|
||||
**Chunk-Payload-Struktur:**
|
||||
```python
|
||||
{
|
||||
"type": "insight", # Effektiver Typ (section_type || note_type)
|
||||
"note_type": "experience", # Ursprünglicher Note-Typ (immer vorhanden)
|
||||
"section": "Reflexion",
|
||||
"section_type": "insight", # Expliziter Section-Type (optional)
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Edges & Verlinkung
|
||||
|
||||
Mindnet versteht Zusammenhänge durch Kanten.
|
||||
|
|
@ -377,15 +214,6 @@ Du kannst auch auf spezifische Abschnitte innerhalb einer Note verlinken:
|
|||
|
||||
Das System trennt automatisch den Note-Namen (`Mein Leitbild`) vom Abschnitts-Namen (`P3 – Disziplin`), sodass mehrere Links zur gleichen Note möglich sind, wenn sie auf verschiedene Abschnitte zeigen.
|
||||
|
||||
**Intra-Note-Links mit Block-IDs (WP-26):**
|
||||
Für Verbindungen **innerhalb derselben Note** nutze Block-References:
|
||||
```markdown
|
||||
> [!edge] derives
|
||||
> [[#^block-id]]
|
||||
```
|
||||
|
||||
Dies erzeugt eine Intra-Note-Edge mit `is_internal: true` und `scope: "chunk"`.
|
||||
|
||||
**Gültige Relationen:**
|
||||
* `depends_on`: Hängt ab von / Benötigt.
|
||||
* `blocks`: Blockiert oder gefährdet (z.B. Risiko -> Projekt).
|
||||
|
|
@ -407,24 +235,6 @@ Für Zusammenfassungen am Ende einer Notiz, oder eines Absatzes:
|
|||
**Multi-Line Support (v2.9.1):**
|
||||
Callout-Blocks mit mehreren Zeilen werden korrekt verarbeitet. Das System erkennt automatisch, wenn mehrere Links im gleichen Callout-Block stehen, und erstellt für jeden Link eine separate Kante (auch bei Deep-Links zu verschiedenen Sections).
|
||||
|
||||
**Verschachtelte Edge-Callouts in Containern (WP-26):**
|
||||
Für übersichtliche Gruppierung von Edges kannst du verschachtelte Callouts nutzen:
|
||||
|
||||
```markdown
|
||||
> [!abstract] Semantic Edges
|
||||
>> [!edge] derived_from
|
||||
>> [[Wikilink#Abschnitt]]
|
||||
>
|
||||
>> [!edge] solves
|
||||
>> [[Wikilink2]]
|
||||
```
|
||||
|
||||
**Regeln:**
|
||||
- Container-Callouts wie `[!abstract]` werden als Gruppierung erkannt, aber nicht semantisch verarbeitet
|
||||
- Eingebettete `>> [!edge]` Callouts werden korrekt extrahiert
|
||||
- Die Einrückungsebene (Anzahl `>`) bestimmt die Zugehörigkeit zum Block
|
||||
- Leere Zeilen innerhalb des Containers (mit `>`) beenden den Edge-Block nicht
|
||||
|
||||
**Format-agnostische De-Duplizierung:**
|
||||
Wenn Kanten bereits via `[!edge]` Callout vorhanden sind, werden sie nicht mehrfach injiziert. Das System erkennt vorhandene Kanten unabhängig vom Format (Inline, Callout, Wikilink).
|
||||
|
||||
|
|
@ -642,65 +452,6 @@ Wir haben uns für Qdrant entschieden.
|
|||
Wir haben auch [[rel:similar_to Pinecone]] und [[rel:similar_to Weaviate]] betrachtet.
|
||||
```
|
||||
|
||||
### 6.3 Beispiel: Section Types & Intra-Note-Edges (WP-26)
|
||||
Eine Erfahrungs-Notiz mit unterschiedlichen Section-Types und Intra-Note-Verbindungen:
|
||||
|
||||
```markdown
|
||||
---
|
||||
id: erlebnis-konflikt-team
|
||||
title: Konflikt im Team-Meeting
|
||||
type: experience
|
||||
tags: [team, konflikt, learning]
|
||||
---
|
||||
|
||||
# Konflikt im Team-Meeting
|
||||
|
||||
## Situation ^sit
|
||||
> [!section] experience
|
||||
|
||||
Am 15. Januar 2026 kam es im Sprint-Review zu einer Eskalation...
|
||||
|
||||
## Meine Reaktion ^react
|
||||
> [!section] experience
|
||||
> [!edge] followed_by
|
||||
> [[#^sit]]
|
||||
|
||||
Ich habe versucht zu deeskalieren, aber...
|
||||
|
||||
## Reflexion ^ref
|
||||
> [!section] insight
|
||||
|
||||
Diese Erfahrung zeigt mir, dass ich in Konfliktsituationen...
|
||||
|
||||
> [!abstract] Semantic Edges
|
||||
>> [!edge] derives
|
||||
>> [[#^sit]]
|
||||
>> [[#^react]]
|
||||
|
||||
## Nächste Schritte ^next
|
||||
> [!section] decision
|
||||
|
||||
Ich werde in Zukunft:
|
||||
1. Früher eingreifen
|
||||
2. Neutrale Sprache verwenden
|
||||
|
||||
> [!edge] caused_by
|
||||
> [[#^ref]]
|
||||
```
|
||||
|
||||
**Ergebnis:**
|
||||
- **4 Chunks** mit unterschiedlichen Types:
|
||||
- `#sit`: `type: experience` (explizit)
|
||||
- `#react`: `type: experience` (explizit)
|
||||
- `#ref`: `type: insight` (explizit, überschreibt `note_type`)
|
||||
- `#next`: `type: decision` (explizit, überschreibt `note_type`)
|
||||
- **Intra-Note-Edges:**
|
||||
- `#react --[followed_by]--> #sit`
|
||||
- `#ref --[derives]--> #sit`
|
||||
- `#ref --[derives]--> #react`
|
||||
- `#next --[caused_by]--> #ref`
|
||||
- Alle Edges haben `is_internal: true` und `scope: "chunk"`
|
||||
|
||||
---
|
||||
|
||||
## 7. Virtual Schema Layer
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
---
|
||||
doc_type: technical_reference
|
||||
audience: developer, power_user
|
||||
scope: obsidian, scripts, workflow, section_types, intra_note_edges
|
||||
scope: obsidian, scripts, workflow
|
||||
status: active
|
||||
version: 1.1.0
|
||||
context: "Setup und Dokumentation der Obsidian-Integration für Mindnet v2.9. Inkludiert WP-26 Section Types und Intra-Note-Edges."
|
||||
version: 1.0.0
|
||||
context: "Setup und Dokumentation der Obsidian-Integration für Mindnet v2.9."
|
||||
---
|
||||
|
||||
# Obsidian Integration Guide
|
||||
|
|
@ -67,87 +67,6 @@ Damit die Skripte funktionieren, müssen folgende Pfade im Vault existieren:
|
|||
|
||||
---
|
||||
|
||||
## 5. Section Types & Intra-Note-Edges (WP-26)
|
||||
|
||||
### 5.1 Section-Type-Syntax
|
||||
|
||||
Das Backend unterstützt nun **Section Types** innerhalb von Notes:
|
||||
|
||||
**Format:**
|
||||
```markdown
|
||||
## Überschrift ^block-id
|
||||
> [!section] type-name
|
||||
```
|
||||
|
||||
**Beispiel:**
|
||||
```markdown
|
||||
---
|
||||
type: experience
|
||||
---
|
||||
|
||||
# Meine Erfahrung
|
||||
|
||||
## Situation ^sit
|
||||
> [!section] experience
|
||||
|
||||
Die Geschichte...
|
||||
|
||||
## Reflexion ^ref
|
||||
> [!section] insight
|
||||
> [!edge] derives
|
||||
> [[#^sit]]
|
||||
|
||||
Was ich daraus lerne...
|
||||
```
|
||||
|
||||
**Wichtige Regeln:**
|
||||
- Das `[!section]`-Callout kann an beliebiger Stelle innerhalb des Abschnitts stehen
|
||||
- Der Section-Type gilt bis zur nächsten Überschrift gleicher oder höherer Ebene
|
||||
- Bei Fehlen eines `[!section]`-Callouts gilt der Note-Type als Fallback
|
||||
- Valide Section-Types müssen in `types.yaml` definiert sein
|
||||
|
||||
### 5.2 Intra-Note-Edges mit Block-References
|
||||
|
||||
Für Verbindungen **innerhalb derselben Note** nutze Block-References:
|
||||
|
||||
**Bevorzugtes Format:**
|
||||
```markdown
|
||||
> [!edge] derives
|
||||
> [[#^block-id]]
|
||||
```
|
||||
|
||||
**Fallback:**
|
||||
```markdown
|
||||
> [!edge] derives
|
||||
> [[#Section-Name]]
|
||||
```
|
||||
|
||||
**Verschachtelte Edge-Callouts:**
|
||||
Für übersichtliche Gruppierung kannst du verschachtelte Callouts nutzen:
|
||||
|
||||
```markdown
|
||||
> [!abstract] Semantic Edges
|
||||
>> [!edge] derived_from
|
||||
>> [[#^sit]]
|
||||
>> [[#^react]]
|
||||
>
|
||||
>> [!edge] supports
|
||||
>> [[Externe Note]]
|
||||
```
|
||||
|
||||
### 5.3 Automatische Features
|
||||
|
||||
**Automatische Section-Erkennung:**
|
||||
Sobald eine Section auf einer bestimmten Überschriften-Ebene eingeführt wurde, beginnt bei jeder weiteren Überschrift auf dieser Ebene automatisch eine neue Section – auch ohne explizites `[!section]`-Callout.
|
||||
|
||||
**Automatische Backlinks:**
|
||||
Für alle Intra-Note-Edges werden automatisch inverse Backlinks erzeugt:
|
||||
- Forward-Edge: `#ref --[derives]--> #sit`
|
||||
- Backlink: `#sit --[derived_from]--> #ref` (automatisch)
|
||||
|
||||
**Default-Edges aus graph_schema.md:**
|
||||
Wenn keine expliziten Intra-Note-Edges definiert sind, aber Section-Types vorhanden, werden automatisch Default-Edges aus dem Graph-Schema generiert.
|
||||
|
||||
## 6. Wartung & Updates
|
||||
## 5. Wartung & Updates
|
||||
|
||||
Bei Änderungen an den Notiz-Typen in der `types.yaml` müssen die entsprechenden Markdown-Vorlagen im Ordner `creation` manuell synchronisiert werden, um die Konsistenz zwischen Obsidian und dem Backend zu wahren.
|
||||
|
|
@ -1,387 +0,0 @@
|
|||
# WP-26 Manuelle Testszenarien
|
||||
|
||||
**Version:** 1.3
|
||||
**Datum:** 25. Januar 2026
|
||||
**Status:** Alle Phasen (Phase 1-3) implementiert
|
||||
|
||||
---
|
||||
|
||||
## 1. Überblick
|
||||
|
||||
Dieses Dokument beschreibt die manuellen Testszenarien für WP-26 Phase 1: Section-Types und Intra-Note-Edges.
|
||||
|
||||
---
|
||||
|
||||
## 2. Voraussetzungen
|
||||
|
||||
1. **Python-Umgebung** mit allen Dependencies aus `requirements.txt`
|
||||
2. **Qdrant-Instanz** erreichbar (lokal oder Docker)
|
||||
3. **Vault mit Test-Note** (siehe Abschnitt 3)
|
||||
|
||||
---
|
||||
|
||||
## 3. Test-Note erstellen
|
||||
|
||||
Erstelle eine neue Markdown-Datei im Vault mit folgendem Inhalt:
|
||||
|
||||
```markdown
|
||||
---
|
||||
id: wp26-test-experience
|
||||
title: WP-26 Test Experience
|
||||
type: experience
|
||||
tags: [test, wp26]
|
||||
---
|
||||
|
||||
# WP-26 Test Experience
|
||||
|
||||
## Situation ^sit
|
||||
> [!section] experience
|
||||
|
||||
Am 25. Januar 2026 testete ich das neue Section-Type Feature.
|
||||
Dies ist der Experience-Teil der Note.
|
||||
|
||||
## Meine Reaktion ^react
|
||||
> [!section] experience
|
||||
|
||||
> [!edge] followed_by
|
||||
> [[#^sit]]
|
||||
|
||||
Ich war zunächst skeptisch, aber die Implementierung sah solide aus.
|
||||
|
||||
## Reflexion ^ref
|
||||
> [!section] insight
|
||||
|
||||
Diese Erfahrung zeigt mir, dass typ-spezifische Sektionen
|
||||
die semantische Präzision des Retrievals verbessern können.
|
||||
|
||||
> [!abstract] Semantic Edges
|
||||
>> [!edge] derives
|
||||
>> [[#^sit]]
|
||||
>> [[#^react]]
|
||||
|
||||
## Nächste Schritte ^next
|
||||
> [!section] decision
|
||||
|
||||
Ich werde:
|
||||
1. Die Tests ausführen
|
||||
2. Die Ergebnisse dokumentieren
|
||||
|
||||
> [!edge] caused_by
|
||||
> [[#^ref]]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Testszenarien
|
||||
|
||||
### 4.1 TS-01: Section-Type-Erkennung
|
||||
|
||||
**Ziel:** Prüfen, ob `[!section]`-Callouts korrekt erkannt werden.
|
||||
|
||||
**Schritte:**
|
||||
|
||||
1. Importiere die Test-Note via `scripts/import_markdown.py`
|
||||
2. Prüfe die Chunks in Qdrant via API oder Debug-Skript
|
||||
|
||||
**Prüfkriterien:**
|
||||
|
||||
| Chunk | Erwarteter `type` | Erwarteter `note_type` | Erwarteter `section` |
|
||||
|-------|-------------------|------------------------|----------------------|
|
||||
| #c00 | experience | experience | Situation |
|
||||
| #c01 | experience | experience | Meine Reaktion |
|
||||
| #c02 | insight | experience | Reflexion |
|
||||
| #c03 | decision | experience | Nächste Schritte |
|
||||
|
||||
**Prüf-Script:**
|
||||
|
||||
```python
|
||||
# scripts/check_wp26_chunks.py
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
client = QdrantClient("http://localhost:6333")
|
||||
note_id = "wp26-test-experience"
|
||||
|
||||
# Hole alle Chunks der Note
|
||||
result = client.scroll(
|
||||
collection_name="mindnet_chunks",
|
||||
scroll_filter={"must": [{"key": "note_id", "match": {"value": note_id}}]},
|
||||
with_payload=True,
|
||||
limit=100
|
||||
)
|
||||
|
||||
for point in result[0]:
|
||||
p = point.payload
|
||||
print(f"Chunk: {p.get('chunk_id')}")
|
||||
print(f" type: {p.get('type')}")
|
||||
print(f" note_type: {p.get('note_type')}")
|
||||
print(f" section: {p.get('section')}")
|
||||
print(f" section_type: {p.get('section_type')}")
|
||||
print(f" block_id: {p.get('block_id')}")
|
||||
print()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.2 TS-02: Block-ID-Erkennung
|
||||
|
||||
**Ziel:** Prüfen, ob Block-IDs (`^id`) aus Überschriften korrekt extrahiert werden.
|
||||
|
||||
**Prüfkriterien:**
|
||||
|
||||
| Chunk | Erwartete `block_id` |
|
||||
|-------|---------------------|
|
||||
| #c00 | sit |
|
||||
| #c01 | react |
|
||||
| #c02 | ref |
|
||||
| #c03 | next |
|
||||
|
||||
---
|
||||
|
||||
### 4.3 TS-03: is_internal Flag für Edges
|
||||
|
||||
**Ziel:** Prüfen, ob Intra-Note-Edges das `is_internal: true` Flag erhalten.
|
||||
|
||||
**Schritte:**
|
||||
|
||||
1. Importiere die Test-Note
|
||||
2. Prüfe die Edges in Qdrant
|
||||
|
||||
**Prüfkriterien:**
|
||||
|
||||
| Edge | `is_internal` |
|
||||
|------|---------------|
|
||||
| #c01 → #c00 (followed_by) | `true` |
|
||||
| #c02 → #c00 (derives) | `true` |
|
||||
| #c02 → #c01 (derives) | `true` |
|
||||
| #c03 → #c02 (caused_by) | `true` |
|
||||
| Alle structure edges (next/prev) | `true` |
|
||||
|
||||
**Prüf-Script:**
|
||||
|
||||
```python
|
||||
# scripts/check_wp26_edges.py
# Prints kind, endpoints and the WP-26 metadata of the edges stored for one note.
from qdrant_client import QdrantClient

client = QdrantClient("http://localhost:6333")
note_id = "wp26-test-experience"

# Fetch the edges of the note. scroll() returns a (points, next_offset)
# tuple; only the first page (at most `limit` points) is read here.
points, _next_offset = client.scroll(
    collection_name="mindnet_edges",
    scroll_filter={"must": [{"key": "note_id", "match": {"value": note_id}}]},
    with_payload=True,
    limit=100,
)

for point in points:
    p = point.payload
    kind = p.get('kind', 'unknown')
    source = p.get('source_id', '?')
    target = p.get('target_id', '?')
    # 'MISSING' makes an absent flag distinguishable from an explicit False.
    is_internal = p.get('is_internal', 'MISSING')
    provenance = p.get('provenance', '?')
    source_hint = p.get('source_hint', '-')

    print(f"{source} --[{kind}]--> {target}")
    print(f" is_internal: {is_internal}")
    print(f" provenance: {provenance}")
    print(f" source_hint: {source_hint}")
    print()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.4 TS-04: Provenance-Normalisierung
|
||||
|
||||
**Ziel:** Prüfen, ob Provenance-Werte korrekt normalisiert werden.
|
||||
|
||||
**Prüfkriterien:**
|
||||
|
||||
| Altes Provenance | Neues `provenance` | `source_hint` |
|
||||
|------------------|-------------------|---------------|
|
||||
| explicit:callout | explicit | callout |
|
||||
| explicit:wikilink | explicit | wikilink |
|
||||
| structure:belongs_to | structure | belongs_to |
|
||||
| structure:order | structure | order |
|
||||
| edge_defaults | rule | edge_defaults |
|
||||
|
||||
---
|
||||
|
||||
### 4.5 TS-05: Automatische Section-Erkennung
|
||||
|
||||
**Ziel:** Prüfen, ob neue Überschriften ohne `[!section]` automatisch neue Chunks erstellen.
|
||||
|
||||
**Test-Note:**
|
||||
|
||||
```markdown
|
||||
---
|
||||
id: wp26-test-auto-section
|
||||
type: experience
|
||||
---
|
||||
|
||||
# Test Auto Section
|
||||
|
||||
## Section A ^a
|
||||
> [!section] insight
|
||||
|
||||
Content A (insight).
|
||||
|
||||
## Section B ^b
|
||||
|
||||
Content B (sollte experience sein - Fallback).
|
||||
|
||||
## Section C ^c
|
||||
> [!section] decision
|
||||
|
||||
Content C (decision).
|
||||
```
|
||||
|
||||
**Prüfkriterien:**
|
||||
|
||||
| Chunk | `type` | Grund |
|
||||
|-------|--------|-------|
|
||||
| Section A | insight | Explizites `[!section]` |
|
||||
| Section B | experience | Fallback auf `note_type` |
|
||||
| Section C | decision | Explizites `[!section]` |
|
||||
|
||||
---
|
||||
|
||||
## 5. Unit-Tests ausführen
|
||||
|
||||
```powershell
|
||||
# Im Projekt-Root
|
||||
cd c:\Dev\cursor\mindnet
|
||||
|
||||
# Aktiviere virtuelle Umgebung (falls vorhanden)
|
||||
# .venv\Scripts\activate
|
||||
|
||||
# Führe WP-26 Tests aus
|
||||
python -m pytest tests/test_wp26_section_types.py -v
|
||||
```
|
||||
|
||||
**Erwartetes Ergebnis:** Alle Tests grün.
|
||||
|
||||
---
|
||||
|
||||
## 6. Bekannte Einschränkungen
|
||||
|
||||
1. **Block-ID-Stability:** Obsidian aktualisiert Block-IDs nicht automatisch bei Umbenennung von Überschriften.
|
||||
2. **Heading-Links:** Links wie `[[#Section Name]]` werden unterstützt, aber Block-References (`[[#^id]]`) werden bevorzugt.
|
||||
3. **Nested Callouts:** Verschachtelte Callouts (`>> [!edge]`) werden korrekt verarbeitet.
|
||||
|
||||
---
|
||||
|
||||
## 7. Phase 2: Retriever-Anpassungen
|
||||
|
||||
### 7.1 is_internal-Boost
|
||||
|
||||
**Konfiguration:** `config/retriever.yaml`
|
||||
|
||||
```yaml
|
||||
edge_scoring:
  internal_edge_boost: 1.2 # +20% Boost für Intra-Note-Edges
  external_edge_boost: 1.0 # Standard für Inter-Note-Edges
|
||||
```
|
||||
|
||||
**Manuelle Prüfung:**
|
||||
|
||||
1. Führe eine Suche durch, die eine Note mit internen Edges trifft
|
||||
2. Prüfe im Debug-Log, dass `is_internal: True` Edges höheres Gewicht erhalten
|
||||
|
||||
### 7.2 Aggregation-Level
|
||||
|
||||
**Konfiguration:** `config/retriever.yaml`
|
||||
|
||||
```yaml
|
||||
aggregation:
  level: note # "note" (default) oder "chunk"
  max_chunks_per_note: 3 # Limit bei "note"-Level
|
||||
```
|
||||
|
||||
**Test mit Chunk-Level:**
|
||||
|
||||
1. Setze `level: chunk` in `retriever.yaml`
|
||||
2. Führe Suche durch
|
||||
3. Prüfe, dass mehrere Chunks derselben Note zurückgegeben werden (keine Deduplizierung)
|
||||
|
||||
### 7.3 Unit-Tests Phase 2
|
||||
|
||||
```bash
|
||||
python -m pytest tests/test_wp26_phase2_retriever.py -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Phase 3: Schema-Validierung (FA-12)
|
||||
|
||||
### 8.1 get_topology_info()
|
||||
|
||||
Die neue Funktion ermittelt typische und verbotene Edge-Types für ein Source/Target-Typ-Paar.
|
||||
|
||||
**Beispiel:**
|
||||
|
||||
```python
|
||||
from app.core.graph.graph_utils import get_topology_info
|
||||
|
||||
topology = get_topology_info("experience", "insight")
|
||||
# Gibt: {"typical": ["resulted_in", ...], "prohibited": [...]}
|
||||
```
|
||||
|
||||
### 8.2 validate_intra_note_edge()
|
||||
|
||||
Validiert Intra-Note-Edges gegen das `graph_schema.md`.
|
||||
|
||||
**Verhalten:**
|
||||
|
||||
| Edge-Typ | Ergebnis | Confidence |
|
||||
|----------|----------|------------|
|
||||
| In `typical` | ✅ Erlaubt | 1.0 |
|
||||
| Nicht in `typical`, nicht in `prohibited` | ✅ Erlaubt (atypisch) | 0.7 |
|
||||
| In `prohibited` | ❌ Abgelehnt | 0.0 |
|
||||
|
||||
### 8.3 Manuelle Prüfung
|
||||
|
||||
1. Erstelle eine Note mit einer verbotenen Edge-Kombination
|
||||
2. Führe Ingestion durch
|
||||
3. Prüfe, dass die Edge abgelehnt wurde (Log: `🚫 [SCHEMA-VALIDATION]`)
|
||||
|
||||
### 8.4 Unit-Tests Phase 3
|
||||
|
||||
```bash
|
||||
python -m pytest tests/test_wp26_phase3_validation.py -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. Alle WP-26 Tests ausführen
|
||||
|
||||
```bash
|
||||
# Alle WP-26 Unit-Tests
|
||||
python -m pytest tests/test_wp26_section_types.py tests/test_wp26_phase2_retriever.py tests/test_wp26_phase3_validation.py -v
|
||||
|
||||
# Nur fehlgeschlagene Tests erneut ausführen
|
||||
python -m pytest --lf -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Bekannte Einschränkungen
|
||||
|
||||
1. **Block-ID-Stability:** Obsidian aktualisiert Block-IDs nicht automatisch bei Umbenennung von Überschriften.
|
||||
2. **Heading-Links:** Links wie `[[#Section Name]]` werden unterstützt, aber Block-References (`[[#^id]]`) werden bevorzugt.
|
||||
3. **Nested Callouts:** Verschachtelte Callouts (`>> [!edge]`) werden korrekt verarbeitet.
|
||||
4. **Strict-Mode:** `strict_mode=True` in der Validierung lehnt atypische Edges ab (Standard: `False`).
|
||||
|
||||
---
|
||||
|
||||
## 11. Zusammenfassung
|
||||
|
||||
| Phase | Status | Beschreibung |
|
||||
|-------|--------|--------------|
|
||||
| Phase 1 | ✅ | Section-Types, Block-IDs, Intra-Note-Edges |
|
||||
| Phase 2 | ✅ | is_internal-Boost, Aggregation-Level |
|
||||
| Phase 3 | ✅ | Schema-Validierung (FA-12) |
|
||||
|
||||
---
|
||||
|
||||
**Ende der Testdokumentation (WP-26 v1.3)**
|
||||
|
|
@ -1,344 +0,0 @@
|
|||
# WP-26 Anforderungen-Checkliste
|
||||
|
||||
**Version:** 1.3
|
||||
**Datum:** 25. Januar 2026
|
||||
**Status:** Implementierung abgeschlossen
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Section-Types & Parsing
|
||||
|
||||
### ✅ FA-01: Neues Callout-Format `[!section]`
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `chunking_parser.py`: Regex für `[!section]` Callout-Erkennung
|
||||
- State-Machine für `current_section_type` und `section_introduced_at_level`
|
||||
- Retroaktive Propagation via `_propagate_section_type_backwards()`
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/chunking/chunking_parser.py`
|
||||
- `app/core/chunking/chunking_models.py` (RawBlock, Chunk)
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_section_types.py::TestSectionTypeRecognition`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-01b: Verschachtelte Edge-Callouts
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `graph_derive_edges.py`: `extract_callout_relations()` unterstützt verschachtelte Callouts
|
||||
- Einrückungsebene (`>>`) wird korrekt erkannt
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/graph/graph_derive_edges.py`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_section_types.py::TestNestedEdgeCallouts`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-02: Scope-Beendigung
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- Scope endet bei Überschrift gleicher oder höherer Ebene
|
||||
- `section_introduced_at_level` Tracking
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/chunking/chunking_parser.py`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_section_types.py::TestSectionTypeScope`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-02b: Automatische Section-Erkennung
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- Neue Überschrift auf `section_introduced_at_level` erzeugt automatisch neue Section
|
||||
- Fallback auf `note_type` wenn kein `[!section]` Callout vorhanden
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/chunking/chunking_parser.py`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_section_types.py::TestAutomaticSectionRecognition`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-03: `type`-Feld-Befüllung
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `effective_type = section_type if section_type else note_type`
|
||||
- Wird in `ingestion_chunk_payload.py` berechnet
|
||||
- `type`-Feld enthält immer den effektiven Typ
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/ingestion/ingestion_chunk_payload.py`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_section_types.py` (implizit)
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-03b: Body-Section Handling
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- Textblöcke vor erstem `[!section]` erhalten `section: "body"`
|
||||
- `section_type: None` (Fallback auf `note_type`)
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/chunking/chunking_parser.py`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-04: Optionales Feld `note_type`
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- Neues Feld `note_type` im Chunk-Payload
|
||||
- Keyword-Index in Qdrant erstellt
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/ingestion/ingestion_chunk_payload.py`
|
||||
- `scripts/setup_mindnet_collections.py`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_section_types.py` (implizit)
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-05: Block-Reference als Link-Format
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `parse_link_target()` extrahiert Block-ID aus `[[#^block-id]]`
|
||||
- Unterstützt auch `[[#Section Name ^block-id]]` Format
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/graph/graph_utils.py`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_section_types.py::TestBlockIdParsing`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-06: Section-zu-Chunk-Mapping
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- Mapping erfolgt implizit über Block-IDs und Heading-Matches
|
||||
- `parse_link_target()` löst Section-Referenzen auf
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/graph/graph_derive_edges.py`
|
||||
- `app/core/graph/graph_utils.py`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-07: Edge-Erstellung für Intra-Note-Links
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- Intra-Note-Links werden zu Chunk-Scope Edges
|
||||
- `scope: "chunk"` für Intra-Note-Edges
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/graph/graph_derive_edges.py`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-07b: Metadaten-Erweiterung (`is_internal` Flag)
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `is_internal: True` für Edges innerhalb derselben Note
|
||||
- Automatische Berechnung in `graph_utils._edge()`
|
||||
- Boolean-Index in Qdrant
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/graph/graph_utils.py`
|
||||
- `scripts/setup_mindnet_collections.py`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_section_types.py::TestIsInternalFlag`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-08: Default-Edges aus graph_schema.md
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `get_typical_edge_for()` ermittelt Default-Edge aus Schema
|
||||
- Automatische Edge-Erstellung bei Section-Transitions
|
||||
- `provenance: "rule"`, `rule_id: "inferred:section_transition"`
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/graph/graph_derive_edges.py`
|
||||
- `app/core/graph/graph_utils.py`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_section_types.py::TestAutomaticIntraNoteEdges`
|
||||
- `tests/test_wp26_section_types.py::TestGraphSchemaParser`
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Retriever-Anpassungen
|
||||
|
||||
### ✅ FA-09: Edge-Gewichtung für Intra-Note-Edges
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `internal_edge_boost` und `external_edge_boost` in `retriever.yaml`
|
||||
- Boost wird in `Subgraph.add_edge()` angewendet
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/graph/graph_subgraph.py`
|
||||
- `config/retriever.yaml`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_phase2_retriever.py::TestIsInternalBoost`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-09b: Retrieval-Priorisierung (Section-Type vor Note-Type)
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `effective_type` wird für `retriever_weight` Lookup verwendet
|
||||
- `type`-Feld enthält bereits den effektiven Typ
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/ingestion/ingestion_chunk_payload.py`
|
||||
|
||||
---
|
||||
|
||||
### ✅ FA-10: Optionale Chunk-Level-Deduplizierung
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `aggregation.level` in `retriever.yaml` (`"note"` oder `"chunk"`)
|
||||
- `max_chunks_per_note` für Note-Level-Limitierung
|
||||
- Implementiert in `retriever._score_and_pool_hits()`
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/retrieval/retriever.py`
|
||||
- `config/retriever.yaml`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_phase2_retriever.py::TestNoteLevelAggregation`
|
||||
- `tests/test_wp26_phase2_retriever.py::TestChunkLevelAggregation`
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Schema-Validierung
|
||||
|
||||
### ✅ FA-12: Schema-Validierung gegen effektiven Chunk-Typ
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Implementierung:**
|
||||
- `validate_intra_note_edge()` prüft gegen `graph_schema.md`
|
||||
- Verwendet `effective_type` (type-Feld) beider Chunks
|
||||
- `get_topology_info()` liefert `typical` und `prohibited` Listen
|
||||
- Integration in Ingestion-Pipeline (nach LLM-Validierung)
|
||||
|
||||
**Dateien:**
|
||||
- `app/core/ingestion/ingestion_validation.py`
|
||||
- `app/core/graph/graph_utils.py`
|
||||
- `app/core/ingestion/ingestion_processor.py`
|
||||
|
||||
**Tests:**
|
||||
- `tests/test_wp26_phase3_validation.py`
|
||||
|
||||
**Verhalten:**
|
||||
- Edge in `prohibited` → ❌ Abgelehnt (confidence: 0.0)
|
||||
- Edge in `typical` → ✅ Erlaubt (confidence: 1.0)
|
||||
- Edge atypisch → ✅ Erlaubt (confidence: 0.7)
|
||||
|
||||
---
|
||||
|
||||
## Abwärtskompatibilität
|
||||
|
||||
### ✅ FA-11: Fallback-Verhalten
|
||||
|
||||
**Status:** ✅ Implementiert
|
||||
|
||||
**Garantien:**
|
||||
- Notes ohne `[!section]` Callouts funktionieren unverändert
|
||||
- `Chunk.type = note_type` (wie bisher)
|
||||
- Keine Breaking Changes für bestehende Notes
|
||||
|
||||
---
|
||||
|
||||
## Zusammenfassung
|
||||
|
||||
| Phase | Requirements | Status |
|-------|--------------|--------|
| **Phase 1** | FA-01 bis FA-08 (inkl. FA-01b, FA-02b, FA-03b, FA-07b) | ✅ 12/12 |
| **Phase 2** | FA-09, FA-09b, FA-10 | ✅ 3/3 |
| **Phase 3** | FA-12 | ✅ 1/1 |
| **Kompatibilität** | FA-11 | ✅ 1/1 |
| **GESAMT** | | ✅ **17/17** |
|
||||
|
||||
---
|
||||
|
||||
## Manuelle Tests
|
||||
|
||||
### 1. Umfassendes Test-Script ausführen
|
||||
|
||||
```powershell
|
||||
cd c:\Dev\cursor\mindnet
|
||||
python scripts/test_wp26_comprehensive.py
|
||||
```
|
||||
|
||||
### 2. Unit-Tests ausführen
|
||||
|
||||
```bash
|
||||
# Alle WP-26 Tests
|
||||
python -m pytest tests/test_wp26_section_types.py tests/test_wp26_phase2_retriever.py tests/test_wp26_phase3_validation.py -v
|
||||
|
||||
# Einzelne Phasen
|
||||
python -m pytest tests/test_wp26_section_types.py -v
|
||||
python -m pytest tests/test_wp26_phase2_retriever.py -v
|
||||
python -m pytest tests/test_wp26_phase3_validation.py -v
|
||||
```
|
||||
|
||||
### 3. Integrationstest mit echter Note
|
||||
|
||||
1. Erstelle Test-Note im Vault (siehe `05_WP26_Manual_Testing.md`)
|
||||
2. Importiere via `scripts/import_markdown.py`
|
||||
3. Prüfe Chunks und Edges in Qdrant
|
||||
|
||||
---
|
||||
|
||||
## Bekannte Einschränkungen
|
||||
|
||||
1. **Block-ID-Stability:** Obsidian aktualisiert Block-IDs nicht automatisch bei Umbenennung
|
||||
2. **Heading-Links:** `[[#Section Name]]` funktioniert, aber `[[#^block-id]]` wird bevorzugt
|
||||
3. **Strict-Mode:** Schema-Validierung im Strict-Mode lehnt atypische Edges ab (Standard: `False`)
|
||||
|
||||
---
|
||||
|
||||
**Ende der Checkliste**
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -2,9 +2,9 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
FILE: scripts/setup_mindnet_collections.py
|
||||
VERSION: 2.2.0 (2026-01-25)
|
||||
VERSION: 2.1.0 (2025-12-15)
|
||||
STATUS: Active
|
||||
COMPATIBILITY: v2.9.1 (Post-WP14/WP-15b), WP-26 (Intra-Note-Edges)
|
||||
COMPATIBILITY: v2.9.1 (Post-WP14/WP-15b)
|
||||
|
||||
Zweck:
|
||||
-------
|
||||
|
|
@ -107,12 +107,6 @@ class QdrantHTTP:
|
|||
payload = {"field_name": field, "field_schema": {"type": "text"}}
|
||||
self.rq("PUT", f"/collections/{collection}/index", json=payload)
|
||||
print(f"[+] Index text on {collection}.{field}")
|
||||
|
||||
def create_bool_index(self, collection: str, field: str) -> None:
    """WP-26 v1.0: create a boolean payload index for filtering (e.g. is_internal).

    Issues a PUT against ``/collections/{collection}/index`` through
    ``self.rq`` with ``field_schema`` set to ``"bool"`` and prints a
    confirmation line.

    Args:
        collection: Name of the collection that receives the index.
        field: Payload field to index.
    """
    payload = {"field_name": field, "field_schema": "bool"}
    self.rq("PUT", f"/collections/{collection}/index", json=payload)
    print(f"[+] Index bool on {collection}.{field}")
|
||||
|
||||
def list_collections(self) -> Dict[str, Any]:
|
||||
r = self.rq("GET", "/collections")
|
||||
|
|
@ -135,9 +129,6 @@ def setup_mindnet_collections(q: QdrantHTTP, prefix: str, dim: int, distance: st
|
|||
q.create_keyword_index(chunks, f)
|
||||
for f in ["tags", "Rolle", "links"]:
|
||||
q.create_keyword_index(chunks, f)
|
||||
# WP-26 v1.0: note_type für Filterung (Section-Type vs Note-Type)
|
||||
q.create_keyword_index(chunks, "note_type")
|
||||
q.create_keyword_index(chunks, "type") # Effektiver Typ (section_type || note_type)
|
||||
q.create_text_index(chunks, "text") # Volltextsuche auf dem Textfeld
|
||||
|
||||
# mindnet_notes: Metadaten der Notizen
|
||||
|
|
@ -154,16 +145,8 @@ def setup_mindnet_collections(q: QdrantHTTP, prefix: str, dim: int, distance: st
|
|||
"dst_chunk_id",
|
||||
"link_text",
|
||||
"relation",
|
||||
"kind", # WP-26 v1.0: Kantentyp für Filterung
|
||||
"source_id", # WP-26 v1.0: Source-ID für Graph-Queries
|
||||
"target_id", # WP-26 v1.0: Target-ID für Graph-Queries
|
||||
"scope", # WP-26 v1.0: "chunk" oder "note"
|
||||
"provenance", # WP-26 v1.0: Herkunft der Kante
|
||||
]:
|
||||
q.create_keyword_index(edges, f)
|
||||
|
||||
# WP-26 v1.0: Boolean-Index für is_internal (Intra-Note-Edge-Filterung)
|
||||
q.create_bool_index(edges, "is_internal")
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
|
|
|
|||
|
|
@ -1,561 +0,0 @@
|
|||
"""
|
||||
FILE: scripts/test_wp26_comprehensive.py
|
||||
DESCRIPTION: Umfassendes Test-Script für WP-26 - Prüft alle FA-Requirements
|
||||
aus dem Lastenheft v1.3
|
||||
VERSION: 1.0.0
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Füge Projekt-Root zum Python-Pfad hinzu
|
||||
project_root = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
from qdrant_client import QdrantClient
|
||||
import yaml
|
||||
import json
|
||||
|
||||
# Farben für Terminal-Output
|
||||
class Colors:
    """ANSI escape sequences used to colour the script's terminal output."""

    GREEN = '\033[92m'
    RED = '\033[91m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    RESET = '\033[0m'  # emitted after each coloured message by the print helpers
    BOLD = '\033[1m'
|
||||
|
||||
def print_header(text: str):
    """Print *text* as a bold blue banner framed by two 70-character '=' rules."""
    rule = '=' * 70
    print(f"\n{Colors.BOLD}{Colors.BLUE}{rule}{Colors.RESET}")
    print(f"{Colors.BOLD}{Colors.BLUE}{text}{Colors.RESET}")
    print(f"{Colors.BOLD}{Colors.BLUE}{rule}{Colors.RESET}\n")
|
||||
|
||||
def print_success(text: str):
    """Print *text* in green, prefixed with a check mark."""
    print(f"{Colors.GREEN}✓ {text}{Colors.RESET}")
|
||||
|
||||
def print_error(text: str):
    """Print *text* in red, prefixed with a cross mark."""
    print(f"{Colors.RED}✗ {text}{Colors.RESET}")
|
||||
|
||||
def print_warning(text: str):
    """Print *text* in yellow, prefixed with a warning sign."""
    print(f"{Colors.YELLOW}⚠ {text}{Colors.RESET}")
|
||||
|
||||
def print_info(text: str):
    """Print *text* uncoloured, indented by a leading space."""
    print(f" {text}")
|
||||
|
||||
# ============================================================================
|
||||
# PHASE 1: Section-Types & Parsing
|
||||
# ============================================================================
|
||||
|
||||
def test_fa01_section_callout_format():
    """FA-01: check that the new `[!section]` callout format is recognised.

    Parses a small note whose heading is followed by `> [!section] insight`.

    Returns:
        True as soon as one parsed block carries section_type "insight",
        False if no block does.
    """
    print_header("FA-01: Section-Callout-Format")

    from app.core.chunking.chunking_parser import parse_blocks

    markdown = """## Test Section ^test-id
> [!section] insight

Content here.
"""
    blocks, _ = parse_blocks(markdown)

    for block in blocks:
        if block.section_type == "insight":
            print_success(f"Section-Type 'insight' erkannt in Block: {block.text[:50]}...")
            return True

    print_error("Section-Type wurde nicht erkannt")
    return False
|
||||
|
||||
def test_fa01b_nested_edge_callouts():
    """FA-01b: check that nested (`>>`) edge callouts are extracted.

    Returns:
        True if at least two (kind, target) pairs are extracted from the
        sample callout, False otherwise.
    """
    print_header("FA-01b: Verschachtelte Edge-Callouts")

    from app.core.graph.graph_derive_edges import extract_callout_relations

    markdown = """> [!abstract] Semantic Edges
>> [!edge] derives
>> [[#^sit]]
>>
>> [!edge] supports
>> [[Target]]
"""
    pairs, _ = extract_callout_relations(markdown)

    # The sample declares two edges, so fewer than two pairs is a failure.
    if len(pairs) < 2:
        print_error(f"Verschachtelte Callouts nicht korrekt erkannt: {len(pairs)} Edges")
        return False

    print_success(f"Verschachtelte Callouts erkannt: {len(pairs)} Edges gefunden")
    for kind, target in pairs:
        print_info(f" - {kind} -> {target}")
    return True
|
||||
|
||||
def test_fa02_scope_termination():
    """FA-02: check that a section scope does not leak into the next heading.

    Section A declares `[!section] insight`; Section B has no callout.

    Returns:
        False if Section A does not end up with section_type "insight",
        True otherwise. An unexpected type on Section B is only reported as
        a warning and does not fail the check.
    """
    print_header("FA-02: Scope-Beendigung")

    from app.core.chunking.chunking_parser import parse_blocks

    markdown = """## Section A ^a
> [!section] insight

Content A.

## Section B ^b
<!-- Kein Callout -->

Content B (sollte note_type verwenden).
"""
    blocks, _ = parse_blocks(markdown)

    section_a_type = None
    section_b_type = None

    # For each section the last matching block determines the recorded type.
    for block in blocks:
        if "Section A" in block.text or block.section_type == "insight":
            section_a_type = block.section_type
        if "Section B" in block.text:
            section_b_type = block.section_type

    if section_a_type != "insight":
        print_error(f"Section A hat falschen Type: {section_a_type}")
        return False
    print_success(f"Section A hat korrekten Type: {section_a_type}")

    # Section B should carry None (fallback to note_type).
    if section_b_type is None:
        print_success("Section B verwendet Fallback (None = note_type)")
    else:
        print_warning(f"Section B hat Type: {section_b_type} (erwartet: None)")

    return True
|
||||
|
||||
def test_fa03_type_field():
    """FA-03: check that the payload `type` field holds the effective type.

    Builds chunk payloads for a two-section note and verifies that, wherever
    a payload has a section_type, its `type` equals that section_type.

    Returns:
        False on the first payload whose `type` differs from its
        section_type, True otherwise.
    """
    print_header("FA-03: type-Feld-Befüllung")

    from app.core.ingestion.ingestion_chunk_payload import make_chunk_payloads
    from app.core.chunking.chunking_parser import parse_blocks
    from app.core.chunking.chunking_strategies import strategy_by_heading

    # Mock note
    markdown = """---
type: experience
---

## Situation ^sit
> [!section] experience

Text.

## Reflexion ^ref
> [!section] insight

Text.
"""
    blocks, _ = parse_blocks(markdown)

    # Call signature: strategy_by_heading(blocks, config, note_id, context_prefix)
    config = {
        "max": 500,
        "target": 400,
        "enable_smart_edge_allocation": True
    }
    chunks = strategy_by_heading(blocks, config, note_id="test-note")

    # Build the payloads.
    # Signature: make_chunk_payloads(note, note_path, chunks_from_chunker, **kwargs)
    payloads = make_chunk_payloads(
        note={"frontmatter": {"type": "experience"}},
        note_path="test.md",
        chunks_from_chunker=chunks,
        file_path="test.md",
        types_cfg={}
    )

    # Check the effective type of every payload.
    for p in payloads:
        effective_type = p.get("type")
        note_type = p.get("note_type")
        section_type = p.get("section_type")

        print_info(f"Chunk: type={effective_type}, note_type={note_type}, section_type={section_type}")

        # A section type, when present, must take precedence.
        if section_type and effective_type != section_type:
            print_error(f"effective_type ({effective_type}) != section_type ({section_type})")
            return False

    print_success("effective_type wird korrekt berechnet (section_type || note_type)")
    return True
|
||||
|
||||
def test_fa04_note_type_field():
    """FA-04: check that every chunk payload carries the `note_type` field.

    Returns:
        False if a payload lacks `note_type` or its value is not
        "experience", True otherwise.
    """
    print_header("FA-04: note_type-Feld")

    from app.core.ingestion.ingestion_chunk_payload import make_chunk_payloads
    from app.core.chunking.chunking_parser import parse_blocks
    from app.core.chunking.chunking_strategies import strategy_by_heading

    markdown = """---
type: experience
---

## Section ^sec
> [!section] insight

Text.
"""
    blocks, _ = parse_blocks(markdown)

    # Call signature: strategy_by_heading(blocks, config, note_id, context_prefix)
    config = {
        "max": 500,
        "target": 400,
        "enable_smart_edge_allocation": True
    }
    chunks = strategy_by_heading(blocks, config, note_id="test-note")

    # Signature: make_chunk_payloads(note, note_path, chunks_from_chunker, **kwargs)
    payloads = make_chunk_payloads(
        note={"frontmatter": {"type": "experience"}},
        note_path="test.md",
        chunks_from_chunker=chunks,
        file_path="test.md",
        types_cfg={}
    )

    for p in payloads:
        if "note_type" not in p:
            print_error("note_type-Feld fehlt im Payload")
            return False

        if p["note_type"] != "experience":
            print_error(f"note_type ist falsch: {p['note_type']} (erwartet: experience)")
            return False

    print_success("note_type-Feld ist vorhanden und korrekt")
    return True
|
||||
|
||||
def test_fa05_block_reference():
    """FA-05: check that block references are parsed from link targets.

    Returns:
        True if parse_link_target yields "block-id" for `[[#^block-id]]` and
        "kontext" for a heading link that ends in `^kontext`; False on the
        first mismatch.
    """
    print_header("FA-05: Block-Reference")

    from app.core.graph.graph_utils import parse_link_target

    # Plain block-ID extraction; only the second tuple element is checked.
    _, section = parse_link_target("[[#^block-id]]", "test-note")

    if section != "block-id":
        print_error(f"Block-ID falsch extrahiert: {section} (erwartet: block-id)")
        return False
    print_success(f"Block-ID korrekt extrahiert: {section}")

    # Block ID appended to a section string.
    _, section2 = parse_link_target("[[#📖 Diagnose ^kontext]]", "test-note")

    if section2 != "kontext":
        print_error(f"Block-ID aus Section-String falsch: {section2} (erwartet: kontext)")
        return False
    print_success(f"Block-ID aus Section-String extrahiert: {section2}")

    return True
|
||||
|
||||
def test_fa07_is_internal_flag():
    """FA-07b: check that an intra-note edge is flagged `is_internal`.

    Returns:
        True if the edge built between two chunks of "note1" has
        is_internal set to exactly True, False otherwise.
    """
    print_header("FA-07b: is_internal Flag")

    from app.core.graph.graph_utils import _edge

    # Intra-note edge: source and target chunk belong to the same note.
    edge1 = _edge("derives", "chunk", "note1#c01", "note1#c02", "note1", {})

    if edge1.get("is_internal") is not True:
        print_error(f"Intra-Note-Edge hat is_internal={edge1.get('is_internal')}")
        return False
    print_success("Intra-Note-Edge hat is_internal=True")

    # The inter-note case is not covered here: per the original author's
    # note, _edge only looks at note_id, so a real test would have to go
    # through build_edges_for_note.

    return True
|
||||
|
||||
def test_fa08_default_edges_from_schema():
    """FA-08: look up the default edge for experience -> insight in the schema.

    Purely informational: a missing typical edge is reported as a warning.

    Returns:
        Always True.
    """
    print_header("FA-08: Default-Edges aus Schema")

    from app.core.graph.graph_utils import get_typical_edge_for, clear_graph_schema_cache

    # Cleared before the lookup, presumably so a stale schema is not used.
    clear_graph_schema_cache()

    # Lookup for experience -> insight
    edge_type = get_typical_edge_for("experience", "insight")

    if edge_type:
        print_success(f"Typische Edge gefunden: {edge_type}")
        print_info(f" experience -> insight: {edge_type}")
    else:
        print_warning("Keine typische Edge gefunden (Fallback auf 'any' oder 'default')")

    return True
|
||||
|
||||
# ============================================================================
|
||||
# PHASE 2: Retriever-Anpassungen
|
||||
# ============================================================================
|
||||
|
||||
def test_fa09_internal_edge_boost():
    """FA-09: check that intra-note edges are weighted with the internal boost.

    Loads the edge-scoring config, adds one internal edge of weight 1.0 to a
    fresh Subgraph and compares the stored weight with
    1.0 * internal_edge_boost (tolerance 0.01).

    Returns:
        False if the config lacks one of the boost keys or the stored weight
        deviates from the expected one, True otherwise. If no edge is found
        under the source node, the weight cannot be checked; this is
        reported as a warning and the result stays True.
    """
    print_header("FA-09: Internal Edge Boost")

    from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config
    from app.core.graph.graph_utils import clear_graph_schema_cache

    # Drop cached state so the config is freshly loaded.
    clear_graph_schema_cache()
    get_edge_scoring_config.cache_clear()

    config = get_edge_scoring_config()

    if "internal_edge_boost" not in config or "external_edge_boost" not in config:
        print_error("Edge-Scoring-Config fehlt")
        return False

    print_success("Edge-Scoring-Config geladen:")
    print_info(f" internal_edge_boost: {config['internal_edge_boost']}")
    print_info(f" external_edge_boost: {config['external_edge_boost']}")

    # Exercise the Subgraph with a single internal edge.
    sg = Subgraph()
    sg.add_edge({
        "source": "note1#c01",
        "target": "note1#c02",
        "kind": "derives",
        "weight": 1.0,
        "is_internal": True
    })

    edges = sg.adj.get("note1#c01", [])
    if not edges:
        # Previously this case passed silently although nothing was verified.
        print_warning("Keine Edge für 'note1#c01' im Subgraph gefunden - Boost nicht geprüft")
        return True

    final_weight = edges[0]["weight"]
    expected_weight = 1.0 * config["internal_edge_boost"]

    if abs(final_weight - expected_weight) >= 0.01:
        print_error(f"Boost falsch: {final_weight} (erwartet: {expected_weight})")
        return False

    print_success(f"Boost korrekt angewendet: {final_weight} (erwartet: {expected_weight})")
    return True
|
||||
|
||||
def test_fa10_chunk_level_aggregation():
    """FA-10: check that the aggregation config is present and valid.

    Returns:
        False if the config lacks `level` or `max_chunks_per_note`, or if
        `level` is neither "note" nor "chunk"; True otherwise.
    """
    print_header("FA-10: Aggregation-Level")

    from app.core.retrieval.retriever import _get_aggregation_config

    config = _get_aggregation_config()

    if "level" not in config or "max_chunks_per_note" not in config:
        print_error("Aggregation-Config fehlt")
        return False

    print_success("Aggregation-Config geladen:")
    print_info(f" level: {config['level']}")
    print_info(f" max_chunks_per_note: {config['max_chunks_per_note']}")

    if config["level"] not in ["note", "chunk"]:
        print_error(f"Aggregation-Level ist ungültig: {config['level']}")
        return False

    print_success("Aggregation-Level ist gültig")
    return True
|
||||
|
||||
# ============================================================================
|
||||
# PHASE 3: Schema-Validierung
|
||||
# ============================================================================
|
||||
|
||||
def test_fa12_schema_validation():
    """FA-12: run three schema-validation checks for intra-note edges.

    1. A ``resulted_in`` edge from an ``experience`` chunk to an ``insight``
       chunk must be valid (otherwise the check fails).
    2. An unknown edge kind is expected to be valid with confidence 0.7; a
       deviation only produces a warning and does not fail the check.
    3. A ``related_to`` edge between chunks that carry both ``type`` and
       ``note_type`` must be valid (otherwise the check fails).

    Returns:
        bool: False when check 1 or check 3 is rejected, True otherwise.
    """
    print_header("FA-12: Schema-Validierung")

    from app.core.ingestion.ingestion_validation import validate_intra_note_edge
    from app.core.graph.graph_utils import clear_graph_schema_cache

    clear_graph_schema_cache()

    experience_chunk = {"type": "experience"}
    insight_chunk = {"type": "insight"}

    # Check 1: typical edge.
    typical_edge = {"kind": "resulted_in", "source_id": "chunk1", "target_id": "chunk2"}
    typical_ok, typical_confidence, typical_reason = validate_intra_note_edge(
        edge=typical_edge,
        source_chunk=experience_chunk,
        target_chunk=insight_chunk,
        strict_mode=False,
    )
    if not typical_ok:
        print_error(f"Typische Edge abgelehnt: {typical_reason}")
        return False
    print_success(
        f"Typische Edge validiert: {typical_edge['kind']} (confidence: {typical_confidence})"
    )

    # Check 2: atypical edge (should be allowed with reduced confidence).
    atypical_edge = {
        "kind": "very_unusual_edge_xyz123",
        "source_id": "chunk1",
        "target_id": "chunk2",
    }
    atypical_ok, atypical_confidence, _ = validate_intra_note_edge(
        edge=atypical_edge,
        source_chunk=experience_chunk,
        target_chunk=insight_chunk,
        strict_mode=False,
    )
    if not (atypical_ok and atypical_confidence == 0.7):
        print_warning(f"Atypische Edge: valid={atypical_ok}, confidence={atypical_confidence}")
    else:
        print_success(
            f"Atypische Edge erlaubt mit reduzierter Confidence: {atypical_confidence}"
        )

    # Check 3: chunks carry both "type" and "note_type"; per the original
    # comment the "type" field is meant to take precedence.
    effective_edge = {"kind": "related_to", "source_id": "chunk1", "target_id": "chunk2"}
    effective_ok, _, effective_reason = validate_intra_note_edge(
        edge=effective_edge,
        source_chunk={"type": "insight", "note_type": "experience"},
        target_chunk={"type": "decision", "note_type": "experience"},
        strict_mode=False,
    )
    if not effective_ok:
        print_error(f"Validierung mit effektivem Typ fehlgeschlagen: {effective_reason}")
        return False
    print_success("Effektiver Typ (type-Feld) wird für Validierung verwendet")

    return True
|
||||
|
||||
# ============================================================================
|
||||
# QDRANT-INTEGRATION TESTS
|
||||
# ============================================================================
|
||||
|
||||
def test_qdrant_indices():
    """Look up the chunk and edge collections in a local Qdrant instance.

    Connects to ``http://localhost:6333``, picks the collections whose names
    contain "chunks" / "edges" (case-insensitive; the last match wins) and
    prints which payload indices are expected. No index is actually inspected.

    Returns:
        bool: Always True. Missing collections and connection errors are
        reported as warnings because they are treated as non-critical.
    """
    print_header("Qdrant-Indizes")

    def _last_match(names, keyword):
        # Mirrors a loop that overwrites on every hit: the last match wins.
        hits = [name for name in names if keyword in name.lower()]
        return hits[-1] if hits else None

    try:
        qdrant = QdrantClient("http://localhost:6333")

        # Inspect the available collections.
        names = [entry.name for entry in qdrant.get_collections().collections]
        chunks_collection = _last_match(names, "chunks")
        edges_collection = _last_match(names, "edges")

        if chunks_collection and edges_collection:
            print_success(f"Collections gefunden: {chunks_collection}, {edges_collection}")

            # Simplified: only lists the indices that should exist; a real
            # check would need the collection info.
            print_info("Indizes sollten vorhanden sein für:")
            print_info(" - chunks: note_type, type, block_id")
            print_info(" - edges: is_internal (bool), kind, source_id, target_id")
        else:
            print_warning("Collections nicht gefunden - möglicherweise noch nicht initialisiert")
            print_info("Führe 'python scripts/setup_mindnet_collections.py' aus")

        # Not critical for functionality in either case.
        return True

    except Exception as exc:
        print_warning(f"Qdrant-Verbindung fehlgeschlagen: {exc}")
        print_info("Stelle sicher, dass Qdrant läuft: docker-compose up -d")
        return True  # Not critical.
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
def main():
    """Run every WP-26 check, print a summary and return an exit status.

    Each check is called in order; a check that raises is reported with its
    traceback and counted as failed.

    Returns:
        int: 0 when every check returned a truthy value, otherwise 1.
    """
    rule = "=" * 70
    print(f"\n{Colors.BOLD}{Colors.BLUE}")
    print(rule)
    print("WP-26 Umfassende Funktionsprüfung")
    print("Lastenheft v1.3 - Alle FA-Requirements")
    print(rule)
    print(f"{Colors.RESET}\n")

    suite = [
        # Phase 1
        ("FA-01: Section-Callout-Format", test_fa01_section_callout_format),
        ("FA-01b: Verschachtelte Edge-Callouts", test_fa01b_nested_edge_callouts),
        ("FA-02: Scope-Beendigung", test_fa02_scope_termination),
        ("FA-03: type-Feld-Befüllung", test_fa03_type_field),
        ("FA-04: note_type-Feld", test_fa04_note_type_field),
        ("FA-05: Block-Reference", test_fa05_block_reference),
        ("FA-07b: is_internal Flag", test_fa07_is_internal_flag),
        ("FA-08: Default-Edges aus Schema", test_fa08_default_edges_from_schema),

        # Phase 2
        ("FA-09: Internal Edge Boost", test_fa09_internal_edge_boost),
        ("FA-10: Aggregation-Level", test_fa10_chunk_level_aggregation),

        # Phase 3
        ("FA-12: Schema-Validierung", test_fa12_schema_validation),

        # Integration
        ("Qdrant-Indizes", test_qdrant_indices),
    ]

    outcomes = []
    for label, check in suite:
        try:
            ok = check()
        except Exception as exc:
            print_error(f"Test '{label}' fehlgeschlagen mit Exception: {exc}")
            import traceback
            traceback.print_exc()
            ok = False
        outcomes.append((label, ok))

    # Summary
    print_header("ZUSAMMENFASSUNG")

    total = len(outcomes)
    passed = len([label for label, ok in outcomes if ok])

    for label, ok in outcomes:
        report = print_success if ok else print_error
        report(label)

    print(f"\n{Colors.BOLD}Ergebnis: {passed}/{total} Tests bestanden{Colors.RESET}\n")

    if passed != total:
        print(f"{Colors.RED}{Colors.BOLD}✗ Einige Tests fehlgeschlagen. Bitte prüfe die Fehler oben.{Colors.RESET}\n")
        return 1

    print(f"{Colors.GREEN}{Colors.BOLD}✓ Alle Tests bestanden! WP-26 ist vollständig implementiert.{Colors.RESET}\n")
    return 0
|
||||
|
||||
# Script entry point: run all checks and use main()'s return value (0 or 1)
# as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
|
|
@ -1,240 +0,0 @@
|
|||
"""
|
||||
FILE: tests/test_wp26_phase2_retriever.py
|
||||
DESCRIPTION: Unit-Tests für WP-26 Phase 2: Retriever-Anpassungen
|
||||
- is_internal-Boost für Intra-Note-Edges
|
||||
- Konfigurierbare Aggregation (Note/Chunk Level)
|
||||
VERSION: 1.0.0
|
||||
"""
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import os
|
||||
|
||||
|
||||
class TestEdgeScoringConfig:
    """UT-19: edge-scoring configuration."""

    def test_get_edge_scoring_config_defaults(self):
        """Default boosts are returned when the config path does not exist.

        ``MINDNET_RETRIEVER_CONFIG`` is pointed at a non-existent file for the
        duration of the lookup (presumably the variable the loader reads;
        verify against ``get_edge_scoring_config``).
        """
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        with patch.dict(os.environ, {"MINDNET_RETRIEVER_CONFIG": "/nonexistent/path.yaml"}):
            # Clear inside the patched environment so the lookup cannot be
            # served from a result cached under the real environment.
            get_edge_scoring_config.cache_clear()
            try:
                config = get_edge_scoring_config()
            finally:
                # The result above was computed under the patched environment.
                # Drop it so it cannot leak into tests that run afterwards
                # and do not clear the cache themselves.
                get_edge_scoring_config.cache_clear()

        assert config["internal_edge_boost"] == 1.2
        assert config["external_edge_boost"] == 1.0

    def test_get_edge_scoring_config_from_yaml(self):
        """Boosts loaded with the real environment are at least 1.0."""
        from app.core.graph.graph_subgraph import get_edge_scoring_config

        # Clear the cache so the real config is loaded.
        get_edge_scoring_config.cache_clear()

        config = get_edge_scoring_config()

        # Only a lower bound is checked, not the exact configured values.
        assert config["internal_edge_boost"] >= 1.0
        assert config["external_edge_boost"] >= 1.0
|
||||
|
||||
|
||||
class TestIsInternalBoost:
    """UT-20: ``is_internal`` boost applied by the subgraph."""

    def test_internal_edge_gets_boost(self):
        """An edge flagged as internal is stored with the internal boost."""
        from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config

        # Start from an empty config cache.
        get_edge_scoring_config.cache_clear()

        graph = Subgraph()

        # Internal edge (both endpoints belong to the same note).
        intra_note_edge = {
            "source": "note1#c01",
            "target": "note1#c02",
            "kind": "derives",
            "weight": 1.0,
            "is_internal": True,
        }
        graph.add_edge(intra_note_edge)

        # The stored weight must reflect the boost.
        outgoing = graph.adj.get("note1#c01", [])
        assert len(outgoing) == 1

        stored = outgoing[0]
        boost = get_edge_scoring_config()["internal_edge_boost"]
        assert stored["weight"] == 1.0 * boost
        assert stored["is_internal"] is True

    def test_external_edge_no_boost(self):
        """An edge flagged as external is stored with the external boost."""
        from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config

        # Start from an empty config cache.
        get_edge_scoring_config.cache_clear()

        graph = Subgraph()

        # External edge (endpoints belong to different notes).
        inter_note_edge = {
            "source": "note1#c01",
            "target": "note2#c01",
            "kind": "references",
            "weight": 1.0,
            "is_internal": False,
        }
        graph.add_edge(inter_note_edge)

        outgoing = graph.adj.get("note1#c01", [])
        assert len(outgoing) == 1

        stored = outgoing[0]
        boost = get_edge_scoring_config()["external_edge_boost"]
        assert stored["weight"] == 1.0 * boost
        assert stored["is_internal"] is False

    def test_edge_bonus_aggregation_with_internal(self):
        """``edge_bonus`` is positive for a node with one internal and one external edge."""
        from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config

        get_edge_scoring_config.cache_clear()
        graph = Subgraph()

        # Two outgoing edges from the same node: one internal, one external.
        outgoing_edges = (
            {
                "source": "note1",
                "target": "note2",
                "kind": "solves",
                "weight": 1.5,
                "is_internal": True,
            },
            {
                "source": "note1",
                "target": "note3",
                "kind": "references",
                "weight": 0.1,
                "is_internal": False,
            },
        )
        for edge in outgoing_edges:
            graph.add_edge(edge)

        # Only the sign of the aggregated bonus is checked.
        assert graph.edge_bonus("note1") > 0
|
||||
|
||||
|
||||
class TestAggregationConfig:
    """UT-21: aggregation configuration."""

    def test_get_aggregation_config_defaults(self):
        """Defaults (level "note", 3 chunks per note) apply for a missing config file."""
        from app.core.retrieval.retriever import _get_aggregation_config

        # Point the config path at a file that does not exist.
        missing_config = {"MINDNET_RETRIEVER_CONFIG": "/nonexistent/path.yaml"}
        with patch.dict(os.environ, missing_config):
            aggregation = _get_aggregation_config()

        assert aggregation["level"] == "note"
        assert aggregation["max_chunks_per_note"] == 3

    def test_get_aggregation_config_from_yaml(self):
        """Values loaded with the real environment are within the allowed range."""
        from app.core.retrieval.retriever import _get_aggregation_config

        aggregation = _get_aggregation_config()

        # Only validity is checked, not the exact configured values.
        assert aggregation["level"] in ("note", "chunk")
        assert aggregation["max_chunks_per_note"] >= 1
|
||||
|
||||
|
||||
class TestNoteLevelAggregation:
    """UT-22: note-level aggregation with ``max_chunks_per_note``."""

    def test_note_level_limits_chunks(self):
        """A per-note cap keeps only the best-scoring chunks of each note.

        The capping logic is simulated inside the test; the retriever itself
        is not called.
        """
        # Fixture: 5 chunks of note1 and 2 chunks of note2 as (id, score, payload).
        mock_hits = [
            ("c1", 0.9, {"note_id": "note1", "chunk_id": "c1"}),
            ("c2", 0.85, {"note_id": "note1", "chunk_id": "c2"}),
            ("c3", 0.8, {"note_id": "note2", "chunk_id": "c3"}),
            ("c4", 0.75, {"note_id": "note1", "chunk_id": "c4"}),
            ("c5", 0.7, {"note_id": "note2", "chunk_id": "c5"}),
            ("c6", 0.65, {"note_id": "note1", "chunk_id": "c6"}),
            ("c7", 0.6, {"note_id": "note1", "chunk_id": "c7"}),
        ]

        # Simulate note-level aggregation with a cap of two chunks per note.
        max_chunks_per_note = 2
        ranked = sorted(mock_hits, key=lambda hit: hit[1], reverse=True)

        kept = []
        taken_per_note = {}
        for hit in ranked:
            owner = hit[2]["note_id"]
            already_taken = taken_per_note.get(owner, 0)
            if already_taken >= max_chunks_per_note:
                continue
            kept.append(hit)
            taken_per_note[owner] = already_taken + 1

        # Expectation: 2 from note1 plus 2 from note2 = 4 chunks.
        assert len(kept) == 4

        # Each note contributes exactly two chunks.
        from_note1 = [hit for hit in kept if hit[2]["note_id"] == "note1"]
        from_note2 = [hit for hit in kept if hit[2]["note_id"] == "note2"]
        assert len(from_note1) == 2
        assert len(from_note2) == 2
|
||||
|
||||
|
||||
class TestChunkLevelAggregation:
    """UT-23: chunk-level aggregation (no deduplication)."""

    def test_chunk_level_no_dedup(self):
        """With level "chunk" every hit is kept, even if all belong to one note.

        The selection is simulated inside the test; the retriever itself is
        not called.
        """
        mock_hits = [
            ("c1", 0.9, {"note_id": "note1"}),
            ("c2", 0.85, {"note_id": "note1"}),
            ("c3", 0.8, {"note_id": "note1"}),
            ("c4", 0.75, {"note_id": "note1"}),
            ("c5", 0.7, {"note_id": "note1"}),
        ]

        # Chunk level: hits pass through unchanged; any other level is
        # modelled here as an empty result.
        aggregation_level = "chunk"
        kept = mock_hits if aggregation_level == "chunk" else []

        # All five chunks must survive.
        assert len(kept) == 5
|
||||
|
||||
|
||||
class TestQdrantIndexSetup:
    """UT-24: Qdrant index setup."""

    def test_bool_index_method_exists(self):
        """A ``QdrantHTTP`` instance exposes ``create_bool_index``."""
        from scripts.setup_mindnet_collections import QdrantHTTP

        http_client = QdrantHTTP("http://localhost:6333")
        assert hasattr(http_client, "create_bool_index")

    def test_setup_includes_is_internal_index(self):
        """The setup function's source mentions ``is_internal`` and ``create_bool_index``."""
        import inspect
        from scripts.setup_mindnet_collections import setup_mindnet_collections

        # Textual check on the function's source code; nothing is executed.
        setup_source = inspect.getsource(setup_mindnet_collections)

        for needle in ("is_internal", "create_bool_index"):
            assert needle in setup_source
|
||||
|
||||
|
||||
# Allow running this module directly: invokes pytest on this file with "-v".
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
|
||||
|
|
@ -1,331 +0,0 @@
|
|||
"""
|
||||
FILE: tests/test_wp26_phase3_validation.py
|
||||
DESCRIPTION: Unit-Tests für WP-26 Phase 3: Schema-Validierung für Intra-Note-Edges
|
||||
- FA-12: Validierung gegen effektiven Chunk-Typ
|
||||
- get_topology_info() Funktion
|
||||
- validate_intra_note_edge() Funktion
|
||||
VERSION: 1.0.0
|
||||
"""
|
||||
import pytest
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
|
||||
class TestLoadGraphSchemaFull:
    """UT-25: extended schema loading including prohibited edges."""

    def test_load_graph_schema_full_returns_dict(self):
        """``load_graph_schema_full`` returns a dict after a cache reset."""
        from app.core.graph.graph_utils import load_graph_schema_full, clear_graph_schema_cache

        clear_graph_schema_cache()
        assert isinstance(load_graph_schema_full(), dict)

    def test_schema_contains_typical_and_prohibited(self):
        """Every schema entry has list-valued ``typical`` and ``prohibited`` keys.

        An empty schema passes trivially.
        """
        from app.core.graph.graph_utils import load_graph_schema_full, clear_graph_schema_cache

        clear_graph_schema_cache()
        loaded = load_graph_schema_full()

        # Walk source type -> target type -> edge info; a falsy schema is skipped.
        for targets in (loaded or {}).values():
            for edge_info in targets.values():
                assert "typical" in edge_info
                assert "prohibited" in edge_info
                assert isinstance(edge_info["typical"], list)
                assert isinstance(edge_info["prohibited"], list)
|
||||
|
||||
|
||||
class TestGetTopologyInfo:
    """UT-26: ``get_topology_info()``."""

    def test_get_topology_info_returns_dict(self):
        """The result is a dict containing ``typical`` and ``prohibited``."""
        from app.core.graph.graph_utils import get_topology_info, clear_graph_schema_cache

        clear_graph_schema_cache()
        info = get_topology_info("experience", "insight")

        assert isinstance(info, dict)
        for key in ("typical", "prohibited"):
            assert key in info

    def test_get_topology_info_fallback(self):
        """Unknown types still yield list-valued ``typical`` and ``prohibited``."""
        from app.core.graph.graph_utils import get_topology_info, clear_graph_schema_cache

        clear_graph_schema_cache()
        info = get_topology_info("unknown_type_xyz", "another_unknown")

        # Only the value types are checked, not the fallback contents.
        assert isinstance(info["typical"], list)
        assert isinstance(info["prohibited"], list)

    def test_get_topology_info_experience_to_insight(self):
        """experience -> insight has typical edges, or at least no prohibited ones."""
        from app.core.graph.graph_utils import get_topology_info, clear_graph_schema_cache

        clear_graph_schema_cache()
        info = get_topology_info("experience", "insight")

        # Passes when any typical edge exists or the prohibited list is empty.
        has_typical = len(info["typical"]) > 0
        assert has_typical or len(info["prohibited"]) == 0
|
||||
|
||||
|
||||
class TestValidateIntraNoteEdge:
    """UT-27: ``validate_intra_note_edge()``."""

    def test_validate_typical_edge_returns_true(self):
        """A ``resulted_in`` edge experience -> insight is accepted.

        The confidence only has to be at least 0.7, so the test passes for
        both the typical (1.0) and the atypical (0.7) rating.
        """
        from app.core.ingestion.ingestion_validation import validate_intra_note_edge

        candidate = {"kind": "resulted_in", "source_id": "chunk1", "target_id": "chunk2"}

        accepted, score, _ = validate_intra_note_edge(
            edge=candidate,
            source_chunk={"type": "experience"},
            target_chunk={"type": "insight"},
            strict_mode=False,
        )

        assert accepted is True
        assert score >= 0.7

    def test_validate_atypical_edge_reduced_confidence(self):
        """An unknown edge kind is accepted with confidence 0.7 and a reason."""
        from app.core.ingestion.ingestion_validation import validate_intra_note_edge

        # Deliberately unusual edge kind.
        candidate = {
            "kind": "very_unusual_edge_type_xyz",
            "source_id": "chunk1",
            "target_id": "chunk2",
        }

        accepted, score, explanation = validate_intra_note_edge(
            edge=candidate,
            source_chunk={"type": "experience"},
            target_chunk={"type": "insight"},
            strict_mode=False,
        )

        assert accepted is True
        assert score == 0.7
        assert explanation is not None

    def test_validate_atypical_edge_strict_mode_rejected(self):
        """In strict mode an unknown edge kind is rejected with confidence 0.0."""
        from app.core.ingestion.ingestion_validation import validate_intra_note_edge

        candidate = {
            "kind": "very_unusual_edge_type_xyz",
            "source_id": "chunk1",
            "target_id": "chunk2",
        }

        accepted, score, _ = validate_intra_note_edge(
            edge=candidate,
            source_chunk={"type": "experience"},
            target_chunk={"type": "insight"},
            strict_mode=True,
        )

        assert accepted is False
        assert score == 0.0

    def test_validate_uses_effective_type(self):
        """A ``related_to`` edge is accepted for chunks with ``type`` and ``note_type``.

        The chunks carry "type" (intended to take precedence) next to
        "note_type"; the test only asserts that the edge is accepted.
        """
        from app.core.ingestion.ingestion_validation import validate_intra_note_edge

        candidate = {"kind": "related_to", "source_id": "chunk1", "target_id": "chunk2"}
        origin = {"type": "insight", "note_type": "experience"}
        destination = {"type": "decision", "note_type": "experience"}

        accepted, _, _ = validate_intra_note_edge(
            edge=candidate,
            source_chunk=origin,
            target_chunk=destination,
            strict_mode=False,
        )

        assert accepted is True
|
||||
|
||||
|
||||
class TestValidateEdgeAgainstSchema:
    """UT-28: ``validate_edge_against_schema()`` wrapper."""

    def test_non_internal_edge_passes(self):
        """A non-internal edge is accepted and returned equal to the input."""
        from app.core.ingestion.ingestion_validation import validate_edge_against_schema

        candidate = {
            "kind": "references",
            "source_id": "note1#chunk1",
            "target_id": "note2#chunk1",
            "is_internal": False,
        }

        accepted, result_edge = validate_edge_against_schema(
            edge=candidate,
            chunks_by_id={},
            strict_mode=False,
        )

        assert accepted is True
        assert result_edge == candidate

    def test_internal_edge_validated(self):
        """An internal ``derived_from`` edge insight -> experience is accepted."""
        from app.core.ingestion.ingestion_validation import validate_edge_against_schema

        candidate = {
            "kind": "derived_from",
            "source_id": "chunk1",
            "target_id": "chunk2",
            "is_internal": True,
            "confidence": 1.0,
        }
        known_chunks = {
            "chunk1": {"type": "insight"},
            "chunk2": {"type": "experience"},
        }

        accepted, _ = validate_edge_against_schema(
            edge=candidate,
            chunks_by_id=known_chunks,
            strict_mode=False,
        )

        assert accepted is True

    def test_missing_chunks_passes(self):
        """An internal edge is accepted when its chunks are not in the lookup."""
        from app.core.ingestion.ingestion_validation import validate_edge_against_schema

        candidate = {
            "kind": "derived_from",
            "source_id": "chunk1",
            "target_id": "chunk2",
            "is_internal": True,
        }

        # Empty lookup: neither endpoint can be resolved.
        accepted, _ = validate_edge_against_schema(
            edge=candidate,
            chunks_by_id={},
            strict_mode=False,
        )

        assert accepted is True
|
||||
|
||||
|
||||
class TestSchemaValidationIntegration:
    """UT-29: integration of the schema validation."""

    def test_clear_cache_clears_both_caches(self):
        """``clear_graph_schema_cache()`` resets both module-level caches to None."""
        import app.core.graph.graph_utils as utils_module
        from app.core.graph.graph_utils import (
            load_graph_schema,
            load_graph_schema_full,
            clear_graph_schema_cache,
        )

        # Populate both caches.
        load_graph_schema()
        load_graph_schema_full()

        clear_graph_schema_cache()

        # Read the caches through the module object. Names imported with
        # ``from ... import`` are bound once at import time and would not
        # reflect a later rebinding of the module globals.
        assert utils_module._GRAPH_SCHEMA_CACHE is None
        assert utils_module._GRAPH_SCHEMA_FULL_CACHE is None

    def test_topology_info_consistent_with_typical_edges(self):
        """``get_typical_edge_for()`` returns an edge listed by ``get_topology_info()``.

        The check is skipped when either function returns nothing for
        experience -> insight.
        """
        from app.core.graph.graph_utils import (
            get_topology_info,
            get_typical_edge_for,
            clear_graph_schema_cache,
        )

        clear_graph_schema_cache()

        topology = get_topology_info("experience", "insight")
        typical_edge = get_typical_edge_for("experience", "insight")

        # Only comparable when both sides produced a value.
        if typical_edge and topology["typical"]:
            assert typical_edge in topology["typical"]
|
||||
|
||||
|
||||
class TestConfidenceAdjustment:
    """UT-30: confidence adjustment for atypical edges."""

    @staticmethod
    def _validate_atypical_internal_edge():
        """Validate an internal edge of unknown kind between experience and insight."""
        from app.core.ingestion.ingestion_validation import validate_edge_against_schema

        candidate = {
            "kind": "completely_unknown_edge_type_xyz123",
            "source_id": "chunk1",
            "target_id": "chunk2",
            "is_internal": True,
            "confidence": 1.0,
        }
        known_chunks = {
            "chunk1": {"type": "experience"},
            "chunk2": {"type": "insight"},
        }

        return validate_edge_against_schema(
            edge=candidate,
            chunks_by_id=known_chunks,
            strict_mode=False,
        )

    def test_atypical_edge_confidence_reduced(self):
        """An atypical edge is accepted and its confidence is set to 0.7."""
        accepted, result_edge = self._validate_atypical_internal_edge()

        assert accepted is True
        # The input confidence was 1.0.
        assert result_edge.get("confidence") == 0.7

    def test_schema_validation_note_added(self):
        """An atypical edge carries a ``schema_validation_note`` afterwards."""
        _, result_edge = self._validate_atypical_internal_edge()

        assert "schema_validation_note" in result_edge
|
||||
|
||||
|
||||
# Allow running this module directly: invokes pytest on this file with "-v".
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
|
||||
|
|
@ -1,725 +0,0 @@
|
|||
"""
|
||||
FILE: tests/test_wp26_section_types.py
|
||||
DESCRIPTION: Unit-Tests für WP-26 Phase 1: Section-Types und Intra-Note-Edges
|
||||
WP-26 v1.1: Erweitert um Tests für Section-Split und automatische Edges
|
||||
WP-26 v1.3: Erweitert um Tests für rückwirkende section_type Propagation
|
||||
VERSION: 1.4.0 (WP-26 v1.4: Automatische Backlinks)
|
||||
"""
|
||||
import pytest
|
||||
from app.core.chunking.chunking_parser import parse_blocks
|
||||
from app.core.chunking.chunking_models import RawBlock, Chunk
|
||||
from app.core.chunking.chunking_strategies import strategy_by_heading, strategy_sliding_window
|
||||
from app.core.graph.graph_utils import (
|
||||
normalize_provenance, _edge, get_typical_edge_for,
|
||||
load_graph_schema, clear_graph_schema_cache
|
||||
)
|
||||
|
||||
|
||||
class TestSectionTypeRecognition:
    """UT-01: parser - section-type recognition."""

    def test_section_type_recognition(self):
        """A ``[!section]`` callout sets the section type of the following paragraph."""
        md = "\n".join([
            "",
            "## Reflexion ^ref",
            "> [!section] insight",
            "",
            "Content here about insights.",
            "",
        ])
        blocks, _ = parse_blocks(md)

        # Paragraph blocks found in the parsed document.
        paragraphs = [block for block in blocks if block.kind == "paragraph"]
        assert len(paragraphs) >= 1

        # The first paragraph must carry section_type "insight".
        assert paragraphs[0].section_type == "insight"

    def test_section_type_with_block_id(self):
        """The block ID in a heading (``^sit``) is extracted without the caret."""
        md = "\n".join([
            "",
            "## Situation ^sit",
            "> [!section] experience",
            "",
            "Die Geschichte beginnt hier.",
            "",
        ])
        blocks, _ = parse_blocks(md)

        headings = [block for block in blocks if block.kind == "heading"]
        assert len(headings) >= 1

        assert headings[0].block_id == "sit"

    def test_section_type_propagated_backwards_to_heading(self):
        """WP-26 v1.3: the section type is propagated back to the heading.

        The ``[!section]`` callout appears after two plain paragraphs; the
        heading and every paragraph must still end up as "insight".
        """
        md = "\n".join([
            "",
            "## Lektion ^learning",
            "",
            "Einleitender Text ohne section callout.",
            "",
            "Noch mehr Text hier...",
            "",
            "> [!section] insight",
            "",
            "Und dann kommt der eigentliche Insight-Content.",
            "",
        ])
        blocks, _ = parse_blocks(md)

        headings = [block for block in blocks if block.kind == "heading"]
        assert len(headings) >= 1

        # The heading gets the type retroactively although the callout comes
        # later in the section.
        assert headings[0].section_type == "insight", \
            f"Heading sollte section_type 'insight' haben, hat aber: {headings[0].section_type}"

        # Every paragraph of the section must carry the same type.
        for paragraph in (block for block in blocks if block.kind == "paragraph"):
            assert paragraph.section_type == "insight", \
                f"Paragraph sollte section_type 'insight' haben: {paragraph.text[:50]}"
|
||||
|
||||
|
||||
class TestSectionTypeScope:
    """UT-02: parser - end of section-type scope."""

    def test_section_type_scope_ends_at_same_level_heading(self):
        """The section type is reset by the next H2 heading."""
        md = "\n".join([
            "",
            "## Section A",
            "> [!section] insight",
            "",
            "Content A with insight.",
            "",
            "## Section B",
            "",
            "Content B without section callout.",
            "",
        ])
        blocks, _ = parse_blocks(md)

        first, second = [block for block in blocks if block.kind == "paragraph"][:2]

        # Paragraph under "Section A" keeps the declared type.
        assert first.section_type == "insight"

        # Paragraph under "Section B" has no type (reset).
        assert second.section_type is None
|
||||
|
||||
|
||||
class TestProvenanceNormalization:
    """Unit tests for provenance normalization (WP-26 v1.0)."""

    def test_normalize_explicit_callout(self):
        """explicit:callout -> (explicit, callout)"""
        provenance, source_hint = normalize_provenance("explicit:callout")
        assert (provenance, source_hint) == ("explicit", "callout")

    def test_normalize_explicit_wikilink(self):
        """explicit:wikilink -> (explicit, wikilink)"""
        provenance, source_hint = normalize_provenance("explicit:wikilink")
        assert (provenance, source_hint) == ("explicit", "wikilink")

    def test_normalize_structure_belongs_to(self):
        """structure:belongs_to -> (structure, belongs_to)"""
        provenance, source_hint = normalize_provenance("structure:belongs_to")
        assert (provenance, source_hint) == ("structure", "belongs_to")

    def test_normalize_schema_default(self):
        """inferred:schema -> (rule, schema_default)"""
        provenance, source_hint = normalize_provenance("inferred:schema")
        assert (provenance, source_hint) == ("rule", "schema_default")

    def test_normalize_unknown_fallback(self):
        """Unknown provenance -> (explicit, None)"""
        provenance, source_hint = normalize_provenance("unknown_provenance")
        assert provenance == "explicit"
        assert source_hint is None
|
||||
|
||||
|
||||
class TestIsInternalFlag:
    """UT-13: the is_internal flag for intra-note edges."""

    def test_is_internal_true_for_same_note(self):
        """An edge between two chunks of the same note has is_internal=True."""
        edge_args = {
            "kind": "derives",
            "scope": "chunk",
            "source_id": "note1#c01",
            "target_id": "note1#c02",
            "note_id": "note1",
        }
        payload = _edge(**edge_args)
        assert payload["is_internal"] is True

    def test_is_internal_false_for_different_notes(self):
        """An edge between chunks of different notes has is_internal=False."""
        edge_args = {
            "kind": "references",
            "scope": "chunk",
            "source_id": "note1#c01",
            "target_id": "note2#c01",
            "note_id": "note1",
        }
        payload = _edge(**edge_args)
        assert payload["is_internal"] is False

    def test_is_internal_true_for_note_to_chunk(self):
        """A belongs_to edge from a chunk to its own note has is_internal=True."""
        edge_args = {
            "kind": "belongs_to",
            "scope": "chunk",
            "source_id": "note1#c01",
            "target_id": "note1",
            "note_id": "note1",
        }
        payload = _edge(**edge_args)
        assert payload["is_internal"] is True
|
||||
|
||||
|
||||
class TestEdgeProvenanceInPayload:
    """Checks provenance normalization inside edge payloads."""

    def test_edge_provenance_normalized(self):
        """A compound provenance in `extra` is split into provenance + source_hint."""
        raw_extra = {"provenance": "explicit:callout"}
        payload = _edge(
            kind="derives",
            scope="chunk",
            source_id="note1#c01",
            target_id="note1#c02",
            note_id="note1",
            extra=raw_extra,
        )

        normalized = (payload["provenance"], payload["source_hint"])
        assert normalized == ("explicit", "callout")
|
||||
|
||||
|
||||
class TestAutomaticSectionRecognition:
    """UT-09: automatic section detection at new headings."""

    def test_automatic_section_recognition_at_same_heading_level(self):
        """A new heading on the same level automatically starts a new section."""
        md = """
## Situation ^sit
> [!section] experience

Content A.

## Reflexion ^ref

Content B.

## Learnings ^learn
> [!section] insight

Content C.

## Ausblick ^out

Content D.
"""
        parsed_blocks, _ = parse_blocks(md)

        # Paragraph blocks in document order, one per section.
        paragraph_blocks = list(
            filter(lambda block: block.kind == "paragraph", parsed_blocks)
        )
        assert len(paragraph_blocks) == 4

        # Sections 1 and 3 declare a type explicitly; sections 2 and 4 have
        # no callout and therefore yield None (fallback to the note type).
        first, second, third, fourth = paragraph_blocks
        assert first.section_type == "experience"
        assert second.section_type is None
        assert third.section_type == "insight"
        assert fourth.section_type is None
|
||||
|
||||
|
||||
class TestSeparateSectionCallout:
    """UT-10: a standalone section callout anywhere inside a section."""

    def test_section_callout_separate_from_edge_callout(self):
        """A section callout may stand apart from edge callouts."""
        md = """
## Reflexion ^ref

Einleitender Text hier...

> [!section] insight

Weiterer normaler Inhalt...

> [!edge] derives
> [[#^sit]]
"""
        parsed_blocks, _ = parse_blocks(md)

        paragraph_blocks = [
            block for block in parsed_blocks if block.kind == "paragraph"
        ]

        # The fixture has text both before and after the section callout.
        assert len(paragraph_blocks) >= 2

        # Which paragraphs receive the type depends on the parser
        # implementation, so only the presence of "insight" is asserted.
        # NOTE(review): another test in this file expects the type to be
        # propagated retroactively within a section, so the paragraph before
        # the callout may be "insight" as well - confirm the intended rule.
        observed_types = {block.section_type for block in paragraph_blocks}
        assert "insight" in observed_types
|
||||
|
||||
|
||||
class TestNestedEdgeCallouts:
    """UT-08: nested edge callouts inside a container callout."""

    def test_nested_callouts_recognized(self):
        """Nested callouts are parsed into callout blocks."""
        md = """
> [!abstract] Semantic Edges
>> [!edge] derived_from
>> [[Target1#Section]]
>
>> [!edge] solves
>> [[Target2]]
"""
        parsed_blocks, _ = parse_blocks(md)

        # At least one block of kind "callout" must come out of the parse.
        callout_count = sum(
            1 for block in parsed_blocks if block.kind == "callout"
        )
        assert callout_count >= 1
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# WP-26 v1.1: Tests für Section-Type-Wechsel und automatische Chunk-Splits
|
||||
# =============================================================================
|
||||
|
||||
class TestSectionTypeSplit:
    """UT-14: a change of section type forces a new chunk."""

    def test_section_type_change_forces_new_chunk_by_heading(self):
        """A section-type change forces a new chunk (by_heading strategy)."""
        md = """
## Situation ^sit
> [!section] experience

Die Geschichte beginnt hier.

## Reflexion ^ref
> [!section] insight

Erkenntnisse aus der Situation.
"""
        blocks, _ = parse_blocks(md)

        # strict_heading_split=False would normally merge both sections.
        config = {
            "target": 1000,  # high, so the token limit never causes the split
            "max": 2000,
            "split_level": 2,
            "strict_heading_split": False,
            "enable_smart_edge_allocation": True,
        }

        chunks = strategy_by_heading(blocks, config, "test-note")

        # The type change alone must produce at least two chunks.
        assert len(chunks) >= 2

        # Both declared types must show up on the resulting chunks.
        section_types = [c.section_type for c in chunks if c.section_type]
        assert "experience" in section_types
        assert "insight" in section_types

    def test_same_section_type_follows_normal_behavior(self):
        """Sections sharing one type follow the normal chunking behavior."""
        md = """
## Section A
> [!section] experience

Content A about experience.

## Section B
> [!section] experience

Content B also about experience.
"""
        blocks, _ = parse_blocks(md)

        # strict_heading_split=False: sections of equal type may be merged.
        config = {
            "target": 1000,
            "max": 2000,
            "split_level": 2,
            "strict_heading_split": False,
            "enable_smart_edge_allocation": True,
        }

        chunks = strategy_by_heading(blocks, config, "test-note")

        # How many chunks result depends on the token limits; what matters
        # is that every typed chunk carries "experience".
        typed = [c.section_type for c in chunks if c.section_type]

        # Guard against a vacuous pass: without this, an empty result or a
        # result without any typed chunk would satisfy the loop below.
        assert typed
        for section_type in typed:
            assert section_type == "experience"

    def test_sliding_window_respects_section_type_change(self):
        """The sliding_window strategy splits on a section-type change."""
        md = """
## Part 1
> [!section] experience

Short content.

## Part 2
> [!section] insight

Another short content.
"""
        blocks, _ = parse_blocks(md)

        config = {
            "target": 1000,  # high, so the token limit never causes the split
            "max": 2000,
        }

        chunks = strategy_sliding_window(blocks, config, "test-note")

        # The type change must split the content into separate chunks.
        assert len(chunks) >= 2

        # Both types must be present. The previous form asserted
        # "experience OR insight" only when two typed chunks already existed,
        # so it could never detect a missing split.
        section_types = [c.section_type for c in chunks if c.section_type]
        assert "experience" in section_types
        assert "insight" in section_types
|
||||
|
||||
|
||||
class TestGraphSchemaParser:
    """UT-15: graph schema parser tests."""

    def test_get_typical_edge_experience_to_insight(self):
        """The typical edge from experience to insight is 'resulted_in'."""
        # Either the schema value or the generic fallback is acceptable.
        acceptable = ("resulted_in", "related_to")
        assert get_typical_edge_for("experience", "insight") in acceptable

    def test_get_typical_edge_insight_to_decision(self):
        """Typical edge from insight to decision."""
        # Per graph_schema.md: foundation_for or guides.
        # Fallback through the "any" rule: references or related_to.
        acceptable = ("foundation_for", "guides", "related_to", "references")
        assert get_typical_edge_for("insight", "decision") in acceptable

    def test_get_typical_edge_fallback(self):
        """Unknown types fall back to 'related_to'."""
        typical = get_typical_edge_for("unknown_type_1", "unknown_type_2")
        assert typical == "related_to"

    def test_get_typical_edge_any_target(self):
        """A known source with an unknown target still yields an edge kind."""
        # Expected to come from the "any" rule or the 'related_to' fallback;
        # only the existence of a value is checked here.
        typical = get_typical_edge_for("experience", "unknown_target")
        assert typical is not None
|
||||
|
||||
|
||||
class TestAutomaticIntraNoteEdges:
    """UT-16: automatic intra-note edges between sections."""

    def test_edge_payload_has_section_transition(self):
        """An edge between sections carries section_transition metadata."""
        transition_meta = {
            "provenance": "rule",
            "rule_id": "inferred:section_transition",
            "section_transition": "experience->insight",
            "is_internal": True,
        }
        payload = _edge(
            kind="resulted_in",
            scope="chunk",
            source_id="note1#c00",
            target_id="note1#c01",
            note_id="note1",
            extra=transition_meta,
        )

        assert payload["is_internal"] is True
        assert payload["section_transition"] == "experience->insight"
        assert payload["provenance"] == "rule"

    def test_inferred_section_transition_provenance(self):
        """'inferred:section_transition' normalizes to (rule, schema_default)."""
        provenance, source_hint = normalize_provenance(
            "inferred:section_transition"
        )
        assert (provenance, source_hint) == ("rule", "schema_default")
|
||||
|
||||
|
||||
class TestRealWorldScenario:
    """UT-17: real-world scenario - the cancer-diagnosis note."""

    def test_krebsdiagnose_note_structure(self):
        """Checks the expected chunk structure of the cancer-diagnosis note."""
        md = """
## 📖 Diagnose: Glioblastom ^kontext

Nach der Operation gab es ein Diagnose-Gespräch.

## 🎭 Emotions-Check ^emotionen

Ich reagierte mit Zittern am Körper.

## 💡 Lektion ^learning
> [!section] insight

Ich habe versucht die nächsten Schritte zu durchdenken.
Meine positive Einstellung hat mir geholfen.
"""
        blocks, _ = parse_blocks(md)

        # Same settings as the structured_smart_edges profile.
        config = dict(
            target=400,
            max=600,
            split_level=2,
            strict_heading_split=False,
            enable_smart_edge_allocation=True,
        )

        chunks = strategy_by_heading(blocks, config, "krebsdiagnose")

        # The type change (None -> insight) should put the lesson section
        # into a chunk of its own: at least one chunk must be "insight".
        insight_chunks = [c for c in chunks if c.section_type == "insight"]
        assert len(insight_chunks) >= 1

        # WP-26 v1.2: the insight chunk MUST include the heading itself,
        # not just the content following the [!section] callout.
        insight_text = insight_chunks[0].text
        assert "Lektion" in insight_text, f"Überschrift '💡 Lektion' fehlt im insight-Chunk: {insight_text[:100]}"
        lowered = insight_text.lower()
        assert "durchdenken" in lowered or "positive" in lowered

    def test_section_type_change_in_smart_mode_forces_split(self):
        """WP-26 v1.1 fix: a type change forces a split in SMART MODE too (step 2)."""
        md = """
## Section A ohne Typ

Inhalt A ohne section_type.

## Section B ohne Typ

Inhalt B ohne section_type.

## Section C mit Typ
> [!section] insight

Inhalt C mit section_type "insight".
"""
        blocks, _ = parse_blocks(md)

        # SMART MODE: strict=False, smart_edge=True. The token limit is high
        # enough that everything COULD be merged into a single chunk.
        config = dict(
            target=2000,
            max=4000,
            split_level=2,
            strict_heading_split=False,
            enable_smart_edge_allocation=True,
        )

        chunks = strategy_by_heading(blocks, config, "test-note")

        # Despite the high limit, Section C must be split off because of
        # the type change (None -> insight).
        assert len(chunks) >= 2, f"Erwartet mindestens 2 Chunks, bekommen: {len(chunks)}"

        # Some chunk must carry the "insight" type.
        insight_chunks = [c for c in chunks if c.section_type == "insight"]
        assert len(insight_chunks) >= 1, "Kein Chunk mit section_type 'insight' gefunden"

    def test_heading_belongs_to_new_section_with_section_type(self):
        """WP-26 v1.2: a heading belongs to the new section when [!section] follows."""
        md = """
## Section A

Inhalt ohne section_type.

## Section B mit Typ
> [!section] insight

Inhalt mit section_type.
"""
        blocks, _ = parse_blocks(md)

        config = dict(
            target=2000,
            max=4000,
            split_level=2,
            strict_heading_split=False,
            enable_smart_edge_allocation=True,
        )

        chunks = strategy_by_heading(blocks, config, "test-note")

        # Exactly one chunk per section is expected.
        assert len(chunks) == 2, f"Erwartet 2 Chunks, bekommen: {len(chunks)}"
        untyped_chunk, typed_chunk = chunks[0], chunks[1]

        # Chunk 1: Section A, untyped - heading B must NOT leak into it.
        assert untyped_chunk.section_type is None
        assert "Section A" in untyped_chunk.text
        assert "Section B" not in untyped_chunk.text

        # Chunk 2: Section B, typed "insight" - MUST contain its own heading.
        assert typed_chunk.section_type == "insight"
        assert "Section B mit Typ" in typed_chunk.text, "Überschrift 'Section B mit Typ' muss im insight-Chunk sein!"
|
||||
|
||||
|
||||
class TestBlockIdParsing:
    """UT-18: block-ID extraction from section references."""

    def test_block_id_extraction_from_section(self):
        """The block ID is extracted from a heading-style section string."""
        from app.core.graph.graph_utils import parse_link_target

        # Heading text followed by a block ID; no note name => self-link.
        link = "#📖 Diagnose: Glioblastom ^kontext"
        target, section = parse_link_target(link, "note1")
        assert target == "note1"
        assert section == "kontext", f"Erwartet 'kontext', bekommen: {section}"

    def test_block_id_extraction_only_caret(self):
        """A reference consisting only of '^' plus the block ID."""
        from app.core.graph.graph_utils import parse_link_target

        target, section = parse_link_target("#^learning", "note1")
        assert (target, section) == ("note1", "learning")

    def test_block_id_extraction_with_spaces(self):
        """A block ID preceded by heading text, on another note."""
        from app.core.graph.graph_utils import parse_link_target

        link = "OtherNote#🎭 Emotions-Check ^emotionen"
        target, section = parse_link_target(link, None)
        assert (target, section) == ("OtherNote", "emotionen")

    def test_section_without_block_id(self):
        """A section without a block ID is returned unchanged."""
        from app.core.graph.graph_utils import parse_link_target

        target, section = parse_link_target("Note#Normale Überschrift", None)
        assert (target, section) == ("Note", "Normale Überschrift")
|
||||
|
||||
|
||||
class TestAutomaticBacklinks:
    """UT-18: automatic backlinks for intra-note edges (WP-26 v1.4)."""

    def test_backlink_created_for_intra_note_edge(self):
        """A backlink is created automatically for an intra-note edge."""
        from app.core.graph.graph_derive_edges import build_edges_for_note

        # Two mock chunks of the same note with different section types.
        experience_chunk = {
            "chunk_id": "note1#c01",
            "type": "experience",
            "section_type": "experience",
            "window": "Situation text",
        }
        insight_chunk = {
            "chunk_id": "note1#c02",
            "type": "insight",
            "section_type": "insight",
            "window": "Reflexion text",
        }

        edges = build_edges_for_note(
            note_id="note1",
            chunks=[experience_chunk, insight_chunk],
            note_level_references=None,
            include_note_scope_refs=False,
            markdown_body="",
        )

        def edges_between(source, target):
            # All edges running from `source` to `target`.
            return [
                e for e in edges
                if e.get("source_id") == source and e.get("target_id") == target
            ]

        # Both the forward edge and its backlink must be present.
        forward_edges = edges_between("note1#c01", "note1#c02")
        backlink_edges = edges_between("note1#c02", "note1#c01")

        assert len(forward_edges) > 0, "Forward-Edge sollte vorhanden sein"
        assert len(backlink_edges) > 0, "Backlink sollte automatisch erstellt werden"

        # Properties of the first backlink found.
        backlink = backlink_edges[0]
        assert backlink.get("is_internal") is True
        for key, expected in (
            ("scope", "chunk"),
            ("provenance", "rule"),
            ("rule_id", "derived:intra_note_backlink"),
        ):
            assert backlink.get(key) == expected

    def test_backlink_not_created_if_already_exists(self):
        """No backlink is added when an inverse edge already exists."""
        from app.core.graph.graph_derive_edges import build_edges_for_note

        # c01 explicitly declares a "derived_from" edge to c02 (labelled the
        # inverse edge type in this scenario) ...
        first_chunk = {
            "chunk_id": "note1#c01",
            "type": "experience",
            "section_type": "experience",
            "window": "Situation text",
            "candidate_pool": [
                {
                    "kind": "derived_from",
                    "to": "note1#c02",
                    "provenance": "explicit:callout",
                },
            ],
        }
        # ... and c02 explicitly declares the "derives" forward edge to c01.
        second_chunk = {
            "chunk_id": "note1#c02",
            "type": "insight",
            "section_type": "insight",
            "window": "Reflexion text",
            "candidate_pool": [
                {
                    "kind": "derives",
                    "to": "note1#c01",
                    "provenance": "explicit:callout",
                },
            ],
        }

        edges = build_edges_for_note(
            note_id="note1",
            chunks=[first_chunk, second_chunk],
            note_level_references=None,
            include_note_scope_refs=False,
            markdown_body="",
        )

        # Collect c01 -> c02 edges of kind "derived_from"; the explicit one
        # must not be duplicated by an automatically generated backlink.
        backlink_edges = []
        for e in edges:
            if e.get("source_id") != "note1#c01":
                continue
            if e.get("target_id") != "note1#c02":
                continue
            if e.get("kind") == "derived_from":
                backlink_edges.append(e)

        # Exactly one: the explicit edge, with no automatic duplicate.
        assert len(backlink_edges) == 1, f"Erwartet genau einen Backlink, gefunden: {len(backlink_edges)}"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run this module's tests verbosely when the file is executed directly,
    # and propagate pytest's exit code so that a failing run ends the
    # process with a non-zero status instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
Loading…
Reference in New Issue
Block a user