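Enhance chunking system with WP-24c v4.2.6 and v4.2.7 updates: Introduce an is_meta_content flag on RawBlock for callouts, so that they are still chunked but removed afterwards to keep the chunk context clean. Update parse_blocks to emit callouts as separate blocks, and update parse_edges_robust and its callers (including propagate_section_edges) so that callout edges are flagged and receive explicit:callout provenance for chunk attribution. Implement clean-context logic that strips callout syntax from chunk text in a post-processing step (after edge propagation and candidate-pool construction), maintaining chunk integrity. Adjust get_chunk_config so that a frontmatter chunking_profile override takes priority over the type-level and default chunking profiles. Update documentation to reflect these changes.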

2026-01-11 11:14:31 +01:00 · 2026-01-11 11:14:31 +01:00 · 55b64c331a
commit 55b64c331a
parent 4d43cc526e
8 changed files with 231 additions and 43 deletions
--- a/app/core/chunking/chunking_models.py
+++ b/app/core/chunking/chunking_models.py
@ -14,6 +14,7 @@ class RawBlock:
    section_path: str
    section_title: Optional[str]
    exclude_from_chunking: bool = False  # WP-24c v4.2.0: Flag für Edge-Zonen, die nicht gechunkt werden sollen
+    is_meta_content: bool = False  # WP-24c v4.2.6: Flag für Meta-Content (Callouts), der später entfernt wird

@dataclass
 class Chunk:
--- a/app/core/chunking/chunking_parser.py
+++ b/app/core/chunking/chunking_parser.py
@ -4,10 +4,11 @@ DESCRIPTION: Zerlegt Markdown in logische Einheiten (RawBlocks).
             Hält alle Überschriftenebenen (H1-H6) im Stream.
             Stellt die Funktion parse_edges_robust zur Verfügung.
             WP-24c v4.2.0: Identifiziert Edge-Zonen und markiert sie für Chunking-Ausschluss.
+             WP-24c v4.2.5: Callout-Exclusion - Callouts werden als separate RawBlocks identifiziert und ausgeschlossen.
 """
 import re
 import os
-from typing import List, Tuple, Set
+from typing import List, Tuple, Set, Dict, Any
 from .chunking_models import RawBlock
 from .chunking_utils import extract_frontmatter_from_text

@ -25,6 +26,7 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
    """
    Zerlegt Text in logische Einheiten (RawBlocks), inklusive H1-H6.
    WP-24c v4.2.0: Identifiziert Edge-Zonen (LLM-Validierung & Note-Scope) und markiert sie für Chunking-Ausschluss.
+    WP-24c v4.2.6: Callouts werden mit is_meta_content=True markiert (werden gechunkt, aber später entfernt).
    """
    blocks = []
    h1_title = "Dokument"
@ -67,9 +69,61 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
    lines = text_without_fm.split('\n')
    buffer = []
    
-    for line in lines:
+    # WP-24c v4.2.5: Callout-Erkennung (auch verschachtelt: >>)
+    # Regex für Callouts: >\s*[!edge] oder >\s*[!abstract] (auch mit mehreren >)
+    callout_pattern = re.compile(r'^\s*>{1,}\s*\[!(edge|abstract)\]', re.IGNORECASE)
+    
+    # WP-24c v4.2.5: Markiere verarbeitete Zeilen, um sie zu überspringen
+    processed_indices = set()
+    
+    for i, line in enumerate(lines):
+        if i in processed_indices:
+            continue
+            
        stripped = line.strip()
        
+        # WP-24c v4.2.5: Callout-Erkennung (VOR Heading-Erkennung)
+        # Prüfe, ob diese Zeile ein Callout startet
+        callout_match = callout_pattern.match(line)
+        if callout_match:
+            # Vorherigen Text-Block abschließen
+            if buffer:
+                content = "\n".join(buffer).strip()
+                if content: 
+                    blocks.append(RawBlock(
+                        "paragraph", content, None, section_path, current_section_title,
+                        exclude_from_chunking=in_exclusion_zone
+                    ))
+                buffer = []
+            
+            # Sammle alle Zeilen des Callout-Blocks
+            callout_lines = [line]
+            leading_gt_count = len(line) - len(line.lstrip('>'))
+            processed_indices.add(i)
+            
+            # Sammle alle Zeilen, die zum Callout gehören (gleiche oder höhere Einrückung)
+            j = i + 1
+            while j < len(lines):
+                next_line = lines[j]
+                if not next_line.strip().startswith('>'):
+                    break
+                next_leading_gt = len(next_line) - len(next_line.lstrip('>'))
+                if next_leading_gt < leading_gt_count:
+                    break
+                callout_lines.append(next_line)
+                processed_indices.add(j)
+                j += 1
+            
+            # WP-24c v4.2.6: Erstelle Callout-Block mit is_meta_content = True
+            # Callouts werden gechunkt (für Chunk-Attribution), aber später entfernt (Clean-Context)
+            callout_content = "\n".join(callout_lines)
+            blocks.append(RawBlock(
+                "callout", callout_content, None, section_path, current_section_title,
+                exclude_from_chunking=in_exclusion_zone,  # Nur Edge-Zonen werden ausgeschlossen
+                is_meta_content=True  # WP-24c v4.2.6: Markierung für spätere Entfernung
+            ))
+            continue
+        
        # Heading-Erkennung (H1 bis H6)
        heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
        if heading_match:
@ -148,15 +202,22 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
            
    return blocks, h1_title

-def parse_edges_robust(text: str) -> Set[str]:
-    """Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts."""
-    found_edges = set()
+def parse_edges_robust(text: str) -> List[Dict[str, Any]]:
+    """
+    Extrahiert Kanten-Kandidaten aus Wikilinks und Callouts.
+    WP-24c v4.2.7: Gibt Liste von Dicts zurück mit is_callout Flag für Chunk-Attribution.
+    
+    Returns:
+        List[Dict] mit keys: "edge" (str: "kind:target"), "is_callout" (bool)
+    """
+    found_edges: List[Dict[str, any]] = []
    # 1. Wikilinks [[rel:kind|target]]
    inlines = re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text)
    for kind, target in inlines:
        k = kind.strip().lower()
        t = target.strip()
-        if k and t: found_edges.add(f"{k}:{t}")
+        if k and t:
+            found_edges.append({"edge": f"{k}:{t}", "is_callout": False})
    
    # 2. Callout Edges > [!edge] kind
    lines = text.split('\n')
@ -169,13 +230,15 @@ def parse_edges_robust(text: str) -> Set[str]:
            # Links in der gleichen Zeile des Callouts
            links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
            for l in links: 
-                if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
+                if "rel:" not in l:
+                    found_edges.append({"edge": f"{current_edge_type}:{l}", "is_callout": True})
            continue
        # Links in Folgezeilen des Callouts
        if current_edge_type and stripped.startswith('>'):
            links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
            for l in links: 
-                if "rel:" not in l: found_edges.add(f"{current_edge_type}:{l}")
+                if "rel:" not in l:
+                    found_edges.append({"edge": f"{current_edge_type}:{l}", "is_callout": True})
        elif not stripped.startswith('>'): 
            current_edge_type = None
    return found_edges
--- a/app/core/chunking/chunking_processor.py
+++ b/app/core/chunking/chunking_processor.py
@ -7,6 +7,16 @@ DESCRIPTION: Der zentrale Orchestrator für das Chunking-System.
             - Stellt H1-Kontext-Fenster sicher.
             - Baut den Candidate-Pool für die WP-15b Ingestion auf.
             WP-24c v4.2.0: Konfigurierbare Header-Namen für LLM-Validierung.
+             WP-24c v4.2.5: Wiederherstellung der Chunking-Präzision
+             - Frontmatter-Override für chunking_profile
+             - Callout-Exclusion aus Chunks
+             - Strict-Mode ohne Carry-Over
+             WP-24c v4.2.6: Finale Härtung - "Semantic First, Clean Second"
+             - Callouts werden gechunkt (Chunk-Attribution), aber später entfernt (Clean-Context)
+             - remove_callouts_from_text erst nach propagate_section_edges und Candidate Pool
+             WP-24c v4.2.7: Wiederherstellung der Chunk-Attribution
+             - Callout-Kanten erhalten explicit:callout Provenance im candidate_pool
+             - graph_derive_edges.py erkennt diese und verhindert Note-Scope Duplikate
 """
 import asyncio
 import re
@ -25,16 +35,19 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
    """
    Hauptfunktion zur Zerlegung einer Note. 
    Verbindet Strategien mit physikalischer Kontext-Anreicherung.
+    WP-24c v4.2.5: Frontmatter-Override für chunking_profile wird berücksichtigt.
    """
-    # 1. Konfiguration & Parsing
-    if config is None: 
-        config = get_chunk_config(note_type)
-        
+    # 1. WP-24c v4.2.5: Frontmatter VOR Konfiguration extrahieren (für Override)
    fm, body_text = extract_frontmatter_from_text(md_text)
+    
+    # 2. Konfiguration mit Frontmatter-Override
+    if config is None: 
+        config = get_chunk_config(note_type, frontmatter=fm)
+        
    blocks, doc_title = parse_blocks(md_text)
    
-    # WP-24c v4.2.0: Filtere Blöcke aus Edge-Zonen (LLM-Validierung & Note-Scope)
-    # Diese Bereiche sollen nicht als Chunks angelegt werden, sondern nur die Kanten extrahiert werden
+    # WP-24c v4.2.6: Filtere NUR Edge-Zonen (LLM-Validierung & Note-Scope)
+    # Callouts (is_meta_content=True) müssen durch, damit Chunk-Attribution erhalten bleibt
    blocks_for_chunking = [b for b in blocks if not getattr(b, 'exclude_from_chunking', False)]
    
    # Vorbereitung des H1-Präfix für die Embedding-Fenster (Breadcrumbs)
@ -42,6 +55,7 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
    
    # 2. Anwendung der Splitting-Strategie
    # Alle Strategien nutzen nun einheitlich context_prefix für die Window-Bildung.
+    # WP-24c v4.2.6: Callouts sind in blocks_for_chunking enthalten (für Chunk-Attribution)
    if config.get("strategy") == "by_heading":
        chunks = await asyncio.to_thread(
            strategy_by_heading, blocks_for_chunking, config, note_id, context_prefix=h1_prefix
@ -55,21 +69,27 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
        return []

    # 3. Physikalische Kontext-Anreicherung (Der Qualitäts-Fix)
+    # WP-24c v4.2.6: Arbeite auf Original-Text inkl. Callouts (für korrekte Chunk-Attribution)
    # Schreibt Kanten aus Callouts/Inlines hart in den Text für Qdrant.
    chunks = propagate_section_edges(chunks)

-    # 4. WP-15b: Candidate Pool Aufbau (Metadaten für IngestionService)
+    # 5. WP-15b: Candidate Pool Aufbau (Metadaten für IngestionService)
+    # WP-24c v4.2.7: Markiere Callout-Kanten explizit für Chunk-Attribution
    # Zuerst die explizit im Text vorhandenen Kanten sammeln.
    for ch in chunks:
        # Wir extrahieren aus dem bereits (durch Propagation) angereicherten Text.
        # ch.candidate_pool wird im Modell-Konstruktor als leere Liste initialisiert.
-        for e_str in parse_edges_robust(ch.text):
-            parts = e_str.split(':', 1)
+        for edge_info in parse_edges_robust(ch.text):
+            edge_str = edge_info["edge"]
+            is_callout = edge_info.get("is_callout", False)
+            parts = edge_str.split(':', 1)
            if len(parts) == 2:
                k, t = parts
-                ch.candidate_pool.append({"kind": k, "to": t, "provenance": "explicit"})
+                # WP-24c v4.2.7: Callout-Kanten erhalten explicit:callout Provenance
+                provenance = "explicit:callout" if is_callout else "explicit"
+                ch.candidate_pool.append({"kind": k, "to": t, "provenance": provenance})

-    # 5. Global Pool (Unzugeordnete Kanten - kann mitten im Dokument oder am Ende stehen)
+    # 6. Global Pool (Unzugeordnete Kanten - kann mitten im Dokument oder am Ende stehen)
    # WP-24c v4.2.0: Konfigurierbare Header-Namen und -Ebene via .env
    # Sucht nach ALLEN Edge-Pool Blöcken im Original-Markdown (nicht nur am Ende).
    llm_validation_headers = os.getenv(
@ -93,15 +113,16 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
    
    for pool_match in re.finditer(zone_pattern, body_text, re.DOTALL | re.IGNORECASE | re.MULTILINE):
        global_edges = parse_edges_robust(pool_match.group(1))
-        for e_str in global_edges:
-            parts = e_str.split(':', 1)
+        for edge_info in global_edges:
+            edge_str = edge_info["edge"]
+            parts = edge_str.split(':', 1)
            if len(parts) == 2:
                k, t = parts
                # Diese Kanten werden als "global_pool" markiert für die spätere KI-Prüfung.
                for ch in chunks: 
                    ch.candidate_pool.append({"kind": k, "to": t, "provenance": "global_pool"})

-    # 6. De-Duplikation des Pools & Linking
+    # 7. De-Duplikation des Pools & Linking
    for ch in chunks:
        seen = set()
        unique = []
@ -113,6 +134,54 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
                unique.append(c)
        ch.candidate_pool = unique

+    # 8. WP-24c v4.2.6: Clean-Context - Entferne Callout-Syntax aus Chunk-Text
+    # WICHTIG: Dies geschieht NACH propagate_section_edges und Candidate Pool Aufbau,
+    # damit Chunk-Attribution erhalten bleibt und Kanten korrekt extrahiert werden.
+    # Hinweis: Callouts können mehrzeilig sein (auch verschachtelt: >>)
+    def remove_callouts_from_text(text: str) -> str:
+        """Entfernt alle Callout-Zeilen (> [!edge] oder > [!abstract]) aus dem Text."""
+        if not text:
+            return text
+        
+        lines = text.split('\n')
+        cleaned_lines = []
+        i = 0
+        
+        callout_start_pattern = re.compile(r'^\s*>{1,}\s*\[!(edge|abstract)\]', re.IGNORECASE)
+        
+        while i < len(lines):
+            line = lines[i]
+            callout_match = callout_start_pattern.match(line)
+            
+            if callout_match:
+                # Callout gefunden: Überspringe alle Zeilen des Callout-Blocks
+                leading_gt_count = len(line) - len(line.lstrip('>'))
+                i += 1
+                
+                # Überspringe alle Zeilen, die zum Callout gehören
+                while i < len(lines):
+                    next_line = lines[i]
+                    if not next_line.strip().startswith('>'):
+                        break
+                    next_leading_gt = len(next_line) - len(next_line.lstrip('>'))
+                    if next_leading_gt < leading_gt_count:
+                        break
+                    i += 1
+            else:
+                # Normale Zeile: Behalte
+                cleaned_lines.append(line)
+                i += 1
+        
+        # Normalisiere Leerzeilen (max. 2 aufeinanderfolgende)
+        result = '\n'.join(cleaned_lines)
+        result = re.sub(r'\n\s*\n\s*\n+', '\n\n', result)
+        return result
+    
+    for ch in chunks:
+        ch.text = remove_callouts_from_text(ch.text)
+        if ch.window:
+            ch.window = remove_callouts_from_text(ch.window)
+
    # Verknüpfung der Nachbarschaften für Graph-Traversierung
    for i, ch in enumerate(chunks):
        ch.neighbors_prev = chunks[i-1].id if i > 0 else None
--- a/app/core/chunking/chunking_propagation.py
+++ b/app/core/chunking/chunking_propagation.py
@ -22,11 +22,13 @@ def propagate_section_edges(chunks: List[Chunk]) -> List[Chunk]:
            continue
        
        # Nutzt den robusten Parser aus dem Package
-        edges = parse_edges_robust(ch.text)
-        if edges:
+        # WP-24c v4.2.7: parse_edges_robust gibt jetzt Liste von Dicts zurück
+        edge_infos = parse_edges_robust(ch.text)
+        if edge_infos:
            if ch.section_path not in section_map:
                section_map[ch.section_path] = set()
-            section_map[ch.section_path].update(edges)
+            for edge_info in edge_infos:
+                section_map[ch.section_path].add(edge_info["edge"])
            
    # 2. Injizieren: Kanten in jeden Chunk der Sektion zurückschreiben (Broadcasting)
    for ch in chunks:
@ -37,7 +39,9 @@ def propagate_section_edges(chunks: List[Chunk]) -> List[Chunk]:
            
            # Vorhandene Kanten (Typ:Ziel) in DIESEM Chunk ermitteln, 
            # um Dopplungen (z.B. durch Callouts) zu vermeiden.
-            existing_edges = parse_edges_robust(ch.text)
+            # WP-24c v4.2.7: parse_edges_robust gibt jetzt Liste von Dicts zurück
+            existing_edge_infos = parse_edges_robust(ch.text)
+            existing_edges = {ei["edge"] for ei in existing_edge_infos}
            
            injections = []
            # Sortierung für deterministische Ergebnisse
--- a/app/core/chunking/chunking_strategies.py
+++ b/app/core/chunking/chunking_strategies.py
@ -5,6 +5,7 @@ DESCRIPTION: Strategien für atomares Sektions-Chunking v3.9.9.
             - Keine redundante Kanten-Injektion.
             - Strikte Einhaltung von Sektionsgrenzen via Look-Ahead.
             - Fix: Synchronisierung der Parameter mit dem Orchestrator (context_prefix).
+             WP-24c v4.2.5: Strict-Mode ohne Carry-Over - Bei strict_heading_split wird nach jeder Sektion geflasht.
 """
 from typing import List, Dict, Any, Optional
 from .chunking_models import RawBlock, Chunk
@ -83,23 +84,46 @@ def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id:
            current_meta["title"] = item["meta"].section_title
            current_meta["path"] = item["meta"].section_path

-        # FALL A: HARD SPLIT MODUS
+        # FALL A: HARD SPLIT MODUS (WP-24c v4.2.5: Strict-Mode ohne Carry-Over)
        if is_hard_split_mode:
-            # Leere Überschriften (z.B. H1 direkt vor H2) verbleiben am nächsten Chunk
-            if item.get("is_empty", False) and queue:
-                current_chunk_text = (current_chunk_text + "\n\n" + item_text).strip()
-                continue 
-            
-            combined = (current_chunk_text + "\n\n" + item_text).strip()
-            # Wenn durch Verschmelzung das Limit gesprengt würde, vorher flashen
-            if estimate_tokens(combined) > max_tokens and current_chunk_text:
+            # WP-24c v4.2.5: Bei strict_heading_split: true wird nach JEDER Sektion geflasht
+            # Kein Carry-Over erlaubt, auch nicht für leere Überschriften
+            if current_chunk_text:
+                # Flashe vorherigen Chunk
                _emit(current_chunk_text, current_meta["title"], current_meta["path"])
-                current_chunk_text = item_text
+                current_chunk_text = ""
+            
+            # Neue Sektion: Initialisiere Meta
+            current_meta["title"] = item["meta"].section_title
+            current_meta["path"] = item["meta"].section_path
+            
+            # WP-24c v4.2.5: Auch leere Sektionen werden als separater Chunk erstellt
+            # (nur Überschrift, kein Inhalt)
+            if item.get("is_empty", False):
+                # Leere Sektion: Nur Überschrift als Chunk
+                _emit(item_text, current_meta["title"], current_meta["path"])
            else:
-                current_chunk_text = combined
+                # Normale Sektion: Prüfe auf Token-Limit
+                if estimate_tokens(item_text) > max_tokens:
+                    # Sektion zu groß: Smart Zerlegung (aber trotzdem in separaten Chunks)
+                    sents = split_sentences(item_text)
+                    header_prefix = item["meta"].text if item["meta"].kind == "heading" else ""
+                    
+                    take_sents = []; take_len = 0
+                    while sents:
+                        s = sents.pop(0); slen = estimate_tokens(s)
+                        if take_len + slen > target and take_sents:
+                            _emit(" ".join(take_sents), current_meta["title"], current_meta["path"])
+                            take_sents = [s]; take_len = slen
+                        else:
+                            take_sents.append(s); take_len += slen
+                    
+                    if take_sents:
+                        _emit(" ".join(take_sents), current_meta["title"], current_meta["path"])
+                else:
+                    # Sektion passt: Direkt als Chunk
+                    _emit(item_text, current_meta["title"], current_meta["path"])
            
-            # Im Hard-Split wird nach jeder Sektion geflasht
-            _emit(current_chunk_text, current_meta["title"], current_meta["path"])
            current_chunk_text = ""
            continue

--- a/app/core/chunking/chunking_utils.py
+++ b/app/core/chunking/chunking_utils.py
@ -27,12 +27,31 @@ def load_yaml_config() -> Dict[str, Any]:
            return data
    except Exception: return {}

-def get_chunk_config(note_type: str) -> Dict[str, Any]:
-    """Lädt die Chunking-Strategie basierend auf dem Note-Type."""
+def get_chunk_config(note_type: str, frontmatter: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+    """
+    Lädt die Chunking-Strategie basierend auf dem Note-Type.
+    WP-24c v4.2.5: Frontmatter-Override für chunking_profile hat höchste Priorität.
+    
+    Args:
+        note_type: Der Typ der Note (z.B. "decision", "experience")
+        frontmatter: Optionales Frontmatter-Dict mit chunking_profile Override
+        
+    Returns:
+        Dict mit Chunking-Konfiguration
+    """
    full_config = load_yaml_config()
    profiles = full_config.get("chunking_profiles", {})
    type_def = full_config.get("types", {}).get(note_type.lower(), {})
-    profile_name = type_def.get("chunking_profile") or full_config.get("defaults", {}).get("chunking_profile", "sliding_standard")
+    
+    # WP-24c v4.2.5: Priorität: Frontmatter > Type-Def > Defaults
+    profile_name = None
+    if frontmatter and "chunking_profile" in frontmatter:
+        profile_name = frontmatter.get("chunking_profile")
+    if not profile_name:
+        profile_name = type_def.get("chunking_profile")
+    if not profile_name:
+        profile_name = full_config.get("defaults", {}).get("chunking_profile", "sliding_standard")
+    
    config = profiles.get(profile_name, DEFAULT_PROFILE).copy()
    if "overlap" in config and isinstance(config["overlap"], list): 
        config["overlap"] = tuple(config["overlap"])
--- a/app/core/graph/graph_derive_edges.py
+++ b/app/core/graph/graph_derive_edges.py
@ -209,6 +209,7 @@ def build_edges_for_note(
    """
    Erzeugt und aggregiert alle Kanten für eine Note.
    WP-24c v4.2.0: Unterstützt Note-Scope Extraktions-Zonen.
+    WP-24c v4.2.7: Chunk-Attribution für Callouts über candidate_pool mit explicit:callout Provenance.
    
    Args:
        note_id: ID der Note
@ -313,11 +314,17 @@ def build_edges_for_note(
            edges.append(_edge(k, "chunk", cid, t, note_id, payload))

        # B. Candidate Pool (WP-15b Validierte KI-Kanten)
+        # WP-24c v4.2.7: Sammle Callout-Keys für Chunk-Attribution
        pool = ch.get("candidate_pool") or ch.get("candidate_edges") or []
        for cand in pool:
            raw_t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai")
            t, sec = parse_link_target(raw_t, note_id)
            if t:
+                # WP-24c v4.2.7: Wenn Provenance explicit:callout, füge zu all_chunk_callout_keys hinzu
+                # Dadurch weiß die globale Extraktion, dass diese Kante bereits auf Chunk-Ebene versorgt ist
+                if p == "explicit:callout":
+                    all_chunk_callout_keys.add((k, t, sec))
+                
                # WP-24c v4.1.0: target_section fließt nun fest in die ID-Generierung ein
                payload = {
                    "chunk_id": cid, 
--- a/app/core/graph/graph_utils.py
+++ b/app/core/graph/graph_utils.py
@ -24,6 +24,7 @@ PROVENANCE_PRIORITY = {
    "explicit:wikilink": 1.00,
    "inline:rel": 0.95,
    "callout:edge": 0.90,
+    "explicit:callout": 0.90,  # WP-24c v4.2.7: Callout-Kanten aus candidate_pool
    "semantic_ai": 0.90,           # Validierte KI-Kanten
    "structure:belongs_to": 1.00,
    "structure:order": 0.95,       # next/prev