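Update graph_derive_edges.py to version 4.3.1: Introduce precision priority for chunk scope, so that chunk-scope candidates are always favored over note-scope edges (the only exception is explicit:note_zone, which keeps the highest priority). Adjust confidence values for explicit callouts (candidate_pool explicit:callout = 1.0; callouts found only by the global note-scope scan = 0.7) and make callout key generation explicit, using the format (kind, target_id, target_section), for consistent deduplication. Clarify the comments in the edge-processing scope decision to reinforce the precedence of chunk scope.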

2026-01-11 15:08:08 +01:00 · 2026-01-11 15:08:08 +01:00 · ee91583614
commit ee91583614
parent 3a17b646e1
1 changed file with 21 additions and 8 deletions
--- a/app/core/graph/graph_derive_edges.py
+++ b/app/core/graph/graph_derive_edges.py
@@ -20,7 +20,11 @@ DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
             WP-24c v4.3.0: Lokalisierung des Datenverlusts
             - Debug-Logik für Audit des Datentransfers
             - Verifizierung der candidate_pool Übertragung
-VERSION: 4.3.0 (WP-24c: Datenverlust-Lokalisierung)
+             WP-24c v4.3.1: Präzisions-Priorität für Chunk-Scope
+             - Chunk-Scope gewinnt zwingend über Note-Scope (außer explicit:note_zone)
+             - Confidence-Werte: candidate_pool explicit:callout = 1.0, globaler Scan = 0.7
+             - Key-Generierung gehärtet für konsistente Deduplizierung
+VERSION: 4.3.1 (WP-24c: Präzisions-Priorität)
 STATUS: Active
 """
 import re
@@ -182,11 +186,12 @@ def extract_callouts_from_markdown(
        
        # WP-24c v4.2.1: Callout ist NICHT in Chunks -> lege mit scope: "note" an
        # (typischerweise in Edge-Zonen, die nicht gechunkt werden)
+        # WP-24c v4.3.1: Confidence auf 0.7 gesenkt, damit chunk-Scope (1.0) gewinnt
        payload = {
            "edge_id": _mk_edge_id(k, note_id, t, "note", target_section=sec),
            "provenance": "explicit:callout",
            "rule_id": "callout:edge",
-            "confidence": PROVENANCE_PRIORITY.get("callout:edge", 1.0)
+            "confidence": 0.7  # WP-24c v4.3.1: Niedrigere Confidence für Note-Scope Callouts
        }
        if sec:
            payload["target_section"] = sec
@@ -328,12 +333,15 @@ def build_edges_for_note(
            p = cand.get("provenance", "semantic_ai")
            
            # WP-24c v4.2.9 Fix B: Wenn Provenance explicit:callout, extrahiere Key
+            # WP-24c v4.3.1: Key-Generierung gehärtet - Format (kind, target_id, target_section)
+            # Exakt konsistent mit dem globalen Scan für zuverlässige Deduplizierung
            if p == "explicit:callout":
                t, sec = parse_link_target(raw_t, note_id)
                if t:
-                    # Key-Format: (kind, target, section) für Multigraph-Präzision
+                    # Key-Format: (kind, target_id, target_section) - exakt wie im globalen Scan
                    # Dies verhindert, dass der globale Scan diese Kante als Note-Scope neu anlegt
-                    all_chunk_callout_keys.add((k, t, sec))
+                    callout_key = (k, t, sec)  # WP-24c v4.3.1: Explizite Key-Generierung
+                    all_chunk_callout_keys.add(callout_key)
                    logger.debug(f"Note [{note_id}]: Callout-Key gesammelt: ({k}, {t}, {sec})")
    
    # WP-24c v4.3.0: Debug-Logik - Ausgabe der gesammelten Keys
@@ -371,10 +379,12 @@ def build_edges_for_note(
            t, sec = parse_link_target(raw_t, note_id)
            if t:
                # WP-24c v4.1.0: target_section fließt nun fest in die ID-Generierung ein
+                # WP-24c v4.3.1: explicit:callout erhält Confidence 1.0 für Präzisions-Priorität
+                confidence = 1.0 if p == "explicit:callout" else PROVENANCE_PRIORITY.get(p, 0.90)
                payload = {
                    "chunk_id": cid, 
                    "edge_id": _mk_edge_id(k, cid, t, "chunk", target_section=sec),
-                    "provenance": p, "rule_id": f"candidate:{p}", "confidence": PROVENANCE_PRIORITY.get(p, 0.90)
+                    "provenance": p, "rule_id": f"candidate:{p}", "confidence": confidence
                }
                if sec: payload["target_section"] = sec
                edges.append(_edge(k, "chunk", cid, t, note_id, payload))
@@ -539,23 +549,26 @@ def build_edges_for_note(
            final_edges.append(winner)
        else:
            # Mehrere Kanten mit gleichem semantischen Schlüssel: Scope-Entscheidung
+            # WP-24c v4.3.1: Präzision (Chunk) siegt über Globalität (Note)
            winner = None
            
-            # Regel 1: explicit:note_zone hat höchste Priorität
+            # Regel 1: explicit:note_zone hat höchste Priorität (Autorität)
            note_zone_candidates = [e for e in group if e.get("provenance") == "explicit:note_zone"]
            if note_zone_candidates:
                # Wenn mehrere note_zone: Nimm die mit höchster Confidence
                winner = max(note_zone_candidates, key=lambda e: e.get("confidence", 0))
            else:
-                # Regel 2: chunk-Scope bevorzugen (Präzisions-Vorteil)
+                # Regel 2: chunk-Scope ZWINGEND bevorzugen (Präzisions-Vorteil)
+                # WP-24c v4.3.1: Wenn mindestens ein chunk-Kandidat existiert, muss dieser gewinnen
                chunk_candidates = [e for e in group if e.get("scope") == "chunk"]
                if chunk_candidates:
                    # Wenn mehrere chunk: Nimm die mit höchster Confidence * Priority
+                    # Die Confidence ist hier nicht der alleinige Ausschlaggeber - chunk-Scope hat Vorrang
                    winner = max(chunk_candidates, key=lambda e: (
                        e.get("confidence", 0) * PROVENANCE_PRIORITY.get(e.get("provenance", ""), 0.7)
                    ))
                else:
-                    # Regel 3: Fallback: Höchste Confidence * Priority
+                    # Regel 3: Fallback (nur wenn KEIN chunk-Kandidat vorhanden): Höchste Confidence * Priority
                    winner = max(group, key=lambda e: (
                        e.get("confidence", 0) * PROVENANCE_PRIORITY.get(e.get("provenance", ""), 0.7)
                    ))