diff --git a/app/core/graph/graph_derive_edges.py b/app/core/graph/graph_derive_edges.py index 9b90463..3317a82 100644 --- a/app/core/graph/graph_derive_edges.py +++ b/app/core/graph/graph_derive_edges.py @@ -24,7 +24,7 @@ DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung. - Chunk-Scope gewinnt zwingend über Note-Scope (außer explicit:note_zone) - Confidence-Werte: candidate_pool explicit:callout = 1.0, globaler Scan = 0.7 - Key-Generierung gehärtet für konsistente Deduplizierung -VERSION: 4.3.1 (WP-24c: Präzisions-Priorität) +VERSION: 4.4.0 (WP-26 v1.4: Automatische Backlinks für Intra-Note-Edges) STATUS: Active """ import re @@ -35,6 +35,11 @@ from .graph_utils import ( PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for, get_typical_edge_for # WP-26 v1.1: Für automatische Intra-Note-Edges ) +# WP-26 v1.4: Für automatische Backlinks bei Intra-Note-Edges +try: + from app.services.edge_registry import registry as edge_registry +except ImportError: + edge_registry = None from .graph_extractors import ( extract_typed_relations, extract_callout_relations, extract_wikilinks ) @@ -1052,4 +1057,61 @@ def build_edges_for_note( final_edges.append(winner) + # WP-26 v1.4: Automatische Backlinks für Intra-Note-Edges (Chunk-Level) + # Erstelle inverse Edges für alle Intra-Note-Edges, wenn noch nicht vorhanden + if edge_registry: + # Erstelle Set aller existierenden Edge-Keys für schnelle Lookup + existing_edge_keys: Set[Tuple[str, str, str, Optional[str]]] = set() + for e in final_edges: + source = e.get("source_id", "") + target = e.get("target_id", "") + kind = e.get("kind", "") + target_section = e.get("target_section") + existing_edge_keys.add((source, target, kind, target_section)) + + # Durchlaufe alle Edges und erstelle Backlinks für Intra-Note-Edges + backlinks_to_add: List[dict] = [] + for e in final_edges: + is_internal = e.get("is_internal", False) + scope = e.get("scope", "chunk") + source_id = e.get("source_id", "") + target_id = 
e.get("target_id", "") + kind = e.get("kind", "") + target_section = e.get("target_section") + + # Nur Intra-Note-Edges auf Chunk-Level berücksichtigen + if not is_internal or scope != "chunk": + continue + + # Prüfe, ob bereits ein inverser Edge existiert + inv_kind = edge_registry.get_inverse(kind) if edge_registry else None + if not inv_kind: + continue # Kein inverser Edge-Type verfügbar + + # Prüfe, ob inverser Edge bereits existiert + inv_key = (target_id, source_id, inv_kind, None) # Backlink hat keine target_section + if inv_key in existing_edge_keys: + continue # Backlink bereits vorhanden + + # Erstelle automatischen Backlink + backlink_edge = _edge(inv_kind, "chunk", target_id, source_id, note_id, { + "chunk_id": target_id, # Backlink geht vom Target-Chunk aus + "edge_id": _mk_edge_id(inv_kind, target_id, source_id, "chunk"), + "provenance": "rule", + "rule_id": "derived:intra_note_backlink", + "source_hint": "automatic_backlink", + "confidence": PROVENANCE_PRIORITY.get("derived:backlink", 0.8), + "is_internal": True, + "original_edge_kind": kind # Debug-Info: Welcher Edge-Type wurde invertiert + }) + + backlinks_to_add.append(backlink_edge) + existing_edge_keys.add(inv_key) # Verhindere Duplikate + logger.debug(f"WP-26 Backlink erstellt: {target_id} --[{inv_kind}]--> {source_id} (Original: {kind})") + + # Füge Backlinks zu final_edges hinzu + if backlinks_to_add: + final_edges.extend(backlinks_to_add) + logger.info(f"WP-26: {len(backlinks_to_add)} automatische Backlinks für Intra-Note-Edges erstellt") + return final_edges \ No newline at end of file diff --git a/docs/06_Roadmap/06_LH_Section_Types_Intra_Note_Edges.md b/docs/06_Roadmap/06_LH_Section_Types_Intra_Note_Edges.md index 3c187e6..945c124 100644 --- a/docs/06_Roadmap/06_LH_Section_Types_Intra_Note_Edges.md +++ b/docs/06_Roadmap/06_LH_Section_Types_Intra_Note_Edges.md @@ -566,6 +566,98 @@ default_edge = topology["typical"][0] # "resulted_in" } ``` +#### FA-08b: Automatische Backlinks für 
Intra-Note-Edges + +**Anforderung:** Für alle Intra-Note-Edges (`is_internal: True`, `scope: "chunk"`) werden automatisch inverse Backlinks erzeugt, wenn diese nicht bereits explizit vorhanden sind. + +**Regeln:** + +1. **Nur für Intra-Note-Edges:** Backlinks werden nur für Edges innerhalb derselben Note erstellt +2. **Chunk-Level:** Backlinks haben `scope: "chunk"` (nicht `scope: "note"`) +3. **Inverser Edge-Type:** Der inverse Edge-Type wird via `EdgeRegistry.get_inverse()` ermittelt +4. **Deduplizierung:** Backlinks werden nur erstellt, wenn noch kein inverser Edge existiert +5. **Gilt für alle Intra-Note-Edges:** Explizite Edges, automatische Section-Transitions, etc. + +**Beispiel:** + +**Input (explizite Edge):** +```markdown +## Reflexion ^ref +> [!section] insight +> [!edge] derives +> [[#^sit]] +``` + +**Generierte Edges:** + +```python +# Forward-Edge (explizit) +{ + "kind": "derives", + "source_id": "NoteID#c02", # Reflexion + "target_id": "NoteID#c01", # Situation + "scope": "chunk", + "is_internal": True, + "provenance": "explicit" +} + +# Backlink (automatisch erzeugt) +{ + "kind": "derived_from", # Inverser Edge-Type + "source_id": "NoteID#c01", # Situation (invertiert) + "target_id": "NoteID#c02", # Reflexion (invertiert) + "scope": "chunk", + "is_internal": True, + "provenance": "rule", + "rule_id": "derived:intra_note_backlink", + "source_hint": "automatic_backlink", + "original_edge_kind": "derives" # Debug-Info +} +``` + +**Beispiel (automatische Section-Transition):** + +**Input:** +```markdown +## Situation ^sit +> [!section] experience + +## Reflexion ^ref +> [!section] insight + +``` + +**Generierte Edges:** + +```python +# Forward-Edge (automatisch aus Schema) +{ + "kind": "resulted_in", + "source_id": "NoteID#c01", # experience + "target_id": "NoteID#c02", # insight + "scope": "chunk", + "is_internal": True, + "provenance": "rule", + "rule_id": "inferred:section_transition" +} + +# Backlink (automatisch erzeugt) +{ + "kind": "caused_by", # 
Inverser Edge-Type
+    "source_id": "NoteID#c02",  # insight (invertiert)
+    "target_id": "NoteID#c01",  # experience (invertiert)
+    "scope": "chunk",
+    "is_internal": True,
+    "provenance": "rule",
+    "rule_id": "derived:intra_note_backlink"
+}
+```
+
+**Wichtig:**
+- Backlinks werden **nur** erstellt, wenn noch kein inverser Edge existiert
+- Backlinks haben immer `scope: "chunk"` (nicht `scope: "note"`)
+- Backlinks werden **nach** der Deduplizierung erstellt, um Duplikate zu vermeiden
+
 ### 4.4 Retriever-Anpassungen
 
 #### FA-09: Edge-Gewichtung für Intra-Note-Edges
diff --git a/tests/test_wp26_section_types.py b/tests/test_wp26_section_types.py
index fc88152..06425b9 100644
--- a/tests/test_wp26_section_types.py
+++ b/tests/test_wp26_section_types.py
@@ -3,7 +3,7 @@ FILE: tests/test_wp26_section_types.py
 DESCRIPTION: Unit-Tests für WP-26 Phase 1: Section-Types und Intra-Note-Edges
  WP-26 v1.1: Erweitert um Tests für Section-Split und automatische Edges
  WP-26 v1.3: Erweitert um Tests für rückwirkende section_type Propagation
-VERSION: 1.3.0
+VERSION: 1.4.0 (WP-26 v1.4: Automatische Backlinks)
 """
 import pytest
 from app.core.chunking.chunking_parser import parse_blocks
@@ -624,5 +624,114 @@ class TestBlockIdParsing:
         assert section == "Normale Überschrift"
 
 
+class TestAutomaticBacklinks:
+    """UT-18: Automatic backlinks for intra-note edges (WP-26 v1.4)."""
+
+    # rule_id that build_edges_for_note stamps on every backlink it generates.
+    BACKLINK_RULE_ID = "derived:intra_note_backlink"
+
+    @staticmethod
+    def _build_edges(chunks):
+        """Run build_edges_for_note on *chunks* of note1 with note-scope refs disabled."""
+        # Local import keeps the graph dependency scoped to these tests.
+        from app.core.graph.graph_derive_edges import build_edges_for_note
+
+        return build_edges_for_note(
+            note_id="note1",
+            chunks=chunks,
+            note_level_references=None,
+            include_note_scope_refs=False,
+            markdown_body="",
+        )
+
+    @staticmethod
+    def _edges_between(edges, source_id, target_id):
+        """Return all edges that run from *source_id* to *target_id*."""
+        return [
+            e for e in edges
+            if e.get("source_id") == source_id and e.get("target_id") == target_id
+        ]
+
+    def test_backlink_created_for_intra_note_edge(self):
+        """A backlink is generated automatically for an intra-note edge."""
+        # No candidate_pool here, so any forward edge has to be derived by
+        # build_edges_for_note itself (presumably the section-type transition).
+        chunks = [
+            {
+                "chunk_id": "note1#c01",
+                "type": "experience",
+                "section_type": "experience",
+                "window": "Situation text",
+            },
+            {
+                "chunk_id": "note1#c02",
+                "type": "insight",
+                "section_type": "insight",
+                "window": "Reflexion text",
+            },
+        ]
+
+        edges = self._build_edges(chunks)
+
+        forward_edges = self._edges_between(edges, "note1#c01", "note1#c02")
+        reverse_edges = self._edges_between(edges, "note1#c02", "note1#c01")
+        assert len(forward_edges) > 0, "Forward-Edge sollte vorhanden sein"
+
+        # Select the generated backlinks by rule_id instead of taking the first
+        # reverse-direction edge, which could be an unrelated c02 -> c01 edge.
+        backlinks = [
+            e for e in reverse_edges
+            if e.get("rule_id") == self.BACKLINK_RULE_ID
+        ]
+        assert len(backlinks) > 0, "Backlink sollte automatisch erstellt werden"
+
+        for backlink in backlinks:
+            assert backlink.get("is_internal") is True
+            assert backlink.get("scope") == "chunk"
+            assert backlink.get("provenance") == "rule"
+
+    def test_backlink_not_created_if_already_exists(self):
+        """No backlink is generated when the inverse edge already exists."""
+        # Each chunk explicitly declares the edge that is the inverse of the other's.
+        chunks = [
+            {
+                "chunk_id": "note1#c01",
+                "type": "experience",
+                "section_type": "experience",
+                "window": "Situation text",
+                "candidate_pool": [
+                    {
+                        "kind": "derived_from",  # inverse edge type
+                        "to": "note1#c02",
+                        "provenance": "explicit:callout",
+                    }
+                ],
+            },
+            {
+                "chunk_id": "note1#c02",
+                "type": "insight",
+                "section_type": "insight",
+                "window": "Reflexion text",
+                "candidate_pool": [
+                    {
+                        "kind": "derives",  # forward edge
+                        "to": "note1#c01",
+                        "provenance": "explicit:callout",
+                    }
+                ],
+            },
+        ]
+
+        edges = self._build_edges(chunks)
+
+        backlink_edges = [
+            e for e in self._edges_between(edges, "note1#c01", "note1#c02")
+            if e.get("kind") == "derived_from"
+        ]
+
+        # Exactly one: the explicit edge, with no generated duplicate next to it.
+        assert len(backlink_edges) == 1, f"Erwartet genau einen Backlink, gefunden: {len(backlink_edges)}"
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])