Implement automatic backlinks for intra-note edges (WP-26 v1.4)

- Added functionality to automatically create inverse backlinks for intra-note edges at the chunk level, ensuring that backlinks are generated only when they do not already exist.
- Updated the documentation to outline the requirements and rules for backlink creation, including conditions for deduplication and scope.
- Introduced unit tests to validate the creation of backlinks and ensure correct behavior when existing backlinks are present.
- Incremented version to 4.4.0 to reflect the new feature addition.
This commit is contained in:
Lars 2026-01-26 11:16:40 +01:00
parent bc8bdfac3c
commit 52ed079067
3 changed files with 253 additions and 2 deletions

View File

@ -24,7 +24,7 @@ DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
- Chunk-Scope gewinnt zwingend über Note-Scope (außer explicit:note_zone)
- Confidence-Werte: candidate_pool explicit:callout = 1.0, globaler Scan = 0.7
- Key-Generierung gehärtet für konsistente Deduplizierung
VERSION: 4.3.1 (WP-24c: Präzisions-Priorität)
VERSION: 4.4.0 (WP-26 v1.4: Automatische Backlinks für Intra-Note-Edges)
STATUS: Active
"""
import re
@ -35,6 +35,11 @@ from .graph_utils import (
PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for,
get_typical_edge_for # WP-26 v1.1: Für automatische Intra-Note-Edges
)
# WP-26 v1.4: Für automatische Backlinks bei Intra-Note-Edges
try:
from app.services.edge_registry import registry as edge_registry
except ImportError:
edge_registry = None
from .graph_extractors import (
extract_typed_relations, extract_callout_relations, extract_wikilinks
)
@ -1052,4 +1057,61 @@ def build_edges_for_note(
final_edges.append(winner)
# WP-26 v1.4: Automatische Backlinks für Intra-Note-Edges (Chunk-Level)
# Erstelle inverse Edges für alle Intra-Note-Edges, wenn noch nicht vorhanden
if edge_registry:
# Erstelle Set aller existierenden Edge-Keys für schnelle Lookup
existing_edge_keys: Set[Tuple[str, str, str, Optional[str]]] = set()
for e in final_edges:
source = e.get("source_id", "")
target = e.get("target_id", "")
kind = e.get("kind", "")
target_section = e.get("target_section")
existing_edge_keys.add((source, target, kind, target_section))
# Durchlaufe alle Edges und erstelle Backlinks für Intra-Note-Edges
backlinks_to_add: List[dict] = []
for e in final_edges:
is_internal = e.get("is_internal", False)
scope = e.get("scope", "chunk")
source_id = e.get("source_id", "")
target_id = e.get("target_id", "")
kind = e.get("kind", "")
target_section = e.get("target_section")
# Nur Intra-Note-Edges auf Chunk-Level berücksichtigen
if not is_internal or scope != "chunk":
continue
# Prüfe, ob bereits ein inverser Edge existiert
inv_kind = edge_registry.get_inverse(kind) if edge_registry else None
if not inv_kind:
continue # Kein inverser Edge-Type verfügbar
# Prüfe, ob inverser Edge bereits existiert
inv_key = (target_id, source_id, inv_kind, None) # Backlink hat keine target_section
if inv_key in existing_edge_keys:
continue # Backlink bereits vorhanden
# Erstelle automatischen Backlink
backlink_edge = _edge(inv_kind, "chunk", target_id, source_id, note_id, {
"chunk_id": target_id, # Backlink geht vom Target-Chunk aus
"edge_id": _mk_edge_id(inv_kind, target_id, source_id, "chunk"),
"provenance": "rule",
"rule_id": "derived:intra_note_backlink",
"source_hint": "automatic_backlink",
"confidence": PROVENANCE_PRIORITY.get("derived:backlink", 0.8),
"is_internal": True,
"original_edge_kind": kind # Debug-Info: Welcher Edge-Type wurde invertiert
})
backlinks_to_add.append(backlink_edge)
existing_edge_keys.add(inv_key) # Verhindere Duplikate
logger.debug(f"WP-26 Backlink erstellt: {target_id} --[{inv_kind}]--> {source_id} (Original: {kind})")
# Füge Backlinks zu final_edges hinzu
if backlinks_to_add:
final_edges.extend(backlinks_to_add)
logger.info(f"WP-26: {len(backlinks_to_add)} automatische Backlinks für Intra-Note-Edges erstellt")
return final_edges

View File

@ -566,6 +566,98 @@ default_edge = topology["typical"][0] # "resulted_in"
}
```
#### FA-08b: Automatische Backlinks für Intra-Note-Edges
**Anforderung:** Für alle Intra-Note-Edges (`is_internal: True`, `scope: "chunk"`) werden automatisch inverse Backlinks erzeugt, wenn diese nicht bereits explizit vorhanden sind.
**Regeln:**
1. **Nur für Intra-Note-Edges:** Backlinks werden nur für Edges innerhalb derselben Note erstellt
2. **Chunk-Level:** Backlinks haben `scope: "chunk"` (nicht `scope: "note"`)
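3. **Inverser Edge-Type:** Der inverse Edge-Type wird via `EdgeRegistry.get_inverse()` ermittelt. Liefert die Registry keinen inversen Typ (oder ist die Registry nicht importierbar), wird für diesen Edge kein Backlink erzeugt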
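4. **Deduplizierung:** Backlinks werden nur erstellt, wenn noch kein inverser Edge existiert, d. h. kein Edge mit vertauschtem `source_id`/`target_id`, dem inversen Edge-Type und ohne `target_section`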
5. **Gilt für alle Intra-Note-Edges:** Explizite Edges, automatische Section-Transitions, etc.
**Beispiel:**
**Input (explizite Edge):**
```markdown
## Reflexion ^ref
> [!section] insight
> [!edge] derives
> [[#^sit]]
```
**Generierte Edges:**
```python
# Forward-Edge (explizit)
{
"kind": "derives",
"source_id": "NoteID#c02", # Reflexion
"target_id": "NoteID#c01", # Situation
"scope": "chunk",
"is_internal": True,
"provenance": "explicit"
}
# Backlink (automatisch erzeugt)
{
"kind": "derived_from", # Inverser Edge-Type
"source_id": "NoteID#c01", # Situation (invertiert)
"target_id": "NoteID#c02", # Reflexion (invertiert)
"scope": "chunk",
"is_internal": True,
"provenance": "rule",
"rule_id": "derived:intra_note_backlink",
"source_hint": "automatic_backlink",
"original_edge_kind": "derives" # Debug-Info
}
```
**Beispiel (automatische Section-Transition):**
**Input:**
```markdown
## Situation ^sit
> [!section] experience
## Reflexion ^ref
> [!section] insight
<!-- Kein expliziter [!edge] -->
```
**Generierte Edges:**
```python
# Forward-Edge (automatisch aus Schema)
{
"kind": "resulted_in",
"source_id": "NoteID#c01", # experience
"target_id": "NoteID#c02", # insight
"scope": "chunk",
"is_internal": True,
"provenance": "rule",
"rule_id": "inferred:section_transition"
}
# Backlink (automatisch erzeugt)
{
"kind": "caused_by", # Inverser Edge-Type
"source_id": "NoteID#c02", # insight (invertiert)
"target_id": "NoteID#c01", # experience (invertiert)
"scope": "chunk",
"is_internal": True,
"provenance": "rule",
"rule_id": "derived:intra_note_backlink",
"source_hint": "automatic_backlink",
"original_edge_kind": "resulted_in" # Debug-Info
}
```
**Wichtig:**
- Backlinks werden **nur** erstellt, wenn noch kein inverser Edge existiert
- Backlinks haben immer `scope: "chunk"` (nicht `scope: "note"`)
- Backlinks werden **nach** der Deduplizierung erstellt, um Duplikate zu vermeiden
### 4.4 Retriever-Anpassungen
#### FA-09: Edge-Gewichtung für Intra-Note-Edges

View File

@ -3,7 +3,7 @@ FILE: tests/test_wp26_section_types.py
DESCRIPTION: Unit-Tests für WP-26 Phase 1: Section-Types und Intra-Note-Edges
WP-26 v1.1: Erweitert um Tests für Section-Split und automatische Edges
WP-26 v1.3: Erweitert um Tests für rückwirkende section_type Propagation
VERSION: 1.3.0
VERSION: 1.4.0 (WP-26 v1.4: Automatische Backlinks)
"""
import pytest
from app.core.chunking.chunking_parser import parse_blocks
@ -624,5 +624,102 @@ class TestBlockIdParsing:
assert section == "Normale Überschrift"
class TestAutomaticBacklinks:
    """UT-18: Automatic backlinks for intra-note edges (WP-26 v1.4).

    Both tests call ``build_edges_for_note`` with two mock chunks that belong
    to the same note (``note1#c01`` and ``note1#c02``) and inspect the
    returned list of edge dicts.
    """

    def test_backlink_created_for_intra_note_edge(self):
        """A backlink is created automatically for an intra-note edge."""
        from app.core.graph.graph_derive_edges import build_edges_for_note

        # Mock chunks carrying section types but no explicit edges.
        # NOTE(review): presumably the experience -> insight transition is
        # what yields the forward edge here -- confirm against the schema.
        chunks = [
            {
                "chunk_id": "note1#c01",
                "type": "experience",
                "section_type": "experience",
                "window": "Situation text",
            },
            {
                "chunk_id": "note1#c02",
                "type": "insight",
                "section_type": "insight",
                "window": "Reflexion text",
            },
        ]

        edges = build_edges_for_note(
            note_id="note1",
            chunks=chunks,
            note_level_references=None,
            include_note_scope_refs=False,
            markdown_body="",
        )

        # Both the forward edge and the backlink must be present.
        forward_edges = [
            e for e in edges
            if e.get("source_id") == "note1#c01"
            and e.get("target_id") == "note1#c02"
        ]
        # Select the automatic backlink by its rule marker rather than taking
        # whichever c02 -> c01 edge happens to come first; any other
        # reverse-direction edge would otherwise be inspected by mistake.
        backlink_edges = [
            e for e in edges
            if e.get("source_id") == "note1#c02"
            and e.get("target_id") == "note1#c01"
            and e.get("rule_id") == "derived:intra_note_backlink"
        ]

        assert forward_edges, "Forward-Edge sollte vorhanden sein"
        assert backlink_edges, "Backlink sollte automatisch erstellt werden"

        # Every automatic backlink must carry the documented properties.
        for backlink in backlink_edges:
            assert backlink.get("is_internal") is True
            assert backlink.get("scope") == "chunk"
            assert backlink.get("provenance") == "rule"

    def test_backlink_not_created_if_already_exists(self):
        """No backlink is created when an inverse edge already exists."""
        from app.core.graph.graph_derive_edges import build_edges_for_note

        # Mock chunks that declare both directions explicitly via their
        # candidate pools.
        chunks = [
            {
                "chunk_id": "note1#c01",
                "type": "experience",
                "section_type": "experience",
                "window": "Situation text",
                "candidate_pool": [
                    {
                        "kind": "derived_from",  # inverse edge type
                        "to": "note1#c02",
                        "provenance": "explicit:callout",
                    }
                ],
            },
            {
                "chunk_id": "note1#c02",
                "type": "insight",
                "section_type": "insight",
                "window": "Reflexion text",
                "candidate_pool": [
                    {
                        "kind": "derives",  # forward edge
                        "to": "note1#c01",
                        "provenance": "explicit:callout",
                    }
                ],
            },
        ]

        edges = build_edges_for_note(
            note_id="note1",
            chunks=chunks,
            note_level_references=None,
            include_note_scope_refs=False,
            markdown_body="",
        )

        # Count the c01 -> c02 "derived_from" edges; the explicit one must not
        # be duplicated by an additional automatic backlink.
        backlink_edges = [
            e for e in edges
            if e.get("source_id") == "note1#c01"
            and e.get("target_id") == "note1#c02"
            and e.get("kind") == "derived_from"
        ]

        # Exactly one such edge is expected.
        assert len(backlink_edges) == 1, f"Erwartet genau einen Backlink, gefunden: {len(backlink_edges)}"
# Allow running this test module directly as a script: hands the file to
# pytest with verbose output.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])