Implement automatic backlinks for intra-note edges (WP-26 v1.4)
- Added functionality to automatically create inverse backlinks for intra-note edges at the chunk level; a backlink is generated only when the inverse edge does not already exist.
- Updated the documentation to outline the requirements and rules for backlink creation, including conditions for deduplication and scope.
- Introduced unit tests to validate the creation of backlinks and ensure correct behavior when existing backlinks are present.
- Incremented version to 4.4.0 to reflect the new feature addition.
This commit is contained in:
parent
bc8bdfac3c
commit
52ed079067
|
|
@ -24,7 +24,7 @@ DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
|
|||
- Chunk-Scope gewinnt zwingend über Note-Scope (außer explicit:note_zone)
|
||||
- Confidence-Werte: candidate_pool explicit:callout = 1.0, globaler Scan = 0.7
|
||||
- Key-Generierung gehärtet für konsistente Deduplizierung
|
||||
VERSION: 4.3.1 (WP-24c: Präzisions-Priorität)
|
||||
VERSION: 4.4.0 (WP-26 v1.4: Automatische Backlinks für Intra-Note-Edges)
|
||||
STATUS: Active
|
||||
"""
|
||||
import re
|
||||
|
|
@ -35,6 +35,11 @@ from .graph_utils import (
|
|||
PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for,
|
||||
get_typical_edge_for # WP-26 v1.1: Für automatische Intra-Note-Edges
|
||||
)
|
||||
# WP-26 v1.4: Für automatische Backlinks bei Intra-Note-Edges
|
||||
try:
|
||||
from app.services.edge_registry import registry as edge_registry
|
||||
except ImportError:
|
||||
edge_registry = None
|
||||
from .graph_extractors import (
|
||||
extract_typed_relations, extract_callout_relations, extract_wikilinks
|
||||
)
|
||||
|
|
@ -1052,4 +1057,61 @@ def build_edges_for_note(
|
|||
|
||||
final_edges.append(winner)
|
||||
|
||||
# WP-26 v1.4: Automatische Backlinks für Intra-Note-Edges (Chunk-Level)
|
||||
# Erstelle inverse Edges für alle Intra-Note-Edges, wenn noch nicht vorhanden
|
||||
if edge_registry:
|
||||
# Erstelle Set aller existierenden Edge-Keys für schnelle Lookup
|
||||
existing_edge_keys: Set[Tuple[str, str, str, Optional[str]]] = set()
|
||||
for e in final_edges:
|
||||
source = e.get("source_id", "")
|
||||
target = e.get("target_id", "")
|
||||
kind = e.get("kind", "")
|
||||
target_section = e.get("target_section")
|
||||
existing_edge_keys.add((source, target, kind, target_section))
|
||||
|
||||
# Durchlaufe alle Edges und erstelle Backlinks für Intra-Note-Edges
|
||||
backlinks_to_add: List[dict] = []
|
||||
for e in final_edges:
|
||||
is_internal = e.get("is_internal", False)
|
||||
scope = e.get("scope", "chunk")
|
||||
source_id = e.get("source_id", "")
|
||||
target_id = e.get("target_id", "")
|
||||
kind = e.get("kind", "")
|
||||
target_section = e.get("target_section")
|
||||
|
||||
# Nur Intra-Note-Edges auf Chunk-Level berücksichtigen
|
||||
if not is_internal or scope != "chunk":
|
||||
continue
|
||||
|
||||
# Look up the inverse edge type; edges without a registered inverse get no backlink
|
||||
inv_kind = edge_registry.get_inverse(kind) if edge_registry else None
|
||||
if not inv_kind:
|
||||
continue # Kein inverser Edge-Type verfügbar
|
||||
|
||||
# Prüfe, ob inverser Edge bereits existiert
|
||||
inv_key = (target_id, source_id, inv_kind, None) # Backlink hat keine target_section
|
||||
if inv_key in existing_edge_keys:
|
||||
continue # Backlink bereits vorhanden
|
||||
|
||||
# Erstelle automatischen Backlink
|
||||
backlink_edge = _edge(inv_kind, "chunk", target_id, source_id, note_id, {
|
||||
"chunk_id": target_id, # Backlink geht vom Target-Chunk aus
|
||||
"edge_id": _mk_edge_id(inv_kind, target_id, source_id, "chunk"),
|
||||
"provenance": "rule",
|
||||
"rule_id": "derived:intra_note_backlink",
|
||||
"source_hint": "automatic_backlink",
|
||||
"confidence": PROVENANCE_PRIORITY.get("derived:backlink", 0.8),
|
||||
"is_internal": True,
|
||||
"original_edge_kind": kind # Debug-Info: Welcher Edge-Type wurde invertiert
|
||||
})
|
||||
|
||||
backlinks_to_add.append(backlink_edge)
|
||||
existing_edge_keys.add(inv_key) # Verhindere Duplikate
|
||||
logger.debug(f"WP-26 Backlink erstellt: {target_id} --[{inv_kind}]--> {source_id} (Original: {kind})")
|
||||
|
||||
# Füge Backlinks zu final_edges hinzu
|
||||
if backlinks_to_add:
|
||||
final_edges.extend(backlinks_to_add)
|
||||
logger.info(f"WP-26: {len(backlinks_to_add)} automatische Backlinks für Intra-Note-Edges erstellt")
|
||||
|
||||
return final_edges
|
||||
|
|
@ -566,6 +566,98 @@ default_edge = topology["typical"][0] # "resulted_in"
|
|||
}
|
||||
```
|
||||
|
||||
#### FA-08b: Automatische Backlinks für Intra-Note-Edges
|
||||
|
||||
**Anforderung:** Für alle Intra-Note-Edges (`is_internal: True`, `scope: "chunk"`) werden automatisch inverse Backlinks erzeugt, wenn diese nicht bereits explizit vorhanden sind.
|
||||
|
||||
**Regeln:**
|
||||
|
||||
1. **Nur für Intra-Note-Edges:** Backlinks werden nur für Edges innerhalb derselben Note erstellt
|
||||
2. **Chunk-Level:** Backlinks haben `scope: "chunk"` (nicht `scope: "note"`)
|
||||
3. **Inverser Edge-Type:** Der inverse Edge-Type wird via `EdgeRegistry.get_inverse()` ermittelt; liefert die Registry keinen inversen Typ, wird kein Backlink erzeugt
|
||||
4. **Deduplizierung:** Backlinks werden nur erstellt, wenn noch kein Edge mit vertauschter `source_id`/`target_id`, inversem Edge-Type und ohne `target_section` existiert
|
||||
5. **Gilt für alle Intra-Note-Edges:** Explizite Edges, automatische Section-Transitions, etc.
|
||||
|
||||
**Beispiel:**
|
||||
|
||||
**Input (explizite Edge):**
|
||||
```markdown
|
||||
## Reflexion ^ref
|
||||
> [!section] insight
|
||||
> [!edge] derives
|
||||
> [[#^sit]]
|
||||
```
|
||||
|
||||
**Generierte Edges:**
|
||||
|
||||
```python
|
||||
# Forward-Edge (explizit)
|
||||
{
|
||||
"kind": "derives",
|
||||
"source_id": "NoteID#c02", # Reflexion
|
||||
"target_id": "NoteID#c01", # Situation
|
||||
"scope": "chunk",
|
||||
"is_internal": True,
|
||||
"provenance": "explicit"
|
||||
}
|
||||
|
||||
# Backlink (automatisch erzeugt)
|
||||
{
|
||||
"kind": "derived_from", # Inverser Edge-Type
|
||||
"source_id": "NoteID#c01", # Situation (invertiert)
|
||||
"target_id": "NoteID#c02", # Reflexion (invertiert)
|
||||
"scope": "chunk",
|
||||
"is_internal": True,
|
||||
"provenance": "rule",
|
||||
"rule_id": "derived:intra_note_backlink",
|
||||
"source_hint": "automatic_backlink",
|
||||
"original_edge_kind": "derives" # Debug-Info
|
||||
}
|
||||
```
|
||||
|
||||
**Beispiel (automatische Section-Transition):**
|
||||
|
||||
**Input:**
|
||||
```markdown
|
||||
## Situation ^sit
|
||||
> [!section] experience
|
||||
|
||||
## Reflexion ^ref
|
||||
> [!section] insight
|
||||
<!-- Kein expliziter [!edge] -->
|
||||
```
|
||||
|
||||
**Generierte Edges:**
|
||||
|
||||
```python
|
||||
# Forward-Edge (automatisch aus Schema)
|
||||
{
|
||||
"kind": "resulted_in",
|
||||
"source_id": "NoteID#c01", # experience
|
||||
"target_id": "NoteID#c02", # insight
|
||||
"scope": "chunk",
|
||||
"is_internal": True,
|
||||
"provenance": "rule",
|
||||
"rule_id": "inferred:section_transition"
|
||||
}
|
||||
|
||||
# Backlink (automatisch erzeugt)
|
||||
{
|
||||
"kind": "caused_by", # Inverser Edge-Type
|
||||
"source_id": "NoteID#c02", # insight (invertiert)
|
||||
"target_id": "NoteID#c01", # experience (invertiert)
|
||||
"scope": "chunk",
|
||||
"is_internal": True,
|
||||
"provenance": "rule",
|
||||
"rule_id": "derived:intra_note_backlink"
|
||||
}
|
||||
```
|
||||
|
||||
**Wichtig:**
|
||||
- Backlinks werden **nur** erstellt, wenn noch kein inverser Edge existiert
|
||||
- Backlinks haben immer `scope: "chunk"` (nicht `scope: "note"`)
|
||||
- Backlinks werden **nach** der Deduplizierung erstellt, um Duplikate zu vermeiden
|
||||
|
||||
### 4.4 Retriever-Anpassungen
|
||||
|
||||
#### FA-09: Edge-Gewichtung für Intra-Note-Edges
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ FILE: tests/test_wp26_section_types.py
|
|||
DESCRIPTION: Unit-Tests für WP-26 Phase 1: Section-Types und Intra-Note-Edges
|
||||
WP-26 v1.1: Erweitert um Tests für Section-Split und automatische Edges
|
||||
WP-26 v1.3: Erweitert um Tests für rückwirkende section_type Propagation
|
||||
VERSION: 1.3.0
|
||||
VERSION: 1.4.0 (WP-26 v1.4: Automatische Backlinks)
|
||||
"""
|
||||
import pytest
|
||||
from app.core.chunking.chunking_parser import parse_blocks
|
||||
|
|
@ -624,5 +624,102 @@ class TestBlockIdParsing:
|
|||
assert section == "Normale Überschrift"
|
||||
|
||||
|
||||
class TestAutomaticBacklinks:
    """UT-18: automatic backlinks for intra-note edges (WP-26 v1.4)."""

    def test_backlink_created_for_intra_note_edge(self):
        """An inverse backlink is derived automatically for an intra-note edge."""
        from app.core.graph.graph_derive_edges import build_edges_for_note

        # Two chunks of the same note with different section types and no
        # explicit edge candidates: every edge between them is produced by
        # build_edges_for_note itself.
        chunks = [
            {
                "chunk_id": "note1#c01",
                "type": "experience",
                "section_type": "experience",
                "window": "Situation text",
            },
            {
                "chunk_id": "note1#c02",
                "type": "insight",
                "section_type": "insight",
                "window": "Reflexion text",
            },
        ]

        edges = build_edges_for_note(
            note_id="note1",
            chunks=chunks,
            note_level_references=None,
            include_note_scope_refs=False,
            markdown_body="",
        )

        forward_edges = [
            e for e in edges
            if e.get("source_id") == "note1#c01"
            and e.get("target_id") == "note1#c02"
        ]
        # Select the automatic backlink by its rule id instead of taking the
        # first reverse-direction edge: other edges may also point from c02 to
        # c01, and the property checks below must not depend on list order.
        backlink_edges = [
            e for e in edges
            if e.get("source_id") == "note1#c02"
            and e.get("target_id") == "note1#c01"
            and e.get("rule_id") == "derived:intra_note_backlink"
        ]

        assert len(forward_edges) > 0, "Forward-Edge sollte vorhanden sein"
        assert len(backlink_edges) > 0, "Backlink sollte automatisch erstellt werden"

        # Properties every automatically derived backlink must carry.
        backlink = backlink_edges[0]
        assert backlink.get("is_internal") is True
        assert backlink.get("scope") == "chunk"
        assert backlink.get("provenance") == "rule"
        assert backlink.get("rule_id") == "derived:intra_note_backlink"

    def test_backlink_not_created_if_already_exists(self):
        """No automatic backlink is added when the inverse edge already exists."""
        from app.core.graph.graph_derive_edges import build_edges_for_note

        # Both directions are declared explicitly through the candidate pools:
        # c02 --derives--> c01 and its inverse c01 --derived_from--> c02.
        chunks = [
            {
                "chunk_id": "note1#c01",
                "type": "experience",
                "section_type": "experience",
                "window": "Situation text",
                "candidate_pool": [
                    {
                        "kind": "derived_from",  # inverse edge type
                        "to": "note1#c02",
                        "provenance": "explicit:callout",
                    }
                ],
            },
            {
                "chunk_id": "note1#c02",
                "type": "insight",
                "section_type": "insight",
                "window": "Reflexion text",
                "candidate_pool": [
                    {
                        "kind": "derives",  # forward edge
                        "to": "note1#c01",
                        "provenance": "explicit:callout",
                    }
                ],
            },
        ]

        edges = build_edges_for_note(
            note_id="note1",
            chunks=chunks,
            note_level_references=None,
            include_note_scope_refs=False,
            markdown_body="",
        )

        # Count the inverse edges; the explicit one must not be duplicated.
        backlink_edges = [
            e for e in edges
            if e.get("source_id") == "note1#c01"
            and e.get("target_id") == "note1#c02"
            and e.get("kind") == "derived_from"
        ]

        # Exactly one inverse edge: the explicit one, with no automatic extra.
        assert len(backlink_edges) == 1, f"Erwartet genau einen Backlink, gefunden: {len(backlink_edges)}"
        # The surviving edge must be the explicit one, not a derived backlink.
        assert backlink_edges[0].get("rule_id") != "derived:intra_note_backlink"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user