WP24c - Agentic Edge Validation & Chunk-Aware Multigraph-System (v4.5.8) #22
|
|
@ -208,8 +208,9 @@ def extract_llm_validation_zones(markdown_body: str) -> List[Tuple[str, str]]:
|
||||||
llm_validation_headers = get_llm_validation_zone_headers()
|
llm_validation_headers = get_llm_validation_zone_headers()
|
||||||
|
|
||||||
for i, line in enumerate(lines):
|
for i, line in enumerate(lines):
|
||||||
# Prüfe auf Header
|
# Prüfe auf Header (konfiguriertes Level aus MINDNET_LLM_VALIDATION_HEADER_LEVEL)
|
||||||
header_match = re.match(header_pattern, line.strip())
|
header_match = re.match(header_pattern, line.strip())
|
||||||
|
|
||||||
if header_match:
|
if header_match:
|
||||||
header_text = header_match.group(1).strip()
|
header_text = header_match.group(1).strip()
|
||||||
|
|
||||||
|
|
@ -266,11 +267,16 @@ def extract_callouts_from_markdown(
|
||||||
) -> List[dict]:
|
) -> List[dict]:
|
||||||
"""
|
"""
|
||||||
WP-24c v4.2.1: Extrahiert Callouts aus dem Original-Markdown.
|
WP-24c v4.2.1: Extrahiert Callouts aus dem Original-Markdown.
|
||||||
|
WP-24c v4.5.6: Header-Status-Maschine für korrekte Zonen-Erkennung.
|
||||||
|
|
||||||
Smart Logic: Nur Callouts, die NICHT in Chunks vorkommen (z.B. in Edge-Zonen),
|
Smart Logic: Nur Callouts, die NICHT in Chunks vorkommen (z.B. in Edge-Zonen),
|
||||||
werden mit scope: "note" angelegt. Callouts, die bereits in Chunks erfasst wurden,
|
werden mit scope: "note" angelegt. Callouts, die bereits in Chunks erfasst wurden,
|
||||||
werden übersprungen, um Duplikate zu vermeiden.
|
werden übersprungen, um Duplikate zu vermeiden.
|
||||||
|
|
||||||
|
WP-24c v4.5.6: Prüft für jeden Callout, ob er in einer LLM-Validierungs-Zone liegt.
|
||||||
|
- In LLM-Validierungs-Zone: rule_id = "candidate:explicit:callout"
|
||||||
|
- In Standard-Zone: rule_id = "explicit:callout" (ohne candidate:)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
markdown_body: Original-Markdown-Text (vor Chunking-Filterung)
|
markdown_body: Original-Markdown-Text (vor Chunking-Filterung)
|
||||||
note_id: ID der Note
|
note_id: ID der Note
|
||||||
|
|
@ -287,52 +293,207 @@ def extract_callouts_from_markdown(
|
||||||
|
|
||||||
edges: List[dict] = []
|
edges: List[dict] = []
|
||||||
|
|
||||||
# Extrahiere alle Callouts aus dem gesamten Markdown
|
# WP-24c v4.5.6: Header-Status-Maschine - Baue Mapping von Zeilen zu Zonen-Status
|
||||||
call_pairs, _ = extract_callout_relations(markdown_body)
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
for k, raw_t in call_pairs:
|
llm_validation_headers = get_llm_validation_zone_headers()
|
||||||
t, sec = parse_link_target(raw_t, note_id)
|
llm_validation_level = int(os.getenv("MINDNET_LLM_VALIDATION_HEADER_LEVEL", "3"))
|
||||||
if not t:
|
# WP-24c v4.5.6: Konfigurierbare Header-Ebene (vollständig über .env steuerbar)
|
||||||
|
header_level_pattern = "#" * llm_validation_level
|
||||||
|
header_pattern = rf'^{re.escape(header_level_pattern)}\s+(.+?)$'
|
||||||
|
|
||||||
|
lines = markdown_body.split('\n')
|
||||||
|
current_zone_is_llm_validation = False
|
||||||
|
|
||||||
|
# WP-24c v4.5.6: Zeile-für-Zeile Verarbeitung mit Zonen-Tracking
|
||||||
|
# Extrahiere Callouts direkt während des Durchlaufs, um Zonen-Kontext zu behalten
|
||||||
|
current_kind = None
|
||||||
|
in_callout_block = False
|
||||||
|
callout_block_lines = [] # Sammle Zeilen eines Callout-Blocks
|
||||||
|
|
||||||
|
for i, line in enumerate(lines):
|
||||||
|
stripped = line.strip()
|
||||||
|
|
||||||
|
# WP-24c v4.5.6: Prüfe auf Header (Zonen-Wechsel)
|
||||||
|
# Verwendet das konfigurierte Level aus MINDNET_LLM_VALIDATION_HEADER_LEVEL
|
||||||
|
header_match = re.match(header_pattern, stripped)
|
||||||
|
|
||||||
|
if header_match:
|
||||||
|
header_text = header_match.group(1).strip()
|
||||||
|
# Prüfe, ob dieser Header eine LLM-Validierungs-Zone startet
|
||||||
|
# WP-24c v4.5.6: Header-Status-Maschine - korrekte Zonen-Erkennung
|
||||||
|
current_zone_is_llm_validation = any(
|
||||||
|
header_text.lower() == llm_header.lower()
|
||||||
|
for llm_header in llm_validation_headers
|
||||||
|
)
|
||||||
|
logger.debug(f"DEBUG-TRACER [Zone-Change]: Header '{header_text}' (Level {llm_validation_level}) -> LLM-Validierung: {current_zone_is_llm_validation}")
|
||||||
|
# Beende aktuellen Callout-Block bei Header-Wechsel
|
||||||
|
if in_callout_block:
|
||||||
|
# Verarbeite gesammelten Callout-Block VOR dem Zonen-Wechsel
|
||||||
|
if callout_block_lines:
|
||||||
|
block_text = '\n'.join([lines[j] for j in callout_block_lines])
|
||||||
|
block_call_pairs, _ = extract_callout_relations(block_text)
|
||||||
|
|
||||||
|
# Verarbeite jeden Callout mit Zonen-Kontext
|
||||||
|
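# IMPORTANT: the pending callout block must be tagged with the zone status from BEFORE the header change. NOTE(review): current_zone_is_llm_validation has already been reassigned for the new header above, so the assignment below captures the NEW zone's status — save the old value before updating it.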
|
||||||
|
zone_before_header = current_zone_is_llm_validation
|
||||||
|
|
||||||
|
for k, raw_t in block_call_pairs:
|
||||||
|
t, sec = parse_link_target(raw_t, note_id)
|
||||||
|
if not t:
|
||||||
|
continue
|
||||||
|
|
||||||
|
callout_key = (k, t, sec)
|
||||||
|
is_blocked = callout_key in existing_chunk_callouts
|
||||||
|
|
||||||
|
if is_blocked:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# WP-24c v4.5.6: Bestimme rule_id basierend auf Zonen-Status VOR Header
|
||||||
|
if zone_before_header:
|
||||||
|
rule_id = "candidate:explicit:callout"
|
||||||
|
provenance = "explicit:callout"
|
||||||
|
else:
|
||||||
|
rule_id = "explicit:callout" # KEIN candidate: für Standard-Zonen
|
||||||
|
provenance = "explicit:callout"
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"edge_id": _mk_edge_id(k, note_id, t, "note", target_section=sec),
|
||||||
|
"provenance": provenance,
|
||||||
|
"rule_id": rule_id,
|
||||||
|
"confidence": 0.7
|
||||||
|
}
|
||||||
|
if sec:
|
||||||
|
payload["target_section"] = sec
|
||||||
|
|
||||||
|
logger.debug(f"DEBUG-TRACER [Zone-Check]: Callout in {'LLM-Validierungs' if zone_before_header else 'Standard'}-Zone (Zeile {callout_block_lines[0]}) -> rule_id: {rule_id}")
|
||||||
|
|
||||||
|
edges.append(_edge(
|
||||||
|
kind=k,
|
||||||
|
scope="note",
|
||||||
|
source_id=note_id,
|
||||||
|
target_id=t,
|
||||||
|
note_id=note_id,
|
||||||
|
extra=payload
|
||||||
|
))
|
||||||
|
|
||||||
|
# Reset für nächsten Block
|
||||||
|
in_callout_block = False
|
||||||
|
current_kind = None
|
||||||
|
callout_block_lines = []
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# WP-24c v4.2.2: Prüfe, ob dieser Callout bereits in einem Chunk vorkommt
|
# WP-24c v4.5.6: Prüfe auf Callout-Start
|
||||||
# Härtung: Berücksichtigt auch Sektions-Anker (sec) für Multigraph-Präzision
|
callout_start_match = re.match(r'^\s*>{1,}\s*\[!edge\]\s*(.*)$', stripped, re.IGNORECASE)
|
||||||
# Ein Callout zu "Note#Section1" ist anders als "Note#Section2" oder "Note"
|
if callout_start_match:
|
||||||
callout_key = (k, t, sec)
|
in_callout_block = True
|
||||||
|
callout_block_lines = [i] # Start-Zeile
|
||||||
# WP-24c v4.4.0-DEBUG: Schnittstelle 3 - Global Scan Vergleich
|
header_content = callout_start_match.group(1).strip()
|
||||||
is_blocked = callout_key in existing_chunk_callouts
|
# Prüfe, ob Header einen Typ enthält
|
||||||
logger.debug(f"DEBUG-TRACER [Global Scan Compare]: Key: ({k}, {t}, {sec}), Raw_Target: {raw_t}, In_Block_List: {is_blocked}, Block_List_Size: {len(existing_chunk_callouts) if existing_chunk_callouts else 0}")
|
if header_content and re.match(r'^[a-z_]+$', header_content, re.IGNORECASE):
|
||||||
|
current_kind = header_content.lower()
|
||||||
if is_blocked:
|
|
||||||
# Callout ist bereits in Chunk erfasst -> überspringe (wird mit chunk-Scope angelegt)
|
|
||||||
# Die Sektion (sec) ist bereits im Key enthalten, daher wird Multigraph-Präzision gewährleistet
|
|
||||||
logger.debug(f"DEBUG-TRACER [Global Scan Compare]: Key ({k}, {t}, {sec}) ist blockiert - überspringe")
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# WP-24c v4.2.1: Callout ist NICHT in Chunks -> lege mit scope: "note" an
|
# WP-24c v4.5.6: Sammle Callout-Block-Zeilen
|
||||||
# (typischerweise in Edge-Zonen, die nicht gechunkt werden)
|
if in_callout_block:
|
||||||
# WP-24c v4.3.1: Confidence auf 0.7 gesenkt, damit chunk-Scope (1.0) gewinnt
|
if stripped.startswith('>'):
|
||||||
payload = {
|
callout_block_lines.append(i)
|
||||||
"edge_id": _mk_edge_id(k, note_id, t, "note", target_section=sec),
|
else:
|
||||||
"provenance": "explicit:callout",
|
# Callout-Block beendet - verarbeite gesammelte Zeilen
|
||||||
"rule_id": "callout:edge",
|
if callout_block_lines:
|
||||||
"confidence": 0.7 # WP-24c v4.3.1: Niedrigere Confidence für Note-Scope Callouts
|
# Extrahiere Callouts aus diesem Block
|
||||||
}
|
block_text = '\n'.join([lines[j] for j in callout_block_lines])
|
||||||
if sec:
|
block_call_pairs, _ = extract_callout_relations(block_text)
|
||||||
payload["target_section"] = sec
|
|
||||||
|
|
||||||
# WP-24c v4.4.0-DEBUG: Schnittstelle 3 - Global Scan erstellt Note-Scope Callout
|
# Verarbeite jeden Callout mit Zonen-Kontext
|
||||||
logger.debug(f"DEBUG-TRACER [Global Scan Create]: Erstelle Note-Scope Callout - Kind: {k}, Target: {t}, Section: {sec}, Raw_Target: {raw_t}, Edge_ID: {payload['edge_id']}, Confidence: {payload['confidence']}")
|
for k, raw_t in block_call_pairs:
|
||||||
|
t, sec = parse_link_target(raw_t, note_id)
|
||||||
|
if not t:
|
||||||
|
continue
|
||||||
|
|
||||||
edges.append(_edge(
|
callout_key = (k, t, sec)
|
||||||
kind=k,
|
is_blocked = callout_key in existing_chunk_callouts
|
||||||
scope="note",
|
|
||||||
source_id=note_id,
|
if is_blocked:
|
||||||
target_id=t,
|
continue
|
||||||
note_id=note_id,
|
|
||||||
extra=payload
|
# WP-24c v4.5.6: Bestimme rule_id basierend auf Zonen-Status
|
||||||
))
|
if current_zone_is_llm_validation:
|
||||||
|
rule_id = "candidate:explicit:callout"
|
||||||
|
provenance = "explicit:callout"
|
||||||
|
else:
|
||||||
|
rule_id = "explicit:callout" # KEIN candidate: für Standard-Zonen
|
||||||
|
provenance = "explicit:callout"
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"edge_id": _mk_edge_id(k, note_id, t, "note", target_section=sec),
|
||||||
|
"provenance": provenance,
|
||||||
|
"rule_id": rule_id,
|
||||||
|
"confidence": 0.7
|
||||||
|
}
|
||||||
|
if sec:
|
||||||
|
payload["target_section"] = sec
|
||||||
|
|
||||||
|
logger.debug(f"DEBUG-TRACER [Zone-Check]: Callout in {'LLM-Validierungs' if current_zone_is_llm_validation else 'Standard'}-Zone (Zeile {callout_block_lines[0]}) -> rule_id: {rule_id}")
|
||||||
|
|
||||||
|
edges.append(_edge(
|
||||||
|
kind=k,
|
||||||
|
scope="note",
|
||||||
|
source_id=note_id,
|
||||||
|
target_id=t,
|
||||||
|
note_id=note_id,
|
||||||
|
extra=payload
|
||||||
|
))
|
||||||
|
|
||||||
|
# Reset für nächsten Block
|
||||||
|
in_callout_block = False
|
||||||
|
current_kind = None
|
||||||
|
callout_block_lines = []
|
||||||
|
|
||||||
|
# WP-24c v4.5.6: Verarbeite letzten Callout-Block (falls am Ende)
|
||||||
|
if in_callout_block and callout_block_lines:
|
||||||
|
block_text = '\n'.join([lines[j] for j in callout_block_lines])
|
||||||
|
block_call_pairs, _ = extract_callout_relations(block_text)
|
||||||
|
|
||||||
|
for k, raw_t in block_call_pairs:
|
||||||
|
t, sec = parse_link_target(raw_t, note_id)
|
||||||
|
if not t:
|
||||||
|
continue
|
||||||
|
|
||||||
|
callout_key = (k, t, sec)
|
||||||
|
is_blocked = callout_key in existing_chunk_callouts
|
||||||
|
|
||||||
|
if is_blocked:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# WP-24c v4.5.6: Bestimme rule_id basierend auf Zonen-Status
|
||||||
|
if current_zone_is_llm_validation:
|
||||||
|
rule_id = "candidate:explicit:callout"
|
||||||
|
provenance = "explicit:callout"
|
||||||
|
else:
|
||||||
|
rule_id = "explicit:callout"
|
||||||
|
provenance = "explicit:callout"
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"edge_id": _mk_edge_id(k, note_id, t, "note", target_section=sec),
|
||||||
|
"provenance": provenance,
|
||||||
|
"rule_id": rule_id,
|
||||||
|
"confidence": 0.7
|
||||||
|
}
|
||||||
|
if sec:
|
||||||
|
payload["target_section"] = sec
|
||||||
|
|
||||||
|
logger.debug(f"DEBUG-TRACER [Zone-Check]: Callout in {'LLM-Validierungs' if current_zone_is_llm_validation else 'Standard'}-Zone (Zeile {callout_block_lines[0]}) -> rule_id: {rule_id}")
|
||||||
|
|
||||||
|
edges.append(_edge(
|
||||||
|
kind=k,
|
||||||
|
scope="note",
|
||||||
|
source_id=note_id,
|
||||||
|
target_id=t,
|
||||||
|
note_id=note_id,
|
||||||
|
extra=payload
|
||||||
|
))
|
||||||
|
|
||||||
return edges
|
return edges
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user