bug
This commit is contained in:
parent
e27b1f4621
commit
7fc316d284
|
|
@ -17,8 +17,8 @@ from app.services.semantic_analyzer import get_semantic_analyzer
|
||||||
try:
|
try:
|
||||||
from app.core.derive_edges import build_edges_for_note
|
from app.core.derive_edges import build_edges_for_note
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# Mock für Tests: Signatur muss mit dem Aufruf übereinstimmen
|
# Mock für Tests
|
||||||
def build_edges_for_note(text, note_id, note_type, chunks=[], references=[]): return []
|
def build_edges_for_note(note_id, chunks, note_level_references=None, include_note_scope_refs=False): return []
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -105,7 +105,6 @@ def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
|
||||||
|
|
||||||
fm, text_without_fm = extract_frontmatter_from_text(md_text)
|
fm, text_without_fm = extract_frontmatter_from_text(md_text)
|
||||||
|
|
||||||
# H1 suchen
|
|
||||||
h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
|
h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
|
||||||
if h1_match:
|
if h1_match:
|
||||||
h1_title = h1_match.group(1).strip()
|
h1_title = h1_match.group(1).strip()
|
||||||
|
|
@ -218,7 +217,7 @@ def _strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id
|
||||||
return _strategy_sliding_window(blocks, config, note_id, doc_title, context_prefix=f"# {doc_title}")
|
return _strategy_sliding_window(blocks, config, note_id, doc_title, context_prefix=f"# {doc_title}")
|
||||||
|
|
||||||
# ==========================================
|
# ==========================================
|
||||||
# 4. ORCHESTRATION (ASYNC)
|
# 4. ORCHESTRATION (ASYNC) - WP-15 CORE
|
||||||
# ==========================================
|
# ==========================================
|
||||||
|
|
||||||
async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Optional[Dict] = None) -> List[Chunk]:
|
async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Optional[Dict] = None) -> List[Chunk]:
|
||||||
|
|
@ -246,6 +245,7 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
|
||||||
return []
|
return []
|
||||||
|
|
||||||
if enable_smart_edges:
|
if enable_smart_edges:
|
||||||
|
# Hier rufen wir nun die Smart Edge Allocation auf
|
||||||
chunks = await _run_smart_edge_allocation(chunks, md_text, note_id, note_type)
|
chunks = await _run_smart_edge_allocation(chunks, md_text, note_id, note_type)
|
||||||
|
|
||||||
for i, ch in enumerate(chunks):
|
for i, ch in enumerate(chunks):
|
||||||
|
|
@ -254,36 +254,57 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> List[str]:
|
||||||
|
"""
|
||||||
|
Hilfsfunktion: Erstellt einen Dummy-Chunk für den gesamten Text und ruft
|
||||||
|
den Edge-Parser auf, um ALLE Kanten der Notiz zu finden.
|
||||||
|
"""
|
||||||
|
# 1. Dummy Chunk erstellen, der den gesamten Text enthält
|
||||||
|
# Das ist notwendig, da build_edges_for_note Kanten nur aus Chunks extrahiert.
|
||||||
|
dummy_chunk = {
|
||||||
|
"chunk_id": f"{note_id}#full",
|
||||||
|
"text": md_text, # Der Parser schaut in 'text' (oder 'window', 'content')
|
||||||
|
"type": note_type
|
||||||
|
}
|
||||||
|
|
||||||
|
# 2. Aufruf des Parsers mit dem Dummy-Chunk
|
||||||
|
# WICHTIG: Argumentreihenfolge aus derive_edges.py beachten:
|
||||||
|
# note_id, chunks, note_level_references=None, include_note_scope_refs=False
|
||||||
|
raw_edges = build_edges_for_note(
|
||||||
|
note_id,
|
||||||
|
[dummy_chunk],
|
||||||
|
note_level_references=None,
|
||||||
|
include_note_scope_refs=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# 3. Kanten extrahieren und formatieren
|
||||||
|
all_candidates = set()
|
||||||
|
for e in raw_edges:
|
||||||
|
# Wir ignorieren Strukturkanten, die wir für den Dummy erstellt haben
|
||||||
|
kind = e.get("kind")
|
||||||
|
target = e.get("target_id")
|
||||||
|
if target and kind not in ["belongs_to", "next", "prev"]:
|
||||||
|
all_candidates.add(f"{kind}:{target}")
|
||||||
|
|
||||||
|
return list(all_candidates)
|
||||||
|
|
||||||
async def _run_smart_edge_allocation(chunks: List[Chunk], full_text: str, note_id: str, note_type: str) -> List[Chunk]:
|
async def _run_smart_edge_allocation(chunks: List[Chunk], full_text: str, note_id: str, note_type: str) -> List[Chunk]:
|
||||||
analyzer = get_semantic_analyzer()
|
analyzer = get_semantic_analyzer()
|
||||||
|
|
||||||
# FIX: Nutzung von positional arguments für die ersten 3 Parameter
|
# A. Alle potenziellen Kanten der Notiz sammeln (über den Dummy-Chunk Trick)
|
||||||
# Dies verhindert den "multiple values for argument" Fehler
|
candidate_list = _extract_all_edges_from_md(full_text, note_id, note_type)
|
||||||
raw_edges = build_edges_for_note(
|
|
||||||
full_text,
|
|
||||||
note_id,
|
|
||||||
note_type,
|
|
||||||
chunks=[],
|
|
||||||
references=[]
|
|
||||||
)
|
|
||||||
|
|
||||||
all_candidates = set()
|
|
||||||
if raw_edges:
|
|
||||||
for e in raw_edges:
|
|
||||||
if e.get("target_id") and e.get("kind") not in ["next", "prev", "belongs_to"]:
|
|
||||||
all_candidates.add(f"{e['kind']}:{e['target_id']}")
|
|
||||||
|
|
||||||
candidate_list = list(all_candidates)
|
|
||||||
|
|
||||||
if not candidate_list:
|
if not candidate_list:
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
# B. LLM Filterung pro Chunk (Parallel)
|
||||||
tasks = []
|
tasks = []
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
tasks.append(analyzer.assign_edges_to_chunk(chunk.text, candidate_list, note_type))
|
tasks.append(analyzer.assign_edges_to_chunk(chunk.text, candidate_list, note_type))
|
||||||
|
|
||||||
results_per_chunk = await asyncio.gather(*tasks)
|
results_per_chunk = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
# C. Injection & Fallback
|
||||||
assigned_edges_global = set()
|
assigned_edges_global = set()
|
||||||
|
|
||||||
for i, confirmed_edges in enumerate(results_per_chunk):
|
for i, confirmed_edges in enumerate(results_per_chunk):
|
||||||
|
|
@ -296,6 +317,7 @@ async def _run_smart_edge_allocation(chunks: List[Chunk], full_text: str, note_i
|
||||||
chunk.text += injection_str
|
chunk.text += injection_str
|
||||||
chunk.window += injection_str
|
chunk.window += injection_str
|
||||||
|
|
||||||
|
# D. Fallback: Unassigned Kanten überall hin
|
||||||
unassigned = set(candidate_list) - assigned_edges_global
|
unassigned = set(candidate_list) - assigned_edges_global
|
||||||
if unassigned:
|
if unassigned:
|
||||||
fallback_str = "\n" + " ".join([f"[[rel:{e.split(':')[0]}|{e.split(':')[1]}]]" for e in unassigned if ':' in e])
|
fallback_str = "\n" + " ".join([f"[[rel:{e.split(':')[0]}|{e.split(':')[1]}]]" for e in unassigned if ':' in e])
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user