WP-15: Bug fixes

This commit is contained in:
Lars 2025-12-12 12:58:24 +01:00
parent 7e9e496d86
commit 87083355ee
2 changed files with 17 additions and 11 deletions

View File

@@ -263,13 +263,14 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
# Das ist notwendig, da build_edges_for_note Kanten nur aus Chunks extrahiert.
dummy_chunk = {
"chunk_id": f"{note_id}#full",
"text": md_text, # Der Parser schaut in 'text' (oder 'window', 'content')
"text": md_text,
"content": md_text, # Sicherstellen, dass der Parser Text findet
"window": md_text,
"type": note_type
}
# 2. Aufruf des Parsers mit dem Dummy-Chunk
# WICHTIG: Argumentreihenfolge aus derive_edges.py beachten:
# note_id, chunks, note_level_references=None, include_note_scope_refs=False
# 2. Aufruf des Parsers (Signatur-Fix!)
# derive_edges.py: build_edges_for_note(note_id, chunks, note_level_references=None, include_note_scope_refs=False)
raw_edges = build_edges_for_note(
note_id,
[dummy_chunk],
@@ -277,13 +278,12 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
include_note_scope_refs=False
)
# 3. Kanten extrahieren und formatieren
# 3. Kanten extrahieren
all_candidates = set()
for e in raw_edges:
# Wir ignorieren Strukturkanten, die wir für den Dummy erstellt haben
kind = e.get("kind")
target = e.get("target_id")
if target and kind not in ["belongs_to", "next", "prev"]:
if target and kind not in ["belongs_to", "next", "prev", "backlink"]:
all_candidates.add(f"{kind}:{target}")
return list(all_candidates)

View File

@@ -5,7 +5,7 @@ Zentraler Service für die Transformation von Markdown-Dateien in Qdrant-Objekte
Dient als Shared Logic für:
1. CLI-Imports (scripts/import_markdown.py)
2. API-Uploads (WP-11)
Refactored for Async Embedding Support.
Refactored for Async Embedding & Async Chunking (WP-15).
"""
import os
import logging
@@ -18,6 +18,7 @@ from app.core.parser import (
validate_required_frontmatter,
)
from app.core.note_payload import make_note_payload
# ASYNC CHUNKER (WP-15)
from app.core.chunker import assemble_chunks
from app.core.chunk_payload import make_chunk_payloads
@@ -193,10 +194,15 @@ class IngestionService:
# 5. Processing (Chunking, Embedding, Edges)
try:
body_text = getattr(parsed, "body", "") or ""
chunks = assemble_chunks(fm["id"], body_text, fm["type"])
# --- FIX: AWAIT ASYNC CHUNKER (WP-15 Update) ---
# assemble_chunks ist jetzt eine Coroutine und muss mit await aufgerufen werden.
chunks = await assemble_chunks(fm["id"], body_text, fm["type"])
# -----------------------------------------------
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
# --- EMBEDDING FIX (ASYNC) ---
# --- EMBEDDING (ASYNC) ---
vecs = []
if chunk_pls:
texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]