WP15 Bug fixing

This commit is contained in:
Lars 2025-12-12 12:58:24 +01:00
parent 7e9e496d86
commit 87083355ee
2 changed files with 17 additions and 11 deletions

View File

@@ -263,13 +263,14 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
# Das ist notwendig, da build_edges_for_note Kanten nur aus Chunks extrahiert. # Das ist notwendig, da build_edges_for_note Kanten nur aus Chunks extrahiert.
dummy_chunk = { dummy_chunk = {
"chunk_id": f"{note_id}#full", "chunk_id": f"{note_id}#full",
"text": md_text, # Der Parser schaut in 'text' (oder 'window', 'content') "text": md_text,
"content": md_text, # Sicherstellen, dass der Parser Text findet
"window": md_text,
"type": note_type "type": note_type
} }
# 2. Aufruf des Parsers mit dem Dummy-Chunk # 2. Aufruf des Parsers (Signatur-Fix!)
# WICHTIG: Argumentreihenfolge aus derive_edges.py beachten: # derive_edges.py: build_edges_for_note(note_id, chunks, note_level_references=None, include_note_scope_refs=False)
# note_id, chunks, note_level_references=None, include_note_scope_refs=False
raw_edges = build_edges_for_note( raw_edges = build_edges_for_note(
note_id, note_id,
[dummy_chunk], [dummy_chunk],
@@ -277,13 +278,12 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
include_note_scope_refs=False include_note_scope_refs=False
) )
# 3. Kanten extrahieren und formatieren # 3. Kanten extrahieren
all_candidates = set() all_candidates = set()
for e in raw_edges: for e in raw_edges:
# Wir ignorieren Strukturkanten, die wir für den Dummy erstellt haben
kind = e.get("kind") kind = e.get("kind")
target = e.get("target_id") target = e.get("target_id")
if target and kind not in ["belongs_to", "next", "prev"]: if target and kind not in ["belongs_to", "next", "prev", "backlink"]:
all_candidates.add(f"{kind}:{target}") all_candidates.add(f"{kind}:{target}")
return list(all_candidates) return list(all_candidates)

View File

@@ -5,7 +5,7 @@ Zentraler Service für die Transformation von Markdown-Dateien in Qdrant-Objekte
Dient als Shared Logic für: Dient als Shared Logic für:
1. CLI-Imports (scripts/import_markdown.py) 1. CLI-Imports (scripts/import_markdown.py)
2. API-Uploads (WP-11) 2. API-Uploads (WP-11)
Refactored for Async Embedding Support. Refactored for Async Embedding & Async Chunking (WP-15).
""" """
import os import os
import logging import logging
@@ -18,6 +18,7 @@ from app.core.parser import (
validate_required_frontmatter, validate_required_frontmatter,
) )
from app.core.note_payload import make_note_payload from app.core.note_payload import make_note_payload
# ASYNC CHUNKER (WP-15)
from app.core.chunker import assemble_chunks from app.core.chunker import assemble_chunks
from app.core.chunk_payload import make_chunk_payloads from app.core.chunk_payload import make_chunk_payloads
@@ -193,10 +194,15 @@ class IngestionService:
# 5. Processing (Chunking, Embedding, Edges) # 5. Processing (Chunking, Embedding, Edges)
try: try:
body_text = getattr(parsed, "body", "") or "" body_text = getattr(parsed, "body", "") or ""
chunks = assemble_chunks(fm["id"], body_text, fm["type"])
# --- FIX: AWAIT ASYNC CHUNKER (WP-15 Update) ---
# assemble_chunks ist jetzt eine Coroutine und muss mit await aufgerufen werden.
chunks = await assemble_chunks(fm["id"], body_text, fm["type"])
# -----------------------------------------------
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text) chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
# --- EMBEDDING FIX (ASYNC) --- # --- EMBEDDING (ASYNC) ---
vecs = [] vecs = []
if chunk_pls: if chunk_pls:
texts = [c.get("window") or c.get("text") or "" for c in chunk_pls] texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]