WP15 Bug fixing
This commit is contained in:
parent
7e9e496d86
commit
87083355ee
|
|
@ -263,13 +263,14 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
|
|||
# Das ist notwendig, da build_edges_for_note Kanten nur aus Chunks extrahiert.
|
||||
dummy_chunk = {
|
||||
"chunk_id": f"{note_id}#full",
|
||||
"text": md_text, # Der Parser schaut in 'text' (oder 'window', 'content')
|
||||
"text": md_text,
|
||||
"content": md_text, # Sicherstellen, dass der Parser Text findet
|
||||
"window": md_text,
|
||||
"type": note_type
|
||||
}
|
||||
|
||||
# 2. Aufruf des Parsers mit dem Dummy-Chunk
|
||||
# WICHTIG: Argumentreihenfolge aus derive_edges.py beachten:
|
||||
# note_id, chunks, note_level_references=None, include_note_scope_refs=False
|
||||
# 2. Aufruf des Parsers (Signatur-Fix!)
|
||||
# derive_edges.py: build_edges_for_note(note_id, chunks, note_level_references=None, include_note_scope_refs=False)
|
||||
raw_edges = build_edges_for_note(
|
||||
note_id,
|
||||
[dummy_chunk],
|
||||
|
|
@ -277,13 +278,12 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
|
|||
include_note_scope_refs=False
|
||||
)
|
||||
|
||||
# 3. Kanten extrahieren und formatieren
|
||||
# 3. Kanten extrahieren
|
||||
all_candidates = set()
|
||||
for e in raw_edges:
|
||||
# Wir ignorieren Strukturkanten, die wir für den Dummy erstellt haben
|
||||
kind = e.get("kind")
|
||||
target = e.get("target_id")
|
||||
if target and kind not in ["belongs_to", "next", "prev"]:
|
||||
if target and kind not in ["belongs_to", "next", "prev", "backlink"]:
|
||||
all_candidates.add(f"{kind}:{target}")
|
||||
|
||||
return list(all_candidates)
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ Zentraler Service für die Transformation von Markdown-Dateien in Qdrant-Objekte
|
|||
Dient als Shared Logic für:
|
||||
1. CLI-Imports (scripts/import_markdown.py)
|
||||
2. API-Uploads (WP-11)
|
||||
Refactored for Async Embedding Support.
|
||||
Refactored for Async Embedding & Async Chunking (WP-15).
|
||||
"""
|
||||
import os
|
||||
import logging
|
||||
|
|
@ -18,6 +18,7 @@ from app.core.parser import (
|
|||
validate_required_frontmatter,
|
||||
)
|
||||
from app.core.note_payload import make_note_payload
|
||||
# ASYNC CHUNKER (WP-15)
|
||||
from app.core.chunker import assemble_chunks
|
||||
from app.core.chunk_payload import make_chunk_payloads
|
||||
|
||||
|
|
@ -193,10 +194,15 @@ class IngestionService:
|
|||
# 5. Processing (Chunking, Embedding, Edges)
|
||||
try:
|
||||
body_text = getattr(parsed, "body", "") or ""
|
||||
chunks = assemble_chunks(fm["id"], body_text, fm["type"])
|
||||
|
||||
# --- FIX: AWAIT ASYNC CHUNKER (WP-15 Update) ---
|
||||
# assemble_chunks ist jetzt eine Coroutine und muss mit await aufgerufen werden.
|
||||
chunks = await assemble_chunks(fm["id"], body_text, fm["type"])
|
||||
# -----------------------------------------------
|
||||
|
||||
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
||||
|
||||
# --- EMBEDDING FIX (ASYNC) ---
|
||||
# --- EMBEDDING (ASYNC) ---
|
||||
vecs = []
|
||||
if chunk_pls:
|
||||
texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user