WP15 Bug fixing
This commit is contained in:
parent
7e9e496d86
commit
87083355ee
|
|
@ -263,13 +263,14 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
|
||||||
# Das ist notwendig, da build_edges_for_note Kanten nur aus Chunks extrahiert.
|
# Das ist notwendig, da build_edges_for_note Kanten nur aus Chunks extrahiert.
|
||||||
dummy_chunk = {
|
dummy_chunk = {
|
||||||
"chunk_id": f"{note_id}#full",
|
"chunk_id": f"{note_id}#full",
|
||||||
"text": md_text, # Der Parser schaut in 'text' (oder 'window', 'content')
|
"text": md_text,
|
||||||
|
"content": md_text, # Sicherstellen, dass der Parser Text findet
|
||||||
|
"window": md_text,
|
||||||
"type": note_type
|
"type": note_type
|
||||||
}
|
}
|
||||||
|
|
||||||
# 2. Aufruf des Parsers mit dem Dummy-Chunk
|
# 2. Aufruf des Parsers (Signatur-Fix!)
|
||||||
# WICHTIG: Argumentreihenfolge aus derive_edges.py beachten:
|
# derive_edges.py: build_edges_for_note(note_id, chunks, note_level_references=None, include_note_scope_refs=False)
|
||||||
# note_id, chunks, note_level_references=None, include_note_scope_refs=False
|
|
||||||
raw_edges = build_edges_for_note(
|
raw_edges = build_edges_for_note(
|
||||||
note_id,
|
note_id,
|
||||||
[dummy_chunk],
|
[dummy_chunk],
|
||||||
|
|
@ -277,13 +278,12 @@ def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> Li
|
||||||
include_note_scope_refs=False
|
include_note_scope_refs=False
|
||||||
)
|
)
|
||||||
|
|
||||||
# 3. Kanten extrahieren und formatieren
|
# 3. Kanten extrahieren
|
||||||
all_candidates = set()
|
all_candidates = set()
|
||||||
for e in raw_edges:
|
for e in raw_edges:
|
||||||
# Wir ignorieren Strukturkanten, die wir für den Dummy erstellt haben
|
|
||||||
kind = e.get("kind")
|
kind = e.get("kind")
|
||||||
target = e.get("target_id")
|
target = e.get("target_id")
|
||||||
if target and kind not in ["belongs_to", "next", "prev"]:
|
if target and kind not in ["belongs_to", "next", "prev", "backlink"]:
|
||||||
all_candidates.add(f"{kind}:{target}")
|
all_candidates.add(f"{kind}:{target}")
|
||||||
|
|
||||||
return list(all_candidates)
|
return list(all_candidates)
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ Zentraler Service für die Transformation von Markdown-Dateien in Qdrant-Objekte
|
||||||
Dient als Shared Logic für:
|
Dient als Shared Logic für:
|
||||||
1. CLI-Imports (scripts/import_markdown.py)
|
1. CLI-Imports (scripts/import_markdown.py)
|
||||||
2. API-Uploads (WP-11)
|
2. API-Uploads (WP-11)
|
||||||
Refactored for Async Embedding Support.
|
Refactored for Async Embedding & Async Chunking (WP-15).
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -18,6 +18,7 @@ from app.core.parser import (
|
||||||
validate_required_frontmatter,
|
validate_required_frontmatter,
|
||||||
)
|
)
|
||||||
from app.core.note_payload import make_note_payload
|
from app.core.note_payload import make_note_payload
|
||||||
|
# ASYNC CHUNKER (WP-15)
|
||||||
from app.core.chunker import assemble_chunks
|
from app.core.chunker import assemble_chunks
|
||||||
from app.core.chunk_payload import make_chunk_payloads
|
from app.core.chunk_payload import make_chunk_payloads
|
||||||
|
|
||||||
|
|
@ -193,10 +194,15 @@ class IngestionService:
|
||||||
# 5. Processing (Chunking, Embedding, Edges)
|
# 5. Processing (Chunking, Embedding, Edges)
|
||||||
try:
|
try:
|
||||||
body_text = getattr(parsed, "body", "") or ""
|
body_text = getattr(parsed, "body", "") or ""
|
||||||
chunks = assemble_chunks(fm["id"], body_text, fm["type"])
|
|
||||||
|
# --- FIX: AWAIT ASYNC CHUNKER (WP-15 Update) ---
|
||||||
|
# assemble_chunks ist jetzt eine Coroutine und muss mit await aufgerufen werden.
|
||||||
|
chunks = await assemble_chunks(fm["id"], body_text, fm["type"])
|
||||||
|
# -----------------------------------------------
|
||||||
|
|
||||||
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
||||||
|
|
||||||
# --- EMBEDDING FIX (ASYNC) ---
|
# --- EMBEDDING (ASYNC) ---
|
||||||
vecs = []
|
vecs = []
|
||||||
if chunk_pls:
|
if chunk_pls:
|
||||||
texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]
|
texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user