diff --git a/app/core/chunking/chunking_parser.py b/app/core/chunking/chunking_parser.py index df9db13..2423acb 100644 --- a/app/core/chunking/chunking_parser.py +++ b/app/core/chunking/chunking_parser.py @@ -8,7 +8,7 @@ DESCRIPTION: Zerlegt Markdown in logische Einheiten (RawBlocks). """ import re import os -from typing import List, Tuple, Set, Dict, Any +from typing import List, Tuple, Set, Dict, Any, Optional from .chunking_models import RawBlock from .chunking_utils import extract_frontmatter_from_text diff --git a/app/core/chunking/chunking_processor.py b/app/core/chunking/chunking_processor.py index 290b527..93bdb14 100644 --- a/app/core/chunking/chunking_processor.py +++ b/app/core/chunking/chunking_processor.py @@ -147,7 +147,9 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op cleaned_lines = [] i = 0 - callout_start_pattern = re.compile(r'^\s*>{1,}\s*\[!(edge|abstract)\]', re.IGNORECASE) + # NEU (v4.2.8): + # WP-24c v4.2.8: Callout-Pattern für Edge und Abstract + callout_start_pattern = re.compile(r'^>\s*\[!(edge|abstract)[^\]]*\]', re.IGNORECASE) while i < len(lines): line = lines[i] diff --git a/app/core/chunking/chunking_utils.py b/app/core/chunking/chunking_utils.py index b0def8f..fe7456b 100644 --- a/app/core/chunking/chunking_utils.py +++ b/app/core/chunking/chunking_utils.py @@ -46,7 +46,7 @@ def get_chunk_config(note_type: str, frontmatter: Optional[Dict[str, Any]] = Non # WP-24c v4.2.5: Priorität: Frontmatter > Type-Def > Defaults profile_name = None if frontmatter and "chunking_profile" in frontmatter: - profile_name = frontmatter.get("chunking_profile") + profile_name = frontmatter.get("chunking_profile") or frontmatter.get("chunk_profile") if not profile_name: profile_name = type_def.get("chunking_profile") if not profile_name: diff --git a/app/core/ingestion/ingestion_processor.py b/app/core/ingestion/ingestion_processor.py index f4cef12..d803d9a 100644 --- a/app/core/ingestion/ingestion_processor.py +++ b/app/core/ingestion/ingestion_processor.py @@ -252,7 +252,10 @@ class IngestionService: new_pool.append(cand) ch.candidate_pool = new_pool - chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, file_path=file_path, types_cfg=self.registry) + # chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, file_path=file_path, types_cfg=self.registry) + # v4.2.8 Fix C: Explizite Übergabe des Profil-Namens für den Chunk-Payload + chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, file_path=file_path, types_cfg=self.registry, chunk_profile=profile) + vecs = await self.embedder.embed_documents([c.get("window") or "" for c in chunk_pls]) if chunk_pls else [] # WP-24c v4.2.0: Kanten-Extraktion mit Note-Scope Zonen Support