Enhance chunking functionality in version 4.2.8. Update the callout pattern so that edge and abstract callouts may carry additional text inside the brackets after the callout type. Modify get_chunk_config to fall back to the frontmatter key chunk_profile when the chunking_profile value is empty. Pass chunk_profile explicitly to make_chunk_payloads so that the profile name is included in the chunk payloads. Add Optional to the typing imports in chunking_parser for clearer type hints.

This commit is contained in:
Lars 2026-01-11 11:49:16 +01:00
parent 1d66ca0649
commit 20fb1e92e2
4 changed files with 9 additions and 4 deletions

View File

@ -8,7 +8,7 @@ DESCRIPTION: Zerlegt Markdown in logische Einheiten (RawBlocks).
""" """
import re import re
import os import os
from typing import List, Tuple, Set, Dict, Any from typing import List, Tuple, Set, Dict, Any, Optional
from .chunking_models import RawBlock from .chunking_models import RawBlock
from .chunking_utils import extract_frontmatter_from_text from .chunking_utils import extract_frontmatter_from_text

View File

@ -147,7 +147,9 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
cleaned_lines = [] cleaned_lines = []
i = 0 i = 0
callout_start_pattern = re.compile(r'^\s*>{1,}\s*\[!(edge|abstract)\]', re.IGNORECASE) # NEU (v4.2.8):
# WP-24c v4.2.8: Callout-Pattern für Edge und Abstract
callout_start_pattern = re.compile(r'^>\s*\[!(edge|abstract)[^\]]*\]', re.IGNORECASE)
while i < len(lines): while i < len(lines):
line = lines[i] line = lines[i]

View File

@ -46,7 +46,7 @@ def get_chunk_config(note_type: str, frontmatter: Optional[Dict[str, Any]] = Non
# WP-24c v4.2.5: Priorität: Frontmatter > Type-Def > Defaults # WP-24c v4.2.5: Priorität: Frontmatter > Type-Def > Defaults
profile_name = None profile_name = None
if frontmatter and "chunking_profile" in frontmatter: if frontmatter and "chunking_profile" in frontmatter:
profile_name = frontmatter.get("chunking_profile") profile_name = frontmatter.get("chunking_profile") or frontmatter.get("chunk_profile")
if not profile_name: if not profile_name:
profile_name = type_def.get("chunking_profile") profile_name = type_def.get("chunking_profile")
if not profile_name: if not profile_name:

View File

@ -252,7 +252,10 @@ class IngestionService:
new_pool.append(cand) new_pool.append(cand)
ch.candidate_pool = new_pool ch.candidate_pool = new_pool
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, file_path=file_path, types_cfg=self.registry) # chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, file_path=file_path, types_cfg=self.registry)
# v4.2.8 Fix C: Explizite Übergabe des Profil-Namens für den Chunk-Payload
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, file_path=file_path, types_cfg=self.registry, chunk_profile=profile)
vecs = await self.embedder.embed_documents([c.get("window") or "" for c in chunk_pls]) if chunk_pls else [] vecs = await self.embedder.embed_documents([c.get("window") or "" for c in chunk_pls]) if chunk_pls else []
# WP-24c v4.2.0: Kanten-Extraktion mit Note-Scope Zonen Support # WP-24c v4.2.0: Kanten-Extraktion mit Note-Scope Zonen Support