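Enhance chunking functionality in version 4.2.8: Update the callout pattern so that edge and abstract callouts may carry additional text inside the brackets after the callout name. Modify get_chunk_config to fall back to the frontmatter key chunk_profile when chunking_profile is empty; note that the enclosing check still requires the chunking_profile key to be present, so a frontmatter that contains only chunk_profile is not picked up by this change. Pass chunk_profile explicitly to make_chunk_payloads so that the profile name reaches the chunk payload. Update type hints in chunking_parser (Optional is now imported from typing).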
This commit is contained in:
parent
1d66ca0649
commit
20fb1e92e2
|
|
@ -8,7 +8,7 @@ DESCRIPTION: Zerlegt Markdown in logische Einheiten (RawBlocks).
|
|||
"""
|
||||
import re
|
||||
import os
|
||||
from typing import List, Tuple, Set, Dict, Any
|
||||
from typing import List, Tuple, Set, Dict, Any, Optional
|
||||
from .chunking_models import RawBlock
|
||||
from .chunking_utils import extract_frontmatter_from_text
|
||||
|
||||
|
|
|
|||
|
|
@ -147,7 +147,9 @@ async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Op
|
|||
cleaned_lines = []
|
||||
i = 0
|
||||
|
||||
callout_start_pattern = re.compile(r'^\s*>{1,}\s*\[!(edge|abstract)\]', re.IGNORECASE)
|
||||
# NEU (v4.2.8):
|
||||
# WP-24c v4.2.8: Callout-Pattern für Edge und Abstract
|
||||
callout_start_pattern = re.compile(r'^>\s*\[!(edge|abstract)[^\]]*\]', re.IGNORECASE)
|
||||
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ def get_chunk_config(note_type: str, frontmatter: Optional[Dict[str, Any]] = Non
|
|||
# WP-24c v4.2.5: Priorität: Frontmatter > Type-Def > Defaults
|
||||
profile_name = None
|
||||
if frontmatter and "chunking_profile" in frontmatter:
|
||||
profile_name = frontmatter.get("chunking_profile")
|
||||
profile_name = frontmatter.get("chunking_profile") or frontmatter.get("chunk_profile")
|
||||
if not profile_name:
|
||||
profile_name = type_def.get("chunking_profile")
|
||||
if not profile_name:
|
||||
|
|
|
|||
|
|
@ -252,7 +252,10 @@ class IngestionService:
|
|||
new_pool.append(cand)
|
||||
ch.candidate_pool = new_pool
|
||||
|
||||
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, file_path=file_path, types_cfg=self.registry)
|
||||
# chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, file_path=file_path, types_cfg=self.registry)
|
||||
# v4.2.8 Fix C: Explizite Übergabe des Profil-Namens für den Chunk-Payload
|
||||
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, file_path=file_path, types_cfg=self.registry, chunk_profile=profile)
|
||||
|
||||
vecs = await self.embedder.embed_documents([c.get("window") or "" for c in chunk_pls]) if chunk_pls else []
|
||||
|
||||
# WP-24c v4.2.0: Kanten-Extraktion mit Note-Scope Zonen Support
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user