mindnet/config/types.yaml
2025-12-11 23:21:24 +01:00

124 lines
3.1 KiB
YAML

# Version of this configuration; updated for the smart-chunking config.
# NOTE(review): unquoted, so YAML loaders read this as the float 1.2 rather
# than a string — confirm the consumer expects a number before quoting it.
version: 1.2
# --- CHUNKING DEFINITIONS ---
# The technical chunking strategies are defined centrally here; each profile
# is a named mapping nested one level below `chunking_profiles`.
chunking_profiles:
  # Default for running text (sliding window)
  sliding_short:
    strategy: sliding_window
    target: 200
    max: 350
    overlap: [30, 50]

  sliding_standard:
    strategy: sliding_window
    target: 400
    max: 600
    overlap: [50, 80]

  sliding_large:
    strategy: sliding_window
    target: 500
    max: 800
    overlap: [60, 100]

  # Smart chunking for structured content (hard splits)
  structured_strict:
    strategy: by_heading
    split_level: 2
    max: 600  # Fallback limit
    target: 400  # Fallback target for sub-chunking
    overlap: [50, 80]  # Overlap for sub-chunking

  # NEW: LLM-based semantic splitting (Chunker.py calls semantic_analyzer.py)
  semantic_llm:
    strategy: semantic_llm
    # The LLM controls chunk length, so these serve as fallback/recommendation
    target: 400
    max: 800
    # NOTE(review): unlike the other profiles, no `overlap` is set here —
    # confirm the fallback path does not require one.
# Baseline settings; presumably applied when a type omits a key — confirm
# against the loader.
defaults:
  retriever_weight: 1.0
  chunking_profile: sliding_standard  # Fallback profile
  edge_defaults: []
# Per-type settings. Each entry names one of the profiles declared under
# `chunking_profiles`, sets a `retriever_weight`, and lists its `edge_defaults`.
types:
  # --- KNOWLEDGE BUILDING BLOCKS ---
  concept:
    chunking_profile: sliding_standard
    retriever_weight: 0.60
    edge_defaults: ["references", "related_to"]

  source:
    chunking_profile: sliding_standard
    retriever_weight: 0.50
    edge_defaults: []

  glossary:
    chunking_profile: sliding_short
    retriever_weight: 0.40
    edge_defaults: ["related_to"]

  # --- IDENTITY & PERSONALITY ---
  profile:
    chunking_profile: structured_strict
    retriever_weight: 0.70
    edge_defaults: ["references", "related_to"]

  value:
    chunking_profile: structured_strict
    retriever_weight: 1.00
    edge_defaults: ["related_to"]

  principle:
    chunking_profile: structured_strict
    retriever_weight: 0.95
    edge_defaults: ["derived_from", "references"]

  belief:
    chunking_profile: sliding_short
    retriever_weight: 0.90
    edge_defaults: ["related_to"]

  experience:
    chunking_profile: sliding_standard
    retriever_weight: 0.90
    edge_defaults: ["derived_from", "references"]

  # --- STRATEGY & DECISION ---
  goal:
    chunking_profile: sliding_standard
    retriever_weight: 0.95
    edge_defaults: ["depends_on", "related_to"]

  decision:
    chunking_profile: structured_strict
    retriever_weight: 1.00
    edge_defaults: ["caused_by", "references"]

  risk:
    chunking_profile: sliding_short
    retriever_weight: 0.85
    edge_defaults: ["related_to", "blocks"]

  milestone:
    chunking_profile: sliding_short
    retriever_weight: 0.70
    edge_defaults: ["related_to", "part_of"]

  # --- OPERATIONAL ---
  project:
    chunking_profile: sliding_large
    retriever_weight: 0.97
    edge_defaults: ["references", "depends_on"]

  task:
    chunking_profile: sliding_short
    retriever_weight: 0.80
    edge_defaults: ["depends_on", "part_of"]

  journal:
    # NEW ASSIGNMENT: journals benefit most from semantic analysis
    chunking_profile: semantic_llm
    retriever_weight: 0.80
    edge_defaults: ["references", "related_to"]