"""
FILE: scripts/test_wp26_comprehensive.py
DESCRIPTION: Comprehensive test script for WP-26 - checks the FA requirements
             from the requirements specification (Lastenheft) v1.3
VERSION: 1.0.0
"""
import sys
import os
from pathlib import Path

# Add the project root to the Python path. The path is resolved first so the
# result is correct even when __file__ is a bare relative filename.
project_root = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(project_root))

from typing import Dict, List, Tuple, Optional
from qdrant_client import QdrantClient
import yaml
import json


# Colors for terminal output (ANSI escape sequences)
class Colors:
    GREEN = '\033[92m'
    RED = '\033[91m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    RESET = '\033[0m'
    BOLD = '\033[1m'


def print_header(text: str) -> None:
    """Print *text* as a bold blue banner framed by two 70-character rules."""
    print(f"\n{Colors.BOLD}{Colors.BLUE}{'='*70}{Colors.RESET}")
    print(f"{Colors.BOLD}{Colors.BLUE}{text}{Colors.RESET}")
    print(f"{Colors.BOLD}{Colors.BLUE}{'='*70}{Colors.RESET}\n")


def print_success(text: str) -> None:
    """Print *text* in green, prefixed with a check mark."""
    print(f"{Colors.GREEN}✓ {text}{Colors.RESET}")


def print_error(text: str) -> None:
    """Print *text* in red, prefixed with a cross."""
    print(f"{Colors.RED}✗ {text}{Colors.RESET}")


def print_warning(text: str) -> None:
    """Print *text* in yellow, prefixed with a warning sign."""
    print(f"{Colors.YELLOW}⚠ {text}{Colors.RESET}")


def print_info(text: str) -> None:
    """Print *text* uncolored with a leading space."""
    print(f" {text}")


# ============================================================================
# PHASE 1: Section-Types & Parsing
# ============================================================================

def test_fa01_section_callout_format() -> bool:
    """FA-01: New callout format [!section].

    Parses a small markdown sample with ``parse_blocks`` and passes when at
    least one returned block reports ``section_type == "insight"``.
    """
    print_header("FA-01: Section-Callout-Format")

    from app.core.chunking.chunking_parser import parse_blocks

    markdown = """## Test Section ^test-id
> [!section] insight

Content here.
"""

    blocks, _ = parse_blocks(markdown)

    section_found = False
    for block in blocks:
        if block.section_type == "insight":
            section_found = True
            print_success(f"Section-Type 'insight' erkannt in Block: {block.text[:50]}...")
            break

    if not section_found:
        print_error("Section-Type wurde nicht erkannt")
        return False

    return True


def test_fa01b_nested_edge_callouts() -> bool:
    """FA-01b: Nested edge callouts.

    Passes when ``extract_callout_relations`` yields at least two
    (kind, target) pairs for a callout that nests two ``[!edge]`` entries.
    """
    print_header("FA-01b: Verschachtelte Edge-Callouts")

    from app.core.graph.graph_derive_edges import extract_callout_relations

    markdown = """> [!abstract] Semantic Edges
>> [!edge] derives
>> [[#^sit]]
>>
>> [!edge] supports
>> [[Target]]
"""

    pairs, _ = extract_callout_relations(markdown)

    if len(pairs) >= 2:
        print_success(f"Verschachtelte Callouts erkannt: {len(pairs)} Edges gefunden")
        for kind, target in pairs:
            print_info(f" - {kind} -> {target}")
        return True
    else:
        print_error(f"Verschachtelte Callouts nicht korrekt erkannt: {len(pairs)} Edges")
        return False


def test_fa02_scope_termination() -> bool:
    """FA-02: Scope termination.

    Section A must carry the section type "insight". A section type on
    Section B is only reported as a warning and does not fail the test.
    """
    print_header("FA-02: Scope-Beendigung")

    from app.core.chunking.chunking_parser import parse_blocks

    markdown = """## Section A ^a
> [!section] insight

Content A.

## Section B ^b

Content B (sollte note_type verwenden).
"""

    blocks, _ = parse_blocks(markdown)

    section_a_type = None
    section_b_type = None

    # The last matching block wins for each section.
    for block in blocks:
        if "Section A" in block.text or block.section_type == "insight":
            section_a_type = block.section_type
        if "Section B" in block.text:
            section_b_type = block.section_type

    if section_a_type == "insight":
        print_success(f"Section A hat korrekten Type: {section_a_type}")
    else:
        print_error(f"Section A hat falschen Type: {section_a_type}")
        return False

    # Section B should have None (fallback to note_type)
    if section_b_type is None:
        print_success("Section B verwendet Fallback (None = note_type)")
    else:
        print_warning(f"Section B hat Type: {section_b_type} (erwartet: None)")

    return True


def test_fa03_type_field() -> bool:
    """FA-03: Population of the ``type`` field with the effective type.

    Builds chunk payloads for a two-section note and fails when a payload
    that has a ``section_type`` reports a different ``type``. Also fails when
    no payloads are produced, because nothing could be verified then.
    """
    print_header("FA-03: type-Feld-Befüllung")

    from app.core.ingestion.ingestion_chunk_payload import make_chunk_payloads
    from app.core.chunking.chunking_parser import parse_blocks
    from app.core.chunking.chunking_strategies import strategy_by_heading

    # Mock note
    markdown = """---
type: experience
---

## Situation ^sit
> [!section] experience

Text.

## Reflexion ^ref
> [!section] insight

Text.
"""

    blocks, _ = parse_blocks(markdown)

    # Correct signature: strategy_by_heading(blocks, config, note_id, context_prefix)
    config = {
        "max": 500,
        "target": 400,
        "enable_smart_edge_allocation": True,
    }
    chunks = strategy_by_heading(blocks, config, note_id="test-note")

    # Create payloads
    # Signature: make_chunk_payloads(note, note_path, chunks_from_chunker, **kwargs)
    payloads = make_chunk_payloads(
        note={"frontmatter": {"type": "experience"}},
        note_path="test.md",
        chunks_from_chunker=chunks,
        file_path="test.md",
        types_cfg={},
    )

    # Without payloads the loop below would pass without checking anything.
    if not payloads:
        print_error("Keine Payloads erzeugt - effective_type kann nicht geprüft werden")
        return False

    # Check effective_type
    for p in payloads:
        effective_type = p.get("type")
        note_type = p.get("note_type")
        section_type = p.get("section_type")

        print_info(f"Chunk: type={effective_type}, note_type={note_type}, section_type={section_type}")

        # Section type should take precedence
        if section_type:
            if effective_type != section_type:
                print_error(f"effective_type ({effective_type}) != section_type ({section_type})")
                return False

    print_success("effective_type wird korrekt berechnet (section_type || note_type)")
    return True


def test_fa04_note_type_field() -> bool:
    """FA-04: Optional field ``note_type``.

    Every payload must contain ``note_type == "experience"``. Fails when no
    payloads are produced, because nothing could be verified then.
    """
    print_header("FA-04: note_type-Feld")

    from app.core.ingestion.ingestion_chunk_payload import make_chunk_payloads
    from app.core.chunking.chunking_parser import parse_blocks
    from app.core.chunking.chunking_strategies import strategy_by_heading

    markdown = """---
type: experience
---

## Section ^sec
> [!section] insight

Text.
"""

    blocks, _ = parse_blocks(markdown)

    # Correct signature: strategy_by_heading(blocks, config, note_id, context_prefix)
    config = {
        "max": 500,
        "target": 400,
        "enable_smart_edge_allocation": True,
    }
    chunks = strategy_by_heading(blocks, config, note_id="test-note")

    # Signature: make_chunk_payloads(note, note_path, chunks_from_chunker, **kwargs)
    payloads = make_chunk_payloads(
        note={"frontmatter": {"type": "experience"}},
        note_path="test.md",
        chunks_from_chunker=chunks,
        file_path="test.md",
        types_cfg={},
    )

    # Without payloads the success message below would be unfounded.
    if not payloads:
        print_error("Keine Payloads erzeugt - note_type kann nicht geprüft werden")
        return False

    for p in payloads:
        if "note_type" not in p:
            print_error("note_type-Feld fehlt im Payload")
            return False

        if p["note_type"] != "experience":
            print_error(f"note_type ist falsch: {p['note_type']} (erwartet: experience)")
            return False

    print_success("note_type-Feld ist vorhanden und korrekt")
    return True


def test_fa05_block_reference() -> bool:
    """FA-05: Block reference as link format.

    Checks the second element returned by ``parse_link_target`` for a pure
    block-id link and for a section string that ends in a block id.
    """
    print_header("FA-05: Block-Reference")

    from app.core.graph.graph_utils import parse_link_target

    # Test block-id extraction
    _, section = parse_link_target("[[#^block-id]]", "test-note")

    if section == "block-id":
        print_success(f"Block-ID korrekt extrahiert: {section}")
    else:
        print_error(f"Block-ID falsch extrahiert: {section} (erwartet: block-id)")
        return False

    # Test with a section string
    _, section2 = parse_link_target("[[#📖 Diagnose ^kontext]]", "test-note")

    if section2 == "kontext":
        print_success(f"Block-ID aus Section-String extrahiert: {section2}")
    else:
        print_error(f"Block-ID aus Section-String falsch: {section2} (erwartet: kontext)")
        return False

    return True


def test_fa07_is_internal_flag() -> bool:
    """FA-07b: ``is_internal`` flag.

    Builds one intra-note edge with ``_edge`` and passes when the returned
    mapping has ``is_internal`` set to True.
    """
    print_header("FA-07b: is_internal Flag")

    from app.core.graph.graph_utils import _edge

    # Intra-note edge
    edge1 = _edge("derives", "chunk", "note1#c01", "note1#c02", "note1", {})

    if edge1.get("is_internal") is True:
        print_success("Intra-Note-Edge hat is_internal=True")
    else:
        print_error(f"Intra-Note-Edge hat is_internal={edge1.get('is_internal')}")
        return False

    # Inter-note edge (would normally be False, but _edge only checks note_id)
    # A real test would have to call build_edges_for_note
    return True


def test_fa08_default_edges_from_schema() -> bool:
    """FA-08: Default edges from graph_schema.md.

    Looks up the typical edge for experience -> insight. A missing edge is
    only reported as a warning; the test always returns True.
    """
    print_header("FA-08: Default-Edges aus Schema")

    from app.core.graph.graph_utils import get_typical_edge_for, clear_graph_schema_cache

    clear_graph_schema_cache()

    # Test for experience -> insight
    edge_type = get_typical_edge_for("experience", "insight")

    if edge_type:
        print_success(f"Typische Edge gefunden: {edge_type}")
        print_info(f" experience -> insight: {edge_type}")
    else:
        print_warning("Keine typische Edge gefunden (Fallback auf 'any' oder 'default')")

    return True


# ============================================================================
# PHASE 2: Retriever-Anpassungen
# ============================================================================

def test_fa09_internal_edge_boost() -> bool:
    """FA-09: Edge weighting for intra-note edges.

    Requires both boost keys in the edge-scoring config, then adds one
    internal edge to a ``Subgraph`` and compares the stored weight against
    ``1.0 * internal_edge_boost`` with a tolerance of 0.01. If no edge is
    found under the source node, a warning is printed and the boost is left
    unverified.
    """
    print_header("FA-09: Internal Edge Boost")

    from app.core.graph.graph_subgraph import Subgraph, get_edge_scoring_config
    from app.core.graph.graph_utils import clear_graph_schema_cache

    clear_graph_schema_cache()
    get_edge_scoring_config.cache_clear()

    config = get_edge_scoring_config()

    if "internal_edge_boost" in config and "external_edge_boost" in config:
        print_success("Edge-Scoring-Config geladen:")
        print_info(f" internal_edge_boost: {config['internal_edge_boost']}")
        print_info(f" external_edge_boost: {config['external_edge_boost']}")

        # Test Subgraph
        sg = Subgraph()
        sg.add_edge({
            "source": "note1#c01",
            "target": "note1#c02",
            "kind": "derives",
            "weight": 1.0,
            "is_internal": True,
        })

        edges = sg.adj.get("note1#c01", [])
        if edges:
            final_weight = edges[0]["weight"]
            expected_weight = 1.0 * config["internal_edge_boost"]

            if abs(final_weight - expected_weight) < 0.01:
                print_success(f"Boost korrekt angewendet: {final_weight} (erwartet: {expected_weight})")
            else:
                print_error(f"Boost falsch: {final_weight} (erwartet: {expected_weight})")
                return False
        else:
            # Make it visible that the boost itself was never checked.
            print_warning("Keine Edge für 'note1#c01' in Subgraph.adj gefunden - Boost nicht geprüft")
    else:
        print_error("Edge-Scoring-Config fehlt")
        return False

    return True


def test_fa10_chunk_level_aggregation() -> bool:
    """FA-10: Optional chunk-level deduplication.

    Requires the keys ``level`` and ``max_chunks_per_note`` in the
    aggregation config and accepts only "note" or "chunk" as level.
    """
    print_header("FA-10: Aggregation-Level")

    from app.core.retrieval.retriever import _get_aggregation_config

    config = _get_aggregation_config()

    if "level" in config and "max_chunks_per_note" in config:
        print_success("Aggregation-Config geladen:")
        print_info(f" level: {config['level']}")
        print_info(f" max_chunks_per_note: {config['max_chunks_per_note']}")

        if config["level"] in ["note", "chunk"]:
            print_success("Aggregation-Level ist gültig")
        else:
            print_error(f"Aggregation-Level ist ungültig: {config['level']}")
            return False
    else:
        print_error("Aggregation-Config fehlt")
        return False

    return True


# ============================================================================
# PHASE 3: Schema-Validierung
# ============================================================================

def test_fa12_schema_validation() -> bool:
    """FA-12: Schema validation against the effective chunk type.

    Runs three ``validate_intra_note_edge`` calls in non-strict mode: a
    typical edge (must be valid), an atypical edge (expected valid with
    confidence 0.7, otherwise only a warning), and an edge between chunks
    whose ``type`` differs from their ``note_type`` (must be valid).
    """
    print_header("FA-12: Schema-Validierung")

    from app.core.ingestion.ingestion_validation import validate_intra_note_edge
    from app.core.graph.graph_utils import clear_graph_schema_cache

    clear_graph_schema_cache()

    # Test 1: typical edge
    edge1 = {"kind": "resulted_in", "source_id": "chunk1", "target_id": "chunk2"}
    source_chunk1 = {"type": "experience"}
    target_chunk1 = {"type": "insight"}

    is_valid1, confidence1, reason1 = validate_intra_note_edge(
        edge=edge1,
        source_chunk=source_chunk1,
        target_chunk=target_chunk1,
        strict_mode=False,
    )

    if is_valid1:
        print_success(f"Typische Edge validiert: {edge1['kind']} (confidence: {confidence1})")
    else:
        print_error(f"Typische Edge abgelehnt: {reason1}")
        return False

    # Test 2: atypical edge (should be allowed with reduced confidence)
    edge2 = {"kind": "very_unusual_edge_xyz123", "source_id": "chunk1", "target_id": "chunk2"}

    is_valid2, confidence2, reason2 = validate_intra_note_edge(
        edge=edge2,
        source_chunk=source_chunk1,
        target_chunk=target_chunk1,
        strict_mode=False,
    )

    # Compare with a tolerance: exact float equality is fragile if the
    # confidence is the result of arithmetic rather than a literal.
    confidence_matches = (
        isinstance(confidence2, (int, float))
        and abs(confidence2 - 0.7) < 1e-9
    )
    if is_valid2 and confidence_matches:
        print_success(f"Atypische Edge erlaubt mit reduzierter Confidence: {confidence2}")
    else:
        print_warning(f"Atypische Edge: valid={is_valid2}, confidence={confidence2}")

    # Test 3: the effective type is used
    edge3 = {"kind": "related_to", "source_id": "chunk1", "target_id": "chunk2"}
    source_chunk3 = {"type": "insight", "note_type": "experience"}  # type takes precedence
    target_chunk3 = {"type": "decision", "note_type": "experience"}

    is_valid3, confidence3, reason3 = validate_intra_note_edge(
        edge=edge3,
        source_chunk=source_chunk3,
        target_chunk=target_chunk3,
        strict_mode=False,
    )

    if is_valid3:
        print_success("Effektiver Typ (type-Feld) wird für Validierung verwendet")
    else:
        print_error(f"Validierung mit effektivem Typ fehlgeschlagen: {reason3}")
        return False

    return True


# ============================================================================
# QDRANT-INTEGRATION TESTS
# ============================================================================

def test_qdrant_indices() -> bool:
    """Check that the Qdrant collections needed for WP-26 exist.

    Connects to the URL in the ``QDRANT_URL`` environment variable, or to
    ``http://localhost:6333`` when it is unset, and looks for collections
    whose names contain "chunks" and "edges". Missing collections and
    connection errors are reported as warnings only; the function always
    returns True because this check is treated as non-critical.
    """
    print_header("Qdrant-Indizes")

    try:
        client = QdrantClient(os.environ.get("QDRANT_URL", "http://localhost:6333"))

        # Check collections
        collections = client.get_collections().collections

        chunks_collection = None
        edges_collection = None

        for col in collections:
            if "chunks" in col.name.lower():
                chunks_collection = col.name
            if "edges" in col.name.lower():
                edges_collection = col.name

        if not chunks_collection or not edges_collection:
            print_warning("Collections nicht gefunden - möglicherweise noch nicht initialisiert")
            print_info("Führe 'python scripts/setup_mindnet_collections.py' aus")
            return True  # Not critical for functionality

        print_success(f"Collections gefunden: {chunks_collection}, {edges_collection}")

        # Check indices (simplified - a real check would need the collection info)
        print_info("Indizes sollten vorhanden sein für:")
        print_info(" - chunks: note_type, type, block_id")
        print_info(" - edges: is_internal (bool), kind, source_id, target_id")

        return True

    except Exception as e:
        # Deliberate best effort: an unreachable Qdrant must not fail the run.
        print_warning(f"Qdrant-Verbindung fehlgeschlagen: {e}")
        print_info("Stelle sicher, dass Qdrant läuft: docker-compose up -d")
        return True  # Not critical


# ============================================================================
# MAIN
# ============================================================================

def main() -> int:
    """Run all tests and print a summary.

    A test counts as failed when it returns a falsy value or raises; raised
    exceptions are printed with their traceback and do not stop the run.

    Returns:
        0 when every test passed, otherwise 1 (used as process exit code).
    """
    import traceback

    print(f"\n{Colors.BOLD}{Colors.BLUE}")
    print("="*70)
    print("WP-26 Umfassende Funktionsprüfung")
    print("Lastenheft v1.3 - Alle FA-Requirements")
    print("="*70)
    print(f"{Colors.RESET}\n")

    tests = [
        # Phase 1
        ("FA-01: Section-Callout-Format", test_fa01_section_callout_format),
        ("FA-01b: Verschachtelte Edge-Callouts", test_fa01b_nested_edge_callouts),
        ("FA-02: Scope-Beendigung", test_fa02_scope_termination),
        ("FA-03: type-Feld-Befüllung", test_fa03_type_field),
        ("FA-04: note_type-Feld", test_fa04_note_type_field),
        ("FA-05: Block-Reference", test_fa05_block_reference),
        ("FA-07b: is_internal Flag", test_fa07_is_internal_flag),
        ("FA-08: Default-Edges aus Schema", test_fa08_default_edges_from_schema),

        # Phase 2
        ("FA-09: Internal Edge Boost", test_fa09_internal_edge_boost),
        ("FA-10: Aggregation-Level", test_fa10_chunk_level_aggregation),

        # Phase 3
        ("FA-12: Schema-Validierung", test_fa12_schema_validation),

        # Integration
        ("Qdrant-Indizes", test_qdrant_indices),
    ]

    results = []
    for test_name, test_func in tests:
        try:
            result = test_func()
            results.append((test_name, result))
        except Exception as e:
            # Top-level boundary: record the failure and keep running.
            print_error(f"Test '{test_name}' fehlgeschlagen mit Exception: {e}")
            traceback.print_exc()
            results.append((test_name, False))

    # Summary
    print_header("ZUSAMMENFASSUNG")

    passed = sum(1 for _, result in results if result)
    total = len(results)

    for test_name, result in results:
        if result:
            print_success(test_name)
        else:
            print_error(test_name)

    print(f"\n{Colors.BOLD}Ergebnis: {passed}/{total} Tests bestanden{Colors.RESET}\n")

    if passed == total:
        print(f"{Colors.GREEN}{Colors.BOLD}✓ Alle Tests bestanden! WP-26 ist vollständig implementiert.{Colors.RESET}\n")
        return 0
    else:
        print(f"{Colors.RED}{Colors.BOLD}✗ Einige Tests fehlgeschlagen. Bitte prüfe die Fehler oben.{Colors.RESET}\n")
        return 1


if __name__ == "__main__":
    sys.exit(main())