""" FILE: app/core/graph/graph_extractors.py DESCRIPTION: Regex-basierte Extraktion von Relationen aus Text. """ import re from typing import List, Tuple _WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([a-zA-Z0-9_\-#:. ]+)\]\]") _REL_PIPE = re.compile(r"\[\[\s*rel:(?P[a-z_]+)\s*\|\s*(?P[^\]]+?)\s*\]\]", re.IGNORECASE) _REL_SPACE = re.compile(r"\[\[\s*rel:(?P[a-z_]+)\s+(?P[^\]]+?)\s*\]\]", re.IGNORECASE) _REL_TEXT = re.compile(r"rel\s*:\s*(?P[a-z_]+)\s*\[\[\s*(?P[^\]]+?)\s*\]\]", re.IGNORECASE) _CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE) _REL_LINE = re.compile(r"^(?P[a-z_]+)\s*:\s*(?P.+?)\s*$", re.IGNORECASE) _WIKILINKS_IN_LINE = re.compile(r"\[\[([^\]]+)\]\]") def extract_typed_relations(text: str) -> Tuple[List[Tuple[str,str]], str]: """Extrahiert [[rel:KIND|Target]].""" pairs = [] def _collect(m): k, t = (m.group("kind") or "").strip().lower(), (m.group("target") or "").strip() if k and t: pairs.append((k, t)) return "" text = _REL_PIPE.sub(_collect, text) text = _REL_SPACE.sub(_collect, text) text = _REL_TEXT.sub(_collect, text) return pairs, text def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]: """Verarbeitet Obsidian [!edge]-Callouts.""" if not text: return [], text lines = text.splitlines(); out_pairs, keep_lines, i = [], [], 0 while i < len(lines): m = _CALLOUT_START.match(lines[i]) if not m: keep_lines.append(lines[i]); i += 1; continue block_lines = [m.group(1)] if m.group(1).strip() else [] i += 1 while i < len(lines) and lines[i].lstrip().startswith('>'): block_lines.append(lines[i].lstrip()[1:].lstrip()); i += 1 for bl in block_lines: mrel = _REL_LINE.match(bl) if not mrel: continue kind, targets = mrel.group("kind").strip().lower(), mrel.group("targets") or "" found = _WIKILINKS_IN_LINE.findall(targets) if found: for t in found: out_pairs.append((kind, t.strip())) else: for raw in re.split(r"[,;]", targets): if raw.strip(): out_pairs.append((kind, raw.strip())) return out_pairs, "\n".join(keep_lines) def extract_wikilinks(text: str) -> List[str]: """Extrahiert Standard-Wikilinks.""" return [m.group(1).strip() for m in _WIKILINK_RE.finditer(text or "")]