mindnet_obsidian/src/vocab/parseEdgeVocabulary.ts
Lars 2fcf333e56
Some checks are pending
Node.js build / build (20.x) (push) Waiting to run
Node.js build / build (22.x) (push) Waiting to run
Enhance edge type handling and categorization
- Added optional description and category fields to edge type entries, improving metadata for edge types.
- Updated the `getAllEdgeTypes` and `groupEdgeTypesByCategory` functions to utilize new fields for better organization and display.
- Enhanced UI components to show descriptions as tooltips and categorize edge types in the EdgeTypeChooserModal and InlineEdgeTypeModal.
- Improved parsing logic in `parseEdgeVocabulary` to extract descriptions and categories from the vocabulary table, ensuring richer edge type data.
- Adjusted the LinkPromptModal to clarify edge type actions and maintain alias information during selection.
2026-01-17 13:59:26 +01:00

170 lines
5.6 KiB
TypeScript

import type { EdgeVocabulary } from "./types";
const BACKTICK_RE = /`([^`]+)`/g;

/** Matches an H3 markdown heading; capture group 1 is the heading text. */
const H3_HEADING_RE = /^###\s+(.+)$/;

/** Matches a table separator row such as "| :--- | :--- |". */
const SEPARATOR_ROW_RE = /^\s*\|[\s:|-]+\|\s*$/;

/** Keywords that identify a header row when the row carries no canonical token. */
const HEADER_KEYWORD_RE = /canonical|system-typ|beschreibung|kategorie|category|description|inverser|aliasse/i;

/** Marker in the alias column stating that the edge type has no aliases. */
const NO_ALIAS_RE = /\(Kein Alias\)/i;

/**
 * Returns every non-empty backticked token found in `text`, trimmed, in order.
 */
function extractBacktickTokens(text: string): string[] {
  const tokens: string[] = [];
  // BACKTICK_RE is global and therefore stateful; reset it before each scan.
  BACKTICK_RE.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = BACKTICK_RE.exec(text)) !== null) {
    const token = match[1] ? match[1].trim() : "";
    if (token) {
      tokens.push(token);
    }
  }
  return tokens;
}

/**
 * Splits a table row into trimmed cells while keeping empty interior cells,
 * so that column positions stay stable even when a cell is left blank.
 */
function splitTableCells(line: string): string[] {
  const cells = line.split("|").map((cell) => cell.trim());
  // "| a | b |" splits into ["", "a", "b", ""]: drop only the outer artifacts
  // produced by the leading and trailing pipe.
  if (cells.length > 0 && cells[0] === "") {
    cells.shift();
  }
  if (cells.length > 0 && cells[cells.length - 1] === "") {
    cells.pop();
  }
  return cells;
}

/**
 * Derives description and category from the cells after the alias column
 * (index 3 and up). A cell counts as a category when it is shorter than 40
 * characters and is bracketed, all caps, or Title Case; the first other
 * non-empty cell becomes the description with bold/italic/code markers removed.
 */
function extractDescriptionAndCategory(
  cells: string[]
): { description: string | undefined; category: string | undefined } {
  let description: string | undefined = undefined;
  let category: string | undefined = undefined;

  for (let i = 3; i < cells.length; i++) {
    const cell = cells[i];
    if (!cell || !cell.trim()) continue;
    const trimmed = cell.trim();

    const looksLikeCategory =
      trimmed.length < 40 && (
        /^\[.+\]$/.test(trimmed) || // [Category]
        trimmed === trimmed.toUpperCase() || // ALL CAPS
        /^[A-ZÄÖÜ][a-zäöüß]+(\s+[A-ZÄÖÜ][a-zäöüß]+)*$/.test(trimmed) // Title Case
      );

    if (looksLikeCategory && !category) {
      // Strip the surrounding brackets of "[Category]" if present.
      category = trimmed.replace(/^\[|\]$/g, "");
    } else if (!description && trimmed.length > 0) {
      description = trimmed
        .replace(/\*\*/g, "") // Remove bold
        .replace(/\*/g, "") // Remove italic
        .replace(/`/g, "") // Remove code
        .trim();
    }
  }

  return { description, category };
}

/**
 * Parses markdown tables containing edge vocabulary definitions.
 *
 * Expected format:
 * | System-Typ (Canonical) | Inverser Typ | Erlaubte Aliasse (User) | Beschreibung ... |
 * | **`caused_by`** | `resulted_in` | `ausgelöst_durch`, `wegen`, ... | ... |
 *
 * Rules:
 * - An H3 heading ("### Name") sets the category for all following rows.
 * - Separator rows and header rows (the row directly above a separator row,
 *   or a token-less row containing a header keyword) are ignored.
 * - Column 1: first backticked token = canonical (may be wrapped in ** **).
 * - Column 2: first backticked token = inverse (optional, may be empty).
 * - Column 3: every backticked token = alias, unless the cell contains
 *   "(Kein Alias)".
 * - Columns 4+: description and, optionally, a category (used only when no
 *   H3 category is active).
 * - Rows without a canonical token are skipped and counted; the count is
 *   reported once via console.debug.
 * - Canonical names are stored as-is; alias lookup keys are lowercased and
 *   the first definition of an alias wins.
 *
 * @param md Markdown source of the vocabulary document.
 * @returns The canonical entries keyed by canonical name, plus the
 *          lowercase-alias to canonical-name lookup map.
 */
export function parseEdgeVocabulary(md: string): EdgeVocabulary {
  const lines = md.split(/\r?\n/);
  const byCanonical = new Map<string, { canonical: string; inverse?: string; aliases: string[]; description?: string; category?: string }>();
  const aliasToCanonical = new Map<string, string>();
  let skippedRows = 0;
  let currentCategory: string | null = null; // Category taken from the latest H3 heading

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (line === undefined) continue;

    // H3 headings act as category separators.
    const h3Match = line.match(H3_HEADING_RE);
    if (h3Match && h3Match[1]) {
      currentCategory = h3Match[1].trim();
      continue;
    }

    // Skip separator rows (e.g., "| :--- | :--- |") and anything that is not a table row.
    if (SEPARATOR_ROW_RE.test(line) || !line.trim().startsWith("|")) {
      continue;
    }

    // In a markdown table the header is the row directly above the separator
    // row. Detecting it structurally avoids dropping data rows whose
    // description merely mentions a word like "description" or "category".
    const nextLine = lines[i + 1];
    if (nextLine !== undefined && SEPARATOR_ROW_RE.test(nextLine)) {
      continue;
    }

    const cells = splitTableCells(line);

    // The canonical name must be a backticked token in the first column.
    const canonical = extractBacktickTokens(cells[0] ?? "")[0];
    if (!canonical) {
      // Header-like rows are expected and not counted as malformed.
      if (!HEADER_KEYWORD_RE.test(line)) {
        skippedRows++;
      }
      continue;
    }

    // Read inverse and aliases from their own columns so that an empty inverse
    // cell does not turn the first alias into the inverse, and backticked words
    // in the description are not registered as aliases.
    const inverse = extractBacktickTokens(cells[1] ?? "")[0];
    const aliasCell = cells[2] ?? "";
    const aliases = NO_ALIAS_RE.test(aliasCell) ? [] : extractBacktickTokens(aliasCell);

    const { description, category } = extractDescriptionAndCategory(cells);

    // The H3 heading category takes precedence over one found in the row itself.
    const finalCategory = currentCategory || category;
    byCanonical.set(canonical, {
      canonical,
      inverse,
      aliases,
      description,
      category: finalCategory,
    });

    // Build alias-to-canonical mapping (case-insensitive keys, first definition wins).
    for (const alias of aliases) {
      const lowerAlias = alias.toLowerCase();
      if (!aliasToCanonical.has(lowerAlias)) {
        aliasToCanonical.set(lowerAlias, canonical);
      }
    }
  }

  if (skippedRows > 0) {
    // Header and separator rows are expected and never counted here.
    console.debug(`parseEdgeVocabulary: Skipped ${skippedRows} data rows with insufficient tokens (this is normal if the file contains empty or malformed table rows)`);
  }

  return {
    byCanonical,
    aliasToCanonical,
  };
}