- Added optional description and category fields to edge type entries, improving metadata for edge types. - Updated the `getAllEdgeTypes` and `groupEdgeTypesByCategory` functions to utilize new fields for better organization and display. - Enhanced UI components to show descriptions as tooltips and categorize edge types in the EdgeTypeChooserModal and InlineEdgeTypeModal. - Improved parsing logic in `parseEdgeVocabulary` to extract descriptions and categories from the vocabulary table, ensuring richer edge type data. - Adjusted the LinkPromptModal to clarify edge type actions and maintain alias information during selection.
170 lines
5.6 KiB
TypeScript
170 lines
5.6 KiB
TypeScript
import type { EdgeVocabulary } from "./types";
|
|
|
|
/** Matches one backticked token; capture group 1 holds the text between the backticks. */
const BACKTICK_RE = /`([^`]+)`/g;

/** Column-title keywords (German and English) that identify a table header row. */
const HEADER_KEYWORDS_RE = /canonical|system-typ|beschreibung|kategorie|category|description|inverser|aliasse/i;

/** Matches a Markdown table separator row, e.g. "| :--- | :--- |". */
const SEPARATOR_ROW_RE = /^\s*\|[\s:|-]+\|\s*$/;

/**
 * Collects every non-empty backticked token in `text`, in order of appearance.
 */
function extractBacktickTokens(text: string): string[] {
  const tokens: string[] = [];
  // BACKTICK_RE carries the global flag and is therefore stateful: rewind it
  // before each scan so a previous call cannot affect this one.
  BACKTICK_RE.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = BACKTICK_RE.exec(text)) !== null) {
    const token = match[1] ? match[1].trim() : "";
    if (token) {
      tokens.push(token);
    }
  }
  return tokens;
}

/**
 * Splits a table row into trimmed cells.
 *
 * Only the outer pipe delimiters are removed; empty interior cells are kept so
 * that column positions stay stable (an empty "inverse" or "aliases" cell must
 * not shift the description into a different index).
 */
function splitTableCells(row: string): string[] {
  let inner = row.trim();
  if (inner.startsWith("|")) {
    inner = inner.slice(1);
  }
  if (inner.endsWith("|")) {
    inner = inner.slice(0, -1);
  }
  return inner.split("|").map((cell) => cell.trim());
}

/**
 * Derives description and category from the cells after the aliases column
 * (index 3 and up).
 *
 * A cell is taken as the category when it is shorter than 40 characters and is
 * either wrapped in brackets, identical to its upper-cased form, or written in
 * Title Case; surrounding brackets are stripped. The first other non-empty
 * cell becomes the description, with `*` and backtick markup removed.
 */
function extractDescriptionAndCategory(cells: readonly string[]): { description?: string; category?: string } {
  let description: string | undefined;
  let category: string | undefined;

  for (let i = 3; i < cells.length; i++) {
    const cell = cells[i];
    if (!cell) {
      continue; // empty cell (cells are already trimmed)
    }

    const looksLikeCategory =
      cell.length < 40 &&
      (/^\[.+\]$/.test(cell) || // [Category]
        cell === cell.toUpperCase() || // ALL CAPS
        /^[A-ZÄÖÜ][a-zäöüß]+(\s+[A-ZÄÖÜ][a-zäöüß]+)*$/.test(cell)); // Title Case

    if (looksLikeCategory && !category) {
      category = cell.replace(/^\[|\]$/g, "");
    } else if (!description) {
      // Strip bold/italic asterisks and code backticks, keep the text itself.
      description = cell.replace(/[*`]/g, "").trim();
    }
  }

  return { description, category };
}

/**
 * Parses markdown tables containing edge vocabulary definitions.
 *
 * Expected format:
 * | System-Typ (Canonical) | Inverser Typ | Erlaubte Aliasse (User) | Beschreibung ... |
 * | **`caused_by`** | `resulted_in` | `ausgelöst_durch`, `wegen`, ... | ... |
 *
 * Rules:
 * - A `### Heading` line sets the category for all following rows; it takes
 *   precedence over a category found inside a row.
 * - Separator rows and lines that do not start with `|` are ignored.
 * - A row is a header (and ignored) when it contains a column-title keyword
 *   and its first cell holds no backticked token. Data rows whose description
 *   merely mentions such a keyword are parsed normally.
 * - All backticked tokens of a row are collected in order:
 *   first = canonical (may be wrapped in ** **), second = inverse (optional),
 *   remaining = aliases (dropped if the row contains "(Kein Alias)").
 *   Because the tokens are positional, backticked text in an empty-inverse row
 *   or in a description cell is read as inverse/alias as well.
 * - Rows without any backticked token are skipped and counted; the count is
 *   reported once via `console.debug`.
 * - Description and category are read from the cells after the aliases column.
 * - Canonical names are stored as written; alias lookup keys are lower-cased,
 *   and the first canonical to claim an alias keeps it.
 *
 * @param md Markdown text containing one or more vocabulary tables.
 * @returns The entries keyed by canonical name plus the alias lookup map.
 */
export function parseEdgeVocabulary(md: string): EdgeVocabulary {
  const byCanonical = new Map<string, { canonical: string; inverse?: string; aliases: string[]; description?: string; category?: string }>();
  const aliasToCanonical = new Map<string, string>();

  let skippedRows = 0;
  let currentCategory: string | null = null; // most recent H3 heading, if any

  for (const line of md.split(/\r?\n/)) {
    // H3 headings (###) act as category separators.
    const h3Match = line.match(/^###\s+(.+)$/);
    if (h3Match && h3Match[1]) {
      currentCategory = h3Match[1].trim();
      continue;
    }

    // Skip separator rows and anything that is not a table row.
    if (SEPARATOR_ROW_RE.test(line) || !line.trim().startsWith("|")) {
      continue;
    }

    const cells = splitTableCells(line);

    // Header rows carry column titles instead of a backticked canonical type.
    // Checking the first cell keeps data rows whose free text happens to
    // contain a keyword such as "Kategorie" or "description".
    const firstCellHasToken = extractBacktickTokens(cells[0] || "").length > 0;
    if (!firstCellHasToken && HEADER_KEYWORDS_RE.test(line)) {
      continue;
    }

    const tokens = extractBacktickTokens(line);
    const canonical = tokens[0];
    if (!canonical) {
      // No backticked token at all: nothing to register for this row.
      skippedRows++;
      continue;
    }

    const inverse = tokens[1] ? tokens[1] : undefined;

    // "(Kein Alias)" anywhere in the row marks it as having no aliases.
    const hasNoAliases = /\(Kein Alias\)/i.test(line);
    const aliases = hasNoAliases ? [] : tokens.slice(2);

    const { description, category } = extractDescriptionAndCategory(cells);

    byCanonical.set(canonical, {
      canonical,
      inverse,
      aliases,
      description,
      // The H3 heading wins; fall back to the category found in the row.
      category: currentCategory || category,
    });

    // Alias lookup is case-insensitive; an already registered alias is kept.
    for (const alias of aliases) {
      const lowerAlias = alias.toLowerCase();
      if (!aliasToCanonical.has(lowerAlias)) {
        aliasToCanonical.set(lowerAlias, canonical);
      }
    }
  }

  if (skippedRows > 0) {
    // Header and separator rows are never counted here, only token-less data rows.
    console.debug(`parseEdgeVocabulary: Skipped ${skippedRows} data rows with insufficient tokens (this is normal if the file contains empty or malformed table rows)`);
  }

  return {
    byCanonical,
    aliasToCanonical,
  };
}
|