neu graph explorer service
This commit is contained in:
parent
dc16bbf8a4
commit
772a202d6e
|
|
@ -100,6 +100,7 @@ EDGE_COLORS = {
|
||||||
"derived_from": "#ff9ff3",# Pink
|
"derived_from": "#ff9ff3",# Pink
|
||||||
"references": "#bdc3c7", # Grey
|
"references": "#bdc3c7", # Grey
|
||||||
"belongs_to": "#2e86de" # Dark Blue
|
"belongs_to": "#2e86de" # Dark Blue
|
||||||
|
"contributes_to": "#1dd1a1"
|
||||||
}
|
}
|
||||||
|
|
||||||
# --- HELPER FUNCTIONS ---
|
# --- HELPER FUNCTIONS ---
|
||||||
|
|
@ -245,27 +246,19 @@ class GraphExplorerService:
|
||||||
self.notes_col = f"{prefix}_notes"
|
self.notes_col = f"{prefix}_notes"
|
||||||
self.chunks_col = f"{prefix}_chunks"
|
self.chunks_col = f"{prefix}_chunks"
|
||||||
self.edges_col = f"{prefix}_edges"
|
self.edges_col = f"{prefix}_edges"
|
||||||
self._note_cache = {} # Simple in-memory cache for the session
|
self._note_cache = {}
|
||||||
|
|
||||||
def get_ego_graph(self, center_note_id: str):
|
def get_ego_graph(self, center_note_id: str):
|
||||||
"""
|
nodes_dict = {}
|
||||||
Bidirektionaler Ego-Graph:
|
unique_edges = {}
|
||||||
1. Lädt Center Node.
|
|
||||||
2. Findet OUTGOING Edges (Source = Chunk von Center).
|
|
||||||
3. Findet INCOMING Edges (Target = Chunk von Center ODER Target = Titel von Center).
|
|
||||||
4. Dedupliziert auf Notiz-Ebene.
|
|
||||||
"""
|
|
||||||
nodes_dict = {} # note_id -> Node Object
|
|
||||||
unique_edges = {} # (source_note_id, target_note_id) -> Edge Data
|
|
||||||
|
|
||||||
# 1. Zentrale Note laden
|
|
||||||
center_note = self._fetch_note_cached(center_note_id)
|
center_note = self._fetch_note_cached(center_note_id)
|
||||||
if not center_note: return [], []
|
if not center_note: return [], []
|
||||||
self._add_node_to_dict(nodes_dict, center_note, is_center=True)
|
self._add_node_to_dict(nodes_dict, center_note, is_center=True)
|
||||||
|
|
||||||
center_title = center_note.get("title")
|
center_title = center_note.get("title")
|
||||||
|
|
||||||
# 2. Chunks der Note finden (für Edge-Suche)
|
# Chunks laden
|
||||||
scroll_filter = models.Filter(
|
scroll_filter = models.Filter(
|
||||||
must=[models.FieldCondition(key="note_id", match=models.MatchValue(value=center_note_id))]
|
must=[models.FieldCondition(key="note_id", match=models.MatchValue(value=center_note_id))]
|
||||||
)
|
)
|
||||||
|
|
@ -276,7 +269,7 @@ class GraphExplorerService:
|
||||||
|
|
||||||
raw_edges = []
|
raw_edges = []
|
||||||
|
|
||||||
# 3. OUTGOING EDGES Suche
|
# 1. OUTGOING: Source ist einer unserer Chunks
|
||||||
if center_chunk_ids:
|
if center_chunk_ids:
|
||||||
out_filter = models.Filter(
|
out_filter = models.Filter(
|
||||||
must=[models.FieldCondition(key="source_id", match=models.MatchAny(any=center_chunk_ids))]
|
must=[models.FieldCondition(key="source_id", match=models.MatchAny(any=center_chunk_ids))]
|
||||||
|
|
@ -286,28 +279,26 @@ class GraphExplorerService:
|
||||||
)
|
)
|
||||||
raw_edges.extend(res_out)
|
raw_edges.extend(res_out)
|
||||||
|
|
||||||
# 4. INCOMING EDGES Suche
|
# 2. INCOMING: Target ist Chunk, Titel oder exakte Note-ID
|
||||||
# Case A: Target ist einer unserer Chunks
|
# Hinweis: Target mit #Section (z.B. 'note#header') kann via Keyword-Index schwer gefunden werden,
|
||||||
|
# wenn wir den Header-Teil nicht kennen.
|
||||||
|
must_conditions = []
|
||||||
if center_chunk_ids:
|
if center_chunk_ids:
|
||||||
in_chunk_filter = models.Filter(
|
must_conditions.append(models.FieldCondition(key="target_id", match=models.MatchAny(any=center_chunk_ids)))
|
||||||
must=[models.FieldCondition(key="target_id", match=models.MatchAny(any=center_chunk_ids))]
|
|
||||||
)
|
|
||||||
res_in_c, _ = self.client.scroll(
|
|
||||||
collection_name=self.edges_col, scroll_filter=in_chunk_filter, limit=100, with_payload=True
|
|
||||||
)
|
|
||||||
raw_edges.extend(res_in_c)
|
|
||||||
|
|
||||||
# Case B: Target ist unser Titel (Wikilinks)
|
|
||||||
if center_title:
|
if center_title:
|
||||||
in_title_filter = models.Filter(
|
must_conditions.append(models.FieldCondition(key="target_id", match=models.MatchValue(value=center_title)))
|
||||||
must=[models.FieldCondition(key="target_id", match=models.MatchValue(value=center_title))]
|
|
||||||
)
|
|
||||||
res_in_t, _ = self.client.scroll(
|
|
||||||
collection_name=self.edges_col, scroll_filter=in_title_filter, limit=50, with_payload=True
|
|
||||||
)
|
|
||||||
raw_edges.extend(res_in_t)
|
|
||||||
|
|
||||||
# 5. Kanten verarbeiten und auflösen
|
# NEU: Auch exakte Note-ID als Target prüfen
|
||||||
|
must_conditions.append(models.FieldCondition(key="target_id", match=models.MatchValue(value=center_note_id)))
|
||||||
|
|
||||||
|
if must_conditions:
|
||||||
|
in_filter = models.Filter(should=must_conditions) # 'should' wirkt wie OR
|
||||||
|
res_in, _ = self.client.scroll(
|
||||||
|
collection_name=self.edges_col, scroll_filter=in_filter, limit=100, with_payload=True
|
||||||
|
)
|
||||||
|
raw_edges.extend(res_in)
|
||||||
|
|
||||||
|
# Verarbeitung
|
||||||
for record in raw_edges:
|
for record in raw_edges:
|
||||||
payload = record.payload
|
payload = record.payload
|
||||||
|
|
||||||
|
|
@ -316,29 +307,23 @@ class GraphExplorerService:
|
||||||
kind = payload.get("kind", "related_to")
|
kind = payload.get("kind", "related_to")
|
||||||
provenance = payload.get("provenance", "explicit")
|
provenance = payload.get("provenance", "explicit")
|
||||||
|
|
||||||
# Resolve Source Note
|
|
||||||
src_note = self._resolve_note_from_ref(src_ref)
|
src_note = self._resolve_note_from_ref(src_ref)
|
||||||
# Resolve Target Note
|
|
||||||
tgt_note = self._resolve_note_from_ref(tgt_ref)
|
tgt_note = self._resolve_note_from_ref(tgt_ref)
|
||||||
|
|
||||||
if src_note and tgt_note:
|
if src_note and tgt_note:
|
||||||
src_id = src_note['note_id']
|
src_id = src_note['note_id']
|
||||||
tgt_id = tgt_note['note_id']
|
tgt_id = tgt_note['note_id']
|
||||||
|
|
||||||
# Keine Self-Loops und valide Verbindung
|
|
||||||
if src_id != tgt_id:
|
if src_id != tgt_id:
|
||||||
# Nodes hinzufügen (falls noch nicht da)
|
|
||||||
self._add_node_to_dict(nodes_dict, src_note)
|
self._add_node_to_dict(nodes_dict, src_note)
|
||||||
self._add_node_to_dict(nodes_dict, tgt_note)
|
self._add_node_to_dict(nodes_dict, tgt_note)
|
||||||
|
|
||||||
# Deduplizierung: Wir behalten die "stärkste" Kante
|
|
||||||
# Wenn bereits eine explizite Kante existiert, überschreiben wir sie nicht mit einer AI-Kante
|
|
||||||
key = (src_id, tgt_id)
|
key = (src_id, tgt_id)
|
||||||
existing = unique_edges.get(key)
|
existing = unique_edges.get(key)
|
||||||
|
|
||||||
is_current_explicit = (provenance == "explicit" or provenance == "rule")
|
is_current_explicit = (provenance == "explicit" or provenance == "rule")
|
||||||
|
|
||||||
should_update = True
|
should_update = True
|
||||||
|
|
||||||
if existing:
|
if existing:
|
||||||
is_existing_explicit = (existing['provenance'] == "explicit" or existing['provenance'] == "rule")
|
is_existing_explicit = (existing['provenance'] == "explicit" or existing['provenance'] == "rule")
|
||||||
if is_existing_explicit and not is_current_explicit:
|
if is_existing_explicit and not is_current_explicit:
|
||||||
|
|
@ -346,39 +331,25 @@ class GraphExplorerService:
|
||||||
|
|
||||||
if should_update:
|
if should_update:
|
||||||
unique_edges[key] = {
|
unique_edges[key] = {
|
||||||
"source": src_id,
|
"source": src_id, "target": tgt_id, "kind": kind, "provenance": provenance
|
||||||
"target": tgt_id,
|
|
||||||
"kind": kind,
|
|
||||||
"provenance": provenance
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# 6. Agraph Edge Objekte erstellen
|
|
||||||
final_edges = []
|
final_edges = []
|
||||||
for (src, tgt), data in unique_edges.items():
|
for (src, tgt), data in unique_edges.items():
|
||||||
kind = data['kind']
|
kind = data['kind']
|
||||||
prov = data['provenance']
|
prov = data['provenance']
|
||||||
|
|
||||||
color = EDGE_COLORS.get(kind, "#bdc3c7")
|
color = EDGE_COLORS.get(kind, "#bdc3c7")
|
||||||
is_smart = (prov != "explicit" and prov != "rule")
|
is_smart = (prov != "explicit" and prov != "rule")
|
||||||
|
|
||||||
label = f"{kind}"
|
|
||||||
# AI Edges gestrichelt
|
|
||||||
dashes = is_smart
|
|
||||||
|
|
||||||
final_edges.append(Edge(
|
final_edges.append(Edge(
|
||||||
source=src,
|
source=src, target=tgt, label=kind, color=color, dashes=is_smart,
|
||||||
target=tgt,
|
title=f"Provenance: {prov}\nType: {kind}"
|
||||||
label=label,
|
|
||||||
color=color,
|
|
||||||
dashes=dashes,
|
|
||||||
title=f"Provenance: {prov}, Type: {kind}"
|
|
||||||
))
|
))
|
||||||
|
|
||||||
return list(nodes_dict.values()), final_edges
|
return list(nodes_dict.values()), final_edges
|
||||||
|
|
||||||
def _fetch_note_cached(self, note_id):
|
def _fetch_note_cached(self, note_id):
|
||||||
if note_id in self._note_cache: return self._note_cache[note_id]
|
if note_id in self._note_cache: return self._note_cache[note_id]
|
||||||
|
|
||||||
res, _ = self.client.scroll(
|
res, _ = self.client.scroll(
|
||||||
collection_name=self.notes_col,
|
collection_name=self.notes_col,
|
||||||
scroll_filter=models.Filter(must=[models.FieldCondition(key="note_id", match=models.MatchValue(value=note_id))]),
|
scroll_filter=models.Filter(must=[models.FieldCondition(key="note_id", match=models.MatchValue(value=note_id))]),
|
||||||
|
|
@ -390,54 +361,50 @@ class GraphExplorerService:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _resolve_note_from_ref(self, ref_str):
|
def _resolve_note_from_ref(self, ref_str):
|
||||||
"""Löst eine ID (Chunk) oder einen String (Titel) zu einer Note Payload auf."""
|
|
||||||
if not ref_str: return None
|
if not ref_str: return None
|
||||||
|
|
||||||
# Fall 1: Chunk ID (enthält '#')
|
# Fall A: Chunk ID (Format: note_id#cXX)
|
||||||
if "#" in ref_str:
|
if "#" in ref_str:
|
||||||
# Wir könnten den Chunk holen, aber effizienter ist es, die note_id aus dem Chunk-String zu parsen,
|
# 1. Versuch: Echte Chunk ID in DB suchen
|
||||||
# WENN das Format strikt 'note_id#cXX' ist. Um sicher zu gehen, fragen wir Qdrant.
|
|
||||||
try:
|
try:
|
||||||
res = self.client.retrieve(collection_name=self.chunks_col, ids=[ref_str], with_payload=True)
|
res = self.client.retrieve(collection_name=self.chunks_col, ids=[ref_str], with_payload=True)
|
||||||
if res:
|
if res:
|
||||||
parent_id = res[0].payload.get("note_id")
|
parent_id = res[0].payload.get("note_id")
|
||||||
return self._fetch_note_cached(parent_id)
|
return self._fetch_note_cached(parent_id)
|
||||||
except: pass # Falls ID nicht existiert
|
except: pass
|
||||||
|
|
||||||
# Fall 2: Vermutlich ein Titel (Wikilink) oder Note ID
|
# 2. Versuch (NEU): Es ist ein Link auf eine Section (z.B. "note-id#Header")
|
||||||
# Versuch als Note ID
|
# Wir entfernen den Hash-Teil und suchen die Basis-Notiz
|
||||||
|
possible_note_id = ref_str.split("#")[0]
|
||||||
|
note_by_id = self._fetch_note_cached(possible_note_id)
|
||||||
|
if note_by_id: return note_by_id
|
||||||
|
|
||||||
|
# Fall B: Es ist direkt die Note ID
|
||||||
note_by_id = self._fetch_note_cached(ref_str)
|
note_by_id = self._fetch_note_cached(ref_str)
|
||||||
if note_by_id: return note_by_id
|
if note_by_id: return note_by_id
|
||||||
|
|
||||||
# Versuch als Titel
|
# Fall C: Es ist der Titel (Wikilink)
|
||||||
res, _ = self.client.scroll(
|
res, _ = self.client.scroll(
|
||||||
collection_name=self.notes_col,
|
collection_name=self.notes_col,
|
||||||
scroll_filter=models.Filter(must=[models.FieldCondition(key="title", match=models.MatchValue(value=ref_str))]),
|
scroll_filter=models.Filter(must=[models.FieldCondition(key="title", match=models.MatchValue(value=ref_str))]),
|
||||||
limit=1, with_payload=True
|
limit=1, with_payload=True
|
||||||
)
|
)
|
||||||
if res:
|
if res:
|
||||||
payload = res[0].payload
|
p = res[0].payload
|
||||||
self._note_cache[payload['note_id']] = payload
|
self._note_cache[p['note_id']] = p
|
||||||
return payload
|
return p
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _add_node_to_dict(self, node_dict, note_payload, is_center=False):
|
def _add_node_to_dict(self, node_dict, note_payload, is_center=False):
|
||||||
nid = note_payload.get("note_id")
|
nid = note_payload.get("note_id")
|
||||||
if nid in node_dict: return
|
if nid in node_dict: return
|
||||||
|
|
||||||
ntype = note_payload.get("type", "default")
|
ntype = note_payload.get("type", "default")
|
||||||
color = GRAPH_COLORS.get(ntype, GRAPH_COLORS["default"])
|
color = GRAPH_COLORS.get(ntype, GRAPH_COLORS["default"])
|
||||||
size = 40 if is_center else 20
|
size = 35 if is_center else 20
|
||||||
|
|
||||||
node_dict[nid] = Node(
|
node_dict[nid] = Node(
|
||||||
id=nid,
|
id=nid, label=note_payload.get("title", nid), size=size, color=color, shape="dot",
|
||||||
label=note_payload.get("title", nid),
|
title=f"Type: {ntype}\nTags: {note_payload.get('tags')}", font={'color': 'black'}
|
||||||
size=size,
|
|
||||||
color=color,
|
|
||||||
shape="dot" if not is_center else "diamond",
|
|
||||||
title=f"Type: {ntype}\nTags: {note_payload.get('tags')}",
|
|
||||||
font={'color': 'black', 'face': 'arial'}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Init Graph Service
|
# Init Graph Service
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user