From ea985f4d614264f5582cba868e6039b7087b1b83 Mon Sep 17 00:00:00 2001 From: Lars Date: Tue, 2 Sep 2025 10:23:45 +0200 Subject: [PATCH] app/routers/qdrant_router.py aktualisiert --- app/routers/qdrant_router.py | 107 +++++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 25 deletions(-) diff --git a/app/routers/qdrant_router.py b/app/routers/qdrant_router.py index c784fdd..5796281 100644 --- a/app/routers/qdrant_router.py +++ b/app/routers/qdrant_router.py @@ -1,25 +1,45 @@ - from __future__ import annotations + from typing import Any, Optional, List +import uuid + from fastapi import APIRouter, HTTPException from pydantic import BaseModel, Field from qdrant_client import QdrantClient -from qdrant_client.http.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue +from qdrant_client.http.models import ( + Distance, + VectorParams, + PointStruct, + Filter, + FieldCondition, + MatchValue, +) + from ..config import get_settings from ..embeddings import embed_texts router = APIRouter(prefix="/qdrant", tags=["qdrant"]) + def _client() -> QdrantClient: s = get_settings() return QdrantClient(url=s.QDRANT_URL, api_key=s.QDRANT_API_KEY) + def _col(name: str) -> str: return f"{get_settings().COLLECTION_PREFIX}_{name}" + +def _uuid5(s: str) -> str: + """Deterministic UUIDv5 from arbitrary string (server-side point id).""" + return str(uuid.uuid5(uuid.NAMESPACE_URL, s)) + + # --- Models --- class BaseMeta(BaseModel): - note_id: str = Field(..., description="Stable ID of the note (e.g., hash of vault-relative path)") + note_id: str = Field( + ..., description="Stable ID of the note (e.g., hash of vault-relative path)" + ) title: Optional[str] = Field(None, description="Note or chunk title") path: Optional[str] = Field(None, description="Vault-relative path to the .md file") Typ: Optional[str] = None @@ -27,14 +47,19 @@ class BaseMeta(BaseModel): tags: Optional[List[str]] = None Rolle: Optional[List[str]] = None # allow list + class UpsertChunkRequest(BaseMeta): chunk_id: str = Field(..., description="Stable ID of the chunk within the note") text: str = Field(..., description="Chunk text content") - links: Optional[List[str]] = Field(default=None, description="Outbound links detected in the chunk") + links: Optional[List[str]] = Field( + default=None, description="Outbound links detected in the chunk" + ) + class UpsertNoteRequest(BaseMeta): text: Optional[str] = Field(None, description="Full note text (optional)") + class UpsertEdgeRequest(BaseModel): src_note_id: str dst_note_id: Optional[str] = None @@ -43,6 +68,7 @@ class UpsertEdgeRequest(BaseModel): relation: str = Field(default="links_to") link_text: Optional[str] = None + class QueryRequest(BaseModel): query: str limit: int = 5 @@ -50,6 +76,7 @@ class QueryRequest(BaseModel): path: Optional[str] = None tags: Optional[List[str]] = None + # --- Helpers --- def _ensure_collections(): s = get_settings() @@ -58,17 +85,27 @@ def _ensure_collections(): try: cli.get_collection(_col("chunks")) except Exception: - cli.recreate_collection(_col("chunks"), vectors_config=VectorParams(size=s.VECTOR_SIZE, distance=Distance.COSINE)) + cli.recreate_collection( + _col("chunks"), + vectors_config=VectorParams(size=s.VECTOR_SIZE, distance=Distance.COSINE), + ) # notes try: cli.get_collection(_col("notes")) except Exception: - cli.recreate_collection(_col("notes"), vectors_config=VectorParams(size=s.VECTOR_SIZE, distance=Distance.COSINE)) + cli.recreate_collection( + _col("notes"), + vectors_config=VectorParams(size=s.VECTOR_SIZE, distance=Distance.COSINE), + ) # edges (dummy vector of size 1) try: cli.get_collection(_col("edges")) except Exception: - cli.recreate_collection(_col("edges"), vectors_config=VectorParams(size=1, distance=Distance.COSINE)) + cli.recreate_collection( + _col("edges"), + vectors_config=VectorParams(size=1, distance=Distance.COSINE), + ) + @router.post("/upsert_chunk", summary="Upsert a chunk into mindnet_chunks") def upsert_chunk(req: UpsertChunkRequest) -> dict: @@ -78,10 +115,14 @@ def upsert_chunk(req: UpsertChunkRequest) -> dict: payload: dict[str, Any] = req.model_dump() payload.pop("text", None) # Also store short preview - payload["preview"] = (req.text[:240] + "…") if len(req.text) > 240 else req.text - pt = PointStruct(id=req.chunk_id, vector=vec, payload=payload) + payload["preview"] = ( + (req.text[:240] + "…") if len(req.text) > 240 else req.text + ) + qdrant_id = _uuid5(f"chunk:{req.chunk_id}") + pt = PointStruct(id=qdrant_id, vector=vec, payload=payload) cli.upsert(collection_name=_col("chunks"), points=[pt]) - return {"status": "ok", "id": req.chunk_id} + return {"status": "ok", "id": qdrant_id} + @router.post("/upsert_note", summary="Upsert a note into mindnet_notes") def upsert_note(req: UpsertNoteRequest) -> dict: @@ -91,9 +132,11 @@ def upsert_note(req: UpsertNoteRequest) -> dict: vec = embed_texts([text_for_embedding])[0] payload: dict[str, Any] = req.model_dump() payload.pop("text", None) - pt = PointStruct(id=req.note_id, vector=vec, payload=payload) + qdrant_id = _uuid5(f"note:{req.note_id}") + pt = PointStruct(id=qdrant_id, vector=vec, payload=payload) cli.upsert(collection_name=_col("notes"), points=[pt]) - return {"status": "ok", "id": req.note_id} + return {"status": "ok", "id": qdrant_id} + @router.post("/upsert_edge", summary="Upsert a graph edge into mindnet_edges") def upsert_edge(req: UpsertEdgeRequest) -> dict: @@ -102,10 +145,15 @@ def upsert_edge(req: UpsertEdgeRequest) -> dict: payload = req.model_dump() # dummy vector vec = [0.0] - edge_id = f"{req.src_note_id}|{req.src_chunk_id or ''}->{req.dst_note_id or ''}|{req.dst_chunk_id or ''}|{req.relation}" - pt = PointStruct(id=edge_id, vector=vec, payload=payload) + raw_edge_id = ( + f"{req.src_note_id}|{req.src_chunk_id or ''}->" + f"{req.dst_note_id or ''}|{req.dst_chunk_id or ''}|{req.relation}" + ) + qdrant_id = _uuid5(f"edge:{raw_edge_id}") + pt = PointStruct(id=qdrant_id, vector=vec, payload=payload) cli.upsert(collection_name=_col("edges"), points=[pt]) - return {"status": "ok", "id": edge_id} + return {"status": "ok", "id": qdrant_id} + @router.post("/query", summary="Vector query over mindnet_chunks with optional filters") def query(req: QueryRequest) -> dict: @@ -126,17 +174,26 @@ def query(req: QueryRequest) -> dict: if conds: flt = Filter(must=conds) - res = cli.search(collection_name=_col("chunks"), query_vector=vec, limit=req.limit, with_payload=True, with_vectors=False, query_filter=flt) + res = cli.search( + collection_name=_col("chunks"), + query_vector=vec, + limit=req.limit, + with_payload=True, + with_vectors=False, + query_filter=flt, + ) hits = [] for p in res: pl = p.payload or {} - hits.append({ - "chunk_id": p.id, - "score": p.score, - "note_id": pl.get("note_id"), - "title": pl.get("title"), - "path": pl.get("path"), - "preview": pl.get("preview"), - "tags": pl.get("tags"), - }) + hits.append( + { + "chunk_id": p.id, + "score": p.score, + "note_id": pl.get("note_id"), + "title": pl.get("title"), + "path": pl.get("path"), + "preview": pl.get("preview"), + "tags": pl.get("tags"), + } + ) return {"results": hits}