init: add knowledge, llm-api, scripts + repo settings

Lars 2025-08-10 08:06:10 +02:00
commit 78c99b222d
64 changed files with 6924 additions and 0 deletions

11
.editorconfig Normal file
@@ -0,0 +1,11 @@
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.{py,sh,yml}]
indent_style = space
indent_size = 4

11
.gitattributes vendored Normal file
@@ -0,0 +1,11 @@
* text=auto eol=lf
*.sh text eol=lf
*.py text eol=lf
*.yml text eol=lf
*.json text eol=lf
*.md text eol=lf
*.bat text eol=crlf
*.png binary
*.jpg binary
*.pdf binary

35
.gitignore vendored Normal file
@@ -0,0 +1,35 @@
# Python
__pycache__/
*.pyc
*.pyo
*.pyd
*.egg-info/
.python-version
# Virtualenvs
venv/
.venv/
venvs/
*/venv/
*/.venv/
# Node/JS (if present)
node_modules/
# OS/Editor
.DS_Store
Thumbs.db
# Test/Cache
.pytest_cache/
.cache/
# Logs/Temp
logs/
tmp/
# ENV/Secrets (IMPORTANT!)
.env
*/.env
*.env
secrets/

@@ -0,0 +1,5 @@
A lateral denotes the multitude of things that have a certain dependency on one another yet still form a meaningful, self-contained element of knowledge.
In sports science, laterals refer to the independent movements of the extremities (arms, legs) that can be combined into one overall movement. When balancing, for example, the legs are involved, but the arms and upper body also act to compensate.
Taken together, this forms an essential consideration in the design of training plans and concepts.

@@ -0,0 +1,68 @@
[binary file: a one-page A4 test PDF generated by the ReportLab library on 2025-08-06; raw PDF objects and compressed stream data omitted]

@@ -0,0 +1 @@
Hikite is the retracting movement of the non-striking hand. It stabilizes and reinforces the technique.

@@ -0,0 +1 @@
Mae-geri is a straight front kick. It is executed by lifting the knee and extending the leg.

@@ -0,0 +1 @@
A Talahon attack consists of a tornado kick followed by a 360-degree spin and the exhalation of garlic breath.

@@ -0,0 +1,12 @@
The technique Chūdan-zuki is a mid-level punch aimed at the opponent's solar plexus or chest. It is one of the basic techniques of karate and is used in many kata and kumite forms. The punch starts from the hikite position, with one fist drawn back at the hip. As the punch is executed, the upper body rotates slightly and the forearm stays on a straight line to the target.
It is important that the punch travels exactly along the body's centerline and that the shoulder remains relaxed. The power comes from the combination of hip rotation, body tension, and breathing. Executed correctly, Chūdan-zuki is an effective self-defense technique.
Training notes:
- Pay attention to clean posture.
- Keep the wrist stable to avoid injuries.
- Practice the technique slowly at first, then increase the speed.
- Check the hip rotation in a mirror or have a partner correct you.
- Perform at least 20 repetitions per side.
Combined with Gedan-barai and Age-uke, this yields an effective defense-and-counter sequence. This combination is especially common in the Gōjū-Ryū and Shotokan styles.

@@ -0,0 +1,19 @@
This is a first short paragraph about Mae-Geri.
The second paragraph covers Yoko-Geri and its biomechanical execution.
A third, somewhat longer paragraph describes sources of error in the execution of Mawashi-Geri, including incorrect hip rotation and a lack of stability in the standing leg. This paragraph is longer in order to test the chunking behavior on long texts.
Finally, a short sentence follows.
The philosophy of Karate-Do goes far beyond physical training. It is based on principles such as respect, self-discipline, and continuous development. Many dojos follow a code of honor that is usually recited at the beginning and end of each session.
"Do" stands for the way, the path of life. Karate is understood not as a sport one practices but as a path one walks. This path demands patience, endurance, and the willingness to accept setbacks as part of the learning process.
Traditional teaching often stresses that the best fight is the one that never has to be fought. Renouncing violence, controlling one's emotions, and acting responsibly come first. Karate is meant to help develop inner strength and outer composure.
The philosophy of Karate-Do goes far beyond physical training. It is based on principles such as respect, self-discipline, and continuous development. Many dojos follow a code of honor that is usually recited at the beginning and end of each session.
"Do" stands for the way, the path of life. Karate is understood not as a sport one practices but as a path one walks. This path demands patience, endurance, and the willingness to accept setbacks as part of the learning process.
Traditional teaching often stresses that the best fight is the one that never has to be fought. Renouncing violence, controlling one's emotions, and acting responsibly come first. Karate is meant to help develop inner strength and outer composure.

@@ -0,0 +1,7 @@
This is a first short paragraph about Mae-Geri.
The second paragraph covers Yoko-Geri and its biomechanical execution.
A third, somewhat longer paragraph describes sources of error in the execution of Mawashi-Geri, including incorrect hip rotation and a lack of stability in the standing leg. This paragraph is longer in order to test the chunking behavior on long texts.
Finally, a short sentence follows.
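The remark about testing chunking behavior suggests these documents feed a paragraph-based chunker before embedding. A minimal sketch of such a chunker, assuming paragraph splits and a 500-character cap (both the helper and the limit are illustrative, not code from this commit):

def chunk_paragraphs(text: str, max_chars: int = 500) -> list[str]:
    # One chunk per non-empty paragraph; overly long paragraphs are split hard.
    chunks: list[str] = []
    for para in (p.strip() for p in text.split("\n")):
        if not para:
            continue
        while len(para) > max_chars:
            chunks.append(para[:max_chars])
            para = para[max_chars:]
        chunks.append(para)
    return chunks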

@@ -0,0 +1 @@
Zuki denotes a punch in karate. It can be executed as Choku-zuki (a straight punch) or Gyaku-zuki (a reverse punch).

110
llm-api/archiv/app.py Normal file
@@ -0,0 +1,110 @@
from fastapi import FastAPI, Query
from pydantic import BaseModel
from typing import List
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
import requests
app = FastAPI()
# Initialization
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(host="localhost", port=6333)
# COLLECTION = "karate-doku"
OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "mistral" # kann später auch geändert werden
# Embedding input
class EmbedRequest(BaseModel):
texts: List[str]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
@app.post("/embed")
def embed_texts(data: EmbedRequest):
collection_name = data.collection
if not qdrant.collection_exists(collection_name):
qdrant.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
embeddings = model.encode(data.texts).tolist()
points = [
PointStruct(id=i, vector=vec, payload={"text": data.texts[i]})
for i, vec in enumerate(embeddings)
]
qdrant.upsert(collection_name=collection_name, points=points)
return {"status": "✅ embeddings saved", "count": len(points), "collection": collection_name}
@app.get("/search")
def search_text(query: str = Query(...), limit: int = 3, collection: str = Query(...)):
vec = model.encode(query).tolist()
results = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [{"score": r.score, "text": r.payload["text"]} for r in results]
@app.post("/prompt")
def generate_prompt(data: PromptRequest):
query_vec = model.encode(data.query).tolist()
# Search for relevant entries in the given collection
results = qdrant.search(
collection_name=data.collection,
query_vector=query_vec,
limit=data.context_limit
)
# Assemble the prompt context from the retrieved texts
context = "\n".join([r.payload["text"] for r in results])
full_prompt = f"""Beantworte die folgende Frage basierend auf dem Kontext:
Kontext:
{context}
Frage:
{data.query}
"""
# Send the request to Ollama
ollama_payload = {
"model": OLLAMA_MODEL,
"prompt": full_prompt,
"stream": False
}
response = requests.post(OLLAMA_URL, json=ollama_payload)
response.raise_for_status()
answer = response.json()["response"]
return {
"answer": answer,
"context": context,
"collection": data.collection
}

@@ -0,0 +1,167 @@
from fastapi import FastAPI, Query, HTTPException, Request
from fastapi.responses import JSONResponse
from fastapi.openapi.utils import get_openapi
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct, PointIdsList
from qdrant_client.http.models import Filter, FieldCondition, MatchValue
from uuid import uuid4
import requests
import os
from datetime import datetime
# Version bumped
__version__ = "1.0.20"
print(f"[DEBUG] llm_api.py version {__version__} loaded from {__file__}", flush=True)
# Ollama configuration
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "mistral:latest")
# FastAPI instance
app = FastAPI(
title="KI Trainerassistent API",
description="Lokale API für Karate- & Gewaltschutz-Trainingsplanung",
version=__version__,
docs_url="/docs",
redoc_url="/redoc",
openapi_url="/openapi.json"
)
# Global error handler
@app.exception_handler(Exception)
async def unicorn_exception_handler(request: Request, exc: Exception):
return JSONResponse(status_code=500, content={"detail": "Interner Serverfehler. Bitte später erneut versuchen."})
# Data models
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = ""
title: str = ""
version: str = ""
related_to: str = ""
tags: List[str] = []
owner: str = ""
context_tag: Optional[str] = None
imported_at: Optional[str] = None
chunk_index: Optional[int] = None
category: Optional[str] = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
class EmbedResponse(BaseModel):
status: str
count: int
collection: str
class SearchResultItem(BaseModel):
score: float = Field(..., ge=0)
text: str
class PromptResponse(BaseModel):
answer: str
context: str
collection: str
class DeleteResponse(BaseModel):
status: str
count: int
collection: str
source: Optional[str] = None
type: Optional[str] = None
# Embedding model and Qdrant client
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(host=os.getenv("QDRANT_HOST", "localhost"), port=int(os.getenv("QDRANT_PORT", 6333)))
# /embed
@app.post("/embed", response_model=EmbedResponse)
def embed_texts(data: EmbedRequest):
if not data.chunks:
raise HTTPException(status_code=400, detail="'chunks' darf nicht leer sein.")
coll = data.collection
if not qdrant.collection_exists(coll):
qdrant.recreate_collection(collection_name=coll,
vectors_config=VectorParams(size=model.get_sentence_embedding_dimension(), distance=Distance.COSINE)
)
embeddings = model.encode([c.text for c in data.chunks]).tolist()
points = [PointStruct(id=str(uuid4()), vector=embeddings[i], payload={'text': c.text, 'source': c.source})
for i, c in enumerate(data.chunks)]
qdrant.upsert(collection_name=coll, points=points)
return EmbedResponse(status="✅ Saved", count=len(points), collection=coll)
# /search
@app.get("/search", response_model=List[SearchResultItem])
def search_text(query: str = Query(..., min_length=1), limit: int = Query(3, ge=1), collection: str = Query("default")):
vec = model.encode(query).tolist()
res = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [SearchResultItem(score=r.score, text=r.payload['text']) for r in res]
# /prompt
@app.post("/prompt", response_model=PromptResponse)
def prompt(data: PromptRequest):
if not data.query.strip(): raise HTTPException(status_code=400, detail="'query' darf nicht leer sein.")
if not (1 <= data.context_limit <= 10): raise HTTPException(status_code=400, detail="'context_limit' muss zwischen 1 und 10 liegen.")
hits = qdrant.search(collection_name=data.collection, query_vector=model.encode(data.query).tolist(), limit=data.context_limit)
context = '\n'.join(h.payload['text'] for h in hits)
payload = {'model': OLLAMA_MODEL, 'prompt': f"Context:\n{context}\nQuestion: {data.query}", 'stream': False}
try:
r = requests.post(OLLAMA_URL, json=payload, timeout=30); r.raise_for_status()
except Exception:
raise HTTPException(status_code=502, detail="LLM-Service-Fehler.")
return PromptResponse(answer=r.json().get('response', ''), context=context, collection=data.collection)
# /delete-source (new routine following the originally working logic)
@app.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
collection: str = Query(...),
source: str = Query(...),
type: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
# Filter conditions
must = [{"key": "source", "match": {"value": source}}]
if type:
must.append({"key": "type", "match": {"value": type}})
# Collect the IDs via scroll_filter
try:
points, _ = qdrant.scroll(
collection_name=collection,
scroll_filter={"must": must},
limit=10000
)
except Exception as exc:
print(f"[ERROR] Scroll failed: {exc}", flush=True)
raise HTTPException(status_code=500, detail="Fehler beim Abrufen der Punkte vor dem Löschen.")
point_ids = [str(pt.id) for pt in points]
if not point_ids:
return DeleteResponse(status="🔍 Keine passenden Einträge gefunden.", count=0, collection=collection, source=source, type=type)
# Delete via PointIdsList(points=...)
try:
qdrant.delete(
collection_name=collection,
points_selector=PointIdsList(points=point_ids)
)
except Exception as exc:
print(f"[ERROR] Delete failed: {exc}", flush=True)
raise HTTPException(status_code=500, detail="Fehler beim Löschen nach Source.")
return DeleteResponse(status="🗑️ gelöscht", count=len(point_ids), collection=collection, source=source, type=type)
# /delete-collection
@app.delete("/delete-collection", response_model=DeleteResponse)
def delete_collection(collection: str = Query(...)):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
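A sketch of exercising the two delete endpoints above, again assuming a local deployment (URL, collection, and source name are illustrative):

import requests

BASE = "http://localhost:8000"  # assumed

# Remove every point whose payload "source" matches one imported file.
r = requests.delete(f"{BASE}/delete-source",
                    params={"collection": "default", "source": "mae_geri.txt"})
print(r.json())  # DeleteResponse with the number of removed points

# Drop the whole collection afterwards.
requests.delete(f"{BASE}/delete-collection", params={"collection": "default"})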

@@ -0,0 +1,196 @@
from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel
from typing import List
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from uuid import uuid4
import requests
from datetime import datetime
from qdrant_client.models import PointIdsList
app = FastAPI()
# Configuration
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(host="localhost", port=6333)
OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "mistral"
# Data models
from typing import List, Dict, Any
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = "file"
title: str | None = None
version: str | None = None
related_to: str | None = None
tags: List[str] = []
owner: str | None = None
context_tag: str | None = None
imported_at: str | None = None
chunk_index: int | None = None
category: str | None = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
@app.delete("/delete-source")
def delete_by_source(
collection: str = Query(...),
source: str = Query(...),
type: str = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
must = [{"key": "source", "match": {"value": source}}]
if type:
must.append({"key": "type", "match": {"value": type}})
result = qdrant.scroll(
collection_name=collection,
scroll_filter={"must": must},
limit=10000
)
points = result[0]
if not points:
return {"status": "🔍 Keine passenden Einträge gefunden."}
point_ids = []
for point in points:
pid = point.id
point_ids.append(str(pid)) # always cast to string
qdrant.delete(
collection_name=collection,
points_selector=PointIdsList(points=point_ids)
)
return {
"status": "🗑️ gelöscht",
"count": len(point_ids),
"collection": collection,
"source": source,
"type": type
}
@app.delete("/delete-collection")
def delete_collection(collection: str = Query(...)):
"""
Deletes an entire collection from Qdrant.
"""
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return {"status": "🗑️ gelöscht", "collection": collection}
@app.post("/embed")
def embed_texts(data: EmbedRequest):
collection_name = data.collection
if not qdrant.collection_exists(collection_name):
qdrant.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
embeddings = model.encode([chunk.text for chunk in data.chunks]).tolist()
points = []
for i, chunk in enumerate(data.chunks):
payload = {
"text": chunk.text,
"source": chunk.source,
"source_type": chunk.source_type,
"title": chunk.title,
"version": chunk.version,
"related_to": chunk.related_to,
"tags": chunk.tags,
"owner": chunk.owner,
"context_tag": chunk.context_tag,
"imported_at": chunk.imported_at or datetime.utcnow().isoformat(),
"chunk_index": chunk.chunk_index,
"category": chunk.category or data.collection
}
point = PointStruct(
id=str(uuid4()),
vector=embeddings[i],
payload=payload
)
points.append(point)
qdrant.upsert(collection_name=collection_name, points=points)
return {
"status": "✅ embeddings saved",
"count": len(points),
"collection": collection_name
}
@app.get("/search")
def search_text(query: str = Query(...), limit: int = 3, collection: str = Query(...)):
vec = model.encode(query).tolist()
results = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [{"score": r.score, "text": r.payload["text"]} for r in results]
@app.post("/prompt")
def generate_prompt(data: PromptRequest):
query_vec = model.encode(data.query).tolist()
results = qdrant.search(
collection_name=data.collection,
query_vector=query_vec,
limit=data.context_limit
)
context = "\n".join([r.payload["text"] for r in results])
full_prompt = f"""Beantworte die folgende Frage basierend auf dem Kontext:
Kontext:
{context}
Frage:
{data.query}
"""
ollama_payload = {
"model": OLLAMA_MODEL,
"prompt": full_prompt,
"stream": False
}
response = requests.post(OLLAMA_URL, json=ollama_payload)
response.raise_for_status()
answer = response.json()["response"]
return {
"answer": answer,
"context": context,
"collection": data.collection
}
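Unlike the oldest archived version, this one embeds structured chunks with payload metadata. A minimal request sketch (all field values are invented for illustration):

import requests

chunk = {
    "text": "Hikite is the retracting movement of the non-striking hand.",
    "source": "hikite.txt",
    "source_type": "file",
    "tags": ["karate", "basics"],
    "chunk_index": 0,
}
requests.post("http://localhost:8000/embed",
              json={"chunks": [chunk], "collection": "karate"})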

@@ -0,0 +1,196 @@
from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel
from typing import List
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from uuid import uuid4
import requests
from datetime import datetime
from qdrant_client.models import PointIdsList
app = FastAPI()
# Configuration
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(host="localhost", port=6333)
OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "mistral"
# Data models
from typing import List, Dict, Any
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = "file"
title: str | None = None
version: str | None = None
related_to: str | None = None
tags: List[str] = []
owner: str | None = None
context_tag: str | None = None
imported_at: str | None = None
chunk_index: int | None = None
category: str | None = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
@app.delete("/delete-source")
def delete_by_source(
collection: str = Query(...),
source: str = Query(...),
type: str = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
must = [{"key": "source", "match": {"value": source}}]
if type:
must.append({"key": "type", "match": {"value": type}})
result = qdrant.scroll(
collection_name=collection,
scroll_filter={"must": must},
limit=10000
)
points = result[0]
if not points:
return {"status": "🔍 Keine passenden Einträge gefunden."}
point_ids = []
for point in points:
pid = point.id
point_ids.append(str(pid)) # always cast to string
qdrant.delete(
collection_name=collection,
points_selector=PointIdsList(points=point_ids)
)
return {
"status": "🗑️ gelöscht",
"count": len(point_ids),
"collection": collection,
"source": source,
"type": type
}
@app.delete("/delete-collection")
def delete_collection(collection: str = Query(...)):
"""
Deletes an entire collection from Qdrant.
"""
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return {"status": "🗑️ gelöscht", "collection": collection}
@app.post("/embed")
def embed_texts(data: EmbedRequest):
collection_name = data.collection
if not qdrant.collection_exists(collection_name):
qdrant.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
embeddings = model.encode([chunk.text for chunk in data.chunks]).tolist()
points = []
for i, chunk in enumerate(data.chunks):
payload = {
"text": chunk.text,
"source": chunk.source,
"source_type": chunk.source_type,
"title": chunk.title,
"version": chunk.version,
"related_to": chunk.related_to,
"tags": chunk.tags,
"owner": chunk.owner,
"context_tag": chunk.context_tag,
"imported_at": chunk.imported_at or datetime.utcnow().isoformat(),
"chunk_index": chunk.chunk_index,
"category": chunk.category or data.collection
}
point = PointStruct(
id=str(uuid4()),
vector=embeddings[i],
payload=payload
)
points.append(point)
qdrant.upsert(collection_name=collection_name, points=points)
return {
"status": "✅ embeddings saved",
"count": len(points),
"collection": collection_name
}
@app.get("/search")
def search_text(query: str = Query(...), limit: int = 3, collection: str = Query(...)):
vec = model.encode(query).tolist()
results = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [{"score": r.score, "text": r.payload["text"]} for r in results]
@app.post("/prompt")
def generate_prompt(data: PromptRequest):
query_vec = model.encode(data.query).tolist()
results = qdrant.search(
collection_name=data.collection,
query_vector=query_vec,
limit=data.context_limit
)
context = "\n".join([r.payload["text"] for r in results])
full_prompt = f"""Beantworte die folgende Frage basierend auf dem Kontext:
Kontext:
{context}
Frage:
{data.query}
"""
ollama_payload = {
"model": OLLAMA_MODEL,
"prompt": full_prompt,
"stream": False
}
response = requests.post(OLLAMA_URL, json=ollama_payload)
response.raise_for_status()
answer = response.json()["response"]
return {
"answer": answer,
"context": context,
"collection": data.collection
}

@@ -0,0 +1,220 @@
from fastapi import FastAPI, Query, HTTPException
from fastapi.openapi.utils import get_openapi
from pydantic import BaseModel
from typing import List
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from uuid import uuid4
import requests
from datetime import datetime
from qdrant_client.models import PointIdsList
app = FastAPI(
title="Lokaler KI Agent",
description="Lokale API zur Ansteuerung des LLM und qdrant",
version="1.0.0",
docs_url="/docs",
redoc_url="/redoc",
openapi_url="/openapi.json",
)
# Configuration
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(host="localhost", port=6333)
OLLAMA_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "mistral"
# Data models
from typing import List, Dict, Any
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = "file"
title: str | None = None
version: str | None = None
related_to: str | None = None
tags: List[str] = []
owner: str | None = None
context_tag: str | None = None
imported_at: str | None = None
chunk_index: int | None = None
category: str | None = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
@app.delete("/delete-source")
def delete_by_source(
collection: str = Query(...),
source: str = Query(...),
type: str = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
must = [{"key": "source", "match": {"value": source}}]
if type:
must.append({"key": "type", "match": {"value": type}})
result = qdrant.scroll(
collection_name=collection,
scroll_filter={"must": must},
limit=10000
)
points = result[0]
if not points:
return {"status": "🔍 Keine passenden Einträge gefunden."}
point_ids = []
for point in points:
pid = point.id
point_ids.append(str(pid)) # always cast to string
qdrant.delete(
collection_name=collection,
points_selector=PointIdsList(points=point_ids)
)
return {
"status": "🗑️ gelöscht",
"count": len(point_ids),
"collection": collection,
"source": source,
"type": type
}
@app.delete("/delete-collection")
def delete_collection(collection: str = Query(...)):
"""
Deletes an entire collection from Qdrant.
"""
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return {"status": "🗑️ gelöscht", "collection": collection}
@app.post("/embed")
def embed_texts(data: EmbedRequest):
collection_name = data.collection
if not qdrant.collection_exists(collection_name):
qdrant.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
embeddings = model.encode([chunk.text for chunk in data.chunks]).tolist()
points = []
for i, chunk in enumerate(data.chunks):
payload = {
"text": chunk.text,
"source": chunk.source,
"source_type": chunk.source_type,
"title": chunk.title,
"version": chunk.version,
"related_to": chunk.related_to,
"tags": chunk.tags,
"owner": chunk.owner,
"context_tag": chunk.context_tag,
"imported_at": chunk.imported_at or datetime.utcnow().isoformat(),
"chunk_index": chunk.chunk_index,
"category": chunk.category or data.collection
}
point = PointStruct(
id=str(uuid4()),
vector=embeddings[i],
payload=payload
)
points.append(point)
qdrant.upsert(collection_name=collection_name, points=points)
return {
"status": "✅ embeddings saved",
"count": len(points),
"collection": collection_name
}
@app.get("/search")
def search_text(query: str = Query(...), limit: int = 3, collection: str = Query(...)):
vec = model.encode(query).tolist()
results = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [{"score": r.score, "text": r.payload["text"]} for r in results]
@app.post("/prompt")
def generate_prompt(data: PromptRequest):
query_vec = model.encode(data.query).tolist()
results = qdrant.search(
collection_name=data.collection,
query_vector=query_vec,
limit=data.context_limit
)
context = "\n".join([r.payload["text"] for r in results])
full_prompt = f"""Beantworte die folgende Frage basierend auf dem Kontext:
Kontext:
{context}
Frage:
{data.query}
"""
ollama_payload = {
"model": OLLAMA_MODEL,
"prompt": full_prompt,
"stream": False
}
response = requests.post(OLLAMA_URL, json=ollama_payload)
response.raise_for_status()
answer = response.json()["response"]
return {
"answer": answer,
"context": context,
"collection": data.collection
}
def custom_openapi():
if app.openapi_schema:
return app.openapi_schema
openapi_schema = get_openapi(
title=app.title,
version=app.version,
description=app.description,
routes=app.routes,
)
# security schemes, for example, can be added here
app.openapi_schema = openapi_schema
return app.openapi_schema
app.openapi = custom_openapi
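A sketch of the kind of addition the comment inside custom_openapi anticipates; the ApiKeyAuth scheme and header name are illustrative assumptions, not part of this commit:

# inside custom_openapi(), before the schema is cached:
openapi_schema.setdefault("components", {})["securitySchemes"] = {
    "ApiKeyAuth": {"type": "apiKey", "in": "header", "name": "X-API-Key"}
}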

24
llm-api/clients.py Normal file
@@ -0,0 +1,24 @@
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance
import os
# Embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")
# Qdrant client
qdrant = QdrantClient(
host=os.getenv("QDRANT_HOST", "localhost"),
port=int(os.getenv("QDRANT_PORT", 6333))
)
# Initialize collections
for coll in ["exercises", "training_plans"]:
if not qdrant.collection_exists(coll):
qdrant.recreate_collection(
collection_name=coll,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
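The routers below import these shared singletons instead of constructing their own clients. A minimal sketch of that pattern (the /ping route is hypothetical):

from fastapi import APIRouter
from clients import model, qdrant  # shared embedding model and Qdrant client

router = APIRouter()

@router.get("/ping")
def ping():
    # Both singletons are ready as soon as clients.py has been imported.
    return {"dim": model.get_sentence_embedding_dimension(),
            "collections": [c.name for c in qdrant.get_collections().collections]}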

126
llm-api/embed_router.py Normal file
@@ -0,0 +1,126 @@
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import List, Optional
from uuid import uuid4
from clients import model, qdrant
from qdrant_client.models import PointStruct, VectorParams, Distance, PointIdsList
import requests, os
router = APIRouter()
# Models
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = ""
title: str = ""
version: str = ""
related_to: str = ""
tags: List[str] = []
owner: str = ""
context_tag: Optional[str] = None
imported_at: Optional[str] = None
chunk_index: Optional[int] = None
category: Optional[str] = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str = Field(..., description="Suchanfrage")
context_limit: int = Field(default=3, ge=1, le=10, description="Anzahl Kontext-Dokumente")
collection: str = Field(default="default", description="Qdrant-Collection")
class PromptResponse(BaseModel):
answer: str
context: str
collection: str
class DeleteResponse(BaseModel):
status: str
count: int
collection: str
source: Optional[str] = None
type: Optional[str] = None
# Endpoints
@router.post("/embed")
def embed_texts(data: EmbedRequest):
if not qdrant.collection_exists(data.collection):
qdrant.recreate_collection(
collection_name=data.collection,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
embeddings = model.encode([c.text for c in data.chunks]).tolist()
points = [PointStruct(id=str(uuid4()), vector=emb, payload=c.dict())
for emb, c in zip(embeddings, data.chunks)]
qdrant.upsert(collection_name=data.collection, points=points)
return {"status": "✅ embeddings saved", "count": len(points), "collection": data.collection}
@router.get("/search")
def search_text(query: str = Query(..., min_length=1), limit: int = Query(3, ge=1), collection: str = Query("default")):
vec = model.encode(query).tolist()
res = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [{"score": r.score, "text": r.payload.get("text", "")} for r in res]
@router.post("/prompt", response_model=PromptResponse)
def prompt(data: PromptRequest):
if not data.query.strip():
raise HTTPException(status_code=400, detail="'query' darf nicht leer sein.")
hits = qdrant.search(
collection_name=data.collection,
query_vector=model.encode(data.query).tolist(),
limit=data.context_limit
)
context = "\n".join(h.payload.get("text", "") for h in hits)
llm_url = os.getenv("OLLAMA_URL")
if not llm_url:
raise HTTPException(status_code=500, detail="LLM-Service-URL nicht konfiguriert.")
payload = {
"model": os.getenv("OLLAMA_MODEL"),
"prompt": f"Context:\n{context}\nQuestion: {data.query}",
"stream": False
}
try:
r = requests.post(llm_url, json=payload, timeout=30)
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"LLM-Service-Fehler: {e}")
return PromptResponse(answer=r.json().get("response", ""), context=context, collection=data.collection)
@router.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
collection: str = Query(...),
source: Optional[str] = Query(None),
type: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
filt = []
if source:
filt.append({"key": "source", "match": {"value": source}})
if type:
filt.append({"key": "type", "match": {"value": type}})
if not filt:
raise HTTPException(status_code=400, detail="Mindestens ein Filterparameter muss angegeben werden.")
pts, _ = qdrant.scroll(collection_name=collection, scroll_filter={"must": filt}, limit=10000)
ids = [str(p.id) for p in pts]
if not ids:
return DeleteResponse(status="🔍 Keine Einträge gefunden.", count=0, collection=collection)
qdrant.delete(collection_name=collection, points_selector=PointIdsList(points=ids))
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=collection)
# Delete entire collection
@router.delete("/delete-collection", response_model=DeleteResponse)
def delete_collection(
collection: str = Query(...)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
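This router reads OLLAMA_URL and OLLAMA_MODEL strictly from the environment and answers /prompt with an error when the URL is unset. A plausible local .env providing them (the values are the defaults used elsewhere in this commit, assumed here):

OLLAMA_URL=http://localhost:11434/api/generate
OLLAMA_MODEL=mistral:latest
QDRANT_HOST=localhost
QDRANT_PORT=6333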

@@ -0,0 +1,126 @@
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import List, Optional
from uuid import uuid4
from clients import model, qdrant
from qdrant_client.models import PointStruct, VectorParams, Distance, PointIdsList
import requests, os
router = APIRouter()
# Models
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = ""
title: str = ""
version: str = ""
related_to: str = ""
tags: List[str] = []
owner: str = ""
context_tag: Optional[str] = None
imported_at: Optional[str] = None
chunk_index: Optional[int] = None
category: Optional[str] = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str = Field(..., description="Suchanfrage")
context_limit: int = Field(default=3, ge=1, le=10, description="Anzahl Kontext-Dokumente")
collection: str = Field(default="default", description="Qdrant-Collection")
class PromptResponse(BaseModel):
answer: str
context: str
collection: str
class DeleteResponse(BaseModel):
status: str
count: int
collection: str
source: Optional[str] = None
type: Optional[str] = None
# Endpoints
@router.post("/embed")
def embed_texts(data: EmbedRequest):
if not qdrant.collection_exists(data.collection):
qdrant.recreate_collection(
collection_name=data.collection,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
embeddings = model.encode([c.text for c in data.chunks]).tolist()
points = [PointStruct(id=str(uuid4()), vector=emb, payload=c.dict())
for emb, c in zip(embeddings, data.chunks)]
qdrant.upsert(collection_name=data.collection, points=points)
return {"status": "✅ embeddings saved", "count": len(points), "collection": data.collection}
@router.get("/search")
def search_text(query: str = Query(..., min_length=1), limit: int = Query(3, ge=1), collection: str = Query("default")):
vec = model.encode(query).tolist()
res = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [{"score": r.score, "text": r.payload.get("text", "")} for r in res]
@router.post("/prompt", response_model=PromptResponse)
def prompt(data: PromptRequest):
if not data.query.strip():
raise HTTPException(status_code=400, detail="'query' darf nicht leer sein.")
hits = qdrant.search(
collection_name=data.collection,
query_vector=model.encode(data.query).tolist(),
limit=data.context_limit
)
context = "\n".join(h.payload.get("text", "") for h in hits)
llm_url = os.getenv("OLLAMA_URL")
if not llm_url:
raise HTTPException(status_code=500, detail="LLM-Service-URL nicht konfiguriert.")
payload = {
"model": os.getenv("OLLAMA_MODEL"),
"prompt": f"Context:\n{context}\nQuestion: {data.query}",
"stream": False
}
try:
r = requests.post(llm_url, json=payload, timeout=30)
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"LLM-Service-Fehler: {e}")
return PromptResponse(answer=r.json().get("response", ""), context=context, collection=data.collection)
@router.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
collection: str = Query(...),
source: Optional[str] = Query(None),
type: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
filt = []
if source:
filt.append({"key": "source", "match": {"value": source}})
if type:
filt.append({"key": "type", "match": {"value": type}})
if not filt:
raise HTTPException(status_code=400, detail="Mindestens ein Filterparameter muss angegeben werden.")
pts, _ = qdrant.scroll(collection_name=collection, scroll_filter={"must": filt}, limit=10000)
ids = [str(p.id) for p in pts]
if not ids:
return DeleteResponse(status="🔍 Keine Einträge gefunden.", count=0, collection=collection)
qdrant.delete(collection_name=collection, points_selector=PointIdsList(points=ids))
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=collection)
# Delete entire collection
@router.delete("/delete-collection", response_model=DeleteResponse)
def delete_collection(
collection: str = Query(...)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)

181
llm-api/exercise_router.py Normal file
@@ -0,0 +1,181 @@
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from uuid import uuid4
from datetime import datetime, date
from clients import model, qdrant
from qdrant_client.models import PointStruct, VectorParams, Distance, PointIdsList
import os
router = APIRouter()
# ---- Models ----
class Exercise(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
summary: str
short_description: str
keywords: List[str] = []
link: Optional[str] = None
discipline: str
group: Optional[str] = None
age_group: str
target_group: str
min_participants: int
duration_minutes: int
capabilities: Dict[str, int] = {}
category: str
purpose: str
execution: str
notes: str
preparation: str
method: str
equipment: List[str] = []
class PhaseExercise(BaseModel):
exercise_id: str
cond_load: Dict[str, Any] = {}
coord_load: Dict[str, Any] = {}
instructions: str
class PlanPhase(BaseModel):
name: str
duration_minutes: int
method: str
method_notes: str
exercises: List[PhaseExercise]
class TrainingPlan(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
short_description: str
collection: str
discipline: str
group: Optional[str] = None
dojo: str
date: date
plan_duration_weeks: int
focus_areas: List[str] = []
predecessor_plan_id: Optional[str] = None
age_group: str
created_at: datetime = Field(default_factory=datetime.utcnow)
phases: List[PlanPhase]
class DeleteResponse(BaseModel):
status: str
count: int
collection: str
source: Optional[str] = None
type: Optional[str] = None
# ---- CRUD Endpoints for Exercise ----
@router.post("/exercise", response_model=Exercise)
def create_exercise(ex: Exercise):
# Ensure Exercise collection exists
if not qdrant.collection_exists("exercises"):
qdrant.recreate_collection(
collection_name="exercises",
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
vec = model.encode(f"{ex.title}. {ex.summary}").tolist()
point = PointStruct(id=ex.id, vector=vec, payload=ex.dict())
qdrant.upsert(collection_name="exercises", points=[point])
return ex
@router.get("/exercise", response_model=List[Exercise])
def list_exercises(
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
tags: Optional[str] = Query(None)
):
filters = []
if discipline:
filters.append({"key": "discipline", "match": {"value": discipline}})
if group:
filters.append({"key": "group", "match": {"value": group}})
if tags:
for t in tags.split(","):
filters.append({"key": "keywords", "match": {"value": t.strip()}})
pts, _ = qdrant.scroll(
collection_name="exercises",
scroll_filter={"must": filters} if filters else None,
limit=10000
)
return [Exercise(**pt.payload) for pt in pts]
# ---- CRUD Endpoints for TrainingPlan ----
@router.post("/plan", response_model=TrainingPlan)
def create_plan(plan: TrainingPlan):
# Ensure TrainingPlan collection exists
if not qdrant.collection_exists("training_plans"):
qdrant.recreate_collection(
collection_name="training_plans",
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
vec = model.encode(f"{plan.title}. {plan.short_description}").tolist()
point = PointStruct(id=plan.id, vector=vec, payload=plan.dict())
qdrant.upsert(collection_name="training_plans", points=[point])
return plan
@router.get("/plan", response_model=List[TrainingPlan])
def list_plans(
collection: str = Query("training_plans"),
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
dojo: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
return []
pts, _ = qdrant.scroll(collection_name=collection, limit=10000)
result = []
for pt in pts:
pl = TrainingPlan(**pt.payload)
if discipline and pl.discipline != discipline:
continue
if group and pl.group != group:
continue
if dojo and pl.dojo != dojo:
continue
result.append(pl)
return result
# ---- Delete Endpoints ----
@router.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
collection: str = Query(...),
source: Optional[str] = Query(None),
type: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
filt = []
if source:
filt.append({"key": "source", "match": {"value": source}})
if type:
filt.append({"key": "type", "match": {"value": type}})
if not filt:
raise HTTPException(status_code=400, detail="Mindestens ein Filterparameter muss angegeben werden.")
pts, _ = qdrant.scroll(collection_name=collection, scroll_filter={"must": filt}, limit=10000)
ids = [str(p.id) for p in pts]
if not ids:
return DeleteResponse(status="🔍 Keine Einträge gefunden.", count=0, collection=collection)
qdrant.delete(collection_name=collection, points_selector=PointIdsList(points=ids))
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=collection)
@router.delete("/delete-collection", response_model=DeleteResponse)
def delete_collection(
collection: str = Query(...)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
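A usage sketch for the exercise CRUD endpoints above (every field value is invented for illustration):

import requests

exercise = {
    "title": "Mae-geri drill",
    "summary": "Straight front kick practiced in line work.",
    "short_description": "Front kick basics",
    "discipline": "karate",
    "age_group": "adults",
    "target_group": "beginners",
    "min_participants": 2,
    "duration_minutes": 10,
    "category": "kihon",
    "purpose": "technique",
    "execution": "Lift the knee, extend the leg, retract.",
    "notes": "",
    "preparation": "",
    "method": "line work",
}
r = requests.post("http://localhost:8000/exercise", json=exercise)
print(r.json()["id"])
print(requests.get("http://localhost:8000/exercise",
                   params={"discipline": "karate"}).json())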

33
llm-api/llm_api.py Normal file
@@ -0,0 +1,33 @@
from dotenv import load_dotenv
load_dotenv() # loads variables from .env into os.environ
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from clients import model, qdrant
from wiki_router import router as wiki_router
from embed_router import router as embed_router
from exercise_router import router as exercise_router
# Version
__version__ = "1.1.6"
print(f"[DEBUG] llm_api.py version {__version__} loaded from {__file__}", flush=True)
# FastAPI instance
app = FastAPI(
title="KI Trainerassistent API",
description="Modulare API für Trainingsplanung und MediaWiki-Import",
version=__version__,
)
# Global error handler
@app.exception_handler(Exception)
async def unicorn_exception_handler(request, exc):
return JSONResponse(status_code=500, content={"detail": "Interner Serverfehler."})
# Mount the routers
app.include_router(wiki_router, prefix="/import/wiki")
app.include_router(embed_router)
app.include_router(exercise_router)
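A minimal launcher sketch for the modular app (assumed; the repo may start it via a script or container instead):

import uvicorn

if __name__ == "__main__":
    # Serves the FastAPI app defined in llm_api.py on an assumed port.
    uvicorn.run("llm_api:app", host="0.0.0.0", port=8000)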

@@ -0,0 +1,421 @@
from fastapi import FastAPI, Query, HTTPException, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct, PointIdsList
from uuid import uuid4
import requests
import os
from datetime import datetime, date
# Version bumped
__version__ = "1.1.6"
print(f"[DEBUG] llm_api.py version {__version__} loaded from {__file__}", flush=True)
# Ollama configuration
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "mistral:latest")
# -----------------------
# MediaWiki configuration
# -----------------------
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
WIKI_BOT_USER = os.getenv("WIKI_BOT_USER", "")
WIKI_BOT_PASSWORD = os.getenv("WIKI_BOT_PASSWORD", "")
# FastAPI instance
app = FastAPI(
title="KI Trainerassistent API",
description="Lokale API für Trainingsplanung",
version=__version__,
docs_url="/docs",
redoc_url="/redoc",
openapi_url="/openapi.json"
)
# Global error handler
@app.exception_handler(Exception)
async def unicorn_exception_handler(request: Request, exc: Exception):
return JSONResponse(status_code=500, content={"detail": "Interner Serverfehler. Bitte später erneut versuchen."})
# Global session for the MediaWiki API
wiki_session = requests.Session()
# Health check for MediaWiki
@app.get("/import/wiki/health")
def wiki_health():
"""
Checks whether the MediaWiki server is reachable.
"""
params = {"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=5)
r.raise_for_status()
resp = r.json()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
# Try to read the server name, but return OK even if it is missing
server = resp.get("query", {}).get("general", {}).get("servername")
if server:
return {"status": "ok", "server": server}
return {"status": "ok", "server": None}
# ------------------------
# MediaWiki Login Endpoint
# ------------------------
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: Optional[str] = None
@app.post("/import/wiki/login", response_model=WikiLoginResponse)
def wiki_login(data: WikiLoginRequest):
"""
Performs a login against the MediaWiki API and stores the session cookies.
"""
# Step 1: fetch a login token
params_token = {"action": "query", "meta": "tokens", "type": "login", "format": "json"}
try:
resp1 = wiki_session.get(WIKI_API_URL, params=params_token, timeout=10)
resp1.raise_for_status()
token = resp1.json().get("query", {}).get("tokens", {}).get("logintoken")
if not token:
raise ValueError("Kein Login-Token erhalten")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Fehler Token abrufen: {e}")
# Step 2: log in with the token
login_data = {
"action": "login", "format": "json",
"lgname": data.username, "lgpassword": data.password,
"lgtoken": token
}
try:
resp2 = wiki_session.post(WIKI_API_URL, data=login_data, timeout=10)
resp2.raise_for_status()
result = resp2.json().get("login", {})
if result.get("result") != "Success":
return WikiLoginResponse(status="failed", message=result.get("reason", "Login fehlgeschlagen"))
except Exception as e:
raise HTTPException(status_code=502, detail=f"Fehler Login: {e}")
return WikiLoginResponse(status="success")
# ------------------------
# Models for embed/search
# ------------------------
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = ""
title: str = ""
version: str = ""
related_to: str = ""
tags: List[str] = []
owner: str = ""
context_tag: Optional[str] = None
imported_at: Optional[str] = None
chunk_index: Optional[int] = None
category: Optional[str] = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
class EmbedResponse(BaseModel):
status: str
count: int
collection: str
class SearchResultItem(BaseModel):
score: float = Field(..., ge=0)
text: str
class PromptResponse(BaseModel):
answer: str
context: str
collection: str
class DeleteResponse(BaseModel):
status: str
count: int
collection: str
source: Optional[str] = None
type: Optional[str] = None
# ------------------------------------
# Models for exercises & plans
# ------------------------------------
class Exercise(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
summary: str
short_description: str
keywords: List[str] = []
link: Optional[str] = None
discipline: str
group: Optional[str] = None
age_group: str
target_group: str
min_participants: int
duration_minutes: int
capabilities: Dict[str,int] = {}
category: str
purpose: str
execution: str
notes: str
preparation: str
method: str
equipment: List[str] = []
class PhaseExercise(BaseModel):
exercise_id: str
cond_load: Dict[str, Any] = {}
coord_load: Dict[str, Any] = {}
instructions: str = ""
class PlanPhase(BaseModel):
name: str
duration_minutes: int
method: str
method_notes: str = ""
exercises: List[PhaseExercise]
class TrainingPlan(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
short_description: str
collection: str
discipline: str
group: Optional[str] = None
dojo: str
date: date
plan_duration_weeks: int
focus_areas: List[str] = []
predecessor_plan_id: Optional[str] = None
age_group: str
created_at: datetime = Field(default_factory=datetime.utcnow)
phases: List[PlanPhase]
# ----------------------------------
# Embedding model and Qdrant client
# ----------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(
host=os.getenv("QDRANT_HOST", "localhost"),
port=int(os.getenv("QDRANT_PORT", 6333))
)
# Ensure the exercises collection exists
if not qdrant.collection_exists("exercises"):
qdrant.recreate_collection(
collection_name="exercises",
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
# Ensure the training-plan collection exists
PLAN_COLL = "training_plans"
if not qdrant.collection_exists(PLAN_COLL):
qdrant.recreate_collection(
collection_name=PLAN_COLL,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
# ----------------------
# Endpoints for exercises
# ----------------------
@app.post("/exercise", response_model=Exercise)
def create_exercise(ex: Exercise):
# Ensure collection exists
if not qdrant.collection_exists("exercises"):
qdrant.recreate_collection(
collection_name="exercises",
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
vec = model.encode(f"{ex.title}. {ex.summary}").tolist()
point = PointStruct(id=ex.id, vector=vec, payload=ex.dict())
qdrant.upsert(collection_name="exercises", points=[point])
return ex
@app.get("/exercise", response_model=List[Exercise])
def list_exercises(
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
tags: Optional[str] = Query(None)
):
filters = []
if discipline:
filters.append({"key":"discipline","match":{"value":discipline}})
if group:
filters.append({"key":"group","match":{"value":group}})
if tags:
for t in tags.split(","):
filters.append({"key":"keywords","match":{"value":t.strip()}})
if filters:
pts, _ = qdrant.scroll(
collection_name="exercises",
scroll_filter={"must": filters},
limit=10000
)
else:
pts, _ = qdrant.scroll(collection_name="exercises", limit=10000)
return [Exercise(**pt.payload) for pt in pts]
# -----------------
# Existing endpoints
# -----------------
@app.post("/embed")
def embed_texts(data: EmbedRequest):
collection_name = data.collection
if not qdrant.collection_exists(collection_name):
qdrant.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
embeddings = model.encode([c.text for c in data.chunks]).tolist()
points = []
for i, chunk in enumerate(data.chunks):
payload = {
"text": chunk.text,
"source": chunk.source,
"source_type": chunk.source_type,
"title": chunk.title,
"version": chunk.version,
"related_to": chunk.related_to,
"tags": chunk.tags,
"owner": chunk.owner,
"context_tag": chunk.context_tag,
"imported_at": chunk.imported_at or datetime.utcnow().isoformat(),
"chunk_index": chunk.chunk_index,
"category": chunk.category or data.collection
}
points.append(PointStruct(id=str(uuid4()), vector=embeddings[i], payload=payload))
qdrant.upsert(collection_name=collection_name, points=points)
return {"status":"✅ embeddings saved","count":len(points),"collection":collection_name}
@app.get("/search", response_model=List[SearchResultItem])
def search_text(query: str = Query(..., min_length=1), limit: int = Query(3, ge=1), collection: str = Query("default")):
vec = model.encode(query).tolist()
res = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [SearchResultItem(score=r.score, text=r.payload['text']) for r in res]
@app.post("/prompt", response_model=PromptResponse)
def prompt(data: PromptRequest):
if not data.query.strip():
raise HTTPException(status_code=400, detail="'query' darf nicht leer sein.")
if not (1 <= data.context_limit <= 10):
raise HTTPException(status_code=400, detail="'context_limit' muss zwischen 1 und 10 liegen.")
hits = qdrant.search(
collection_name=data.collection,
query_vector=model.encode(data.query).tolist(),
limit=data.context_limit
)
context = "\n".join(h.payload['text'] for h in hits)
payload = {"model":OLLAMA_MODEL,"prompt":f"Context:\n{context}\nQuestion: {data.query}","stream":False}
try:
r = requests.post(OLLAMA_URL, json=payload, timeout=30)
r.raise_for_status()
except Exception:
raise HTTPException(status_code=502, detail="LLM-Service-Fehler.")
return PromptResponse(answer=r.json().get("response",""), context=context, collection=data.collection)
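# Sketch of the non-streaming request body sent to Ollama above (the model
# name depends on OLLAMA_MODEL):
#   {"model": "mistral:latest",
#    "prompt": "Context:\n<retrieved chunks>\nQuestion: <query>",
#    "stream": false}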
@app.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
collection: str = Query(...),
source: Optional[str] = Query(None),
type: Optional[str] = Query(None),
owner: Optional[str] = Query(None),
category: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
filt = []
if source: filt.append({"key":"source","match":{"value":source}})
if type: filt.append({"key":"type","match":{"value":type}})
if owner: filt.append({"key":"owner","match":{"value":owner}})
if category: filt.append({"key":"category","match":{"value":category}})
if not filt:
raise HTTPException(status_code=400, detail="Mindestens ein Filterparameter muss angegeben werden.")
pts, _ = qdrant.scroll(collection_name=collection, scroll_filter={"must":filt}, limit=10000)
ids = [str(p.id) for p in pts]
if not ids:
return DeleteResponse(status="🔍 Keine passenden Einträge gefunden.", count=0, collection=collection)
qdrant.delete(collection_name=collection, points_selector=PointIdsList(points=ids))
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=collection)
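# Note: scroll() is called with limit=10000, so one request deletes at most
# 10000 matching points; larger collections would need to paginate via the
# offset that scroll() returns as its second tuple element.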
@app.delete("/delete-collection", response_model=DeleteResponse)
def delete_collection(collection: str = Query(...)):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
# ------------------------
# Endpoints for training plans
# ------------------------
@app.post("/plan", response_model=TrainingPlan)
def create_plan(plan: TrainingPlan):
# Ensure plan collection exists
if not qdrant.collection_exists(PLAN_COLL):
qdrant.recreate_collection(
collection_name=PLAN_COLL,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
vec = model.encode(f"{plan.title}. {plan.short_description}").tolist()
payload = plan.dict()
qdrant.upsert(collection_name=PLAN_COLL, points=[PointStruct(id=plan.id, vector=vec, payload=payload)])
return plan
@app.get("/plan", response_model=List[TrainingPlan])
def list_plans(
collection: str = Query(PLAN_COLL),
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
dojo: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
return []
pts, _ = qdrant.scroll(collection_name=collection, limit=10000)
result: List[TrainingPlan] = []
for pt in pts:
plan = TrainingPlan(**pt.payload)
if discipline and plan.discipline != discipline: continue
if group and plan.group != group: continue
if dojo and plan.dojo != dojo: continue
result.append(plan)
return result
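# Note: /plan filters client-side: the whole collection is scrolled and then
# narrowed in Python, whereas /exercise pushes its filters down to Qdrant
# via scroll_filter.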

View File

@ -0,0 +1,319 @@
from fastapi import FastAPI, Query, HTTPException, Request
from fastapi.responses import JSONResponse
from fastapi.openapi.utils import get_openapi
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct, PointIdsList
from uuid import uuid4
import requests
import os
from datetime import datetime, date
# Version bumped
__version__ = "1.1.1"
print(f"[DEBUG] llm_api.py version {__version__} loaded from {__file__}", flush=True)
# Ollama configuration
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "mistral:latest")
# FastAPI instance
app = FastAPI(
title="KI Trainerassistent API",
description="Lokale API für Trainingsplanung",
version=__version__,
docs_url="/docs",
redoc_url="/redoc",
openapi_url="/openapi.json"
)
# Global error handler
@app.exception_handler(Exception)
async def unicorn_exception_handler(request: Request, exc: Exception):
return JSONResponse(status_code=500, content={"detail": "Interner Serverfehler. Bitte später erneut versuchen."})
# ------------------------
# Models for embed/search
# ------------------------
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = ""
title: str = ""
version: str = ""
related_to: str = ""
tags: List[str] = []
owner: str = ""
context_tag: Optional[str] = None
imported_at: Optional[str] = None
chunk_index: Optional[int] = None
category: Optional[str] = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
class EmbedResponse(BaseModel):
status: str
count: int
collection: str
class SearchResultItem(BaseModel):
score: float = Field(..., ge=0)
text: str
class PromptResponse(BaseModel):
answer: str
context: str
collection: str
class DeleteResponse(BaseModel):
status: str
count: int
collection: str
source: Optional[str] = None
type: Optional[str] = None
# ------------------------------------
# New models for exercises & plans
# ------------------------------------
class Exercise(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
summary: str
short_description: str
keywords: List[str] = []
link: Optional[str] = None
discipline: str
group: Optional[str] = None
age_group: str
target_group: str
min_participants: int
duration_minutes: int
capabilities: Dict[str,int] = {}
category: str
purpose: str
execution: str
notes: str
preparation: str
method: str
equipment: List[str] = []
class PhaseExercise(BaseModel):
exercise_id: str
cond_load: Dict[str, Any] = {}
coord_load: Dict[str, Any] = {}
instructions: str = ""
class PlanPhase(BaseModel):
name: str
duration_minutes: int
method: str
method_notes: str = ""
exercises: List[PhaseExercise]
class TrainingPlan(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
short_description: str
collection: str
discipline: str
group: Optional[str] = None
dojo: str
date: date
plan_duration_weeks: int
focus_areas: List[str] = []
predecessor_plan_id: Optional[str] = None
age_group: str
created_at: datetime = Field(default_factory=datetime.utcnow)
phases: List[PlanPhase]
# ----------------------------------
# Embedding model and Qdrant client
# ----------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(
host=os.getenv("QDRANT_HOST", "localhost"),
port=int(os.getenv("QDRANT_PORT", 6333))
)
# Ensure Exercise-Collection exists
if not qdrant.collection_exists("exercises"):
qdrant.recreate_collection(
collection_name="exercises",
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
# ----------------------
# Endpoints for exercises
# ----------------------
@app.post("/exercise", response_model=Exercise)
def create_exercise(ex: Exercise):
vec = model.encode(f"{ex.title}. {ex.summary}").tolist()
point = PointStruct(id=ex.id, vector=vec, payload=ex.dict())
qdrant.upsert(collection_name="exercises", points=[point])
return ex
@app.get("/exercise", response_model=List[Exercise])
def list_exercises(
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
tags: Optional[str] = Query(None)  # comma-separated
):
filters = []
if discipline:
filters.append({"key":"discipline","match":{"value":discipline}})
if group:
filters.append({"key":"group","match":{"value":group}})
if tags:
for t in tags.split(","):
filters.append({"key":"keywords","match":{"value":t.strip()}})
if filters:
pts, _ = qdrant.scroll(
collection_name="exercises",
scroll_filter={"must": filters},
limit=10000
)
else:
pts, _ = qdrant.scroll(collection_name="exercises", limit=10000)
return [Exercise(**pt.payload) for pt in pts]
# -----------------
# Existing endpoints
# -----------------
@app.post("/embed")
def embed_texts(data: EmbedRequest):
collection_name = data.collection
if not qdrant.collection_exists(collection_name):
qdrant.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
embeddings = model.encode([c.text for c in data.chunks]).tolist()
points = []
for i, chunk in enumerate(data.chunks):
payload = {
"text": chunk.text,
"source": chunk.source,
"source_type": chunk.source_type,
"title": chunk.title,
"version": chunk.version,
"related_to": chunk.related_to,
"tags": chunk.tags,
"owner": chunk.owner,
"context_tag": chunk.context_tag,
"imported_at": chunk.imported_at or datetime.utcnow().isoformat(),
"chunk_index": chunk.chunk_index,
"category": chunk.category or data.collection
}
points.append(PointStruct(id=str(uuid4()), vector=embeddings[i], payload=payload))
qdrant.upsert(collection_name=collection_name, points=points)
return {"status":"✅ embeddings saved","count":len(points),"collection":collection_name}
@app.get("/search", response_model=List[SearchResultItem])
def search_text(query: str = Query(..., min_length=1), limit: int = Query(3, ge=1), collection: str = Query("default")):
vec = model.encode(query).tolist()
res = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [SearchResultItem(score=r.score, text=r.payload['text']) for r in res]
@app.post("/prompt", response_model=PromptResponse)
def prompt(data: PromptRequest):
if not data.query.strip():
raise HTTPException(status_code=400, detail="'query' darf nicht leer sein.")
if not (1 <= data.context_limit <= 10):
raise HTTPException(status_code=400, detail="'context_limit' muss zwischen 1 und 10 liegen.")
hits = qdrant.search(
collection_name=data.collection,
query_vector=model.encode(data.query).tolist(),
limit=data.context_limit
)
context = "\n".join(h.payload['text'] for h in hits)
payload = {"model":OLLAMA_MODEL,"prompt":f"Context:\n{context}\nQuestion: {data.query}","stream":False}
try:
r = requests.post(OLLAMA_URL, json=payload, timeout=30)
r.raise_for_status()
except Exception:
raise HTTPException(status_code=502, detail="LLM-Service-Fehler.")
return PromptResponse(answer=r.json().get("response",""), context=context, collection=data.collection)
@app.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
collection: str = Query(...),
source: Optional[str] = Query(None),
type: Optional[str] = Query(None),
owner: Optional[str] = Query(None),
category: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
filt = []
if source: filt.append({"key":"source","match":{"value":source}})
if type: filt.append({"key":"type","match":{"value":type}})
if owner: filt.append({"key":"owner","match":{"value":owner}})
if category: filt.append({"key":"category","match":{"value":category}})
if not filt:
raise HTTPException(status_code=400, detail="Mindestens ein Filterparameter muss angegeben werden.")
pts, _ = qdrant.scroll(collection_name=collection, scroll_filter={"must":filt}, limit=10000)
ids = [str(p.id) for p in pts]
if not ids:
return DeleteResponse(status="🔍 Keine passenden Einträge gefunden.", count=0, collection=collection)
qdrant.delete(collection_name=collection, points_selector=PointIdsList(points=ids))
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=collection)
@app.delete("/delete-collection", response_model=DeleteResponse)
def delete_collection(collection: str = Query(...)):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
# ------------------------
# Endpoints for training plans
# ------------------------
@app.post("/plan", response_model=TrainingPlan)
def create_plan(plan: TrainingPlan):
coll = "training_plans"
if not qdrant.collection_exists(coll):
qdrant.recreate_collection(
collection_name=coll,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
vec = model.encode(f"{plan.title}. {plan.short_description}").tolist()
payload = plan.dict()
qdrant.upsert(collection_name=coll, points=[PointStruct(id=plan.id, vector=vec, payload=payload)])
return plan
@app.get("/plan", response_model=List[TrainingPlan])
def list_plans(
collection: str = Query("training_plans"),
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
dojo: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
return []
pts, _ = qdrant.scroll(collection_name=collection, limit=10000)
result = []
for p in pts:
pl = TrainingPlan(**p.payload)
if discipline and pl.discipline != discipline: continue
if group and pl.group != group: continue
if dojo and pl.dojo != dojo: continue
result.append(pl)
return result

View File

@ -0,0 +1,323 @@
#!/usr/bin/env python3
# llm_api.py — Version 1.1.11
from fastapi import FastAPI, Query, HTTPException, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct, PointIdsList
from uuid import uuid4
import requests
import os
from datetime import datetime, date
# Version bumped
__version__ = "1.1.11"
print(f"[DEBUG] llm_api.py version {__version__} loaded from {__file__}", flush=True)
# FastAPI application
app = FastAPI(
title="KI Trainerassistent API",
description="Lokale API für Trainingsplanung",
version=__version__,
docs_url="/docs",
redoc_url="/redoc",
openapi_url="/openapi.json"
)
# Global error handler
@app.exception_handler(Exception)
async def unicorn_exception_handler(request: Request, exc: Exception):
return JSONResponse(status_code=500, content={"detail": "Interner Serverfehler. Bitte später erneut versuchen."})
# --------------------------------
# Models for embed/search
# --------------------------------
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = ""
title: str = ""
version: str = ""
related_to: str = ""
tags: List[str] = []
owner: str = ""
context_tag: Optional[str] = None
imported_at: Optional[str] = None
chunk_index: Optional[int] = None
category: Optional[str] = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
class EmbedResponse(BaseModel):
status: str
count: int
collection: str
class SearchResultItem(BaseModel):
score: float = Field(..., ge=0)
text: str
class PromptResponse(BaseModel):
answer: str
context: str
collection: str
class DeleteResponse(BaseModel):
status: str
count: int
collection: str
source: Optional[str] = None
type: Optional[str] = None
# --------------------------------
# Models for exercises & training plans
# --------------------------------
class Exercise(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
summary: str
short_description: str
keywords: List[str] = []
link: Optional[str] = None
discipline: str
group: Optional[str] = None
age_group: str
target_group: str
min_participants: int
duration_minutes: int
capabilities: Dict[str, int] = {}
category: str
purpose: str
execution: str
notes: str
preparation: str
method: str
equipment: List[str] = []
class PhaseExercise(BaseModel):
exercise_id: str
cond_load: Dict[str, Any] = {}
coord_load: Dict[str, Any] = {}
instructions: str = ""
class PlanPhase(BaseModel):
name: str
duration_minutes: int
method: str
method_notes: str = ""
exercises: List[PhaseExercise]
class TrainingPlan(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
short_description: str
collection: str
discipline: str
group: Optional[str] = None
dojo: str
date: date
plan_duration_weeks: int
focus_areas: List[str] = []
predecessor_plan_id: Optional[str] = None
age_group: str
created_at: datetime = Field(default_factory=datetime.utcnow)
phases: List[PlanPhase]
# ----------------------------------
# Embedding model and Qdrant client
# ----------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(
host=os.getenv("QDRANT_HOST", "localhost"),
port=int(os.getenv("QDRANT_PORT", 6333))
)
# Collection names
EXERCISE_COLL = "exercises"
PLAN_COLL = "training_plans"
# Ensure the collections exist
if not qdrant.collection_exists(EXERCISE_COLL):
qdrant.recreate_collection(
collection_name=EXERCISE_COLL,
vectors_config=VectorParams(size=model.get_sentence_embedding_dimension(), distance=Distance.COSINE)
)
if not qdrant.collection_exists(PLAN_COLL):
qdrant.recreate_collection(
collection_name=PLAN_COLL,
vectors_config=VectorParams(size=model.get_sentence_embedding_dimension(), distance=Distance.COSINE)
)
# ----------------------------------
# Endpoints for exercises
# ----------------------------------
@app.post("/exercise", response_model=Exercise)
def create_exercise(ex: Exercise):
vec = model.encode(f"{ex.title}. {ex.summary}").tolist()
point = PointStruct(id=ex.id, vector=vec, payload=ex.dict())
qdrant.upsert(collection_name=EXERCISE_COLL, points=[point])
return ex
@app.get("/exercise", response_model=List[Exercise])
def list_exercises(
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
tags: Optional[str] = Query(None)
):
filters = []
if discipline:
filters.append({"key": "discipline", "match": {"value": discipline}})
if group:
filters.append({"key": "group", "match": {"value": group}})
if tags:
for t in tags.split(","):
filters.append({"key": "keywords", "match": {"value": t.strip()}})
if filters:
pts, _ = qdrant.scroll(collection_name=EXERCISE_COLL, scroll_filter={"must": filters}, limit=10000)
else:
pts, _ = qdrant.scroll(collection_name=EXERCISE_COLL, limit=10000)
return [Exercise(**pt.payload) for pt in pts]
# ----------------------------------
# Endpoints for training plans
# ----------------------------------
@app.post("/plan", response_model=TrainingPlan)
def create_plan(plan: TrainingPlan):
vec = model.encode(f"{plan.title}. {plan.short_description}").tolist()
point = PointStruct(id=plan.id, vector=vec, payload=plan.dict())
qdrant.upsert(collection_name=PLAN_COLL, points=[point])
return plan
@app.get("/plan", response_model=List[TrainingPlan])
def list_plans(
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
dojo: Optional[str] = Query(None)
):
filters = []
if discipline:
filters.append({"key": "discipline", "match": {"value": discipline}})
if group:
filters.append({"key": "group", "match": {"value": group}})
if dojo:
filters.append({"key": "dojo", "match": {"value": dojo}})
if filters:
pts, _ = qdrant.scroll(collection_name=PLAN_COLL, scroll_filter={"must": filters}, limit=10000)
else:
pts, _ = qdrant.scroll(collection_name=PLAN_COLL, limit=10000)
return [TrainingPlan(**pt.payload) for pt in pts]
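# Example query (hypothetical values): list all plans of one dojo
#   GET /plan?discipline=Karate&dojo=Dojo%20A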
# ----------------------------------
# Embed/search and delete endpoints
# ----------------------------------
@app.post("/embed", response_model=EmbedResponse)
def embed_texts(data: EmbedRequest):
collection_name = data.collection
if not qdrant.collection_exists(collection_name):
qdrant.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=model.get_sentence_embedding_dimension(), distance=Distance.COSINE)
)
embeddings = model.encode([c.text for c in data.chunks]).tolist()
points = []
for i, chunk in enumerate(data.chunks):
payload = {**chunk.dict(), "imported_at": chunk.imported_at or datetime.utcnow().isoformat()}
points.append(PointStruct(id=str(uuid4()), vector=embeddings[i], payload=payload))
qdrant.upsert(collection_name=collection_name, points=points)
return EmbedResponse(status="✅ embeddings saved", count=len(points), collection=collection_name)
@app.get("/search", response_model=List[SearchResultItem])
def search_text(query: str = Query(..., min_length=1), limit: int = Query(3, ge=1), collection: str = Query("default")):
vec = model.encode(query).tolist()
res = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [SearchResultItem(score=r.score, text=r.payload['text']) for r in res]
@app.post("/prompt", response_model=PromptResponse)
def prompt(data: PromptRequest):
if not data.query.strip():
raise HTTPException(status_code=400, detail="'query' darf nicht leer sein.")
if not (1 <= data.context_limit <= 10):
raise HTTPException(status_code=400, detail="'context_limit' muss zwischen 1 und 10 liegen.")
hits = qdrant.search(collection_name=data.collection, query_vector=model.encode(data.query).tolist(), limit=data.context_limit)
context = "\n".join(h.payload['text'] for h in hits)
try:
r = requests.post(
os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate"),
json={"model": os.getenv("OLLAMA_MODEL", "mistral:latest"), "prompt": f"Context:\n{context}\nQuestion: {data.query}", "stream": False},
timeout=30
)
r.raise_for_status()
except Exception:
raise HTTPException(status_code=502, detail="LLM-Service-Fehler.")
return PromptResponse(answer=r.json().get("response", ""), context=context, collection=data.collection)
@app.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
collection: str = Query(...), source: Optional[str] = Query(None), type: Optional[str] = Query(None), owner: Optional[str] = Query(None), category: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
filters = []
if source:
filters.append({"key": "source", "match": {"value": source}})
if type:
filters.append({"key": "type", "match": {"value": type}})
if owner:
filters.append({"key": "owner", "match": {"value": owner}})
if category:
filters.append({"key": "category", "match": {"value": category}})
if not filters:
raise HTTPException(status_code=400, detail="Mindestens ein Filterparameter muss angegeben werden.")
pts, _ = qdrant.scroll(collection_name=collection, scroll_filter={"must": filters}, limit=10000)
ids = [str(p.id) for p in pts]
if not ids:
return DeleteResponse(status="🔍 Keine passenden Einträge gefunden.", count=0, collection=collection)
qdrant.delete(collection_name=collection, points_selector=PointIdsList(points=ids))
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=collection)
@app.delete("/delete-collection", response_model=DeleteResponse)
def delete_collection(collection: str = Query(...)):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
# ----------------------------------------------------------------
# MediaWiki login (v1.1.11)
# ----------------------------------------------------------------
MEDIAWIKI_API_URL = os.getenv("MEDIAWIKI_API_URL", "https://www.Karatetrainer.de/api.php")
MEDIAWIKI_USER = os.getenv("MEDIAWIKI_USER", "LarsS@APIBot")
MEDIAWIKI_PASSWORD = os.getenv("MEDIAWIKI_PASSWORD", "")  # secret must come from the environment, never hard-coded
wiki_session = requests.Session()
@app.post("/import/wiki/login")
async def import_wiki_login():
try:
params_token = {"action": "query", "meta": "tokens", "type": "login", "format": "json"}
resp1 = wiki_session.get(MEDIAWIKI_API_URL, params=params_token)
resp1.raise_for_status()
token = resp1.json()["query"]["tokens"]["logintoken"]
login_params = {"action": "login", "format": "json"}
login_data = {"lgname": MEDIAWIKI_USER, "lgpassword": MEDIAWIKI_PASSWORD, "lgtoken": token}
resp2 = wiki_session.post(MEDIAWIKI_API_URL, params=login_params, data=login_data)
resp2.raise_for_status()
result = resp2.json().get("login", {})
if result.get("result") == "Success":
return {"status": "✅ MediaWiki login erfolgreich."}
else:
raise HTTPException(status_code=401, detail=f"Login fehlgeschlagen: {result.get('reason','unbekannter Fehler')}")
except requests.RequestException as e:
raise HTTPException(status_code=502, detail=f"Fehler bei Wiki-API-Aufruf: {str(e)}")

View File

@ -0,0 +1,341 @@
from fastapi import FastAPI, Query, HTTPException, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct, PointIdsList
from uuid import uuid4
import requests
import os
from datetime import datetime, date
# Version bumped
__version__ = "1.1.5"
print(f"[DEBUG] llm_api.py version {__version__} loaded from {__file__}", flush=True)
# Ollama configuration
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "mistral:latest")
# FastAPI instance
app = FastAPI(
title="KI Trainerassistent API",
description="Lokale API für Trainingsplanung",
version=__version__,
docs_url="/docs",
redoc_url="/redoc",
openapi_url="/openapi.json"
)
# Global error handler
@app.exception_handler(Exception)
async def unicorn_exception_handler(request: Request, exc: Exception):
return JSONResponse(status_code=500, content={"detail": "Interner Serverfehler. Bitte später erneut versuchen."})
# ------------------------
# Models for embed/search
# ------------------------
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = ""
title: str = ""
version: str = ""
related_to: str = ""
tags: List[str] = []
owner: str = ""
context_tag: Optional[str] = None
imported_at: Optional[str] = None
chunk_index: Optional[int] = None
category: Optional[str] = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
class EmbedResponse(BaseModel):
status: str
count: int
collection: str
class SearchResultItem(BaseModel):
score: float = Field(..., ge=0)
text: str
class PromptResponse(BaseModel):
answer: str
context: str
collection: str
class DeleteResponse(BaseModel):
status: str
count: int
collection: str
source: Optional[str] = None
type: Optional[str] = None
# ------------------------------------
# Models for exercises & plans
# ------------------------------------
class Exercise(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
summary: str
short_description: str
keywords: List[str] = []
link: Optional[str] = None
discipline: str
group: Optional[str] = None
age_group: str
target_group: str
min_participants: int
duration_minutes: int
capabilities: Dict[str,int] = {}
category: str
purpose: str
execution: str
notes: str
preparation: str
method: str
equipment: List[str] = []
class PhaseExercise(BaseModel):
exercise_id: str
cond_load: Dict[str, Any] = {}
coord_load: Dict[str, Any] = {}
instructions: str = ""
class PlanPhase(BaseModel):
name: str
duration_minutes: int
method: str
method_notes: str = ""
exercises: List[PhaseExercise]
class TrainingPlan(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
short_description: str
collection: str
discipline: str
group: Optional[str] = None
dojo: str
date: date
plan_duration_weeks: int
focus_areas: List[str] = []
predecessor_plan_id: Optional[str] = None
age_group: str
created_at: datetime = Field(default_factory=datetime.utcnow)
phases: List[PlanPhase]
# ----------------------------------
# Embedding model and Qdrant client
# ----------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(
host=os.getenv("QDRANT_HOST", "localhost"),
port=int(os.getenv("QDRANT_PORT", 6333))
)
# Ensure Exercise-Collection exists
if not qdrant.collection_exists("exercises"):
qdrant.recreate_collection(
collection_name="exercises",
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
# Ensure TrainingPlan-Collection exists
PLAN_COLL = "training_plans"
if not qdrant.collection_exists(PLAN_COLL):
qdrant.recreate_collection(
collection_name=PLAN_COLL,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
# ----------------------
# Endpoints for exercises
# ----------------------
@app.post("/exercise", response_model=Exercise)
def create_exercise(ex: Exercise):
# Ensure collection exists
if not qdrant.collection_exists("exercises"):
qdrant.recreate_collection(
collection_name="exercises",
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
vec = model.encode(f"{ex.title}. {ex.summary}").tolist()
point = PointStruct(id=ex.id, vector=vec, payload=ex.dict())
qdrant.upsert(collection_name="exercises", points=[point])
return ex
@app.get("/exercise", response_model=List[Exercise])
def list_exercises(
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
tags: Optional[str] = Query(None)
):
filters = []
if discipline:
filters.append({"key":"discipline","match":{"value":discipline}})
if group:
filters.append({"key":"group","match":{"value":group}})
if tags:
for t in tags.split(","):
filters.append({"key":"keywords","match":{"value":t.strip()}})
if filters:
pts, _ = qdrant.scroll(
collection_name="exercises",
scroll_filter={"must": filters},
limit=10000
)
else:
pts, _ = qdrant.scroll(collection_name="exercises", limit=10000)
return [Exercise(**pt.payload) for pt in pts]
# -----------------
# Existing endpoints
# -----------------
@app.post("/embed")
def embed_texts(data: EmbedRequest):
collection_name = data.collection
if not qdrant.collection_exists(collection_name):
qdrant.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
embeddings = model.encode([c.text for c in data.chunks]).tolist()
points = []
for i, chunk in enumerate(data.chunks):
payload = {
"text": chunk.text,
"source": chunk.source,
"source_type": chunk.source_type,
"title": chunk.title,
"version": chunk.version,
"related_to": chunk.related_to,
"tags": chunk.tags,
"owner": chunk.owner,
"context_tag": chunk.context_tag,
"imported_at": chunk.imported_at or datetime.utcnow().isoformat(),
"chunk_index": chunk.chunk_index,
"category": chunk.category or data.collection
}
points.append(PointStruct(id=str(uuid4()), vector=embeddings[i], payload=payload))
qdrant.upsert(collection_name=collection_name, points=points)
return {"status":"✅ embeddings saved","count":len(points),"collection":collection_name}
@app.get("/search", response_model=List[SearchResultItem])
def search_text(query: str = Query(..., min_length=1), limit: int = Query(3, ge=1), collection: str = Query("default")):
vec = model.encode(query).tolist()
res = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [SearchResultItem(score=r.score, text=r.payload['text']) for r in res]
@app.post("/prompt", response_model=PromptResponse)
def prompt(data: PromptRequest):
if not data.query.strip():
raise HTTPException(status_code=400, detail="'query' darf nicht leer sein.")
if not (1 <= data.context_limit <= 10):
raise HTTPException(status_code=400, detail="'context_limit' muss zwischen 1 und 10 liegen.")
hits = qdrant.search(
collection_name=data.collection,
query_vector=model.encode(data.query).tolist(),
limit=data.context_limit
)
context = "\n".join(h.payload['text'] for h in hits)
payload = {"model":OLLAMA_MODEL,"prompt":f"Context:\n{context}\nQuestion: {data.query}","stream":False}
try:
r = requests.post(OLLAMA_URL, json=payload, timeout=30)
r.raise_for_status()
except Exception:
raise HTTPException(status_code=502, detail="LLM-Service-Fehler.")
return PromptResponse(answer=r.json().get("response",""), context=context, collection=data.collection)
@app.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
collection: str = Query(...),
source: Optional[str] = Query(None),
type: Optional[str] = Query(None),
owner: Optional[str] = Query(None),
category: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
filt = []
if source: filt.append({"key":"source","match":{"value":source}})
if type: filt.append({"key":"type","match":{"value":type}})
if owner: filt.append({"key":"owner","match":{"value":owner}})
if category: filt.append({"key":"category","match":{"value":category}})
if not filt:
raise HTTPException(status_code=400, detail="Mindestens ein Filterparameter muss angegeben werden.")
pts, _ = qdrant.scroll(collection_name=collection, scroll_filter={"must":filt}, limit=10000)
ids = [str(p.id) for p in pts]
if not ids:
return DeleteResponse(status="🔍 Keine passenden Einträge gefunden.", count=0, collection=collection)
qdrant.delete(collection_name=collection, points_selector=PointIdsList(points=ids))
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=collection)
@app.delete("/delete-collection", response_model=DeleteResponse)
def delete_collection(collection: str = Query(...)):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
# ------------------------
# Endpoints for training plans
# ------------------------
@app.post("/plan", response_model=TrainingPlan)
def create_plan(plan: TrainingPlan):
# Ensure plan collection exists
if not qdrant.collection_exists(PLAN_COLL):
qdrant.recreate_collection(
collection_name=PLAN_COLL,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
vec = model.encode(f"{plan.title}. {plan.short_description}").tolist()
payload = plan.dict()
qdrant.upsert(collection_name=PLAN_COLL, points=[PointStruct(id=plan.id, vector=vec, payload=payload)])
return plan
@app.get("/plan", response_model=List[TrainingPlan])
def list_plans(
collection: str = Query(PLAN_COLL),
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
dojo: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
return []
pts, _ = qdrant.scroll(collection_name=collection, limit=10000)
result: List[TrainingPlan] = []
for pt in pts:
plan = TrainingPlan(**pt.payload)
if discipline and plan.discipline != discipline: continue
if group and plan.group != group: continue
if dojo and plan.dojo != dojo: continue
result.append(plan)
return result

View File

@ -0,0 +1,421 @@
from fastapi import FastAPI, Query, HTTPException, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct, PointIdsList
from uuid import uuid4
import requests
import os
from datetime import datetime, date
# Version bumped
__version__ = "1.1.6"
print(f"[DEBUG] llm_api.py version {__version__} loaded from {__file__}", flush=True)
# Ollama configuration
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/api/generate")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "mistral:latest")
# -----------------------
# MediaWiki configuration
# -----------------------
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
WIKI_BOT_USER = os.getenv("WIKI_BOT_USER", "")
WIKI_BOT_PASSWORD = os.getenv("WIKI_BOT_PASSWORD", "")
# FastAPI instance
app = FastAPI(
title="KI Trainerassistent API",
description="Lokale API für Trainingsplanung",
version=__version__,
docs_url="/docs",
redoc_url="/redoc",
openapi_url="/openapi.json"
)
# Global error handler
@app.exception_handler(Exception)
async def unicorn_exception_handler(request: Request, exc: Exception):
return JSONResponse(status_code=500, content={"detail": "Interner Serverfehler. Bitte später erneut versuchen."})
# Global session for the MediaWiki API
wiki_session = requests.Session()
# Health check for MediaWiki
@app.get("/import/wiki/health")
def wiki_health():
"""
Checks whether the MediaWiki server is reachable.
"""
params = {"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=5)
r.raise_for_status()
resp = r.json()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
# Try to read the server name, but still return OK if it is missing
server = resp.get("query", {}).get("general", {}).get("servername")
if server:
return {"status": "ok", "server": server}
return {"status": "ok", "server": None}
# ------------------------
# MediaWiki Login Endpoint
# ------------------------
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: Optional[str] = None
@app.post("/import/wiki/login", response_model=WikiLoginResponse)
def wiki_login(data: WikiLoginRequest):
"""
Performs a login against the MediaWiki API and stores the session cookies.
"""
# Step 1: fetch a login token
params_token = {"action": "query", "meta": "tokens", "type": "login", "format": "json"}
try:
resp1 = wiki_session.get(WIKI_API_URL, params=params_token, timeout=10)
resp1.raise_for_status()
token = resp1.json().get("query", {}).get("tokens", {}).get("logintoken")
if not token:
raise ValueError("Kein Login-Token erhalten")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Fehler Token abrufen: {e}")
# Step 2: log in with the token
login_data = {
"action": "login", "format": "json",
"lgname": data.username, "lgpassword": data.password,
"lgtoken": token
}
try:
resp2 = wiki_session.post(WIKI_API_URL, data=login_data, timeout=10)
resp2.raise_for_status()
result = resp2.json().get("login", {})
if result.get("result") != "Success":
return WikiLoginResponse(status="failed", message=result.get("reason", "Login fehlgeschlagen"))
except Exception as e:
raise HTTPException(status_code=502, detail=f"Fehler Login: {e}")
return WikiLoginResponse(status="success")
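# Note: this implements MediaWiki's two-step bot login: first fetch a login
# token via action=query&meta=tokens&type=login, then POST action=login with
# that token; the resulting cookies are kept in the module-level wiki_session.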
# ------------------------
# Models for embed/search
# ------------------------
class ChunkInput(BaseModel):
text: str
source: str
source_type: str = ""
title: str = ""
version: str = ""
related_to: str = ""
tags: List[str] = []
owner: str = ""
context_tag: Optional[str] = None
imported_at: Optional[str] = None
chunk_index: Optional[int] = None
category: Optional[str] = None
class EmbedRequest(BaseModel):
chunks: List[ChunkInput]
collection: str = "default"
class PromptRequest(BaseModel):
query: str
context_limit: int = 3
collection: str = "default"
class EmbedResponse(BaseModel):
status: str
count: int
collection: str
class SearchResultItem(BaseModel):
score: float = Field(..., ge=0)
text: str
class PromptResponse(BaseModel):
answer: str
context: str
collection: str
class DeleteResponse(BaseModel):
status: str
count: int
collection: str
source: Optional[str] = None
type: Optional[str] = None
# ------------------------------------
# Models for exercises & plans
# ------------------------------------
class Exercise(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
summary: str
short_description: str
keywords: List[str] = []
link: Optional[str] = None
discipline: str
group: Optional[str] = None
age_group: str
target_group: str
min_participants: int
duration_minutes: int
capabilities: Dict[str,int] = {}
category: str
purpose: str
execution: str
notes: str
preparation: str
method: str
equipment: List[str] = []
class PhaseExercise(BaseModel):
exercise_id: str
cond_load: Dict[str, Any] = {}
coord_load: Dict[str, Any] = {}
instructions: str = ""
class PlanPhase(BaseModel):
name: str
duration_minutes: int
method: str
method_notes: str = ""
exercises: List[PhaseExercise]
class TrainingPlan(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4()))
title: str
short_description: str
collection: str
discipline: str
group: Optional[str] = None
dojo: str
date: date
plan_duration_weeks: int
focus_areas: List[str] = []
predecessor_plan_id: Optional[str] = None
age_group: str
created_at: datetime = Field(default_factory=datetime.utcnow)
phases: List[PlanPhase]
# ----------------------------------
# Embedding model and Qdrant client
# ----------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(
host=os.getenv("QDRANT_HOST", "localhost"),
port=int(os.getenv("QDRANT_PORT", 6333))
)
# Ensure Exercise-Collection exists
if not qdrant.collection_exists("exercises"):
qdrant.recreate_collection(
collection_name="exercises",
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
# Ensure TrainingPlan-Collection exists
PLAN_COLL = "training_plans"
if not qdrant.collection_exists(PLAN_COLL):
qdrant.recreate_collection(
collection_name=PLAN_COLL,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
# ----------------------
# Endpoints for exercises
# ----------------------
@app.post("/exercise", response_model=Exercise)
def create_exercise(ex: Exercise):
# Ensure collection exists
if not qdrant.collection_exists("exercises"):
qdrant.recreate_collection(
collection_name="exercises",
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
vec = model.encode(f"{ex.title}. {ex.summary}").tolist()
point = PointStruct(id=ex.id, vector=vec, payload=ex.dict())
qdrant.upsert(collection_name="exercises", points=[point])
return ex
@app.get("/exercise", response_model=List[Exercise])
def list_exercises(
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
tags: Optional[str] = Query(None)
):
filters = []
if discipline:
filters.append({"key":"discipline","match":{"value":discipline}})
if group:
filters.append({"key":"group","match":{"value":group}})
if tags:
for t in tags.split(","):
filters.append({"key":"keywords","match":{"value":t.strip()}})
if filters:
pts, _ = qdrant.scroll(
collection_name="exercises",
scroll_filter={"must": filters},
limit=10000
)
else:
pts, _ = qdrant.scroll(collection_name="exercises", limit=10000)
return [Exercise(**pt.payload) for pt in pts]
# -----------------
# Existing endpoints
# -----------------
@app.post("/embed")
def embed_texts(data: EmbedRequest):
collection_name = data.collection
if not qdrant.collection_exists(collection_name):
qdrant.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
embeddings = model.encode([c.text for c in data.chunks]).tolist()
points = []
for i, chunk in enumerate(data.chunks):
payload = {
"text": chunk.text,
"source": chunk.source,
"source_type": chunk.source_type,
"title": chunk.title,
"version": chunk.version,
"related_to": chunk.related_to,
"tags": chunk.tags,
"owner": chunk.owner,
"context_tag": chunk.context_tag,
"imported_at": chunk.imported_at or datetime.utcnow().isoformat(),
"chunk_index": chunk.chunk_index,
"category": chunk.category or data.collection
}
points.append(PointStruct(id=str(uuid4()), vector=embeddings[i], payload=payload))
qdrant.upsert(collection_name=collection_name, points=points)
return {"status":"✅ embeddings saved","count":len(points),"collection":collection_name}
@app.get("/search", response_model=List[SearchResultItem])
def search_text(query: str = Query(..., min_length=1), limit: int = Query(3, ge=1), collection: str = Query("default")):
vec = model.encode(query).tolist()
res = qdrant.search(collection_name=collection, query_vector=vec, limit=limit)
return [SearchResultItem(score=r.score, text=r.payload['text']) for r in res]
@app.post("/prompt", response_model=PromptResponse)
def prompt(data: PromptRequest):
if not data.query.strip():
raise HTTPException(status_code=400, detail="'query' darf nicht leer sein.")
if not (1 <= data.context_limit <= 10):
raise HTTPException(status_code=400, detail="'context_limit' muss zwischen 1 und 10 liegen.")
hits = qdrant.search(
collection_name=data.collection,
query_vector=model.encode(data.query).tolist(),
limit=data.context_limit
)
context = "\n".join(h.payload['text'] for h in hits)
payload = {"model":OLLAMA_MODEL,"prompt":f"Context:\n{context}\nQuestion: {data.query}","stream":False}
try:
r = requests.post(OLLAMA_URL, json=payload, timeout=30)
r.raise_for_status()
except Exception:
raise HTTPException(status_code=502, detail="LLM-Service-Fehler.")
return PromptResponse(answer=r.json().get("response",""), context=context, collection=data.collection)
@app.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
collection: str = Query(...),
source: Optional[str] = Query(None),
type: Optional[str] = Query(None),
owner: Optional[str] = Query(None),
category: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
filt = []
if source: filt.append({"key":"source","match":{"value":source}})
if type: filt.append({"key":"type","match":{"value":type}})
if owner: filt.append({"key":"owner","match":{"value":owner}})
if category: filt.append({"key":"category","match":{"value":category}})
if not filt:
raise HTTPException(status_code=400, detail="Mindestens ein Filterparameter muss angegeben werden.")
pts, _ = qdrant.scroll(collection_name=collection, scroll_filter={"must":filt}, limit=10000)
ids = [str(p.id) for p in pts]
if not ids:
return DeleteResponse(status="🔍 Keine passenden Einträge gefunden.", count=0, collection=collection)
qdrant.delete(collection_name=collection, points_selector=PointIdsList(points=ids))
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=collection)
@app.delete("/delete-collection", response_model=DeleteResponse)
def delete_collection(collection: str = Query(...)):
if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection)
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
# ------------------------
# Endpoints for training plans
# ------------------------
@app.post("/plan", response_model=TrainingPlan)
def create_plan(plan: TrainingPlan):
# Ensure plan collection exists
if not qdrant.collection_exists(PLAN_COLL):
qdrant.recreate_collection(
collection_name=PLAN_COLL,
vectors_config=VectorParams(
size=model.get_sentence_embedding_dimension(),
distance=Distance.COSINE
)
)
vec = model.encode(f"{plan.title}. {plan.short_description}").tolist()
payload = plan.dict()
qdrant.upsert(collection_name=PLAN_COLL, points=[PointStruct(id=plan.id, vector=vec, payload=payload)])
return plan
@app.get("/plan", response_model=List[TrainingPlan])
def list_plans(
collection: str = Query(PLAN_COLL),
discipline: Optional[str] = Query(None),
group: Optional[str] = Query(None),
dojo: Optional[str] = Query(None)
):
if not qdrant.collection_exists(collection):
return []
pts, _ = qdrant.scroll(collection_name=collection, limit=10000)
result: List[TrainingPlan] = []
for pt in pts:
plan = TrainingPlan(**pt.payload)
if discipline and plan.discipline != discipline: continue
if group and plan.group != group: continue
if dojo and plan.dojo != dojo: continue
result.append(plan)
return result

173
llm-api/test_delete_filters.sh Executable file
View File

@ -0,0 +1,173 @@
#!/usr/bin/env bash
set -euo pipefail
BASE_URL="http://127.0.0.1:8000"
COL="filter_test"
# Check that jq is installed
if ! command -v jq &>/dev/null; then
echo "Bitte installieren: sudo apt-get install -y jq"
exit 1
fi
# Helper: insert one fully populated chunk
function test_embed_full {
local SRC=$1; local OWNER=$2; local CAT=$3; local TEXT=$4
local IDX=$5
payload=$(jq -n \
--arg col "$COL" \
--arg txt "$TEXT" \
--arg src "$SRC" \
--arg owner "$OWNER" \
--arg cat "$CAT" \
--argjson idx $IDX '{
collection: $col,
chunks: [
{
text: $txt,
source: $src,
source_type: "unit-test",
title: $txt,
version: "v1",
related_to: "rel",
tags: ["t1","t2"],
owner: $owner,
context_tag: "ctx",
imported_at: "2025-08-06T00:00:00Z",
chunk_index: $idx,
category: $cat
}
]
}')
r=$(curl -s -X POST "${BASE_URL}/embed" -H "Content-Type: application/json" -d "$payload")
cnt=$(echo "$r" | jq -r '.count // 0')
if [[ "$cnt" -ne 1 ]]; then echo "/embed fehlgeschlagen: $r"; exit 1; fi
echo "? Embed idx=$IDX src=$SRC owner=$OWNER cat=$CAT text=$TEXT"
}
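# Usage: test_embed_full <source> <owner> <category> <text> <chunk_index>
# e.g.:  test_embed_full s1 o1 c1 A 0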
# Helper: run a delete request and assert the deleted count
function assert_delete {
local DESC=$1; shift
local PARAMS=$1; shift
local EXPECT=$1; shift
echo -n "Test: ${DESC} ... "
local resp=$(curl -s -X DELETE "${BASE_URL}/delete-source?collection=${COL}&${PARAMS}")
local cnt=$(echo "$resp" | jq -r '.count // 0')
if [[ "$cnt" -eq "$EXPECT" ]]; then
echo "? gelöscht=${cnt}"
else
echo "? gelöscht=${cnt}, erwartet=${EXPECT}"
echo " Response: $resp"
exit 1
fi
}
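# Usage: assert_delete <description> <query-params> <expected-count>
# e.g.:  assert_delete "s1+o2" "source=s1&owner=o2" 1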
# Pause helper
function pause {
read -p "Drücke [Enter] um fortzufahren ..."
}
# 1) Initialize the test collection
echo "=== 1) Initialisiere Test-Collection ==="
curl -s -X DELETE "${BASE_URL}/delete-collection?collection=${COL}" || true
pause
# 2) Insert 4 test points
echo "=== 2) Einfügen von Testdaten ==="
test_embed_full s1 o1 c1 A 0
test_embed_full s2 o2 c2 B 1
test_embed_full s1 o2 c1 C 2
test_embed_full s2 o1 c2 D 3
pause
# 3) Single criteria (each test runs on fresh data)
echo "=== 3) Einzelkriterien ==="
for test in \
"source=s1" "source=s2" "owner=o1" "owner=o2" "category=c1" "category=c2"; do
# Reset the collection
curl -s -X DELETE "${BASE_URL}/delete-collection?collection=${COL}" || true
# Repopulate
test_embed_full s1 o1 c1 A 0
test_embed_full s2 o2 c2 B 1
test_embed_full s1 o2 c1 C 2
test_embed_full s2 o1 c2 D 3
# Each single criterion matches exactly 2 of the 4 test points
EXPECT=2
echo -n "Test: ${test} ... "
resp=$(curl -s -X DELETE "${BASE_URL}/delete-source?collection=${COL}&${test}")
cnt=$(echo "$resp" | jq -r '.count // 0')
if [[ "$cnt" -eq "$EXPECT" ]]; then
echo "? gelöscht=${cnt}"
else
echo "? gelöscht=${cnt}, erwartet=${EXPECT}"; echo " Response: $resp"; exit 1
fi
pause
done
# 4) Two criteria
echo "=== 4) Zwei Kriterien ==="
for params in \
"source=s1&owner=o2" "source=s2&category=c2" "owner=o1&category=c2"; do
curl -s -X DELETE "${BASE_URL}/delete-collection?collection=${COL}" || true
test_embed_full s1 o1 c1 A 0
test_embed_full s2 o2 c2 B 1
test_embed_full s1 o2 c1 C 2
test_embed_full s2 o1 c2 D 3
assert_delete "$params" "$params" 1
pause
done
# 5) Three criteria
echo "=== 5) Drei Kriterien ==="
for params in "source=s1&owner=o2&category=c1"; do
curl -s -X DELETE "${BASE_URL}/delete-collection?collection=${COL}" || true
test_embed_full s1 o1 c1 A 0
test_embed_full s2 o2 c2 B 1
test_embed_full s1 o2 c1 C 2
test_embed_full s2 o1 c2 D 3
assert_delete "$params" "$params" 1
pause
done
# 6) No filter (HTTP 400 expected)
echo "=== 6) Kein Filter (400 erwartet) ==="
echo -n "Test: no-filter ... "
code=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "${BASE_URL}/delete-source?collection=${COL}")
if [[ "$code" -eq 400 ]]; then echo "✓ HTTP 400"; else echo "✗ HTTP ${code}"; exit 1; fi
pause
# 7) Cleanup
echo "=== 7) Cleanup ==="
curl -s -X DELETE "${BASE_URL}/delete-collection?collection=${COL}" || true
echo "? Collection gelöscht"
echo "?? Test abgeschlossen! ??"

View File

@ -0,0 +1,71 @@
#!/usr/bin/env bash
set -euo pipefail
API="http://localhost:8000"
echo "=== Cleanup collection ==="
curl -s -X DELETE "$API/delete-collection?collection=exercises" | jq -r '.status' || true
echo
echo "=== Create baseline (with external_id) ==="
EXT_ID="mw:pageid:218"
PAYLOAD_1=$(cat <<'JSON'
{
"title": "Affenklatschen",
"summary": "Mobilisierung der Schulter",
"short_description": "Mobilisierung der Schulter",
"keywords": ["Aufwärmen","Dehnen","Mobilisierung","Schulter"],
"link": "https://karatetrainer.net/index.php?title=Affenklatschen",
"discipline": "Allgemein",
"group": "1",
"age_group": "Kinder, Schüler, Teenager, Erwachsene",
"target_group": "Breitensportler",
"min_participants": 1,
"duration_minutes": 1,
"capabilities": {"Flexibilität":1,"Kopplungsfähigkeit":1},
"category": "Übungen",
"purpose": "Mobilisierung der Schulter",
"execution": "Beschreibung A",
"notes": "Hinweise A",
"preparation": "Dynamisches Dehnen",
"method": "",
"equipment": [],
"fullurl": "https://karatetrainer.net/index.php?title=Affenklatschen",
"external_id": "mw:pageid:218",
"source": "MediaWiki",
"source_version": "rev-1",
"fingerprint": "fp-1"
}
JSON
)
curl -s -X POST "$API/exercise" -H "Content-Type: application/json" -d "$PAYLOAD_1" | jq -r '.status // "ok"'
echo
echo "=== Upsert same external_id with changed content (should update, not duplicate) ==="
PAYLOAD_2=$(echo "$PAYLOAD_1" | jq '.summary="NEU: Mobilisierung der Schulter (Update)" | .source_version="rev-2" | .fingerprint="fp-2"')
curl -s -X POST "$API/exercise" -H "Content-Type: application/json" -d "$PAYLOAD_2" | jq -r '.status // "ok"'
echo
echo "=== Check by external_id ==="
curl -s "$API/exercise/by-external-id?external_id=$EXT_ID" | jq
echo
echo "=== Count entries (should be 1) ==="
COUNT=$(curl -s "$API/exercise" | jq '[.[] | select(.title=="Affenklatschen")] | length')
echo "Count=$COUNT"
if [ "$COUNT" != "1" ]; then
echo "❌ Expected 1, got $COUNT"
exit 1
fi
echo
echo "=== Verify updated summary ==="
CUR_SUMMARY=$(curl -s "$API/exercise/by-external-id?external_id=$EXT_ID" | jq -r '.payload.summary')
echo "Summary=$CUR_SUMMARY"
if [[ "$CUR_SUMMARY" != "NEU: Mobilisierung der Schulter (Update)" ]]; then
echo "❌ Update did not apply"
exit 1
fi
echo
echo "✅ Idempotent upsert OK"

155
llm-api/test_exercise_plan.sh Executable file
View File

@ -0,0 +1,155 @@
#!/usr/bin/env bash
set -euo pipefail
BASE_URL="http://127.0.0.1:8000"
EX_COL="exercises"
PL_COL="training_plans"
# Utility: assert status code
function assert_status {
local code=$1; local expect=$2; local body=$3
if [[ "$code" -ne "$expect" ]]; then
echo "? Unerwarteter HTTP-Status: $code, erwartet $expect"
echo "Response Body: $body"
exit 1
fi
}
# Utility: extract a field from a JSON response via jq
function jqf { echo "$1" | jq -r "$2"; }
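# e.g. jqf "$BODY" '.id' pulls the id out of a JSON response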
echo "=== 1) Clean up Collections ==="
curl -s -X DELETE "$BASE_URL/delete-collection?collection=$EX_COL" || true
curl -s -X DELETE "$BASE_URL/delete-collection?collection=$PL_COL" || true
echo "? Alle Collections gelöscht"
echo
echo "=== 2) POST /exercise (Create Exercise) ==="
EX_PAYLOAD=$(jq -n '{
title: "Kniebeuge",
summary: "Partnerübung für Stabilität",
short_description: "Partner drückt von vorne",
keywords: ["Kraft","Partner"],
link: "http://wiki/uebung/kniebeuge",
discipline: "Karate",
group: "Mittelstufe",
age_group: "Erwachsene",
target_group: "Breitensport",
min_participants: 2,
duration_minutes: 5,
capabilities: {"strength":3,"balance":2},
category: "Grundübung",
purpose: "Stärkung Beine",
execution: "Langsam herabsenken",
notes: "Rücken gerade halten",
preparation: "Partnerposition",
method: "Partnerwiderstand",
equipment: ["Partner"]
}')
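# curl -w "\n%{http_code}" appends the HTTP status as an extra last line:
# sed '$d' strips it to recover the body, tail -n1 extracts the code.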
R=$(curl -s -w "\n%{http_code}" -X POST "$BASE_URL/exercise" -H "Content-Type: application/json" -d "$EX_PAYLOAD")
BODY=$(echo "$R" | sed '$d')
CODE=$(echo "$R" | tail -n1)
assert_status $CODE 200 "$BODY"
EX_ID=$(jqf "$BODY" '.id')
echo "? Exercise erstellt mit id=$EX_ID"
echo
echo "=== 3) GET /exercise (List & Filter) ==="
# 3a) without filters
FULL=$(curl -s -X GET "$BASE_URL/exercise")
COUNT=$(echo "$FULL" | jq 'length')
if [[ "$COUNT" -ne 1 ]]; then
echo "? /exercise returned $COUNT entries, expected 1"
exit 1
fi
echo "? /exercise list returns $COUNT Eintrag"
# 3b) filter discipline
F=$(curl -s -G "$BASE_URL/exercise" --data-urlencode "discipline=Karate")
if [[ "$(echo "$F" | jq '.[0].id')" != "\"$EX_ID\"" ]]; then
echo "? discipline filter fehlgeschlagen"
exit 1
fi
echo "? Filter discipline funktioniert"
# 3c) filter tags
F2=$(curl -s -G "$BASE_URL/exercise" --data-urlencode "tags=Partner")
if [[ "$(echo "$F2" | jq '.[0].id')" != "\"$EX_ID\"" ]]; then
echo "? tags filter fehlgeschlagen"
exit 1
fi
echo "? Filter tags funktioniert"
echo
echo "=== 4) POST /plan (Create TrainingPlan) ==="
PLAN_PAYLOAD=$(jq -n --arg exid "$EX_ID" '{
title: "Bein-Training",
short_description: "Stabilität und Kraft",
collection: "training_plans",
discipline: "Karate",
group: "Mittelstufe",
dojo: "Dojo A",
date: "2025-08-10",
plan_duration_weeks: 4,
focus_areas: ["Kraft","Technik"],
predecessor_plan_id: null,
age_group: "Erwachsene",
phases: [
{
name: "Aufwärmen",
duration_minutes: 10,
method: "Laufen",
method_notes: "locker",
exercises: [
{
exercise_id: $exid,
cond_load: {"reps":5},
coord_load: {"balance":2},
instructions: "Langsam ausführen"
}
]
}
]
}')
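# The phase above references the exercise created in step 2 via exercise_id.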
R2=$(curl -s -w "\n%{http_code}" -X POST "$BASE_URL/plan" -H "Content-Type: application/json" -d "$PLAN_PAYLOAD")
BODY2=$(echo "$R2" | sed '$d')
CODE2=$(echo "$R2" | tail -n1)
assert_status $CODE2 200 "$BODY2"
PL_ID=$(jqf "$BODY2" '.id')
echo "? Plan erstellt mit id=$PL_ID"
echo
echo "=== 5) GET /plan (List & Filter) ==="
# 5a) without filters
L=$(curl -s -G "$BASE_URL/plan")
if [[ "$(echo "$L" | jq 'length')" -ne 1 ]]; then
echo "? /plan returned $(echo $L | jq 'length') entries, expected 1"
exit 1
fi
echo "? /plan list returns 1 Eintrag"
# 5b) filter discipline
LF=$(curl -s -G "$BASE_URL/plan" --data-urlencode "discipline=Karate")
if [[ "$(echo "$LF" | jq '.[0].id')" != "\"$PL_ID\"" ]]; then
echo "? discipline filter for /plan failed"
exit 1
fi
echo "? /plan discipline filter funktioniert"
# 5c) filter group
LG=$(curl -s -G "$BASE_URL/plan" --data-urlencode "group=Mittelstufe")
if [[ "$(echo "$LG" | jq '.[0].id')" != "\"$PL_ID\"" ]]; then
echo "? group filter for /plan failed"
exit 1
fi
echo "? /plan group filter funktioniert"
# Cleanup
echo
echo "=== Cleanup Collections ==="
curl -s -X DELETE "$BASE_URL/delete-collection?collection=$EX_COL" || true
curl -s -X DELETE "$BASE_URL/delete-collection?collection=$PL_COL" || true
echo "? Cleanup done"
echo
echo "?? Alle Tests für Exercises & Plans erfolgreich! ??"

79
llm-api/test_llm_api.sh Executable file
View File

@ -0,0 +1,79 @@
#!/usr/bin/env python3
import sys
import requests
BASE_URL = "http://127.0.0.1:8000"
COL = "test_collection"
SRC = "unit-test-src"
def fail(msg):
print("✗", msg)
sys.exit(1)
def test_openapi():
r = requests.get(f"{BASE_URL}/openapi.json")
if r.status_code != 200:
fail(f"/openapi.json returned {r.status_code}")
print("✓ OpenAPI: 200 OK")
def test_embed():
payload = {
"collection": COL,
"chunks": [
{"text": "Das ist ein Testtext für Embed.", "source": SRC}
]
}
r = requests.post(f"{BASE_URL}/embed", json=payload)
if r.status_code != 200:
fail(f"/embed returned {r.status_code}: {r.text}")
data = r.json()
if data.get("count") != 1:
fail(f"/embed count != 1: {data}")
print("✓ Embed: 1 Eintrag gespeichert")
def test_search():
params = {"query": "Testtext", "collection": COL}
r = requests.get(f"{BASE_URL}/search", params=params)
if r.status_code != 200:
fail(f"/search returned {r.status_code}: {r.text}")
results = r.json()
if not any("score" in item for item in results):
fail(f"/search lieferte keine Treffer: {results}")
print("✓ Search: Treffer gefunden")
def test_prompt():
payload = {"query": "Wie lautet dieser Testtext?", "context_limit": 1, "collection": COL}
r = requests.post(f"{BASE_URL}/prompt", json=payload)
if r.status_code != 200:
fail(f"/prompt returned {r.status_code}: {r.text}")
data = r.json()
if "answer" not in data:
fail(f"/prompt liefert kein 'answer'-Feld: {data}")
print("✓ Prompt: Antwort erhalten")
def test_delete_source():
params = {"collection": COL, "source": SRC}
r = requests.delete(f"{BASE_URL}/delete-source", params=params)
if r.status_code != 200:
fail(f"/delete-source returned {r.status_code}: {r.text}")
data = r.json()
if data.get("count") != 1:
fail(f"/delete-source count != 1: {data}")
print("✓ Delete-source: 1 Eintrag gelöscht")
def test_delete_collection():
params = {"collection": COL}
r = requests.delete(f"{BASE_URL}/delete-collection", params=params)
if r.status_code != 200:
fail(f"/delete-collection returned {r.status_code}: {r.text}")
print("✓ Delete-collection: Collection gelöscht")
if __name__ == "__main__":
print("\nStarte API-Tests...\n")
test_openapi()
test_embed()
test_search()
test_prompt()
test_delete_source()
test_delete_collection()
print("\n🎉 Alle Tests erfolgreich durchlaufen!")

90
llm-api/test_llm_api_full.sh Executable file
View File

@ -0,0 +1,90 @@
#!/usr/bin/env bash
set -euo pipefail
# 1) Base URL of your FastAPI server
export BASE_URL="http://localhost:8000"
# 2) MediaWiki credentials
export WIKI_API_URL="https://karatetrainer.net/api.php"
export WIKI_BOT_USER="LarsS@APIBot"
export WIKI_BOT_PASSWORD="6snci781sh79tbmvb2u9ld4bkd1i7n5t"
echo -e "\n?? Starte Health-Check für MediaWiki…"
HTTP=$(curl -s -o /dev/null -w '%{http_code}' "${BASE_URL}/import/wiki/health")
if [[ "$HTTP" != "200" ]]; then
echo "? Health-Check fehlgeschlagen (HTTP $HTTP)"
exit 1
fi
echo "? MediaWiki-Health OK (200)"
echo -e "\n?? Teste MediaWiki-Login…"
LOGIN_RESP=$(curl -s -X POST "${BASE_URL}/import/wiki/login" \
-H "Content-Type: application/json" \
-d "{\"username\":\"${WIKI_BOT_USER}\",\"password\":\"${WIKI_BOT_PASSWORD}\"}" )
echo "? Login-Response: $LOGIN_RESP"
# Optional: prüfen, ob "success" enthalten ist
if [[ "$LOGIN_RESP" != *"success"* ]]; then
echo "? MediaWiki-Login fehlgeschlagen"
exit 1
fi
echo "? MediaWiki-Login erfolgreich"
echo -e "\n?? Testet bestehenden /exercise-Endpoint…"
# Collection exercises aufräumen
curl -s -X DELETE "${BASE_URL}/delete-collection?collection=exercises" || true >/dev/null
# Erzeugen
CREATE=$(curl -s -X POST "${BASE_URL}/exercise" \
-H 'Content-Type: application/json' \
-d '{
"title":"TestÜbung",
"summary":"Zusammenfassung",
"short_description":"Kurz",
"discipline":"Test",
"age_group":"Erwachsene",
"target_group":"Tester",
"min_participants":1,
"duration_minutes":5,
"category":"test",
"purpose":"Demo",
"execution":"Ausführen",
"notes":"",
"preparation":"",
"method":"",
"equipment":[]
}')
echo "? Create Übung: $CREATE"
if [[ "$CREATE" != *"TestÜbung"* ]]; then
echo "? Übung wurde nicht angelegt"
exit 1
fi
echo "? /exercise POST OK"
# Auflisten
LIST=$(curl -s "${BASE_URL}/exercise?discipline=Test")
if [[ "$LIST" != *"TestÜbung"* ]]; then
echo "? /exercise GET liefert nicht die TestÜbung"
exit 1
fi
echo "? /exercise GET OK"
echo -e "\n?? Testet Embed/Search…"
# Einen Chunk einbetten
curl -s -X POST "${BASE_URL}/embed" \
-H 'Content-Type: application/json' \
-d '{
"chunks": [
{"text":"Hallo Welt","source":"test","source_type":"txt","title":"T","version":"1","related_to":"","tags":[],"owner":""}
],
"collection":"default"
}' >/dev/null
# Search
SEARCH=$(curl -s "${BASE_URL}/search?query=Hallo")
if [[ "$SEARCH" != *"Hallo Welt"* ]]; then
echo "? /search liefert unerwartetes Ergebnis"
exit 1
fi
echo "? /search OK"
echo -e "\n?? Alle Tests erfolgreich abgeschlossen!"

51
llm-api/test_llm_api_wiki.sh Executable file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env bash
set -euo pipefail
# Base URL (adjust if needed)
BASE_URL="${BASE_URL:-http://localhost:8000}"
echo "1?? MediaWiki Health-Check…"
HTTP_CODE=$(curl -s -o /dev/null -w '%{http_code}' "${BASE_URL}/import/wiki/health")
if [[ "$HTTP_CODE" != "200" ]]; then
echo "? Health-Check fehlgeschlagen (HTTP $HTTP_CODE)"
exit 1
fi
echo "? Health-Check OK (HTTP 200)"
echo
echo "2?? Exercise-CRU-Test (Stichprobe)…"
# Erst löschen, falls Test-Exercise bereits existiert
curl -s -X DELETE "${BASE_URL}/delete-collection?collection=exercises" > /dev/null || true
# Create
CREATE_RESP=$(curl -s -X POST "${BASE_URL}/exercise" \
-H 'Content-Type: application/json' \
-d '{"title":"TestÜbung","summary":"Zusammenfassung","short_description":"Kurz","discipline":"Test","age_group":"Erwachsene","target_group":"Tester","min_participants":1,"duration_minutes":5,"category":"test","purpose":"Test","execution":"Ausführen","notes":"","preparation":"","method":"","equipment":[]}' )
echo "? Create-Response: $CREATE_RESP"
# List
LIST_RESP=$(curl -s "${BASE_URL}/exercise?discipline=Test")
if [[ "$LIST_RESP" != *"TestÜbung"* ]]; then
echo "? Exercise-List nicht wie erwartet"
exit 1
fi
echo "? Exercise angelegt und gefunden"
echo
echo "3?? Search-Endpoint-Test…"
# Indexiere manuell einen Chunk
curl -s -X POST "${BASE_URL}/embed" \
-H 'Content-Type: application/json' \
-d '{"chunks":[{"text":"Hallo Welt","source":"test","version":"1","title":"T","related_to":"","tags":[],"owner":""}],"collection":"default"}' \
> /dev/null
SEARCH_RESP=$(curl -s "${BASE_URL}/search?query=Hallo")
if [[ "$SEARCH_RESP" != *"Hallo Welt"* ]]; then
echo "? Search-Ergebnis nicht korrekt"
exit 1
fi
echo "? Search liefert erwarteten Treffer"
echo
echo "? Alle Tests erfolgreich durchgelaufen."

24
llm-api/test_wiki_ep1.sh Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env bash
set -euo pipefail
BASE="${BASE_URL:-http://localhost:8000/import/wiki}"
# 1) Health check
echo "Health check…"
curl -s -o /dev/null -w '%{http_code}\n' "${BASE}/health"
# 2) Login (so that session cookies get set, if you keep using that)
echo -e "\nLogin…"
curl -s -X POST "${BASE}/login" \
-H "Content-Type: application/json" \
-d "{\"username\":\"${WIKI_BOT_USER}\",\"password\":\"${WIKI_BOT_PASSWORD}\"}" | jq
# 3) Import from a category
echo -e "\nImporting exercises from „Übungen“…"
RESP=$(curl -s -G "${BASE}/import/exercises" \
--data-urlencode "category=Übungen")
echo "$RESP" | jq
# 4) Check the result
IMPORTED=$(echo "$RESP" | jq '.imported | length')
echo -e "\n? Anzahl importierter Übungen: $IMPORTED"

41
llm-api/test_wiki_exercises.sh Executable file
View File

@ -0,0 +1,41 @@
#!/usr/bin/env bash
set -euo pipefail
BASE="http://localhost:8000/import/wiki"
echo "? 1) Health-Check"
curl -s -o /dev/null -w '%{http_code}\n' "${BASE}/health"
echo -e "\n? 2) Login"
curl -s -X POST "${BASE}/login" \
-H "Content-Type: application/json" \
-d "{\"username\":\"${WIKI_BOT_USER}\",\"password\":\"${WIKI_BOT_PASSWORD}\"}" | jq
echo -e "\n? 3) SMW-Ask: Alle Übungen abfragen und Titel ausgeben"
curl -s -G "${BASE}/semantic/pages" \
--data-urlencode "category=Übungen" \
| jq -r 'keys[]'
COUNT=$(curl -s -G "${BASE}/semantic/pages" \
--data-urlencode "category=Übungen" \
| jq 'keys | length')
echo -e "\n? Anzahl Übungen: ${COUNT}"
echo -e "\n? 4) Details der ersten Übung"
FIRST_TITLE=$(curl -s -G "${BASE}/semantic/pages" \
--data-urlencode "category=Übungen" \
| jq -r 'keys[0]')
echo "Erste Übung: ${FIRST_TITLE}"
FIRST_PAGEID=$(curl -s -G "${BASE}/semantic/pages" \
--data-urlencode "category=Übungen" \
| jq -r ".\"${FIRST_TITLE}\".pageid")
echo "PageID: ${FIRST_PAGEID}"
echo -e "\n? 5) Wikitext der ersten Übung"
curl -s -G "${BASE}/pagecontent" \
--data-urlencode "pageid=${FIRST_PAGEID}" \
--data-urlencode "title=${FIRST_TITLE}" \
| jq '.content'
echo -e "\n? Test abgeschlossen."

110
llm-api/test_wiki_router.sh Executable file
View File

@ -0,0 +1,110 @@
#!/usr/bin/env bash
# test_wiki_router.sh — end-to-end tests for the wiki_router endpoints (health, login, SMW ask, page info/parse/detail)
# Prerequisites:
# - FastAPI (uvicorn) running at http://localhost:8000
# - jq installed
# - optional: .env in the same directory with WIKI_BOT_USER / WIKI_BOT_PASSWORD
set -euo pipefail
export LC_ALL=C.UTF-8 LANG=C.UTF-8
API_BASE="http://localhost:8000/import/wiki"
CATEGORY_DEFAULT="Übungen"
CATEGORY="${WIKI_CATEGORY:-$CATEGORY_DEFAULT}"
# --- Helper functions ---
die() { echo "$*" >&2; exit 1; }
ok() { echo "$*"; }
info(){ echo "$*"; }
# Load .env (if present)
if [[ -f .env ]]; then
set -o allexport; source .env; set +o allexport
fi
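# (allexport automatically exports every variable defined while sourcing .env)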
# Debug: masked echo of the env vars
echo "DBG: User=${WIKI_BOT_USER:-<unset>}, Pass=$( [[ -n "${WIKI_BOT_PASSWORD:-}" ]] && echo set || echo unset )"
echo "1) Health-Check"
HC_CODE=$(curl -s -o /dev/null -w "%{http_code}" "$API_BASE/health" | tr -d '\r')
[[ "$HC_CODE" == "200" ]] || die "Health failed (HTTP $HC_CODE)"
ok "Health OK"
echo
echo "2) Login"
[[ -n "${WIKI_BOT_USER:-}" && -n "${WIKI_BOT_PASSWORD:-}" ]] || die "ENV nicht gesetzt (WIKI_BOT_USER / WIKI_BOT_PASSWORD)"
# JSON sicher bauen
LOGIN_JSON=$(jq -nc --arg u "$WIKI_BOT_USER" --arg p "$WIKI_BOT_PASSWORD" '{username:$u, password:$p}')
# Run the request (with clear error output)
RAW_LOGIN_RESP=$(curl -sS -X POST "$API_BASE/login" -H 'Content-Type: application/json' -d "$LOGIN_JSON" | tr -d '\r' || true)
# Check whether the response is valid JSON
if ! echo "$RAW_LOGIN_RESP" | jq . >/dev/null 2>&1; then
echo "$RAW_LOGIN_RESP"
die "Login-Response ist kein valides JSON (oben roh ausgegeben)"
fi
echo "$RAW_LOGIN_RESP" | jq .
STATUS=$(echo "$RAW_LOGIN_RESP" | jq -r '.status // empty')
[[ "$STATUS" == "success" ]] || die "Login failed: $(echo "$RAW_LOGIN_RESP" | jq -r '.message // "Login fehlgeschlagen"')"
ok "Login successful"
echo
echo "3) SMW-Ask: Alle Übungen zählen"
COUNT_JSON=$(curl -s -G "$API_BASE/semantic/pages" --data-urlencode "category=$CATEGORY" | tr -d '\r' || true)
if ! echo "$COUNT_JSON" | jq . >/dev/null 2>&1; then
echo "$COUNT_JSON"
die "SMW-Ask Count: Response ist kein valides JSON"
fi
EXERCISE_COUNT=$(echo "$COUNT_JSON" | jq 'keys | length')
info "$EXERCISE_COUNT Übungen gefunden"
echo
echo "4) SMW-Ask: Erste 5 Übungstitel"
FIRST5_JSON=$(curl -s -G "$API_BASE/semantic/pages" --data-urlencode "category=$CATEGORY" | tr -d '\r' || true)
if ! echo "$FIRST5_JSON" | jq . >/dev/null 2>&1; then
echo "$FIRST5_JSON"
die "SMW-Ask Titel: Response ist kein valides JSON"
fi
echo "$FIRST5_JSON" | jq -r 'keys[0:5][]' | sed 's/^/ • /'
TITLE="Affenklatschen"
echo
echo "5) Core-API Info für \"$TITLE\""
INFO_RESP=$(curl -s -G "$API_BASE/info" --data-urlencode "title=$TITLE" | tr -d '\r' || true)
if ! echo "$INFO_RESP" | jq . >/dev/null 2>&1; then
echo "$INFO_RESP"
die "Info: Response ist kein valides JSON"
fi
echo "$INFO_RESP" | jq
PAGEID=$(echo "$INFO_RESP" | jq -r '.pageid')
FULLURL=$(echo "$INFO_RESP" | jq -r '.fullurl // empty')
[[ "$PAGEID" =~ ^[0-9]+$ ]] || die "Info failed: pageid ungültig"
ok "pageid=$PAGEID, url=${FULLURL:-<none>}"
echo
echo "6) Parse-Endpoint für pageid=$PAGEID"
PARSE_RESP=$(curl -s -G "$API_BASE/parsepage" --data-urlencode "pageid=$PAGEID" --data-urlencode "title=$TITLE" | tr -d '\r' || true)
if ! echo "$PARSE_RESP" | jq . >/dev/null 2>&1; then
echo "$PARSE_RESP"
die "Parse: Response ist kein valides JSON"
fi
# Wikitext excerpt (if present)
WT=$(echo "$PARSE_RESP" | jq -r '.wikitext // ""')
printf '%s\n' "${WT:0:200}"
ok "Wikitext (erster Ausschnitt) geladen"
echo
echo "7) Detail-Endpoint für \"$TITLE\""
DETAIL_RESP=$(curl -s -G "$API_BASE/semantic/page" --data-urlencode "category=$CATEGORY" --data-urlencode "title=$TITLE" | tr -d '\r' || true)
if ! echo "$DETAIL_RESP" | jq . >/dev/null 2>&1; then
echo "$DETAIL_RESP"
die "Detail: Response ist kein valides JSON"
fi
echo "$DETAIL_RESP" | jq '{title: .title, pageid: .pageid, fullurl: .fullurl, printouts: .printouts, wikitext_length: (.wikitext|length)}'
ok "Detail-Endpoint liefert erwartete Felder"
echo
echo "🎉 Alle Tests erfolgreich abgeschlossen!"

10
llm-api/uvicorn.log Normal file
View File

@ -0,0 +1,10 @@
nohup: ignoring input
[DEBUG] llm_api.py version 1.0.2 loaded from /home/llmadmin/llm-api/llm_api.py
[DEBUG] Using OLLAMA_URL = http://localhost:11434/api/generate
[DEBUG] Using OLLAMA_MODEL = mistral:latest
INFO: Started server process [54813]
INFO: Waiting for application startup.
INFO: Application startup complete.
ERROR: [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): address already in use
INFO: Waiting for application shutdown.
INFO: Application shutdown complete.

4
llm-api/wiki_cookies.txt Normal file
View File

@ -0,0 +1,4 @@
# Netscape HTTP Cookie File
# https://curl.se/docs/http-cookies.html
# This file was generated by libcurl! Edit at your own risk.

173
llm-api/wiki_router.py Normal file
View File

@ -0,0 +1,173 @@
"""
File: wiki_router.py
Beschreibung:
- Endpunkte für MediaWiki-Integration im lokalen Netzwerk.
- Funktionen:
* /health: Prüft Verfügbarkeit der MediaWiki-API.
* /login: Führt clientlogin durch und speichert Session-Cookies.
* /semantic/pages: Listet alle Übungen inkl. Unterkategorien via SMW-Ask.
* /parsepage: Ruft Roh-Wikitext über action=parse für eine Seite ab.
* /info: Liefert pageid und fullurl über Core-API Query.
* /semantic/page: Liefert Metadaten einer Übung und Wikitext sowie pageid über Core-API.
Version: 1.2.0
"""
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import Dict, Any, List
import requests, os
__version__ = "1.2.0"
router = APIRouter()
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
wiki_session = requests.Session()
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: str | None = None
class PageContentResponse(BaseModel):
pageid: int
title: str
wikitext: str
class PageInfoResponse(BaseModel):
pageid: int
title: str
fullurl: str
# Health-Check
@router.get("/health")
def health_check():
try:
resp = wiki_session.get(
WIKI_API_URL,
params={"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"},
timeout=5
)
resp.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
return {"status": "ok"}
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    # Fetch a login token
try:
token_resp = wiki_session.get(
WIKI_API_URL,
params={"action": "query", "meta": "tokens", "type": "login", "format": "json"},
timeout=10
)
token_resp.raise_for_status()
token = token_resp.json().get("query", {}).get("tokens", {}).get("logintoken")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Token-Error: {e}")
if not token:
raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")
# clientlogin
try:
login_resp = wiki_session.post(
WIKI_API_URL,
data={
"action": "clientlogin",
"format": "json",
"username": data.username,
"password": data.password,
"logintoken": token,
"loginreturnurl": "http://localhost:8000"
},
timeout=10
)
login_resp.raise_for_status()
status = login_resp.json().get("clientlogin", {}).get("status")
except Exception:
status = None
# fallback login
if status != "PASS":
alt = wiki_session.post(
WIKI_API_URL,
data={"action": "login", "format": "json", "lgname": data.username, "lgpassword": data.password},
timeout=10
)
alt.raise_for_status()
status = alt.json().get("login", {}).get("result")
if status in ("PASS", "Success"):
return WikiLoginResponse(status="success", message=None)
return WikiLoginResponse(status="failed", message="Login fehlgeschlagen")
# SMW-Ask: alle Übungen inkl. Unterkategorien
@router.get("/semantic/pages")
def semantic_pages(category: str = Query(..., description="Kategorie ohne 'Category:'")) -> Dict[str, Any]:
smw_query = f"[[Category:{category}]]"
ask_query = f"{smw_query}|limit=50000"
r = wiki_session.get(
WIKI_API_URL,
params={"action": "ask", "query": ask_query, "format": "json"},
timeout=30
)
try:
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"SMW-Ask-Error: {e}")
return r.json().get("query", {}).get("results", {})
# Fetch wikitext via the parse endpoint (by pageid)
@router.get("/parsepage", response_model=PageContentResponse)
def parse_page(pageid: int = Query(...), title: str = Query(None)):
r = wiki_session.get(
WIKI_API_URL,
params={"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"},
timeout=20
)
try:
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Parse-Error: {e}")
wikitext = r.json().get("parse", {}).get("wikitext", {}).get("*", "")
return PageContentResponse(pageid=pageid, title=title or "", wikitext=wikitext)
# Page info via the core API (determines pageid + fullurl)
@router.get("/info", response_model=PageInfoResponse)
def page_info(title: str = Query(..., description="Name of the page")):
r = wiki_session.get(
WIKI_API_URL,
params={"action": "query", "titles": title, "prop": "info", "inprop": "url", "format": "json"},
timeout=10
)
try:
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Info-Error: {e}")
pages = r.json().get("query", {}).get("pages", {})
pid_str, page = next(iter(pages.items()))
pid = int(pid_str)
fullurl = page.get("fullurl")
return PageInfoResponse(pageid=pid, title=page.get("title"), fullurl=fullurl)
# Detail-Endpoint für eine Übung: Metadaten aus Ask + Wikitext & ID via Core-API
@router.get("/semantic/page", response_model=Dict[str, Any])
def semantic_page_detail(category: str = Query(...), title: str = Query(...)) -> Dict[str, Any]:
    # Metadata from the SMW ask result
entries = semantic_pages(category)
entry = entries.get(title)
if not entry:
        raise HTTPException(status_code=404, detail="Exercise not found in the SMW ask result.")
    # Page info via the core API
info = page_info(title=title)
# Wikitext via parse
parsed = parse_page(pageid=info.pageid, title=title)
return {
"title": title,
"pageid": info.pageid,
"fullurl": info.fullurl,
"printouts": entry.get("printouts", {}),
"wikitext": parsed.wikitext
}

110
llm-api/wiki_router0.1.1.py Normal file
View File

@ -0,0 +1,110 @@
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import List
import requests, os
# Version bumped
__version__ = "1.1.6"
router = APIRouter()
# MediaWiki configuration
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
wiki_session = requests.Session()
# Models
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: str | None = None
class CategoryMembersResponse(BaseModel):
pageid: int
title: str
class PageContentResponse(BaseModel):
pageid: int
title: str
content: str
# Health-Check
@router.get("/health")
def health_check():
params = {"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"}
try:
resp = wiki_session.get(WIKI_API_URL, params=params, timeout=5)
resp.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
return {"status": "ok"}
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
"""
Führt Login mittels MediaWiki Bot-Password API durch.
Username kann im Format 'User@BotName' übergeben werden.
"""
    # Handle the bot-password format
lgname = data.username
lgpassword = data.password
if '@' in data.username:
user, bot = data.username.split('@',1)
lgname = user
lgpassword = f"{bot}@{data.password}"
    # Step 1: fetch a login token
params_token = {"action": "query", "meta": "tokens", "type": "login", "format": "json"}
try:
r1 = wiki_session.get(WIKI_API_URL, params=params_token, timeout=10)
r1.raise_for_status()
token = r1.json().get("query", {}).get("tokens", {}).get("logintoken")
if not token:
raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Token-Error: {e}")
    # Step 2: perform the login with the bot password
login_data = {
"action": "login",
"format": "json",
"lgname": lgname,
"lgpassword": lgpassword,
"lgtoken": token
}
try:
r2 = wiki_session.post(WIKI_API_URL, data=login_data, timeout=10)
r2.raise_for_status()
result = r2.json().get("login", {})
if result.get("result") != "Success":
return WikiLoginResponse(status="failed", message=result.get("reason"))
except Exception as e:
raise HTTPException(status_code=502, detail=f"Login-Error: {e}")
return WikiLoginResponse(status="success", message=None)
# 1) Fetch category members
@router.get("/pages", response_model=List[CategoryMembersResponse])
def list_category_members(category: str = Query(..., description="Name der Kategorie, ohne 'Category:'")):
cmtitle = f"Category:{category}"
params = {"action": "query", "list": "categorymembers", "cmtitle": cmtitle, "cmlimit": 500, "format": "json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
members = r.json().get("query", {}).get("categorymembers", [])
except Exception as e:
raise HTTPException(status_code=502, detail=f"Kategorie-Error: {e}")
return [CategoryMembersResponse(pageid=m["pageid"], title=m["title"]) for m in members]
# 2) Fetch page content
@router.post("/pagecontent", response_model=PageContentResponse)
def get_page_content(pageid: int = Query(...), title: str = Query(None)):
params = {"action": "query", "prop": "revisions", "rvprop": "content", "rvslots": "main", "pageids": pageid, "format": "json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
pages = r.json().get("query", {}).get("pages", {})
page = pages.get(str(pageid), {})
content = page.get("revisions", [{}])[0].get("slots", {}).get("main", {}).get("*", "")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Content-Error: {e}")
return PageContentResponse(pageid=pageid, title=title or page.get("title"), content=content)

121
llm-api/wiki_router0.1.2.py Normal file
View File

@ -0,0 +1,121 @@
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import List
import requests, os
# Version bumped
__version__ = "1.1.6"
router = APIRouter()
# MediaWiki configuration
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
WIKI_BOT_USER = os.getenv("WIKI_BOT_USER", "")
WIKI_BOT_PASSWORD = os.getenv("WIKI_BOT_PASSWORD", "")
wiki_session = requests.Session()
# Models
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: str | None = None
class CategoryMembersResponse(BaseModel):
pageid: int
title: str
class PageContentResponse(BaseModel):
pageid: int
title: str
content: str
# Health-Check
@router.get("/health")
def health_check():
params = {"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"}
try:
resp = wiki_session.get(WIKI_API_URL, params=params, timeout=5)
resp.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
return {"status": "ok"}
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    # Direct comparison for testing purposes
if data.username == WIKI_BOT_USER and data.password == WIKI_BOT_PASSWORD:
return WikiLoginResponse(status="success", message=None)
return WikiLoginResponse(status="failed", message="Incorrect username or password.")
# 1) Fetch category members
@router.get("/pages", response_model=List[CategoryMembersResponse])
def list_category_members(category: str = Query(..., description="Kategorie-Name ohne 'Category:'")):
cmtitle = f"Category:{category}"
params = {"action": "query", "list": "categorymembers", "cmtitle": cmtitle, "cmlimit": 500, "format": "json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
members = r.json().get("query", {}).get("categorymembers", [])
except Exception as e:
raise HTTPException(status_code=502, detail=f"Kategorie-Error: {e}")
return [CategoryMembersResponse(pageid=m["pageid"], title=m["title"]) for m in members]
# 2) Fetch page content
@router.post("/pagecontent", response_model=PageContentResponse)
def get_page_content(pageid: int = Query(...), title: str = Query(None)):
params = {"action": "query", "prop": "revisions", "rvprop": "content", "rvslots": "main", "pageids": pageid, "format": "json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
pages = r.json().get("query", {}).get("pages", {})
page = pages.get(str(pageid), {})
content = page.get("revisions", [{}])[0].get("slots", {}).get("main", {}).get("*", "")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Content-Error: {e}")
return PageContentResponse(pageid=pageid, title=title or page.get("title"), content=content)
# 3) Import exercises from a category
@router.post("/import/exercises")
def import_exercises(category: str = Query(..., description="Kategorie ohne 'Category:'")):
"""
Holt alle Seiten einer Kategorie, parsed deren Wikitext und importiert Übungen.
"""
import mwparserfromhell
    # Step 1: page list
pages = list_category_members(category)
imported = []
for p in pages:
        # Step 2: pull the page content
pc = get_page_content(pageid=p.pageid, title=p.title)
wikicode = mwparserfromhell.parse(pc.content)
        # Parse the infobox
templates = wikicode.filter_templates()
infobox = next((t for t in templates if t.name.strip() == 'ÜbungInfoBox'), None)
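        # Only pages that carry the ÜbungInfoBox template are imported; all others are skipped.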
if not infobox:
continue
        # Extract the fields
ex = {
'title': infobox.get('title').value.strip() if infobox.has('title') else p.title,
'summary': infobox.get('summary').value.strip() if infobox.has('summary') else '',
'short_description': infobox.get('short_description').value.strip() if infobox.has('short_description') else '',
'keywords': [kw.strip() for kw in infobox.get('keywords').value.split(',')] if infobox.has('keywords') else [],
'link': None,
'discipline': infobox.get('discipline').value.strip() if infobox.has('discipline') else '',
'group': infobox.get('group').value.strip() if infobox.has('group') else None,
'age_group': infobox.get('age_group').value.strip() if infobox.has('age_group') else '',
'target_group': infobox.get('target_group').value.strip() if infobox.has('target_group') else '',
'min_participants': int(infobox.get('min_participants').value.strip()) if infobox.has('min_participants') else 1,
'duration_minutes': int(infobox.get('duration').value.strip()) if infobox.has('duration') else 0,
'capabilities': {},
'category': category,
'purpose': '', 'execution': '', 'notes': '', 'preparation': '', 'method': '', 'equipment': []
}
        # POST to the exercise endpoint
resp = requests.post(f"{os.getenv('APP_URL','http://localhost:8000')}/exercise", json=ex)
if resp.status_code == 200:
imported.append(resp.json().get('id'))
return {"imported": imported}

168
llm-api/wiki_router1.1.5.py Normal file
View File

@ -0,0 +1,168 @@
"""
File: wiki_router.py
Beschreibung:
- Enthält Endpunkte für MediaWiki-Integration im lokalen Netzwerk.
- Funktionen:
* /health: Prüft Verfügbarkeit der MediaWiki-API.
* /login: Führt clientlogin durch und speichert Session-Cookies.
* /pages: Listet Seiten einer Kategorie (Artikel im Namespace 0).
* /pagecontent: Ruft Wikitext einer Seite ab.
* /semantic/pages: Führt SMW-Ask-Abfrage aus.
* /import/exercises: Importiert Übungen per Infobox-Parsing.
Version: 1.1.6
"""
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import List, Dict
import requests, os
# Version bumped
__version__ = "1.1.6"
router = APIRouter()
# MediaWiki configuration
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
WIKI_BOT_USER = os.getenv("WIKI_BOT_USER", "")
WIKI_BOT_PASSWORD = os.getenv("WIKI_BOT_PASSWORD", "")
wiki_session = requests.Session()
# Models
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: str | None = None
class CategoryMembersResponse(BaseModel):
pageid: int
title: str
class PageContentResponse(BaseModel):
pageid: int
title: str
content: str
# Health-Check
@router.get("/health")
def health_check():
try:
resp = wiki_session.get(WIKI_API_URL, params={"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"}, timeout=5)
resp.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
return {"status": "ok"}
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    # Fetch a clientlogin token
try:
token_resp = wiki_session.get(WIKI_API_URL, params={"action":"query","meta":"tokens","type":"login","format":"json"}, timeout=10)
token_resp.raise_for_status()
token = token_resp.json().get("query",{}).get("tokens",{}).get("logintoken")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Token-Error: {e}")
if not token:
raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")
# clientlogin
try:
login_resp = wiki_session.post(WIKI_API_URL, data={"action":"clientlogin","format":"json","username":data.username,"password":data.password,"logintoken":token,"loginreturnurl":"http://localhost:8000"}, timeout=10)
login_resp.raise_for_status()
cl = login_resp.json().get("clientlogin", {})
except Exception as e:
raise HTTPException(status_code=502, detail=f"Login-Error: {e}")
if cl.get("status") == "PASS":
return WikiLoginResponse(status="success", message=None)
# fallback action=login
try:
alt = wiki_session.post(WIKI_API_URL, data={"action":"login","format":"json","lgname":data.username,"lgpassword":data.password}, timeout=10)
alt.raise_for_status()
res = alt.json().get("login",{})
if res.get("result") == "Success":
return WikiLoginResponse(status="success", message=None)
else:
return WikiLoginResponse(status="failed", message=res.get("reason"))
except Exception as e:
return WikiLoginResponse(status="failed", message=str(e))
# List category members (Namespace 0)
@router.get("/pages", response_model=List[CategoryMembersResponse])
def list_category_members(category: str = Query(..., description="Kategorie ohne 'Category:'")):
cmtitle = f"Category:{category}"
params = {"action":"query","list":"categorymembers","cmtitle":cmtitle,"cmnamespace":0,"cmlimit":50000,"format":"json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
members = r.json().get("query",{}).get("categorymembers",[])
except Exception as e:
raise HTTPException(status_code=502, detail=f"Kategorie-Error: {e}")
return [CategoryMembersResponse(pageid=m["pageid"], title=m["title"]) for m in members]
# Fetch page content
@router.post("/pagecontent", response_model=PageContentResponse)
def get_page_content(pageid: int = Query(...), title: str = Query(None)):
params = {"action":"query","prop":"revisions","rvprop":"content","rvslots":"main","pageids":pageid,"format":"json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
pages = r.json().get("query",{}).get("pages",{})
content = pages.get(str(pageid),{}).get("revisions",[{}])[0].get("slots",{}).get("main",{}).get("*","")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Content-Error: {e}")
return PageContentResponse(pageid=pageid, title=title or pages[str(pageid)].get("title"), content=content)
# SMW ask query (recursive query across subcategories)
@router.get("/semantic/pages")
def semantic_category_members(category: str = Query(..., description="Category name without 'Category:'")) -> Dict:
    """
    Runs a recursive SMW ask query to obtain structured data for a category incl. its subcategories.
    Limit and format are adjustable.
    """
smw_query = f"[[Category:{category}]]"
    # Ask parameters: query string with a limit
ask_query = f"{smw_query}|limit=50000"
params = {
"action": "ask",
"query": ask_query,
"format": "json"
}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=20)
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"SMW-Ask-Error: {e}")
return r.json()
# Import exercises
@router.get("/import/exercises", response_model=Dict[str,List[str]])
def import_exercises_get(category: str = Query(...)):
return import_exercises(category)
@router.post("/import/exercises", response_model=Dict[str,List[str]])
def import_exercises(category: str = Query(...)):
import mwparserfromhell
imported = []
for p in list_category_members(category):
pc = get_page_content(pageid=p.pageid, title=p.title)
wikicode = mwparserfromhell.parse(pc.content)
infobox = next((t for t in wikicode.filter_templates() if t.name.strip()=="ÜbungInfoBox"),None)
if not infobox:
continue
ex = { 'title': infobox.get('title').value.strip() if infobox.has('title') else p.title,
'summary': infobox.get('summary').value.strip() if infobox.has('summary') else '',
'short_description': infobox.get('short_description').value.strip() if infobox.has('short_description') else '',
'keywords':[kw.strip() for kw in infobox.get('keywords').value.split(',')] if infobox.has('keywords') else [],
'link':None,'discipline':infobox.get('discipline').value.strip() if infobox.has('discipline') else '',
'group':infobox.get('group').value.strip() if infobox.has('group') else None,'age_group':infobox.get('age_group').value.strip() if infobox.has('age_group') else '',
'target_group':infobox.get('target_group').value.strip() if infobox.has('target_group') else '','min_participants':int(infobox.get('min_participants').value.strip()) if infobox.has('min_participants') else 1,
'duration_minutes':int(infobox.get('duration').value.strip()) if infobox.has('duration') else 0,'capabilities':{},'category':category,
'purpose':'','execution':'','notes':'','preparation':'','method':'','equipment':[] }
resp = requests.post(f"{os.getenv('APP_URL','http://localhost:8000')}/exercise", json=ex)
if resp.status_code==200:
imported.append(resp.json().get('id'))
return {"imported":imported}

164
llm-api/wiki_router1.1.6.py Normal file
View File

@ -0,0 +1,164 @@
"""
File: wiki_router.py
Beschreibung:
- Enthält Endpunkte für MediaWiki-Integration im lokalen Netzwerk.
- Funktionen:
* /health: Prüft Verfügbarkeit der MediaWiki-API.
* /login: Führt clientlogin durch und speichert Session-Cookies.
* /pages: Listet Seiten einer Kategorie (Artikel im Namespace 0).
* /pagecontent: Ruft Wikitext einer Seite ab.
* /semantic/pages: Führt SMW-Ask-Abfrage aus.
* /import/exercises: Importiert Übungen per Infobox-Parsing.
Version: 1.1.6
"""
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import List, Dict
import requests, os
# Version bumped
__version__ = "1.1.6"
router = APIRouter()
# MediaWiki configuration
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
WIKI_BOT_USER = os.getenv("WIKI_BOT_USER", "")
WIKI_BOT_PASSWORD = os.getenv("WIKI_BOT_PASSWORD", "")
wiki_session = requests.Session()
# Models
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: str | None = None
class CategoryMembersResponse(BaseModel):
pageid: int
title: str
class PageContentResponse(BaseModel):
pageid: int
title: str
content: str
# Health-Check
@router.get("/health")
def health_check():
try:
resp = wiki_session.get(WIKI_API_URL, params={"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"}, timeout=5)
resp.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
return {"status": "ok"}
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    # Fetch a clientlogin token
try:
token_resp = wiki_session.get(WIKI_API_URL, params={"action":"query","meta":"tokens","type":"login","format":"json"}, timeout=10)
token_resp.raise_for_status()
token = token_resp.json().get("query",{}).get("tokens",{}).get("logintoken")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Token-Error: {e}")
if not token:
raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")
# clientlogin
try:
login_resp = wiki_session.post(WIKI_API_URL, data={"action":"clientlogin","format":"json","username":data.username,"password":data.password,"logintoken":token,"loginreturnurl":"http://localhost:8000"}, timeout=10)
login_resp.raise_for_status()
cl = login_resp.json().get("clientlogin", {})
except Exception as e:
raise HTTPException(status_code=502, detail=f"Login-Error: {e}")
if cl.get("status") == "PASS":
return WikiLoginResponse(status="success", message=None)
# fallback action=login
try:
alt = wiki_session.post(WIKI_API_URL, data={"action":"login","format":"json","lgname":data.username,"lgpassword":data.password}, timeout=10)
alt.raise_for_status()
res = alt.json().get("login",{})
if res.get("result") == "Success":
return WikiLoginResponse(status="success", message=None)
else:
return WikiLoginResponse(status="failed", message=res.get("reason"))
except Exception as e:
return WikiLoginResponse(status="failed", message=str(e))
# List category members (Namespace 0)
@router.get("/pages", response_model=List[CategoryMembersResponse])
def list_category_members(category: str = Query(..., description="Kategorie ohne 'Category:'")):
cmtitle = f"Category:{category}"
params = {"action":"query","list":"categorymembers","cmtitle":cmtitle,"cmnamespace":0,"cmlimit":50000,"format":"json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
members = r.json().get("query",{}).get("categorymembers",[])
except Exception as e:
raise HTTPException(status_code=502, detail=f"Kategorie-Error: {e}")
return [CategoryMembersResponse(pageid=m["pageid"], title=m["title"]) for m in members]
# Fetch page content
@router.post("/pagecontent", response_model=PageContentResponse)
def get_page_content(pageid: int = Query(...), title: str = Query(None)):
params = {"action":"query","prop":"revisions","rvprop":"content","rvslots":"main","pageids":pageid,"format":"json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
pages = r.json().get("query",{}).get("pages",{})
content = pages.get(str(pageid),{}).get("revisions",[{}])[0].get("slots",{}).get("main",{}).get("*","")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Content-Error: {e}")
return PageContentResponse(pageid=pageid, title=title or pages[str(pageid)].get("title"), content=content)
# SMW ask query (recursive query for all pages incl. subcategories)
@router.get("/semantic/pages")
def semantic_category_members(category: str = Query(..., description="Category name without 'Category:'")) -> Dict:
    """
    Uses SMW ask with a recursive category query to return all pages.
    Limit=50000, no specific printout; the result JSON contains all fields.
    """
smw_query = f"[[Category:{category}]]"
ask_query = f"{smw_query}|limit=50000"
params = {"action": "ask", "query": ask_query, "format": "json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=20)
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"SMW-Ask-Error: {e}")
result = r.json().get("query", {}).get("results", {})
return result
# Import exercises
@router.get("/import/exercises", response_model=Dict[str,List[str]])
def import_exercises_get(category: str = Query(...)):
return import_exercises(category)
@router.post("/import/exercises", response_model=Dict[str,List[str]])
def import_exercises(category: str = Query(...)):
import mwparserfromhell
imported = []
for p in list_category_members(category):
pc = get_page_content(pageid=p.pageid, title=p.title)
wikicode = mwparserfromhell.parse(pc.content)
infobox = next((t for t in wikicode.filter_templates() if t.name.strip()=="ÜbungInfoBox"),None)
if not infobox:
continue
ex = { 'title': infobox.get('title').value.strip() if infobox.has('title') else p.title,
'summary': infobox.get('summary').value.strip() if infobox.has('summary') else '',
'short_description': infobox.get('short_description').value.strip() if infobox.has('short_description') else '',
'keywords':[kw.strip() for kw in infobox.get('keywords').value.split(',')] if infobox.has('keywords') else [],
'link':None,'discipline':infobox.get('discipline').value.strip() if infobox.has('discipline') else '',
'group':infobox.get('group').value.strip() if infobox.has('group') else None,'age_group':infobox.get('age_group').value.strip() if infobox.has('age_group') else '',
'target_group':infobox.get('target_group').value.strip() if infobox.has('target_group') else '','min_participants':int(infobox.get('min_participants').value.strip()) if infobox.has('min_participants') else 1,
'duration_minutes':int(infobox.get('duration').value.strip()) if infobox.has('duration') else 0,'capabilities':{},'category':category,
'purpose':'','execution':'','notes':'','preparation':'','method':'','equipment':[] }
resp = requests.post(f"{os.getenv('APP_URL','http://localhost:8000')}/exercise", json=ex)
if resp.status_code==200:
imported.append(resp.json().get('id'))
return {"imported":imported}

165
llm-api/wiki_router1.1.7.py Normal file
View File

@ -0,0 +1,165 @@
"""
File: wiki_router.py
Beschreibung:
- Enthält Endpunkte für MediaWiki-Integration im lokalen Netzwerk.
- Funktionen:
* /health: Prüft Verfügbarkeit der MediaWiki-API.
* /login: Führt clientlogin durch und speichert Session-Cookies.
* /pages: Listet Seiten einer Kategorie (Artikel im Namespace 0).
* /pagecontent: Ruft Wikitext einer Seite ab.
* /semantic/pages: Führt SMW-Ask-Abfrage aus.
* /import/exercises: Importiert Übungen per Infobox-Parsing.
Version: 1.1.6
"""
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import List, Dict
import requests, os
# Version bumped
__version__ = "1.1.6"
router = APIRouter()
# MediaWiki configuration
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
WIKI_BOT_USER = os.getenv("WIKI_BOT_USER", "")
WIKI_BOT_PASSWORD = os.getenv("WIKI_BOT_PASSWORD", "")
wiki_session = requests.Session()
# Models
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: str | None = None
class CategoryMembersResponse(BaseModel):
pageid: int
title: str
class PageContentResponse(BaseModel):
pageid: int
title: str
content: str
# Health-Check
@router.get("/health")
def health_check():
try:
resp = wiki_session.get(WIKI_API_URL, params={"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"}, timeout=5)
resp.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
return {"status": "ok"}
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    # Fetch a clientlogin token
try:
token_resp = wiki_session.get(WIKI_API_URL, params={"action":"query","meta":"tokens","type":"login","format":"json"}, timeout=10)
token_resp.raise_for_status()
token = token_resp.json().get("query",{}).get("tokens",{}).get("logintoken")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Token-Error: {e}")
if not token:
raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")
# clientlogin
try:
login_resp = wiki_session.post(WIKI_API_URL, data={"action":"clientlogin","format":"json","username":data.username,"password":data.password,"logintoken":token,"loginreturnurl":"http://localhost:8000"}, timeout=10)
login_resp.raise_for_status()
cl = login_resp.json().get("clientlogin", {})
except Exception as e:
raise HTTPException(status_code=502, detail=f"Login-Error: {e}")
if cl.get("status") == "PASS":
return WikiLoginResponse(status="success", message=None)
# fallback action=login
try:
alt = wiki_session.post(WIKI_API_URL, data={"action":"login","format":"json","lgname":data.username,"lgpassword":data.password}, timeout=10)
alt.raise_for_status()
res = alt.json().get("login",{})
if res.get("result") == "Success":
return WikiLoginResponse(status="success", message=None)
else:
return WikiLoginResponse(status="failed", message=res.get("reason"))
except Exception as e:
return WikiLoginResponse(status="failed", message=str(e))
# List category members (Namespace 0)
@router.get("/pages", response_model=List[CategoryMembersResponse])
def list_category_members(category: str = Query(..., description="Kategorie ohne 'Category:'")):
cmtitle = f"Category:{category}"
params = {"action":"query","list":"categorymembers","cmtitle":cmtitle,"cmnamespace":0,"cmlimit":50000,"format":"json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
members = r.json().get("query",{}).get("categorymembers",[])
except Exception as e:
raise HTTPException(status_code=502, detail=f"Kategorie-Error: {e}")
return [CategoryMembersResponse(pageid=m["pageid"], title=m["title"]) for m in members]
# Fetch page content
@router.post("/pagecontent", response_model=PageContentResponse)
def get_page_content(pageid: int = Query(...), title: str = Query(None)):
params = {"action":"query","prop":"revisions","rvprop":"content","rvslots":"main","pageids":pageid,"format":"json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
r.raise_for_status()
pages = r.json().get("query",{}).get("pages",{})
content = pages.get(str(pageid),{}).get("revisions",[{}])[0].get("slots",{}).get("main",{}).get("*","")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Content-Error: {e}")
return PageContentResponse(pageid=pageid, title=title or pages[str(pageid)].get("title"), content=content)
# SMW ask query (recursive query for all pages incl. subcategories)
@router.get("/semantic/pages")
def semantic_category_members(category: str = Query(..., description="Category name without 'Category:'")) -> Dict:
    """
    Uses SMW ask with a recursive category query to return all pages.
    Limit=50000 without a printout; the complete result set is returned.
    """
smw_query = f"[[Category:{category}]]"
    # Ask parameters: limit for the recursive query
ask_query = f"{smw_query}|limit=50000"
params = {"action": "ask", "query": ask_query, "format": "json"}
try:
r = wiki_session.get(WIKI_API_URL, params=params, timeout=30)
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"SMW-Ask-Error: {e}")
data = r.json().get("query", {}).get("results", {})
return data
# Import exercises
@router.get("/import/exercises", response_model=Dict[str,List[str]])
def import_exercises_get(category: str = Query(...)):
return import_exercises(category)
@router.post("/import/exercises", response_model=Dict[str,List[str]])
def import_exercises(category: str = Query(...)):
import mwparserfromhell
imported = []
for p in list_category_members(category):
pc = get_page_content(pageid=p.pageid, title=p.title)
wikicode = mwparserfromhell.parse(pc.content)
infobox = next((t for t in wikicode.filter_templates() if t.name.strip()=="ÜbungInfoBox"),None)
if not infobox:
continue
ex = { 'title': infobox.get('title').value.strip() if infobox.has('title') else p.title,
'summary': infobox.get('summary').value.strip() if infobox.has('summary') else '',
'short_description': infobox.get('short_description').value.strip() if infobox.has('short_description') else '',
'keywords':[kw.strip() for kw in infobox.get('keywords').value.split(',')] if infobox.has('keywords') else [],
'link':None,'discipline':infobox.get('discipline').value.strip() if infobox.has('discipline') else '',
'group':infobox.get('group').value.strip() if infobox.has('group') else None,'age_group':infobox.get('age_group').value.strip() if infobox.has('age_group') else '',
'target_group':infobox.get('target_group').value.strip() if infobox.has('target_group') else '','min_participants':int(infobox.get('min_participants').value.strip()) if infobox.has('min_participants') else 1,
'duration_minutes':int(infobox.get('duration').value.strip()) if infobox.has('duration') else 0,'capabilities':{},'category':category,
'purpose':'','execution':'','notes':'','preparation':'','method':'','equipment':[] }
resp = requests.post(f"{os.getenv('APP_URL','http://localhost:8000')}/exercise", json=ex)
if resp.status_code==200:
imported.append(resp.json().get('id'))
return {"imported":imported}

172
llm-api/wiki_router1.1.9.py Normal file
View File

@ -0,0 +1,172 @@
"""
File: wiki_router.py
Beschreibung:
- Endpunkte für MediaWiki-Integration im lokalen Netzwerk.
- Funktionen:
* /health: Prüft Verfügbarkeit der MediaWiki-API.
* /login: Führt clientlogin durch und speichert Session-Cookies.
* /pages: Listet alle Übungen inkl. Unterkategorien via SMW-Ask.
* /parsepage: Ruft Roh-Wikitext über action=parse für eine Seite ab.
* /semantic/page: Liefert Metadaten einer Übung und Wikitext über parse.
Version: 1.1.9
"""
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import Dict, Any, List
import requests, os
__version__ = "1.1.9"
router = APIRouter()
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
wiki_session = requests.Session()
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: str | None = None
class PageContentResponse(BaseModel):
pageid: int
title: str
wikitext: str
# Health-Check
@router.get("/health")
def health_check():
try:
resp = wiki_session.get(
WIKI_API_URL,
params={"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"},
timeout=5
)
resp.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
return {"status": "ok"}
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    # Fetch a login token
try:
token_resp = wiki_session.get(
WIKI_API_URL,
params={"action": "query", "meta": "tokens", "type": "login", "format": "json"},
timeout=10
)
token_resp.raise_for_status()
token = token_resp.json().get("query", {}).get("tokens", {}).get("logintoken")
except Exception as e:
raise HTTPException(status_code=502, detail=f"Token-Error: {e}")
if not token:
raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")
# clientlogin
try:
login_resp = wiki_session.post(
WIKI_API_URL,
data={
"action": "clientlogin",
"format": "json",
"username": data.username,
"password": data.password,
"logintoken": token,
"loginreturnurl": "http://localhost:8000"
},
timeout=10
)
login_resp.raise_for_status()
status = login_resp.json().get("clientlogin", {}).get("status")
except Exception:
status = None
# fallback login
if status != "PASS":
alt = wiki_session.post(
WIKI_API_URL,
data={"action": "login", "format": "json", "lgname": data.username, "lgpassword": data.password},
timeout=10
)
alt.raise_for_status()
status = alt.json().get("login", {}).get("result")
if status in ("PASS", "Success"):
return WikiLoginResponse(status="success", message=None)
return WikiLoginResponse(status="failed", message="Login fehlgeschlagen")
# SMW-Ask: alle Übungen inkl. Unterkategorien
@router.get("/semantic/pages")
def semantic_pages(category: str = Query(..., description="Kategorie ohne 'Category:'")) -> Dict[str, Any]:
smw_query = f"[[Category:{category}]]"
ask_query = f"{smw_query}|limit=50000"
r = wiki_session.get(
WIKI_API_URL,
params={"action": "ask", "query": ask_query, "format": "json"},
timeout=30
)
try:
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"SMW-Ask-Error: {e}")
return r.json().get("query", {}).get("results", {})
# List direct category members (pageid fallback)
@router.get("/pages")
def list_category_members(category: str = Query(..., description="Category name without 'Category:'")) -> List[Dict[str, Any]]:
cmtitle = f"Category:{category}"
params = {"action": "query", "list": "categorymembers", "cmtitle": cmtitle, "cmnamespace": 0, "cmlimit": 50000, "format": "json"}
r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
try:
r.raise_for_status()
except Exception as e:
raise HTTPException(status_code=502, detail=f"Kategorie-Error: {e}")
return r.json().get("query", {}).get("categorymembers", [])
# Fetch wikitext via the parse endpoint
@router.get("/parsepage", response_model=PageContentResponse)
def parse_page(pageid: int = Query(...), title: str = Query(None)):
r = wiki_session.get(
WIKI_API_URL,
params={"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"},
timeout=20
)
try:
r.raise_for_status()
except Exception as e:
        raise HTTPException(status_code=502, detail=f"Parse error: {e}")
wikitext = r.json().get("parse", {}).get("wikitext", {}).get("*", "")
return PageContentResponse(pageid=pageid, title=title or "", wikitext=wikitext)
# Detail endpoint for a single exercise: metadata from SMW ask + wikitext via parse (by title)
@router.get("/semantic/page")
def semantic_page_detail(category: str = Query(...), title: str = Query(...)) -> Dict[str, Any]:
    """
    Returns metadata and wikitext for a single exercise.
    Uses SMW ask for the metadata and the parse endpoint (by title) for the wikitext.
    """
    # Metadata from SMW ask
    entries = semantic_pages(category)
    entry = entries.get(title)
    if not entry:
        raise HTTPException(status_code=404, detail="Exercise not found in SMW ask result.")
    # Load wikitext directly via the parse endpoint by title
try:
r = wiki_session.get(
WIKI_API_URL,
params={"action": "parse", "page": title, "prop": "wikitext", "format": "json"},
timeout=20
)
r.raise_for_status()
wikitext = r.json().get("parse", {}).get("wikitext", {}).get("*", "")
except Exception as e:
        raise HTTPException(status_code=502, detail=f"Parse error: {e}")
return {
"title": title,
"pageid": entry.get("pageid"),
"fullurl": entry.get("fullurl"),
"printouts": entry.get("printouts", {}),
"wikitext": wikitext
}
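For reference, a minimal client sketch exercising the health and login endpoints above; it assumes the router is mounted under /import/wiki on localhost:8000, as the importer scripts later in this commit do, and uses placeholder credentials:

import requests

BASE = "http://localhost:8000/import/wiki"  # assumed mount point

# Health check: expects {"status": "ok"} when the MediaWiki API is reachable
resp = requests.get(f"{BASE}/health", timeout=10)
resp.raise_for_status()
print(resp.json())

# Login: placeholder credentials, normally taken from .env (WIKI_BOT_USER/WIKI_BOT_PASSWORD)
login = requests.post(f"{BASE}/login", json={"username": "BotUser", "password": "secret"}, timeout=30)
print(login.json())  # {"status": "success"} on success, {"status": "failed"} otherwise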

173
llm-api/wiki_router1.2.0.py Normal file
View File

@ -0,0 +1,173 @@
"""
File: wiki_router.py
Beschreibung:
- Endpunkte für MediaWiki-Integration im lokalen Netzwerk.
- Funktionen:
* /health: Prüft Verfügbarkeit der MediaWiki-API.
* /login: Führt clientlogin durch und speichert Session-Cookies.
* /semantic/pages: Listet alle Übungen inkl. Unterkategorien via SMW-Ask.
* /parsepage: Ruft Roh-Wikitext über action=parse für eine Seite ab.
* /info: Liefert pageid und fullurl über Core-API Query.
* /semantic/page: Liefert Metadaten einer Übung und Wikitext sowie pageid über Core-API.
Version: 1.2.0
"""
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import Dict, Any, List
import requests, os
__version__ = "1.2.0"
router = APIRouter()
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
wiki_session = requests.Session()
class WikiLoginRequest(BaseModel):
username: str
password: str
class WikiLoginResponse(BaseModel):
status: str
message: str | None = None
class PageContentResponse(BaseModel):
pageid: int
title: str
wikitext: str
class PageInfoResponse(BaseModel):
pageid: int
title: str
fullurl: str
# Health check
@router.get("/health")
def health_check():
try:
resp = wiki_session.get(
WIKI_API_URL,
params={"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"},
timeout=5
)
resp.raise_for_status()
except Exception as e:
        raise HTTPException(status_code=502, detail=f"Wiki unreachable: {e}")
return {"status": "ok"}
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    # Fetch login token
    try:
        token_resp = wiki_session.get(
            WIKI_API_URL,
            params={"action": "query", "meta": "tokens", "type": "login", "format": "json"},
            timeout=10
        )
        token_resp.raise_for_status()
        token = token_resp.json().get("query", {}).get("tokens", {}).get("logintoken")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Token error: {e}")
    if not token:
        raise HTTPException(status_code=502, detail="No login token received")
# clientlogin
try:
login_resp = wiki_session.post(
WIKI_API_URL,
data={
"action": "clientlogin",
"format": "json",
"username": data.username,
"password": data.password,
"logintoken": token,
"loginreturnurl": "http://localhost:8000"
},
timeout=10
)
login_resp.raise_for_status()
status = login_resp.json().get("clientlogin", {}).get("status")
except Exception:
status = None
# fallback login
if status != "PASS":
alt = wiki_session.post(
WIKI_API_URL,
data={"action": "login", "format": "json", "lgname": data.username, "lgpassword": data.password},
timeout=10
)
alt.raise_for_status()
status = alt.json().get("login", {}).get("result")
if status in ("PASS", "Success"):
return WikiLoginResponse(status="success", message=None)
    return WikiLoginResponse(status="failed", message="Login failed")
# SMW ask: all exercises incl. subcategories
@router.get("/semantic/pages")
def semantic_pages(category: str = Query(..., description="Category name without the 'Category:' prefix")) -> Dict[str, Any]:
smw_query = f"[[Category:{category}]]"
ask_query = f"{smw_query}|limit=50000"
r = wiki_session.get(
WIKI_API_URL,
params={"action": "ask", "query": ask_query, "format": "json"},
timeout=30
)
try:
r.raise_for_status()
except Exception as e:
        raise HTTPException(status_code=502, detail=f"SMW ask error: {e}")
return r.json().get("query", {}).get("results", {})
# Fetch wikitext via the parse endpoint (by pageid)
@router.get("/parsepage", response_model=PageContentResponse)
def parse_page(pageid: int = Query(...), title: str = Query(None)):
r = wiki_session.get(
WIKI_API_URL,
params={"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"},
timeout=20
)
try:
r.raise_for_status()
except Exception as e:
        raise HTTPException(status_code=502, detail=f"Parse error: {e}")
wikitext = r.json().get("parse", {}).get("wikitext", {}).get("*", "")
return PageContentResponse(pageid=pageid, title=title or "", wikitext=wikitext)
# Page info via the core API (resolves pageid + fullurl)
@router.get("/info", response_model=PageInfoResponse)
def page_info(title: str = Query(..., description="Name of the page")):
r = wiki_session.get(
WIKI_API_URL,
params={"action": "query", "titles": title, "prop": "info", "inprop": "url", "format": "json"},
timeout=10
)
try:
r.raise_for_status()
except Exception as e:
        raise HTTPException(status_code=502, detail=f"Info error: {e}")
pages = r.json().get("query", {}).get("pages", {})
pid_str, page = next(iter(pages.items()))
pid = int(pid_str)
fullurl = page.get("fullurl")
return PageInfoResponse(pageid=pid, title=page.get("title"), fullurl=fullurl)
# Detail endpoint for a single exercise: metadata from ask + wikitext & id via the core API
@router.get("/semantic/page", response_model=Dict[str, Any])
def semantic_page_detail(category: str = Query(...), title: str = Query(...)) -> Dict[str, Any]:
    # Metadata from SMW ask
    entries = semantic_pages(category)
    entry = entries.get(title)
    if not entry:
        raise HTTPException(status_code=404, detail="Exercise not found in SMW ask result.")
    # Page info via the core API
    info = page_info(title=title)
    # Wikitext via parse
    parsed = parse_page(pageid=info.pageid, title=title)
return {
"title": title,
"pageid": info.pageid,
"fullurl": info.fullurl,
"printouts": entry.get("printouts", {}),
"wikitext": parsed.wikitext
}
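A small sketch of how the /info and /parsepage endpoints chain together, the same flow semantic_page_detail() uses internally; it assumes the router is mounted under /import/wiki on localhost:8000 and uses the example title from the importer scripts in this commit:

import requests

BASE = "http://localhost:8000/import/wiki"  # assumed mount point
title = "Affenklatschen"

# Resolve the title to pageid + fullurl via the core API
info = requests.get(f"{BASE}/info", params={"title": title}, timeout=10).json()

# Fetch the raw wikitext for that pageid
page = requests.get(f"{BASE}/parsepage", params={"pageid": info["pageid"], "title": title}, timeout=20).json()
print(info["fullurl"])
print(page["wikitext"][:200])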

View File

@ -0,0 +1,34 @@
import re
def chunk_text_paragraphs(text: str, max_length: int = 500) -> list[str]:
    # Split into paragraphs
paragraphs = re.split(r'\n\s*\n', text.strip())
chunks: list[str] = []
current_chunk = ""
for para in paragraphs:
para = para.strip()
if not para:
continue
        # Does the paragraph still fit into the current chunk? +2 for the "\n\n" added later
        if len(current_chunk) + len(para) + 2 <= max_length:
            current_chunk = (current_chunk + "\n\n" + para) if current_chunk else para
        else:
            # Close the current chunk
            if current_chunk:
                chunks.append(current_chunk)
            # Is the paragraph itself too large? Then hard-split it
            if len(para) > max_length:
                for i in range(0, len(para), max_length):
                    chunks.append(para[i:i + max_length])
                current_chunk = ""
            else:
                current_chunk = para
    # Don't forget the last chunk
if current_chunk:
chunks.append(current_chunk)
return chunks
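A quick illustration of the chunker's two behaviours (sample values made up): short paragraphs are packed together, an oversized paragraph is hard-split.

sample = "First paragraph.\n\nSecond paragraph.\n\n" + "x" * 1200
for n, c in enumerate(chunk_text_paragraphs(sample, max_length=500)):
    print(n, len(c))
# -> chunk 0 holds both short paragraphs; the 1200-character paragraph
#    is split into pieces of 500, 500 and 200 characters.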

View File

@ -0,0 +1,93 @@
import os
import sys
import shutil
import requests
from tqdm import tqdm
# 📌 Configuration
API_URL = "http://localhost:8000/embed"
CHUNK_SIZE = 500
OVERLAP = 100
BATCH_SIZE = 20
# 📁 Parse command-line arguments
if len(sys.argv) != 2:
    print("❌ Please pass a category, e.g.: python index_documents_advanced.py karatetrainer")
    sys.exit(1)
CATEGORY = sys.argv[1]
SOURCE_DIR = os.path.expanduser(f"~/knowledge/{CATEGORY}")
ARCHIVE_DIR = os.path.join(SOURCE_DIR, "_imported")
COLLECTION = CATEGORY
if not os.path.exists(SOURCE_DIR):
    print(f"❌ The folder '{SOURCE_DIR}' does not exist.")
    sys.exit(1)
os.makedirs(ARCHIVE_DIR, exist_ok=True)
print(f"📁 Reading documents from: {SOURCE_DIR}")
print(f"📂 Archived files: {ARCHIVE_DIR}")
print(f"🎯 Target collection: {COLLECTION}")
# 🔧 Split text into overlapping chunks
def chunk_text(text, size=CHUNK_SIZE, overlap=OVERLAP):
chunks = []
start = 0
while start < len(text):
end = min(start + size, len(text))
chunks.append(text[start:end])
start += size - overlap
return chunks
# 📚 Read all .txt files in the folder
def read_all_text_files(folder):
file_chunk_map = {} # Map: filename → chunks
for filename in os.listdir(folder):
if filename.endswith(".txt"):
path = os.path.join(folder, filename)
with open(path, "r", encoding="utf-8") as f:
text = f.read()
file_chunk_map[filename] = chunk_text(text)
return file_chunk_map
# 📤 Send to API
def embed_chunks_in_batches(chunks, collection):
results = []
for i in tqdm(range(0, len(chunks), BATCH_SIZE), desc="📡 Embedding"):
batch = chunks[i:i + BATCH_SIZE]
response = requests.post(API_URL, json={"texts": batch, "collection": collection})
response.raise_for_status()
results.append(response.json())
return results
# 🚀 Main logic
if __name__ == "__main__":
file_chunk_map = read_all_text_files(SOURCE_DIR)
all_chunks = []
processed_files = []
for filename, chunks in file_chunk_map.items():
if chunks:
all_chunks.extend(chunks)
processed_files.append(filename)
    if not all_chunks:
        print("⚠️ No text chunks found.")
        sys.exit(0)
    print(f"📦 Found {len(all_chunks)} text chunks from {len(processed_files)} files.")
    try:
        result = embed_chunks_in_batches(all_chunks, COLLECTION)
        print(f"\n✅ Embedding finished: received {len(result)} API response(s).")
        # 🗃️ Archive processed files
        for filename in processed_files:
            src = os.path.join(SOURCE_DIR, filename)
            dst = os.path.join(ARCHIVE_DIR, filename)
            shutil.move(src, dst)
        print(f"📁 Moved {len(processed_files)} files to _imported.")
    except Exception as e:
        print(f"❌ Error while sending: {e}")

View File

@ -0,0 +1,61 @@
import requests
import os
import sys
# 📌 Configuration
API_URL = "http://localhost:8000/embed"
CHUNK_SIZE = 500
OVERLAP = 100
def chunk_text(text, size=CHUNK_SIZE, overlap=OVERLAP):
    """
    Splits a text into overlapping chunks.
    """
    chunks = []
    start = 0
    while start < len(text):
        end = min(start + size, len(text))
        chunks.append(text[start:end])
        start += size - overlap
    return chunks
def read_text_file(path):
    """
    Reads the contents of a text file.
    """
    with open(path, "r", encoding="utf-8") as f:
        return f.read()
def import_chunks(chunks, collection):
    """
    Sends the text chunks to the API.
    """
response = requests.post(API_URL, json={
"texts": chunks,
"collection": collection
})
response.raise_for_status()
return response.json()
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("❌ Usage: python import_textfile.py <collection> <path_to_txt_file>")
        sys.exit(1)
    collection = sys.argv[1]
    filepath = os.path.expanduser(sys.argv[2])
    if not os.path.isfile(filepath):
        print(f"❌ File not found: {filepath}")
        sys.exit(1)
    print(f"📄 Loading file: {filepath}")
    text = read_text_file(filepath)
    chunks = chunk_text(text)
    print(f"📦 {len(chunks)} chunks prepared, sending to collection '{collection}'...")
    try:
        result = import_chunks(chunks, collection)
        print(f"✅ Import successful: {result}")
    except Exception as e:
        print(f"❌ Import error: {e}")

View File

@ -0,0 +1,31 @@
import requests
import sys
# 📌 Configuration
API_URL = "http://localhost:8000/embed"
def import_text(text, collection="default"):
    """
    Sends a single text chunk to the embed API for indexing.
    """
response = requests.post(API_URL, json={
"texts": [text],
"collection": collection
})
response.raise_for_status()
return response.json()
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("❌ Usage: python import_texts.py <collection> \"<text>\"")
        sys.exit(1)
    collection = sys.argv[1]
    text = sys.argv[2]
    print(f"📤 Sending to collection '{collection}': {text}")
    try:
        result = import_text(text, collection)
        print(f"✅ Response: {result}")
    except Exception as e:
        print(f"❌ Import error: {e}")

View File

@ -0,0 +1,66 @@
import os
import sys
import requests
# 📌 Configuration
API_URL = "http://localhost:8000/embed"
CHUNK_SIZE = 500
OVERLAP = 100
# 📁 Parse command-line arguments
if len(sys.argv) != 2:
    print("❌ Please pass a category, e.g.: python index_documents.py karatetrainer")
    sys.exit(1)
CATEGORY = sys.argv[1]
SOURCE_DIR = os.path.expanduser(f"~/knowledge/{CATEGORY}")
COLLECTION = CATEGORY
if not os.path.exists(SOURCE_DIR):
    print(f"❌ The folder '{SOURCE_DIR}' does not exist.")
    sys.exit(1)
print(f"📁 Reading documents from: {SOURCE_DIR}")
print(f"🎯 Target collection: {COLLECTION}")
# 🔧 Split text into overlapping chunks
def chunk_text(text, size=CHUNK_SIZE, overlap=OVERLAP):
chunks = []
start = 0
while start < len(text):
end = min(start + size, len(text))
chunks.append(text[start:end])
start += size - overlap
return chunks
# 📚 Read all .txt files in the folder
def read_all_text_files(folder):
chunks = []
for filename in os.listdir(folder):
if filename.endswith(".txt"):
path = os.path.join(folder, filename)
with open(path, "r", encoding="utf-8") as f:
text = f.read()
file_chunks = chunk_text(text)
chunks.extend(file_chunks)
return chunks
# 📤 Send to API
def embed_chunks(chunks, collection):
response = requests.post(API_URL, json={"texts": chunks, "collection": collection})
response.raise_for_status()
return response.json()
# 🚀 Main logic
if __name__ == "__main__":
    texts = read_all_text_files(SOURCE_DIR)
    if not texts:
        print("⚠️ No text chunks found.")
        sys.exit(0)
    print(f"📦 Found {len(texts)} text chunks, sending to {API_URL}...")
    try:
        result = embed_chunks(texts, COLLECTION)
        print(f"✅ Result: {result}")
    except Exception as e:
        print(f"❌ Error while sending: {e}")

View File

@ -0,0 +1,46 @@
import re
def chunk_text_paragraphs(text, max_length=500, overlap=1):
    """
    Splits the text paragraph by paragraph into chunks of up to `max_length` characters.
    Optionally, the last `overlap` paragraph(s) of the previous chunk are carried
    over into the next chunk as context.
    """
    paragraphs = [p.strip() for p in re.split(r'\n{2,}', text) if p.strip()]
    chunks = []
    current_chunk = []
    current_len = 0
    i = 0
    while i < len(paragraphs):
        para = paragraphs[i]
        para_len = len(para)
        # A single paragraph larger than the limit -> hard-split it
        if para_len > max_length:
            if current_chunk:
                chunks.append("\n\n".join(current_chunk))
                current_chunk = []
                current_len = 0
            chunks.append(para[:max_length])
            paragraphs.insert(i + 1, para[max_length:])  # push the rest back into the list
            i += 1
            continue
        # Paragraph would overflow the current chunk -> close the chunk first
        # (the original version dropped this paragraph; it is now appended below)
        if current_len + para_len + 2 > max_length:  # +2 for the blank line
            if current_chunk:
                chunks.append("\n\n".join(current_chunk))
            # Optionally keep the last N paragraphs as overlap context
            current_chunk = current_chunk[-overlap:] if overlap > 0 else []
            current_len = sum(len(p) for p in current_chunk) + 2 * len(current_chunk)
            # If the carried-over context alone is already too large, drop it
            # so the loop is guaranteed to make progress
            if current_len + para_len + 2 > max_length:
                current_chunk = []
                current_len = 0
        current_chunk.append(para)
        current_len += para_len + 2  # +2 for the separator
        i += 1
    if current_chunk:
        chunks.append("\n\n".join(current_chunk))
    return chunks
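An illustration of the overlap behaviour (sizes made up): with overlap=1 each chunk after the first starts with the final paragraph of its predecessor as context.

paras = [f"Paragraph {n}: " + "word " * 40 for n in range(6)]
text = "\n\n".join(paras)
for c in chunk_text_paragraphs(text, max_length=500, overlap=1):
    print(len(c), c[:14])
# -> five chunks of two paragraphs each: (0,1), (1,2), (2,3), (3,4), (4,5)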

155
scripts/chunking_utils.py Normal file
View File

@ -0,0 +1,155 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# --------------------------------------------------
# chunking_utils.py
#
# Robust text-chunking logic:
#   1. Paragraph-based chunking
#   2. Sentence-based chunking via regex (no NLTK)
#   3. Sentence-overlap chunking
# --------------------------------------------------
import re
# --------------------------------------------------
# Helper: split_sentences
# Purpose:
#   - Splits text into sentences based on period, exclamation and question marks
#   - Splits on ".!? " (sentence punctuation followed by whitespace)
# Parameters:
#   text : full text as a string
# Returns:
#   list of sentence strings
# --------------------------------------------------
def split_sentences(text: str) -> list[str]:
    # Regex: lookbehind for . ! or ?, then one or more whitespace characters
    return re.split(r'(?<=[\.!?])\s+', text.strip())
# --------------------------------------------------
# Function: chunk_text_paragraphs
# Purpose:
#   - Splits text paragraph by paragraph into chunks of up to max_length characters
#   - Paragraphs are separated at double line breaks
#   - Oversized paragraphs are hard-split into max_length pieces
# Parameters:
#   text       : full text as a string
#   max_length : maximum chunk length (default 500)
# Returns:
#   list of strings (chunks)
# --------------------------------------------------
def chunk_text_paragraphs(text: str, max_length: int = 500) -> list[str]:
paragraphs = re.split(r'\n\s*\n', text.strip())
chunks: list[str] = []
current_chunk = ""
for para in paragraphs:
para = para.strip()
if not para:
continue
        # Check whether the paragraph still fits into the current chunk (+2 for "\n\n")
if len(current_chunk) + len(para) + 2 <= max_length:
if current_chunk:
current_chunk += "\n\n" + para
else:
current_chunk = para
        else:
            # Store the chunk collected so far
            if current_chunk:
                chunks.append(current_chunk)
            # Hard-split the paragraph if it is too large on its own
            if len(para) > max_length:
                for i in range(0, len(para), max_length):
                    part = para[i:i + max_length]
                    chunks.append(part)
                current_chunk = ""
            else:
                # A new chunk starts with this paragraph
                current_chunk = para
    # Don't forget the last chunk
if current_chunk:
chunks.append(current_chunk)
return chunks
# --------------------------------------------------
# Function: chunk_by_sentences
# Purpose:
#   - Splits text into sentences via a regex split
#   - Builds chunks of whole sentences up to max_length
# Parameters:
#   text       : full text as a string
#   max_length : maximum chunk length (default 500)
# Returns:
#   list of strings (chunks)
# --------------------------------------------------
def chunk_by_sentences(text: str, max_length: int = 500) -> list[str]:
sentences = split_sentences(text)
chunks: list[str] = []
current_chunk = ""
for sent in sentences:
sent = sent.strip()
if not sent:
continue
        # Check whether the sentence still fits into the current chunk (+1 for the space)
if len(current_chunk) + len(sent) + 1 <= max_length:
if current_chunk:
current_chunk += " " + sent
else:
current_chunk = sent
        else:
            # Store the chunk collected so far
            if current_chunk:
                chunks.append(current_chunk)
            # Hard-split a single sentence if it is too long
            if len(sent) > max_length:
                for i in range(0, len(sent), max_length):
                    chunks.append(sent[i:i + max_length])
                current_chunk = ""
            else:
                current_chunk = sent
    # Don't forget the last chunk
if current_chunk:
chunks.append(current_chunk)
return chunks
# --------------------------------------------------
# Function: chunk_with_sentence_overlap
# Purpose:
#   - First builds sentence-based chunks
#   - Prepends overlap_sents sentences from the previous chunk
# Parameters:
#   text          : full text as a string
#   max_length    : maximum chunk length (default 500)
#   overlap_sents : number of sentences carried over as overlap (default 1)
# Returns:
#   list of strings (chunks with context overlap)
# --------------------------------------------------
def chunk_with_sentence_overlap(
text: str,
max_length: int = 500,
overlap_sents: int = 1
) -> list[str]:
base_chunks = chunk_by_sentences(text, max_length)
overlapped: list[str] = []
for idx, chunk in enumerate(base_chunks):
if idx == 0 or overlap_sents <= 0:
overlapped.append(chunk)
else:
prev = base_chunks[idx - 1]
prev_sents = split_sentences(prev)
context = " ".join(prev_sents[-overlap_sents:])
overlapped.append((context + " " + chunk).strip())
return overlapped
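Usage sketch for the sentence-overlap chunker (sample text and sizes made up): the second chunk is prefixed with the last sentence of the first.

text = "The stance comes first. Then the step follows. The kick completes it. Finally reset."
for c in chunk_with_sentence_overlap(text, max_length=50, overlap_sents=1):
    print(repr(c))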

View File

@ -0,0 +1,61 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import subprocess
from tqdm import tqdm
def main():
import argparse
    parser = argparse.ArgumentParser(
        description="Imports all .txt files of a category via import_single_file.py"
    )
    parser.add_argument(
        "category",
        help="Name of the category (folder under ~/knowledge/)"
    )
    parser.add_argument(
        "--topic", "-t",
        help="Optional context/topic for all imports",
        default="default"
    )
    parser.add_argument(
        "--script-path", "-s",
        help="Path to import_single_file.py",
        default="import_single_file.py"
    )
args = parser.parse_args()
category = args.category
topic = args.topic
script_path = args.script_path
base_dir = os.path.expanduser(f"~/knowledge/{category}")
    if not os.path.isdir(base_dir):
        print(f"❌ Category folder not found: {base_dir}")
        sys.exit(1)
    files = [f for f in os.listdir(base_dir) if f.endswith(".txt")]
    if not files:
        print(f"⚠️ No .txt files found in {base_dir}.")
        sys.exit(0)
    print(f"📁 Starting folder import: {len(files)} files in category '{category}' (topic: '{topic}')")
    for filename in tqdm(files, desc="Importing files"):
cmd = [
sys.executable,
script_path,
category,
filename,
topic
]
try:
subprocess.run(cmd, check=True)
except subprocess.CalledProcessError as e:
print(f"❌ Fehler beim Import von {filename}: {e}")
print("✅ Ordner-Import abgeschlossen.")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,92 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
import_pdf_file.py
Importiert eine einzelne PDF-Datei:
- Text-Extraktion via pdfplumber
- Chunking mit Satz-Overlap
- POST an /embed
- Verschieben nach _imported
"""
import os
import sys
import shutil
import requests
from uuid import uuid4
from datetime import datetime, timezone
import pdfplumber
from chunking_utils import chunk_with_sentence_overlap
# Configuration
API_URL = "http://localhost:8000/embed"
MAX_LENGTH = 500
if len(sys.argv) < 3:
    print("❌ Usage: python import_pdf_file.py <category> <filename> [topic]")
    sys.exit(1)
category = sys.argv[1]
filename = sys.argv[2]
topic = sys.argv[3] if len(sys.argv) > 3 else "default"
source_dir = os.path.expanduser(f"~/knowledge/{category}")
file_path = os.path.join(source_dir, filename)
archive_dir = os.path.join(source_dir, "_imported")
if not os.path.isfile(file_path):
    print(f"❌ File not found: {file_path}")
    sys.exit(1)
os.makedirs(archive_dir, exist_ok=True)
print(f"📄 PDF import: {filename} in category {category} (topic={topic})")
# 1) Extract the PDF text
text = ""
with pdfplumber.open(file_path) as pdf:
for page in pdf.pages:
page_text = page.extract_text() or ""
text += page_text + "\n\n"
# 2) Chunking
chunks = chunk_with_sentence_overlap(
text,
max_length=MAX_LENGTH,
overlap_sents=2
)
print(f"?? {len(chunks)} Chunks erzeugt.")
# 3) Payload bauen
now = datetime.now(timezone.utc).isoformat()
payload = {"collection": category, "chunks": []}
for idx, chunk in enumerate(chunks):
payload["chunks"].append({
"text": chunk,
"source": filename,
"source_type": "pdf",
"title": os.path.splitext(filename)[0],
"version": "v1.0",
"related_to": category,
"tags": [category],
"owner": "karate-agent",
"context_tag": topic,
"imported_at": now,
"chunk_index": idx,
"category": category
})
# 4) Send
try:
    res = requests.post(API_URL, json=payload)
    res.raise_for_status()
    print(f"✅ Successfully embedded {len(chunks)} chunks.")
except Exception as e:
    print(f"❌ Error while sending: {e}")
    sys.exit(1)
# 5) Archive
shutil.move(file_path, os.path.join(archive_dir, filename))
print("📁 PDF moved to _imported/")

View File

@ -0,0 +1,85 @@
import os
import sys
import shutil
import requests
from datetime import datetime, timezone
#from text_chunker import chunk_text_paragraphs
from uuid import uuid4
from chunking_utils import (
chunk_text_paragraphs,
chunk_by_sentences,
chunk_with_sentence_overlap
)
# 📌 Configuration
API_URL = "http://localhost:8000/embed"
CHUNK_SIZE = 500
OVERLAP = 100
# 📁 Input parameters: category, filename, optional metadata
if len(sys.argv) < 3:
    print("❌ Usage: python import_single_file.py <category> <filename> [topic]")
sys.exit(1)
CATEGORY = sys.argv[1]
FILENAME = sys.argv[2]
TOPIC = sys.argv[3] if len(sys.argv) > 3 else None
SOURCE_DIR = os.path.expanduser(f"~/knowledge/{CATEGORY}")
ARCHIVE_DIR = os.path.join(SOURCE_DIR, "_imported")
FILEPATH = os.path.join(SOURCE_DIR, FILENAME)
# Validation
if not os.path.exists(FILEPATH):
    print(f"❌ File not found: {FILEPATH}")
    sys.exit(1)
os.makedirs(ARCHIVE_DIR, exist_ok=True)
print(f"📄 Importing file: {FILENAME} from category: {CATEGORY}")
# Read the content and split it into chunks
with open(FILEPATH, "r", encoding="utf-8") as f:
    content = f.read()
chunks = chunk_with_sentence_overlap(content, max_length=CHUNK_SIZE, overlap_sents=2)
print(f"📦 Generated {len(chunks)} text chunks.")
# Prepare metadata
now = datetime.now(timezone.utc).isoformat()
payload = {
"chunks": [],
"collection": CATEGORY
}
for i, chunk in enumerate(chunks):
payload["chunks"].append({
"text": chunk,
"source": FILENAME,
"source_type": "file",
"title": FILENAME.replace(".txt", ""),
"version": "v1.0",
"related_to": CATEGORY,
"tags": [CATEGORY],
"owner": "karate-agent",
"context_tag": TOPIC or "default",
"imported_at": now,
"chunk_index": i,
"category": CATEGORY
})
# Send to API
try:
    res = requests.post(API_URL, json=payload)
    res.raise_for_status()
    print(f"✅ Successfully embedded {len(chunks)} chunks.")
except Exception as e:
    print(f"❌ Error while sending: {e}")
    sys.exit(1)
# Archive the file
shutil.move(FILEPATH, os.path.join(ARCHIVE_DIR, FILENAME))
print("📁 File moved to _imported.")

View File

@ -0,0 +1,135 @@
import os
import sys
import shutil
import requests
import re
from tqdm import tqdm
from datetime import datetime
# 📌 Configuration
API_URL = "http://localhost:8000/embed"
CHUNK_SIZE = 500
OVERLAP = 100
BATCH_SIZE = 20
# 📁 Parse command-line arguments
if len(sys.argv) != 2:
    print("❌ Please pass a category, e.g.: python import_txtdocuments.py karatetrainer")
sys.exit(1)
CATEGORY = sys.argv[1]
SOURCE_DIR = os.path.expanduser(f"~/knowledge/{CATEGORY}")
ARCHIVE_DIR = os.path.join(SOURCE_DIR, "_imported")
COLLECTION = CATEGORY
if not os.path.exists(SOURCE_DIR):
    print(f"❌ The folder '{SOURCE_DIR}' does not exist.")
    sys.exit(1)
os.makedirs(ARCHIVE_DIR, exist_ok=True)
print(f"📁 Reading documents from: {SOURCE_DIR}")
print(f"📂 Archived files: {ARCHIVE_DIR}")
print(f"🎯 Target collection: {COLLECTION}")
# 🔧 Split text into overlapping chunks (legacy, superseded by the paragraph chunker below)
#def chunk_text(text, size=CHUNK_SIZE, overlap=OVERLAP):
# chunks = []
# start = 0
# while start < len(text):
# end = min(start + size, len(text))
# chunks.append(text[start:end])
# start += size - overlap
# return chunks
def chunk_text_paragraphs(text, max_length=500):
    paragraphs = re.split(r'\n\s*\n', text.strip())  # paragraph separation
chunks = []
current_chunk = ""
for para in paragraphs:
if len(current_chunk) + len(para) + 2 <= max_length:
current_chunk += para + "\n\n"
else:
if current_chunk:
chunks.append(current_chunk.strip())
            # Hard-split a single paragraph if it is too large
if len(para) > max_length:
for i in range(0, len(para), max_length):
chunks.append(para[i:i+max_length].strip())
current_chunk = ""
else:
current_chunk = para + "\n\n"
if current_chunk:
chunks.append(current_chunk.strip())
return chunks
# 📚 Read all .txt files in the folder
def read_all_text_files(folder):
file_chunk_map = {} # Map: filename → chunks
for filename in os.listdir(folder):
if filename.endswith(".txt"):
path = os.path.join(folder, filename)
with open(path, "r", encoding="utf-8") as f:
text = f.read()
file_chunk_map[filename] = chunk_text_paragraphs(text)
return file_chunk_map
# 🧱 Prepare structured payloads
def prepare_payloads(file_chunk_map, collection):
payloads = []
imported_at = datetime.now().isoformat()
for filename, chunks in file_chunk_map.items():
for local_index, chunk in enumerate(chunks):
payload = {
"text": chunk,
"source": filename,
"type": "file",
"category": collection,
"imported_at": imported_at,
"chunk_index": local_index
}
payloads.append(payload)
for p in payloads:
print(f"{p['source']}: chunk_index={p['chunk_index']}")
return payloads
# 📤 Send to API
def embed_chunks_in_batches(payloads, collection):
results = []
for i in tqdm(range(0, len(payloads), BATCH_SIZE), desc="📡 Embedding"):
batch = payloads[i:i + BATCH_SIZE]
response = requests.post(API_URL, json={"chunks": batch, "collection": collection})
response.raise_for_status()
results.append(response.json())
return results
# 🚀 Main logic
if __name__ == "__main__":
    file_chunk_map = read_all_text_files(SOURCE_DIR)
    processed_files = list(file_chunk_map.keys())
    payloads = prepare_payloads(file_chunk_map, COLLECTION)
    if not payloads:
        print("⚠️ No text chunks found.")
        sys.exit(0)
    print(f"📦 Found {len(payloads)} text chunks from {len(processed_files)} files. Sending to API...")
    try:
        result = embed_chunks_in_batches(payloads, COLLECTION)
        print(f"\n✅ Embedding finished: received {len(result)} API response(s).")
        # 🗃️ Archive processed files
        for filename in processed_files:
            src = os.path.join(SOURCE_DIR, filename)
            dst = os.path.join(ARCHIVE_DIR, filename)
            shutil.move(src, dst)
        print(f"📁 Moved {len(processed_files)} files to _imported.")
    except Exception as e:
        print(f"❌ Error while sending: {e}")

View File

@ -0,0 +1,32 @@
import sys
import requests
API_URL = "http://localhost:8000/prompt"
if len(sys.argv) < 3:
    print("❌ Usage: python prompt_documents.py <collection> <question>")
sys.exit(1)
collection = sys.argv[1]
query = " ".join(sys.argv[2:])
data = {
"query": query,
"collection": collection,
"context_limit": 3
}
print(f"🤖 Anfrage an LLM aus Collection '{collection}': {query}")
try:
response = requests.post(API_URL, json=data)
response.raise_for_status()
result = response.json()
except Exception as e:
print(f"❌ Fehler bei der Anfrage: {e}")
sys.exit(1)
print("\n📄 Kontext:")
print(result["context"])
print("\n💡 Antwort:")
print(result["answer"])

View File

@ -0,0 +1,45 @@
import os
import sys
import shutil
def print_usage():
    print("❌ Please pass a category, e.g.:")
    print("   python restore_imported_files.py karatetrainer")
    sys.exit(1)
# --- Validate input ---
if len(sys.argv) < 2:
print_usage()
CATEGORY = sys.argv[1]
FORCE = "--force" in sys.argv
SOURCE_DIR = os.path.expanduser(f"~/knowledge/{CATEGORY}/_imported")
TARGET_DIR = os.path.expanduser(f"~/knowledge/{CATEGORY}")
if not os.path.isdir(SOURCE_DIR):
print(f"❌ Quellordner '{SOURCE_DIR}' existiert nicht.")
sys.exit(1)
files = [f for f in os.listdir(SOURCE_DIR) if os.path.isfile(os.path.join(SOURCE_DIR, f))]
if not files:
print("⚠️ Keine Dateien zum Wiederherstellen gefunden.")
sys.exit(0)
print(f"♻️ Wiederherstellung von {len(files)} Dateien nach '{TARGET_DIR}'")
restored = 0
for file in files:
source_path = os.path.join(SOURCE_DIR, file)
target_path = os.path.join(TARGET_DIR, file)
    if not FORCE:
        confirm = input(f"🔁 Copy file '{file}' back? [y/N] ").strip().lower()
        if confirm != "y":
            continue
    shutil.move(source_path, target_path)
    print(f"'{file}' was copied back.")
    restored += 1
print(f"\n🎉 {restored} file(s) successfully restored.")

View File

@ -0,0 +1,36 @@
import os
import sys
import shutil
def print_usage():
    print("❌ Please pass a category and filename, e.g.:")
    print("   python restore_single_file.py karatetrainer mae_geri.txt")
    sys.exit(1)
# Validate input
if len(sys.argv) != 3:
print_usage()
CATEGORY = sys.argv[1]
FILENAME = sys.argv[2]
SOURCE_DIR = os.path.expanduser(f"~/knowledge/{CATEGORY}/_imported")
TARGET_DIR = os.path.expanduser(f"~/knowledge/{CATEGORY}")
SOURCE_FILE = os.path.join(SOURCE_DIR, FILENAME)
TARGET_FILE = os.path.join(TARGET_DIR, FILENAME)
if not os.path.isfile(SOURCE_FILE):
    print(f"❌ File '{FILENAME}' was not found in the archive folder '{SOURCE_DIR}'.")
    sys.exit(1)
if os.path.exists(TARGET_FILE):
    confirm = input(f"⚠️ File '{FILENAME}' already exists in the target folder. Overwrite? [y/N] ").strip().lower()
    if confirm != "y":
        print("⏹️ Aborted.")
        sys.exit(0)
try:
    shutil.move(SOURCE_FILE, TARGET_FILE)
    print(f"'{FILENAME}' was moved back to '{TARGET_DIR}'.")
except Exception as e:
    print(f"❌ Error while moving: {e}")

View File

@ -0,0 +1,31 @@
import sys
import requests
API_URL = "http://localhost:8000/search"
if len(sys.argv) < 3:
    print("❌ Usage: python search_documents.py <collection> <search_query>")
sys.exit(1)
collection = sys.argv[1]
query = " ".join(sys.argv[2:])
params = {
"query": query,
"collection": collection,
"limit": 5
}
print(f"🔍 Suche in Collection '{collection}': {query}")
try:
response = requests.get(API_URL, params=params)
response.raise_for_status()
results = response.json()
except Exception as e:
print(f"❌ Fehler bei der Anfrage: {e}")
sys.exit(1)
print("📚 Ergebnisse:")
for r in results:
print(f"\n✅ Score: {r['score']:.4f}")
print(f"{r['text']}")

238
scripts/wiki_importer.py Normal file
View File

@ -0,0 +1,238 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module: wiki_importer.py
Beschreibung:
- Importiert Übungen aus dem MediaWiki via FastAPI wiki_router
- Führt vor dem Import einen Login gegen /import/wiki/login durch (falls nicht via --skip-login deaktiviert)
- Holt Liste aller Übungs-Titel (SMW-Ask) via `/semantic/pages`
- Für jede Übung:
* Fetch pageinfo (pageid, fullurl) via `/info`
* Parse Wikitext (Templates: ÜbungInfoBox, Übungsbeschreibung, SkillDevelopment) via `/parsepage`
* Baut Payload entsprechend Exercise-Datenmodell
* POST an `/exercise` Endpoint (exercise_router)
- Unterstützt Single-Import via `--title` (oder ENV `WIKI_EXERCISE_TITLE`) und Full-Import via `--all`
- Optional: Credentials via CLI (--username/--password) oder `.env` (WIKI_BOT_USER / WIKI_BOT_PASSWORD)
Version: 2.1.0
"""
import os
import sys
import argparse
from typing import Dict, Any
import requests
import mwparserfromhell
from dotenv import load_dotenv
# ----- Configuration / defaults -----
load_dotenv()  # load .env if present
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000/import/wiki")  # FastAPI wiki proxy
EXERCISE_API = os.getenv("EXERCISE_API_URL", "http://localhost:8000/exercise")  # exercise endpoint
DEFAULT_CAT = os.getenv("WIKI_CATEGORY", "Übungen")
DEFAULT_TITLE = os.getenv("WIKI_EXERCISE_TITLE", "Affenklatschen")
# ---- Helper functions for the wiki router ----
def wiki_health() -> None:
r = requests.get(f"{API_BASE_URL}/health", timeout=15)
r.raise_for_status()
print("[Sanity] Wiki health OK")
def wiki_login(username: str, password: str) -> None:
"""
Führt einen Login gegen den wiki_router durch.
Erwartet: {"status":"success"} bei Erfolg.
"""
payload = {"username": username, "password": password}
r = requests.post(f"{API_BASE_URL}/login", json=payload, timeout=30)
# kein raise_for_status(), wir wollen die JSON-Fehler sauber ausgeben
try:
data = r.json()
except Exception:
print(f"[Login] HTTP {r.status_code}: {r.text}")
r.raise_for_status()
status = (data or {}).get("status")
if status != "success":
msg = (data or {}).get("message", "Login fehlgeschlagen")
raise RuntimeError(f"[Login] {msg}")
print("[Login] success")
def fetch_all_pages(category: str) -> Dict[str, Any]:
resp = requests.get(f"{API_BASE_URL}/semantic/pages", params={"category": category}, timeout=60)
resp.raise_for_status()
return resp.json()
def fetch_page_info(title: str) -> Dict[str, Any]:
r = requests.get(f"{API_BASE_URL}/info", params={"title": title}, timeout=30)
r.raise_for_status()
info = r.json()
return {"pageid": info.get("pageid"), "fullurl": info.get("fullurl")}
def parse_exercise(title: str, pageid: int) -> Dict[str, Any]:
print(f"[Parse] Lade '{title}' (ID={pageid})")
resp = requests.get(
f"{API_BASE_URL}/parsepage",
params={"pageid": pageid, "title": title},
timeout=60
)
resp.raise_for_status()
wikitext = resp.json().get("wikitext", "")
wikicode = mwparserfromhell.parse(wikitext)
raw: Dict[str, Any] = {"title": title, "source": "MediaWiki", "pageid": pageid}
for tpl in wikicode.filter_templates():
name = str(tpl.name).strip()
if name == "ÜbungInfoBox":
for p in tpl.params:
raw[str(p.name).strip()] = str(p.value).strip()
elif name == "Übungsbeschreibung":
for p in tpl.params:
raw[str(p.name).strip()] = str(p.value).strip()
elif name == "SkillDevelopment":
raw.setdefault("capabilities", [])
try:
cap = str(tpl.get("PrimaryCapability").value).strip()
except Exception:
cap = ""
try:
lvl = int(str(tpl.get("CapabilityLevel").value).strip())
except Exception:
lvl = 0
if cap:
raw["capabilities"].append({"capability": cap, "level": lvl})
raw["wikitext"] = wikitext
return raw
def build_payload(raw: Dict[str, Any], fullurl: str, category: str) -> Dict[str, Any]:
    # Exercise.capabilities expects Dict[str, int]
caps_list = raw.get("capabilities", [])
capabilities = {}
for c in caps_list:
cap = c.get("capability")
lvl = c.get("level")
if isinstance(cap, str) and cap:
try:
capabilities[cap] = int(lvl)
except Exception:
pass
# Defaults/Fallbacks
duration = 0.0
try:
duration = float(raw.get("Dauer", 0) or 0)
except Exception:
duration = 0.0
keywords = []
kw_raw = raw.get("Schlüsselworte", "")
if isinstance(kw_raw, str):
keywords = [k.strip() for k in kw_raw.split(",") if k.strip()]
equipment = []
eq_raw = raw.get("equipment", [])
if isinstance(eq_raw, str):
equipment = [e.strip() for e in eq_raw.split(",") if e.strip()]
elif isinstance(eq_raw, list):
equipment = [str(e).strip() for e in eq_raw if str(e).strip()]
payload: Dict[str, Any] = {
"title": raw.get("title") or "",
"summary": raw.get("Summary", "") or "",
"short_description": raw.get("Summary", "") or "",
"keywords": keywords,
"link": fullurl or "",
"discipline": raw.get("Übungstyp", "") or "",
"group": raw.get("Gruppengröße", "") or None,
"age_group": raw.get("Altersgruppe", "") or "",
"target_group": raw.get("Zielgruppe", "") or "",
"min_participants": 1,
"duration_minutes": int(round(duration)), # Exercise erwartet int
"capabilities": capabilities,
"category": category or "",
"purpose": raw.get("Ziel", "") or "",
"execution": raw.get("Durchführung", "") or "",
"notes": raw.get("Hinweise", "") or "",
"preparation": raw.get("RefMethode", "") or "",
"method": raw.get("method", "") or "", # falls im Wikitext vorhanden
"equipment": equipment,
"fullurl": fullurl or "", # optionales Feld
# Idempotenz (optional nutzbar in exercise_router):
"external_id": f"wiki:{raw.get('pageid')}",
"source": "MediaWiki"
}
return payload
def ingest_exercise(payload: Dict[str, Any]) -> None:
    title = payload.get("title", "<untitled>")
resp = requests.post(EXERCISE_API, json=payload, timeout=60)
if resp.status_code == 422:
print(f"[Ingest] '{title}' -> FAILED 422:\n{resp.text}")
try:
resp.raise_for_status()
except Exception:
pass
return
resp.raise_for_status()
print(f"[Ingest] '{title}' -> OK")
# ----- Main -----
def main() -> None:
    parser = argparse.ArgumentParser(description="Import exercises from Wiki to Qdrant (via FastAPI wiki_router)")
    parser.add_argument("--all", action="store_true", help="Import all exercises (SMW ask)")
    parser.add_argument("--title", type=str, default=DEFAULT_TITLE, help="Single import of one exercise title")
    parser.add_argument("--category", type=str, default=DEFAULT_CAT, help="Wiki category (e.g. 'Übungen')")
    parser.add_argument("--username", type=str, default=os.getenv("WIKI_BOT_USER"), help="Wiki login user (overrides .env)")
    parser.add_argument("--password", type=str, default=os.getenv("WIKI_BOT_PASSWORD"), help="Wiki login password (overrides .env)")
    parser.add_argument("--skip-login", action="store_true", help="Skip the login step (if a session is already active)")
args = parser.parse_args()
# Sanity
wiki_health()
    # Login (unless explicitly skipped)
    if not args.skip_login:
        if not args.username or not args.password:
            print("[Login] Error: missing credentials. Set .env (WIKI_BOT_USER/WIKI_BOT_PASSWORD) or pass --username/--password.", file=sys.stderr)
sys.exit(1)
try:
wiki_login(args.username, args.password)
except Exception as e:
print(str(e), file=sys.stderr)
sys.exit(1)
    # Single or full import
    if args.all:
        print(f"[Main] Loading list of exercises from category '{args.category}'")
        pages = fetch_all_pages(args.category)
        print(f"[Main] Found {len(pages)} pages.")
for title, entry in pages.items():
pid = entry.get("pageid")
fullurl = entry.get("fullurl")
if not pid:
                # look up the core page info
info = fetch_page_info(title)
pid = info.get("pageid")
fullurl = fullurl or info.get("fullurl")
if not pid:
print(f"[Skip] '{title}' hat keine pageid")
continue
raw = parse_exercise(title, pid)
payload = build_payload(raw, fullurl or "", args.category)
ingest_exercise(payload)
else:
print(f"[Main] Import single exercise: {args.title}")
info = fetch_page_info(args.title)
pid = info.get("pageid")
fullurl = info.get("fullurl") or ""
if not pid:
print(f"[Error] pageid für '{args.title}' nicht gefunden.", file=sys.stderr)
sys.exit(1)
raw = parse_exercise(args.title, pid)
payload = build_payload(raw, fullurl, args.category)
ingest_exercise(payload)
if __name__ == "__main__":
main()
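A sketch of what build_payload() above produces for a minimal parsed record (all field values invented for illustration, including the URL):

raw = {
    "title": "Affenklatschen",
    "pageid": 123,
    "Summary": "Partner reaction drill.",
    "Schlüsselworte": "Reaktion, Distanz",
    "Dauer": "10",
    "capabilities": [{"capability": "Reaktion", "level": 2}],
}
payload = build_payload(raw, fullurl="https://karatetrainer.net/wiki/Affenklatschen", category="Übungen")
print(payload["keywords"])          # -> ['Reaktion', 'Distanz']
print(payload["duration_minutes"])  # -> 10 (rounded to int)
print(payload["capabilities"])      # -> {'Reaktion': 2}
print(payload["external_id"])       # -> 'wiki:123'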

View File

@ -0,0 +1,117 @@
"""
Module: wiki_importer.py
Beschreibung:
- Importiert zunächst nur eine Übung aus dem Wiki
- Liest Wikitext einer Übung aus
- Parsen mit mwparserfromhell
- Extrahiert Felder aus Templates:
* ÜbungInfoBox
* Übungsbeschreibung
* SkillDevelopment (mehrfach)
- Baut ein Exercise-Objekt zusammen
- Speichert per POST /exercise Endpoint in Qdrant
- Detailliertes Error-Logging für 422 und allgemeine Fehler
Version: 1.1.2
"""
import requests
import mwparserfromhell
import os
import sys
from typing import Dict, Any
# Configuration via environment variables
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000/import/wiki")
EXERCISE_API = os.getenv("EXERCISE_API_URL", "http://localhost:8000/exercise")
# Exercise title to import
TITLE = os.getenv("WIKI_EXERCISE_TITLE", "Affenklatschen")
# Helper: fetches pageid and fullurl via the core API
def fetch_page_info(title: str) -> Dict[str, Any]:
r = requests.get(f"{API_BASE_URL}/info", params={"title": title})
r.raise_for_status()
info = r.json()
return {"pageid": info.get("pageid"), "fullurl": info.get("fullurl")}
# Parser: loads and parses one exercise
def parse_exercise(title: str, pageid: int) -> Dict[str, Any]:
print(f"[Parse] Loading '{title}' (ID={pageid})")
resp = requests.get(f"{API_BASE_URL}/parsepage", params={"pageid": pageid, "title": title})
resp.raise_for_status()
wikitext = resp.json().get("wikitext", "")
wikicode = mwparserfromhell.parse(wikitext)
data: Dict[str, Any] = {"title": title, "source": "MediaWiki", "pageid": pageid}
for tpl in wikicode.filter_templates():
name = tpl.name.strip()
if name == "ÜbungInfoBox":
for param in tpl.params:
data[param.name.strip()] = str(param.value).strip()
elif name == "Übungsbeschreibung":
for param in tpl.params:
data[param.name.strip()] = str(param.value).strip()
elif name == "SkillDevelopment":
data.setdefault("capabilities", [])
primary = str(tpl.get("PrimaryCapability").value).strip()
level = int(str(tpl.get("CapabilityLevel").value).strip())
data["capabilities"].append({"capability": primary, "level": level})
data["wikitext"] = wikitext
return data
# Ingestion: sends one record to Qdrant with detailed error logging
def ingest_exercise(ex_data: Dict[str, Any]) -> None:
title = ex_data.get("title")
try:
resp = requests.post(EXERCISE_API, json=ex_data)
if resp.status_code == 422:
print(f"[Ingest] '{title}' -> FAILED 422:")
print(resp.text)
resp.raise_for_status()
print(f"[Ingest] '{title}' -> OK")
except requests.HTTPError as e:
msg = resp.text if 'resp' in locals() else str(e)
print(f"[Ingest] '{title}' -> HTTPError: {e} - {msg}")
except Exception as e:
print(f"[Ingest] '{title}' -> FAILED: {e}")
# Main: one-off import for TITLE
if __name__ == "__main__":
print(f"[Main] Import single exercise: {TITLE}")
try:
info = fetch_page_info(TITLE)
pageid = info.get("pageid")
fullurl = info.get("fullurl")
        if not pageid:
            print(f"Error: pageid for '{TITLE}' not found.")
            sys.exit(1)
        raw = parse_exercise(TITLE, pageid)
        # convert capabilities into a dict
        caps_list = raw.get("capabilities", [])
        capabilities = {c["capability"]: c["level"] for c in caps_list}
        # payload matching the data model
exercise_payload = {
"title": raw.get("title"),
"summary": raw.get("Summary", ""),
"short_description": raw.get("Summary", ""),
"keywords": raw.get("Schlüsselworte", "").split(', '),
"link": fullurl,
"discipline": raw.get("Übungstyp", ""),
"group": raw.get("Gruppengröße", ""),
"age_group": raw.get("Altersgruppe", ""),
"target_group": raw.get("Zielgruppe", ""),
"min_participants": 1,
"duration_minutes": float(raw.get("Dauer", 0)),
"capabilities": capabilities,
"category": raw.get("category", "Übungen"),
"purpose": raw.get("Ziel", ""),
"execution": raw.get("Durchführung", ""),
"notes": raw.get("Hinweise", ""),
"preparation": raw.get("RefMethode", ""),
"method": raw.get("method", ""),
"equipment": raw.get("equipment", []),
"fullurl": fullurl
}
ingest_exercise(exercise_payload)
except Exception as e:
print(f"Fatal error: {e}")
sys.exit(1)

View File

@ -0,0 +1,136 @@
"""
Module: wiki_importer.py
Beschreibung:
- Importiert alle Übungen aus dem Wiki
- Holt Liste aller Übungs-Titel und pageids via `/semantic/pages`
- Für jede Übung:
* Fetch pageinfo (pageid, fullurl)
* Parse Wikitext (Templates: ÜbungInfoBox, Übungsbeschreibung, SkillDevelopment)
* Baut Payload entsprechend Datenmodell
* POST an `/exercise` Endpoint
- Unterstützt Single-Import via Umgebungsvariable `WIKI_EXERCISE_TITLE` und Full-Import via `--all`
Version: 2.0.0
"""
import requests
import mwparserfromhell
import os
import sys
from typing import Dict, Any, List
import argparse
# Configuration
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000/import/wiki")
EXERCISE_API = os.getenv("EXERCISE_API_URL", "http://localhost:8000/exercise")
DEFAULT_CATEGORY = os.getenv("WIKI_CATEGORY", "Übungen")
DEFAULT_TITLE = os.getenv("WIKI_EXERCISE_TITLE", "Affenklatschen")
# Helper: fetches the list of all exercises (title -> entry)
def fetch_all_pages(category: str) -> Dict[str, Any]:
resp = requests.get(f"{API_BASE_URL}/semantic/pages", params={"category": category})
resp.raise_for_status()
return resp.json()
# Helper: fetches pageid and fullurl via the core API
def fetch_page_info(title: str) -> Dict[str, Any]:
r = requests.get(f"{API_BASE_URL}/info", params={"title": title})
r.raise_for_status()
info = r.json()
return {"pageid": info.get("pageid"), "fullurl": info.get("fullurl")}
# Parser: loads and parses one exercise
def parse_exercise(title: str, pageid: int) -> Dict[str, Any]:
print(f"[Parse] {title} (ID={pageid})")
resp = requests.get(f"{API_BASE_URL}/parsepage", params={"pageid": pageid, "title": title})
resp.raise_for_status()
wikitext = resp.json().get("wikitext", "")
wikicode = mwparserfromhell.parse(wikitext)
raw: Dict[str, Any] = {"title": title, "source": "MediaWiki", "pageid": pageid}
for tpl in wikicode.filter_templates():
name = tpl.name.strip()
if name == "ÜbungInfoBox":
for p in tpl.params:
raw[p.name.strip()] = str(p.value).strip()
elif name == "Übungsbeschreibung":
for p in tpl.params:
raw[p.name.strip()] = str(p.value).strip()
elif name == "SkillDevelopment":
raw.setdefault("capabilities", [])
cap = str(tpl.get("PrimaryCapability").value).strip()
lvl = int(str(tpl.get("CapabilityLevel").value).strip())
raw["capabilities"].append({"capability": cap, "level": lvl})
raw["wikitext"] = wikitext
return raw
# Ingestion
def ingest_exercise(payload: Dict[str, Any]) -> None:
title = payload.get("title")
resp = requests.post(EXERCISE_API, json=payload)
if resp.status_code == 422:
print(f"[Error] {title} -> 422: {resp.text}")
return
resp.raise_for_status()
print(f"[Ingest] {title} -> OK")
# Build payload
def build_payload(raw: Dict[str, Any], fullurl: str, category: str) -> Dict[str, Any]:
caps_list = raw.get("capabilities", [])
capabilities = {c["capability"]: c["level"] for c in caps_list}
return {
"title": raw.get("title"),
"summary": raw.get("Summary", ""),
"short_description": raw.get("Summary", ""),
"keywords": raw.get("Schlüsselworte", "").split(', '),
"link": fullurl,
"discipline": raw.get("Übungstyp", ""),
"group": raw.get("Gruppengröße", ""),
"age_group": raw.get("Altersgruppe", ""),
"target_group": raw.get("Zielgruppe", ""),
"min_participants": 1,
"duration_minutes": float(raw.get("Dauer", 0)),
"capabilities": capabilities,
"category": category,
"purpose": raw.get("Ziel", ""),
"execution": raw.get("Durchführung", ""),
"notes": raw.get("Hinweise", ""),
"preparation": raw.get("RefMethode", ""),
"method": raw.get("method", ""),
"equipment": raw.get("equipment", []),
"fullurl": fullurl
}
# Main
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Import exercises from Wiki to Qdrant")
parser.add_argument("--all", action="store_true", help="Import all exercises")
parser.add_argument("--title", type=str, default=DEFAULT_TITLE, help="Single exercise title")
parser.add_argument("--category", type=str, default=DEFAULT_CATEGORY, help="Wiki category")
args = parser.parse_args()
if args.all:
pages = fetch_all_pages(args.category)
print(f"Found {len(pages)} exercises in category '{args.category}'")
for title, entry in pages.items():
pid = entry.get("pageid")
if not pid:
info = fetch_page_info(title)
pid = info.get("pageid")
fullurl = info.get("fullurl")
else:
fullurl = entry.get("fullurl") or fetch_page_info(title)["fullurl"]
if not pid:
print(f"Skip {title}, no pageid")
continue
raw = parse_exercise(title, pid)
payload = build_payload(raw, fullurl, args.category)
ingest_exercise(payload)
else:
info = fetch_page_info(args.title)
pid = info.get("pageid")
fullurl = info.get("fullurl")
if not pid:
print(f"Error: pageid for '{args.title}' not found")
sys.exit(1)
raw = parse_exercise(args.title, pid)
payload = build_payload(raw, fullurl, args.category)
ingest_exercise(payload)
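For reference, a sketch of the template extraction parse_exercise() performs, run on a tiny inline wikitext sample (template values invented for illustration):

import mwparserfromhell

sample = """{{ÜbungInfoBox|Dauer=10|Übungstyp=Partnerübung}}
{{SkillDevelopment|PrimaryCapability=Reaktion|CapabilityLevel=2}}"""

code = mwparserfromhell.parse(sample)
for tpl in code.filter_templates():
    params = {str(p.name).strip(): str(p.value).strip() for p in tpl.params}
    print(str(tpl.name).strip(), params)
# -> ÜbungInfoBox {'Dauer': '10', 'Übungstyp': 'Partnerübung'}
# -> SkillDevelopment {'PrimaryCapability': 'Reaktion', 'CapabilityLevel': '2'}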