"""Import a text file into an embedding collection via the local HTTP API.

The file is read as UTF-8, split into overlapping character chunks and
posted in a single request to ``API_URL``.

Usage:
    python import_textfile.py <collection> <textfile>
"""
import os
import sys

import requests

# Configuration
API_URL = "http://localhost:8000/embed"
CHUNK_SIZE = 500
OVERLAP = 100


def chunk_text(text, size=CHUNK_SIZE, overlap=OVERLAP):
    """Split *text* into overlapping chunks of at most *size* characters.

    Consecutive chunks start ``size - overlap`` characters apart, so each
    chunk repeats the last *overlap* characters of its predecessor.

    Args:
        text: The string to split.
        size: Maximum chunk length in characters; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must
            satisfy ``0 <= overlap < size``.

    Returns:
        A list of chunk strings; empty when *text* is empty.

    Raises:
        ValueError: If *size* or *overlap* is out of range.
    """
    if size <= 0:
        raise ValueError(f"size must be positive, got {size}")
    # overlap >= size would give a step <= 0 and the scan would never
    # advance; a negative overlap would silently skip text between chunks.
    if not 0 <= overlap < size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < size, got overlap={overlap}, size={size}"
        )

    step = size - overlap
    chunks = []
    for start in range(0, len(text), step):
        chunks.append(text[start:start + size])
        # Once a chunk reaches the end of the text, any further chunk would
        # be fully contained in this one, so stop here.
        if start + size >= len(text):
            break
    return chunks


def read_text_file(path):
    """Return the full content of the UTF-8 encoded text file at *path*.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def import_chunks(chunks, collection):
    """Send the text chunks to the embedding API in a single POST request.

    Args:
        chunks: List of text chunks, sent under the ``"texts"`` key.
        collection: Target collection name, sent under ``"collection"``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection problems or when the server
            answers with an HTTP error status (via ``raise_for_status``).
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(
        API_URL,
        json={
            "texts": chunks,
            "collection": collection,
        },
    )
    response.raise_for_status()
    return response.json()


def main(argv=None):
    """Run the command-line import and return the process exit code.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success (or when there is nothing to import), 1 on any error.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 3:
        print("❌ Nutzung: python import_textfile.py <collection> <textdatei>")
        return 1

    collection = args[1]
    filepath = os.path.expanduser(args[2])

    if not os.path.isfile(filepath):
        print(f"❌ Datei nicht gefunden: {filepath}")
        return 1

    print(f"📄 Lade Datei: {filepath}")
    try:
        text = read_text_file(filepath)
    except (OSError, UnicodeDecodeError) as e:
        print(f"❌ Datei konnte nicht gelesen werden: {e}")
        return 1

    chunks = chunk_text(text)
    if not chunks:
        # Do not post an empty batch to the API.
        print("⚠️ Datei ist leer – nichts zu importieren.")
        return 0

    print(f"📦 {len(chunks)} Abschnitte vorbereitet – sende an Collection '{collection}'...")
    try:
        result = import_chunks(chunks, collection)
    except (requests.RequestException, ValueError) as e:
        print(f"❌ Fehler beim Import: {e}")
        # Signal the failure to the calling shell instead of exiting with 0.
        return 1

    print(f"✅ Import erfolgreich: {result}")
    return 0


if __name__ == "__main__":
    sys.exit(main())