import os
import sys
import shutil
import requests
from datetime import datetime, timezone
#from text_chunker import chunk_text_paragraphs
from uuid import uuid4
from chunking_utils import (
    chunk_text_paragraphs,
    chunk_by_sentences,
    chunk_with_sentence_overlap
)


# Configuration
# URL the chunk payload is POSTed to.
API_URL: str = "http://localhost:8000/embed"
# Passed to the chunker as ``max_length``
# (unit presumably characters -- TODO confirm against chunking_utils).
CHUNK_SIZE: int = 500
# NOTE(review): not referenced by the visible script, which passes a fixed
# sentence overlap to the chunker instead -- confirm whether still needed.
OVERLAP: int = 100

# Input parameters: category, file name, optional topic metadata
cli_args = sys.argv[1:]

# Category and file name are mandatory; print the usage line and abort
# with a non-zero exit code when either one is missing.
if len(cli_args) < 2:
    print("? Aufruf: python import_single_file.py <category> <filename> [topic]")
    sys.exit(1)

CATEGORY, FILENAME = cli_args[:2]
# The third positional argument is optional; None means "no topic given".
TOPIC = cli_args[2] if len(cli_args) >= 3 else None

# Resolve the category folder below ~/knowledge, its archive subfolder and
# the full path of the file to import.
SOURCE_DIR = os.path.expanduser(f"~/knowledge/{CATEGORY}")
ARCHIVE_DIR = os.path.join(SOURCE_DIR, "_imported")
FILEPATH = os.path.join(SOURCE_DIR, FILENAME)

# Validation: require a regular file. os.path.exists() would also accept a
# directory (for example "_imported" itself), which would pass this check
# and then fail with an unhandled error when the script tries to open it.
if not os.path.isfile(FILEPATH):
    print(f"? Datei nicht gefunden: {FILEPATH}")
    sys.exit(1)

# Create the archive folder up front; exist_ok makes repeated runs harmless.
os.makedirs(ARCHIVE_DIR, exist_ok=True)

print(f"?? Importiere Datei: {FILENAME} aus Kategorie: {CATEGORY}")

# Read the file contents and split them into chunks
with open(FILEPATH, encoding="utf-8") as source_file:
    content = source_file.read()

# Chunk with CHUNK_SIZE as max_length and an overlap of two sentences.
chunks = chunk_with_sentence_overlap(
    content,
    max_length=CHUNK_SIZE,
    overlap_sents=2,
)
print(f"?? {len(chunks)} Textabschnitte erzeugt.")

# Prepare metadata
# One timezone-aware UTC timestamp shared by every chunk of this run.
now = datetime.now(timezone.utc).isoformat()

# Derive the title by dropping only a trailing ".txt". str.replace() would
# also remove ".txt" from the middle of a name such as "a.txt.notes.txt".
_TXT_SUFFIX = ".txt"
title = FILENAME[:-len(_TXT_SUFFIX)] if FILENAME.endswith(_TXT_SUFFIX) else FILENAME

# Request body: one metadata record per chunk, plus the target collection
# (the category name).
payload = {
    "chunks": [
        {
            "text": chunk,
            "source": FILENAME,
            "source_type": "file",
            "title": title,
            "version": "v1.0",
            "related_to": CATEGORY,
            "tags": [CATEGORY],
            "owner": "karate-agent",
            # Falls back to "default" when no topic (or an empty one) was given.
            "context_tag": TOPIC or "default",
            "imported_at": now,
            "chunk_index": i,
            "category": CATEGORY,
        }
        for i, chunk in enumerate(chunks)
    ],
    "collection": CATEGORY,
}

# Send to the API
# (connect, read) timeout in seconds. Without a timeout, requests waits
# indefinitely on an unresponsive server and the script would hang. The read
# timeout is generous because the server handles all chunks in one request.
REQUEST_TIMEOUT = (10, 600)

try:
    res = requests.post(API_URL, json=payload, timeout=REQUEST_TIMEOUT)
    # Turn HTTP 4xx/5xx responses into exceptions so they are reported below.
    res.raise_for_status()
except requests.RequestException as e:
    # Covers connection errors, timeouts and HTTP error statuses. Exit with a
    # non-zero code so the statements after this block never run on failure.
    print(f"? Fehler beim Senden: {e}")
    sys.exit(1)
else:
    print(f"? {len(chunks)} Abschnitte erfolgreich eingebettet.")

# Archive the file: move it into the archive folder under its original name.
archive_target = os.path.join(ARCHIVE_DIR, FILENAME)
shutil.move(FILEPATH, archive_target)
print("?? Datei nach _imported verschoben.")