import os
import subprocess
import zipfile
import uuid
import shutil
import sys
from flask import Flask, request, render_template_string, send_file, jsonify
from threading import Thread

# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)

# --- CONFIGURATION ---
# All working folders live next to this script.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
UPLOAD_FOLDER = os.path.join(BASE_DIR, 'paddle_in')   # uploads, one sub-folder per batch
OUTPUT_FOLDER = os.path.join(BASE_DIR, 'paddle_out')  # OCR results and ZIP archives
PORT = 8005

# Create both working folders at import time if they are missing.
for folder in (UPLOAD_FOLDER, OUTPUT_FOLDER):
    os.makedirs(folder, exist_ok=True)

# Language-code mapping for Paddle/Tesseract (three-letter code -> name).
# NOTE(review): nothing in the visible part of this file reads this mapping.
LANG_MAP = dict(
    ron="romanian",
    ell="greek",
    eng="english",
    deu="german",
    fra="french",
)

# In-memory batch registry: batch id -> {'status', 'logs', 'files'}.
batches = {}

def _add_ocr_tools_to_environment():
    """Make default Windows installs of Tesseract/Ghostscript discoverable.

    Appends the default Tesseract-OCR folder and at most one Ghostscript
    ``bin`` folder (first in reverse lexicographic order that is not already
    listed) to ``PATH`` when they exist on disk, and points
    ``TESSDATA_PREFIX`` at a ``tessdata`` folder next to this script unless
    that variable is already set.
    """
    tesseract_dir = r"C:\Program Files\Tesseract-OCR"
    gs_dir = r"C:\Program Files\gs"

    gs_bins = []
    if os.path.isdir(gs_dir):
        gs_bins = sorted(
            (root for root, _dirs, _files in os.walk(gs_dir)
             if os.path.basename(root).lower() == "bin"),
            reverse=True,
        )

    env_path = os.environ.get("PATH", "")
    extra_dirs = []
    if os.path.isdir(tesseract_dir) and tesseract_dir not in env_path:
        extra_dirs.append(tesseract_dir)
    for gs_bin in gs_bins:
        if gs_bin not in env_path:
            extra_dirs.append(gs_bin)
            break
    if extra_dirs:
        # Use the platform separator and avoid a leading empty PATH entry
        # when PATH was previously unset/empty.
        current = [env_path] if env_path else []
        os.environ["PATH"] = os.pathsep.join(current + extra_dirs)

    tessdata_local = os.path.join(BASE_DIR, "tessdata")
    if os.path.isdir(tessdata_local) and not os.environ.get("TESSDATA_PREFIX"):
        os.environ["TESSDATA_PREFIX"] = tessdata_local


def _missing_system_tools():
    """Return display names of the required executables not found on PATH."""
    missing = []
    if shutil.which('tesseract') is None:
        missing.append('tesseract')
    if shutil.which('gswin64c') is None and shutil.which('gs') is None:
        missing.append('ghostscript (gswin64c)')
    return missing


def _zip_directory(src_dir, zip_path):
    """Write every file under *src_dir* into a new archive at *zip_path*.

    Entries are stored under their bare file name (no sub-directories).
    """
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for root, _dirs, files in os.walk(src_dir):
            for name in files:
                zipf.write(os.path.join(root, name), name)


def process_thread(batch_id, files_paths, lang):
    """Run OCRmyPDF over every file of a batch and zip the results.

    Intended to run in a background thread. Progress is reported by mutating
    ``batches[batch_id]``: ``status`` moves from ``'processing'`` to
    ``'completed'`` or ``'error'`` and human-readable lines are appended to
    ``logs``.

    Args:
        batch_id: Key of the batch in ``batches``; also names the output
            folder and the resulting ``<batch_id>.zip`` in ``OUTPUT_FOLDER``.
        files_paths: Paths of the input files to process.
        lang: Language code passed to OCRmyPDF as ``-l <lang>+eng``.

    A failure on a single file is logged and the remaining files are still
    processed. Any other unexpected failure (for example while creating the
    archive) marks the batch as ``'error'`` instead of leaving it stuck in
    ``'processing'``.
    """
    batch = batches[batch_id]
    logs = batch['logs']
    batch['status'] = 'processing'

    try:
        output_batch_dir = os.path.join(OUTPUT_FOLDER, batch_id)
        os.makedirs(output_batch_dir, exist_ok=True)

        _add_ocr_tools_to_environment()

        # Tesseract and Ghostscript must be installed and reachable via PATH.
        missing = _missing_system_tools()
        if missing:
            batch['status'] = 'error'
            logs.append('Lipsesc dependinte de sistem: ' + ', '.join(missing))
            logs.append('Instaleaza Tesseract si Ghostscript si incearca din nou.')
            return

        logs.append(f"🌐 Verificăm motorul de limbă: {lang}...")

        for f_path in files_paths:
            f_name = os.path.basename(f_path)
            output_file = os.path.join(output_batch_dir, f"PADDLE_OCR_{f_name}")
            logs.append(f"🚀 Procesare avansată (RTX 5090): {f_name}...")

            try:
                # OCRmyPDF adds a text layer while keeping the page image
                # (the "sandwich" approach), with rotation/deskew/clean-up.
                subprocess.run(
                    [
                        sys.executable, '-m', 'ocrmypdf',
                        '-l', f'{lang}+eng',
                        '--rotate-pages',
                        '--deskew',
                        '--clean',
                        '--optimize', '1',
                        f_path,
                        output_file,
                    ],
                    check=True,
                    capture_output=True,
                    text=True,
                    # Tool output may not be in the locale encoding; never let
                    # a decode error mask the real result of the run.
                    errors='replace',
                )
            except subprocess.CalledProcessError as err:
                logs.append(f"❌ Eroare la {f_name}")
                for stream in (err.stdout, err.stderr):
                    if stream:
                        logs.append(stream.strip())
            except Exception as err:
                # Deliberately broad: one bad file must not abort the batch,
                # but the cause is now recorded instead of being dropped.
                logs.append(f"❌ Eroare la {f_name}")
                logs.append(f"{type(err).__name__}: {err}")
            else:
                logs.append(f"✅ Gata: {f_name}")

        # Bundle everything that was produced into a single archive.
        _zip_directory(output_batch_dir, os.path.join(OUTPUT_FOLDER, f"{batch_id}.zip"))
    except Exception as err:
        # Top-level boundary of the worker thread: surface the failure to
        # pollers rather than leaving the batch in 'processing' forever.
        batch['status'] = 'error'
        logs.append(f"❌ Eroare neașteptată: {type(err).__name__}: {err}")
        return

    batch['status'] = 'completed'
    logs.append("🏁 TOATE FIȘIERELE SUNT GATA!")

@app.route('/')
def index():
    """Serve the single-page upload UI rendered from ``HTML_UI``."""
    page = render_template_string(HTML_UI)
    return page

@app.route('/process', methods=['POST'])
def process():
    """Store the uploaded files in a new batch folder and start OCR.

    Reads the multipart field ``files`` (one or more uploads) and the form
    field ``lang`` (default ``'ron'``), saves the uploads under
    ``UPLOAD_FOLDER/<batch_id>/`` and launches ``process_thread`` in a
    background thread.

    Returns:
        JSON ``{'status': 'started', 'batch_id': ...}`` on success, or
        JSON ``{'status': 'error', 'message': ...}`` with HTTP 400 when the
        request contains no usable file.
    """
    lang = request.form.get('lang', 'ron')

    # The file name is chosen by the client, so reduce it to its last path
    # component (treating "/", "\" and ":" as separators on every platform)
    # before joining it to the batch folder. This blocks "../" traversal and
    # Windows drive-letter overrides while keeping non-ASCII names intact.
    accepted = []
    taken = set()
    for upload in request.files.getlist('files'):
        raw_name = upload.filename or ''
        name = raw_name.replace('\\', '/').replace(':', '/').rsplit('/', 1)[-1]
        if name in ('', '.', '..'):
            continue  # empty form slot or a name that is not a file name
        # Two uploads with the same name would overwrite each other and be
        # processed twice; give later ones a numeric suffix instead.
        stem, ext = os.path.splitext(name)
        counter = 1
        while name.lower() in taken:
            name = f"{stem}_{counter}{ext}"
            counter += 1
        taken.add(name.lower())
        accepted.append((upload, name))

    if not accepted:
        return jsonify({'status': 'error',
                        'message': 'Nu a fost trimis niciun fișier valid.'}), 400

    batch_id = str(uuid.uuid4())[:8]
    batch_dir = os.path.join(UPLOAD_FOLDER, batch_id)
    os.makedirs(batch_dir, exist_ok=True)

    paths = []
    for upload, name in accepted:
        dest = os.path.join(batch_dir, name)
        upload.save(dest)
        paths.append(dest)

    # Register the batch before the worker starts so it can update the entry.
    batches[batch_id] = {'status': 'queued', 'logs': [], 'files': paths}
    Thread(target=process_thread, args=(batch_id, paths, lang)).start()
    return jsonify({'status': 'started', 'batch_id': batch_id})

@app.route('/status/<batch_id>')
def status(batch_id):
    """Return the batch entry as JSON, or a ``not_found`` stub if unknown."""
    unknown = {'status': 'not_found', 'logs': []}
    entry = batches.get(batch_id, unknown)
    return jsonify(entry)

@app.route('/download/<batch_id>')
def download(batch_id):
    """Send ``<batch_id>.zip`` from ``OUTPUT_FOLDER`` as an attachment.

    Returns:
        The archive on success; JSON ``{'status': 'not_found'}`` with HTTP
        404 when the id is malformed or no archive exists; JSON
        ``{'status': 'not_ready'}`` with HTTP 409 when the batch is known but
        has not completed (its archive may still be being written).
    """
    # Generated ids are short hex strings; rejecting anything that is not
    # purely alphanumeric keeps path separators and drive letters out of the
    # file-system path built below.
    if not batch_id.isalnum():
        return jsonify({'status': 'not_found'}), 404

    entry = batches.get(batch_id)
    if entry is not None and entry.get('status') != 'completed':
        return jsonify({'status': 'not_ready'}), 409

    zip_path = os.path.join(OUTPUT_FOLDER, f"{batch_id}.zip")
    if not os.path.isfile(zip_path):
        # Previously this surfaced as an unhandled error from send_file.
        return jsonify({'status': 'not_found'}), 404
    return send_file(zip_path, as_attachment=True)

# Single-page UI served by the index route. The script posts the selected
# files to /process, then polls /status/<batch_id> every two seconds.
# Polling stops on any terminal status ('completed', 'error', 'not_found'),
# and log lines are written with textContent so file names and tool output
# are never interpreted as HTML.
HTML_UI = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>PaddleOCR Pro - RTX 5090</title>
    <style>
        body { font-family: 'Segoe UI', sans-serif; background: #0b0f1a; color: #e2e8f0; padding: 40px; }
        .card { max-width: 800px; margin: auto; background: #161b22; padding: 30px; border-radius: 15px; border: 1px solid #30363d; box-shadow: 0 10px 30px rgba(0,0,0,0.5); }
        h2 { color: #58a6ff; text-align: center; }
        select, button, input { width: 100%; padding: 12px; margin: 10px 0; border-radius: 8px; font-size: 16px; border: 1px solid #30363d; }
        select { background: #0d1117; color: white; }
        button { background: #238636; color: white; border: none; cursor: pointer; font-weight: bold; font-size: 18px; transition: 0.2s; }
        button:hover { background: #2ea043; }
        button:disabled { background: #484f58; cursor: not-allowed; }
        #log { background: #010409; color: #39d353; padding: 15px; height: 200px; overflow-y: auto; font-family: 'Consolas', monospace; font-size: 13px; margin-top: 20px; border-radius: 6px; border: 1px solid #30363d; white-space: pre-wrap; }
        .zip-btn { display: block; background: #1f6feb; color: white; padding: 15px; text-decoration: none; text-align: center; border-radius: 8px; font-weight: bold; margin-top: 20px; }
    </style>
</head>
<body>
    <div class="card">
        <h2>🚀 PaddleOCR High-Precision</h2>
        <p style="text-align:center; color:#8b949e;">Tehnologie Sandwich (Text invizibil + Imagine originală)</p>
        
        <label>Limba principală:</label>
        <select id="lang">
            <option value="ron">Română 🇷🇴</option>
            <option value="ell">Greacă 🇬🇷</option>
            <option value="eng">Engleză 🇺🇸</option>
            <option value="deu">Germană 🇩🇪</option>
            <option value="fra">Franceză 🇫🇷</option>
        </select>

        <input type="file" id="files" multiple accept=".pdf">
        <button id="btn" onclick="start()">LANSEAZĂ PROCESAREA BULK</button>

        <div id="log">Așteptare fișiere...</div>
        <div id="res" style="display:none;"><a id="link" href="#" class="zip-btn">📥 DESCARCĂ ARHIVA ZIP</a></div>
    </div>
    <script>
        async function start() {
            const files = document.getElementById('files').files;
            const lang = document.getElementById('lang').value;
            if(files.length === 0) return alert("Selectează fișiere PDF!");
            
            const fd = new FormData();
            Array.from(files).forEach(f => fd.append('files', f));
            fd.append('lang', lang);
            
            const btn = document.getElementById('btn');
            const log = document.getElementById('log');
            btn.disabled = true;
            document.getElementById('res').style.display = 'none';
            
            let data;
            try {
                const res = await fetch('/process', {method:'POST', body:fd});
                data = await res.json();
                if(!res.ok || !data.batch_id) throw new Error(data.message || ('HTTP ' + res.status));
            } catch(err) {
                log.textContent = '❌ ' + err;
                btn.disabled = false;
                return;
            }
            
            const timer = setInterval(async () => {
                let s;
                try {
                    s = await (await fetch('/status/'+data.batch_id)).json();
                } catch(err) {
                    return;
                }
                log.textContent = (s.logs || []).join("\\n");
                log.scrollTop = log.scrollHeight;
                if(s.status === 'completed') {
                    clearInterval(timer);
                    document.getElementById('res').style.display='block';
                    document.getElementById('link').href = '/download/'+data.batch_id;
                    btn.disabled = false;
                } else if(s.status === 'error' || s.status === 'not_found') {
                    clearInterval(timer);
                    btn.disabled = false;
                }
            }, 2000);
        }
    </script>
</body>
</html>
"""

if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on every network
    # interface (0.0.0.0) at the configured port.
    app.run(port=PORT, host="0.0.0.0")
