#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import sys
from collections import Counter
from itertools import groupby
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import csv

# Text file holding the bit sequence to analyse; main() passes it to read_bits().
# The path is relative, so it is resolved against the current working directory.
INPUT_FILE = "datebinare.txt"
# Directory that main() creates if needed and fills with the CSV and PNG outputs.
OUTDIR = "rezultate"


def read_bits(path: str) -> str:
    """Load a sequence of binary digits from a UTF-8 text file.

    All whitespace is treated as formatting and discarded, including line
    breaks in the middle of the data, so a bit string wrapped across several
    lines is accepted.

    Args:
        path: Location of the text file to read.

    Returns:
        The file content as a non-empty string made up only of "0" and "1".

    Raises:
        ValueError: If no bits remain once whitespace is removed, or if the
            file contains any character other than "0", "1" or whitespace.
        OSError: If the file cannot be opened.
    """
    with open(path, "r", encoding="utf-8") as f:
        # split() with no argument breaks on every run of whitespace, so
        # re-joining the pieces drops leading, trailing and embedded
        # whitespace in one pass.
        data = "".join(f.read().split())
    if not data:
        raise ValueError("Fișierul este gol.")
    invalid = set(data) - {"0", "1"}
    if invalid:
        # Report the offender that appears first in the file. Picking an
        # arbitrary element of the set would make the message vary between
        # runs when several different invalid characters are present.
        ch = min(invalid, key=data.index)
        raise ValueError(f"Fișierul conține caracter invalid: {repr(ch)}")
    return data


def run_length_encode(bits: str):
    """Collapse consecutive equal symbols into (symbol, run_length) pairs.

    Args:
        bits: The sequence to encode.

    Returns:
        A list of (symbol, length) tuples in order of appearance; an empty
        list when the input is empty.
    """
    encoded = []
    for symbol, members in groupby(bits):
        # groupby yields a lazy iterator per run; walk it to count the run.
        run_length = 0
        for _ in members:
            run_length += 1
        encoded.append((symbol, run_length))
    return encoded


def counter_by_bit(runs):
    """Build one run-length histogram per bit value.

    Args:
        runs: Iterable of (bit, length) pairs.

    Returns:
        A tuple (zero_runs, one_runs) of Counters mapping a run length to the
        number of runs of that length. Runs whose bit is "0" are tallied in
        the first Counter; every other run is tallied in the second.
    """
    zero_runs = Counter()
    one_runs = Counter()
    for symbol, run_len in runs:
        histogram = zero_runs if symbol == "0" else one_runs
        histogram[run_len] += 1
    return zero_runs, one_runs


def to_percent(counter: Counter):
    """Express each count as a percentage of the sum of all counts.

    Args:
        counter: Mapping from a key to its count.

    Returns:
        A dict with the same keys whose values are percentages (floats).
        When the counts sum to zero every key maps to 0.0, which avoids a
        division by zero.
    """
    grand_total = sum(counter.values())
    if not grand_total:
        return dict.fromkeys(counter, 0.0)

    shares = {}
    for key, count in counter.items():
        shares[key] = 100.0 * count / grand_total
    return shares


def save_csv(c0, c1, out_csv_path: str):
    """Write the run-length distribution of both bit values to a CSV file.

    The file gets a header row followed by one row per run length present in
    either histogram, in ascending order. Each row holds the length, then
    the count and percentage for 0-runs, then the count and percentage for
    1-runs. Percentages are rounded to six decimals; a length missing from
    one histogram is written as count 0 and percentage 0.0.

    Args:
        c0: Histogram (run length -> count) for runs of "0".
        c1: Histogram (run length -> count) for runs of "1".
        out_csv_path: Destination file; it is overwritten if it exists.
    """
    share0 = to_percent(c0)
    share1 = to_percent(c1)
    all_lengths = sorted(c0.keys() | c1.keys())

    def build_rows():
        for run_len in all_lengths:
            yield [
                run_len,
                c0.get(run_len, 0),
                round(share0.get(run_len, 0.0), 6),
                c1.get(run_len, 0),
                round(share1.get(run_len, 0.0), 6),
            ]

    # newline="" leaves line-ending handling to the csv module.
    with open(out_csv_path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(["length", "count_0", "pct_0", "count_1", "pct_1"])
        writer.writerows(build_rows())


def plot_bar(counter: Counter, title: str, out_png_path: str):
    """Save a bar chart of the run-length percentage distribution as a PNG.

    One bar is drawn per run length, in ascending order, with the length
    used as a categorical x label and the percentage share as the height.
    An empty histogram still produces an image containing only the title
    and the axis labels.

    Args:
        counter: Histogram mapping a run length to its number of occurrences.
        title: Text shown as the chart title.
        out_png_path: Destination image path.
    """
    # Prepare the bar data before opening a figure.
    labels = []
    heights = []
    if counter:
        shares = to_percent(counter)
        for run_len in sorted(counter):
            labels.append(str(run_len))
            heights.append(shares[run_len])

    plt.figure()
    if labels:
        plt.bar(labels, heights)
    plt.title(title)
    plt.xlabel("Lungimea run-ului")
    plt.ylabel("Procent (%)")
    plt.tight_layout()
    plt.savefig(out_png_path, dpi=150)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()


def main():
    """Analyse the runs of identical bits in INPUT_FILE and write the reports.

    Reads the bit string, run-length encodes it, prints summary figures to
    stdout, and writes a CSV plus one bar chart per bit value into OUTDIR,
    which is created if it does not exist.

    Any failure to load the input is reported on stderr and ends the process
    with exit status 1. This covers a missing or unreadable file as well as
    empty or invalid content.
    """
    # Attempt the read directly instead of checking for existence first:
    # this removes the gap between check and open, and lets every input
    # problem be reported the same way.
    try:
        bits = read_bits(INPUT_FILE)
    except FileNotFoundError:
        print(f"Eroare: fișierul {INPUT_FILE} nu există.", file=sys.stderr)
        sys.exit(1)
    except (OSError, ValueError) as exc:
        # ValueError also covers UnicodeDecodeError, which is its subclass.
        print(f"Eroare: {exc}", file=sys.stderr)
        sys.exit(1)

    os.makedirs(OUTDIR, exist_ok=True)

    runs = run_length_encode(bits)
    c0, c1 = counter_by_bit(runs)

    # The histogram keys are the run lengths seen for each bit value, so the
    # largest key is the longest run; default=0 covers a bit that never occurs.
    max0 = max(c0, default=0)
    max1 = max(c1, default=0)

    print(f"Total biți: {len(bits):,}")
    print(f"Cea mai lungă serie de 0: {max0}")
    print(f"Cea mai lungă serie de 1: {max1}")
    print(f"Număr total run-uri: {len(runs):,}")

    out_csv = os.path.join(OUTDIR, "run_distribution.csv")
    save_csv(c0, c1, out_csv)
    print(f"CSV salvat: {out_csv}")

    plot_bar(c0, "Distribuția lungimilor pentru 0", os.path.join(OUTDIR, "run_distribution_0.png"))
    plot_bar(c1, "Distribuția lungimilor pentru 1", os.path.join(OUTDIR, "run_distribution_1.png"))

    print("Gata. Grafice și CSV generate.")


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
