import re
import sys
from io import StringIO
from pathlib import Path
import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import get_column_letter

# HTTP headers sent with every page download (passed to requests.get in
# fetch_html). The User-Agent imitates a desktop Chrome browser; presumably
# the target site treats the default python-requests agent differently --
# TODO confirm.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/122.0 Safari/537.36"
}

# Score captions that detect_scores() searches for in the page text. Each
# label is also used as a key of the per-page score dict, and therefore as a
# column name of the "Overview" sheet built in main().
SCORE_LABELS = [
    "Single-Core Score",
    "Multi-Core Score",
    "Metal Score",
    "OpenCL Score",
    "Vulkan Score",
    "CUDA Score",
]

def clean_sheet_name(name: str) -> str:
    r"""Turn an arbitrary string into a worksheet title that Excel accepts.

    Excel sheet names may not contain any of ``\ / * ? : [ ]``, may not be
    longer than 31 characters, may not begin or end with an apostrophe and
    may not be blank.

    Args:
        name: Proposed sheet title, e.g. derived from a page title.

    Returns:
        The sanitised title: forbidden characters replaced by ``_``, cut to
        at most 31 characters, without leading/trailing apostrophes.  If
        nothing is left, the fallback ``"Sheet"`` is returned.
    """
    name = re.sub(r'[\\/*?:\[\]]', "_", name)
    # Strip apostrophes again after truncating: cutting at 31 characters can
    # expose an apostrophe that used to be in the middle of the name.
    name = name.strip("'")[:31].rstrip("'")
    return name or "Sheet"

def fetch_html(url: str) -> str:
    """Download *url* and return the response body as text.

    The request is sent with the module-level ``HEADERS`` and a 20 second
    timeout.  ``raise_for_status()`` is called on the response, so an HTTP
    error status raises instead of returning an error page.
    """
    response = requests.get(url, timeout=20, headers=HEADERS)
    response.raise_for_status()
    page_source = response.text
    return page_source

def extract_text_score(text: str, label: str):
    """Find the number printed next to *label* in *text*.

    Two layouts are tried, in this order, both case-insensitively:

    1. ``<number> <label>``  (number first)
    2. ``<label> <number>``  (label first)

    Args:
        text: Plain text to search.
        label: Caption of the score, matched literally.

    Returns:
        The number as a string with thousands separators (``,``) removed and
        without a trailing period, or ``None`` if neither layout matches.
    """
    escaped_label = re.escape(label)
    patterns = (
        rf"(\d[\d,\.]*)\s+{escaped_label}",
        rf"{escaped_label}\s+(\d[\d,\.]*)",
    )
    for pattern in patterns:
        m = re.search(pattern, text, re.IGNORECASE)
        if m:
            # The character class also swallows sentence punctuation that
            # directly follows the number ("1234."); drop it so the result
            # is a clean numeric string.
            return m.group(1).replace(",", "").rstrip(".")
    return None

def parse_tables(html: str):
    """Extract the page title, the visible text and all tables from *html*.

    Headings (``h1``-``h4``) and tables are visited in the order
    ``find_all`` returns them; each table is labelled with the text of the
    most recent heading seen before it ("General" if there was none yet).

    Returns:
        A ``(title, text, blocks)`` tuple where ``title`` is the ``<title>``
        text (or "Geekbench Result" when the page has none), ``text`` is the
        page text with one line per text node, and ``blocks`` is a list of
        ``(section_name, DataFrame)`` pairs with all-empty rows and columns
        removed.
    """
    heading_tags = ("h1", "h2", "h3", "h4")
    document = BeautifulSoup(html, "html.parser")

    section = "General"
    blocks = []
    for node in document.find_all([*heading_tags, "table"]):
        if node.name in heading_tags:
            # Collapse any run of whitespace inside the heading to one space.
            section = " ".join(node.get_text(" ", strip=True).split())
            continue
        if node.name != "table":
            continue
        try:
            frame = pd.read_html(StringIO(str(node)))[0]
            frame = frame.dropna(how="all")
            frame = frame.dropna(axis=1, how="all")
            blocks.append((section, frame))
        except ValueError:
            # Best effort: a table that cannot be turned into a DataFrame is
            # left out instead of aborting the whole page.
            pass

    if document.title:
        title = document.title.get_text(" ", strip=True)
    else:
        title = "Geekbench Result"
    text = document.get_text("\n", strip=True)
    return title, text, blocks

def detect_scores(text: str):
    """Look up every label in ``SCORE_LABELS`` within *text*.

    Returns:
        A dict with one entry per label, in ``SCORE_LABELS`` order, mapping
        the label to whatever ``extract_text_score`` returns for it.
    """
    return {label: extract_text_score(text, label) for label in SCORE_LABELS}

def write_block(ws, start_row, title, df):
    """Write one titled table into worksheet *ws* starting at *start_row*.

    Layout, top to bottom: a bold title in column 1, a bold, filled header
    row with the column names of *df*, then one row per DataFrame row.  All
    values are written as strings; missing values become empty strings.

    Returns:
        The row at which the next block should start, which leaves two
        empty rows after the last data row.
    """
    header_row = start_row + 1
    first_data_row = header_row + 1

    heading = ws.cell(row=start_row, column=1, value=title)
    heading.font = Font(bold=True)

    for col_no, col_name in enumerate(df.columns, start=1):
        header_cell = ws.cell(row=header_row, column=col_no, value=str(col_name))
        header_cell.font = Font(bold=True)
        header_cell.fill = PatternFill("solid", fgColor="D9EAF7")

    for offset, record in enumerate(df.itertuples(index=False)):
        for col_no, raw in enumerate(record, start=1):
            shown = "" if pd.isna(raw) else str(raw)
            ws.cell(row=first_data_row + offset, column=col_no, value=shown)

    return header_row + len(df) + 3

def autofit(ws):
    """Size every column of *ws* to its longest cell text.

    The width is the length of the longest stringified cell value plus two
    characters of padding, capped at 60.  Empty cells count as length 0.
    """
    for column in ws.columns:
        letter = get_column_letter(column[0].column)
        widest = max(
            (len("" if cell.value is None else str(cell.value)) for cell in column),
            default=0,
        )
        ws.column_dimensions[letter].width = min(widest + 2, 60)

def main():
    """Command-line entry point: scrape result pages into one Excel workbook.

    Usage: ``python geekbench_to_excel.py output.xlsx <url1> <url2> ...``

    Each URL that can be downloaded gets its own worksheet, named after its
    position on the command line and the first part of the page title, and
    holding every table found on the page.  A final "Overview" sheet lists
    title, URL and the detected scores of all processed pages.

    URLs whose download fails are reported on stderr and skipped, so one
    bad link does not discard the rest of the batch.  The process exits
    with status 1 when too few arguments are given or when no URL could be
    downloaded at all.
    """
    if len(sys.argv) < 3:
        print("Usage: python geekbench_to_excel.py output.xlsx <url1> <url2> ...")
        sys.exit(1)

    output_file = Path(sys.argv[1])
    urls = sys.argv[2:]

    print(f"Processing {len(urls)} URLs...")

    # Download and parse everything before the workbook is opened, so that a
    # network failure can never interrupt a half-written workbook.
    pages = []
    for i, url in enumerate(urls, start=1):
        try:
            html = fetch_html(url)
        except requests.RequestException as exc:
            print(f"Skipping {url}: {exc}", file=sys.stderr)
            continue
        title, text, blocks = parse_tables(html)
        pages.append((i, url, title, detect_scores(text), blocks))

    if not pages:
        print("No URL could be downloaded; no workbook written.", file=sys.stderr)
        sys.exit(1)

    overview_rows = []

    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        workbook = writer.book

        for i, url, title, scores, blocks in pages:
            # The numeric prefix keeps sheet names unique even when several
            # pages share the same title.
            sheet_name = clean_sheet_name(f"{i}_{title.split('-')[0].strip()}")
            ws = workbook.create_sheet(sheet_name)

            ws["A1"] = title
            ws["A1"].font = Font(bold=True, size=14)
            ws["A2"] = url

            row = 4
            for section_title, df in blocks:
                row = write_block(ws, row, section_title, df)

            ws.freeze_panes = "A4"
            autofit(ws)

            overview_rows.append({
                "Titel": title,
                "URL": url,
                **scores
            })

        overview_df = pd.DataFrame(overview_rows)
        overview_df.to_excel(writer, sheet_name="Overview", index=False)

        ws = workbook["Overview"]
        for cell in ws[1]:
            cell.font = Font(bold=True)
            cell.fill = PatternFill("solid", fgColor="B7DEE8")
        ws.freeze_panes = "A2"
        autofit(ws)

        # Remove a leftover default sheet, but never the only sheet.
        if "Sheet" in workbook.sheetnames and len(workbook.sheetnames) > 1:
            del workbook["Sheet"]

    print(f"Finished: {output_file}")

# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()