commit 69e23d4bf78abb7e3c1be221658d8c7df5a9b862
Author: Michelle <michelle.winkler@miichelle.moe>
Date:   Thu Mar 19 08:33:48 2026 +0100

    Init

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..048602c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.xlsx
+.venv/
+__pycache__/
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..fe214e2
--- /dev/null
+++ b/main.py
@@ -0,0 +1,151 @@
+import re
+import sys
+from io import StringIO
+from pathlib import Path
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from openpyxl.styles import Font, PatternFill
+from openpyxl.utils import get_column_letter
+
+# HTTP headers sent with every request made by fetch_html(). The User-Agent
+# value is that of desktop Chrome on Windows.
+# NOTE(review): presumably set because the target site treats the default
+# python-requests User-Agent differently -- confirm.
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+                  "(KHTML, like Gecko) Chrome/122.0 Safari/537.36"
+}
+
+# Labels that detect_scores() searches for in the page text. Each label is a
+# key of the dict it returns and thereby a column of the "Overview" sheet.
+SCORE_LABELS = [
+    "Single-Core Score",
+    "Multi-Core Score",
+    "Metal Score",
+    "OpenCL Score",
+    "Vulkan Score",
+    "CUDA Score",
+]
+
+def clean_sheet_name(name: str) -> str:
+    """Turn ``name`` into a string usable as a worksheet title.
+
+    The characters ``\\ / * ? : [ ]`` are each replaced by an underscore and
+    the result is cut to at most 31 characters. An empty result falls back
+    to "Sheet".
+    """
+    sanitized = re.sub(r'[\\/*?:\[\]]', "_", name)
+    truncated = sanitized[:31]
+    if not truncated:
+        return "Sheet"
+    return truncated
+
+def fetch_html(url: str) -> str:
+    """Download ``url`` and return the response body as text.
+
+    The request is sent with the module-level ``HEADERS`` and a 20 second
+    timeout. ``raise_for_status()`` is called on the response, so an HTTP
+    error status raises instead of returning the error page.
+    """
+    response = requests.get(url, headers=HEADERS, timeout=20)
+    response.raise_for_status()
+    return response.text
+
+def extract_text_score(text: str, label: str):
+    """Find the number printed next to ``label`` in ``text``.
+
+    Two layouts are recognised, separated from the label by whitespace
+    (newlines included): the number directly before the label
+    ("2500 Single-Core Score") and the number directly after it
+    ("Single-Core Score 2500"). The "number first" layout is searched over
+    the whole text before the "label first" layout is tried. Matching is
+    case-insensitive.
+
+    Args:
+        text: Plain text to search.
+        label: Literal label to look for; it is regex-escaped before use.
+
+    Returns:
+        The number as a string with thousands commas removed (e.g. "1234",
+        "12.5"), or None when neither layout occurs in ``text``.
+    """
+    needle = re.escape(label)
+    number = r"(\d[\d,\.]*)"
+    for pattern in (rf"{number}\s+{needle}", rf"{needle}\s+{number}"):
+        match = re.search(pattern, text, re.IGNORECASE)
+        if match:
+            # The character class also swallows punctuation that merely
+            # follows the number ("2500." ending a sentence, "1,234," in a
+            # list). Strip it first, then drop the thousands separators.
+            return match.group(1).rstrip(".,").replace(",", "")
+    return None
+
+def parse_tables(html: str):
+    """Split a result page into its title, its text and its tables.
+
+    Headings (h1-h4) and tables are visited in document order. Every table
+    is filed under the most recent heading seen before it, or "General"
+    when no heading precedes it. Rows and columns that are entirely empty
+    are dropped. A table whose conversion raises ValueError is skipped.
+
+    Returns:
+        A ``(title, text, blocks)`` tuple: the ``<title>`` text (or
+        "Geekbench Result" when the page has none), the page text joined
+        with newlines, and a list of ``(section, DataFrame)`` pairs.
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    section = "General"
+    blocks = []
+
+    for node in soup.find_all(["h1", "h2", "h3", "h4", "table"]):
+        if node.name != "table":
+            # A heading: collapse runs of whitespace and open a new section.
+            section = " ".join(node.get_text(" ", strip=True).split())
+            continue
+        try:
+            frame = pd.read_html(StringIO(str(node)))[0]
+            frame = frame.dropna(how="all").dropna(axis=1, how="all")
+            blocks.append((section, frame))
+        except ValueError:
+            continue
+
+    if soup.title:
+        title = soup.title.get_text(" ", strip=True)
+    else:
+        title = "Geekbench Result"
+    return title, soup.get_text("\n", strip=True), blocks
+
+def detect_scores(text: str):
+    """Look up every label in ``SCORE_LABELS`` within ``text``.
+
+    Returns a dict with one entry per label, in ``SCORE_LABELS`` order. The
+    value is whatever ``extract_text_score`` returns for that label, which
+    is None when the label's score is not present.
+    """
+    return {label: extract_text_score(text, label) for label in SCORE_LABELS}
+
+def write_block(ws, start_row, title, df):
+    """Write one titled table into ``ws`` and return the next free start row.
+
+    Layout, top to bottom: ``title`` in bold in column A at ``start_row``,
+    a bold header row with the column names on a solid "D9EAF7" fill, then
+    one worksheet row per row of ``df``. Every value is written as text and
+    missing values become empty strings.
+
+    Returns:
+        The row at which the next block should start, leaving two empty
+        rows after the last data row.
+    """
+    heading = ws.cell(row=start_row, column=1, value=title)
+    heading.font = Font(bold=True)
+
+    header_row = start_row + 1
+    for col_idx, column_name in enumerate(df.columns, start=1):
+        header = ws.cell(row=header_row, column=col_idx, value=str(column_name))
+        header.font = Font(bold=True)
+        header.fill = PatternFill("solid", fgColor="D9EAF7")
+
+    first_data_row = header_row + 1
+    for offset, record in enumerate(df.itertuples(index=False)):
+        for col_idx, value in enumerate(record, start=1):
+            text = "" if pd.isna(value) else str(value)
+            ws.cell(row=first_data_row + offset, column=col_idx, value=text)
+
+    return header_row + len(df) + 3
+
+def autofit(ws):
+    """Size every column of ``ws`` to fit its longest cell.
+
+    The width is the length of the longest cell value (as text, with empty
+    cells counting as zero) plus 2, capped at 60.
+    """
+    for column in ws.columns:
+        letter = get_column_letter(column[0].column)
+        widest = max(
+            (len("" if cell.value is None else str(cell.value)) for cell in column),
+            default=0,
+        )
+        ws.column_dimensions[letter].width = min(widest + 2, 60)
+
+def _write_result_sheet(workbook, index, url, title, blocks):
+    """Add one worksheet holding every table parsed from a single result page."""
+    sheet_name = clean_sheet_name(f"{index}_{title.split('-')[0].strip()}")
+    ws = workbook.create_sheet(sheet_name)
+
+    ws["A1"] = title
+    ws["A1"].font = Font(bold=True, size=14)
+    ws["A2"] = url
+
+    row = 4
+    for section_title, df in blocks:
+        row = write_block(ws, row, section_title, df)
+
+    # Rows 1-3 (title, URL, spacer) stay in place while scrolling.
+    ws.freeze_panes = "A4"
+    autofit(ws)
+
+def _write_overview_sheet(writer, overview_rows):
+    """Add the "Overview" sheet: one row per URL with title, URL and scores."""
+    pd.DataFrame(overview_rows).to_excel(writer, sheet_name="Overview", index=False)
+
+    ws = writer.book["Overview"]
+    for cell in ws[1]:
+        cell.font = Font(bold=True)
+        cell.fill = PatternFill("solid", fgColor="B7DEE8")
+    ws.freeze_panes = "A2"
+    autofit(ws)
+
+def main():
+    """Command-line entry point: export result pages to an Excel workbook.
+
+    Expects ``sys.argv`` to hold the output path followed by one or more
+    URLs. Each URL gets its own worksheet containing the tables found on
+    the page, and a final "Overview" sheet lists title, URL and detected
+    scores for all of them. With fewer than two arguments a usage line is
+    printed and the process exits with status 1.
+
+    Raises:
+        Whatever ``fetch_html`` raises for a failed download. No workbook is
+        opened or written in that case.
+    """
+    if len(sys.argv) < 3:
+        # Name the script as it was actually invoked rather than a fixed file name.
+        script = Path(sys.argv[0]).name
+        print(f"Usage: python {script} output.xlsx <url1> <url2> ...")
+        sys.exit(1)
+
+    output_file = Path(sys.argv[1])
+    urls = sys.argv[2:]
+
+    print(f"Processing {len(urls)} URLs...")
+
+    # Download and parse everything before the workbook is opened. A failed
+    # request then aborts the run before the output file is touched, instead
+    # of raising in the middle of a half-built workbook.
+    results = []
+    for url in urls:
+        title, text, blocks = parse_tables(fetch_html(url))
+        results.append((url, title, blocks, detect_scores(text)))
+
+    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
+        overview_rows = []
+        for i, (url, title, blocks, scores) in enumerate(results, start=1):
+            _write_result_sheet(writer.book, i, url, title, blocks)
+            overview_rows.append({
+                "Titel": title,
+                "URL": url,
+                **scores
+            })
+
+        _write_overview_sheet(writer, overview_rows)
+
+        # Remove a leftover sheet named "Sheet" if the workbook still has one.
+        if "Sheet" in writer.book.sheetnames and len(writer.book.sheetnames) > 1:
+            del writer.book["Sheet"]
+
+    print(f"Finished: {output_file}")
+
+if __name__ == "__main__":
+    main()