Init
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
*.xlsx
|
||||||
|
.venv/
|
||||||
|
__pycache__/
|
||||||
151
main.py
Normal file
151
main.py
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from io import StringIO
|
||||||
|
from pathlib import Path
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from openpyxl.styles import Font, PatternFill
|
||||||
|
from openpyxl.utils import get_column_letter
|
||||||
|
|
||||||
|
# Browser-like request headers sent with every fetch_html() call.
# NOTE(review): assumes the result pages refuse or throttle the default
# python-requests User-Agent -- confirm the spoofed UA is actually needed.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/122.0 Safari/537.36"
}
|
||||||
|
|
||||||
|
# Headline score labels that detect_scores() searches for in the page text;
# each label becomes a column in the "Overview" sheet. Labels not found on a
# given page map to None (see extract_text_score).
SCORE_LABELS = [
    "Single-Core Score",
    "Multi-Core Score",
    "Metal Score",
    "OpenCL Score",
    "Vulkan Score",
    "CUDA Score",
]
|
||||||
|
|
||||||
|
def clean_sheet_name(name: str) -> str:
    """Sanitize *name* into a valid Excel worksheet title.

    Excel rejects the characters \\ / * ? : [ ] in sheet names and caps
    titles at 31 characters. Each forbidden character becomes an
    underscore; an empty result falls back to "Sheet".
    """
    forbidden = '\\/*?:[]'
    sanitized = "".join("_" if ch in forbidden else ch for ch in name)
    return sanitized[:31] or "Sheet"
|
||||||
|
|
||||||
|
def fetch_html(url: str) -> str:
    """Download *url* and return the response body as text.

    Sends the browser-like HEADERS with a 20-second network timeout;
    raises requests.HTTPError (via raise_for_status) on a non-2xx reply.
    """
    response = requests.get(url, headers=HEADERS, timeout=20)
    response.raise_for_status()
    return response.text
|
||||||
|
|
||||||
|
def extract_text_score(text: str, label: str):
    """Find the numeric score associated with *label* in plain text.

    Accepts either "1234 Some Label" or "Some Label 1234" ordering,
    matched case-insensitively. Returns the number as a string with
    thousands separators stripped, or None when no match exists.
    """
    escaped = re.escape(label)
    candidates = (
        rf"(\d[\d,\.]*)\s+{escaped}",  # number before the label
        rf"{escaped}\s+(\d[\d,\.]*)",  # label before the number
    )
    for candidate in candidates:
        match = re.search(candidate, text, re.IGNORECASE)
        if match is None:
            continue
        return match.group(1).replace(",", "")
    return None
|
||||||
|
|
||||||
|
def parse_tables(html: str):
    """Pair every <table> in *html* with its nearest preceding heading.

    Returns a ``(title, text, blocks)`` triple: the page title (or the
    fallback "Geekbench Result"), the document's full visible text, and
    a list of ``(section_heading, DataFrame)`` tuples. Tables that
    pandas cannot parse are skipped silently.
    """
    soup = BeautifulSoup(html, "html.parser")
    blocks = []
    section = "General"

    # Walk headings and tables in document order so each table is tagged
    # with the most recent heading seen before it.
    for element in soup.find_all(["h1", "h2", "h3", "h4", "table"]):
        if element.name == "table":
            try:
                frame = pd.read_html(StringIO(str(element)))[0]
                frame = frame.dropna(how="all").dropna(axis=1, how="all")
            except ValueError:
                # Table had nothing pandas could parse; skip it.
                continue
            blocks.append((section, frame))
        else:
            section = " ".join(element.get_text(" ", strip=True).split())

    if soup.title:
        title = soup.title.get_text(" ", strip=True)
    else:
        title = "Geekbench Result"
    text = soup.get_text("\n", strip=True)
    return title, text, blocks
|
||||||
|
|
||||||
|
def detect_scores(text: str):
    """Map every label in SCORE_LABELS to its value found in *text*.

    Labels absent from the text map to None (see extract_text_score).
    """
    return {label: extract_text_score(text, label) for label in SCORE_LABELS}
|
||||||
|
|
||||||
|
def write_block(ws, start_row, title, df):
    """Render one (title, DataFrame) section onto worksheet *ws*.

    Writes a bold section title at ``start_row``, a shaded header row
    beneath it, then the frame's values (NaN rendered as ""). Returns
    the row index where the next section should begin, leaving a
    two-row gap after the data.
    """
    title_cell = ws.cell(start_row, 1, title)
    title_cell.font = Font(bold=True)

    header_row = start_row + 1
    for col_idx, col_name in enumerate(df.columns, start=1):
        header = ws.cell(header_row, col_idx, str(col_name))
        header.font = Font(bold=True)
        header.fill = PatternFill("solid", fgColor="D9EAF7")

    first_data_row = header_row + 1
    for offset, record in enumerate(df.itertuples(index=False)):
        for col_idx, value in enumerate(record, start=1):
            rendered = "" if pd.isna(value) else str(value)
            ws.cell(first_data_row + offset, col_idx, rendered)

    return header_row + len(df) + 3
|
||||||
|
|
||||||
|
def autofit(ws):
    """Size each column of *ws* to fit its longest rendered value.

    Width is the longest stringified cell value plus 2 characters of
    padding, capped at 60 so very long cells don't blow up the layout.
    """
    for column in ws.columns:
        letter = get_column_letter(column[0].column)
        longest = max(
            (len(str(cell.value)) for cell in column if cell.value is not None),
            default=0,
        )
        ws.column_dimensions[letter].width = min(longest + 2, 60)
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: scrape Geekbench result pages into one workbook.

    Usage: python geekbench_to_excel.py output.xlsx <url1> <url2> ...

    Each URL gets its own detail sheet (title, URL, then every table
    found on the page); a final "Overview" sheet lists the headline
    scores for all results side by side. Exits with status 1 on
    missing arguments.
    """
    if len(sys.argv) < 3:
        print("Usage: python geekbench_to_excel.py output.xlsx <url1> <url2> ...")
        sys.exit(1)

    output_file = Path(sys.argv[1])
    urls = sys.argv[2:]

    print(f"Processing {len(urls)} URLs...")

    overview_rows = []

    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        for i, url in enumerate(urls, start=1):
            # Robustness fix: previously a single unreachable/failing URL
            # raised out of fetch_html and aborted the whole batch with no
            # workbook written. Report the failure and keep going instead.
            try:
                html = fetch_html(url)
            except requests.RequestException as exc:
                print(f"Skipping {url}: {exc}")
                continue

            title, text, blocks = parse_tables(html)
            scores = detect_scores(text)

            # Sheet name: running index plus the title up to the first
            # dash, sanitized/truncated for Excel's naming rules.
            sheet_name = clean_sheet_name(f"{i}_{title.split('-')[0].strip()}")
            ws = writer.book.create_sheet(sheet_name)

            ws["A1"] = title
            ws["A1"].font = Font(bold=True, size=14)
            ws["A2"] = url

            row = 4
            for section_title, df in blocks:
                row = write_block(ws, row, section_title, df)

            ws.freeze_panes = "A4"  # keep title/URL visible while scrolling
            autofit(ws)

            overview_rows.append({
                # NOTE(review): "Titel" is German for "Title" while the rest
                # of the output is English -- intentional? Renaming would
                # change the visible Overview column header, so left as-is.
                "Titel": title,
                "URL": url,
                **scores,
            })

        # Summary sheet: one row per successfully processed result.
        overview_df = pd.DataFrame(overview_rows)
        overview_df.to_excel(writer, sheet_name="Overview", index=False)

        ws = writer.book["Overview"]
        for cell in ws[1]:
            cell.font = Font(bold=True)
            cell.fill = PatternFill("solid", fgColor="B7DEE8")
        ws.freeze_panes = "A2"
        autofit(ws)

        # Drop openpyxl's default empty "Sheet" if any real sheet exists.
        if "Sheet" in writer.book.sheetnames and len(writer.book.sheetnames) > 1:
            del writer.book["Sheet"]

    print(f"Finished: {output_file}")
|
||||||
|
|
||||||
|
# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user