# Provenance: main.py as added by commit 69e23d4bf78abb7e3c1be221658d8c7df5a9b862
# ("Init", Author: Michelle, Thu Mar 19 08:33:48 2026 +0100).
# The same commit adds a .gitignore containing: *.xlsx, .venv/, __pycache__/
"""Export Geekbench result pages to an Excel workbook.

Command line: an output ``.xlsx`` path followed by one or more result URLs.
Every URL is downloaded, each HTML table found on the page is copied onto a
worksheet of its own, and the headline scores of all pages are collected on
an "Overview" sheet.
"""

import re
import sys
from io import StringIO
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import get_column_letter

# Sent with every request so the server sees a regular desktop browser.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/122.0 Safari/537.36"
}

# Score captions searched for in the page text; each becomes an Overview column.
SCORE_LABELS = [
    "Single-Core Score",
    "Multi-Core Score",
    "Metal Score",
    "OpenCL Score",
    "Vulkan Score",
    "CUDA Score",
]


def clean_sheet_name(name: str) -> str:
    """Return *name* turned into a valid Excel worksheet title.

    The characters ``\\ / * ? : [ ]`` are replaced by ``_``, the result is cut
    to 31 characters, and leading/trailing apostrophes are removed because
    Excel rejects titles that start or end with one. An empty result falls
    back to ``"Sheet"``.
    """
    name = re.sub(r'[\\/*?:\[\]]', "_", name)
    # Truncate first so an apostrophe exposed by the cut is stripped as well.
    return name[:31].strip("'") or "Sheet"


def fetch_html(url: str) -> str:
    """Download *url* and return the response body as text.

    Raises:
        requests.RequestException: on connection problems, timeouts
            (20 seconds) or an HTTP error status.
    """
    r = requests.get(url, headers=HEADERS, timeout=20)
    r.raise_for_status()
    return r.text


def extract_text_score(text: str, label: str):
    """Find the number printed next to *label* in *text*.

    A number directly before the label is preferred over one directly after
    it. Matching ignores case. Returns the number as a string with thousands
    commas removed, or ``None`` when the label has no adjacent number.
    """
    patterns = [
        rf"(\d[\d,\.]*)\s+{re.escape(label)}",
        rf"{re.escape(label)}\s+(\d[\d,\.]*)",
    ]
    for pattern in patterns:
        m = re.search(pattern, text, re.IGNORECASE)
        if m:
            return m.group(1).replace(",", "")
    return None


def parse_tables(html: str):
    """Split a result page into its title, plain text and tables.

    Returns:
        A ``(title, text, blocks)`` tuple. ``blocks`` is a list of
        ``(section, DataFrame)`` pairs in document order, where ``section``
        is the text of the closest preceding h1-h4 heading ("General" when
        no heading came first).
    """
    soup = BeautifulSoup(html, "html.parser")
    blocks = []
    current_section = "General"

    for tag in soup.find_all(["h1", "h2", "h3", "h4", "table"]):
        if tag.name != "table":
            # A heading: remember it (whitespace collapsed) for later tables.
            current_section = " ".join(tag.get_text(" ", strip=True).split())
            continue
        try:
            df = pd.read_html(StringIO(str(tag)))[0]
        except ValueError:
            # pandas found nothing it could parse in this <table>; such
            # tables are deliberately left out instead of aborting the page.
            continue
        df = df.dropna(how="all").dropna(axis=1, how="all")
        blocks.append((current_section, df))

    title = soup.title.get_text(" ", strip=True) if soup.title else "Geekbench Result"
    text = soup.get_text("\n", strip=True)
    return title, text, blocks


def detect_scores(text: str):
    """Return ``{label: score or None}`` for every entry of ``SCORE_LABELS``."""
    return {label: extract_text_score(text, label) for label in SCORE_LABELS}


def write_block(ws, start_row, title, df):
    """Write one titled table onto worksheet *ws*.

    Layout: a bold title in column A of *start_row*, a shaded bold header row
    below it, then one row per DataFrame row. Cell values are written as
    text; missing values become empty strings.

    Returns:
        The row at which the next block should start (two blank rows are
        left after the last data row).
    """
    title_cell = ws.cell(start_row, 1, title)
    title_cell.font = Font(bold=True)
    start_row += 1  # from here on start_row is the header row

    for c, col in enumerate(df.columns, start=1):
        cell = ws.cell(start_row, c, str(col))
        cell.font = Font(bold=True)
        cell.fill = PatternFill("solid", fgColor="D9EAF7")

    for r_idx, row in enumerate(df.itertuples(index=False), start=start_row + 1):
        for c_idx, value in enumerate(row, start=1):
            ws.cell(r_idx, c_idx, "" if pd.isna(value) else str(value))

    return start_row + len(df) + 3


def autofit(ws):
    """Size every column of *ws* to its longest cell text plus 2, capped at 60."""
    for col_cells in ws.columns:
        max_len = 0
        col_idx = col_cells[0].column
        for cell in col_cells:
            value = "" if cell.value is None else str(cell.value)
            max_len = max(max_len, len(value))
        ws.column_dimensions[get_column_letter(col_idx)].width = min(max_len + 2, 60)


def _as_number(text):
    """Return *text* as ``int`` or ``float`` when it parses as one, else unchanged."""
    if text is None:
        return None
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue
    return text


def _fetch_results(urls):
    """Download and parse every URL that can be retrieved.

    Returns a list of ``(position, url, title, blocks, scores)`` tuples, where
    ``position`` is the 1-based index of the URL on the command line. URLs
    whose download fails are reported on stderr and left out.
    """
    results = []
    for position, url in enumerate(urls, start=1):
        try:
            html = fetch_html(url)
        except requests.RequestException as exc:
            print(f"Skipping {url}: {exc}", file=sys.stderr)
            continue
        title, text, blocks = parse_tables(html)
        results.append((position, url, title, blocks, detect_scores(text)))
    return results


def _write_result_sheet(workbook, position, url, title, blocks):
    """Create the worksheet for one result page and fill it with its tables."""
    sheet_name = clean_sheet_name(f"{position}_{title.split('-')[0].strip()}")
    ws = workbook.create_sheet(sheet_name)

    ws["A1"] = title
    ws["A1"].font = Font(bold=True, size=14)
    ws["A2"] = url

    row = 4
    for section_title, df in blocks:
        row = write_block(ws, row, section_title, df)

    # Keep the title and URL rows in view while scrolling through the tables.
    ws.freeze_panes = "A4"
    autofit(ws)


def _write_overview(writer, overview_rows):
    """Write the "Overview" sheet (one row per page) and style its header."""
    overview_df = pd.DataFrame(overview_rows)
    overview_df.to_excel(writer, sheet_name="Overview", index=False)

    ws = writer.book["Overview"]
    for cell in ws[1]:
        cell.font = Font(bold=True)
        cell.fill = PatternFill("solid", fgColor="B7DEE8")
    ws.freeze_panes = "A2"
    autofit(ws)


def main():
    """Command-line entry point: ``output.xlsx URL [URL ...]``.

    Exits with status 1 on wrong usage, when no page could be downloaded
    (no file is written in that case), or when at least one URL had to be
    skipped.
    """
    if len(sys.argv) < 3:
        print(
            "Usage: python geekbench_to_excel.py output.xlsx URL [URL ...]",
            file=sys.stderr,
        )
        sys.exit(1)

    output_file = Path(sys.argv[1])
    urls = sys.argv[2:]

    print(f"Processing {len(urls)} URLs...")

    # All network access happens before the workbook is opened, so a failed
    # download can never leave a half-written file behind.
    results = _fetch_results(urls)
    if not results:
        print("No page could be downloaded; nothing written.", file=sys.stderr)
        sys.exit(1)

    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        overview_rows = []
        for position, url, title, blocks, scores in results:
            _write_result_sheet(writer.book, position, url, title, blocks)
            overview_rows.append({
                "Titel": title,
                "URL": url,
                # Numeric cells let Excel sort and compare the scores.
                **{label: _as_number(score) for label, score in scores.items()},
            })

        _write_overview(writer, overview_rows)

        if "Sheet" in writer.book.sheetnames and len(writer.book.sheetnames) > 1:
            del writer.book["Sheet"]

    print(f"Finished: {output_file}")

    if len(results) < len(urls):
        # Some URLs were skipped: keep signalling failure to calling scripts.
        sys.exit(1)


if __name__ == "__main__":
    main()