"""Export Geekbench result pages to an Excel workbook.

Usage:
    python geekbench_to_excel.py output.xlsx <url> [<url> ...]

Every URL gets its own worksheet holding the HTML tables found on the page;
an additional "Overview" worksheet lists the title, URL and detected scores
of each successfully processed page.
"""

import re
import sys
from io import StringIO
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import get_column_letter

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/122.0 Safari/537.36"
}

# Score labels searched for in the page text; also the Overview score columns.
SCORE_LABELS = [
    "Single-Core Score",
    "Multi-Core Score",
    "Metal Score",
    "OpenCL Score",
    "Vulkan Score",
    "CUDA Score",
]


def clean_sheet_name(name: str) -> str:
    """Return *name* made usable as a worksheet title.

    The characters backslash, slash, ``*``, ``?``, ``:``, ``[`` and ``]`` are
    replaced by underscores and the result is cut to 31 characters.  An empty
    result falls back to ``"Sheet"``.
    """
    name = re.sub(r'[\\/*?:\[\]]', "_", name)
    return name[:31] or "Sheet"


def fetch_html(url: str) -> str:
    """Download *url* and return the response body as text.

    Raises:
        requests.RequestException: on connection problems, timeouts (20 s)
            or an HTTP error status.
    """
    r = requests.get(url, headers=HEADERS, timeout=20)
    r.raise_for_status()
    return r.text


def extract_text_score(text: str, label: str):
    """Find the number that accompanies *label* in *text*.

    A number directly before the label is tried first, then a number directly
    after it; matching is case-insensitive.

    Returns:
        The matched number as a string with commas removed, or ``None`` when
        the label has no adjacent number.
    """
    patterns = [
        rf"(\d[\d,\.]*)\s+{re.escape(label)}",
        rf"{re.escape(label)}\s+(\d[\d,\.]*)",
    ]
    for pattern in patterns:
        m = re.search(pattern, text, re.IGNORECASE)
        if m:
            return m.group(1).replace(",", "")
    return None


def parse_tables(html: str):
    """Extract the title, plain text and all tables from an HTML page.

    Each table is associated with the most recent ``h1``-``h4`` heading that
    precedes it in document order ("General" when there is none).

    Returns:
        A tuple ``(title, text, blocks)`` where *blocks* is a list of
        ``(section_title, DataFrame)`` pairs.
    """
    soup = BeautifulSoup(html, "html.parser")
    blocks = []
    current_section = "General"

    for tag in soup.find_all(["h1", "h2", "h3", "h4", "table"]):
        if tag.name in {"h1", "h2", "h3", "h4"}:
            # Collapse any run of whitespace inside the heading to one space.
            current_section = " ".join(tag.get_text(" ", strip=True).split())
        elif tag.name == "table":
            try:
                df = pd.read_html(StringIO(str(tag)))[0]
            except ValueError:
                # Best effort: tables pandas cannot parse are skipped.
                continue
            df = df.dropna(how="all").dropna(axis=1, how="all")
            blocks.append((current_section, df))

    title = soup.title.get_text(" ", strip=True) if soup.title else "Geekbench Result"
    text = soup.get_text("\n", strip=True)
    return title, text, blocks


def detect_scores(text: str):
    """Return a dict mapping every entry of SCORE_LABELS to its score in *text*.

    Values are the strings produced by ``extract_text_score`` (``None`` when a
    label was not found).
    """
    return {label: extract_text_score(text, label) for label in SCORE_LABELS}


def write_block(ws, start_row, title, df):
    """Write a titled table into worksheet *ws* beginning at *start_row*.

    The bold title goes into column A of *start_row*, followed by a bold,
    filled header row and one row per DataFrame record.  All values are
    written as text; missing values become empty strings.

    Returns:
        The row at which the next block should start (two blank rows are left
        after the data).
    """
    ws.cell(start_row, 1, title)
    ws.cell(start_row, 1).font = Font(bold=True)
    start_row += 1

    for c, col in enumerate(df.columns, start=1):
        cell = ws.cell(start_row, c, str(col))
        cell.font = Font(bold=True)
        cell.fill = PatternFill("solid", fgColor="D9EAF7")

    for r_idx, row in enumerate(df.itertuples(index=False), start=start_row + 1):
        for c_idx, value in enumerate(row, start=1):
            ws.cell(r_idx, c_idx, "" if pd.isna(value) else str(value))

    return start_row + len(df) + 3


def autofit(ws):
    """Set each column width of *ws* to its longest cell text + 2, capped at 60."""
    for col_cells in ws.columns:
        max_len = 0
        col_idx = col_cells[0].column
        for cell in col_cells:
            value = "" if cell.value is None else str(cell.value)
            max_len = max(max_len, len(value))
        ws.column_dimensions[get_column_letter(col_idx)].width = min(max_len + 2, 60)


def _as_number(value):
    """Return *value* as an int if it is an all-digit string, else unchanged."""
    if isinstance(value, str) and value.isdecimal():
        return int(value)
    return value


def main():
    """Command-line entry point: fetch every URL and build the workbook.

    ``sys.argv[1]`` is the output file, all further arguments are URLs.  A URL
    that cannot be downloaded is reported on stderr and skipped so that the
    remaining pages are still exported; in that case the script exits with
    status 1 after the workbook has been written.
    """
    if len(sys.argv) < 3:
        print(
            "Usage: python geekbench_to_excel.py output.xlsx <url> [<url> ...]",
            file=sys.stderr,
        )
        sys.exit(1)

    output_file = Path(sys.argv[1])
    urls = sys.argv[2:]
    print(f"Processing {len(urls)} URLs...")

    overview_rows = []
    failed_urls = []

    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        workbook = writer.book

        for i, url in enumerate(urls, start=1):
            try:
                html = fetch_html(url)
            except requests.RequestException as exc:
                # One unreachable page must not discard the work already done.
                print(f"Skipping {url}: {exc}", file=sys.stderr)
                failed_urls.append(url)
                continue

            title, text, blocks = parse_tables(html)
            scores = detect_scores(text)

            # The index prefix keeps sheet names unique even for equal titles.
            sheet_name = clean_sheet_name(f"{i}_{title.split('-')[0].strip()}")
            ws = workbook.create_sheet(sheet_name)
            ws["A1"] = title
            ws["A1"].font = Font(bold=True, size=14)
            ws["A2"] = url

            row = 4
            for section_title, df in blocks:
                row = write_block(ws, row, section_title, df)

            ws.freeze_panes = "A4"
            autofit(ws)

            overview_rows.append({
                "Titel": title,
                "URL": url,
                # Store scores as numbers so Excel can sort and chart them.
                **{label: _as_number(score) for label, score in scores.items()},
            })

        # Explicit columns keep the header row even if every URL failed;
        # dtype=object stops pandas from turning int scores into floats
        # when some scores are missing.
        overview_df = pd.DataFrame(
            overview_rows,
            columns=["Titel", "URL", *SCORE_LABELS],
            dtype=object,
        )
        overview_df.to_excel(writer, sheet_name="Overview", index=False)

        ws = writer.book["Overview"]
        for cell in ws[1]:
            cell.font = Font(bold=True)
            cell.fill = PatternFill("solid", fgColor="B7DEE8")
        ws.freeze_panes = "A2"
        autofit(ws)

        # Drop the default empty sheet if the workbook still contains one.
        if "Sheet" in writer.book.sheetnames and len(writer.book.sheetnames) > 1:
            del writer.book["Sheet"]

    print(f"Finished: {output_file}")

    if failed_urls:
        print(
            f"{len(failed_urls)} of {len(urls)} URLs could not be fetched.",
            file=sys.stderr,
        )
        sys.exit(1)


if __name__ == "__main__":
    main()