This commit is contained in:
2026-03-19 08:33:48 +01:00
commit 69e23d4bf7
2 changed files with 154 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
*.xlsx
.venv/
__pycache__/

151
main.py Normal file
View File

@@ -0,0 +1,151 @@
import re
import sys
from io import StringIO
from pathlib import Path
import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import get_column_letter
# Browser-like request headers so the result pages are served to the
# scraper the same way they would be to a regular browser.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/122.0 Safari/537.36"
}
# Headline score labels searched for in each page's plain text; these become
# the score columns of the "Overview" sheet. Labels missing from a given
# result page end up as None in that row.
SCORE_LABELS = [
    "Single-Core Score",
    "Multi-Core Score",
    "Metal Score",
    "OpenCL Score",
    "Vulkan Score",
    "CUDA Score",
]
def clean_sheet_name(name: str) -> str:
    """Sanitize *name* into a valid Excel worksheet title.

    Excel forbids the characters \\ / * ? : [ ] in sheet names and caps the
    length at 31 characters; an empty result falls back to "Sheet".
    """
    sanitized = re.sub(r'[\\/*?:\[\]]', "_", name)
    truncated = sanitized[:31]
    return truncated if truncated else "Sheet"
def fetch_html(url: str) -> str:
    """Download *url* with browser-like headers and return the body as text.

    Raises requests.HTTPError for non-2xx responses and requests.Timeout
    after 20 seconds without a response.
    """
    response = requests.get(url, headers=HEADERS, timeout=20)
    response.raise_for_status()
    return response.text
def extract_text_score(text: str, label: str):
    """Find the numeric score printed next to *label* in raw page text.

    Tries "<number> <label>" first, then "<label> <number>", matching
    case-insensitively. Returns the matched number with thousands-commas
    stripped, or None when the label is not accompanied by a number.
    """
    escaped = re.escape(label)
    candidates = (
        rf"(\d[\d,\.]*)\s+{escaped}",
        rf"{escaped}\s+(\d[\d,\.]*)",
    )
    for pattern in candidates:
        match = re.search(pattern, text, re.IGNORECASE)
        if match is not None:
            return match.group(1).replace(",", "")
    return None
def parse_tables(html: str):
    """Walk the document, pairing every <table> with its nearest preceding heading.

    Returns a 3-tuple of (page title, full plain text of the page, list of
    (section heading, DataFrame) pairs). Tables pandas cannot parse are
    skipped; tables appearing before any heading get the section "General".
    """
    soup = BeautifulSoup(html, "html.parser")
    section = "General"
    blocks = []
    for element in soup.find_all(["h1", "h2", "h3", "h4", "table"]):
        if element.name != "table":
            # A heading: remember it as the label for the tables that follow.
            section = " ".join(element.get_text(" ", strip=True).split())
            continue
        try:
            frame = pd.read_html(StringIO(str(element)))[0]
        except ValueError:
            continue  # e.g. a table with no parseable rows
        frame = frame.dropna(how="all").dropna(axis=1, how="all")
        blocks.append((section, frame))
    if soup.title:
        title = soup.title.get_text(" ", strip=True)
    else:
        title = "Geekbench Result"
    return title, soup.get_text("\n", strip=True), blocks
def detect_scores(text: str):
    """Map every label in SCORE_LABELS to its value found in *text*.

    Labels with no matching number map to None.
    """
    return {label: extract_text_score(text, label) for label in SCORE_LABELS}
def write_block(ws, start_row, title, df):
    """Render one (title, DataFrame) section onto worksheet *ws*.

    Layout: a bold title row at *start_row*, a shaded bold header row, then
    the data rows rendered as strings (NaN becomes ""). Returns the row index
    where the next section should start, leaving two blank separator rows.
    """
    title_cell = ws.cell(start_row, 1, title)
    title_cell.font = Font(bold=True)

    header_row = start_row + 1
    for col_idx, column in enumerate(df.columns, start=1):
        header = ws.cell(header_row, col_idx, str(column))
        header.font = Font(bold=True)
        header.fill = PatternFill("solid", fgColor="D9EAF7")

    data_row = header_row
    for record in df.itertuples(index=False):
        data_row += 1
        for col_idx, value in enumerate(record, start=1):
            ws.cell(data_row, col_idx, "" if pd.isna(value) else str(value))

    return header_row + len(df) + 3
def autofit(ws):
    """Widen each worksheet column to fit its longest value, capped at 60."""
    for column_cells in ws.columns:
        longest = max(
            (len(str(cell.value)) for cell in column_cells if cell.value is not None),
            default=0,
        )
        letter = get_column_letter(column_cells[0].column)
        # +2 gives a little padding; 60 keeps pathological cells readable.
        ws.column_dimensions[letter].width = min(longest + 2, 60)
def main():
    """CLI entry point: scrape each Geekbench result URL into its own sheet.

    Usage: python geekbench_to_excel.py output.xlsx <url1> <url2> ...

    Builds one worksheet per URL (title, URL, then every table found on the
    page) plus an "Overview" sheet collecting the headline scores.
    """
    if len(sys.argv) < 3:
        print("Usage: python geekbench_to_excel.py output.xlsx <url1> <url2> ...")
        sys.exit(1)

    target = Path(sys.argv[1])
    result_urls = sys.argv[2:]
    print(f"Processing {len(result_urls)} URLs...")

    summary = []
    with pd.ExcelWriter(target, engine="openpyxl") as writer:
        for index, url in enumerate(result_urls, start=1):
            page = fetch_html(url)
            title, text, blocks = parse_tables(page)
            scores = detect_scores(text)

            # Sheet name: running index plus the leading part of the title.
            sheet = writer.book.create_sheet(
                clean_sheet_name(f"{index}_{title.split('-')[0].strip()}")
            )
            sheet["A1"] = title
            sheet["A1"].font = Font(bold=True, size=14)
            sheet["A2"] = url

            next_row = 4
            for section_title, frame in blocks:
                next_row = write_block(sheet, next_row, section_title, frame)
            sheet.freeze_panes = "A4"
            autofit(sheet)

            summary.append({"Titel": title, "URL": url, **scores})

        pd.DataFrame(summary).to_excel(writer, sheet_name="Overview", index=False)
        overview = writer.book["Overview"]
        for header_cell in overview[1]:
            header_cell.font = Font(bold=True)
            header_cell.fill = PatternFill("solid", fgColor="B7DEE8")
        overview.freeze_panes = "A2"
        autofit(overview)

        # Drop openpyxl's default empty sheet if real sheets were created.
        if "Sheet" in writer.book.sheetnames and len(writer.book.sheetnames) > 1:
            del writer.book["Sheet"]

    print(f"Finished: {target}")


if __name__ == "__main__":
    main()