From 23691ba6639374b9331add5876caf8be62ecb322 Mon Sep 17 00:00:00 2001
From: Michelle Winkler
Date: Wed, 24 Sep 2025 10:02:43 +0200
Subject: [PATCH] init

---
 .gitignore |  1 +
 app.py     | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 app.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..eba74f4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+venv/
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..e0fb07a
--- /dev/null
+++ b/app.py
@@ -0,0 +1,50 @@
+"""Time a single text generation with an int8 weight-only quantized causal LM.
+
+Loads the model, quantizes its weights with torchao, moves it to the GPU,
+reads one prompt from stdin and prints the generation time and the text.
+"""
+
+import time
+
+import torch
+from torchao.quantization import int8_weight_only, quantize_
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+MODEL_NAME = "swiss-ai/Apertus-8B-Instruct-2509"
+DEVICE = "cuda"
+# Upper bound on prompt tokens plus generated tokens for one run.
+MAX_LENGTH = 5000
+
+
+def main():
+    """Load and quantize the model, then time one generation for a typed prompt."""
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    # NOTE(review): the checkpoint is loaded in the library's default dtype;
+    # consider requesting a half-precision dtype to reduce host memory.
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+
+    # Quantize the weights first, then move the quantized model to the GPU.
+    quantize_(model, int8_weight_only())
+    model.to(DEVICE)
+
+    print("Enter your prompt:")
+    input_text = input()
+    # Calling the tokenizer (rather than .encode) also returns the attention
+    # mask, so generate() receives it explicitly instead of having to guess.
+    inputs = tokenizer(input_text, return_tensors="pt").to(DEVICE)
+
+    # CUDA work is queued asynchronously: synchronize on both sides so the
+    # interval covers the generation only, and time it with a monotonic clock.
+    torch.cuda.synchronize()
+    start_time = time.perf_counter()
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_length=MAX_LENGTH)
+    torch.cuda.synchronize()
+    end_time = time.perf_counter()
+
+    print(f"Quantized inference time: {end_time - start_time:.2f} seconds")
+    print(f"Generated text: {tokenizer.decode(outputs[0], skip_special_tokens=True)}")
+
+
+if __name__ == "__main__":
+    main()