init

2025-09-24 10:02:43 +02:00
commit 23691ba663
2 changed files with 25 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
 venv/
--- a/app.py
+++ b/app.py
@@ -0,0 +1,24 @@
 # Interactive benchmark script: loads an instruction-tuned causal LM, applies
 # int8 weight-only quantization via torchao, reads one prompt from stdin and
 # reports how long generation took along with the generated text.
 import time
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 from torchao.quantization import quantize_, int8_weight_only
 model_name = "swiss-ai/Apertus-8B-Instruct-2509"
 device = "cuda"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 # quantize_ modifies the model in place, so nothing is reassigned here.
 quantize_(model, int8_weight_only())
 model.to(device)
 print("Enter your prompt:")
 input_text = input()
 # Call the tokenizer directly (rather than .encode) so an attention mask is
 # produced alongside the token ids; generate() warns when the mask is absent.
 encoded = tokenizer(input_text, return_tensors='pt').to(device)
 # CUDA kernels run asynchronously: drain pending work (e.g. the weight
 # transfer above) before starting the clock so it is not billed to generation.
 torch.cuda.synchronize()
 # perf_counter is monotonic and high-resolution, unlike the wall clock.
 start_time = time.perf_counter()
 with torch.no_grad():
     # max_length bounds prompt tokens plus generated tokens combined.
     outputs = model.generate(
         input_ids=encoded["input_ids"],
         attention_mask=encoded["attention_mask"],
         max_length=5000,
     )
 # Wait for any still-queued GPU work so the elapsed time covers all of it.
 torch.cuda.synchronize()
 end_time = time.perf_counter()
 print(f"Quantized inference time: {end_time - start_time:.2f} seconds")
 print(f"Generated text: {tokenizer.decode(outputs[0], skip_special_tokens=True)}")