init
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
venv/
|
||||||
24
app.py
Normal file
24
app.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# Interactive benchmark script: load a causal language model, quantize its
# weights to int8 (weight-only) with torchao, generate a completion for a
# single prompt read from stdin, and report the wall-clock generation time.
import time

import torch
from torchao.quantization import quantize_, int8_weight_only
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "swiss-ai/Apertus-8B-Instruct-2509"
device = "cuda"

# Fail fast: without a GPU the script would otherwise only crash at the
# `.to(device)` call, after the full model has been loaded and quantized.
if not torch.cuda.is_available():
    raise RuntimeError("A CUDA device is required to run this script.")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# quantize_ modifies the model in place; the quantized model is then moved
# to the GPU (same order as before: quantize first, transfer second).
quantize_(model, int8_weight_only())
model.to(device)

print("Enter your prompt:")
input_text = input()

# Calling the tokenizer (instead of `.encode`) yields the attention mask next
# to the token ids, so `generate` does not have to guess which positions are
# real tokens.
encoded = tokenizer(input_text, return_tensors='pt').to(device)
inputs = encoded["input_ids"]

# CUDA work is queued asynchronously: synchronize on both sides of the timed
# region so the measurement covers exactly the generation work, and use the
# monotonic high-resolution clock meant for measuring intervals.
torch.cuda.synchronize()
start_time = time.perf_counter()
with torch.no_grad():
    # max_length bounds prompt tokens plus generated tokens together.
    outputs = model.generate(
        inputs,
        attention_mask=encoded["attention_mask"],
        max_length=5000,
    )
torch.cuda.synchronize()
end_time = time.perf_counter()

print(f"Quantized inference time: {end_time - start_time:.2f} seconds")
# outputs[0] is the full sequence, i.e. the prompt followed by the completion.
print(f"Generated text: {tokenizer.decode(outputs[0], skip_special_tokens=True)}")
|
||||||
Reference in New Issue
Block a user