13 lines
385 B
Python
13 lines
385 B
Python
|
|
from transformers import AutoModelForCausalLM
|
||
|
|
import torch
|
||
|
|
import torch.utils.dlpack
|
||
|
|
|
||
|
|
# Load the original model from the local checkpoint directory.
# NOTE(review): trust_remote_code=True allows transformers to execute custom
# modelling code shipped alongside the checkpoint -- only use it with
# checkpoints you trust.
model_name = "./mixed_llm"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# Convert the model to a different precision: cast all floating-point
# parameters and buffers to bfloat16.
model = model.bfloat16()

# Save the converted model in the safetensors format.
# The keyword that selects safetensors output is `safe_serialization`;
# `safetensors=True` is not a save_pretrained() parameter and does not
# select the on-disk format.
output_dir = "./mixed_llm_half"
model.save_pretrained(output_dir, safe_serialization=True)
|