60 lines
1.4 KiB
Markdown
60 lines
1.4 KiB
Markdown
|
|
---
|
||
|
|
library_name: transformers
|
||
|
|
tags: []
|
||
|
|
---
|
||
|
|
|
||
|
|
# Model Card for Model ID
|
||
|
|
|
||
|
|
The code below was used to create this model, a 5-layer version of https://huggingface.co/tiny-random/qwen3.
|
||
|
|
|
||
|
|
> This tiny model is for debugging. It is randomly initialized with the config adapted from Qwen/Qwen3-32B.
|
||
|
|
|
||
|
|
|
||
|
|
```py
|
||
|
|
import torch

from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    pipeline,
    set_seed,
)

# Build a tiny, randomly initialised checkpoint that reuses the tokenizer,
# config skeleton and generation config of the source model.
source_model_id = "Qwen/Qwen3-32B"
save_folder = "/tmp/tiny-random/qwen3-5lyr"

# The tokenizer is copied over unchanged.
tokenizer = AutoTokenizer.from_pretrained(source_model_id, trust_remote_code=True)
tokenizer.save_pretrained(save_folder)

# Start from the source config and shrink it; overrides are applied in the
# order listed.
config = AutoConfig.from_pretrained(source_model_id, trust_remote_code=True)
config_overrides = {
    "_name_or_path": source_model_id,
    "hidden_size": 64,
    "intermediate_size": 128,
    "head_dim": 32,
    "num_key_value_heads": 1,
    "num_attention_heads": 2,
    "num_hidden_layers": 5,  # modified from https://huggingface.co/tiny-random/qwen3
    "max_window_layers": 1,
    "tie_word_embeddings": True,
}
for attr_name, attr_value in config_overrides.items():
    setattr(config, attr_name, attr_value)

# Instantiate the architecture from the shrunken config (no pretrained weights).
model = AutoModelForCausalLM.from_config(
    config,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model.generation_config = GenerationConfig.from_pretrained(
    source_model_id,
    trust_remote_code=True,
)

# Seed first, then overwrite every parameter in sorted-name order so the
# sequence of random draws is fixed.
set_seed(42)
with torch.no_grad():
    for param_name, weights in sorted(model.named_parameters()):
        torch.nn.init.normal_(weights, mean=0, std=0.5)
        print(param_name, weights.shape)

model.save_pretrained(save_folder)
|
||
|
|
|
||
|
|
```
|