enginex-ascend-910-llama.cpp/examples/model-conversion/scripts/causal/run-org-model.py

#!/usr/bin/env python3

import argparse
import os
import importlib
from pathlib import Path

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
import torch
import numpy as np

### If you want to dump RoPE activations, apply this monkey patch to the model
### class from Transformers that you are running (replace apertus.modeling_apertus
### with the proper package and class for your model
### === START ROPE DEBUG ===
# from transformers.models.apertus.modeling_apertus import apply_rotary_pos_emb

# orig_rope = apply_rotary_pos_emb
# torch.set_printoptions(threshold=float('inf'))
# torch.set_printoptions(precision=6, sci_mode=False)

# def debug_rope(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
#     # log inputs
#     summarize(q, "RoPE.q_in")
#     summarize(k, "RoPE.k_in")

#     # call original
#     q_out, k_out = orig_rope(q, k, cos, sin, position_ids, unsqueeze_dim)

#     # log outputs
#     summarize(q_out, "RoPE.q_out")
#     summarize(k_out, "RoPE.k_out")

#     return q_out, k_out

# # Patch it
# import transformers.models.apertus.modeling_apertus as apertus_mod  # noqa: E402
# apertus_mod.apply_rotary_pos_emb = debug_rope
### == END ROPE DEBUG ===


def summarize(tensor: torch.Tensor, name: str, max_seq: int = 3, max_vals: int = 3):
    """
    Print a tensor in llama.cpp debug style.

    Supports:
    - 2D tensors (seq, hidden)
    - 3D tensors (batch, seq, hidden)
    - 4D tensors (batch, seq, heads, dim_per_head) via flattening heads × dim_per_head

    Shows first and last max_vals of each vector per sequence position.
    """
    t = tensor.detach().to(torch.float32).cpu()

    # Determine dimensions
    if t.ndim == 3:
        _, s, _ = t.shape
    elif t.ndim == 2:
        _, s = 1, t.shape[0]
        t = t.unsqueeze(0)
    elif t.ndim == 4:
        _, s, _, _ = t.shape
    else:
        print(f"Skipping tensor due to unsupported dimensions: {t.ndim}")
        return

    ten_shape = t.shape

    print(f"ggml_debug: {name} = (f32)  ... = {{{ten_shape}}}")
    print("                                     [")
    print("                                      [")

    # Determine indices for first and last sequences
    first_indices = list(range(min(s, max_seq)))
    last_indices = list(range(max(0, s - max_seq), s))

    # Check if there's an overlap between first and last indices or if we're at the edge case of s = 2 * max_seq
    has_overlap = bool(set(first_indices) & set(last_indices)) or (max_seq * 2 == s)

    # Combine indices
    if has_overlap:
        # If there's overlap, just use the combined unique indices
        indices = sorted(list(set(first_indices + last_indices)))
        separator_index = None
    else:
        # If no overlap, we'll add a separator between first and last sequences
        indices = first_indices + last_indices
        separator_index = len(first_indices)

    for i, si in enumerate(indices):
        # Add separator if needed
        if separator_index is not None and i == separator_index:
            print("                                       ...")

        # Extract appropriate slice
        vec = t[0, si]
        if vec.ndim == 2:  # 4D case: flatten heads × dim_per_head
            flat = vec.flatten().tolist()
        else:  # 2D or 3D case
            flat = vec.tolist()

        # First and last slices
        first = flat[:max_vals]
        last = flat[-max_vals:] if len(flat) >= max_vals else flat
        first_str = ", ".join(f"{v:12.4f}" for v in first)
        last_str = ", ".join(f"{v:12.4f}" for v in last)

        print(f"                                       [{first_str}, ..., {last_str}]")

    print("                                      ],")
    print("                                     ]")
    print(f"                                     sum = {t.sum().item():.6f}\n")


def debug_hook(name):
    def fn(_m, input, output):
        if isinstance(input, torch.Tensor):
            summarize(input, name + "_in")
        elif isinstance(input, (tuple, list)) and isinstance(input[0], torch.Tensor):
            summarize(input[0], name + "_in")
        if isinstance(output, torch.Tensor):
            summarize(output, name + "_out")
        elif isinstance(output, (tuple, list)) and isinstance(output[0], torch.Tensor):
            summarize(output[0], name + "_out")

    return fn


unreleased_model_name = os.getenv("UNRELEASED_MODEL_NAME")

parser = argparse.ArgumentParser(description="Process model with specified path")
parser.add_argument("--model-path", "-m", help="Path to the model")
args = parser.parse_args()

model_path = os.environ.get("MODEL_PATH", args.model_path)
if model_path is None:
    parser.error(
        "Model path must be specified either via --model-path argument or MODEL_PATH environment variable"
    )

config = AutoConfig.from_pretrained(model_path)

print("Model type:       ", config.model_type)
print("Vocab size:       ", config.vocab_size)
print("Hidden size:      ", config.hidden_size)
print("Number of layers: ", config.num_hidden_layers)
print("BOS token id:     ", config.bos_token_id)
print("EOS token id:     ", config.eos_token_id)

print("Loading model and tokenizer using AutoTokenizer:", model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)

if unreleased_model_name:
    model_name_lower = unreleased_model_name.lower()
    unreleased_module_path = (
        f"transformers.models.{model_name_lower}.modular_{model_name_lower}"
    )
    class_name = f"{unreleased_model_name}ForCausalLM"
    print(f"Importing unreleased model module: {unreleased_module_path}")

    try:
        model_class = getattr(
            importlib.import_module(unreleased_module_path), class_name
        )
        model = model_class.from_pretrained(
            model_path
        )  # Note: from_pretrained, not fromPretrained
    except (ImportError, AttributeError) as e:
        print(f"Failed to import or load model: {e}")
        exit(1)
else:
    model = AutoModelForCausalLM.from_pretrained(
        model_path, device_map="auto", offload_folder="offload"
    )

for name, module in model.named_modules():
    if len(list(module.children())) == 0:  # only leaf modules
        module.register_forward_hook(debug_hook(name))

model_name = os.path.basename(model_path)
# Printing the Model class to allow for easier debugging. This can be useful
# when working with models that have not been publicly released yet and this
# migth require that the concrete class is imported and used directly instead
# of using AutoModelForCausalLM.
print(f"Model class: {model.__class__.__name__}")

prompt = "Hello, my name is"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

print(f"Input tokens: {input_ids}")
print(f"Input text: {repr(prompt)}")
print(f"Tokenized: {tokenizer.convert_ids_to_tokens(input_ids[0])}")

with torch.no_grad():
    outputs = model(input_ids.to(model.device))
    logits = outputs.logits

    # Extract logits for the last token (next token prediction)
    last_logits = logits[0, -1, :].cpu().numpy()

    print(f"Logits shape: {logits.shape}")
    print(f"Last token logits shape: {last_logits.shape}")
    print(f"Vocab size: {len(last_logits)}")

    data_dir = Path("data")
    data_dir.mkdir(exist_ok=True)
    bin_filename = data_dir / f"pytorch-{model_name}.bin"
    txt_filename = data_dir / f"pytorch-{model_name}.txt"

    # Save to file for comparison
    last_logits.astype(np.float32).tofile(bin_filename)

    # Also save as text file for easy inspection
    with open(txt_filename, "w") as f:
        for i, logit in enumerate(last_logits):
            f.write(f"{i}: {logit:.6f}\n")

    # Print some sample logits for quick verification
    print(f"First 10 logits: {last_logits[:10]}")
    print(f"Last 10 logits: {last_logits[-10:]}")

    # Show top 5 predicted tokens
    top_indices = np.argsort(last_logits)[-5:][::-1]
    print("Top 5 predictions:")
    for idx in top_indices:
        token = tokenizer.decode([idx])
        print(f"  Token {idx} ({repr(token)}): {last_logits[idx]:.6f}")

    print(f"Saved bin logits to: {bin_filename}")
    print(f"Saved txt logist to: {txt_filename}")
-												examples : add model conversion tool/example (#15455)

* examples : add model conversion tool/example

This commit adds an "example/tool" that is intended to help in the
process of converting models to GGUF. Currently it supports normal
causal models and embedding models. The readme contains instructions and
command to guide through the process.

The motivation for this to have a structured and repeatable process for
model conversions and hopefully with time improve upon it to make the
process easier and more reliable. We have started to use this for new
model conversions internally and will continue doing so and improve it
as we go along. Perhaps with time this should be placed in a different
directory than the examples directory, but for now it seems like a good
place to keep it while we are still developing it.

* squash! examples : add model conversion tool/example

Remove dependency on scikit-learn in model conversion example.

* squash! examples : add model conversion tool/example

Update transformer dep to use non-dev version. And also import
`AutoModelForCausalLM` instead of `AutoModel` to ensure compatibility
with the latest version.

* squash! examples : add model conversion tool/example

Remove the logits requirements file from the all requirements file.
											
										
										
											2025-08-21 12:16:54 +02:00
+								#!/usr/bin/env python3
 								import argparse
 								import os
 								import importlib
 								from pathlib import Path
 								from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
 								import torch
 								import numpy as np
-												model-conversion : add extra debugging support for model conversion (#15877)

* feat: Extra debugging support for model conversion - added BF16 support for llama-callback-eval and support for dumping intermediate steps in run-org-model.py
											
										
										
											2025-09-09 06:05:55 +02:00
+								### If you want to dump RoPE activations, apply this monkey patch to the model
 								### class from Transformers that you are running (replace apertus.modeling_apertus
 								### with the proper package and class for your model
 								### === START ROPE DEBUG ===
 								# from transformers.models.apertus.modeling_apertus import apply_rotary_pos_emb
 								# orig_rope = apply_rotary_pos_emb
 								# torch.set_printoptions(threshold=float('inf'))
 								# torch.set_printoptions(precision=6, sci_mode=False)
 								# def debug_rope(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
 								#     # log inputs
 								#     summarize(q, "RoPE.q_in")
 								#     summarize(k, "RoPE.k_in")
 								#     # call original
 								#     q_out, k_out = orig_rope(q, k, cos, sin, position_ids, unsqueeze_dim)
 								#     # log outputs
 								#     summarize(q_out, "RoPE.q_out")
 								#     summarize(k_out, "RoPE.k_out")
 								#     return q_out, k_out
 								# # Patch it
 								# import transformers.models.apertus.modeling_apertus as apertus_mod  # noqa: E402
 								# apertus_mod.apply_rotary_pos_emb = debug_rope
 								### == END ROPE DEBUG ===
 								def summarize(tensor: torch.Tensor, name: str, max_seq: int = 3, max_vals: int = 3):
 								    """
 								    Print a tensor in llama.cpp debug style.
 								    Supports:
 								    - 2D tensors (seq, hidden)
 								    - 3D tensors (batch, seq, hidden)
 								    - 4D tensors (batch, seq, heads, dim_per_head) via flattening heads × dim_per_head
 								    Shows first and last max_vals of each vector per sequence position.
 								    """
 								    t = tensor.detach().to(torch.float32).cpu()
 								    # Determine dimensions
 								    if t.ndim == 3:
 								        _, s, _ = t.shape
 								    elif t.ndim == 2:
 								        _, s = 1, t.shape[0]
 								        t = t.unsqueeze(0)
 								    elif t.ndim == 4:
 								        _, s, _, _ = t.shape
 								    else:
 								        print(f"Skipping tensor due to unsupported dimensions: {t.ndim}")
 								        return
 								    ten_shape = t.shape
 								    print(f"ggml_debug: {name} = (f32)  ... = {{{ten_shape}}}")
 								    print("                                     [")
 								    print("                                      [")
 								    # Determine indices for first and last sequences
 								    first_indices = list(range(min(s, max_seq)))
 								    last_indices = list(range(max(0, s - max_seq), s))
 								    # Check if there's an overlap between first and last indices or if we're at the edge case of s = 2 * max_seq
 								    has_overlap = bool(set(first_indices) & set(last_indices)) or (max_seq * 2 == s)
 								    # Combine indices
 								    if has_overlap:
 								        # If there's overlap, just use the combined unique indices
 								        indices = sorted(list(set(first_indices + last_indices)))
 								        separator_index = None
 								    else:
 								        # If no overlap, we'll add a separator between first and last sequences
 								        indices = first_indices + last_indices
 								        separator_index = len(first_indices)
 								    for i, si in enumerate(indices):
 								        # Add separator if needed
 								        if separator_index is not None and i == separator_index:
 								            print("                                       ...")
 								        # Extract appropriate slice
 								        vec = t[0, si]
 								        if vec.ndim == 2:  # 4D case: flatten heads × dim_per_head
 								            flat = vec.flatten().tolist()
 								        else:  # 2D or 3D case
 								            flat = vec.tolist()
 								        # First and last slices
 								        first = flat[:max_vals]
 								        last = flat[-max_vals:] if len(flat) >= max_vals else flat
 								        first_str = ", ".join(f"{v:12.4f}" for v in first)
 								        last_str = ", ".join(f"{v:12.4f}" for v in last)
 								        print(f"                                       [{first_str}, ..., {last_str}]")
 								    print("                                      ],")
 								    print("                                     ]")
 								    print(f"                                     sum = {t.sum().item():.6f}\n")
 								def debug_hook(name):
 								    def fn(_m, input, output):
 								        if isinstance(input, torch.Tensor):
 								            summarize(input, name + "_in")
 								        elif isinstance(input, (tuple, list)) and isinstance(input[0], torch.Tensor):
 								            summarize(input[0], name + "_in")
 								        if isinstance(output, torch.Tensor):
 								            summarize(output, name + "_out")
 								        elif isinstance(output, (tuple, list)) and isinstance(output[0], torch.Tensor):
 								            summarize(output[0], name + "_out")
 								    return fn
 								unreleased_model_name = os.getenv("UNRELEASED_MODEL_NAME")
 								parser = argparse.ArgumentParser(description="Process model with specified path")
 								parser.add_argument("--model-path", "-m", help="Path to the model")
-												examples : add model conversion tool/example (#15455)

* examples : add model conversion tool/example

This commit adds an "example/tool" that is intended to help in the
process of converting models to GGUF. Currently it supports normal
causal models and embedding models. The readme contains instructions and
command to guide through the process.

The motivation for this to have a structured and repeatable process for
model conversions and hopefully with time improve upon it to make the
process easier and more reliable. We have started to use this for new
model conversions internally and will continue doing so and improve it
as we go along. Perhaps with time this should be placed in a different
directory than the examples directory, but for now it seems like a good
place to keep it while we are still developing it.

* squash! examples : add model conversion tool/example

Remove dependency on scikit-learn in model conversion example.

* squash! examples : add model conversion tool/example

Update transformer dep to use non-dev version. And also import
`AutoModelForCausalLM` instead of `AutoModel` to ensure compatibility
with the latest version.

* squash! examples : add model conversion tool/example

Remove the logits requirements file from the all requirements file.
											
										
										
											2025-08-21 12:16:54 +02:00
+								args = parser.parse_args()
-												model-conversion : add extra debugging support for model conversion (#15877)

* feat: Extra debugging support for model conversion - added BF16 support for llama-callback-eval and support for dumping intermediate steps in run-org-model.py
											
										
										
											2025-09-09 06:05:55 +02:00
+								model_path = os.environ.get("MODEL_PATH", args.model_path)
-												examples : add model conversion tool/example (#15455)

* examples : add model conversion tool/example

This commit adds an "example/tool" that is intended to help in the
process of converting models to GGUF. Currently it supports normal
causal models and embedding models. The readme contains instructions and
command to guide through the process.

The motivation for this to have a structured and repeatable process for
model conversions and hopefully with time improve upon it to make the
process easier and more reliable. We have started to use this for new
model conversions internally and will continue doing so and improve it
as we go along. Perhaps with time this should be placed in a different
directory than the examples directory, but for now it seems like a good
place to keep it while we are still developing it.

* squash! examples : add model conversion tool/example

Remove dependency on scikit-learn in model conversion example.

* squash! examples : add model conversion tool/example

Update transformer dep to use non-dev version. And also import
`AutoModelForCausalLM` instead of `AutoModel` to ensure compatibility
with the latest version.

* squash! examples : add model conversion tool/example

Remove the logits requirements file from the all requirements file.
											
										
										
											2025-08-21 12:16:54 +02:00
+								if model_path is None:
-												model-conversion : add extra debugging support for model conversion (#15877)

* feat: Extra debugging support for model conversion - added BF16 support for llama-callback-eval and support for dumping intermediate steps in run-org-model.py
											
										
										
											2025-09-09 06:05:55 +02:00
+								    parser.error(
 								        "Model path must be specified either via --model-path argument or MODEL_PATH environment variable"
 								    )
-												examples : add model conversion tool/example (#15455)

* examples : add model conversion tool/example

This commit adds an "example/tool" that is intended to help in the
process of converting models to GGUF. Currently it supports normal
causal models and embedding models. The readme contains instructions and
command to guide through the process.

The motivation for this to have a structured and repeatable process for
model conversions and hopefully with time improve upon it to make the
process easier and more reliable. We have started to use this for new
model conversions internally and will continue doing so and improve it
as we go along. Perhaps with time this should be placed in a different
directory than the examples directory, but for now it seems like a good
place to keep it while we are still developing it.

* squash! examples : add model conversion tool/example

Remove dependency on scikit-learn in model conversion example.

* squash! examples : add model conversion tool/example

Update transformer dep to use non-dev version. And also import
`AutoModelForCausalLM` instead of `AutoModel` to ensure compatibility
with the latest version.

* squash! examples : add model conversion tool/example

Remove the logits requirements file from the all requirements file.
											
										
										
											2025-08-21 12:16:54 +02:00
 								config = AutoConfig.from_pretrained(model_path)
 								print("Model type:       ", config.model_type)
 								print("Vocab size:       ", config.vocab_size)
 								print("Hidden size:      ", config.hidden_size)
 								print("Number of layers: ", config.num_hidden_layers)
 								print("BOS token id:     ", config.bos_token_id)
 								print("EOS token id:     ", config.eos_token_id)
 								print("Loading model and tokenizer using AutoTokenizer:", model_path)
 								tokenizer = AutoTokenizer.from_pretrained(model_path)
 								config = AutoConfig.from_pretrained(model_path)
 								if unreleased_model_name:
 								    model_name_lower = unreleased_model_name.lower()
-												model-conversion : add extra debugging support for model conversion (#15877)

* feat: Extra debugging support for model conversion - added BF16 support for llama-callback-eval and support for dumping intermediate steps in run-org-model.py
											
										
										
											2025-09-09 06:05:55 +02:00
+								    unreleased_module_path = (
 								        f"transformers.models.{model_name_lower}.modular_{model_name_lower}"
 								    )
-												examples : add model conversion tool/example (#15455)

* examples : add model conversion tool/example

This commit adds an "example/tool" that is intended to help in the
process of converting models to GGUF. Currently it supports normal
causal models and embedding models. The readme contains instructions and
command to guide through the process.

The motivation for this to have a structured and repeatable process for
model conversions and hopefully with time improve upon it to make the
process easier and more reliable. We have started to use this for new
model conversions internally and will continue doing so and improve it
as we go along. Perhaps with time this should be placed in a different
directory than the examples directory, but for now it seems like a good
place to keep it while we are still developing it.

* squash! examples : add model conversion tool/example

Remove dependency on scikit-learn in model conversion example.

* squash! examples : add model conversion tool/example

Update transformer dep to use non-dev version. And also import
`AutoModelForCausalLM` instead of `AutoModel` to ensure compatibility
with the latest version.

* squash! examples : add model conversion tool/example

Remove the logits requirements file from the all requirements file.
											
										
										
											2025-08-21 12:16:54 +02:00
+								    class_name = f"{unreleased_model_name}ForCausalLM"
 								    print(f"Importing unreleased model module: {unreleased_module_path}")
 								    try:
-												model-conversion : add extra debugging support for model conversion (#15877)

* feat: Extra debugging support for model conversion - added BF16 support for llama-callback-eval and support for dumping intermediate steps in run-org-model.py
											
										
										
											2025-09-09 06:05:55 +02:00
+								        model_class = getattr(
 								            importlib.import_module(unreleased_module_path), class_name
 								        )
 								        model = model_class.from_pretrained(
 								            model_path
 								        )  # Note: from_pretrained, not fromPretrained
-												examples : add model conversion tool/example (#15455)

* examples : add model conversion tool/example

This commit adds an "example/tool" that is intended to help in the
process of converting models to GGUF. Currently it supports normal
causal models and embedding models. The readme contains instructions and
command to guide through the process.

The motivation for this to have a structured and repeatable process for
model conversions and hopefully with time improve upon it to make the
process easier and more reliable. We have started to use this for new
model conversions internally and will continue doing so and improve it
as we go along. Perhaps with time this should be placed in a different
directory than the examples directory, but for now it seems like a good
place to keep it while we are still developing it.

* squash! examples : add model conversion tool/example

Remove dependency on scikit-learn in model conversion example.

* squash! examples : add model conversion tool/example

Update transformer dep to use non-dev version. And also import
`AutoModelForCausalLM` instead of `AutoModel` to ensure compatibility
with the latest version.

* squash! examples : add model conversion tool/example

Remove the logits requirements file from the all requirements file.
											
										
										
											2025-08-21 12:16:54 +02:00
+								    except (ImportError, AttributeError) as e:
 								        print(f"Failed to import or load model: {e}")
 								        exit(1)
 								else:
-												model-conversion : add extra debugging support for model conversion (#15877)

* feat: Extra debugging support for model conversion - added BF16 support for llama-callback-eval and support for dumping intermediate steps in run-org-model.py
											
										
										
											2025-09-09 06:05:55 +02:00
+								    model = AutoModelForCausalLM.from_pretrained(
 								        model_path, device_map="auto", offload_folder="offload"
 								    )
 								for name, module in model.named_modules():
 								    if len(list(module.children())) == 0:  # only leaf modules
 								        module.register_forward_hook(debug_hook(name))
-												examples : add model conversion tool/example (#15455)

* examples : add model conversion tool/example

This commit adds an "example/tool" that is intended to help in the
process of converting models to GGUF. Currently it supports normal
causal models and embedding models. The readme contains instructions and
command to guide through the process.

The motivation for this to have a structured and repeatable process for
model conversions and hopefully with time improve upon it to make the
process easier and more reliable. We have started to use this for new
model conversions internally and will continue doing so and improve it
as we go along. Perhaps with time this should be placed in a different
directory than the examples directory, but for now it seems like a good
place to keep it while we are still developing it.

* squash! examples : add model conversion tool/example

Remove dependency on scikit-learn in model conversion example.

* squash! examples : add model conversion tool/example

Update transformer dep to use non-dev version. And also import
`AutoModelForCausalLM` instead of `AutoModel` to ensure compatibility
with the latest version.

* squash! examples : add model conversion tool/example

Remove the logits requirements file from the all requirements file.
											
										
										
											2025-08-21 12:16:54 +02:00
 								model_name = os.path.basename(model_path)
 								# Printing the Model class to allow for easier debugging. This can be useful
 								# when working with models that have not been publicly released yet and this
 								# migth require that the concrete class is imported and used directly instead
 								# of using AutoModelForCausalLM.
 								print(f"Model class: {model.__class__.__name__}")
 								prompt = "Hello, my name is"
 								input_ids = tokenizer(prompt, return_tensors="pt").input_ids
 								print(f"Input tokens: {input_ids}")
 								print(f"Input text: {repr(prompt)}")
 								print(f"Tokenized: {tokenizer.convert_ids_to_tokens(input_ids[0])}")
 								with torch.no_grad():
-												model-conversion : run-org-model.py fails to run on mac m1 (#16213)

Signed-off-by: Jie Fu <jiefu@tencent.com>
											
										
										
											2025-09-24 14:46:52 +08:00
+								    outputs = model(input_ids.to(model.device))
-												examples : add model conversion tool/example (#15455)

* examples : add model conversion tool/example

This commit adds an "example/tool" that is intended to help in the
process of converting models to GGUF. Currently it supports normal
causal models and embedding models. The readme contains instructions and
command to guide through the process.

The motivation for this to have a structured and repeatable process for
model conversions and hopefully with time improve upon it to make the
process easier and more reliable. We have started to use this for new
model conversions internally and will continue doing so and improve it
as we go along. Perhaps with time this should be placed in a different
directory than the examples directory, but for now it seems like a good
place to keep it while we are still developing it.

* squash! examples : add model conversion tool/example

Remove dependency on scikit-learn in model conversion example.

* squash! examples : add model conversion tool/example

Update transformer dep to use non-dev version. And also import
`AutoModelForCausalLM` instead of `AutoModel` to ensure compatibility
with the latest version.

* squash! examples : add model conversion tool/example

Remove the logits requirements file from the all requirements file.
											
										
										
											2025-08-21 12:16:54 +02:00
+								    logits = outputs.logits
 								    # Extract logits for the last token (next token prediction)
 								    last_logits = logits[0, -1, :].cpu().numpy()
 								    print(f"Logits shape: {logits.shape}")
 								    print(f"Last token logits shape: {last_logits.shape}")
 								    print(f"Vocab size: {len(last_logits)}")
 								    data_dir = Path("data")
 								    data_dir.mkdir(exist_ok=True)
 								    bin_filename = data_dir / f"pytorch-{model_name}.bin"
 								    txt_filename = data_dir / f"pytorch-{model_name}.txt"
 								    # Save to file for comparison
 								    last_logits.astype(np.float32).tofile(bin_filename)
 								    # Also save as text file for easy inspection
 								    with open(txt_filename, "w") as f:
 								        for i, logit in enumerate(last_logits):
 								            f.write(f"{i}: {logit:.6f}\n")
 								    # Print some sample logits for quick verification
 								    print(f"First 10 logits: {last_logits[:10]}")
 								    print(f"Last 10 logits: {last_logits[-10:]}")
 								    # Show top 5 predicted tokens
 								    top_indices = np.argsort(last_logits)[-5:][::-1]
 								    print("Top 5 predictions:")
 								    for idx in top_indices:
 								        token = tokenizer.decode([idx])
 								        print(f"  Token {idx} ({repr(token)}): {last_logits[idx]:.6f}")
 								    print(f"Saved bin logits to: {bin_filename}")
 								    print(f"Saved txt logist to: {txt_filename}")