Sync from upstream llama.cpp repository
This commit is contained in:
46
examples/model-conversion/scripts/causal/compare-embeddings-logits.sh
Executable file
46
examples/model-conversion/scripts/causal/compare-embeddings-logits.sh
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
#
# Compare the embeddings of the original model with those of the converted
# model by running scripts/utils/semantic_check.py on two binary files.
#
# Usage:
#   compare-embeddings-logits.sh [MODEL_PATH] [MODEL_NAME]
#   <embeddings-json> | compare-embeddings-logits.sh [MODEL_PATH] [MODEL_NAME]
#
# When stdin is a terminal, the llama.cpp embeddings are read from
# data/llamacpp-<converted model name>-embeddings.bin. Otherwise JSON is read
# from stdin and converted to a temporary binary file of floats.
#
# Environment:
#   MODEL_PATH       fallback when the first argument is missing
#   CONVERTED_MODEL  fallback when the first argument is missing

set -e

MODEL_PATH="${1:-"$MODEL_PATH"}"
MODEL_NAME="${2:-$(basename "$MODEL_PATH")}"

# NOTE(review): the converted model path/name reuse the same positional
# arguments ($1/$2) as the original model above, so passing arguments makes
# both pairs identical. Kept as-is for compatibility -- confirm the intent.
CONVERTED_MODEL_PATH="${1:-"$CONVERTED_MODEL"}"
CONVERTED_MODEL_NAME="${2:-$(basename "$CONVERTED_MODEL_PATH" ".gguf")}"

if [ -t 0 ]; then
    CPP_EMBEDDINGS="data/llamacpp-${CONVERTED_MODEL_NAME}-embeddings.bin"
else
    # Process piped JSON data and convert to binary (matching logits.cpp format)
    # The X's are kept at the end of the template: BSD/macOS mktemp only
    # substitutes trailing X's.
    TEMP_FILE=$(mktemp "${TMPDIR:-/tmp}/llamacpp-embeddings.XXXXXX")
    # Register the cleanup before the conversion so the file is also removed
    # when the conversion fails and 'set -e' aborts the script.
    trap 'rm -f "$TEMP_FILE"' EXIT

    # The output path is passed as an argument (sys.argv[1]) instead of being
    # spliced into the Python source, so unusual characters in it are harmless.
    python3 -c "
import json
import struct
import sys

data = json.load(sys.stdin)

# Flatten all embeddings completely
flattened = []
for item in data:
    for token_embedding in item['embedding']:
        flattened.extend(token_embedding)

print(f'Total embedding values: {len(flattened)}', file=sys.stderr)

# Write as binary floats - matches the logits.cpp fwrite format
with open(sys.argv[1], 'wb') as f:
    f.write(struct.pack(f'{len(flattened)}f', *flattened))
" "$TEMP_FILE"
    CPP_EMBEDDINGS="$TEMP_FILE"
fi

python scripts/utils/semantic_check.py --model-path "$MODEL_PATH" \
    --python-embeddings "data/pytorch-${MODEL_NAME}-embeddings.bin" \
    --cpp-embeddings "$CPP_EMBEDDINGS" \
    --prompt "Hello world today" \
    --causal
|
||||
|
||||
87
examples/model-conversion/scripts/causal/compare-logits.py
Executable file
87
examples/model-conversion/scripts/causal/compare-logits.py
Executable file
@@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
# Add utils directory to path for direct script execution
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "utils"))
|
||||
from common import get_model_name_from_env_path, compare_tokens, exit_with_warning # type: ignore[import-not-found]
|
||||
|
||||
def quick_logits_check(pytorch_file, llamacpp_file):
    """Run a lightweight sanity check on two logits files before NMSE.

    Both files are read as flat float32 arrays. The ten largest logits of
    each file and the maximum absolute element-wise difference are printed.

    Args:
        pytorch_file: path of the binary logits file from the PyTorch run.
        llamacpp_file: path of the binary logits file from the llama.cpp run.

    Returns:
        False when a file cannot be loaded or the two arrays differ in shape,
        True otherwise.
    """
    try:
        reference = np.fromfile(pytorch_file, dtype=np.float32)
        candidate = np.fromfile(llamacpp_file, dtype=np.float32)
    except Exception as e:
        print(f"❌ NOK: Failed to load files - {e}")
        return False

    # An element-wise comparison only makes sense for equally sized arrays.
    if reference.shape != candidate.shape:
        print(f"❌ NOK: Shape mismatch - PyTorch: {reference.shape}, llama.cpp: {candidate.shape}")
        return False

    max_diff = np.max(np.abs(reference - candidate))

    def ten_largest(values):
        # argsort is ascending: take the last ten indices and reverse them so
        # the largest logit comes first.
        return values[np.argsort(values)[-10:][::-1]]

    print(f"Top 10 PyTorch logits: {ten_largest(reference)}")
    print(f"Top 10 llama.cpp logits: {ten_largest(candidate)}")
    print(f"Max absolute difference: {max_diff:.4f}")

    return True
|
||||
|
||||
def main():
    """Compare the saved PyTorch and llama.cpp logits for the configured models.

    Model names are derived from the MODEL_PATH and CONVERTED_MODEL
    environment variables and used to locate the logits files below
    ``data/``. The script exits with status 1 when one of the files is
    missing, verifies the tokens via ``compare_tokens`` and finally runs
    ``quick_logits_check``.
    """
    model_path = os.environ.get('MODEL_PATH')
    model_name = get_model_name_from_env_path('MODEL_PATH')
    data_dir = Path("data")
    pytorch_file = data_dir / f"pytorch-{model_name}.bin"

    llamacpp_model_name = get_model_name_from_env_path('CONVERTED_MODEL')
    print(f"Using converted model: {llamacpp_model_name}")
    llamacpp_file = data_dir / f"llamacpp-{llamacpp_model_name}.bin"

    # (file, label used in the message, script that generates the file)
    required_files = (
        (pytorch_file, "PyTorch", "scripts/run-org-model.sh"),
        (llamacpp_file, "llama.cpp", "scripts/run-converted-model.sh"),
    )
    for logits_path, label, producer in required_files:
        if not logits_path.exists():
            print(f"Error: {label} logits file not found: {logits_path}")
            print(f"Please run {producer} first to generate this file.")
            sys.exit(1)

    print("Checked all required files were found. Proceeding...\n")

    # Verify tokens as they are a prerequisite for logits comparison.
    print("🔍 Token Comparison Check")
    print("=" * 40)
    tokens_match = compare_tokens(f"pytorch-{model_name}", f"llamacpp-{llamacpp_model_name}")
    if not tokens_match:
        exit_with_warning("\n❌ Token mismatch detected", model_path)
    print()

    print("🔍 GGML Model Validation for model ", model_name)
    print("=" * 40)
    print(f"PyTorch logits : {pytorch_file}")
    print(f"llama.cpp logits: {llamacpp_file}")
    print()

    if not quick_logits_check(pytorch_file, llamacpp_file):
        exit_with_warning("❌ NOK: Top 10 predictions don't match - generation will differ", model_path)
        # Explicit return so the success output below is never printed on
        # this path, whatever exit_with_warning does.
        return

    print("✅ OK: Lightweight model check successful!")
    print(" Ok to proceed with NMSE check...")
    sys.exit(0)


if __name__ == "__main__":
    main()
|
||||
46
examples/model-conversion/scripts/causal/convert-model.sh
Executable file
46
examples/model-conversion/scripts/causal/convert-model.sh
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
#
# Convert the model in MODEL_PATH to GGUF using ../../convert_hf_to_gguf.py.
#
# Usage:
#   convert-model.sh [--mmproj]
#
# Options:
#   --mmproj   forward --mmproj to the conversion script
#
# Environment:
#   MODEL_PATH         path of the model to convert (required)
#   MODEL_NAME         output base name (default: basename of MODEL_PATH)
#   OUTPUT_DIR         output directory (default: ../../models)
#   OUTTYPE            value passed to --outtype (default: f16)
#   METADATA_OVERRIDE  when non-empty, passed to --metadata

set -e

# Parse command line arguments
MMPROJ=""
while [[ $# -gt 0 ]]; do
    case $1 in
        --mmproj)
            MMPROJ="--mmproj"
            shift
            ;;
        *)
            # Any other argument is ignored.
            shift
            ;;
    esac
done

# Fail early with a clear message instead of handing an empty path to the
# conversion script.
if [[ -z "${MODEL_PATH:-}" ]]; then
    echo "Error: MODEL_PATH environment variable must be set" >&2
    exit 1
fi

MODEL_NAME="${MODEL_NAME:-$(basename "$MODEL_PATH")}"
OUTPUT_DIR="${OUTPUT_DIR:-../../models}"
TYPE="${OUTTYPE:-f16}"
METADATA_OVERRIDE="${METADATA_OVERRIDE:-}"
CONVERTED_MODEL="${OUTPUT_DIR}/${MODEL_NAME}.gguf"

echo "Model path: ${MODEL_PATH}"
echo "Model name: ${MODEL_NAME}"
echo "Data type: ${TYPE}"
echo "Converted model path: ${CONVERTED_MODEL}"
echo "Metadata override: ${METADATA_OVERRIDE}"

# Build the command as an array so that paths containing spaces stay intact.
CMD_ARGS=("python" "../../convert_hf_to_gguf.py" "--verbose")
CMD_ARGS+=("${MODEL_PATH}")
CMD_ARGS+=("--outfile" "${CONVERTED_MODEL}")
CMD_ARGS+=("--outtype" "${TYPE}")
if [[ -n "$METADATA_OVERRIDE" ]]; then
    CMD_ARGS+=("--metadata" "${METADATA_OVERRIDE}")
fi
if [[ -n "$MMPROJ" ]]; then
    CMD_ARGS+=("${MMPROJ}")
fi

"${CMD_ARGS[@]}"

echo ""
echo "The environment variable CONVERTED_MODEL can be set to this path using:"
echo "export CONVERTED_MODEL=$(realpath "${CONVERTED_MODEL}")"
if [[ -n "$MMPROJ" ]]; then
    mmproj_file="${OUTPUT_DIR}/mmproj-$(basename "${CONVERTED_MODEL}")"
    echo "The mmproj model was created in $(realpath "$mmproj_file")"
fi
|
||||
13
examples/model-conversion/scripts/causal/modelcard.template
Normal file
13
examples/model-conversion/scripts/causal/modelcard.template
Normal file
@@ -0,0 +1,13 @@
|
||||
---
|
||||
base_model:
|
||||
- {base_model}
|
||||
---
|
||||
# {model_name} GGUF
|
||||
|
||||
Recommended way to run this model:
|
||||
|
||||
```sh
|
||||
llama-server -hf {namespace}/{model_name}-GGUF
|
||||
```
|
||||
|
||||
Then, access http://localhost:8080
|
||||
114
examples/model-conversion/scripts/causal/run-casual-gen-embeddings-org.py
Executable file
114
examples/model-conversion/scripts/causal/run-casual-gen-embeddings-org.py
Executable file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import importlib
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
|
||||
from pathlib import Path
|
||||
|
||||
# Optional model name; when set, the model class is imported directly from
# transformers.models.<name>.modular_<name> instead of using the Auto class.
unreleased_model_name = os.getenv('UNRELEASED_MODEL_NAME')

parser = argparse.ArgumentParser(description='Process model with specified path')
parser.add_argument('--model-path', '-m', help='Path to the model')
args = parser.parse_args()

# The MODEL_PATH environment variable takes precedence over --model-path.
model_path = os.environ.get('MODEL_PATH', args.model_path)
if model_path is None:
    parser.error("Model path must be specified either via --model-path argument or MODEL_PATH environment variable")

config = AutoConfig.from_pretrained(model_path)

print("Model type: ", config.model_type)
print("Vocab size: ", config.vocab_size)
print("Hidden size: ", config.hidden_size)
print("Number of layers: ", config.num_hidden_layers)
print("BOS token id: ", config.bos_token_id)
print("EOS token id: ", config.eos_token_id)

print("Loading model and tokenizer using AutoTokenizer:", model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

if unreleased_model_name:
    model_name_lower = unreleased_model_name.lower()
    unreleased_module_path = f"transformers.models.{model_name_lower}.modular_{model_name_lower}"
    class_name = f"{unreleased_model_name}ForCausalLM"
    print(f"Importing unreleased model module: {unreleased_module_path}")

    try:
        model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
        model = model_class.from_pretrained(model_path)
    except (ImportError, AttributeError) as e:
        print(f"Failed to import or load model: {e}")
        print("Falling back to AutoModelForCausalLM")
        model = AutoModelForCausalLM.from_pretrained(model_path)
else:
    model = AutoModelForCausalLM.from_pretrained(model_path)
print(f"Model class: {type(model)}")

model_name = os.path.basename(model_path)
print(f"Model name: {model_name}")

prompt = "Hello world today"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
# Token strings are needed for both the log line below and the final token
# reference, so they are computed once.
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
print(f"Input tokens: {input_ids}")
print(f"Input text: {repr(prompt)}")
print(f"Tokenized: {tokens}")

with torch.no_grad():
    outputs = model(input_ids, output_hidden_states=True)

    # Extract hidden states from the last layer
    # outputs.hidden_states is a tuple of (num_layers + 1) tensors
    # Index -1 gets the last layer, shape: [batch_size, seq_len, hidden_size]
    last_hidden_states = outputs.hidden_states[-1]

    # Get embeddings for all tokens
    token_embeddings = last_hidden_states[0].float().cpu().numpy()  # Remove batch dimension

    print(f"Hidden states shape: {last_hidden_states.shape}")
    print(f"Token embeddings shape: {token_embeddings.shape}")
    print(f"Hidden dimension: {token_embeddings.shape[-1]}")
    print(f"Number of tokens: {token_embeddings.shape[0]}")

    # Save raw token embeddings
    data_dir = Path("data")
    data_dir.mkdir(exist_ok=True)
    bin_filename = data_dir / f"pytorch-{model_name}-embeddings.bin"
    txt_filename = data_dir / f"pytorch-{model_name}-embeddings.txt"

    # Save all token embeddings as binary
    print(token_embeddings)
    token_embeddings.astype(np.float32).tofile(bin_filename)

    # Save as text for inspection: one "<token index> <dimension index> <value>"
    # line per value.
    with open(txt_filename, "w") as f:
        for i, embedding in enumerate(token_embeddings):
            for j, val in enumerate(embedding):
                f.write(f"{i} {j} {val:.6f}\n")

    # Print embeddings per token in the requested format
    print("\nToken embeddings:")
    for i, embedding in enumerate(token_embeddings):
        # Format: show first few values, ..., then last few values
        if len(embedding) > 10:
            # Show first 3 and last 3 values with ... in between
            first_vals = " ".join(f"{val:8.6f}" for val in embedding[:3])
            last_vals = " ".join(f"{val:8.6f}" for val in embedding[-3:])
            print(f"embedding {i}: {first_vals} ... {last_vals}")
        else:
            # If embedding is short, show all values
            vals = " ".join(f"{val:8.6f}" for val in embedding)
            print(f"embedding {i}: {vals}")

    # Also show token info for reference
    print("\nToken reference:")
    for i, token in enumerate(tokens):
        print(f" Token {i}: {repr(token)}")

    # The saved files contain the token embeddings (last-layer hidden states),
    # not logits.
    print(f"Saved bin embeddings to: {bin_filename}")
    print(f"Saved txt embeddings to: {txt_filename}")
|
||||
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
#
# Build llama-debug and run it in embedding mode on the converted model with
# the fixed prompt "Hello world today", saving the output via --save-logits.
#
# Usage:
#   <this script> [CONVERTED_MODEL]
#
# Environment:
#   CONVERTED_MODEL  model path used when no argument is given

set -e

# First try command line argument, then environment variable
CONVERTED_MODEL="${1:-"$CONVERTED_MODEL"}"

# Final check if we have a model path
if [ -z "$CONVERTED_MODEL" ]; then
    echo "Error: Model path must be provided either as:" >&2
    echo " 1. Command line argument" >&2
    echo " 2. CONVERTED_MODEL environment variable" >&2
    exit 1
fi

cmake --build ../../build --target llama-debug -j8

# Quoted so that a model path containing spaces is passed as one argument.
../../build/bin/llama-debug -m "$CONVERTED_MODEL" --embedding -p "Hello world today" --save-logits
|
||||
26
examples/model-conversion/scripts/causal/run-converted-model.sh
Executable file
26
examples/model-conversion/scripts/causal/run-converted-model.sh
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
#
# Build llama-debug and run it on the converted model with a test prompt,
# saving the output via --save-logits.
#
# Usage:
#   run-converted-model.sh [CONVERTED_MODEL] [MODEL_TESTING_PROMPT]
#
# Environment:
#   CONVERTED_MODEL       model path used when the first argument is missing
#   MODEL_TESTING_PROMPT  prompt used when the second argument is missing
#                         (default: "Hello, my name is")

set -e

# First try command line argument, then environment variable
CONVERTED_MODEL="${1:-"$CONVERTED_MODEL"}"
MODEL_TESTING_PROMPT="${2:-"$MODEL_TESTING_PROMPT"}"

# Note the space before ']': without it the closing bracket is glued to the
# prompt text and the test fails with a "missing ]" error for any non-empty
# prompt.
if [ -z "$MODEL_TESTING_PROMPT" ]; then
    MODEL_TESTING_PROMPT="Hello, my name is"
fi

# Final check if we have a model path
if [ -z "$CONVERTED_MODEL" ]; then
    echo "Error: Model path must be provided either as:" >&2
    echo " 1. Command line argument" >&2
    echo " 2. CONVERTED_MODEL environment variable" >&2
    exit 1
fi

# Quoted so the values are printed verbatim (no word splitting or globbing).
echo "$CONVERTED_MODEL"
echo "$MODEL_TESTING_PROMPT"

cmake --build ../../build --target llama-debug -j8

../../build/bin/llama-debug -m "$CONVERTED_MODEL" -p "$MODEL_TESTING_PROMPT" --save-logits
|
||||
168
examples/model-conversion/scripts/causal/run-org-model.py
Executable file
168
examples/model-conversion/scripts/causal/run-org-model.py
Executable file
@@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import importlib
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForImageTextToText, AutoConfig
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
from utils.common import debug_hook, save_output_data
|
||||
|
||||
def parse_arguments():
    """Parse the command line options of this script.

    Returns:
        The ``argparse.Namespace`` with ``model_path``, ``prompt_file``,
        ``verbose`` and ``device`` (default ``"auto"``).
    """
    # (flags, keyword arguments) for every supported option.
    option_specs = (
        (("--model-path", "-m"), {"help": "Path to the model"}),
        (("--prompt-file", "-f"), {"help": "Optional prompt file", "required": False}),
        (("--verbose", "-v"), {"action": "store_true", "help": "Enable verbose debug output"}),
        (("--device", "-d"), {"help": "Device to use (cpu, cuda, mps, auto)", "default": "auto"}),
    )

    cli = argparse.ArgumentParser(description="Process model with specified path")
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)
    return cli.parse_args()
|
||||
|
||||
def load_model_and_tokenizer(model_path, device="auto"):
    """Load the tokenizer, config and model for ``model_path``.

    When the UNRELEASED_MODEL_NAME environment variable is set, the class
    ``<name>ForCausalLM`` is imported from
    ``transformers.models.<name>.modular_<name>`` and the process exits with
    status 1 if that import or load raises ImportError/AttributeError.
    Otherwise ``AutoModelForImageTextToText`` is used when the config has no
    ``vocab_size`` but has a ``text_config``, and ``AutoModelForCausalLM``
    in all other cases.

    Args:
        model_path: value handed to the ``from_pretrained`` loaders.
        device: ``"cpu"``, ``"auto"``, or any other device string, which is
            used as-is for the whole model.

    Returns:
        Tuple ``(model, tokenizer, config)``. In the ``text_config`` case the
        returned config is that text sub-config.
    """
    print("Loading model and tokenizer using AutoTokenizer:", model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    multimodal = False
    full_config = config

    # Determine device_map based on device argument
    if device == "cpu":
        device_map = {"": "cpu"}
        print("Forcing CPU usage")
    elif device == "auto":
        device_map = "auto"
    else:
        device_map = {"": device}

    print("Model type: ", config.model_type)
    if "vocab_size" not in config and "text_config" in config:
        # The text settings live in a nested config; report those below but
        # keep the full config for loading the multimodal model.
        config = config.text_config
        multimodal = True

    print("Vocab size: ", config.vocab_size)
    print("Hidden size: ", config.hidden_size)
    print("Number of layers: ", config.num_hidden_layers)
    print("BOS token id: ", config.bos_token_id)
    print("EOS token id: ", config.eos_token_id)

    # Keyword arguments shared by every from_pretrained call below.
    load_kwargs = {
        "device_map": device_map,
        "offload_folder": "offload",
        "trust_remote_code": True,
    }

    unreleased_model_name = os.getenv("UNRELEASED_MODEL_NAME")
    if unreleased_model_name:
        model_name_lower = unreleased_model_name.lower()
        unreleased_module_path = (
            f"transformers.models.{model_name_lower}.modular_{model_name_lower}"
        )
        class_name = f"{unreleased_model_name}ForCausalLM"
        print(f"Importing unreleased model module: {unreleased_module_path}")

        try:
            model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
            model = model_class.from_pretrained(model_path, config=config, **load_kwargs)
        except (ImportError, AttributeError) as e:
            print(f"Failed to import or load model: {e}")
            # sys.exit instead of the builtin exit(), which is only injected
            # by the site module and is not guaranteed to exist.
            sys.exit(1)
    elif multimodal:
        model = AutoModelForImageTextToText.from_pretrained(
            model_path, config=full_config, **load_kwargs
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            model_path, config=config, **load_kwargs
        )

    print(f"Model class: {model.__class__.__name__}")

    return model, tokenizer, config
|
||||
|
||||
def enable_torch_debugging(model):
    """Register ``debug_hook(name)`` as a forward hook on every leaf module.

    Args:
        model: object providing ``named_modules()``; a module counts as a
            leaf when its ``children()`` iterator yields nothing.
    """
    for module_name, submodule in model.named_modules():
        has_children = any(True for _ in submodule.children())
        if has_children:
            continue  # only leaf modules get a hook
        submodule.register_forward_hook(debug_hook(module_name))
|
||||
|
||||
def get_prompt(args):
    """Return the prompt text to run the model with.

    Sources, in order of priority:
      1. the content of ``args.prompt_file`` (read as UTF-8), when given,
      2. the MODEL_TESTING_PROMPT environment variable, when non-empty,
      3. the built-in default ``"Hello, my name is"``.

    Args:
        args: parsed arguments exposing a ``prompt_file`` attribute.
    """
    if args.prompt_file:
        with open(args.prompt_file, encoding='utf-8') as prompt_source:
            return prompt_source.read()

    # An unset or empty variable falls through to the default prompt.
    return os.getenv("MODEL_TESTING_PROMPT") or "Hello, my name is"
|
||||
|
||||
def main():
    """Run the original model on a prompt and save the last-token logits.

    The model path comes from the MODEL_PATH environment variable, falling
    back to ``--model-path``; the script exits with status 1 when neither is
    set or when the prompt produces no tokens. The prompt is evaluated in
    chunks of 512 tokens, reusing the returned ``past_key_values`` between
    chunks, and the logits of the last token are passed to
    ``save_output_data`` together with the token ids and the prompt.
    """
    args = parse_arguments()
    # The MODEL_PATH environment variable takes precedence over --model-path.
    model_path = os.environ.get("MODEL_PATH", args.model_path)
    if model_path is None:
        print("Error: Model path must be specified either via --model-path argument or MODEL_PATH environment variable")
        sys.exit(1)

    model, tokenizer, _ = load_model_and_tokenizer(model_path, args.device)

    if args.verbose:
        enable_torch_debugging(model)

    model_name = os.path.basename(model_path)

    # Iterate over the model parameters (the tensors) and get the first one
    # and use it to get the device the model is on.
    device = next(model.parameters()).device
    prompt = get_prompt(args)
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    token_ids = input_ids[0].cpu().tolist()

    print(f"Input tokens: {input_ids}")
    print(f"Input text: {repr(prompt)}")
    print(f"Tokenized: {tokenizer.convert_ids_to_tokens(input_ids[0])}")

    num_tokens = input_ids.size(1)
    if num_tokens == 0:
        # Without tokens the loop below never runs and there would be no
        # model output to read logits from.
        print("Error: the prompt produced no tokens, nothing to evaluate")
        sys.exit(1)

    batch_size = 512

    with torch.no_grad():
        past = None
        outputs = None
        for i in range(0, num_tokens, batch_size):
            # Clamp the reported end so the last chunk is not overstated.
            print(f"Processing chunk with tokens {i} to {min(i + batch_size, num_tokens)}")
            chunk = input_ids[:, i:i + batch_size]
            outputs = model(chunk.to(model.device), past_key_values=past, use_cache=True)
            past = outputs.past_key_values

        # Only the output of the final chunk is needed from here on.
        logits = outputs.logits

        # Extract logits for the last token (next token prediction)
        last_logits = logits[0, -1, :].float().cpu().numpy()

        print(f"Logits shape: {logits.shape}")
        print(f"Last token logits shape: {last_logits.shape}")
        print(f"Vocab size: {len(last_logits)}")

        # Print some sample logits for quick verification
        print(f"First 10 logits: {last_logits[:10]}")
        print(f"Last 10 logits: {last_logits[-10:]}")

        # Show top 5 predicted tokens (argsort is ascending, hence the reversal)
        top_indices = np.argsort(last_logits)[-5:][::-1]
        print("Top 5 predictions:")
        for idx in top_indices:
            token = tokenizer.decode([idx])
            print(f" Token {idx} ({repr(token)}): {last_logits[idx]:.6f}")

        save_output_data(last_logits, token_ids, prompt, model_name)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user