[docs] fix HF reference script command (#4148)

This commit is contained in:
Adarsh Shirawalmath
2025-03-07 02:51:54 +05:30
committed by GitHub
parent 9c58e68b4c
commit 19fd57bcd7
2 changed files with 7 additions and 4 deletions

View File

@@ -83,7 +83,7 @@ You should test the new vLM locally against hf models. See [`mmmu`](https://gith
For interactive debugging, you can compare the outputs of huggingface/transformers and SGLang.
The following two commands should give the same text output and very similar prefill logits.
- Get the reference output by `python3 scripts/playground/reference_hf.py --model-path [new model] --model-type {text,vlm}`
- Get the SGLang output by `python3 -m sglang.bench_one_batch --correct --model [new model]`
#### Add the model to the test suite

View File

@@ -1,6 +1,10 @@
""" """
Usage: Usage: python3 scripts/playground/reference_hf.py --model-path MODEL_PATH --model-type {text,vlm} [--max-new-tokens NUM] [--dtype DTYPE]
python3 reference_hf.py --model TinyLlama/TinyLlama-1.1B-Chat-v0.4 --model-path MODEL_PATH: Path to model (default: TinyLlama/TinyLlama-1.1B-Chat-v0.4)
--model-type {text,vlm}: Model type, text or vlm (default: text)
--max-new-tokens NUM: Max new tokens to generate (default: 16)
--dtype DTYPE: Data type for computation (default: float16)
Note: '--model' is deprecated; use '--model-path'. Runs normal_text() for text, vlm_text_with_image() for vlm.
Reference output:
========== Prompt 0 ==========
@@ -178,7 +182,6 @@ if __name__ == "__main__":
"--model-path", "--model-path",
type=str, type=str,
default="TinyLlama/TinyLlama-1.1B-Chat-v0.4", default="TinyLlama/TinyLlama-1.1B-Chat-v0.4",
# default="meta-llama/Llama-2-7b-chat-hf",
)
parser.add_argument("--max-new-tokens", type=int, default=16)