diff --git a/docs/references/supported_models.md b/docs/references/supported_models.md
index 91366d8c0..69b11bfbd 100644
--- a/docs/references/supported_models.md
+++ b/docs/references/supported_models.md
@@ -83,7 +83,7 @@ You should test the new vLM locally against hf models. See [`mmmu`](https://gith
 For interactive debugging, you can compare the outputs of huggingface/transformers and SGLang.
 The following two commands should give the same text output and very similar prefill logits.
 
-- Get the reference output by `python3 scripts/playground/reference_hf.py --model [new model]`
+- Get the reference output by `python3 scripts/playground/reference_hf.py --model-path [new model] --model-type {text,vlm}`
 - Get the SGLang output by `python3 -m sglang.bench_one_batch --correct --model [new model]`
 
 #### Add the model to the test suite
diff --git a/scripts/playground/reference_hf.py b/scripts/playground/reference_hf.py
index 3ece3d648..14d23fb76 100644
--- a/scripts/playground/reference_hf.py
+++ b/scripts/playground/reference_hf.py
@@ -1,6 +1,10 @@
 """
-Usage:
-python3 reference_hf.py --model TinyLlama/TinyLlama-1.1B-Chat-v0.4
+Usage: python3 scripts/playground/reference_hf.py --model-path MODEL_PATH --model-type {text,vlm} [--max-new-tokens NUM] [--dtype DTYPE]
+    --model-path MODEL_PATH: Path to model (default: TinyLlama/TinyLlama-1.1B-Chat-v0.4)
+    --model-type {text,vlm}: Model type, text or vlm (default: text)
+    --max-new-tokens NUM: Max new tokens to generate (default: 16)
+    --dtype DTYPE: Data type for computation (default: float16)
+Note: '--model' is deprecated; use '--model-path'. Runs normal_text() for text, vlm_text_with_image() for vlm.
 
 Reference output:
 ========== Prompt 0 ==========
@@ -178,7 +182,6 @@ if __name__ == "__main__":
         "--model-path",
         type=str,
         default="TinyLlama/TinyLlama-1.1B-Chat-v0.4",
-        # default="meta-llama/Llama-2-7b-chat-hf",
     )
     parser.add_argument("--max-new-tokens", type=int, default=16)