CMakeLists.txt LICENSE MANIFEST.in README.md pyproject.toml requirements-common.txt requirements-cpu.txt requirements-cuda.txt requirements-neuron.txt requirements-rocm.txt setup.py cmake/cpu_extension.cmake cmake/hipify.py cmake/utils.cmake ray_mlu/__init__.py ray_mlu/mlu.py ray_mlu/node.py ray_mlu/nsight.py ray_mlu/test_mlu.py tests/test_cache_block_hashing.py tests/test_config.py tests/test_embedded_commit.py tests/test_inputs.py tests/test_logger.py tests/test_logits_processor.py tests/test_regression.py tests/test_sampling_params.py tests/test_scalartype.py tests/test_sequence.py tests/test_sharded_state_loader.py tests/test_utils.py vllm/__init__.py vllm/_custom_ops.py vllm/_ipex_ops.py vllm/_mlu_ops.py vllm/beam_search.py vllm/block.py vllm/config.py vllm/connections.py vllm/envs.py vllm/forward_context.py vllm/logger.py vllm/logits_process.py vllm/outputs.py vllm/pooling_params.py vllm/py.typed vllm/sampling_params.py vllm/scalar_type.py vllm/scripts.py vllm/sequence.py vllm/tracing.py vllm/utils.py vllm/version.py vllm/version_config vllm.egg-info/PKG-INFO vllm.egg-info/SOURCES.txt vllm.egg-info/dependency_links.txt vllm.egg-info/entry_points.txt vllm.egg-info/requires.txt vllm.egg-info/top_level.txt vllm/adapter_commons/__init__.py vllm/adapter_commons/layers.py vllm/adapter_commons/models.py vllm/adapter_commons/request.py vllm/adapter_commons/utils.py vllm/adapter_commons/worker_manager.py vllm/assets/__init__.py vllm/assets/audio.py vllm/assets/base.py vllm/assets/image.py vllm/assets/video.py vllm/attention/__init__.py vllm/attention/layer.py vllm/attention/selector.py vllm/attention/backends/__init__.py vllm/attention/backends/abstract.py vllm/attention/backends/blocksparse_attn.py vllm/attention/backends/flash_attn.py vllm/attention/backends/flashinfer.py vllm/attention/backends/hpu_attn.py vllm/attention/backends/ipex_attn.py vllm/attention/backends/mlu_attn.py vllm/attention/backends/openvino.py vllm/attention/backends/pallas.py vllm/attention/backends/placeholder_attn.py vllm/attention/backends/rocm_flash_attn.py vllm/attention/backends/torch_sdpa.py vllm/attention/backends/utils.py vllm/attention/backends/xformers.py vllm/attention/ops/__init__.py vllm/attention/ops/hpu_paged_attn.py vllm/attention/ops/ipex_attn.py vllm/attention/ops/paged_attn.py vllm/attention/ops/prefix_prefill.py vllm/attention/ops/triton_flash_attention.py vllm/attention/ops/blocksparse_attention/__init__.py vllm/attention/ops/blocksparse_attention/blocksparse_attention_kernel.py vllm/attention/ops/blocksparse_attention/interface.py vllm/attention/ops/blocksparse_attention/utils.py vllm/compilation/__init__.py vllm/compilation/backends.py vllm/compilation/compile_context.py vllm/compilation/config.py vllm/compilation/counter.py vllm/compilation/decorators.py vllm/compilation/fusion.py vllm/compilation/inductor_pass.py vllm/compilation/levels.py vllm/compilation/reshapes.py vllm/compilation/wrapper.py vllm/core/__init__.py vllm/core/block_manager.py vllm/core/evictor.py vllm/core/interfaces.py vllm/core/placeholder_block_space_manager.py vllm/core/scheduler.py vllm/core/block/__init__.py vllm/core/block/block_table.py vllm/core/block/common.py vllm/core/block/cpu_gpu_block_allocator.py vllm/core/block/interfaces.py vllm/core/block/naive_block.py vllm/core/block/prefix_caching_block.py vllm/core/block/utils.py vllm/distributed/__init__.py vllm/distributed/communication_op.py vllm/distributed/parallel_state.py vllm/distributed/utils.py vllm/distributed/device_communicators/__init__.py vllm/distributed/device_communicators/cuda_wrapper.py vllm/distributed/device_communicators/custom_all_reduce.py vllm/distributed/device_communicators/custom_all_reduce_utils.py vllm/distributed/device_communicators/hpu_communicator.py vllm/distributed/device_communicators/pynccl.py vllm/distributed/device_communicators/pynccl_wrapper.py vllm/distributed/device_communicators/shm_broadcast.py vllm/distributed/device_communicators/tpu_communicator.py vllm/distributed/device_communicators/xpu_communicator.py vllm/engine/__init__.py vllm/engine/arg_utils.py vllm/engine/async_llm_engine.py vllm/engine/async_timeout.py vllm/engine/llm_engine.py vllm/engine/metrics.py vllm/engine/metrics_types.py vllm/engine/protocol.py vllm/engine/multiprocessing/__init__.py vllm/engine/multiprocessing/client.py vllm/engine/multiprocessing/engine.py vllm/engine/output_processor/__init__.py vllm/engine/output_processor/interfaces.py vllm/engine/output_processor/multi_step.py vllm/engine/output_processor/single_step.py vllm/engine/output_processor/stop_checker.py vllm/engine/output_processor/util.py vllm/entrypoints/__init__.py vllm/entrypoints/api_server.py vllm/entrypoints/chat_utils.py vllm/entrypoints/launcher.py vllm/entrypoints/llm.py vllm/entrypoints/logger.py vllm/entrypoints/openai/__init__.py vllm/entrypoints/openai/api_server.py vllm/entrypoints/openai/cli_args.py vllm/entrypoints/openai/logits_processors.py vllm/entrypoints/openai/protocol.py vllm/entrypoints/openai/run_batch.py vllm/entrypoints/openai/serving_chat.py vllm/entrypoints/openai/serving_completion.py vllm/entrypoints/openai/serving_embedding.py vllm/entrypoints/openai/serving_engine.py vllm/entrypoints/openai/serving_tokenization.py vllm/entrypoints/openai/tool_parsers/__init__.py vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py vllm/entrypoints/openai/tool_parsers/utils.py vllm/executor/__init__.py vllm/executor/cpu_executor.py vllm/executor/distributed_gpu_executor.py vllm/executor/distributed_mlu_executor.py vllm/executor/executor_base.py vllm/executor/gpu_executor.py vllm/executor/hpu_executor.py vllm/executor/mlu_executor.py vllm/executor/msgspec_utils.py vllm/executor/multiproc_gpu_executor.py vllm/executor/multiproc_mlu_executor.py vllm/executor/multiproc_worker_utils.py vllm/executor/multiproc_xpu_executor.py vllm/executor/neuron_executor.py vllm/executor/openvino_executor.py vllm/executor/ray_gpu_executor.py vllm/executor/ray_hpu_executor.py vllm/executor/ray_mlu_executor.py vllm/executor/ray_tpu_executor.py vllm/executor/ray_utils.py vllm/executor/ray_xpu_executor.py vllm/executor/tpu_executor.py vllm/executor/xpu_executor.py vllm/inputs/__init__.py vllm/inputs/data.py vllm/inputs/parse.py vllm/inputs/preprocess.py vllm/inputs/registry.py vllm/logging_utils/__init__.py vllm/logging_utils/formatter.py vllm/lora/__init__.py vllm/lora/fully_sharded_layers.py vllm/lora/layers.py vllm/lora/lora.py vllm/lora/models.py vllm/lora/punica.py vllm/lora/request.py vllm/lora/utils.py vllm/lora/worker_manager.py vllm/lora/ops/__init__.py vllm/lora/ops/bgmv_expand.py vllm/lora/ops/bgmv_expand_slice.py vllm/lora/ops/bgmv_shrink.py vllm/lora/ops/sgmv_expand.py vllm/lora/ops/sgmv_expand_slice.py vllm/lora/ops/sgmv_shrink.py vllm/lora/ops/utils.py vllm/model_executor/__init__.py vllm/model_executor/custom_op.py vllm/model_executor/parameter.py vllm/model_executor/pooling_metadata.py vllm/model_executor/sampling_metadata.py vllm/model_executor/utils.py vllm/model_executor/guided_decoding/__init__.py vllm/model_executor/guided_decoding/guided_fields.py vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py vllm/model_executor/guided_decoding/outlines_decoding.py vllm/model_executor/guided_decoding/outlines_logits_processors.py vllm/model_executor/layers/__init__.py vllm/model_executor/layers/activation.py vllm/model_executor/layers/layernorm.py vllm/model_executor/layers/linear.py vllm/model_executor/layers/logits_processor.py vllm/model_executor/layers/pooler.py vllm/model_executor/layers/rejection_sampler.py vllm/model_executor/layers/resampler.py vllm/model_executor/layers/rotary_embedding.py vllm/model_executor/layers/sampler.py vllm/model_executor/layers/spec_decode_base_sampler.py vllm/model_executor/layers/typical_acceptance_sampler.py vllm/model_executor/layers/vocab_parallel_embedding.py vllm/model_executor/layers/fused_moe/__init__.py vllm/model_executor/layers/fused_moe/fused_marlin_moe.py vllm/model_executor/layers/fused_moe/fused_moe.py vllm/model_executor/layers/fused_moe/layer.py vllm/model_executor/layers/fused_moe/moe_pallas.py vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json vllm/model_executor/layers/mamba/__init__.py vllm/model_executor/layers/mamba/mamba_mixer.py vllm/model_executor/layers/mamba/ops/__init__.py vllm/model_executor/layers/mamba/ops/causal_conv1d.py vllm/model_executor/layers/mamba/ops/mamba_ssm.py vllm/model_executor/layers/quantization/__init__.py vllm/model_executor/layers/quantization/aqlm.py vllm/model_executor/layers/quantization/awq.py vllm/model_executor/layers/quantization/awq_marlin.py vllm/model_executor/layers/quantization/awq_triton.py vllm/model_executor/layers/quantization/base_config.py vllm/model_executor/layers/quantization/bitsandbytes.py vllm/model_executor/layers/quantization/deepspeedfp.py vllm/model_executor/layers/quantization/experts_int8.py vllm/model_executor/layers/quantization/fbgemm_fp8.py vllm/model_executor/layers/quantization/fp8.py vllm/model_executor/layers/quantization/gguf.py vllm/model_executor/layers/quantization/gptq.py vllm/model_executor/layers/quantization/gptq_marlin.py vllm/model_executor/layers/quantization/gptq_marlin_24.py vllm/model_executor/layers/quantization/ipex_quant.py vllm/model_executor/layers/quantization/kv_cache.py vllm/model_executor/layers/quantization/marlin.py vllm/model_executor/layers/quantization/modelopt.py vllm/model_executor/layers/quantization/neuron_quant.py vllm/model_executor/layers/quantization/qqq.py vllm/model_executor/layers/quantization/schema.py vllm/model_executor/layers/quantization/tpu_int8.py vllm/model_executor/layers/quantization/compressed_tensors/__init__.py vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py vllm/model_executor/layers/quantization/compressed_tensors/utils.py vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py vllm/model_executor/layers/quantization/kernels/MPLinearKernel.py vllm/model_executor/layers/quantization/kernels/__init__.py vllm/model_executor/layers/quantization/kernels/exllama.py vllm/model_executor/layers/quantization/kernels/machete.py vllm/model_executor/layers/quantization/kernels/marlin.py vllm/model_executor/layers/quantization/utils/__init__.py vllm/model_executor/layers/quantization/utils/layer_utils.py vllm/model_executor/layers/quantization/utils/machete_utils.py vllm/model_executor/layers/quantization/utils/marlin_utils.py vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py vllm/model_executor/layers/quantization/utils/marlin_utils_test.py vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py vllm/model_executor/layers/quantization/utils/marlin_utils_test_qqq.py vllm/model_executor/layers/quantization/utils/quant_utils.py vllm/model_executor/layers/quantization/utils/w8a8_utils.py vllm/model_executor/model_loader/__init__.py vllm/model_executor/model_loader/loader.py vllm/model_executor/model_loader/neuron.py vllm/model_executor/model_loader/openvino.py vllm/model_executor/model_loader/tensorizer.py vllm/model_executor/model_loader/utils.py vllm/model_executor/model_loader/weight_utils.py vllm/model_executor/models/__init__.py vllm/model_executor/models/arctic.py vllm/model_executor/models/baichuan.py vllm/model_executor/models/bart.py vllm/model_executor/models/bert.py vllm/model_executor/models/blip.py vllm/model_executor/models/blip2.py vllm/model_executor/models/bloom.py vllm/model_executor/models/chameleon.py vllm/model_executor/models/chatglm.py vllm/model_executor/models/clip.py vllm/model_executor/models/commandr.py vllm/model_executor/models/dbrx.py vllm/model_executor/models/decilm.py vllm/model_executor/models/deepseek.py vllm/model_executor/models/deepseek_v2.py vllm/model_executor/models/eagle.py vllm/model_executor/models/exaone.py vllm/model_executor/models/falcon.py vllm/model_executor/models/florence2.py vllm/model_executor/models/fuyu.py vllm/model_executor/models/gemma.py vllm/model_executor/models/gemma2.py vllm/model_executor/models/glm4_vision_encoder.py vllm/model_executor/models/gpt2.py vllm/model_executor/models/gpt_bigcode.py vllm/model_executor/models/gpt_j.py vllm/model_executor/models/gpt_neox.py vllm/model_executor/models/granite.py vllm/model_executor/models/granitemoe.py vllm/model_executor/models/h2ovl.py vllm/model_executor/models/hunyuan.py vllm/model_executor/models/idefics2_vision_model.py vllm/model_executor/models/idefics3.py vllm/model_executor/models/interfaces.py vllm/model_executor/models/interfaces_base.py vllm/model_executor/models/intern_vit.py vllm/model_executor/models/internlm2.py vllm/model_executor/models/internlm2_ve.py vllm/model_executor/models/internvl.py vllm/model_executor/models/jais.py vllm/model_executor/models/jamba.py vllm/model_executor/models/llama.py vllm/model_executor/models/llava.py vllm/model_executor/models/llava_next.py vllm/model_executor/models/llava_next_video.py vllm/model_executor/models/llava_onevision.py vllm/model_executor/models/mamba.py vllm/model_executor/models/mamba_cache.py vllm/model_executor/models/medusa.py vllm/model_executor/models/minicpm.py vllm/model_executor/models/minicpm3.py vllm/model_executor/models/minicpmv.py vllm/model_executor/models/mixtral.py vllm/model_executor/models/mixtral_quant.py vllm/model_executor/models/mllama.py vllm/model_executor/models/mlp_speculator.py vllm/model_executor/models/module_mapping.py vllm/model_executor/models/molmo.py vllm/model_executor/models/mpt.py vllm/model_executor/models/nemotron.py vllm/model_executor/models/nvlm_d.py vllm/model_executor/models/olmo.py vllm/model_executor/models/olmoe.py vllm/model_executor/models/opt.py vllm/model_executor/models/orion.py vllm/model_executor/models/paligemma.py vllm/model_executor/models/persimmon.py vllm/model_executor/models/phi.py vllm/model_executor/models/phi3.py vllm/model_executor/models/phi3_small.py vllm/model_executor/models/phi3v.py vllm/model_executor/models/phimoe.py vllm/model_executor/models/pixtral.py vllm/model_executor/models/qwen.py vllm/model_executor/models/qwen2.py vllm/model_executor/models/qwen2_audio.py vllm/model_executor/models/qwen2_cls.py vllm/model_executor/models/qwen2_moe.py vllm/model_executor/models/qwen2_rm.py vllm/model_executor/models/qwen2_vl.py vllm/model_executor/models/registry.py vllm/model_executor/models/roberta.py vllm/model_executor/models/siglip.py vllm/model_executor/models/solar.py vllm/model_executor/models/stablelm.py vllm/model_executor/models/starcoder2.py vllm/model_executor/models/ultravox.py vllm/model_executor/models/utils.py vllm/model_executor/models/xverse.py vllm/multimodal/__init__.py vllm/multimodal/audio.py vllm/multimodal/base.py vllm/multimodal/image.py vllm/multimodal/inputs.py vllm/multimodal/processing.py vllm/multimodal/registry.py vllm/multimodal/utils.py vllm/multimodal/video.py vllm/platforms/__init__.py vllm/platforms/cpu.py vllm/platforms/cuda.py vllm/platforms/hpu.py vllm/platforms/interface.py vllm/platforms/mlu.py vllm/platforms/neuron.py vllm/platforms/openvino.py vllm/platforms/rocm.py vllm/platforms/tpu.py vllm/platforms/xpu.py vllm/plugins/__init__.py vllm/profiler/__init__.py vllm/profiler/layerwise_profile.py vllm/profiler/utils.py vllm/prompt_adapter/__init__.py vllm/prompt_adapter/layers.py vllm/prompt_adapter/models.py vllm/prompt_adapter/request.py vllm/prompt_adapter/utils.py vllm/prompt_adapter/worker_manager.py vllm/spec_decode/__init__.py vllm/spec_decode/batch_expansion.py vllm/spec_decode/draft_model_runner.py vllm/spec_decode/interfaces.py vllm/spec_decode/medusa_worker.py vllm/spec_decode/metrics.py vllm/spec_decode/mlp_speculator_worker.py vllm/spec_decode/mlu_batch_expansion.py vllm/spec_decode/mlu_draft_model_runner.py vllm/spec_decode/mlu_medusa_worker.py vllm/spec_decode/mlu_metrics.py vllm/spec_decode/mlu_mlp_speculator_worker.py vllm/spec_decode/mlu_multi_step_worker.py vllm/spec_decode/mlu_ngram_worker.py vllm/spec_decode/mlu_smaller_tp_proposer_worker.py vllm/spec_decode/mlu_spec_decode_worker.py vllm/spec_decode/mlu_target_model_runner.py vllm/spec_decode/mqa_scorer.py vllm/spec_decode/multi_step_worker.py vllm/spec_decode/ngram_worker.py vllm/spec_decode/proposer_worker_base.py vllm/spec_decode/smaller_tp_proposer_worker.py vllm/spec_decode/spec_decode_worker.py vllm/spec_decode/target_model_runner.py vllm/spec_decode/top1_proposer.py vllm/spec_decode/util.py vllm/transformers_utils/__init__.py vllm/transformers_utils/config.py vllm/transformers_utils/detokenizer.py vllm/transformers_utils/detokenizer_utils.py vllm/transformers_utils/processor.py vllm/transformers_utils/tokenizer.py vllm/transformers_utils/utils.py vllm/transformers_utils/configs/__init__.py vllm/transformers_utils/configs/arctic.py vllm/transformers_utils/configs/chatglm.py vllm/transformers_utils/configs/dbrx.py vllm/transformers_utils/configs/eagle.py vllm/transformers_utils/configs/exaone.py vllm/transformers_utils/configs/falcon.py vllm/transformers_utils/configs/h2ovl.py vllm/transformers_utils/configs/internvl.py vllm/transformers_utils/configs/jais.py vllm/transformers_utils/configs/medusa.py vllm/transformers_utils/configs/mllama.py vllm/transformers_utils/configs/mlp_speculator.py vllm/transformers_utils/configs/mpt.py vllm/transformers_utils/configs/nemotron.py vllm/transformers_utils/configs/nvlm_d.py vllm/transformers_utils/configs/solar.py vllm/transformers_utils/configs/ultravox.py vllm/transformers_utils/tokenizer_group/__init__.py vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py vllm/transformers_utils/tokenizer_group/tokenizer_group.py vllm/transformers_utils/tokenizers/__init__.py vllm/transformers_utils/tokenizers/mistral.py vllm/triton_utils/__init__.py vllm/triton_utils/custom_cache_manager.py vllm/triton_utils/importing.py vllm/usage/__init__.py vllm/usage/usage_lib.py vllm/v1/__init__.py vllm/v1/outputs.py vllm/v1/request.py vllm/v1/serial_utils.py vllm/v1/utils.py vllm/v1/attention/__init__.py vllm/v1/attention/backends/__init__.py vllm/v1/attention/backends/flash_attn.py vllm/v1/core/__init__.py vllm/v1/core/encoder_cache_manager.py vllm/v1/core/kv_cache_manager.py vllm/v1/core/kv_cache_utils.py vllm/v1/core/scheduler.py vllm/v1/engine/__init__.py vllm/v1/engine/async_llm.py vllm/v1/engine/async_stream.py vllm/v1/engine/core.py vllm/v1/engine/core_client.py vllm/v1/engine/detokenizer.py vllm/v1/engine/llm_engine.py vllm/v1/engine/mm_input_mapper.py vllm/v1/engine/processor.py vllm/v1/executor/__init__.py vllm/v1/executor/gpu_executor.py vllm/v1/sample/__init__.py vllm/v1/sample/metadata.py vllm/v1/sample/sampler.py vllm/v1/worker/__init__.py vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_worker.py vllm/worker/__init__.py vllm/worker/cache_engine.py vllm/worker/cpu_embedding_model_runner.py vllm/worker/cpu_enc_dec_model_runner.py vllm/worker/cpu_model_runner.py vllm/worker/cpu_worker.py vllm/worker/embedding_model_runner.py vllm/worker/enc_dec_model_runner.py vllm/worker/hpu_model_runner.py vllm/worker/hpu_worker.py vllm/worker/mlu_enc_dec_model_runner.py vllm/worker/mlu_model_runner.py vllm/worker/mlu_multi_step_model_runner.py vllm/worker/mlu_multi_step_worker.py vllm/worker/mlu_worker.py vllm/worker/model_runner.py vllm/worker/model_runner_base.py vllm/worker/multi_step_model_runner.py vllm/worker/multi_step_tpu_worker.py vllm/worker/multi_step_worker.py vllm/worker/neuron_model_runner.py vllm/worker/neuron_worker.py vllm/worker/openvino_model_runner.py vllm/worker/openvino_worker.py vllm/worker/tpu_model_runner.py vllm/worker/tpu_worker.py vllm/worker/utils.py vllm/worker/worker.py vllm/worker/worker_base.py vllm/worker/xpu_model_runner.py vllm/worker/xpu_worker.py