Sync from v0.13

2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions
--- a/requirements/build.txt
+++ b/requirements/build.txt
@@ -0,0 +1,11 @@
+# Should be mirrored in pyproject.toml
+cmake>=3.26.1
+ninja
+packaging>=24.2
+setuptools>=77.0.3,<81.0.0
+setuptools-scm>=8
+torch==2.9.0
+wheel
+jinja2>=3.1.6
+regex
+build
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -0,0 +1,54 @@
+regex # Replaces the stdlib `re` module for higher-performance regex matching
+cachetools
+psutil
+sentencepiece  # Required for LLaMA tokenizer.
+numpy
+requests >= 2.26.0
+tqdm
+blake3
+py-cpuinfo
+transformers >= 4.56.0, < 5
+tokenizers >= 0.21.1  # Required for fast incremental detokenization.
+protobuf # Required by LlamaTokenizer.
+fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
+aiohttp
+openai >= 1.99.1  # For Responses API with reasoning content
+pydantic >= 2.12.0
+prometheus_client >= 0.18.0
+pillow  # Required for image processing
+prometheus-fastapi-instrumentator >= 7.0.0
+tiktoken >= 0.6.0  # Required for DBRX tokenizer
+lm-format-enforcer == 0.11.3
+llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x" or platform_machine == "ppc64le"
+outlines_core == 0.2.11
+# required for outlines backend disk cache
+diskcache == 5.6.3
+lark == 1.2.2
+xgrammar == 0.1.27; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
+typing_extensions >= 4.10
+filelock >= 3.16.1 # must include the change from https://github.com/tox-dev/filelock/pull/317
+partial-json-parser # used for parsing partial JSON outputs
+pyzmq >= 25.0.0
+msgspec
+gguf >= 0.17.0
+mistral_common[image] >= 1.8.5
+opencv-python-headless >= 4.11.0    # required for video IO
+pyyaml
+six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
+setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
+einops # Required for Qwen2-VL.
+compressed-tensors == 0.12.2 # required for compressed-tensors
+depyf==0.20.0 # required for profiling and debugging with compilation config
+cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
+watchfiles # required for http server to monitor the updates of TLS files
+python-json-logger # Used by logging as per examples/others/logging_configuration.md
+scipy # Required for phi-4-multimodal-instruct
+ninja # Required for xgrammar, rocm, tpu, xpu
+pybase64 # fast base64 implementation
+cbor2 # Required for cross-language serialization of hashable objects
+ijson # Required for mistral streaming tool parser
+setproctitle # Used to set process names for better debugging and monitoring
+openai-harmony >= 0.0.3  # Required for gpt-oss
+anthropic == 0.71.0
+model-hosting-container-standards >= 0.1.9, < 1.0.0
+mcp
--- a/requirements/cpu-build.txt
+++ b/requirements/cpu-build.txt
@@ -0,0 +1,11 @@
+cmake>=3.26.1
+ninja
+packaging>=24.2
+setuptools>=77.0.3,<81.0.0
+setuptools-scm>=8
+torch==2.9.1+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
+torch==2.9.1; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "aarch64"
+scons; platform_machine == "aarch64"    # needed to build Arm Compute Library (ACL)
+wheel
+jinja2>=3.1.6
+regex
--- a/requirements/cpu.txt
+++ b/requirements/cpu.txt
@@ -0,0 +1,20 @@
+# Common dependencies
+-r common.txt
+
+numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative decoding
+
+# Dependencies for CPUs
+torch==2.9.1+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
+torch==2.9.1; platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "aarch64"
+
+# required for the image processor of minicpm-o-2_6, this must be updated alongside torch
+torchaudio; platform_machine != "s390x"
+
+# required for the image processor of phi3v, this must be updated alongside torch
+torchvision; platform_machine != "s390x"
+
+# Intel OpenMP runtime, only for x86_64 CPUs
+intel-openmp==2024.2.1; platform_machine == "x86_64"
+
+# Use this to gather CPU info and optimize based on ARM Neoverse cores
+py-cpuinfo; platform_machine == "aarch64"
--- a/requirements/cuda.txt
+++ b/requirements/cuda.txt
@@ -0,0 +1,13 @@
+# Common dependencies
+-r common.txt
+
+numba == 0.61.2 # Required for N-gram speculative decoding
+
+# Dependencies for NVIDIA GPUs
+ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
+torch==2.9.0
+torchaudio==2.9.0
+# These must be updated alongside torch
+torchvision==0.24.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+# FlashInfer should be updated together with the Dockerfile
+flashinfer-python==0.5.3
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -0,0 +1,5 @@
+-r lint.txt
+-r test.txt
+
+# Avoid adding requirements directly to this file.
+# Instead, modify the two files referenced above.
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -0,0 +1,16 @@
+mkdocs
+mkdocs-api-autonav
+mkdocs-material
+mkdocstrings-python
+mkdocs-gen-files
+mkdocs-awesome-nav
+mkdocs-glightbox
+mkdocs-git-revision-date-localized-plugin
+mkdocs-minify-plugin
+regex
+ruff
+pydantic
+
+# For generating argparse docs.
+# Adding requirements here should only be used as a last resort.
+msgspec  # Needed for multiple inheritance involving msgspec.Struct
--- a/requirements/kv_connectors.txt
+++ b/requirements/kv_connectors.txt
@@ -0,0 +1,2 @@
+lmcache
+nixl >= 0.7.1 # Required for disaggregated prefill
--- a/requirements/lint.txt
+++ b/requirements/lint.txt
@@ -0,0 +1,2 @@
+# formatting
+pre-commit==4.0.1
--- a/requirements/nightly_torch_test.txt
+++ b/requirements/nightly_torch_test.txt
@@ -0,0 +1,47 @@
+# testing
+pytest
+tensorizer==2.10.1
+pytest-forked
+pytest-asyncio
+pytest-rerunfailures
+pytest-shard
+pytest-timeout
+
+# testing utils
+backoff # required for phi4mm test
+blobfile # required for kimi-vl test
+einops # required for MPT, qwen-vl and Mamba
+httpx
+librosa # required for audio tests
+vocos # required for minicpmo_26 test
+peft
+pqdm
+ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
+sentence-transformers # required for embedding tests
+soundfile # required for audio tests
+jiwer # required for audio tests
+timm # required for internvl test
+transformers_stream_generator # required for qwen-vl test
+matplotlib # required for qwen-vl test
+mistral_common[image,audio] >= 1.8.5 # required for voxtral test
+num2words # required for smolvlm test
+opencv-python-headless >= 4.11.0 # required for video test
+datamodel_code_generator # required for minicpm3 test
+lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test
+mteb>=1.38.11, <2 # required for mteb test
+transformers==4.57.3
+tokenizers==0.22.0
+schemathesis>=3.39.15 # Required for openai schema test.
+# quantization
+bitsandbytes>=0.46.1
+buildkite-test-collector==0.1.9
+
+
+genai_perf==0.0.8
+tritonclient==2.51.0
+
+numba == 0.61.2 # Required for N-gram speculative decoding
+numpy
+runai-model-streamer[s3,gcs]==0.15.3
+fastsafetensors>=0.1.10
+pydantic>=2.12 # 2.11 leads to error on python 3.13
--- a/requirements/rocm-build.txt
+++ b/requirements/rocm-build.txt
@@ -0,0 +1,17 @@
+# Common dependencies
+-r common.txt
+
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.9.0
+torchvision==0.24.0
+torchaudio==2.9.0
+
+triton==3.5.0
+cmake>=3.26.1,<4
+packaging>=24.2
+setuptools>=77.0.3,<80.0.0
+setuptools-scm>=8
+wheel
+jinja2>=3.1.6
+amdsmi==6.4.3
+timm>=1.0.17
--- a/requirements/rocm-test.txt
+++ b/requirements/rocm-test.txt
@@ -0,0 +1,90 @@
+# Common dependencies
+-r common.txt
+
+# Test infrastructure
+tblib==3.1.0
+pytest==8.3.5
+pytest-asyncio==0.24.0
+pytest-timeout==2.3.1
+pytest-cov==6.3.0
+pytest-forked==1.6.0
+pytest-rerunfailures==14.0
+pytest-shard==0.1.2
+
+# Async/HTTP dependencies
+anyio==4.6.2.post1
+    # via httpx, starlette
+aiohttp==3.13.0
+    # via gpt-oss
+httpx==0.27.2
+    # HTTP testing
+
+# Audio processing dependencies
+audioread==3.0.1
+    # via librosa
+cffi==1.17.1
+    # via soundfile
+decorator==5.2.1
+    # via librosa
+lazy-loader==0.4
+    # via librosa
+platformdirs==4.3.6
+    # via pooch
+pooch==1.8.2
+    # via librosa
+soundfile==0.13.1
+    # via librosa
+soxr==0.5.0.post1
+    # via librosa
+librosa==0.10.2.post1
+
+# Retrieval and search
+bm25s==0.2.13
+    # via mteb
+pystemmer==3.0.0
+    # via mteb
+
+# Multi-modal processing
+blobfile==3.0.0
+    # Multi-Modal Models Test
+decord==0.6.0
+    # video processing, required by entrypoints/openai/test_video.py
+rapidfuzz==3.12.1
+
+# OpenAI compatibility and testing
+gpt-oss==0.0.8
+    # OpenAI compatibility tests
+schemathesis==3.39.15
+    # OpenAI schema test
+
+# Evaluation and benchmarking
+lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d
+jiwer==4.0.0
+
+# Required for multiprocessed tests that use spawn method, Datasets and Evaluate Test
+multiprocess==0.70.16
+
+# Required for v1/metrics/test_engine_logger_apis.py
+ray[cgraph,default]>=2.48.0
+
+# Plugins test
+terratorch @ git+https://github.com/IBM/terratorch.git@07184fcf91a1324f831ff521dd238d97fe350e3e
+torchgeo==0.7.0
+    # via terratorch
+# MTEB Benchmark Test
+mteb==2.1.2
+
+# Data processing
+xgrammar @ git+https://github.com/divakar-amd/xgrammar@3272f7c520564858056a60480d5afdf69ae79c84
+    # Test async scheduling
+
+# Utilities
+num2words==0.5.14
+    # via lm-eval
+pqdm==0.2.0
+    # via lm-eval
+
+# Required for suffix decoding test
+arctic-inference == 0.1.1
+# Required for Nemotron test
+open-clip-torch==2.32.0
--- a/requirements/rocm.txt
+++ b/requirements/rocm.txt
@@ -0,0 +1,18 @@
+# Common dependencies
+-r common.txt
+
+numba == 0.61.2 # Required for N-gram speculative decoding
+
+# Dependencies for AMD GPUs
+datasets
+ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
+peft
+pytest-asyncio
+tensorizer==2.10.1
+packaging>=24.2
+setuptools>=77.0.3,<80.0.0
+setuptools-scm>=8
+runai-model-streamer[s3,gcs]==0.15.3
+conch-triton-kernels==1.2.1
+timm>=1.0.17
+fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@d6f998a03432b2452f8de2bb5cefb5af9795d459
--- a/requirements/test.in
+++ b/requirements/test.in
@@ -0,0 +1,59 @@
+# testing
+pytest
+tensorizer==2.10.1
+pytest-forked
+pytest-asyncio
+pytest-rerunfailures
+pytest-shard
+pytest-timeout
+pytest-cov
+
+# testing utils
+backoff # required for phi4mm test
+blobfile # required for kimi-vl test
+einops # required for MPT, qwen-vl
+httpx
+librosa # required for audio tests
+vector_quantize_pytorch # required for minicpmo_26 test
+vocos # required for minicpmo_26 test
+peft>=0.15.0 # required for phi-4-mm test
+pqdm
+ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
+sentence-transformers # required for embedding tests
+soundfile # required for audio tests
+jiwer # required for audio tests
+tblib # for pickling test exceptions
+timm >=1.0.17 # required for internvl and gemma3n-mm test
+torch==2.9.0
+torchaudio==2.9.0
+torchvision==0.24.0
+transformers_stream_generator # required for qwen-vl test
+matplotlib # required for qwen-vl test
+mistral_common[image,audio] >= 1.8.5 # required for voxtral test
+num2words # required for smolvlm test
+open_clip_torch==2.32.0 # Required for nemotron_vl test
+opencv-python-headless >= 4.11.0 # required for video test
+datamodel_code_generator # required for minicpm3 test
+# TODO: Use lm-eval[api]==0.4.10 once released
+lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test
+mteb[bm25s]>=2, <3 # required for mteb test
+transformers==4.57.3
+tokenizers==0.22.0
+schemathesis>=3.39.15 # Required for openai schema test.
+# quantization
+bitsandbytes==0.46.1
+buildkite-test-collector==0.1.9
+
+
+genai_perf==0.0.8
+tritonclient==2.51.0
+
+arctic-inference == 0.1.1 # Required for suffix decoding test
+numba == 0.61.2 # Required for N-gram speculative decoding
+numpy
+runai-model-streamer[s3,gcs]==0.15.3
+fastsafetensors>=0.1.10
+pydantic>=2.12 # 2.11 leads to error on python 3.13
+decord==0.6.0
+terratorch @ git+https://github.com/IBM/terratorch.git@1.1.rc3 # required for PrithviMAE test
+gpt-oss >= 0.0.7; python_version > '3.11'
--- a/requirements/test.txt
+++ b/requirements/test.txt
--- a/requirements/tpu.txt
+++ b/requirements/tpu.txt
@@ -0,0 +1,14 @@
+# Common dependencies
+-r common.txt
+
+# Dependencies for TPU
+cmake>=3.26.1
+packaging>=24.2
+setuptools-scm>=8
+wheel
+jinja2>=3.1.6
+ray[default]
+ray[data]
+setuptools==78.1.0
+nixl==0.3.0
+tpu-inference==0.12.0
--- a/requirements/xpu.txt
+++ b/requirements/xpu.txt
@@ -0,0 +1,18 @@
+# Common dependencies
+-r common.txt
+
+ray>=2.9
+cmake>=3.26.1
+packaging>=24.2
+setuptools-scm>=8
+setuptools>=77.0.3,<81.0.0
+wheel
+jinja2>=3.1.6
+datasets # for benchmark scripts
+numba == 0.61.2 # Required for N-gram speculative decoding
+--extra-index-url=https://download.pytorch.org/whl/xpu
+torch==2.9.0+xpu
+torchaudio
+torchvision
+
+intel-extension-for-pytorch @ https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.9.10.post0%2Bxpu-cp312-cp312-linux_x86_64.whl