Support precomputed_embeddings for Llama 4 (#8156)

Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Xiang (Kevin) Li <lik@nvidia.com>
Co-authored-by: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com>
Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
2025-07-27 01:14:49 -07:00
parent 5c9c275bc8
commit 44d600cd67
6 changed files with 449 additions and 123 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -39,7 +39,11 @@ repos:
      - id: codespell
        additional_dependencies: ['tomli']
        args: ['--toml', 'python/pyproject.toml', '-L', 'cann']
-        exclude: test/srt/test_reasoning_parser.py # Exclude the test file that is expected to fail
+        exclude: |
+          (?x)^(
+            test/srt/test_reasoning_parser\.py|
+            docs/backend/vlm_query\.ipynb
+          )$
  - repo: https://github.com/pre-commit/mirrors-clang-format
    rev: v18.1.8
    hooks: