# xc-llm-ascend/pyproject.toml

[build-system]
# Packages a PEP 517 build frontend must have available before it can invoke
# the backend named in `build-backend` below.
# Should be mirrored in requirements.txt
requires = [
    "attrs",
    "cmake>=3.26",
    "decorator",
    "einops",
    "googleapis-common-protos",
    "numpy<2.0.0",
    "packaging",
    "pip",
    "pybind11",
    "pyyaml",
    "scipy",
    "pandas",
    "pandas-stubs",
    "psutil",
    "setuptools>=64",
    "setuptools-scm>=8",
    "transformers<=4.57.1",
    "torch-npu==2.9.0",
    "torch==2.9.0",
    "torchvision",
    "wheel",
    "msgpack",
    "quart",
    "numba",
    "xgrammar>=0.1.30",
    "fastapi<0.124.0",
    # Required to avoid numpy version conflict with vllm
    "opencv-python-headless<=4.11.0.86",
    "compressed_tensors>=0.11.0",
    "arctic-inference==0.1.1",
    "triton-ascend==3.2.0",
]
build-backend = "setuptools.build_meta"

[tool.pymarkdown]
# PyMarkdown rule overrides, listed in rule-id order.
# The comment above each group gives the rule's alias.

# md004: ul-style
plugins.md004.style = "sublist"

# md007: ul-indent
plugins.md007.indent = 4
plugins.md007.start_indented = true

# md013: line-length
plugins.md013.enabled = false

# md024: no-duplicate-headers
plugins.md024.allow_different_nesting = true

# md029: ol-prefix
plugins.md029.enabled = false

# md033: inline-html
plugins.md033.enabled = false

# md041: first-line-h1
plugins.md041.enabled = false

# md046: code-block-style
plugins.md046.enabled = false

[tool.ruff]
# TODO: PEP 8 limits lines to a maximum of 79 characters; 120 is allowed here
# for now.
line-length = 120
# Files and folders that ruff skips.
# NOTE(review): presumably these are still to be cleaned up and then removed
# from this list — confirm.
exclude = [
    "tests/**",
    "vllm_ascend/attention/mla_v1.py",
    "vllm_ascend/attention/sfa_v1.py",
    "vllm_ascend/core",
    "vllm_ascend/distributed",
    "vllm_ascend/eplb",
    "vllm_ascend/kv_offload",
    "vllm_ascend/lora",
    "vllm_ascend/model_loader",
    "vllm_ascend/ops/fused_moe",
    "vllm_ascend/ops/activation.py",
    "vllm_ascend/ops/flashcomm2_oshard_manager.py",
    "vllm_ascend/ops/layer_shard_linear.py",
    "vllm_ascend/ops/layernorm.py",
    "vllm_ascend/ops/linear_op.py",
    "vllm_ascend/ops/linear.py",
    "vllm_ascend/ops/mla.py",
    "vllm_ascend/ops/mm_encoder_attention.py",
    "vllm_ascend/ops/register_custom_ops.py",
    "vllm_ascend/ops/rotary_embedding.py",
    "vllm_ascend/ops/vocab_parallel_embedding.py",
    "vllm_ascend/ops/weight_prefetch.py",
    "vllm_ascend/ops/__init__.py",
    "vllm_ascend/patch",
    "vllm_ascend/quantization",
    "vllm_ascend/sample",
    "vllm_ascend/spec_decode",
    "vllm_ascend/worker",
    "vllm_ascend/xlite",
]

[tool.ruff.lint]
# Rule families that are enabled.
select = [
    # pycodestyle
    "E",
    # Pyflakes
    "F",
    # pyupgrade
    "UP",
    # flake8-bugbear
    "B",
    # flake8-simplify
    "SIM",
    # isort
    "I",
    # flake8-logging-format
    "G",
]
# Individual rules switched off even though their family is selected above.
ignore = [
    # star imports
    "F405",
    "F403",
    # lambda expression assignment
    "E731",
    # zip without `strict=`
    "B905",
    # Loop control variable not used within loop body
    "B007",
    # f-string format
    "UP032",
    # TODO: FIX ME
    # NOTE(review): presumably the TODO covers every rule below — confirm.
    # logging statement uses an f-string
    "G004",
    # `raise` inside `except` without `from`
    "B904",
    # if/else block that could be a ternary expression
    "SIM108",
    # nested `if` statements that could be collapsed
    "SIM102",
]

[tool.ruff.format]
# Also format code examples embedded in docstrings.
docstring-code-format = true