### What this PR does / why we need it?
**Scope of Changes**:
| File Path |
| :--- |
| `vllm_ascend/distributed/kv_transfer/__init__.py` |
| `vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py` |
| `vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py` |
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.14.0
- vLLM main: d68209402d
Signed-off-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
@@ -51,34 +51,49 @@ line-length = 120
|
||||
# Folder to be modified
|
||||
exclude = [
|
||||
"tests/**",
|
||||
"vllm_ascend/attention/mla_v1.py",
|
||||
"vllm_ascend/attention/sfa_v1.py",
|
||||
"vllm_ascend/core",
|
||||
"vllm_ascend/distributed",
|
||||
"vllm_ascend/eplb",
|
||||
"vllm_ascend/kv_offload",
|
||||
"vllm_ascend/lora",
|
||||
"vllm_ascend/model_loader",
|
||||
"vllm_ascend/ops/fused_moe",
|
||||
# (3)
|
||||
"vllm_ascend/attention/*.py",
|
||||
"vllm_ascend/core/*.py",
|
||||
"vllm_ascend/distributed/device_communicators/**",
|
||||
"vllm_ascend/distributed/utils.py",
|
||||
# (5)
|
||||
"vllm_ascend/distributed/kv_transfer/kv_pool/**",
|
||||
"vllm_ascend/distributed/kv_transfer/utils/**",
|
||||
"vllm_ascend/kv_offload/**",
|
||||
"vllm_ascend/lora/**",
|
||||
# (6)
|
||||
"vllm_ascend/eplb/**",
|
||||
"vllm_ascend/model_loader/netloader/**",
|
||||
"vllm_ascend/patch/**",
|
||||
# (7)
|
||||
"vllm_ascend/quantization/**",
|
||||
"vllm_ascend/sample/*.py",
|
||||
"vllm_ascend/worker/v2/**",
|
||||
"vllm_ascend/worker/block_table.py",
|
||||
"vllm_ascend/worker/npu_input_batch.py",
|
||||
# (8)
|
||||
"vllm_ascend/ops/__init__.py",
|
||||
"vllm_ascend/ops/activation.py",
|
||||
"vllm_ascend/ops/flashcomm2_oshard_manager.py",
|
||||
"vllm_ascend/ops/layer_shard_linear.py",
|
||||
"vllm_ascend/ops/layernorm.py",
|
||||
"vllm_ascend/ops/linear_op.py",
|
||||
"vllm_ascend/ops/linear.py",
|
||||
"vllm_ascend/ops/mla.py",
|
||||
"vllm_ascend/ops/mm_encoder_attention.py",
|
||||
"vllm_ascend/ops/register_custom_ops.py",
|
||||
"vllm_ascend/ops/rotary_embedding.py",
|
||||
"vllm_ascend/ops/vocab_parallel_embedding.py",
|
||||
"vllm_ascend/ops/weight_prefetch.py",
|
||||
"vllm_ascend/ops/__init__.py",
|
||||
"vllm_ascend/patch",
|
||||
"vllm_ascend/quantization",
|
||||
"vllm_ascend/sample",
|
||||
"vllm_ascend/spec_decode",
|
||||
"vllm_ascend/worker",
|
||||
"vllm_ascend/xlite",
|
||||
"vllm_ascend/spec_decode/**",
|
||||
# (9)
|
||||
"vllm_ascend/worker/model_runner_v1.py",
|
||||
"vllm_ascend/worker/pcp_utils.py",
|
||||
# (10)
|
||||
"vllm_ascend/ops/*linear*.py",
|
||||
"vllm_ascend/worker/worker.py",
|
||||
"vllm_ascend/distributed/parallel_state.py",
|
||||
"vllm_ascend/distributed/utils.py",
|
||||
"vllm_ascend/xlite/*.py",
|
||||
# (11)
|
||||
"vllm_ascend/ops/fused_moe/**",
|
||||
]
|
||||
|
||||
[tool.ruff.lint]
|
||||
|
||||
@@ -15,31 +15,32 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from vllm.distributed.kv_transfer.kv_connector.factory import \
|
||||
KVConnectorFactory
|
||||
from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory
|
||||
|
||||
|
||||
def register_connector():
|
||||
KVConnectorFactory.register_connector(
|
||||
"MooncakeConnectorV1",
|
||||
"vllm_ascend.distributed.kv_transfer.kv_p2p.mooncake_connector",
|
||||
"MooncakeConnector")
|
||||
"MooncakeConnectorV1", "vllm_ascend.distributed.kv_transfer.kv_p2p.mooncake_connector", "MooncakeConnector"
|
||||
)
|
||||
|
||||
KVConnectorFactory.register_connector(
|
||||
"MooncakeConnectorStoreV1",
|
||||
"vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.ascend_store_connector",
|
||||
"AscendStoreConnector")
|
||||
"AscendStoreConnector",
|
||||
)
|
||||
|
||||
KVConnectorFactory.register_connector(
|
||||
"AscendStoreConnector",
|
||||
"vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.ascend_store_connector",
|
||||
"AscendStoreConnector")
|
||||
"AscendStoreConnector",
|
||||
)
|
||||
|
||||
KVConnectorFactory.register_connector(
|
||||
"MooncakeLayerwiseConnector",
|
||||
"vllm_ascend.distributed.kv_transfer.kv_p2p.mooncake_layerwise_connector",
|
||||
"MooncakeLayerwiseConnector")
|
||||
"MooncakeLayerwiseConnector",
|
||||
)
|
||||
|
||||
KVConnectorFactory.register_connector(
|
||||
"UCMConnector", "vllm_ascend.distributed.kv_transfer.kv_pool.ucm_connector",
|
||||
"UCMConnectorV1")
|
||||
"UCMConnector", "vllm_ascend.distributed.kv_transfer.kv_pool.ucm_connector", "UCMConnectorV1"
|
||||
)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user