Clean up import vllm in quantization/__init__.py (#4834)

This commit is contained in:
Lianmin Zheng
2025-03-28 10:34:10 -07:00
committed by GitHub
parent ef9a378a20
commit 74e0ac1dbd
14 changed files with 191 additions and 254 deletions

View File

@@ -53,8 +53,6 @@ class TpModelWorker:
req_to_token_pool: Optional[ReqToTokenPool] = None,
token_to_kv_pool_allocator: Optional[TokenToKVPoolAllocator] = None,
):
self.worker = self
# Parse args
self.tp_rank = tp_rank
@@ -134,6 +132,9 @@ class TpModelWorker:
)[0]
set_random_seed(self.random_seed)
# A self-reference so that this class has the same member as TpModelWorkerClient
self.worker = self
def get_worker_info(self):
return (
self.max_total_num_tokens,