Compat with latest VLLM 0.4.2 main + fork.number rename + Flashinfer 0.0.4 (#380)

Co-authored-by: ZX <zx@lbx.dev>
Co-authored-by: ZhouXingg <165115237+ZhouXingg@users.noreply.github.com>
This commit is contained in:
Qubitium
2024-05-12 07:37:49 +08:00
committed by GitHub
parent a511a2d089
commit 33b242df30
20 changed files with 611 additions and 187 deletions

View File

@@ -1,6 +1,6 @@
 import torch
 from torch import nn
-from vllm.model_executor.parallel_utils.communication_op import (
+from vllm.distributed import (
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_gather,
 )