Tiny fix missing alt stream in nextn layer (#10768)
This commit is contained in:
@@ -33,11 +33,14 @@ from sglang.srt.layers.vocab_parallel_embedding import (
|
|||||||
from sglang.srt.managers.schedule_batch import global_server_args_dict
|
from sglang.srt.managers.schedule_batch import global_server_args_dict
|
||||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
||||||
from sglang.srt.models.deepseek_v2 import DeepseekV2DecoderLayer, DeepseekV3ForCausalLM
|
from sglang.srt.models.deepseek_v2 import DeepseekV2DecoderLayer, DeepseekV3ForCausalLM
|
||||||
from sglang.srt.utils import BumpAllocator, add_prefix
|
from sglang.srt.utils import BumpAllocator, add_prefix, is_cuda
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
_is_cuda = is_cuda()
|
||||||
|
|
||||||
|
|
||||||
class DeepseekModelNextN(nn.Module):
|
class DeepseekModelNextN(nn.Module):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -66,12 +69,14 @@ class DeepseekModelNextN(nn.Module):
|
|||||||
|
|
||||||
self.eh_proj = nn.Linear(2 * config.hidden_size, config.hidden_size, bias=False)
|
self.eh_proj = nn.Linear(2 * config.hidden_size, config.hidden_size, bias=False)
|
||||||
|
|
||||||
|
self.alt_stream = torch.cuda.Stream() if _is_cuda else None
|
||||||
self.decoder = DeepseekV2DecoderLayer(
|
self.decoder = DeepseekV2DecoderLayer(
|
||||||
config,
|
config,
|
||||||
0,
|
0,
|
||||||
quant_config=quant_config,
|
quant_config=quant_config,
|
||||||
is_nextn=True,
|
is_nextn=True,
|
||||||
prefix=add_prefix("decoder", prefix),
|
prefix=add_prefix("decoder", prefix),
|
||||||
|
alt_stream=self.alt_stream,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.shared_head = nn.Module()
|
self.shared_head = nn.Module()
|
||||||
|
|||||||
Reference in New Issue
Block a user