This commit is contained in:
Ying Sheng
2024-07-24 10:53:07 +00:00
parent 4367f4bb8d
commit 83d2b30d75

View File

@@ -104,8 +104,7 @@ class ModelTpServer:
 else server_args.max_running_requests
 )
 self.max_running_requests = min(
-self.max_running_requests,
-self.model_runner.req_to_token_pool.size - 1
+self.max_running_requests, self.model_runner.req_to_token_pool.size - 1
 )
 self.int_token_logit_bias = torch.tensor(
 get_int_token_logit_bias(self.tokenizer, self.model_config.vocab_size)