format
This commit is contained in:
@@ -104,8 +104,7 @@ class ModelTpServer:
|
|||||||
else server_args.max_running_requests
|
else server_args.max_running_requests
|
||||||
)
|
)
|
||||||
self.max_running_requests = min(
|
self.max_running_requests = min(
|
||||||
self.max_running_requests,
|
self.max_running_requests, self.model_runner.req_to_token_pool.size - 1
|
||||||
self.model_runner.req_to_token_pool.size - 1
|
|
||||||
)
|
)
|
||||||
self.int_token_logit_bias = torch.tensor(
|
self.int_token_logit_bias = torch.tensor(
|
||||||
get_int_token_logit_bias(self.tokenizer, self.model_config.vocab_size)
|
get_int_token_logit_bias(self.tokenizer, self.model_config.vocab_size)
|
||||||
|
|||||||
Reference in New Issue
Block a user