fix some typos (#6209)
Co-authored-by: Brayden Zhong <b8zhong@uwaterloo.ca>
This commit is contained in:
@@ -1667,7 +1667,7 @@ class Scheduler(
|
||||
can_cuda_graph = 0
|
||||
|
||||
if not spec_algorithm.is_none():
|
||||
- # TODO(sang): Support cuda graph when idle batch is there.
|
||||
+ # TODO(sang): Support CUDA graph when idle batch is there.
|
||||
if local_batch is None or local_batch.forward_mode.is_idle():
|
||||
can_cuda_graph = 0
|
||||
|
||||
@@ -1704,7 +1704,7 @@ class Scheduler(
|
||||
local_batch.global_num_tokens = global_num_tokens
|
||||
local_batch.global_num_tokens_for_logprob = global_num_tokens_for_logprob
|
||||
|
||||
- # Check forward mode for cuda graph
|
||||
+ # Check forward mode for CUDA graph
|
||||
if not disable_cuda_graph:
|
||||
local_batch.can_run_dp_cuda_graph = can_cuda_graph
|
||||
|
||||
|
||||
@@ -238,7 +238,7 @@ class TokenizerManager:
|
||||
self.metrics_collector = TokenizerMetricsCollector(
|
||||
labels={
|
||||
"model_name": self.server_args.served_model_name,
|
||||
- # TODO: Add lora name/path in the future,
|
||||
+ # TODO: Add LoRA name/path in the future,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -213,7 +213,7 @@ class TpModelWorkerClient:
|
||||
penalizer_orchestrator=None,
|
||||
)
|
||||
|
||||
- # A cuda stream sync here to avoid the cuda illegal memory access error.
|
||||
+ # A CUDA stream sync here to avoid the CUDA illegal memory access error.
|
||||
self.scheduler_stream.synchronize()
|
||||
|
||||
# Push a new batch to the queue
|
||||
|
||||
Reference in New Issue
Block a user