[chore] Remove obsolete comments

This commit is contained in:
zhaoyingzhuo
2025-12-10 15:52:23 +08:00
parent ec935627cb
commit b614823125
6 changed files with 1 addition and 38 deletions

View File

@@ -126,12 +126,7 @@ class TorchCompileWrapperWithCustomDispatcher:
decompiled_file) decompiled_file)
except Exception: except Exception:
pass pass
# if self.vllm_config.compilation_config.use_cudagraph and \
# "update" in new_code.co_names:
# import depyf
# src = depyf.decompile(new_code)
# msg = "Assigning / modifying buffers of nn.Module during forward pass is not allowed when using cudagraph inside the compiler because it will cause silent errors. Please use eager mode or fix the code. The following code contains clues about which buffer is being modified (please search for the usage of the function `update`):\n" + src # noqa
# raise RuntimeError(msg)
@contextmanager @contextmanager
def dispatch_to_code(self, index: int): def dispatch_to_code(self, index: int):

View File

@@ -253,7 +253,6 @@ class InternS1VisionMLP(nn.Module):
self.config = config self.config = config
self.activation_fn = get_act_fn(config.hidden_act) self.activation_fn = get_act_fn(config.hidden_act)
# self.activation_fn = GeluAndMul()
self.fc1 = ColumnParallelLinear(config.hidden_size, self.fc1 = ColumnParallelLinear(config.hidden_size,
config.intermediate_size, config.intermediate_size,
bias=True, bias=True,

View File

@@ -73,7 +73,6 @@ class KunlunOps:
alibi_sqrt=False, alibi_sqrt=False,
): ):
"""PagedAttentionV1""" """PagedAttentionV1"""
# block_size = value_cache.shape[2]
xtorch_ops.paged_attention( xtorch_ops.paged_attention(
x=query, x=query,
k_cache=key_cache, k_cache=key_cache,
@@ -116,7 +115,6 @@ class KunlunOps:
alibi_sqrt=False, alibi_sqrt=False,
): ):
"""PagedAttentionV2""" """PagedAttentionV2"""
# block_size = value_cache.shape[2]
xtorch_ops.paged_attention( xtorch_ops.paged_attention(
x=query, x=query,
k_cache=key_cache, k_cache=key_cache,
@@ -221,17 +219,6 @@ class KunlunOps:
num_heads = query_x.shape[1] // head_size num_heads = query_x.shape[1] // head_size
num_kv_heads = key_x.shape[1] // head_size num_kv_heads = key_x.shape[1] // head_size
# # [num_tokens, num_heads * head_size] -> [num_tokens, num_heads, head_size]
# query_x = query_x.view(num_tokens, num_heads, head_size)
# # [num_tokens, num_kv_heads * head_size] -> [num_tokens, num_kv_heads, head_size]
# key_x = key_x.view(num_tokens, num_kv_heads, head_size)
# # Ensure shapes are correct
# assert query_x.shape == (num_tokens, num_heads, head_size), \
# f"Expected query shape [{num_tokens}, {num_heads}, {head_size}], got {query_x.shape}"
# assert key_x.shape == (num_tokens, num_kv_heads, head_size), \
# f"Expected key shape [{num_tokens}, {num_kv_heads}, {head_size}], got {key_x.shape}"
torch.ops._C.rotary_embedding( torch.ops._C.rotary_embedding(
positions, query_x, key_x, head_size, cos_sin_cache, is_neox_style positions, query_x, key_x, head_size, cos_sin_cache, is_neox_style
) )
@@ -239,8 +226,6 @@ class KunlunOps:
query_x = query_x.view(num_tokens, num_heads * head_size) query_x = query_x.view(num_tokens, num_heads * head_size)
key_x = key_x.view(num_tokens, num_kv_heads * head_size) key_x = key_x.view(num_tokens, num_kv_heads * head_size)
# query.data = query_x
# key.data = key_x
return query_x, key_x return query_x, key_x
# Rotary embedding # Rotary embedding
@@ -290,7 +275,6 @@ class KunlunOps:
kv_cache_dtype, kv_cache_dtype,
): ):
"""reshape_and_cache""" """reshape_and_cache"""
# slot_mapping_cast = slot_mapping.to(torch.int32)
xtorch_ops.reshape_and_cache(key, value, key_cache, value_cache, slot_mapping) xtorch_ops.reshape_and_cache(key, value, key_cache, value_cache, slot_mapping)
@staticmethod @staticmethod

View File

@@ -271,13 +271,6 @@ class KunlunMetadata(AttentionMetadata, PagedAttentionMetadata):
if self.context_lens_tensor is None if self.context_lens_tensor is None
else self.context_lens_tensor[: self.num_prefills] else self.context_lens_tensor[: self.num_prefills]
) )
# for prefix cache, block table only contains blocks that hit
# if self.block_tables is None:
# block_tables = None
# elif self.block_tables.shape[1] == 0:
# block_tables = self.block_tables[:self.num_prefills]
# else:
# block_tables = self.block_tables[:self.num_prefills][:, -1].clone()
block_tables = ( block_tables = (
None None
@@ -442,7 +435,6 @@ class KunlunMetadataBuilder(CommonMetadataBuilder[KunlunMetadata]):
if inter_data.prefix_cache_hit: if inter_data.prefix_cache_hit:
assert context_len != 0 assert context_len != 0
assert context_len % self.block_size == 0 assert context_len % self.block_size == 0
# block_table = block_tables[seq_id]
block_table = block_tables[seq_id][: context_len // self.block_size] block_table = block_tables[seq_id][: context_len // self.block_size]
elif (not is_prompt) and block_tables is not None: elif (not is_prompt) and block_tables is not None:
if curr_sliding_window_block == 0: if curr_sliding_window_block == 0:
@@ -483,7 +475,6 @@ class KunlunMetadataBuilder(CommonMetadataBuilder[KunlunMetadata]):
query_start_loc, dtype=torch.int32, device="cpu" query_start_loc, dtype=torch.int32, device="cpu"
) )
attn_meta.query_start_loc_host = query_start_loc_host attn_meta.query_start_loc_host = query_start_loc_host
# max_kv_len = max(query_lens + prefix_cache_kv_lens)
attn_meta.max_kv_len = max(self.prefix_cache_kv_lens + attn_meta.seq_lens) attn_meta.max_kv_len = max(self.prefix_cache_kv_lens + attn_meta.seq_lens)
# If kv cache is included and there is a hit # If kv cache is included and there is a hit

View File

@@ -516,10 +516,6 @@ def _apply_top_k_top_p(
top_p_mask[:, -1] = False top_p_mask[:, -1] = False
logits_sort.masked_fill_(top_p_mask, -float("inf")) logits_sort.masked_fill_(top_p_mask, -float("inf"))
# Re-sort the probabilities.
# logits = torch.empty_like(logits_sort).scatter_(dim=-1,
# index=logits_idx,
# src=logits_sort)
return logits_sort, logits_idx return logits_sort, logits_idx
@@ -883,7 +879,6 @@ def _sample_with_torch(
seq_groups=seq_groups_arg, seq_groups=seq_groups_arg,
) )
if logits_idx is not None: if logits_idx is not None:
# multinomial_samples[sampling_type] = logits_idx[:, result_idx[:][0]]
token_ids = logits_idx[long_sample_indices].gather( token_ids = logits_idx[long_sample_indices].gather(
dim=1, index=result_idx.to(logits_idx.device) dim=1, index=result_idx.to(logits_idx.device)
) )

View File

@@ -200,7 +200,6 @@ class ModuleLoggingHookPost(object):
self.name_list.pop() self.name_list.pop()
# if os.environ.get("ENABLE_VLLM_MODULE_HOOK", "0") == "1":
if xenvs.ENABLE_VLLM_MODULE_HOOK: if xenvs.ENABLE_VLLM_MODULE_HOOK:
from torch.nn.modules.module import ( from torch.nn.modules.module import (
register_module_forward_pre_hook, register_module_forward_pre_hook,