minor: Add basic editorconfig and pre-commit hooks to enforce style for whitespaces (#1926)

2024-11-06 21:46:04 +08:00
parent 96766101b4
commit a5e0defb5a
77 changed files with 209 additions and 172 deletions
--- a/python/sglang/srt/layers/quantization/base_config.py
+++ b/python/sglang/srt/layers/quantization/base_config.py
@@ -134,4 +134,4 @@ def method_has_implemented_embedding(
    class_embedding = inspect.getattr_static(method_class, "embedding", None)

    return (class_embedding is not None
-            and class_embedding is not base_embedding)
+            and class_embedding is not base_embedding)
--- a/python/sglang/srt/layers/vocab_parallel_embedding.py
+++ b/python/sglang/srt/layers/vocab_parallel_embedding.py
@@ -311,7 +311,7 @@ class VocabParallelEmbedding(torch.nn.Module):
    def get_sharded_to_full_mapping(self) -> Optional[List[int]]:
        """Get a mapping that can be used to reindex the gathered
        logits for sampling.
-        
+
        During sampling, we gather logits from all ranks. The relationship
        of index->token_id will follow the same format as outlined in the class
        docstring. However, after the gather, we want to reindex the final
@@ -483,4 +483,4 @@ class ParallelLMHead(VocabParallelEmbedding):

    def forward(self, input_):
        del input_
-        raise RuntimeError("LMHead's weights should be used in the sampler.")
+        raise RuntimeError("LMHead's weights should be used in the sampler.")
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -838,7 +838,7 @@ class Scheduler:
        time_per_output_tokens_iter: List[float] = []

        # Request stats
-        #   Decode 
+        #   Decode
        gen_throughput: float = 0.0
        #   Latency
        time_e2e_requests: List[float] = []
@@ -866,11 +866,11 @@ class Scheduler:
                    time_waiting_requests.append(req.queued_time - req.created_time)
                    num_prompt_tokens_requests.append(len(req.origin_input_ids))
                    num_generation_tokens_requests.append(len(req.output_ids))
-                    finished_reason_requests.append(                            
+                    finished_reason_requests.append(
                            req.finished_reason.to_json()
                            if req.finished_reason is not None
                            else None)
-    
+
        return Stats(
            new_seq=new_seq,
            num_running_req=num_running_req,
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -384,7 +384,7 @@ class TokenizerManager:
            obj.load_format = self.server_args.load_format

        if not self.model_update_lock.locked():
-        
+
            async with self.model_update_lock:
                # wait for the previous generation requests to finish
                while len(self.rid_to_state) > 0:
--- a/python/sglang/srt/metrics/metrics_collector.py
+++ b/python/sglang/srt/metrics/metrics_collector.py
@@ -151,7 +151,7 @@ class Metrics:
                0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.04, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75,
                1.0, 2.5
            ])
-        
+
        # Request Stats
        #   Metadata
        self.num_prompt_tokens_requests = Histogram(
@@ -253,7 +253,7 @@ class PrometheusMetricsCollector(MetricsCollector):
                            stats.time_to_first_tokens_iter)
        self._log_histogram(self.metrics.histogram_time_per_output_token,
                            stats.time_per_output_tokens_iter)
-        
+
        # self._log_gauge(self.metrics.gpu_cache_usage_sys, stats.gpu_cache_usage_sys)
        self._log_gauge(self.metrics.num_running_sys, stats.num_running_req)
        self._log_gauge(self.metrics.num_waiting_sys, stats.num_waiting_req)
@@ -294,4 +294,4 @@ def build_1_2_5_buckets(max_value: int) -> List[int]:
                buckets.append(value)
            else:
                return buckets
-        exponent += 1
+        exponent += 1
--- a/python/sglang/srt/metrics/metrics_types.py
+++ b/python/sglang/srt/metrics/metrics_types.py
@@ -54,4 +54,4 @@ class Stats:
    num_prompt_tokens_iter: int = 0
    num_generation_tokens_iter: int = 0
    time_to_first_tokens_iter: List[float] = field(default_factory=list)
-    time_per_output_tokens_iter: List[float] = field(default_factory=list)
+    time_per_output_tokens_iter: List[float] = field(default_factory=list)
--- a/python/sglang/srt/mm_utils.py
+++ b/python/sglang/srt/mm_utils.py
@@ -17,7 +17,7 @@ limitations under the License.
 """
 Utilities for multi-modal models.

-This python file mainly contains utilities that were used in the 
+This python file mainly contains utilities that were used in the
 image processing logic of llava-next including operations such as
 anyres and anyres_max

--- a/python/sglang/srt/models/gpt2.py
+++ b/python/sglang/srt/models/gpt2.py
@@ -136,7 +136,7 @@ class GPT2Block(nn.Module):
        layer_id: int,
        config: GPT2Config,
        cache_config = None,
-        
+
        quant_config: Optional[QuantizationConfig] = None,
        prefix: str = "",
    ):
@@ -284,4 +284,4 @@ class GPT2LMHeadModel(nn.Module):
                                    default_weight_loader)
            weight_loader(param, loaded_weight)

-EntryClass = GPT2LMHeadModel
+EntryClass = GPT2LMHeadModel
--- a/python/sglang/srt/models/olmo.py
+++ b/python/sglang/srt/models/olmo.py
--- a/python/sglang/srt/models/qwen2_vl.py
+++ b/python/sglang/srt/models/qwen2_vl.py
@@ -57,27 +57,27 @@ logger = init_logger(__name__)

 class Qwen2VLImageInputs(TypedDict):
    pixel_values: torch.Tensor
-    """Shape: 
+    """Shape:
    `(num_patches, num_channels * patch_size * patch_size)`
    """

    image_grid_thw: torch.Tensor
    """Shape: `(num_images, 3)`
-    
+
    This should be in `(grid_t, grid_h, grid_w)` format.
    """


 class Qwen2VLVideoInputs(TypedDict):
    pixel_values_videos: torch.Tensor
-    """Shape: 
-    `(num_patches, 
+    """Shape:
+    `(num_patches,
      num_channels * temporal_patch_size * patch_size * patch_size)`
    """

    video_grid_thw: torch.Tensor
    """Shape: `(num_videos, 3)`
-    
+
    This should be in `(grid_t, grid_h, grid_w)` format.
    """

--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -759,7 +759,7 @@ class Engine:

        # before python program terminates, call shutdown implicitly. Therefore, users don't have to explicitly call .shutdown()
        atexit.register(self.shutdown)
-        
+
        # runtime server default log level is log
        # offline engine works in scripts, so we set it to error