Improve benchmark scripts & fix llava (#613)

2024-07-13 15:00:26 -07:00
parent 665815969a
commit 65c6577696
4 changed files with 43 additions and 22 deletions
--- a/python/sglang/README.md
+++ b/python/sglang/README.md
@@ -0,0 +1,12 @@
+# Code Structure
+
+- `backend`: Various backends for the language interpreter.
+- `lang`: The frontend language.
+- `srt`: The runtime for running local models.
+- `test`: Test utilities.
+- `api.py`: Public API.
+- `bench_latency.py`: Benchmark utilities.
+- `global_config.py`: The global configs and constants.
+- `launch_server.py`: The entry point for launching a local server.
+- `utils.py`: Common utilities.
+
--- a/python/sglang/srt/managers/controller/model_runner.py
+++ b/python/sglang/srt/managers/controller/model_runner.py
@@ -276,17 +276,13 @@ class ModelRunner:
        input_metadata = InputMetadata.create(
            self,
            forward_mode=ForwardMode.EXTEND,
-            tp_size=self.tp_size,
            req_pool_indices=batch.req_pool_indices,
            seq_lens=batch.seq_lens,
            prefix_lens=batch.prefix_lens,
            position_ids_offsets=batch.position_ids_offsets,
            out_cache_loc=batch.out_cache_loc,
-            top_logprobs_nums=batch.top_logprobs_nums,
            return_logprob=batch.return_logprob,
-            flashinfer_prefill_wrapper_ragged=self.flashinfer_prefill_wrapper_ragged,
-            flashinfer_prefill_wrapper_paged=self.flashinfer_prefill_wrapper_paged,
-            flashinfer_decode_wrapper=self.flashinfer_decode_wrapper,
+            top_logprobs_nums=batch.top_logprobs_nums,
        )
        return self.model.forward(
            batch.input_ids,