diff --git a/README.md b/README.md
index bfd01e208..2991b4063 100644
--- a/README.md
+++ b/README.md
@@ -233,7 +233,6 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 ### Supported Models
 
 **Generative Models**
-- Exaone 3.0
 - Llama / Llama 2 / Llama 3 / Llama 3.1
 - Mistral / Mixtral / Mistral NeMo
 - Gemma / Gemma 2
@@ -253,6 +252,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 - Grok
 - ChatGLM
 - InternLM 2
+- Exaone 3
 
 **Embedding Models**
 
diff --git a/python/sglang/bench_latency.py b/python/sglang/bench_latency.py
index 3a4874085..966a97d20 100644
--- a/python/sglang/bench_latency.py
+++ b/python/sglang/bench_latency.py
@@ -292,6 +292,7 @@ def latency_test_run_once(
     measurement_results["prefill_throughput"] = throughput
 
     # Decode
+    decode_latencies = []
     for i in range(output_len):
         torch.cuda.synchronize()
         tic = time.time()
@@ -300,17 +301,18 @@ def latency_test_run_once(
         latency = time.time() - tic
         tot_latency += latency
         throughput = batch_size / latency
+        decode_latencies.append(latency)
         if i < 5:
             rank_print(
                 f"Decode. latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s"
             )
-    avg_decode_latency = (tot_latency - prefill_latency) / output_len
-    avg_decode_throughput = batch_size / avg_decode_latency
+    med_decode_latency = np.median(decode_latencies)
+    med_decode_throughput = batch_size / med_decode_latency
     rank_print(
-        f"Decode. avg latency: {avg_decode_latency:6.5f} s, avg throughput: {avg_decode_throughput:9.2f} token/s"
+        f"Decode. median latency: {med_decode_latency:6.5f} s, median throughput: {med_decode_throughput:9.2f} token/s"
     )
-    measurement_results["avg_decode_latency"] = avg_decode_latency
-    measurement_results["avg_decode_throughput"] = avg_decode_throughput
+    measurement_results["median_decode_latency"] = med_decode_latency
+    measurement_results["median_decode_throughput"] = med_decode_throughput
 
     throughput = (input_len + output_len) * batch_size / tot_latency
     rank_print(
diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py
index 7fce3b240..b22c61020 100644
--- a/python/sglang/srt/hf_transformers_utils.py
+++ b/python/sglang/srt/hf_transformers_utils.py
@@ -50,8 +50,6 @@ for name, cls in _CONFIG_REGISTRY.items():
     with contextlib.suppress(ValueError):
         AutoConfig.register(name, cls)
 
-from sglang.srt.utils import is_multimodal_model
-
 def download_from_hf(model_path: str):
     if os.path.exists(model_path):
         return model_path
@@ -60,12 +58,6 @@ def download_from_hf(model_path: str):
     return snapshot_download(model_path, allow_patterns=["*.json", "*.bin", "*.model"])
 
 
-def get_config_json(model_path: str):
-    with open(os.path.join(model_path, "configs.json")) as f:
-        config = json.load(f)
-    return config
-
-
 def get_config(
     model: str,
     trust_remote_code: bool,