Let ModelRunner take InputMetadata as input, instead of ScheduleBatch (#1541)

This commit is contained in:
Lianmin Zheng
2024-09-29 20:28:45 -07:00
committed by GitHub
parent 55b974f96f
commit 3f0fe08d37
12 changed files with 142 additions and 157 deletions

View File

@@ -15,7 +15,6 @@ limitations under the License.
import multiprocessing as mp
import unittest
import uuid
import torch
@@ -85,9 +84,9 @@ class TestLoRA(unittest.TestCase):
with SRTRunner(
base_path,
tp_size=tp_size,
torch_dtype=torch_dtype,
is_generation=True,
model_type="generation",
tp_size=tp_size,
lora_paths=all_lora_paths,
max_loras_per_batch=3,
disable_cuda_graph=True,

View File

@@ -7,6 +7,7 @@ suites = {
"minimal": [
"models/test_embedding_models.py",
"models/test_generation_models.py",
# "models/test_lora.py",
"models/test_reward_models.py",
"sampling/penaltylib",
"test_chunked_prefill.py",