[TEST]Add mooncake common method for tests (#6194)
### What this PR does / why we need it?
This PR adds mooncake common method to conftest, we need it to add more
test cases later
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
by running a test
- vLLM version: v0.14.0
- vLLM main:
d68209402d
Signed-off-by: jiangyunfan1 <jiangyunfan1@h-partners.com>
This commit is contained in:
@@ -169,6 +169,52 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
|
|||||||
torch.npu.reset_peak_memory_stats()
|
torch.npu.reset_peak_memory_stats()
|
||||||
|
|
||||||
|
|
||||||
|
class MooncakeLauncher:
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
mooncake_port,
|
||||||
|
mooncake_metrics_port,
|
||||||
|
eviction_high_watermark_ratio=0.8,
|
||||||
|
eviction_ratio=0.05,
|
||||||
|
):
|
||||||
|
self.mooncake_port = mooncake_port
|
||||||
|
self.mooncake_metrics_port = mooncake_metrics_port
|
||||||
|
self.eviction_high_watermark_ratio = eviction_high_watermark_ratio
|
||||||
|
self.eviction_ratio = eviction_ratio
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
cmd = [
|
||||||
|
"mooncake_master",
|
||||||
|
"--eviction_high_watermark_ratio",
|
||||||
|
str(self.eviction_high_watermark_ratio),
|
||||||
|
"--eviction_ratio",
|
||||||
|
str(self.eviction_ratio),
|
||||||
|
"--port",
|
||||||
|
str(self.mooncake_port),
|
||||||
|
"--metrics_port",
|
||||||
|
str(self.mooncake_metrics_port),
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info("Launching mooncake: %s", " ".join(cmd))
|
||||||
|
curr_ld_path = os.environ.get("LD_LIBRARY_PATH", "")
|
||||||
|
mooncake_ld_path = "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:"
|
||||||
|
os.environ["LD_LIBRARY_PATH"] = mooncake_ld_path + curr_ld_path
|
||||||
|
env = os.environ.copy()
|
||||||
|
self.process = subprocess.Popen(cmd, env=env)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc, tb):
|
||||||
|
if not self.process:
|
||||||
|
return
|
||||||
|
logger.info("Stopping mooncake server...")
|
||||||
|
self.process.terminate()
|
||||||
|
try:
|
||||||
|
self.process.wait(timeout=5)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
self.process.kill()
|
||||||
|
|
||||||
|
|
||||||
class RemoteOpenAIServer:
|
class RemoteOpenAIServer:
|
||||||
DUMMY_API_KEY = "token-abc123" # vLLM's OpenAI server does not need API key
|
DUMMY_API_KEY = "token-abc123" # vLLM's OpenAI server does not need API key
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user