class MooncakeLauncher:
    """Context manager that launches a ``mooncake_master`` server process.

    On ``__enter__`` the server subprocess is spawned with the configured
    ports and eviction thresholds; on ``__exit__`` it is terminated,
    escalating to SIGKILL if it ignores SIGTERM for 5 seconds.
    """

    def __init__(
        self,
        mooncake_port,
        mooncake_metrics_port,
        eviction_high_watermark_ratio=0.8,
        eviction_ratio=0.05,
    ):
        # Port the master listens on, and the port exposing its metrics.
        self.mooncake_port = mooncake_port
        self.mooncake_metrics_port = mooncake_metrics_port
        # Eviction tuning knobs forwarded verbatim to mooncake_master.
        self.eviction_high_watermark_ratio = eviction_high_watermark_ratio
        self.eviction_ratio = eviction_ratio
        # Initialized eagerly so __exit__ is a safe no-op even when
        # __enter__ never ran or failed before assigning the handle
        # (the original code raised AttributeError in that case).
        self.process = None

    def __enter__(self):
        cmd = [
            "mooncake_master",
            "--eviction_high_watermark_ratio",
            str(self.eviction_high_watermark_ratio),
            "--eviction_ratio",
            str(self.eviction_ratio),
            "--port",
            str(self.mooncake_port),
            "--metrics_port",
            str(self.mooncake_metrics_port),
        ]

        logger.info("Launching mooncake: %s", " ".join(cmd))
        # Prepend the mooncake library dir to LD_LIBRARY_PATH in a *copy*
        # of the environment only. The original code mutated os.environ
        # before copying it, leaking the change into the test process and
        # prepending the path again on every launch.
        env = os.environ.copy()
        mooncake_ld_path = "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:"
        env["LD_LIBRARY_PATH"] = mooncake_ld_path + env.get("LD_LIBRARY_PATH", "")
        self.process = subprocess.Popen(cmd, env=env)
        return self

    def __exit__(self, exc_type, exc, tb):
        # Nothing to do if the server was never (successfully) started.
        if not self.process:
            return
        logger.info("Stopping mooncake server...")
        self.process.terminate()
        try:
            self.process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # SIGTERM ignored: force-kill, then reap so no zombie is left
            # behind (the original never waited after kill()).
            self.process.kill()
            self.process.wait()
        finally:
            self.process = None