[Hicache]: Add E2E CI For 3FS-KVStore (#10131)
This commit is contained in:
135
test/srt/hicache/test_hicache_storage_3fs_backend.py
Normal file
135
test/srt/hicache/test_hicache_storage_3fs_backend.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""
|
||||
End-to-end tests for HiCache Storage with the 3FS (HF3FS) backend.
|
||||
Usage:
|
||||
python3 -m pytest test/srt/hicache/test_hicache_storage_3fs_backend.py -v
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from test_hicache_storage_file_backend import HiCacheStorageBaseMixin
|
||||
|
||||
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
||||
from sglang.test.test_utils import CustomTestCase
|
||||
|
||||
|
||||
class HiCacheStorage3FSBackendBaseMixin(HiCacheStorageBaseMixin):
    """Shared HF3FS storage-backend configuration for the HiCache test classes."""

    @classmethod
    def _get_additional_server_args_and_env(cls):
        """Build the HF3FS-specific server arguments and environment variables.

        The HF3FS settings are written to ``hf3fs_config.json`` inside
        ``cls.temp_dir`` and are also passed inline, as a JSON string, through
        ``--hicache-storage-backend-extra-config``.

        Returns:
            A ``(server_args, env_vars)`` tuple of dicts. Subclasses extend
            ``server_args`` with their own flags.
        """
        # HF3FS settings; the mock client flag is enabled for these tests.
        hf3fs_config = dict(
            file_path_prefix=os.path.join(cls.temp_dir, "hicache"),
            file_size=2 * 1024**3,  # 2 GiB
            numjobs=2,
            entries=8,
            use_mock_hf3fs_client=True,
        )

        # Persist the settings next to the cache files under temp_dir.
        config_file = os.path.join(cls.temp_dir, "hf3fs_config.json")
        with open(config_file, "w") as config_fp:
            json.dump(hf3fs_config, config_fp, indent=2)

        inline_config = json.dumps(hf3fs_config)
        server_args = dict(
            [
                ("--tp-size", 1),
                ("--hicache-ratio", 1.2),
                ("--hicache-storage-backend", "hf3fs"),
                ("--hicache-storage-backend-extra-config", inline_config),
            ]
        )

        # The environment variable carries the path of the file written above.
        env_vars = {"SGLANG_HICACHE_HF3FS_CONFIG_PATH": config_file}

        return server_args, env_vars
|
||||
|
||||
|
||||
class TestHf3fsBackendLayerFirstLayout(
    HiCacheStorage3FSBackendBaseMixin, CustomTestCase
):
    """HiCache HF3FS backend tests using the ``layer_first`` memory layout."""

    @classmethod
    def _get_additional_server_args_and_env(cls):
        """Return the base HF3FS configuration with the layer-first layout.

        Adds ``--hicache-mem-layout layer_first`` and
        ``--hicache-io-backend direct`` to the server arguments produced by
        the parent class; the environment variables are passed through.
        """
        args, env = super()._get_additional_server_args_and_env()
        args.update(
            {
                "--hicache-mem-layout": "layer_first",
                "--hicache-io-backend": "direct",
            }
        )
        return args, env
|
||||
|
||||
|
||||
class TestHf3fsBackendPageFirstLayout(
    HiCacheStorage3FSBackendBaseMixin, CustomTestCase
):
    """HiCache HF3FS backend tests using the ``page_first`` memory layout."""

    @classmethod
    def _get_additional_server_args_and_env(cls):
        """Return the base HF3FS configuration with the page-first layout.

        Adds ``--hicache-mem-layout page_first`` to the server arguments
        produced by the parent class; the environment variables are passed
        through.
        """
        args, env = super()._get_additional_server_args_and_env()
        args.update({"--hicache-mem-layout": "page_first"})
        return args, env
|
||||
|
||||
|
||||
class TestHf3fsBackendAccuracy(HiCacheStorage3FSBackendBaseMixin, CustomTestCase):
    """GSM8K accuracy checks for the HiCache HF3FS backend."""

    @classmethod
    def _get_additional_server_args_and_env(cls):
        """Return the base HF3FS configuration with ratio 1.5 and TP size 2.

        Overrides ``--hicache-ratio`` and ``--tp-size`` in the server
        arguments produced by the parent class; the environment variables are
        passed through.
        """
        args, env = super()._get_additional_server_args_and_env()
        args.update({"--hicache-ratio": 1.5, "--tp-size": 2})
        return args, env

    def test_eval_accuracy(self):
        """Compare GSM8K accuracy before and after flushing the cache.

        Runs the few-shot GSM8K evaluation, flushes the cache through the
        inherited ``flush_cache`` helper, runs the same evaluation again, and
        asserts that both accuracies exceed 0.6 and differ by less than 0.05.
        """
        print("\n=== Testing Eval Accuracy with Cache Persistence ===")

        # Phase 1: the first run is intended to populate the cache.
        print("Phase 1: Running initial GSM8K evaluation to populate cache...")
        eval_settings = {
            "num_shots": 5,
            "data_path": None,
            "num_questions": 50,
            "max_new_tokens": 512,
            "parallel": 10,
            "host": f"http://{self.base_host}",
            "port": int(self.base_port),
        }
        eval_args = SimpleNamespace(**eval_settings)
        baseline_metrics = run_eval_few_shot_gsm8k(eval_args)

        # Phase 2: flush so the next run cannot rely on the device cache.
        print("Phase 2: Flushing device cache...")
        flushed = self.flush_cache()
        self.assertTrue(flushed, "Cache flush should succeed")
        time.sleep(2)

        # Phase 3: identical evaluation, expected to be served from storage.
        print("Phase 3: Running second GSM8K evaluation using remote cache...")
        cached_metrics = run_eval_few_shot_gsm8k(eval_args)

        # Compare the two runs.
        baseline_accuracy = baseline_metrics["accuracy"]
        cached_accuracy = cached_metrics["accuracy"]
        accuracy_gap = abs(baseline_accuracy - cached_accuracy)
        print(f"Accuracy difference: {accuracy_gap:.4f}")

        self.assertGreater(
            baseline_accuracy, 0.6, "Initial accuracy should be reasonable"
        )
        self.assertGreater(
            cached_accuracy, 0.6, "Cached accuracy should be reasonable"
        )
        self.assertLess(
            accuracy_gap, 0.05, "Accuracy should be consistent between cache states"
        )
|
||||
|
||||
|
||||
# Allow running this module directly; verbosity=2 prints each test name.
if __name__ == "__main__":
    unittest.main(verbosity=2)
|
||||
Reference in New Issue
Block a user