Add a watchdog thread (#1816)

This commit is contained in:
Lianmin Zheng
2024-10-27 02:00:50 -07:00
committed by GitHub
parent 1be853ee69
commit 86fc0d79d0
34 changed files with 99 additions and 56 deletions

View File

@@ -31,7 +31,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def run_decode(
self,

View File

@@ -45,7 +45,7 @@ class TestCacheReport(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def run_decode(self, return_logprob=False, top_logprobs_num=0, n=1):
response = requests.post(

View File

@@ -25,7 +25,7 @@ class TestDataParallelism(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -43,7 +43,7 @@ class TestDoubleSparsity(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -28,7 +28,7 @@ class TestOpenAIServer(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def run_embedding(self, use_list_input, token_input):
client = openai.Client(api_key=self.api_key, base_url=self.base_url)

View File

@@ -30,7 +30,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -25,7 +25,7 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -31,7 +31,7 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -22,7 +22,7 @@ class TestEvalAccuracyMini(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -41,7 +41,7 @@ class TestJSONConstrained(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def run_decode(self, json_schema, return_logprob=False, top_logprobs_num=0, n=1):
response = requests.post(

View File

@@ -42,7 +42,7 @@ class TestLargeMaxNewTokens(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
cls.stdout.close()
cls.stderr.close()
os.remove("stdout.txt")

View File

@@ -32,7 +32,7 @@ class TestMatchedStop(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def run_completions_generation(
self,

View File

@@ -25,7 +25,7 @@ class TestMLA(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -31,7 +31,7 @@ class TestMLA(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mgsm_en(self):
args = SimpleNamespace(

View File

@@ -35,7 +35,7 @@ class TestMoEEvalAccuracyLarge(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -36,7 +36,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
def tearDown(self):
if self.process:
kill_child_process(self.process.pid)
kill_child_process(self.process.pid, include_self=True)
def launch_server(self, model, is_fp8, is_tp2):
other_args = ["--log-level-http", "warning", "--trust-remote-code"]

View File

@@ -31,7 +31,7 @@ class TestOpenAIServer(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def run_completion(
self, echo, logprobs, use_list_input, parallel_sample_num, token_input

View File

@@ -27,7 +27,7 @@ class TestPyTorchSamplingBackend(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -22,7 +22,7 @@ class TestRetractDecode(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -26,7 +26,7 @@ class TestSkipTokenizerInit(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def run_decode(self, return_logprob=False, top_logprobs_num=0, n=1):
max_new_tokens = 32

View File

@@ -27,7 +27,7 @@ class TestSRTEndpoint(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def run_decode(
self,

View File

@@ -27,7 +27,7 @@ class TestTorchCompile(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -27,7 +27,7 @@ class TestTorchCompile(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_mmlu(self):
args = SimpleNamespace(

View File

@@ -50,7 +50,7 @@ class TestTritonAttnBackend(unittest.TestCase):
metrics = run_eval(args)
assert metrics["score"] >= 0.65
finally:
kill_child_process(process.pid)
kill_child_process(process.pid, include_self=True)
if __name__ == "__main__":

View File

@@ -23,7 +23,7 @@ class TestUpdateWeights(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def run_decode(self):
response = requests.post(

View File

@@ -45,7 +45,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
@classmethod
def tearDownClass(cls):
kill_child_process(cls.process.pid)
kill_child_process(cls.process.pid, include_self=True)
def test_chat_completion(self):
client = openai.Client(api_key=self.api_key, base_url=self.base_url)