[Eagle] Fix kernel call after updating speculative sampling kernels (#7231)
This commit is contained in:
@@ -143,7 +143,7 @@ class TestFlashAttention3SpeculativeDecode(BaseFlashAttentionTest):
|
||||
args.extend(
|
||||
[
|
||||
"--cuda-graph-max-bs",
|
||||
-"2",
|
||||
+"4",
|
||||
"--speculative-algorithm",
|
||||
"EAGLE3",
|
||||
"--speculative-draft",
|
||||
@@ -169,7 +169,7 @@ class TestFlashAttention3SpeculativeDecodeTopk(BaseFlashAttentionTest):
|
||||
model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
accuracy_threshold = 0.65
|
||||
speculative_decode = True
|
||||
-spec_decode_threshold = 1.5
|
||||
+spec_decode_threshold = 1.6
|
||||
|
||||
@classmethod
|
||||
def get_server_args(cls):
|
||||
@@ -177,7 +177,7 @@ class TestFlashAttention3SpeculativeDecodeTopk(BaseFlashAttentionTest):
|
||||
args.extend(
|
||||
[
|
||||
"--cuda-graph-max-bs",
|
||||
-"2",
|
||||
+"4",
|
||||
"--speculative-algorithm",
|
||||
"EAGLE3",
|
||||
"--speculative-draft",
|
||||
@@ -201,7 +201,7 @@ class TestFlashAttention3MLASpeculativeDecode(BaseFlashAttentionTest):
|
||||
model = DEFAULT_MODEL_NAME_FOR_TEST_MLA
|
||||
accuracy_threshold = 0.60
|
||||
speculative_decode = True
|
||||
-spec_decode_threshold = 1.5
|
||||
+spec_decode_threshold = 2.5
|
||||
|
||||
@classmethod
|
||||
def get_server_args(cls):
|
||||
@@ -209,7 +209,7 @@ class TestFlashAttention3MLASpeculativeDecode(BaseFlashAttentionTest):
|
||||
args.extend(
|
||||
[
|
||||
"--cuda-graph-max-bs",
|
||||
-"2",
|
||||
+"4",
|
||||
"--speculative-algorithm",
|
||||
"EAGLE",
|
||||
"--speculative-draft",
|
||||
@@ -233,7 +233,7 @@ class TestFlashAttention3MLASpeculativeDecodeTopk(BaseFlashAttentionTest):
|
||||
model = DEFAULT_MODEL_NAME_FOR_TEST_MLA
|
||||
accuracy_threshold = 0.60
|
||||
speculative_decode = True
|
||||
-spec_decode_threshold = 1.5
|
||||
+spec_decode_threshold = 2.95
|
||||
|
||||
@classmethod
|
||||
def get_server_args(cls):
|
||||
@@ -241,7 +241,7 @@ class TestFlashAttention3MLASpeculativeDecodeTopk(BaseFlashAttentionTest):
|
||||
args.extend(
|
||||
[
|
||||
"--cuda-graph-max-bs",
|
||||
-"2",
|
||||
+"4",
|
||||
"--speculative-algorithm",
|
||||
"EAGLE",
|
||||
"--speculative-draft",
|
||||
|
||||
Reference in New Issue
Block a user