Fix the chat template for llava-v1.6-34b & format code (#177)
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""
|
||||
python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
|
||||
"""
|
||||
|
||||
import json
|
||||
import unittest
|
||||
|
||||
|
||||
@@ -66,9 +66,9 @@ class BenchBatch:
|
||||
p_idx = prefix_req_idx[i // fork_num].item()
|
||||
n_idx = self.req_pool_indices[i].item()
|
||||
req_to_token[n_idx, :prefix_len] = req_to_token[p_idx, :prefix_len]
|
||||
-req_to_token[
|
||||
-n_idx, prefix_len : prefix_len + extend_len
|
||||
-] = self.out_cache_loc[i * extend_len : (i + 1) * extend_len]
|
||||
+req_to_token[n_idx, prefix_len : prefix_len + extend_len] = (
|
||||
+self.out_cache_loc[i * extend_len : (i + 1) * extend_len]
|
||||
+)
|
||||
|
||||
def update_decode(self, predict_ids, batch_size):
|
||||
assert predict_ids.shape[0] == batch_size
|
||||
@@ -81,9 +81,9 @@ class BenchBatch:
|
||||
self.out_cache_cont_start,
|
||||
self.out_cache_cont_end,
|
||||
) = self.token_to_kv_pool.alloc_contiguous(batch_size)
|
||||
-self.req_to_token_pool.req_to_token[
|
||||
-self.req_pool_indices, self.seq_lens
|
||||
-] = self.out_cache_loc
|
||||
+self.req_to_token_pool.req_to_token[self.req_pool_indices, self.seq_lens] = (
|
||||
+self.out_cache_loc
|
||||
+)
|
||||
self.seq_lens.add_(1)
|
||||
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from sglang.test.test_utils import (
|
||||
add_common_sglang_args_and_parse,
|
||||
select_sglang_backend,
|
||||
)
|
||||
|
||||
import sglang as sgl
|
||||
|
||||
IP_REGEX = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
|
||||
|
||||
@@ -155,7 +155,8 @@ def test_chat_completion_stream(args):
|
||||
def test_regex(args):
|
||||
client = openai.Client(api_key="EMPTY", base_url=args.base_url)
|
||||
|
||||
-regex = (r"""\{\n"""
|
||||
+regex = (
|
||||
+r"""\{\n"""
|
||||
 + r""" "name": "[\w]+",\n"""
|
||||
 + r""" "population": "[\w\d\s]+"\n"""
|
||||
 + r"""\}"""
|
||||
|
||||
Reference in New Issue
Block a user