Misc fixes (#432)
This commit is contained in:
8
docs/release_process.md
Normal file
8
docs/release_process.md
Normal file
@@ -0,0 +1,8 @@
```
pip install build twine
```

```
cd python
bash upload_pypi.sh
```
@@ -81,3 +81,9 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port
cd test/lang
python3 run_all.py
```

## OpenAI API server
```
cd test/srt
python test_openai_server.py
```
|
||||
@@ -1,7 +1,8 @@
|
||||
import transformers
|
||||
import code
|
||||
|
||||
name = "meta-llama/Llama-2-7b-chat-hf"
|
||||
#name = "meta-llama/Llama-2-7b-chat-hf"
|
||||
name = "meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
|
||||
t = transformers.AutoTokenizer.from_pretrained(name)
|
||||
code.interact(local=locals())
|
||||
|
||||
@@ -426,7 +426,9 @@ class ModelRpcServer:
|
||||
# Only transfer the selected logprobs of the next token to CPU to reduce overhead.
|
||||
if last_logprobs is not None:
|
||||
last_token_logprobs = (
|
||||
last_logprobs[torch.arange(len(batch.reqs)), next_token_ids].tolist()
|
||||
last_logprobs[
|
||||
torch.arange(len(batch.reqs), device=next_token_ids.device),
|
||||
next_token_ids].tolist()
|
||||
)
|
||||
|
||||
next_token_ids = next_token_ids.tolist()
|
||||
@@ -587,6 +589,7 @@ class ModelRpcServer:
|
||||
- req.prompt_tokens,
|
||||
"completion_tokens_wo_jump_forward": req.completion_tokens_wo_jump_forward,
|
||||
"finish_reason": str(req.finish_reason), # FIXME: convert to the correct string
|
||||
"hit_stop_str": req.hit_stop_str,
|
||||
}
|
||||
if req.return_logprob:
|
||||
(
|
||||
|
||||
@@ -110,8 +110,8 @@ class InputMetadata:
|
||||
self.kv_last_page_len = torch.ones(
|
||||
(self.batch_size,), dtype=torch.int32, device="cuda"
|
||||
)
|
||||
req_pool_indices_cpu = self.req_pool_indices.cpu().tolist()
|
||||
seq_lens_cpu = self.seq_lens.tolist()
|
||||
req_pool_indices_cpu = self.req_pool_indices.cpu().numpy()
|
||||
seq_lens_cpu = self.seq_lens.cpu().numpy()
|
||||
self.kv_indices = torch.cat(
|
||||
[
|
||||
self.req_to_token_pool.req_to_token[
|
||||
|
||||
@@ -163,7 +163,7 @@ def test_regex(args):
|
||||
regex = (
|
||||
r"""\{\n"""
|
||||
+ r""" "name": "[\w]+",\n"""
|
||||
+ r""" "population": [\w\d\s]+\n"""
|
||||
+ r""" "population": [\d]+\n"""
|
||||
+ r"""\}"""
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user