Update quick start examples (#120)

This commit is contained in:
Lianmin Zheng
2024-01-30 04:29:32 -08:00
committed by GitHub
parent 4ea92f8307
commit 0617528632
20 changed files with 567 additions and 237 deletions

View File

@@ -651,7 +651,7 @@ class ProgramState:
# Delegates to the underlying stream executor's sync() and returns its result unchanged.
def sync(self):
return self.stream_executor.sync()
def text_iter(self, var_name=None):
def text_iter(self, var_name: Optional[str] = None):
if self.stream_executor.stream:
prev = 0
if var_name is None:
@@ -682,7 +682,9 @@ class ProgramState:
else:
yield self.get_var(name)
async def text_async_iter(self, var_name=None, return_meta_data=False):
async def text_async_iter(
self, var_name: Optional[str] = None, return_meta_data: bool = False
):
loop = asyncio.get_running_loop()
if self.stream_executor.stream:

View File

@@ -74,7 +74,9 @@ class SglSamplingParams:
)
return {
"max_tokens_to_sample": self.max_new_tokens,
"stop_sequences": self.stop,
"stop_sequences": self.stop
if isinstance(self.stop, (list, tuple))
else [self.stop],
"temperature": self.temperature,
"top_p": self.top_p,
"top_k": self.top_k,

View File

@@ -8,7 +8,6 @@ from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.managers.router.model_runner import InputMetadata
from torch import nn
from transformers import Qwen2Config
from vllm.model_executor.layers.activation import SiluAndMul
from vllm.model_executor.layers.layernorm import RMSNorm
from vllm.model_executor.layers.linear import (
@@ -30,6 +29,8 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator,
)
Qwen2Config = None
class Qwen2MLP(nn.Module):
def __init__(

View File

@@ -445,18 +445,26 @@ class Runtime:
pipe_reader, pipe_writer = mp.Pipe(duplex=False)
proc = mp.Process(target=launch_server, args=(self.server_args, pipe_writer))
proc.start()
pipe_writer.close()
self.pid = proc.pid
init_state = pipe_reader.recv()
try:
init_state = pipe_reader.recv()
except EOFError:
init_state = ""
if init_state != "init ok":
self.shutdown()
raise RuntimeError("Launch failed")
raise RuntimeError("Launch failed. Please see the error messages above.")
self.endpoint = RuntimeEndpoint(self.url)
def shutdown(self):
if self.pid is not None:
parent = psutil.Process(self.pid)
try:
parent = psutil.Process(self.pid)
except psutil.NoSuchProcess:
return
children = parent.children(recursive=True)
for child in children:
child.kill()