Update quick start examples (#120)
This commit is contained in:
@@ -651,7 +651,7 @@ class ProgramState:
|
||||
def sync(self):
|
||||
return self.stream_executor.sync()
|
||||
|
||||
def text_iter(self, var_name=None):
|
||||
def text_iter(self, var_name: Optional[str] = None):
|
||||
if self.stream_executor.stream:
|
||||
prev = 0
|
||||
if var_name is None:
|
||||
@@ -682,7 +682,9 @@ class ProgramState:
|
||||
else:
|
||||
yield self.get_var(name)
|
||||
|
||||
async def text_async_iter(self, var_name=None, return_meta_data=False):
|
||||
async def text_async_iter(
|
||||
self, var_name: Optional[str] = None, return_meta_data: bool = False
|
||||
):
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
if self.stream_executor.stream:
|
||||
|
||||
@@ -74,7 +74,9 @@ class SglSamplingParams:
|
||||
)
|
||||
return {
|
||||
"max_tokens_to_sample": self.max_new_tokens,
|
||||
"stop_sequences": self.stop,
|
||||
"stop_sequences": self.stop
|
||||
if isinstance(self.stop, (list, tuple))
|
||||
else [self.stop],
|
||||
"temperature": self.temperature,
|
||||
"top_p": self.top_p,
|
||||
"top_k": self.top_k,
|
||||
|
||||
@@ -8,7 +8,6 @@ from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||
from sglang.srt.layers.radix_attention import RadixAttention
|
||||
from sglang.srt.managers.router.model_runner import InputMetadata
|
||||
from torch import nn
|
||||
from transformers import Qwen2Config
|
||||
from vllm.model_executor.layers.activation import SiluAndMul
|
||||
from vllm.model_executor.layers.layernorm import RMSNorm
|
||||
from vllm.model_executor.layers.linear import (
|
||||
@@ -30,6 +29,8 @@ from vllm.model_executor.weight_utils import (
|
||||
hf_model_weights_iterator,
|
||||
)
|
||||
|
||||
Qwen2Config = None
|
||||
|
||||
|
||||
class Qwen2MLP(nn.Module):
|
||||
def __init__(
|
||||
|
||||
@@ -445,18 +445,26 @@ class Runtime:
|
||||
pipe_reader, pipe_writer = mp.Pipe(duplex=False)
|
||||
proc = mp.Process(target=launch_server, args=(self.server_args, pipe_writer))
|
||||
proc.start()
|
||||
pipe_writer.close()
|
||||
self.pid = proc.pid
|
||||
|
||||
init_state = pipe_reader.recv()
|
||||
try:
|
||||
init_state = pipe_reader.recv()
|
||||
except EOFError:
|
||||
init_state = ""
|
||||
|
||||
if init_state != "init ok":
|
||||
self.shutdown()
|
||||
raise RuntimeError("Launch failed")
|
||||
raise RuntimeError("Launch failed. Please see the error messages above.")
|
||||
|
||||
self.endpoint = RuntimeEndpoint(self.url)
|
||||
|
||||
def shutdown(self):
|
||||
if self.pid is not None:
|
||||
parent = psutil.Process(self.pid)
|
||||
try:
|
||||
parent = psutil.Process(self.pid)
|
||||
except psutil.NoSuchProcess:
|
||||
return
|
||||
children = parent.children(recursive=True)
|
||||
for child in children:
|
||||
child.kill()
|
||||
|
||||
Reference in New Issue
Block a user