Update quick start examples (#120)
This commit is contained in:
@@ -8,7 +8,6 @@ from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||
from sglang.srt.layers.radix_attention import RadixAttention
|
||||
from sglang.srt.managers.router.model_runner import InputMetadata
|
||||
from torch import nn
|
||||
from transformers import Qwen2Config
|
||||
from vllm.model_executor.layers.activation import SiluAndMul
|
||||
from vllm.model_executor.layers.layernorm import RMSNorm
|
||||
from vllm.model_executor.layers.linear import (
|
||||
@@ -30,6 +29,8 @@ from vllm.model_executor.weight_utils import (
|
||||
hf_model_weights_iterator,
|
||||
)
|
||||
|
||||
Qwen2Config = None
|
||||
|
||||
|
||||
class Qwen2MLP(nn.Module):
|
||||
def __init__(
|
||||
|
||||
@@ -445,18 +445,26 @@ class Runtime:
|
||||
pipe_reader, pipe_writer = mp.Pipe(duplex=False)
|
||||
proc = mp.Process(target=launch_server, args=(self.server_args, pipe_writer))
|
||||
proc.start()
|
||||
pipe_writer.close()
|
||||
self.pid = proc.pid
|
||||
|
||||
init_state = pipe_reader.recv()
|
||||
try:
|
||||
init_state = pipe_reader.recv()
|
||||
except EOFError:
|
||||
init_state = ""
|
||||
|
||||
if init_state != "init ok":
|
||||
self.shutdown()
|
||||
raise RuntimeError("Launch failed")
|
||||
raise RuntimeError("Launch failed. Please see the error messages above.")
|
||||
|
||||
self.endpoint = RuntimeEndpoint(self.url)
|
||||
|
||||
def shutdown(self):
|
||||
if self.pid is not None:
|
||||
parent = psutil.Process(self.pid)
|
||||
try:
|
||||
parent = psutil.Process(self.pid)
|
||||
except psutil.NoSuchProcess:
|
||||
return
|
||||
children = parent.children(recursive=True)
|
||||
for child in children:
|
||||
child.kill()
|
||||
|
||||
Reference in New Issue
Block a user