Improve docs & Rename Gemini -> VertexAI (#19)

This commit is contained in:
Lianmin Zheng
2024-01-17 02:54:41 -08:00
committed by GitHub
parent fd7c479239
commit bf51ddc6e5
13 changed files with 56 additions and 583 deletions

View File

@@ -28,7 +28,7 @@ class RouterManager:
self.model_client = model_client
self.recv_reqs = []
# Init Some Configs
# Init some configs
self.extend_dependency_time = GLOBAL_BACKEND_CONFIG.extend_dependency_time
async def loop_for_forward(self):
@@ -46,7 +46,7 @@ class RouterManager:
if has_finished:
await asyncio.sleep(self.extend_dependency_time)
await asyncio.sleep(0.001)
await asyncio.sleep(0.0006)
async def loop_for_recv_requests(self):
while True:

View File

@@ -108,7 +108,7 @@ class ModelRpcServer(rpyc.Service):
self.running_batch: Batch = None
self.out_pyobjs = []
self.decode_forward_ct = 0
self.stream_interval = 2
self.stream_interval = server_args.stream_interval
# Init the FSM cache for constrained generation
self.regex_fsm_cache = FSMCache(self.tokenizer)

View File

@@ -17,6 +17,7 @@ class ServerArgs:
model_mode: List[str] = ()
schedule_heuristic: str = "lpm"
random_seed: int = 42
stream_interval: int = 2
disable_log_stats: bool = False
log_stats_interval: int = 10
log_level: str = "info"
@@ -108,6 +109,12 @@ class ServerArgs:
default=ServerArgs.random_seed,
help="Random seed.",
)
parser.add_argument(
"--stream-interval",
type=int,
default=ServerArgs.stream_interval,
help="The interval (in token count) at which streaming outputs are sent.",
)
parser.add_argument(
"--log-level",
type=str,