minor fix (#7245)
This commit is contained in:
@@ -391,7 +391,7 @@ class Scheduler(
|
|||||||
self.forward_ct = 0
|
self.forward_ct = 0
|
||||||
self.forward_ct_decode = 0
|
self.forward_ct_decode = 0
|
||||||
self.num_generated_tokens = 0
|
self.num_generated_tokens = 0
|
||||||
self.num_prefill_tokens = 0
|
self.last_prefill_tokens = 0
|
||||||
self.last_decode_stats_tic = time.perf_counter()
|
self.last_decode_stats_tic = time.perf_counter()
|
||||||
self.last_prefill_stats_tic = time.perf_counter()
|
self.last_prefill_stats_tic = time.perf_counter()
|
||||||
self.return_health_check_ct = 0
|
self.return_health_check_ct = 0
|
||||||
@@ -1194,8 +1194,8 @@ class Scheduler(
|
|||||||
):
|
):
|
||||||
gap_latency = time.perf_counter() - self.last_prefill_stats_tic
|
gap_latency = time.perf_counter() - self.last_prefill_stats_tic
|
||||||
self.last_prefill_stats_tic = time.perf_counter()
|
self.last_prefill_stats_tic = time.perf_counter()
|
||||||
self.last_input_throughput = self.num_prefill_tokens / gap_latency
|
self.last_input_throughput = self.last_prefill_tokens / gap_latency
|
||||||
self.num_prefill_tokens = 0
|
self.last_prefill_tokens = adder.log_input_tokens
|
||||||
|
|
||||||
num_used = self.max_total_num_tokens - (
|
num_used = self.max_total_num_tokens - (
|
||||||
self.token_to_kv_pool_allocator.available_size()
|
self.token_to_kv_pool_allocator.available_size()
|
||||||
|
|||||||
@@ -60,6 +60,17 @@ pub async fn generate(
|
|||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[post("/v1/completions")]
|
||||||
|
pub async fn completions(
|
||||||
|
_req: HttpRequest,
|
||||||
|
req: web::Json<GenerateReqInput>,
|
||||||
|
app_state: web::Data<LBState>,
|
||||||
|
) -> Result<HttpResponse, actix_web::Error> {
|
||||||
|
app_state
|
||||||
|
.generate("/v1/completions", Box::new(req.into_inner()))
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
#[post("/v1/chat/completions")]
|
#[post("/v1/chat/completions")]
|
||||||
pub async fn chat_completions(
|
pub async fn chat_completions(
|
||||||
_req: HttpRequest,
|
_req: HttpRequest,
|
||||||
@@ -162,6 +173,7 @@ pub async fn startup(lb_config: LBConfig, lb_state: LBState) -> std::io::Result<
|
|||||||
.service(get_loads)
|
.service(get_loads)
|
||||||
.service(generate)
|
.service(generate)
|
||||||
.service(chat_completions)
|
.service(chat_completions)
|
||||||
|
.service(completions)
|
||||||
})
|
})
|
||||||
.bind((lb_config.host, lb_config.port))?
|
.bind((lb_config.host, lb_config.port))?
|
||||||
.run()
|
.run()
|
||||||
|
|||||||
Reference in New Issue
Block a user