Fix flush cache (#5590)
This commit is contained in:
@@ -25,11 +25,8 @@ import multiprocessing as multiprocessing
|
|||||||
import os
|
import os
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from ast import Mult
|
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
from typing import AsyncIterator, Callable, Dict, Optional, Union
|
from typing import AsyncIterator, Callable, Dict, Optional
|
||||||
|
|
||||||
from sglang.srt.model_executor.model_runner import LocalSerializedTensor
|
|
||||||
|
|
||||||
# Fix a bug of Python threading
|
# Fix a bug of Python threading
|
||||||
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
||||||
@@ -84,7 +81,6 @@ from sglang.srt.openai_api.protocol import ModelCard, ModelList
|
|||||||
from sglang.srt.reasoning_parser import ReasoningParser
|
from sglang.srt.reasoning_parser import ReasoningParser
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
MultiprocessingSerializer,
|
|
||||||
add_api_key_middleware,
|
add_api_key_middleware,
|
||||||
add_prometheus_middleware,
|
add_prometheus_middleware,
|
||||||
delete_directory,
|
delete_directory,
|
||||||
|
|||||||
@@ -716,7 +716,7 @@ class TokenizerManager:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
async def flush_cache(self) -> FlushCacheReqOutput:
|
async def flush_cache(self) -> FlushCacheReqOutput:
|
||||||
return await self.flush_cache_communicator(FlushCacheReqInput())
|
return (await self.flush_cache_communicator(FlushCacheReqInput()))[0]
|
||||||
|
|
||||||
def abort_request(self, rid: str):
|
def abort_request(self, rid: str):
|
||||||
if rid not in self.rid_to_state:
|
if rid not in self.rid_to_state:
|
||||||
|
|||||||
Reference in New Issue
Block a user