update
This commit is contained in:
0
vllm/entrypoints/serve/cache/__init__.py
vendored
Normal file
0
vllm/entrypoints/serve/cache/__init__.py
vendored
Normal file
72
vllm/entrypoints/serve/cache/api_router.py
vendored
Normal file
72
vllm/entrypoints/serve/cache/api_router.py
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
|
||||
from fastapi import APIRouter, FastAPI, Query, Request
|
||||
from fastapi.responses import Response
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.engine.protocol import EngineClient
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def engine_client(request: Request) -> EngineClient:
|
||||
return request.app.state.engine_client
|
||||
|
||||
|
||||
@router.post("/reset_prefix_cache")
|
||||
async def reset_prefix_cache(
|
||||
raw_request: Request,
|
||||
reset_running_requests: bool = Query(default=False),
|
||||
reset_external: bool = Query(default=False),
|
||||
):
|
||||
"""
|
||||
Reset the local prefix cache.
|
||||
|
||||
Optionally, if the query parameter `reset_external=true`
|
||||
also resets the external (connector-managed) prefix cache.
|
||||
|
||||
Note that we currently do not check if the prefix cache
|
||||
is successfully reset in the API server.
|
||||
|
||||
Example:
|
||||
POST /reset_prefix_cache?reset_external=true
|
||||
"""
|
||||
logger.info("Resetting prefix cache...")
|
||||
|
||||
await engine_client(raw_request).reset_prefix_cache(
|
||||
reset_running_requests, reset_external
|
||||
)
|
||||
return Response(status_code=200)
|
||||
|
||||
|
||||
@router.post("/reset_mm_cache")
|
||||
async def reset_mm_cache(raw_request: Request):
|
||||
"""
|
||||
Reset the multi-modal cache. Note that we currently do not check if the
|
||||
multi-modal cache is successfully reset in the API server.
|
||||
"""
|
||||
logger.info("Resetting multi-modal cache...")
|
||||
await engine_client(raw_request).reset_mm_cache()
|
||||
return Response(status_code=200)
|
||||
|
||||
|
||||
@router.post("/reset_encoder_cache")
|
||||
async def reset_encoder_cache(raw_request: Request):
|
||||
"""
|
||||
Reset the encoder cache. Note that we currently do not check if the
|
||||
encoder cache is successfully reset in the API server.
|
||||
"""
|
||||
logger.info("Resetting encoder cache...")
|
||||
await engine_client(raw_request).reset_encoder_cache()
|
||||
return Response(status_code=200)
|
||||
|
||||
|
||||
def attach_router(app: FastAPI):
|
||||
if not envs.VLLM_SERVER_DEV_MODE:
|
||||
return
|
||||
app.include_router(router)
|
||||
Reference in New Issue
Block a user