diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py
index 5ac567438..cf95f0fb2 100644
--- a/python/sglang/srt/entrypoints/http_server.py
+++ b/python/sglang/srt/entrypoints/http_server.py
@@ -14,7 +14,7 @@
 """
 The entry point of inference server. (SRT = SGLang Runtime)
 
-This file implements HTTP APIs for the inferenc engine via fastapi.
+This file implements HTTP APIs for the inference engine via fastapi.
 """
 
 import asyncio
diff --git a/python/sglang/srt/mem_cache/memory_pool.py b/python/sglang/srt/mem_cache/memory_pool.py
index de30aab25..b76d84d9a 100644
--- a/python/sglang/srt/mem_cache/memory_pool.py
+++ b/python/sglang/srt/mem_cache/memory_pool.py
@@ -19,7 +19,7 @@ from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
 Memory pool.
 
 SGLang has two levels of memory pool.
-ReqToTokenPool maps a a request to its token locations.
+ReqToTokenPool maps a request to its token locations.
 TokenToKVPoolAllocator manages the indices to kv cache data.
 KVCache actually holds the physical kv cache.
 """