update
This commit is contained in:
98
vllm/utils/serial_utils.py
Normal file
98
vllm/utils/serial_utils.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import io
import sys
from collections.abc import Mapping
from dataclasses import dataclass
from typing import Literal, get_args

import numpy as np
import numpy.typing as npt
import pybase64
import torch

# Host byte order ("little" or "big"), cached once at import time; used to
# decide whether an explicitly requested endianness requires a byte swap.
sys_byteorder = sys.byteorder
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DTypeInfo:
    """Groups the dtype handles needed to (de)serialize one embedding dtype.

    numpy has no bfloat16 or fp8 dtypes, so tensors are reinterpreted
    through a byte-compatible "view" dtype before crossing into numpy.
    """

    # The real payload dtype of the tensor.
    torch_dtype: torch.dtype

    # Byte-compatible torch dtype used with ``Tensor.view`` so the data can
    # be handed to numpy (e.g. bfloat16 -> float16, fp8 -> uint8).
    torch_view_dtype: torch.dtype
    # numpy dtype matching ``torch_view_dtype`` item-for-item.
    numpy_view_dtype: npt.DTypeLike

    @property
    def nbytes(self) -> int:
        """Size in bytes of a single element of ``torch_dtype``."""
        return self.torch_dtype.itemsize
|
||||
|
||||
|
||||
# Wire-format dtype names accepted by the serialization helpers below.
EmbedDType = Literal["float32", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"]
# Byte order of the serialized payload; "native" means the host's order.
Endianness = Literal["native", "big", "little"]
# Supported response encodings for embedding payloads.
EncodingFormat = Literal["float", "base64", "bytes", "bytes_only"]

# NOTE: the fp8 entries only rely on fp8 as a storage representation on the
# CPU — no fp8 arithmetic is performed — so they should work even on
# platforms without hardware fp8 support.
# NOTE: numpy does not support bfloat16 or fp8, so those entries round-trip
# through a byte-compatible view dtype (float16 / uint8 respectively).
EMBED_DTYPES: Mapping[EmbedDType, DTypeInfo] = {
    "float32": DTypeInfo(torch.float32, torch.float32, np.float32),
    "float16": DTypeInfo(torch.float16, torch.float16, np.float16),
    "bfloat16": DTypeInfo(torch.bfloat16, torch.float16, np.float16),
    "fp8_e4m3": DTypeInfo(torch.float8_e4m3fn, torch.uint8, np.uint8),
    "fp8_e5m2": DTypeInfo(torch.float8_e5m2, torch.uint8, np.uint8),
}
# All valid ``Endianness`` values, extracted from the Literal alias.
ENDIANNESS: tuple[Endianness, ...] = get_args(Endianness)
|
||||
|
||||
|
||||
def tensor2base64(x: torch.Tensor) -> str:
    """Serialize *x* with ``torch.save`` and return the base64-encoded bytes.

    The result is an ASCII string suitable for embedding in JSON responses.
    """
    buffer = io.BytesIO()
    torch.save(x, buffer)
    # getvalue() returns the full buffer contents regardless of position,
    # so no explicit seek is needed.
    raw = buffer.getvalue()
    buffer.close()
    return pybase64.b64encode(raw).decode("utf-8")
|
||||
|
||||
|
||||
def tensor2binary(
    tensor: torch.Tensor,
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> bytes:
    """Convert *tensor* to raw bytes in the requested dtype and byte order.

    The tensor is cast to ``embed_dtype``, flattened, reinterpreted through a
    numpy-compatible view dtype (bfloat16/fp8 have no numpy equivalent), and
    byte-swapped when *endianness* names an explicit order that differs from
    the host's.
    """
    assert isinstance(tensor, torch.Tensor)
    assert embed_dtype in EMBED_DTYPES
    assert endianness in ENDIANNESS

    info = EMBED_DTYPES[embed_dtype]

    # Cast and flatten first; contiguous() guarantees a linear memory layout
    # before the dtype reinterpretation.
    converted = tensor.to(info.torch_dtype).flatten().contiguous()
    np_array = converted.view(info.torch_view_dtype).numpy()

    # "native" and the host's own order need no swap; anything else does.
    if endianness not in ("native", sys_byteorder):
        np_array = np_array.byteswap()

    return np_array.tobytes()
|
||||
|
||||
|
||||
def binary2tensor(
    binary: bytes,
    shape: tuple[int, ...],
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> torch.Tensor:
    """Rebuild a tensor from raw bytes produced by ``tensor2binary``.

    *binary* is interpreted through the numpy-compatible view dtype for
    ``embed_dtype``, reshaped to *shape*, byte-swapped when *endianness*
    names an explicit order differing from the host's, and finally viewed
    back as the real torch dtype.
    """
    assert embed_dtype in EMBED_DTYPES
    assert endianness in ENDIANNESS

    info = EMBED_DTYPES[embed_dtype]

    array = np.frombuffer(binary, dtype=info.numpy_view_dtype)
    array = array.reshape(shape)

    # Mirror of the swap rule in tensor2binary: only swap for an explicit
    # non-host byte order.
    if endianness not in ("native", sys_byteorder):
        array = array.byteswap()

    return torch.from_numpy(array).view(info.torch_dtype)
|
||||
Reference in New Issue
Block a user