feat: update other MoE models deps (#2156)

2024-11-24 21:36:34 +08:00
parent c211e7b669
commit e3938b2f9c
10 changed files with 28 additions and 14 deletions
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -957,6 +957,21 @@ def direct_register_custom_op(
    fake_impl: Optional[Callable] = None,
    target_lib: Optional[Library] = None,
 ):
+    """
+    `torch.library.custom_op` can have significant overhead because it
+    needs to consider complicated dispatching logic. This function
+    directly registers a custom op and dispatches it to the CUDA backend.
+    See https://gist.github.com/youkaichao/ecbea9ec9fc79a45d2adce1784d7a9a5
+    for more details.
+
+    By default, the custom op is registered to the vLLM library. If you
+    want to register it to a different library, you can pass the library
+    object to the `target_lib` argument.
+
+    IMPORTANT: the lifetime of the operator is tied to the lifetime of the
+    library object. If you want to bind the operator to a different library,
+    make sure the library object is alive when the operator is used.
+    """
    import torch.library

    if hasattr(torch.library, "infer_schema"):