add tensorrt_llm common and cutlass_extensions as 3rdparty (#3216)

Co-authored-by: BBuf <35585791+BBuf@users.noreply.github.com>
2025-01-30 23:04:41 +08:00
parent 468d23cff9
commit 222ce6f1da
86 changed files with 23201 additions and 0 deletions
--- a/sgl-kernel/3rdparty/tensorrt_llm/common/envUtils.h
+++ b/sgl-kernel/3rdparty/tensorrt_llm/common/envUtils.h
@@ -0,0 +1,60 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <cstdint>
+#include <optional>
+#include <string>
+
+namespace tensorrt_llm::common
+{
+// Useful when you want to inject some debug code that is controllable with an env var named `name`.
+std::optional<int32_t> getIntEnv(char const* name); // NOTE(review): presumably nullopt when unset - confirm.
+
+// Relates to XQA kernels (optimized kernels for the generation phase).
+bool forceXQAKernels(); // NOTE(review): presumably true when XQA is forced via an env var - confirm in the .cpp.
+
+// Whether XQA JIT is enabled.
+//
+// Returns the value of the TRTLLM_ENABLE_XQA_JIT env var. If that env var does not exist, std::nullopt is returned.
+std::optional<bool> getEnvEnableXQAJIT();
+
+// Tune the number of blocks per sequence for accuracy/performance purposes (applies to the Mmha getters below).
+bool getEnvMmhaMultiblockDebug(); // NOTE(review): presumably gates the two overrides below - confirm.
+
+int getEnvMmhaBlocksPerSequence(); // NOTE(review): value when the env var is unset is not visible here.
+
+int getEnvMmhaKernelBlockSize(); // NOTE(review): value when the env var is unset is not visible here.
+
+// Whether PDL is enabled.
+bool getEnvEnablePDL(); // NOTE(review): PDL presumably means CUDA Programmatic Dependent Launch - confirm.
+
+bool getEnvUseUCXKvCache(); // NOTE(review): presumably selects a UCX-based KV-cache path - confirm.
+
+std::string getEnvUCXInterface(); // NOTE(review): presumably a UCX interface name; unset behavior unknown.
+
+bool getEnvDisaggLayerwise(); // NOTE(review): semantics not visible in this header - see the definition.
+
+bool getEnvParallelCacheSend(); // NOTE(review): semantics not visible in this header - see the definition.
+
+bool getEnvRequestKVCacheSerial(); // NOTE(review): semantics not visible in this header - see the definition.
+
+bool getEnvDisableKVCacheTransferOverlap(); // NOTE(review): presumably an opt-out switch - confirm.
+
+bool getEnvDisableReceiveKVCacheParallel(); // NOTE(review): presumably an opt-out switch - confirm.
+
+} // namespace tensorrt_llm::common