Merge branch 'main' into xiezhq-hierarchical

sgl-project · Jan 30, 2025 · c31fdc1 · c31fdc1
2 parents fe550c6 + 9602c2a
commit c31fdc1
Show file tree

Hide file tree

Showing 102 changed files with 24,046 additions and 124 deletions.
diff --git a/.clang-format-ignore b/.clang-format-ignore
@@ -0,0 +1 @@
+sgl-kernel/3rdparty/tensorrt_llm/*
diff --git a/sgl-kernel/3rdparty/cutlass b/sgl-kernel/3rdparty/cutlass
diff --git a/sgl-kernel/3rdparty/flashinfer b/sgl-kernel/3rdparty/flashinfer
diff --git a/sgl-kernel/3rdparty/tensorrt_llm/common/assert.cpp b/sgl-kernel/3rdparty/tensorrt_llm/common/assert.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorrt_llm/common/assert.h"
+
+namespace
+{
+
+bool initCheckDebug() // Reports whether the TLLM_DEBUG_MODE environment variable enables debug checks.
+{
+    auto constexpr kDebugEnabled = "TLLM_DEBUG_MODE"; // name of the environment variable consulted
+    auto const debugEnabled = std::getenv(kDebugEnabled); // nullptr when the variable is not set
+    return debugEnabled && debugEnabled[0] == '1'; // true only if set and its first character is '1'
+}
+} // namespace
+
+bool DebugConfig::isCheckDebugEnabled() // Returns the cached result of initCheckDebug().
+{
+    static bool const debugEnabled = initCheckDebug(); // function-local static: initialized once, on the first call
+    return debugEnabled;
+}
diff --git a/sgl-kernel/3rdparty/tensorrt_llm/common/assert.h b/sgl-kernel/3rdparty/tensorrt_llm/common/assert.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "tensorrt_llm/common/stringUtils.h"
+#include "tensorrt_llm/common/tllmException.h"
+
+#include <string>
+
+namespace tensorrt_llm::common
+{
+[[noreturn]] inline void throwRuntimeError(char const* const file, int const line, std::string const& info = "")
+{ // Never returns: throws a TllmException built from file, line and the formatted message below.
+    throw TllmException(file, line, fmtstr("[TensorRT-LLM][ERROR] Assertion failed: %s", info.c_str())); // %s <- info
+}
+
+} // namespace tensorrt_llm::common
+
+class DebugConfig // Exposes the runtime switch that the TLLM_CHECK_DEBUG* macros test before checking.
+{
+public:
+    static bool isCheckDebugEnabled(); // only declared here; the definition is not part of this header
+};
+
+#if defined(_WIN32) // __assume() is an optimizer assumption (UB if false at run time), not a hint: pass x through
+#define TLLM_LIKELY(x) (x)
+#define TLLM_UNLIKELY(x) (x)
+#else // GCC/Clang: tell the optimizer which truth value to expect; the resulting value still equals x
+#define TLLM_LIKELY(x) __builtin_expect((x), 1)
+#define TLLM_UNLIKELY(x) __builtin_expect((x), 0)
+#endif
+
+#define TLLM_CHECK(val) /* Throws via throwRuntimeError, with #val as the message, when val is false. */               \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        TLLM_LIKELY(static_cast<bool>(val)) ? ((void) 0)                                                               \
+                                            : tensorrt_llm::common::throwRuntimeError(__FILE__, __LINE__, #val);       \
+    } while (0)
+
+#define TLLM_CHECK_WITH_INFO(val, info, ...) /* Throws when val is false; message is fmtstr(info, ...). */             \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        TLLM_LIKELY(static_cast<bool>(val))                                                                            \
+        ? ((void) 0)                                                                                                   \
+        : tensorrt_llm::common::throwRuntimeError(                                                                     \
+            __FILE__, __LINE__, tensorrt_llm::common::fmtstr(info, ##__VA_ARGS__));                                    \
+    } while (0)
+
+#define TLLM_CHECK_DEBUG(val) /* TLLM_CHECK that runs (and evaluates val) only if isCheckDebugEnabled() is true. */    \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        if (TLLM_UNLIKELY(DebugConfig::isCheckDebugEnabled()))                                                         \
+        {                                                                                                              \
+            TLLM_LIKELY(static_cast<bool>(val)) ? ((void) 0)                                                           \
+                                                : tensorrt_llm::common::throwRuntimeError(__FILE__, __LINE__, #val);   \
+        }                                                                                                              \
+    } while (0)
+
+#define TLLM_CHECK_DEBUG_WITH_INFO(val, info, ...) /* TLLM_CHECK_WITH_INFO gated on isCheckDebugEnabled(). */          \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        if (TLLM_UNLIKELY(DebugConfig::isCheckDebugEnabled()))                                                         \
+        {                                                                                                              \
+            TLLM_LIKELY(static_cast<bool>(val))                                                                        \
+            ? ((void) 0)                                                                                               \
+            : tensorrt_llm::common::throwRuntimeError(                                                                 \
+                __FILE__, __LINE__, tensorrt_llm::common::fmtstr(info, ##__VA_ARGS__));                                \
+        }                                                                                                              \
+    } while (0)
+
+#define TLLM_THROW(...) /* Unconditionally throws NEW_TLLM_EXCEPTION(__VA_ARGS__); that macro is defined elsewhere. */ \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        throw NEW_TLLM_EXCEPTION(__VA_ARGS__);                                                                         \
+    } while (0)
+
+#define TLLM_WRAP(ex) /* Expands to NEW_TLLM_EXCEPTION("<demangled typeid(ex) name>: <(ex).what()>"). */               \
+    NEW_TLLM_EXCEPTION("%s: %s", tensorrt_llm::common::TllmException::demangle(typeid(ex).name()).c_str(), (ex).what())
+27 −0		csrc/flashinfer_gemm_sm90_ops.cu
+65 −66		flashinfer/gemm.py
+5 −1		include/flashinfer/attention/scheduler.cuh