From eb34954056eb1ada21f3ebcf10c1a27c807dbf7f Mon Sep 17 00:00:00 2001
From: "Wen-Heng (Jack) Chung"
Date: Sat, 1 Feb 2025 11:30:04 -0600
Subject: [PATCH] Additional tuning for grouped paged attention kernel

Changed:
- num_stages
---
 .../sglang/srt/layers/attention/triton_ops/decode_attention.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/attention/triton_ops/decode_attention.py b/python/sglang/srt/layers/attention/triton_ops/decode_attention.py
index 4ccf38b0588..25818b41dfa 100644
--- a/python/sglang/srt/layers/attention/triton_ops/decode_attention.py
+++ b/python/sglang/srt/layers/attention/triton_ops/decode_attention.py
@@ -467,7 +467,7 @@ def _decode_grouped_att_m_fwd(
         NUM_KV_SPLITS=NUM_KV_SPLITS,
         logit_cap=logit_cap,
         num_warps=4,
-        num_stages=2,
+        num_stages=1,
         Lk=Lk,
         Lv=Lv,
         **extra_kargs,