From 7811bfdaa76f903b51e67d5c6b4f4dbb42ec2f69 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Sat, 1 Feb 2025 01:32:18 +0800
Subject: [PATCH] compatible with flashinfer v0.2 (#3235)

---
 python/sglang/srt/layers/attention/flashinfer_backend.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/attention/flashinfer_backend.py b/python/sglang/srt/layers/attention/flashinfer_backend.py
index 7540515c5fd..cc6da781f56 100644
--- a/python/sglang/srt/layers/attention/flashinfer_backend.py
+++ b/python/sglang/srt/layers/attention/flashinfer_backend.py
@@ -800,7 +800,9 @@ def call_begin_forward(
         kv_indptr[1 : bs + 1] = torch.cumsum(paged_kernel_lens, dim=0)
         kv_indptr = kv_indptr[: bs + 1]
         kv_indices = torch.empty(
-            paged_kernel_lens_sum, dtype=torch.int32, device="cuda"
+            paged_kernel_lens_sum + 256,
+            dtype=torch.int32,
+            device=req_pool_indices.device,
         )
         create_flashinfer_kv_indices_triton[(bs,)](
             self.req_to_token,
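
Reviewer note (not part of the patch): the hunk makes two behavioral changes to the kv_indices allocation, and the sketch below isolates them in a standalone snippet. Tensor names mirror the diff; the batch size and page counts are made up, and the stated reason for the +256 tail padding is an assumption, since the patch itself does not explain it.

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
bs = 4  # hypothetical batch size
req_pool_indices = torch.arange(bs, dtype=torch.int32, device=device)
paged_kernel_lens = torch.tensor([3, 5, 2, 7], dtype=torch.int32, device=device)
paged_kernel_lens_sum = int(paged_kernel_lens.sum().item())

# Before: exact-size buffer pinned to the literal device string "cuda".
# kv_indices = torch.empty(
#     paged_kernel_lens_sum, dtype=torch.int32, device="cuda"
# )

# After: over-allocate by 256 entries (presumably slack so flashinfer
# v0.2's kernels can read slightly past the logical end of kv_indices
# without going out of bounds -- an assumption, not stated in the patch)
# and inherit the device from an existing tensor, so the buffer lands on
# whatever device the request pool lives on (e.g. "cuda:1" under
# multi-GPU) instead of always the default CUDA device.
kv_indices = torch.empty(
    paged_kernel_lens_sum + 256,
    dtype=torch.int32,
    device=req_pool_indices.device,
)

# As in the surrounding code: kv_indptr is the exclusive prefix sum over
# per-request page counts; only the first paged_kernel_lens_sum entries
# of kv_indices are logically valid.
kv_indptr = torch.zeros(bs + 1, dtype=torch.int32, device=device)
kv_indptr[1 : bs + 1] = torch.cumsum(paged_kernel_lens, dim=0)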