diff --git a/tests/kernels/test_blocksparse_attention.py b/tests/kernels/test_blocksparse_attention.py index fad342d1b5923..1cfba68483338 100644 --- a/tests/kernels/test_blocksparse_attention.py +++ b/tests/kernels/test_blocksparse_attention.py @@ -228,6 +228,7 @@ def test_paged_attention( block_size, max_seq_len, alibi_slopes, + None, # TODO add custom bias kv_cache_dtype, k_scale, v_scale, @@ -265,6 +266,7 @@ def test_paged_attention( block_size, max_seq_len, alibi_slopes, + None, kv_cache_dtype, k_scale, v_scale, diff --git a/vllm/attention/ops/ipex_attn.py b/vllm/attention/ops/ipex_attn.py index cbc6c74acf09a..5e4b1c8bc29e2 100644 --- a/vllm/attention/ops/ipex_attn.py +++ b/vllm/attention/ops/ipex_attn.py @@ -103,6 +103,7 @@ def forward_decode( block_size, max_context_len, alibi_slopes, + None, # TODO add custom bias kv_cache_dtype, k_scale, v_scale,