
Commit

fixed rocm backend
garg-amit committed Sep 3, 2024
1 parent a87b84f commit 55b220d
Showing 1 changed file with 2 additions and 0 deletions.
vllm/attention/backends/rocm_flash_attn.py (2 additions, 0 deletions)
@@ -136,6 +136,7 @@ def prefill_metadata(self) -> Optional["ROCmFlashAttentionMetadata"]:
             slot_mapping=self.slot_mapping[:self.num_prefill_tokens],
             seq_lens=self.seq_lens[:self.num_prefills],
             seq_lens_tensor=self.seq_lens_tensor[:self.num_prefills],
+            num_orig_input_tokens_tensor=self.num_orig_input_tokens_tensor[:self.num_prefills],
             max_query_len=self.max_query_len,
             max_prefill_seq_len=self.max_prefill_seq_len,
             max_decode_seq_len=0,
@@ -164,6 +165,7 @@ def decode_metadata(self) -> Optional["ROCmFlashAttentionMetadata"]:
             slot_mapping=self.slot_mapping[self.num_prefill_tokens:],
             seq_lens=None,
             seq_lens_tensor=self.seq_lens_tensor[self.num_prefills:],
+            num_orig_input_tokens_tensor=self.num_orig_input_tokens_tensor[:self.num_prefills],
             max_query_len=None,
             max_prefill_seq_len=0,
             max_decode_seq_len=self.max_decode_seq_len,
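
For context, both properties build a cached metadata object for one half of a mixed batch: prefill-side tensors are sliced with [:self.num_prefills] (or [:self.num_prefill_tokens] for per-token fields such as slot_mapping), while decode-side tensors such as seq_lens_tensor take the remainder with [self.num_prefills:]. A minimal sketch of that slicing convention follows; the tensor values and batch sizes are hypothetical and not taken from the repository.

    # Sketch only (not vLLM code): how per-sequence metadata tensors are
    # partitioned between the prefill and decode halves of a mixed batch.
    import torch

    num_prefills = 2  # assume the first 2 sequences are still prefilling
    seq_lens_tensor = torch.tensor([128, 64, 1, 1, 1])                 # 2 prefills + 3 decodes
    num_orig_input_tokens_tensor = torch.tensor([128, 64, 96, 80, 72])  # hypothetical values

    # Prefill metadata keeps the leading num_prefills entries ...
    prefill_orig_tokens = num_orig_input_tokens_tensor[:num_prefills]  # tensor([128, 64])
    # ... while decode-side tensors such as seq_lens_tensor keep the rest.
    decode_seq_lens = seq_lens_tensor[num_prefills:]                   # tensor([1, 1, 1])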
