Commit c400a81

format

garg-amit committed Sep 3, 2024
1 parent 55b220d
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions vllm/attention/backends/rocm_flash_attn.py
@@ -136,7 +136,8 @@ def prefill_metadata(self) -> Optional["ROCmFlashAttentionMetadata"]:
             slot_mapping=self.slot_mapping[:self.num_prefill_tokens],
             seq_lens=self.seq_lens[:self.num_prefills],
             seq_lens_tensor=self.seq_lens_tensor[:self.num_prefills],
-            num_orig_input_tokens_tensor=self.num_orig_input_tokens_tensor[:self.num_prefills],
+            num_orig_input_tokens_tensor=self.
+            num_orig_input_tokens_tensor[:self.num_prefills],
             max_query_len=self.max_query_len,
             max_prefill_seq_len=self.max_prefill_seq_len,
             max_decode_seq_len=0,
@@ -165,7 +166,8 @@ def decode_metadata(self) -> Optional["ROCmFlashAttentionMetadata"]:
             slot_mapping=self.slot_mapping[self.num_prefill_tokens:],
             seq_lens=None,
             seq_lens_tensor=self.seq_lens_tensor[self.num_prefills:],
-            num_orig_input_tokens_tensor=self.num_orig_input_tokens_tensor[:self.num_prefills],
+            num_orig_input_tokens_tensor=self.
+            num_orig_input_tokens_tensor[:self.num_prefills],
             max_query_len=None,
             max_prefill_seq_len=0,
             max_decode_seq_len=self.max_decode_seq_len,
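The change itself is formatting only (a long keyword argument is re-wrapped across two lines), but the slicing it touches reflects the batch layout these metadata properties rely on: prefill sequences are placed before decode sequences in the batch, so per-sequence tensors are split at num_prefills and per-token tensors at num_prefill_tokens. A minimal sketch of that convention, with illustrative names and values rather than vLLM's actual classes:

import torch

# Hypothetical batch: 2 prefill sequences followed by 2 decode sequences.
num_prefills = 2          # sequences still processing their prompt
num_prefill_tokens = 7    # total prompt tokens across those sequences

seq_lens_tensor = torch.tensor([4, 3, 10, 12])  # prefills first, then decodes
slot_mapping = torch.arange(9)                  # 7 prefill tokens + 2 decode tokens

# Per-sequence tensors split at num_prefills.
prefill_seq_lens = seq_lens_tensor[:num_prefills]   # tensor([4, 3])
decode_seq_lens = seq_lens_tensor[num_prefills:]    # tensor([10, 12])

# Per-token tensors split at num_prefill_tokens.
prefill_slots = slot_mapping[:num_prefill_tokens]   # slots for the 7 prompt tokens
decode_slots = slot_mapping[num_prefill_tokens:]    # one slot per decode step

Under that convention a decode-side field would typically slice [self.num_prefills:]; the decode hunk above keeps the existing [:self.num_prefills] slice untouched, since this commit only re-wraps the line.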
