diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py index c7ee6609306d7..cb047c832e6cb 100644 --- a/vllm/core/block_manager_v2.py +++ b/vllm/core/block_manager_v2.py @@ -24,9 +24,8 @@ class BlockSpaceManagerV2(BlockSpaceManager): autoregressively-generated tokens, and other advanced features such as prefix caching, forking/copy-on-write, and sliding-window memory allocation. - The current implementation is partial; in particular prefix caching and - sliding-window are not feature complete. This class implements the design - described in https://github.com/vllm-project/vllm/pull/3492. + This class implements the design described in + https://github.com/vllm-project/vllm/pull/3492. Lookahead slots The block manager has the notion of a "lookahead slot". These are slots @@ -190,7 +189,7 @@ def allocate(self, seq_group: SequenceGroup) -> None: assert (request_id not in self.cross_block_tables), \ - "block table already exists" + "block table already exists" check_no_caching_or_swa_for_blockmgr_encdec(self, seq_group)