From 5a73145747bead80dab107734c5423461c1dc31c Mon Sep 17 00:00:00 2001 From: Sungjae Lee <33976427+llsj14@users.noreply.github.com> Date: Sun, 12 Jan 2025 13:30:59 +0000 Subject: [PATCH 1/7] fix: eagle test failed Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com> --- vllm/model_executor/models/eagle.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/eagle.py b/vllm/model_executor/models/eagle.py index eb7b5af19ae96..7ef1de6ad777d 100644 --- a/vllm/model_executor/models/eagle.py +++ b/vllm/model_executor/models/eagle.py @@ -19,6 +19,11 @@ class DummyInputLayerNorm(nn.Module): + def __init__(self, weight=None, bias=None): + super().__init__() + self.weight = nn.Parameter(weight) if weight is not None else None + self.bias = nn.Parameter(bias) if bias is not None else None + def forward(self, x): return x @@ -69,7 +74,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # Modify layer normalization and residual connections as suggested # in the EAGLE framework: https://github.com/SafeAILab/EAGLE - self.model.model.layers[0].input_layernorm = DummyInputLayerNorm() + self.model.model.layers[0].input_layernorm = DummyInputLayerNorm( + weight=self.model.model.layers[0].input_layernorm.weight) self.model.model.norm = DummyOutputNorm() self.orig_vocab_size = config.vocab_size From efbfc8c7ebaeadc84d3730ecb620c0c86559cf30 Mon Sep 17 00:00:00 2001 From: Sungjae Lee <33976427+llsj14@users.noreply.github.com> Date: Sun, 12 Jan 2025 13:43:35 +0000 Subject: [PATCH 2/7] add eagle test to test pipeline Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com> --- .buildkite/test-pipeline.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index cf82210f96ee3..f12b1693f62ce 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -234,10 +234,18 @@ steps: source_file_dependencies: - vllm/spec_decode - tests/spec_decode + - vllm/model_executor/models/eagle.py + - vllm/model_executor/models/eagle.py commands: - pytest -v -s spec_decode/e2e/test_multistep_correctness.py - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py +- label: EAGLE Model tests # 8min + source_file_dependencies: + - vllm/model_executor/models/eagle.py + commands: + - pytest -v -s spec_decode/e2e/test_eagle_correctness.py + - label: LoRA Test %N # 15min each mirror_hardwares: [amd] source_file_dependencies: From f6dac92a21cf4e2feda31d76e7684ad4a4177ac1 Mon Sep 17 00:00:00 2001 From: Sungjae Lee <33976427+llsj14@users.noreply.github.com> Date: Sun, 12 Jan 2025 13:47:56 +0000 Subject: [PATCH 3/7] update test-pipeline.yaml Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com> --- .buildkite/test-pipeline.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index f12b1693f62ce..70ca8945a5d9d 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -234,8 +234,6 @@ steps: source_file_dependencies: - vllm/spec_decode - tests/spec_decode - - vllm/model_executor/models/eagle.py - - vllm/model_executor/models/eagle.py commands: - pytest -v -s spec_decode/e2e/test_multistep_correctness.py - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py From f6625dbb0f1f74fcf3ea2c1a8d2dcd190a408322 Mon Sep 17 00:00:00 2001 From: Sungjae Lee <33976427+llsj14@users.noreply.github.com> Date: Sun, 12 Jan 2025 14:08:42 +0000 Subject: [PATCH 4/7] update test-pipeline Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com> --- .buildkite/test-pipeline.yaml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 70ca8945a5d9d..da5ee4ae77619 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -230,18 +230,14 @@ steps: - pytest -v -s test_logits_processor.py - pytest -v -s model_executor/test_guided_processors.py -- label: Speculative decoding tests # 30min +- label: Speculative decoding tests # 40min source_file_dependencies: - vllm/spec_decode - tests/spec_decode + - vllm/model_executor/models/eagle.py commands: - pytest -v -s spec_decode/e2e/test_multistep_correctness.py - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py - -- label: EAGLE Model tests # 8min - source_file_dependencies: - - vllm/model_executor/models/eagle.py - commands: - pytest -v -s spec_decode/e2e/test_eagle_correctness.py - label: LoRA Test %N # 15min each From 1681246102c3c4efe6c92cfe99bee7d27ed7889f Mon Sep 17 00:00:00 2001 From: Sungjae Lee <33976427+llsj14@users.noreply.github.com> Date: Mon, 13 Jan 2025 02:00:32 +0000 Subject: [PATCH 5/7] add comments Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com> --- vllm/model_executor/models/eagle.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vllm/model_executor/models/eagle.py b/vllm/model_executor/models/eagle.py index 7ef1de6ad777d..b18199dfedf76 100644 --- a/vllm/model_executor/models/eagle.py +++ b/vllm/model_executor/models/eagle.py @@ -74,6 +74,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # Modify layer normalization and residual connections as suggested # in the EAGLE framework: https://github.com/SafeAILab/EAGLE + # While weights and biases are generally not needed, + # they are retained here to support certain unit tests + # (e.g., spec_decode/e2e/test_eagle_correctness.py). self.model.model.layers[0].input_layernorm = DummyInputLayerNorm( weight=self.model.model.layers[0].input_layernorm.weight) self.model.model.norm = DummyOutputNorm() From bbe255d5369c177db90d10bc4fd1cc94e919bc69 Mon Sep 17 00:00:00 2001 From: Sungjae Lee <33976427+llsj14@users.noreply.github.com> Date: Mon, 13 Jan 2025 02:03:42 +0000 Subject: [PATCH 6/7] make format Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com> --- vllm/model_executor/models/eagle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/eagle.py b/vllm/model_executor/models/eagle.py index b18199dfedf76..948560b4906b8 100644 --- a/vllm/model_executor/models/eagle.py +++ b/vllm/model_executor/models/eagle.py @@ -75,7 +75,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # Modify layer normalization and residual connections as suggested # in the EAGLE framework: https://github.com/SafeAILab/EAGLE # While weights and biases are generally not needed, - # they are retained here to support certain unit tests + # they are retained here to support certain unit tests # (e.g., spec_decode/e2e/test_eagle_correctness.py). self.model.model.layers[0].input_layernorm = DummyInputLayerNorm( weight=self.model.model.layers[0].input_layernorm.weight) From 8b80587865dd9111c5c76dc43b8491b993f4792e Mon Sep 17 00:00:00 2001 From: Sungjae Lee <33976427+llsj14@users.noreply.github.com> Date: Mon, 13 Jan 2025 04:57:42 +0000 Subject: [PATCH 7/7] retrigger CI Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com>