Add learning rate scheduling support for DeepSpeedStrategy
#20320
base: master
The first file changed is the DeepSpeed strategy implementation:
```diff
@@ -26,6 +26,7 @@
 from lightning_utilities.core.imports import RequirementCache
 from torch.nn import Module
 from torch.optim import Optimizer
+from torch.optim.lr_scheduler import _LRScheduler
 from typing_extensions import override

 from lightning.fabric.accelerators import Accelerator, CUDAAccelerator
```
```diff
@@ -312,10 +313,9 @@ def model(self) -> "DeepSpeedEngine":

     @override
     def setup_module_and_optimizers(
-        self, module: Module, optimizers: list[Optimizer]
-    ) -> tuple["DeepSpeedEngine", list[Optimizer]]:
-        """Set up a model and multiple optimizers together.
-
+        self, module: Module, optimizers: list[Optimizer], scheduler: Optional[_LRScheduler] = None
+    ) -> Tuple["DeepSpeedEngine", list[Optimizer], Optional[_LRScheduler]]:
+        """Set up a model and multiple optimizers together, along with an optional learning rate scheduler.
         Currently, only a single optimizer is supported.

         Return:
```

(lantiga marked this conversation as resolved.)
```diff
@@ -329,9 +329,9 @@ def setup_module_and_optimizers(
                 f" Got {len(optimizers)} optimizers instead."
             )

-        self._deepspeed_engine, optimizer = self._initialize_engine(module, optimizers[0])
+        self._deepspeed_engine, optimizer, scheduler = self._initialize_engine(module, optimizers[0], scheduler)
         self._set_deepspeed_activation_checkpointing()
-        return self._deepspeed_engine, [optimizer]
+        return self._deepspeed_engine, [optimizer], scheduler

     @override
     def setup_module(self, module: Module) -> "DeepSpeedEngine":
```
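Taken together, the new call shape looks like this from the caller's side. A minimal sketch, assuming a configured `DeepSpeedStrategy` instance named `strategy` running in an initialized distributed environment (the model, optimizer, and scheduler objects here are illustrative, not from the diff):

```python
# Sketch of the new strategy API surface; `strategy` is assumed to be a
# configured DeepSpeedStrategy, which this snippet does not construct.
import torch

model = torch.nn.Linear(32, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)

# Before this PR the call returned (engine, [optimizer]); with this PR the
# (possibly DeepSpeed-managed) scheduler is returned as a third element.
engine, optimizers, scheduler = strategy.setup_module_and_optimizers(
    model, [optimizer], scheduler
)
```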
```diff
@@ -340,7 +340,7 @@ def setup_module(self, module: Module) -> "DeepSpeedEngine":

         For training, see :meth:`setup_module_and_optimizers`.

         """
-        self._deepspeed_engine, _ = self._initialize_engine(module)
+        self._deepspeed_engine, _, _ = self._initialize_engine(module)
         return self._deepspeed_engine

     @override
```
```diff
@@ -593,10 +593,8 @@ def register_strategies(cls, strategy_registry: _StrategyRegistry) -> None:
         )

     def _initialize_engine(
-        self,
-        model: Module,
-        optimizer: Optional[Optimizer] = None,
-    ) -> tuple["DeepSpeedEngine", Optimizer]:
+        self, model: Module, optimizer: Optional[Optimizer] = None, scheduler: Optional[_LRScheduler] = None
+    ) -> tuple["DeepSpeedEngine", Optimizer, Optional[_LRScheduler]]:
         """Initialize one model and one optimizer with an optional learning rate scheduler.

         This calls ``deepspeed.initialize`` internally.
```

Review comment: Same comment as above.

Author: Addressed in this commit.
```diff
@@ -605,15 +603,16 @@ def _initialize_engine(
         import deepspeed

         model_parameters = filter(lambda p: p.requires_grad, model.parameters())
-        deepspeed_engine, deepspeed_optimizer, _, _ = deepspeed.initialize(
+        deepspeed_engine, deepspeed_optimizer, _, deepspeed_scheduler = deepspeed.initialize(
             args=argparse.Namespace(device_rank=self.root_device.index),
             config=self.config,
             model=model,
             model_parameters=model_parameters,
             optimizer=optimizer,
+            lr_scheduler=scheduler,
             dist_init_required=False,
         )
-        return deepspeed_engine, deepspeed_optimizer
+        return deepspeed_engine, deepspeed_optimizer, deepspeed_scheduler

     @override
     def setup_environment(self) -> None:
```
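For reference, `deepspeed.initialize` always returns a 4-tuple `(engine, optimizer, training_dataloader, lr_scheduler)`; the change above stops discarding the last element. A minimal standalone sketch of that pass-through, assuming a launched DeepSpeed environment (the config values are illustrative, not the strategy's config):

```python
# Sketch: deepspeed.initialize hands the client scheduler back as the
# fourth element of its return tuple instead of it being dropped.
import deepspeed
import torch

model = torch.nn.Linear(8, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100)

engine, ds_optimizer, _, ds_scheduler = deepspeed.initialize(
    model=model,
    optimizer=optimizer,
    lr_scheduler=scheduler,          # client scheduler is passed through
    config={"train_batch_size": 8},  # hypothetical minimal config
)
# ds_scheduler is what callers need in order to step the LR schedule.
```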
The diff also touches a second file, changing the NumPy worker seeding in `pl_worker_init_function`:
```diff
@@ -104,7 +104,10 @@ def pl_worker_init_function(worker_id: int, rank: Optional[int] = None) -> None:
     if _NUMPY_AVAILABLE:
         import numpy as np

-        np.random.seed(seed_sequence[3] & 0xFFFFFFFF)  # numpy takes 32-bit seed only
+        ss = np.random.SeedSequence([base_seed, worker_id, global_rank])
+        np_rng_seed = ss.generate_state(4)
+
+        np.random.seed(np_rng_seed)


 def _generate_seed_sequence(base_seed: int, worker_id: int, global_rank: int, count: int) -> list[int]:
```

Review comment: This is an unrelated change; it shouldn't be included.

Author: Since this is now merged into …
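The replacement follows NumPy's own guidance: `np.random.SeedSequence` mixes entropy from several integers, and `generate_state(4)` yields four 32-bit words, which the legacy `np.random.seed` accepts directly as an array. A standalone sketch of the idea, with example inputs in place of the function's arguments:

```python
# Sketch: derive a high-quality per-worker NumPy seed instead of
# truncating a single 64-bit value to 32 bits.
import numpy as np

base_seed, worker_id, global_rank = 42, 3, 1  # illustrative inputs

ss = np.random.SeedSequence([base_seed, worker_id, global_rank])
np_rng_seed = ss.generate_state(4)  # four uint32 words

np.random.seed(np_rng_seed)  # the legacy global RNG accepts an int array
print(np.random.random())    # reproducible for the same three inputs
```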
Review comment: This is a breaking change; it will cause existing user code to fail, because `scheduler` is returned unconditionally. Since `scheduler` is `Optional` in the signature, I suggest we only return it if it was not `None` as an argument, so we won't break anyone's code.

Author: Agreed. Addressed in this commit.
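A minimal sketch of the backward-compatible return the reviewer suggests, reduced to a standalone function (an illustration of the idea, not the code from the follow-up commit):

```python
# Hypothetical illustration of the reviewer's suggestion: keep the old
# 2-tuple return when no scheduler is passed, so existing callers that
# unpack two values keep working.
from typing import Any, Optional

def setup(engine: Any, optimizer: Any, scheduler: Optional[Any] = None):
    if scheduler is None:
        return engine, [optimizer]         # old shape: nothing breaks
    return engine, [optimizer], scheduler  # new shape: opt-in only

# Existing code keeps unpacking two values:
eng, opts = setup("engine", "opt")
# New code that passes a scheduler unpacks three:
eng, opts, sched = setup("engine", "opt", scheduler="sched")
```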