-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updating default configs to be less bad (#665)
* Update and rename small.yml to 125M.yml * Update 13B.yml * Update 2-7B.yml * Update 13B.yml * Update 6-7B.yml * Update and rename XL.yml to 1-3B.yml * Update 175B.yml * Update eleutherai_cluster.yml * Update eleutherai_cluster.yml * Update 125M.yml * Update 2-7B.yml * Update 1-3B.yml * Update 1-3B.yml * Update 125M.yml * Update 13B.yml * Update 1-3B.yml * Update 125M.yml * Update 175B.yml * Update 2-7B.yml * Update 6-7B.yml * Update medium.yml * Rename medium.yml to 350M.yml * Update and rename large.yml to 760M.yml * Update NeoXArgs docs automatically * Update NeoXArgs docs automatically * Create 19M.yml * Create 800M.yml * Update 19M.yml * Create 49M.yml * Update NeoXArgs docs automatically Co-authored-by: Stella Biderman <[email protected]> Co-authored-by: github-actions <[email protected]> Co-authored-by: Hailey Schoelkopf <[email protected]> Co-authored-by: Quentin Anthony <[email protected]>
- Loading branch information
1 parent
b1c74f3
commit fe21c3e
Showing
13 changed files
with
380 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# Model/training configuration: 6 layers, hidden size 512, 8 attention heads,
# sequence length 2048.
# NOTE: this file uses `#` comments and trailing commas inside a brace-delimited
# mapping, so it is not strict JSON; presumably it is loaded with a YAML parser
# (the accompanying configs are named *.yml) - confirm before changing the loader.
{
  # parallelism settings
  "pipe-parallel-size": 1,
  "model-parallel-size": 1,

  # model settings
  "num-layers": 6,
  "hidden-size": 512,
  "num-attention-heads": 8,
  "seq-length": 2048,
  # kept equal to seq-length above
  "max-position-embeddings": 2048,
  "pos-emb": "rotary",
  "no-weight-tying": true,
  "gpt-j-residual": false,
  "output-layer-parallelism": "column",

  # fusion options (both disabled in this config)
  "scaled-upper-triang-masked-softmax-fusion": false,
  "bias-gelu-fusion": false,

  # init methods
  "init_method": "small_init",
  "output_layer_init_method": "wang_init",

  # optimizer settings
  "optimizer": {
    "type": "Adam",
    "params": {
      "lr": 0.001,
      "betas": [0.9, 0.95],
      "eps": 1.0e-8,
    }
  },
  # one tenth of the optimizer "lr" above
  "min_lr": 0.0001,

  # zero_optimization settings (stage 1, no CPU offload)
  # Booleans are written lowercase, matching every other boolean in this file.
  "zero_optimization": {
    "stage": 1,
    "allgather_partitions": true,
    "allgather_bucket_size": 500000000,
    "overlap_comm": true,
    "reduce_scatter": true,
    "reduce_bucket_size": 500000000,
    "contiguous_gradients": true,
    "cpu_offload": false
  },

  # batch / data settings
  "train_micro_batch_size_per_gpu": 4, #32,
  # NOTE(review): "gas" is presumably gradient accumulation steps - confirm
  "gas": 1,
  "data-impl": "mmap",
  "num_workers": 1,

  # activation checkpointing
  "checkpoint-activations": true,
  "checkpoint-num-layers": 1,
  "partition-activations": true,
  "synchronize-each-layer": true,

  # regularization
  "gradient_clipping": 1.0,
  "weight-decay": 0.1,
  "hidden-dropout": 0,
  "attention-dropout": 0,

  # precision settings
  "fp16": {
    "fp16": true,
    "enabled": true,
    # NOTE(review): in DeepSpeed-style fp16 sections a loss_scale of 0 selects
    # dynamic loss scaling - confirm this is the consumer's interpretation
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "initial_scale_power": 12,
    "hysteresis": 2,
    "min_loss_scale": 1,
  },

  # misc. training settings
  "train-iters": 143000,
  # same value as train-iters, so the LR decay schedule spans the whole run
  "lr-decay-iters": 143000,
  "distributed-backend": "nccl",
  "lr-decay-style": "cosine",
  "warmup": 0.01,
  "save-interval": 1000,
  # NOTE(review): eval-interval (100000) is large relative to train-iters (143000);
  # verify that such infrequent evaluation is intended
  "eval-interval": 100000,
  "eval-iters": 10,

  # logging
  "log-interval": 10,
  "steps_per_print": 10,
  "wall_clock_breakdown": true,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.