From d5793c4b6a381332aae14d992bc830460d3835b5 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 10 Jan 2025 10:20:51 +0800
Subject: [PATCH 1/2] fix typo

---
 megatron/training/arguments.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py
index 7fe254285d..1382cfd5a0 100644
--- a/megatron/training/arguments.py
+++ b/megatron/training/arguments.py
@@ -958,7 +958,7 @@ def _add_network_size_args(parser):
     group.add_argument('--decoder-num-layers', type=int, default=None,
                        help='Number of decoder transformer layers.')
     group.add_argument('--hidden-size', type=int, default=None,
-                       help='Tansformer hidden size.')
+                       help='Transformer hidden size.')
     group.add_argument('--ffn-hidden-size', type=int, default=None,
                        help='Transformer Feed-Forward Network hidden size. '
                        'This is set to 4*hidden-size if not provided')

From d2bba69d550a7e9b26d808e5a0ef68f0384f9239 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 10 Jan 2025 15:07:06 +0800
Subject: [PATCH 2/2] fix

---
 megatron/training/arguments.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/megatron/training/arguments.py b/megatron/training/arguments.py
index 1382cfd5a0..83625ad2db 100644
--- a/megatron/training/arguments.py
+++ b/megatron/training/arguments.py
@@ -1932,7 +1932,7 @@ def _add_data_args(parser):
     group.add_argument('--reset-position-ids', action='store_true',
                        help='Reset posistion ids after end-of-document token.')
     group.add_argument('--reset-attention-mask', action='store_true',
-                       help='Reset self attention maske after '
+                       help='Reset self attention mask after '
                        'end-of-document token.')
     group.add_argument('--eod-mask-loss', action='store_true',
                        help='Mask loss for the end of document tokens.')