Skip to content

Commit

Permalink
Merge pull request #2 from Jiayi-Pan/countdown-wip
Browse files Browse the repository at this point in the history
no need to save/eval frequently
  • Loading branch information
Jiayi-Pan authored Jan 23, 2025
2 parents e721899 + 764dde3 commit 000a419
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 11 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
trainer.default_hdfs_dir=null \
trainer.n_gpus_per_node=$N_GPUS \
trainer.nnodes=1 \
trainer.save_freq=30 \
trainer.test_freq=10 \
trainer.save_freq=100 \
trainer.test_freq=100 \
trainer.project_name=TinyZero \
trainer.experiment_name=$EXPERIMENT_NAME \
trainer.total_epochs=15 2>&1 | tee verl_demo.log
Expand Down Expand Up @@ -109,8 +109,8 @@ python3 -m verl.trainer.main_ppo \
trainer.default_hdfs_dir=null \
trainer.n_gpus_per_node=$N_GPUS \
trainer.nnodes=1 \
trainer.save_freq=30 \
trainer.test_freq=10 \
trainer.save_freq=100 \
trainer.test_freq=100 \
trainer.project_name=TinyZero \
trainer.experiment_name=$EXPERIMENT_NAME \
trainer.total_epochs=15 2>&1 | tee verl_demo.log
Expand Down Expand Up @@ -151,8 +151,8 @@ python3 -m verl.trainer.main_ppo \
trainer.default_hdfs_dir=null \
trainer.n_gpus_per_node=$N_GPUS \
trainer.nnodes=1 \
trainer.save_freq=30 \
trainer.test_freq=10 \
trainer.save_freq=100 \
trainer.test_freq=100 \
trainer.project_name=TinyZero \
trainer.experiment_name=$EXPERIMENT_NAME \
trainer.total_epochs=15 2>&1 | tee verl_demo.log
Expand Down
19 changes: 14 additions & 5 deletions examples/data_preprocess/countdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,22 @@ def gen_dataset(

return samples

def make_prefix(dp, template_type='base'):
    """Build the prompt prefix for one countdown example.

    Args:
        dp: Mapping with a 'target' entry (the value the equation must equal)
            and a 'nums' entry (the numbers that may be used).
        template_type: Name of the prompt template. 'base' is the plain
            User/Assistant conversation format; 'qwen-chat' is reserved but
            not implemented yet.

    Returns:
        The prompt string, ending with an opening ``<think>`` tag.

    Raises:
        NotImplementedError: If ``template_type`` is 'qwen-chat'.
        ValueError: If ``template_type`` is not a known template name.
    """
    target = dp['target']
    numbers = dp['nums']

    if template_type == 'base':
        # This works for any base model.
        return f"""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.
User: Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.
Assistant: Let me solve this step by step.
<think>"""

    if template_type == 'qwen-chat':
        # TODO: add the template for Qwen Instruct models.
        raise NotImplementedError("Qwen-chat template WIP")

    # Fail loudly on an unknown name instead of hitting an unbound local.
    raise ValueError(f"Unknown template_type: {template_type!r}")


Expand All @@ -71,7 +79,8 @@ def make_prefix(dp):
parser.add_argument('--min_number', type=int, default=1)
parser.add_argument('--max_number', type=int, default=100)
parser.add_argument('--train_size', type=int, default=327680)
parser.add_argument('--test_size', type=int, default=4096)
parser.add_argument('--test_size', type=int, default=1024)
parser.add_argument('--template_type', type=str, default='base')

args = parser.parse_args()

Expand All @@ -87,7 +96,7 @@ def make_prefix(dp):

def make_map_fn(split):
def process_fn(example, idx):
question = make_prefix(example)
question = make_prefix(example, template_type=args.template_type)
solution = {
"target": example['target'],
"numbers": example['nums']
Expand Down

0 comments on commit 000a419

Please sign in to comment.