From c957b833d2f0ec34c5e16acb57bdeaff8b10c778 Mon Sep 17 00:00:00 2001
From: Andrew
Date: Mon, 17 Jun 2024 12:48:51 +0800
Subject: [PATCH 1/2] demo of DPO with QLoRA (w Llama3 70B Instruct)

---
 scripts/run_dpo_align_qlora.sh | 60 ++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 scripts/run_dpo_align_qlora.sh

diff --git a/scripts/run_dpo_align_qlora.sh b/scripts/run_dpo_align_qlora.sh
new file mode 100644
index 000000000..45a6b8fcf
--- /dev/null
+++ b/scripts/run_dpo_align_qlora.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Please run this script from the project root directory (examples/dpo_train.py is a relative path).
+
+# Parses arguments
+model_name_or_path=meta-llama/Meta-Llama-3-70B-Instruct
+dataset_path=data/dpo-mix-7k
+output_dir=output_models/finetuned_llama3_70b_dpo_example
+deepspeed_args="--master_port=13000 --include localhost:3"
+# specify gpus/single gpu here by
+# `--include localhost:0,1` or `--include localhost:0`
+
+while [[ $# -ge 1 ]]; do
+  key="$1"
+  case ${key} in
+    -m|--model_name_or_path)
+      model_name_or_path="$2"
+      shift
+      ;;
+    -d|--dataset_path)
+      dataset_path="$2"
+      shift
+      ;;
+    -o|--output_lora_path)
+      output_dir="$2"
+      shift
+      ;;
+    --deepspeed_args)
+      deepspeed_args="$2"
+      shift
+      ;;
+    *)
+      echo "error: unknown option \"${key}\"" 1>&2
+      exit 1
+  esac
+  shift
+done
+exp_id=dpo
+project_dir=$(cd "$(dirname "$0")"/..; pwd)
+log_dir=${project_dir}/log/${exp_id}
+mkdir -p "${output_dir}" "${log_dir}"
+
+# ${deepspeed_args} is deliberately unquoted: it must word-split into separate
+# launcher options. stdout/stderr go through tee via process substitution so
+# that deepspeed's own stderr (not tee's) lands in train.err, both streams stay
+# visible on the terminal, and the script's exit status is deepspeed's.
+deepspeed ${deepspeed_args} \
+  examples/dpo_train.py \
+    --model_name_or_path "${model_name_or_path}" \
+    --dataset_path "${dataset_path}" \
+    --output_dir "${output_dir}" \
+    --run_name dpo \
+    --max_steps 200 \
+    --learning_rate 1e-6 \
+    --use_qlora 1 \
+    --lora_r 8 \
+    --sanity_check True \
+    --save_aggregated_lora 0 \
+    --logging_steps 20 \
+    > >(tee "${log_dir}/train.log") \
+    2> >(tee "${log_dir}/train.err" >&2)

From 461b8f8585b051287e5339f18db87dfd4440d169 Mon Sep 17 00:00:00 2001
From: Andrew
Date: Mon, 17 Jun 2024 12:50:15 +0800
Subject: [PATCH 2/2] Update run_dpo_align_qlora.sh

---
 scripts/run_dpo_align_qlora.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/run_dpo_align_qlora.sh b/scripts/run_dpo_align_qlora.sh
index 45a6b8fcf..3f498f8e0 100644
--- a/scripts/run_dpo_align_qlora.sh
+++ b/scripts/run_dpo_align_qlora.sh
@@ -5,6 +5,5 @@
 model_name_or_path=meta-llama/Meta-Llama-3-70B-Instruct
 dataset_path=data/dpo-mix-7k
 output_dir=output_models/finetuned_llama3_70b_dpo_example
-deepspeed_args="--master_port=13000 --include localhost:3"
 # specify gpus/single gpu here by
 # `--include localhost:0,1` or `--include localhost:0`