From c957b833d2f0ec34c5e16acb57bdeaff8b10c778 Mon Sep 17 00:00:00 2001
From: Andrew <anchen1011@gmail.com>
Date: Mon, 17 Jun 2024 12:48:51 +0800
Subject: [PATCH 1/2] demo of DPO with QLoRA (w Llama3 70B Instruct)

---
 scripts/run_dpo_align_qlora.sh | 56 ++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 scripts/run_dpo_align_qlora.sh

diff --git a/scripts/run_dpo_align_qlora.sh b/scripts/run_dpo_align_qlora.sh
new file mode 100644
index 000000000..45a6b8fcf
--- /dev/null
+++ b/scripts/run_dpo_align_qlora.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Please run this script from the project root: examples/dpo_train.py is invoked by relative path.
+
+# Default settings; each can be overridden by the command-line options parsed below
+model_name_or_path=meta-llama/Meta-Llama-3-70B-Instruct
+dataset_path=data/dpo-mix-7k
+output_dir=output_models/finetuned_llama3_70b_dpo_example
+deepspeed_args="--master_port=13000 --include localhost:3"
+# specify gpus/single gpu here by 
+# `--include localhost:0,1` or `--include localhost:0`
+
+# Each option consumes one value; "${2?...}" aborts with an error if it is missing.
+while [[ $# -ge 1 ]]; do
+  case "$1" in
+    -m|--model_name_or_path)
+      model_name_or_path="${2?option $1 requires a value}"
+      shift
+      ;;
+    -d|--dataset_path)
+      dataset_path="${2?option $1 requires a value}"
+      shift
+      ;;
+    -o|--output_lora_path)
+      output_dir="${2?option $1 requires a value}"
+      shift
+      ;;
+    --deepspeed_args)
+      deepspeed_args="${2?option $1 requires a value}"
+      shift
+      ;;
+    *)
+      echo "error: unknown option \"$1\"" 1>&2
+      exit 1
+  esac
+  shift
+done
+exp_id=dpo
+project_dir=$(cd "$(dirname "$0")"/..; pwd)  # parent of this script's directory
+log_dir="${project_dir}/log/${exp_id}"
+mkdir -p "${output_dir}" "${log_dir}"  # quoted so paths containing spaces stay intact
+
+# Launch DPO training. stdout and stderr stay visible on the terminal and are
+# also copied to train.log / train.err (a plain `| tee ... 2>` would redirect
+# tee's stderr instead). ${deepspeed_args} is left unquoted to word-split.
+deepspeed ${deepspeed_args} \
+  examples/dpo_train.py \
+    --model_name_or_path "${model_name_or_path}" \
+    --dataset_path "${dataset_path}" \
+    --output_dir "${output_dir}" \
+    --run_name dpo \
+    --max_steps 200 --learning_rate 1e-6 \
+    --use_qlora 1 --lora_r 8 \
+    --sanity_check True \
+    --save_aggregated_lora 0 --logging_steps 20 \
+    > >(tee "${log_dir}/train.log") \
+    2> >(tee "${log_dir}/train.err" >&2)

From 461b8f8585b051287e5339f18db87dfd4440d169 Mon Sep 17 00:00:00 2001
From: Andrew <anchen1011@gmail.com>
Date: Mon, 17 Jun 2024 12:50:15 +0800
Subject: [PATCH 2/2] Update run_dpo_align_qlora.sh

---
 scripts/run_dpo_align_qlora.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/run_dpo_align_qlora.sh b/scripts/run_dpo_align_qlora.sh
index 45a6b8fcf..3f498f8e0 100644
--- a/scripts/run_dpo_align_qlora.sh
+++ b/scripts/run_dpo_align_qlora.sh
@@ -5,7 +5,6 @@
 model_name_or_path=meta-llama/Meta-Llama-3-70B-Instruct
 dataset_path=data/dpo-mix-7k
 output_dir=output_models/finetuned_llama3_70b_dpo_example
-deepspeed_args="--master_port=13000 --include localhost:3"
 # specify gpus/single gpu here by 
 # `--include localhost:0,1` or `--include localhost:0`