update

hanlv15 · Jun 23, 2024 · 767b0ea
1 parent cabb704
commit 767b0ea
Show file tree

Hide file tree

Showing 13 changed files with 37 additions and 31 deletions.
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json b/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json
diff --git a/...16/Meta-Llama-3-8B-Instruct(_llama3).json → ...16/Meta-Llama-3-8B-Instruct(_llama3).json b/...16/Meta-Llama-3-8B-Instruct(_llama3).json → ...16/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/examples/pytorch/llm/my_tuning/Phi-3-medium-128k-instruct/lora/dora.sh b/examples/pytorch/llm/my_tuning/Phi-3-medium-128k-instruct/lora/dora.sh
@@ -68,5 +68,6 @@ python llm_sft.py \
     --save_total_limit 1 \
     --logging_steps 10 \
     --use_flash_attn false \
-    --do_sample false
+    --do_sample false \
+    --test_oom_error true
 
diff --git a/examples/pytorch/llm/my_tuning/Phi-3-medium-128k-instruct/lora/lora.sh b/examples/pytorch/llm/my_tuning/Phi-3-medium-128k-instruct/lora/lora.sh
@@ -32,11 +32,11 @@ fi
 
 lora_alpha=$(expr $lora_rank \* 4)
 
-max_length=8192
+max_length=3800
 
 NCCL_P2P_DISABLE="1" NCCL_IB_DISABLE="1" \
 PYTHONPATH=../../.. \
-CUDA_VISIBLE_DEVICES=0,1,2 \
+CUDA_VISIBLE_DEVICES=0,1 \
 python llm_sft.py \
     --model_type phi-3-medium-128k-instruct \
     --model_id_or_path /home/css/models/Phi-3-medium-128k-instruct \
@@ -68,5 +68,6 @@ python llm_sft.py \
     --save_total_limit 1 \
     --logging_steps 10 \
     --use_flash_attn false \
-    --do_sample false
+    --do_sample false \
+    --test_oom_error true
 
diff --git a/examples/pytorch/llm/my_tuning/run_0.py b/examples/pytorch/llm/my_tuning/run_0.py
@@ -3,19 +3,19 @@
 DEVICE = "0"
 
 # Llama-3
-# for lr in ["1e-4", "1.3e-4"]:
+# for lr in ["7e-5", "9e-5"]:
 #     run_dora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
-# for lr in ["1e-4", "1.3e-4"]:
+# for lr in ["7e-5", "9e-5"]:
 #     run_lora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
-# for lr in ["4.6e-2", "4.7e-2"]:
-#     run_vera(SFTModels.llama_3_8b_instruct, lr, DEVICE)
+# # for lr in ["4.6e-2", "4.7e-2"]:
+# #     run_vera(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
-# for lr in ["9e-6", "2e-5"]:
-#     run_lora_plus(SFTModels.llama_3_8b_instruct, lr, DEVICE)
+# # for lr in ["9e-6", "2e-5"]:
+# #     run_lora_plus(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
-# for lr in ["1.8e-4", "1.9e-4"]:
+# for lr in ["3e-5", "4e-5", "5e-5", "7e-5"]:
 #     run_rslora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
 # for lr in ["6.7e-5"]:
@@ -30,5 +30,7 @@
 #     run_dora(SFTModels.openchat_35, lr, DEVICE)
 
 # phi-3-medium
-for lr in ["1e-4", "9e-5", "7e-5"]:
+for lr in ["1e-4", 
+           # "9e-5", "7e-5"
+           ]:
     run_lora(SFTModels.phi_3_medium_instruct, lr, DEVICE)
diff --git a/examples/pytorch/llm/my_tuning/run_1.py b/examples/pytorch/llm/my_tuning/run_1.py
@@ -3,20 +3,20 @@
 DEVICE = "1"
 
 # Llama-3
-# for lr in ["1.5e-4", "7e-5"]:
-#     run_dora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
+for lr in ["1e-4", "1.3e-4"]:
+    run_dora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
-# for lr in ["1.5e-4", "7e-5"]:
-#     run_lora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
+for lr in ["1e-4", "1.3e-4"]:
+    run_lora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
 # for lr in ["4.8e-2", "4.9e-2"]:
 #     run_vera(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
 # for lr in ["1e-5", "3e-5"]:
 #     run_lora_plus(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
-# for lr in ["1.6e-4", "2.1e-4"]:
-#     run_rslora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
+for lr in ["9e-5", "1e-4", "1.1e-4", "1.3e-4"]:
+    run_rslora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
 # for lr in ["6.9e-5"]:
 #     run_pissa(SFTModels.llama_3_8b_instruct, lr, DEVICE)
@@ -31,5 +31,5 @@
 #     run_dora(SFTModels.openchat_35, lr, DEVICE)
 
 # phi-3-medium
-for lr in ["1.5e-4"]:
-    run_lora(SFTModels.phi_3_medium_instruct, lr, DEVICE)
+# for lr in ["1.5e-4"]:
+#     run_lora(SFTModels.phi_3_medium_instruct, lr, DEVICE)
diff --git a/examples/pytorch/llm/my_tuning/run_2.py b/examples/pytorch/llm/my_tuning/run_2.py
@@ -2,31 +2,33 @@
 
 DEVICE = "2"
 
-for lr in ["9e-5", "2e-4", "1e-4"]:
-    run_dora(SFTModels.openchat_35, lr, DEVICE)
-
 # Llama-3
-# for lr in ["9e-5", "2e-4"]:
-#     run_dora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
+for lr in ["1.5e-4", "2e-4"]:
+    run_dora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
-# for lr in ["9e-5", "2e-4"]:
-#     run_lora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
+for lr in ["1.5e-4", "2e-4"]:
+    run_lora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
-for lr in ["5.1e-2", "5.2e-2", "2.6e-2", "2.7e-2", "2.8e-2", "2.9e-2"]:
-    run_vera(SFTModels.llama_3_8b_instruct, lr, DEVICE)
+# "5.1e-2", "5.2e-2", "2.6e-2", "2.7e-2", "2.8e-2", "2.9e-2"
+# for lr in ["5.1e-2", "5.2e-2", "2.6e-2", "2.7e-2", "2.8e-2", "2.9e-2"]:
+#     run_vera(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
 # for lr in ["5e-5", "1e-4"]:
 #     run_lora_plus(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
-# for lr in ["4e-5", "3e-5"]:
-#     run_rslora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
+for lr in ["1.5e-4", "1.7e-4", "1.9e-4", "2e-4"]:
+    run_rslora(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
 # for lr in ["7.5e-5"]:
 #     run_pissa(SFTModels.llama_3_8b_instruct, lr, DEVICE)
 
 
 
 # openchat-3.5
+# for lr in ["9e-5", "2e-4", "1e-4"]:
+#     run_dora(SFTModels.openchat_35, lr, DEVICE)
+
+
 # for lr in ["9e-5", "2e-4"]:
 #     run_lora(SFTModels.openchat_35, lr, DEVICE)