update

hanlv15 · Aug 21, 2024 · 4ac63cb · 4ac63cb
1 parent 84230ea
commit 4ac63cb
Show file tree

Hide file tree

Showing 59 changed files with 242 additions and 83 deletions.
diff --git a/examples/pytorch/llm/my_inferencing/create_prompt_llm/create_prompt_llm.ipynb b/examples/pytorch/llm/my_inferencing/create_prompt_llm/create_prompt_llm.ipynb
diff --git a/examples/pytorch/llm/my_inferencing/evaluation.py b/examples/pytorch/llm/my_inferencing/evaluation.py
@@ -208,7 +208,7 @@ def get_dataset_name(dataset_dir):
 
     if train_ratio == "1.0":
         file_dir = f"test_metric_single_llm/{dataset_name}/{data_type}/{with_or_without_info}/\
-{dataset_name}_data{data_version}-split={split_type}-ratio={train_ratio}-epochs={num_train_epochs}/{sft_type}"
+{dataset_name}_data{data_version}-split={split_type}-ratio={train_ratio}/{sft_type}"
         if sft_type == "adalora":
             r1, r2 = sft_args["adalora_target_r"], sft_args["adalora_init_r"]
             file_dir += f"-r={r1}_{r2}"
@@ -226,7 +226,7 @@ def get_dataset_name(dataset_dir):
                 break
     else:
         file_dir = f"test_metric_single_llm/{dataset_name}/{data_type}/{with_or_without_info}/\
-{dataset_name}_data{data_version}-split={split_type}-epochs={num_train_epochs}-sft={sft_type}-lr={lr}"
+{dataset_name}_data{data_version}-split={split_type}-sft={sft_type}-lr={lr}"
         metrics = load_metrics(file_dir, model_name, template_type)
         for item in metrics:
             if item["train_test_split"] == split_type and \

diff --git a/examples/pytorch/llm/my_inferencing/label_check/check_dataset.ipynb b/examples/pytorch/llm/my_inferencing/label_check/check_dataset.ipynb
@@ -17,7 +17,7 @@
     "\n",
     "search_engine = \"brave\"\n",
     "\n",
-    "data_search_dir = f\"/home/hanlv/workspace/data/machine_learning/dataset/research/misinformation_dataset/COVMIS-2024/train_{search_engine}_search.json\"\n",
+    "data_search_dir = f\"/home/hanlv/workspace/data/machine_learning/dataset/research/misinformation_dataset/COVMIS-2024/data_{search_engine}_search.json\"\n",
     "def load_search():\n",
     "    with open(data_search_dir, \"r\") as f:\n",
     "        data = json.load(f)\n",
@@ -32,7 +32,7 @@
     "        return json.load(f)\n",
     "    \n",
     "def load_train():\n",
-    "    with open(\"/home/hanlv/workspace/data/machine_learning/dataset/research/misinformation_dataset/COVMIS-2024/train.json\", \"r\") as f:\n",
+    "    with open(\"/home/hanlv/workspace/data/machine_learning/dataset/research/misinformation_dataset/COVMIS-2024/data.json\", \"r\") as f:\n",
     "        return json.load(f)\n",
     "    \n",
     "def load_type1():\n",
@@ -14982,7 +14982,7 @@
     {
      "data": {
       "text/plain": [
-       "(0.924559245592456, 0.8867710877419616, 0.9524633448215433, 0.849041520589057)"
+       "(0.9254098360655738, 0.8873715531505428, 0.953031719265943, 0.8494565217391303)"
       ]
      },
      "execution_count": 2,
@@ -14998,10 +14998,10 @@
     "import numpy as np\n",
     "import random\n",
     "\n",
-    "def set_seed(seed=DEFAULT_SEED):\n",
-    "    np.random.seed(seed)\n",
-    "    random.seed(seed)\n",
-    "set_seed()\n",
+    "# def set_seed(seed=DEFAULT_SEED):\n",
+    "#     np.random.seed(seed)\n",
+    "#     random.seed(seed)\n",
+    "# set_seed()\n",
     "\n",
     "data_train = load_train()\n",
     "data_train_bak = load_train_bak()\n",
@@ -15016,7 +15016,8 @@
     "    if item[\"label\"] != 1:\n",
     "        data_train_new.append(item)\n",
     "\n",
-    "train_list, test_list = train_test_split(data_train_new, test_size=0.2, shuffle=True)\n",
+    "train_list, test_list = train_test_split(data_train_new, test_size=0.2, random_state=DEFAULT_SEED, shuffle=True)\n",
+    "valid_list, test_list = train_test_split(test_list, test_size=0.5, random_state=DEFAULT_SEED, shuffle=True)\n",
     "\n",
     "cnt = 0\n",
     "labels, preds = [], [] # preds为covmis原始标签\n",

diff --git a/examples/pytorch/llm/my_inferencing/run_infer_0.py b/examples/pytorch/llm/my_inferencing/run_infer_0.py
@@ -3,14 +3,14 @@
 ]
 import os
 import subprocess
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+os.environ['CUDA_VISIBLE_DEVICES'] = '1'
 
 files = []
 
 # 获取当前目录下的所有文件
 for base_dir in [
-    '/home/hanlv/workspace/code/research/infodemic/LLM/swift/examples/pytorch/llm/output/covmis/Llama-3-8B-Instruct/without_info/data1-split=8:2-ratio=1.0',
-    '/home/hanlv/workspace/code/research/infodemic/LLM/swift/examples/pytorch/llm/output/covmis/Llama-3-8B-Instruct/with_info/data1-split=8:2-ratio=1.0',
+    '/home/hanlv/workspace/code/research/infodemic/LLM/swift/examples/pytorch/llm/output/liar2/Llama-3-8B-Instruct/with_llama3_info/brave/data1.3-split=8:1:1-ratio=1.0',
+    # '/home/hanlv/workspace/code/research/infodemic/LLM/swift/examples/pytorch/llm/output/covmis/Llama-3-8B-Instruct/with_info/data1-split=8:2-ratio=1.0',
 ]:
     files.extend([os.path.join(base_dir, file) for file in os.listdir(base_dir)])
 
@@ -38,7 +38,7 @@
 
     if len(path) > 0:
         for file in os.listdir(path):
-            ckpt_dir = os.path.join(path, file, "checkpoint-609")
+            ckpt_dir = os.path.join(path, file, "checkpoint-782")
             if os.path.exists(ckpt_dir):
                 ckpt_list.append(ckpt_dir)
 

diff --git a/examples/pytorch/llm/my_inferencing/run_infer_1.py b/examples/pytorch/llm/my_inferencing/run_infer_1.py
@@ -3,15 +3,15 @@
 ]
 import os
 import subprocess
-os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+os.environ['CUDA_VISIBLE_DEVICES'] = '2'
 # 当前目录
 
 files = []
 
 # 获取当前目录下的所有文件
 for base_dir in [
-    '/home/hanlv/workspace/code/research/infodemic/LLM/swift/examples/pytorch/llm/output/covmis/Llama-3-8B-Instruct/without_info/data1-split=8:2-ratio=1.0',
-    '/home/hanlv/workspace/code/research/infodemic/LLM/swift/examples/pytorch/llm/output/covmis/Llama-3-8B-Instruct/with_info/data1-split=8:2-ratio=1.0',
+    '/home/hanlv/workspace/code/research/infodemic/LLM/swift/examples/pytorch/llm/output/liar2/Llama-3-8B-Instruct/with_llama3_info/brave/data1.3-split=8:1:1-ratio=1.0',
+    # '/home/hanlv/workspace/code/research/infodemic/LLM/swift/examples/pytorch/llm/output/covmis/Llama-3-8B-Instruct/with_info/data1-split=8:2-ratio=1.0',
 ]:
     files.extend([os.path.join(base_dir, file) for file in os.listdir(base_dir)])
 
@@ -40,7 +40,7 @@
 
     if len(path) > 0:
         for file in os.listdir(path):
-            ckpt_dir = os.path.join(path, file, "checkpoint-609")
+            ckpt_dir = os.path.join(path, file, "checkpoint-782")
             if os.path.exists(ckpt_dir):
                 ckpt_list.append(ckpt_dir)
 

diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...16/Meta-Llama-3-8B-Instruct(_llama3).json → ...16/Meta-Llama-3-8B-Instruct(_llama3).json b/...16/Meta-Llama-3-8B-Instruct(_llama3).json → ...16/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=2/Meta-Llama-3-8B-Instruct(_llama3).json → ...=2/Meta-Llama-3-8B-Instruct(_llama3).json b/...=2/Meta-Llama-3-8B-Instruct(_llama3).json → ...=2/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...32/Meta-Llama-3-8B-Instruct(_llama3).json → ...32/Meta-Llama-3-8B-Instruct(_llama3).json b/...32/Meta-Llama-3-8B-Instruct(_llama3).json → ...32/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=4/Meta-Llama-3-8B-Instruct(_llama3).json → ...=4/Meta-Llama-3-8B-Instruct(_llama3).json b/...=4/Meta-Llama-3-8B-Instruct(_llama3).json → ...=4/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json b/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json b/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json b/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json b/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json b/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json b/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json b/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json b/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json
diff --git a/...56/Meta-Llama-3-8B-Instruct(_llama3).json → ...56/Meta-Llama-3-8B-Instruct(_llama3).json b/...56/Meta-Llama-3-8B-Instruct(_llama3).json → ...56/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...256/Mistral-7B-Instruct-v0.3(_llama).json → ...256/Mistral-7B-Instruct-v0.3(_llama).json b/...256/Mistral-7B-Instruct-v0.3(_llama).json → ...256/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...=256/openchat-3.5-0106(openchat_3.5).json → ...=256/openchat-3.5-0106(openchat_3.5).json b/...=256/openchat-3.5-0106(openchat_3.5).json → ...=256/openchat-3.5-0106(openchat_3.5).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...16/Meta-Llama-3-8B-Instruct(_llama3).json → ...16/Meta-Llama-3-8B-Instruct(_llama3).json b/...16/Meta-Llama-3-8B-Instruct(_llama3).json → ...16/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=2/Meta-Llama-3-8B-Instruct(_llama3).json → ...=2/Meta-Llama-3-8B-Instruct(_llama3).json b/...=2/Meta-Llama-3-8B-Instruct(_llama3).json → ...=2/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...32/Meta-Llama-3-8B-Instruct(_llama3).json → ...32/Meta-Llama-3-8B-Instruct(_llama3).json b/...32/Meta-Llama-3-8B-Instruct(_llama3).json → ...32/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=4/Meta-Llama-3-8B-Instruct(_llama3).json → ...=4/Meta-Llama-3-8B-Instruct(_llama3).json b/...=4/Meta-Llama-3-8B-Instruct(_llama3).json → ...=4/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json b/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json b/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json b/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json b/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json b/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json b/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json b/...r=8/Mistral-7B-Instruct-v0.3(_llama).json → ...r=8/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json b/...-r=8/openchat-3.5-0106(openchat_3.5).json → ...-r=8/openchat-3.5-0106(openchat_3.5).json
diff --git a/...56/Meta-Llama-3-8B-Instruct(_llama3).json → ...56/Meta-Llama-3-8B-Instruct(_llama3).json b/...56/Meta-Llama-3-8B-Instruct(_llama3).json → ...56/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...256/Mistral-7B-Instruct-v0.3(_llama).json → ...256/Mistral-7B-Instruct-v0.3(_llama).json b/...256/Mistral-7B-Instruct-v0.3(_llama).json → ...256/Mistral-7B-Instruct-v0.3(_llama).json
diff --git a/...=256/openchat-3.5-0106(openchat_3.5).json → ...=256/openchat-3.5-0106(openchat_3.5).json b/...=256/openchat-3.5-0106(openchat_3.5).json → ...=256/openchat-3.5-0106(openchat_3.5).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...brave/liar2_data1.3-split=8:1:1-ratio=1.0/dora-r=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...brave/liar2_data1.3-split=8:1:1-ratio=1.0/dora-r=8/Meta-Llama-3-8B-Instruct(_llama3).json
@@ -0,0 +1,68 @@
+[
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.12106248,
+        "lr": "1e-4",
+        "ACC": 0.9201277955271565,
+        "F1": 0.9167775944225993,
+        "Precision": 0.9124560122728282,
+        "Recall": 0.9229558446576486
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.11976161,
+        "lr": "1.1e-4",
+        "ACC": 0.9201277955271565,
+        "F1": 0.9168658698539176,
+        "Precision": 0.9123335952822331,
+        "Recall": 0.9235785526265856
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.1192237,
+        "lr": "1.2e-4",
+        "ACC": 0.9246006389776358,
+        "F1": 0.921583064965418,
+        "Precision": 0.9168494593361776,
+        "Recall": 0.9287697981460207
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.11972405,
+        "lr": "1.3e-4",
+        "ACC": 0.921405750798722,
+        "F1": 0.9181527466738326,
+        "Precision": 0.9137070724386317,
+        "Recall": 0.9246169742257548
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.11786839,
+        "lr": "1.4e-4",
+        "ACC": 0.923961661341853,
+        "F1": 0.9208978572105128,
+        "Precision": 0.9162192769062998,
+        "Recall": 0.9279392333619676
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.11727598,
+        "lr": "1.5e-4",
+        "ACC": 0.929073482428115,
+        "F1": 0.9261768924302789,
+        "Precision": 0.9215311920689933,
+        "Recall": 0.9330269817120502
+    }
+]
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...=8/Meta-Llama-3-8B-Instruct(_llama3).json → ...=8/Meta-Llama-3-8B-Instruct(_llama3).json
diff --git a/...brave/liar2_data1.3-split=8:1:1-ratio=1.0/dora-r=8/Meta-Llama-3-8B-Instruct(_llama3).json b/...brave/liar2_data1.3-split=8:1:1-ratio=1.0/dora-r=8/Meta-Llama-3-8B-Instruct(_llama3).json
@@ -0,0 +1,68 @@
+[
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.12106248,
+        "lr": "1e-4",
+        "ACC": 0.9073482428115016,
+        "F1": 0.9040064535751646,
+        "Precision": 0.89892617181251,
+        "Recall": 0.9132013725395434
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.11976161,
+        "lr": "1.1e-4",
+        "ACC": 0.9111821086261981,
+        "F1": 0.9080688219192903,
+        "Precision": 0.9028602378844657,
+        "Recall": 0.9178797785157518
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.1192237,
+        "lr": "1.2e-4",
+        "ACC": 0.9105431309904153,
+        "F1": 0.9071068555225419,
+        "Precision": 0.9022124456127325,
+        "Recall": 0.9151681844229189
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.11972405,
+        "lr": "1.3e-4",
+        "ACC": 0.9079872204472843,
+        "F1": 0.9044038048439478,
+        "Precision": 0.899614086996104,
+        "Recall": 0.9121536719575258
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.11786839,
+        "lr": "1.4e-4",
+        "ACC": 0.9054313099041533,
+        "F1": 0.9020445379743409,
+        "Precision": 0.8969759260123003,
+        "Recall": 0.9113320813857955
+    },
+    {
+        "model": "Meta-Llama-3-8B-Instruct",
+        "train_test_split": "8:1:1",
+        "train_ratio": "1.0",
+        "train_loss": 0.11727598,
+        "lr": "1.5e-4",
+        "ACC": 0.9073482428115016,
+        "F1": 0.9040064535751646,
+        "Precision": 0.89892617181251,
+        "Recall": 0.9132013725395434
+    }
+]
diff --git a/examples/pytorch/llm/my_tuning/Meta-Llama-3-8B-Instruct/lora/dora.sh b/examples/pytorch/llm/my_tuning/Meta-Llama-3-8B-Instruct/lora/dora.sh
@@ -39,6 +39,7 @@ lora_alpha=$(expr $lora_rank \* 4)
 max_length=8192
 
 NCCL_P2P_DISABLE="1" NCCL_IB_DISABLE="1" \
+PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 \
 PYTHONPATH=../../.. \
 CUDA_VISIBLE_DEVICES=$device \
 python llm_sft.py \

diff --git a/examples/pytorch/llm/my_tuning/run_0.py b/examples/pytorch/llm/my_tuning/run_0.py
@@ -5,14 +5,21 @@
 DEVICE = "0"
 
 # Llama-3
-# for lr in ["7e-5", "9e-5"]: 
+for data_version in ["1.4", "1.6"]:
+    for lr in ["1e-4", "1.1e-4"]: # 8e-5
+        run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version=data_version)
+for lr in ["9e-5"]:
+    run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version="1.3")
+for lr in ["9e-5"]: # 8e-5
+    run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version="1")
+
+# for lr in ["7e-5", "9e-5"]:
 #     for rank in ["2", "4"]:
 #         run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, rank=rank, data_version="1")
 
-for lr in ["9e-5"]: 
-    for rank in ["16", "32"]:
-        run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, rank=rank, data_version="1")
-
+# for lr in ["9e-5"]:
+#     for rank in ["16", "32"]:
+#         run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, rank=rank, data_version="1")
 
 # for lr in ["7e-5", "9e-5", "1e-4", ]:
 #     with_info = True
@@ -22,8 +29,8 @@
 # for lr in ["9e-5"]: #  covmis: 9.5e-5 1.05e-4
 #     run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, data_version="1")
 
-# for lr in ["7e-5"]: # 8e-5
-#     run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version="1")
+for lr in ["9e-5"]: # 8e-5
+    run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version="1.5")
 
 
 # for lr in ["1e-4"]:

diff --git a/examples/pytorch/llm/my_tuning/run_1.py b/examples/pytorch/llm/my_tuning/run_1.py
@@ -2,18 +2,22 @@
 DEVICE = "1"
 
 # Llama-3
-for lr in ["1.6e-4", "1.8e-4"]: 
-    for rank in ["2", "4"]:
-        run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, rank=rank, data_version="1")
+for data_version in ["1.4", "1.5", "1.6"]:
+    for lr in ["1.2e-4", "1.3e-4"]:
+        run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version=data_version)
+
+# for lr in ["1.6e-4", "1.8e-4"]: 
+#     for rank in ["2", "4"]:
+#         run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, rank=rank, data_version="1")
 
 # for lr in ["7e-5", "9e-5", "1e-4", ]:
 #     with_info = True
 #     run_dora_with_info_or_not(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, with_info, data_version="1")
 
 ############################
 
-# for lr in ["1.05e-4",]:
-#     run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, data_version="1")
+for lr in ["9e-5"]:
+    run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version="1.4")
 
 # for lr in ["3e-4"]:
 #     run_lora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version="1")

diff --git a/examples/pytorch/llm/my_tuning/run_2.py b/examples/pytorch/llm/my_tuning/run_2.py
@@ -4,6 +4,10 @@
 DEVICE = "2"
 
 # Llama-3
+for data_version in ["1.4", "1.5", "1.6"]:
+    for lr in ["1.4e-4", "1.5e-4"]:
+        run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version=data_version)
+
 # for lr in ["1.3e-4"]: 
 #     for rank in ["2", "4"]:
 #         run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, rank=rank, data_version="1")
@@ -12,13 +16,13 @@
 #     for rank in ["16", "32"]:
 #         run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, rank=rank, data_version="1")
 
-for with_info in [True, False]:
-    for lr in ["6e-5", "8e-5", "1.2e-4", "1.4e-4"]:
-        run_dora_with_info_or_not(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, with_info, data_version="1")
+# for with_info in [True, False]:
+#     for lr in ["6e-5", "8e-5", "1.2e-4", "1.4e-4"]:
+#         run_dora_with_info_or_not(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, with_info, data_version="1")
 
 ############################
-# for lr in ["9e-5", "1.1e-4"]: # 8e-5
-#     run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.covmis, DEVICE, data_version="1")
+for lr in ["9e-5"]:
+    run_dora(SFTModels.llama_3_8b_instruct, lr, DatasetName.liar2, DEVICE, data_version="1.6")
 
 # for lr in ["1.1e-4"]:
 #     run_lora(SFTModels.llama_3_8b_instruct, lr, DEVICE, data_version="3")