This repository has been archived by the owner on Aug 10, 2023. It is now read-only.

Commit c6b98da
fix modules.base; support fast_loss
liuqiuhui2015 committed Feb 6, 2021
1 parent 3ce7e4e

Showing 24 changed files with 228 additions and 153 deletions.
2 changes: 1 addition & 1 deletion adv/predict/doc/para/predict_doc_para.py
@@ -22,7 +22,7 @@
 
 def load_fixing(module):
 
-	if "fix_load" in dir(module):
+	if hasattr(module, "fix_load"):
 		module.fix_load()
 
 td = h5py.File(cnfg.test_data, "r")
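
The change above recurs throughout this commit: membership tests against dir(module) are replaced with hasattr(module, ...). The two checks usually agree, but dir() builds and sorts a list of every attribute name on each call, while hasattr() performs a single attribute lookup and also sees attributes served dynamically via __getattr__. A minimal sketch of the difference, using a hypothetical Toy class that is not part of this repository:

	class Toy:

		def fix_load(self):
			pass

	toy = Toy()

	# old style: materializes and scans the full attribute list each time
	print("fix_load" in dir(toy))	# True

	# new style: one attribute lookup; also catches __getattr__ fallbacks
	print(hasattr(toy, "fix_load"))	# True
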
2 changes: 1 addition & 1 deletion adv/predict/predict_ape.py
@@ -22,7 +22,7 @@
 
 def load_fixing(module):
 
-	if "fix_load" in dir(module):
+	if hasattr(module, "fix_load"):
 		module.fix_load()
 
 td = h5py.File(cnfg.test_data, "r")
2 changes: 1 addition & 1 deletion adv/rank/doc/para/rank_loss_para.py
@@ -29,7 +29,7 @@
 
 def load_fixing(module):
 
-	if "fix_load" in dir(module):
+	if hasattr(module, "fix_load"):
 		module.fix_load()
 
 td = h5py.File(sys.argv[2], "r")
2 changes: 1 addition & 1 deletion adv/rank/doc/rank_loss_sent.py
@@ -29,7 +29,7 @@
 
 def load_fixing(module):
 
-	if "fix_load" in dir(module):
+	if hasattr(module, "fix_load"):
 		module.fix_load()
 
 td = h5py.File(sys.argv[2], "r")
6 changes: 3 additions & 3 deletions adv/train/doc/para/train_doc_para.py
@@ -14,7 +14,7 @@
 from utils.fmt.base import tostr, save_states, load_states, pad_id
 from utils.fmt.base4torch import parse_cuda, load_emb
 
-from lrsch import GoogleLR
+from lrsch import GoogleLR as LRScheduler
 from loss.base import LabelSmoothingLoss
 
 from random import shuffle
@@ -176,7 +176,7 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
 
 def init_fixing(module):
 
-	if "fix_init" in dir(module):
+	if hasattr(module, "fix_init"):
 		module.fix_init()
 
 rid = cnfg.run_id
@@ -280,7 +280,7 @@ def init_fixing(module):
 	logger.info("Load optimizer state from: " + fine_tune_state)
 	optimizer.load_state_dict(h5load(fine_tune_state))
 
-lrsch = GoogleLR(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)
+lrsch = LRScheduler(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)
 
 num_checkpoint = cnfg.num_checkpoint
 cur_checkid = 0
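
Aliasing the import as LRScheduler decouples the training scripts from the concrete schedule class: swapping in another scheduler now only touches the import line, since the instantiation already uses the generic name. For context, GoogleLR implements the warmup-then-decay learning rate schedule of Vaswani et al. (2017). The sketch below is an assumption about that formula, based on the paper and on the (optimizer, isize, warm_step, scale) call signature visible in this diff, not the actual contents of lrsch.py:

	def noam_lr(step, isize, warm_step, scale=1.0):
		# linear warmup for warm_step updates, then inverse-square-root decay,
		# both scaled by isize ** -0.5 (the model dimension)
		return scale * (isize ** -0.5) * min(step ** -0.5, step * (warm_step ** -1.5))

	# e.g. isize=512, warm_step=8000: the rate peaks at step 8000, then decays
	for step in (1, 4000, 8000, 16000):
		print(step, noam_lr(step, isize=512, warm_step=8000))
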
6 changes: 3 additions & 3 deletions adv/train/train_ape.py
@@ -14,7 +14,7 @@
 from utils.fmt.base import tostr, save_states, load_states, pad_id
 from utils.fmt.base4torch import parse_cuda, load_emb
 
-from lrsch import GoogleLR
+from lrsch import GoogleLR as LRScheduler
 from loss.base import LabelSmoothingLoss
 
 from random import shuffle
@@ -174,7 +174,7 @@ def hook_lr_update(optm, flags=None):
 
 def init_fixing(module):
 
-	if "fix_init" in dir(module):
+	if hasattr(module, "fix_init"):
 		module.fix_init()
 
 rid = cnfg.run_id
@@ -270,7 +270,7 @@ def init_fixing(module):
 	logger.info("Load optimizer state from: " + fine_tune_state)
 	optimizer.load_state_dict(h5load(fine_tune_state))
 
-lrsch = GoogleLR(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)
+lrsch = LRScheduler(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)
 
 num_checkpoint = cnfg.num_checkpoint
 cur_checkid = 0
6 changes: 3 additions & 3 deletions adv/train/train_dynb.py
@@ -16,7 +16,7 @@
 
 from utils.fmt.base4torch import parse_cuda, load_emb
 
-from lrsch import GoogleLR
+from lrsch import GoogleLR as LRScheduler
 from loss.base import LabelSmoothingLoss
 
 from random import shuffle
@@ -195,7 +195,7 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
 
 def init_fixing(module):
 
-	if "fix_init" in dir(module):
+	if hasattr(module, "fix_init"):
 		module.fix_init()
 
 rid = cnfg.run_id
@@ -291,7 +291,7 @@ def init_fixing(module):
 	logger.info("Load optimizer state from: " + fine_tune_state)
 	optimizer.load_state_dict(h5load(fine_tune_state))
 
-lrsch = GoogleLR(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)
+lrsch = LRScheduler(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)
 
 num_checkpoint = cnfg.num_checkpoint
 cur_checkid = 0
3 changes: 3 additions & 0 deletions cnfg/hyp.py
@@ -17,6 +17,9 @@
 use_k_relative_position = 0
 disable_std_pemb = False
 
+# use the fast implementation of the label smoothing loss; note that it cannot exclude the negative impact of special tokens, like <pad>, on training. `forbidden_indexes` in `cnfg/base.py` shall be set to None to enable this option.
+use_fast_loss = False
+
 # configure maximum batch size w.r.t GPU memory
 max_sentences_gpu = 768
 max_tokens_gpu = 4608
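
The loss.base code behind this flag is not part of the diff shown here, so the following is only a sketch of what a "fast" label smoothing loss typically looks like: the smoothing mass is spread uniformly over the entire vocabulary in closed form, which avoids building a masked target distribution but, as the comment warns, cannot keep probability away from special tokens such as <pad>. All names below are illustrative assumptions, not the repository's API:

	import torch
	import torch.nn.functional as F

	def fast_label_smoothing_loss(logits, target, smoothing=0.1, pad_id=0):
		# logits: (batch, seq, n_class); target: (batch, seq)
		logp = F.log_softmax(logits, dim=-1)
		nll = -logp.gather(-1, target.unsqueeze(-1)).squeeze(-1)
		# uniform smoothing over ALL classes, <pad> included -- this is the
		# speed/correctness trade-off the configuration comment describes
		smooth = -logp.mean(dim=-1)
		loss = (1.0 - smoothing) * nll + smoothing * smooth
		# target-side padding positions can still be masked out of the sum
		return loss.masked_fill(target.eq(pad_id), 0.0).sum()
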