This repository has been archived by the owner on Aug 10, 2023. It is now read-only.

August 2023 update
hfxunlp committed Aug 10, 2023
1 parent 5494a8b commit 635c75c
Showing 404 changed files with 7,343 additions and 3,372 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -100,7 +100,7 @@ Tokenized case-sensitive BLEU measured with [multi-bleu.perl](https://github.com

## Acknowledgments

-Hongfei Xu enjoys a doctoral grant from [China Scholarship Council](https://www.csc.edu.cn/) ([2018]3101, 201807040056) while maintaining this project.
+Hongfei Xu is partially supported by the Education Department of Henan Province (Grant No. 232300421386) while maintaining this project.

Details of this project can be found [here](https://arxiv.org/abs/1903.07402), and please cite it if you enjoy the implementation :)

29 changes: 15 additions & 14 deletions adv/eva/eva_probe.py
@@ -1,24 +1,22 @@
#encoding: utf-8

import sys

import torch

-from utils.tqdm import tqdm
-
-from utils.h5serial import h5File
-
-import cnfg.probe as cnfg
-from cnfg.ihyp import *
-
-from transformer.Probe.NMT import NMT
from loss.base import LabelSmoothingLoss
from parallel.base import DataParallelCriterion
from parallel.parallelMT import DataParallelMT
-
-from utils.base import *
-from utils.fmt.base4torch import parse_cuda
+from transformer.Probe.NMT import NMT
+from utils.base import set_random_seed
+from utils.fmt.base4torch import parse_cuda
+from utils.h5serial import h5File
+from utils.io import load_model_cpu
+from utils.torch.comp import torch_autocast, torch_compile, torch_inference_mode
+from utils.tqdm import tqdm
+
+import cnfg.probe as cnfg
+from cnfg.ihyp import *
+from cnfg.vocab.base import pad_id

probe_reorder = cnfg.probe_reorder
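Throughout this commit, raw `torch.no_grad()` / `autocast` calls are replaced by `torch_inference_mode` / `torch_autocast` from `utils.torch.comp`, and models are passed through `torch_compile`. Presumably this module centralizes PyTorch version checks so the scripts run unchanged on old and new releases; a minimal sketch of such a shim, assuming that design (the repository's actual module may differ):

```python
# Hedged sketch of a utils/torch/comp.py-style compatibility layer; the
# names match this commit's imports, but the bodies are assumptions.
import torch

# torch.inference_mode (PyTorch >= 1.9) is a stricter, faster variant of
# no_grad; fall back to no_grad on older releases.
torch_inference_mode = getattr(torch, "inference_mode", torch.no_grad)

if hasattr(torch, "autocast"):
	# device-generic AMP context manager (PyTorch >= 1.10)
	def torch_autocast(enabled=True, **kwargs):
		return torch.autocast("cuda", enabled=enabled, **kwargs)
else:
	from torch.cuda.amp import autocast as torch_autocast

# torch.compile exists only in PyTorch >= 2.0; degrade to a no-op.
torch_compile = getattr(torch, "compile", lambda model, *args, **kwargs: model)
```

With the shims in one place, the evaluation scripts need no version guards of their own.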

@@ -37,7 +35,7 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
sum_loss = 0.0
model.eval()
src_grp, tgt_grp = ed["src"], ed["tgt"]
-with torch.no_grad():
+with torch_inference_mode():
for i in tqdm(range(nd), mininterval=tqdm_mininterval):
bid = str(i)
seq_batch = torch.from_numpy(src_grp[bid][()])
@@ -48,7 +46,7 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
seq_o = seq_o.to(mv_device, non_blocking=True)
seq_batch, seq_o = seq_batch.long(), seq_o.long()
ot = seq_o.narrow(1, ind_shift, lo).contiguous()
-with autocast(enabled=use_amp):
+with torch_autocast(enabled=use_amp):
output = model(seq_batch, seq_o.narrow(1, 0, lo))
loss = lossf(output, ot)
if multi_gpu:
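In the loop above, `seq_o.narrow(1, 0, lo)` is the teacher-forcing decoder input and `ot = seq_o.narrow(1, ind_shift, lo)` the prediction target, i.e. the same gold sequence read at two offsets. A toy illustration of the split (a shift of 1 is assumed for clarity; `ind_shift` may differ in the probe setting):

```python
import torch

# narrow(dim, start, length) takes a contiguous slice along dim.
seq_o = torch.tensor([[0, 5, 9, 3, 2]])  # <s> w1 w2 w3 </s>
lo = seq_o.size(1) - 1
dec_input = seq_o.narrow(1, 0, lo)  # [[0, 5, 9, 3]]: history fed to the decoder
target = seq_o.narrow(1, 1, lo)     # [[5, 9, 3, 2]]: one-step-ahead gold tokens
```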
@@ -71,7 +69,7 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
nword = td["nword"][()].tolist()
nwordi, nwordt = nword[0], nword[-1]

-mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_layer_fwd)
+mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.act_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_layer_fwd)

mymodel = load_model_cpu(sys.argv[2], mymodel)
mymodel.apply(load_fixing)
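Nearly every constructor call in this commit gains an `act_drop` argument alongside `attn_drop`, wiring a separate dropout rate for the feed-forward activations through `cnfg`. A hedged sketch of where such a rate usually sits in a Transformer position-wise FFN (an illustrative module, not the repository's own):

```python
from torch import nn

class PositionwiseFF(nn.Module):

	def __init__(self, isize, hsize, dropout=0.1, act_drop=0.1):
		super().__init__()
		self.net = nn.Sequential(
			nn.Linear(isize, hsize),
			nn.ReLU(inplace=True),
			nn.Dropout(act_drop),  # the new knob: dropout on the hidden activation
			nn.Linear(hsize, isize),
			nn.Dropout(dropout),   # the pre-existing dropout on the sublayer output
		)

	def forward(self, x):
		return x + self.net(x)
```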
@@ -100,6 +98,9 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
mymodel = DataParallelMT(mymodel, device_ids=cuda_devices, output_device=cuda_device.index, host_replicate=True, gather_output=False)
lossf = DataParallelCriterion(lossf, device_ids=cuda_devices, output_device=cuda_device.index, replicate_once=True)

+mymodel = torch_compile(mymodel, *torch_compile_args, **torch_compile_kwargs)
+lossf = torch_compile(lossf, *torch_compile_args, **torch_compile_kwargs)
+
use_amp = cnfg.use_amp and use_cuda

vloss, vprec = eva(td, ntest, mymodel, lossf, cuda_device, multi_gpu, use_amp)
39 changes: 18 additions & 21 deletions adv/eva/prompt/roberta/eva_single.py
@@ -1,25 +1,23 @@
#encoding: utf-8

import sys

import torch

-from utils.tqdm import tqdm
-
-from utils.h5serial import h5File
-
-import cnfg.prompt.roberta.base as cnfg
-from cnfg.prompt.roberta.ihyp import *
-from cnfg.vocab.plm.roberta import vocab_size
-
-from transformer.Prompt.RoBERTa.NMT import NMT
from loss.base import NLLLoss
from parallel.base import DataParallelCriterion
from parallel.parallelMT import DataParallelMT
-
-from utils.base import *
+from transformer.Prompt.RoBERTa.NMT import NMT
+from utils.base import set_random_seed
from utils.fmt.base4torch import parse_cuda
from utils.fmt.plm.base import fix_parameter_name
+from utils.h5serial import h5File
+from utils.io import load_model_cpu
+from utils.torch.comp import torch_autocast, torch_compile, torch_inference_mode
+from utils.tqdm import tqdm
+
+import cnfg.prompt.roberta.base as cnfg
+from cnfg.prompt.roberta.ihyp import *
+from cnfg.vocab.plm.roberta import vocab_size

def load_fixing(module):

@@ -31,7 +29,7 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
sum_loss = 0.0
model.eval()
src_grp, tgt_grp = ed["src"], ed["tgt"]
-with torch.no_grad():
+with torch_inference_mode():
for i in tqdm(range(nd), mininterval=tqdm_mininterval):
bid = str(i)
seq_batch = torch.from_numpy(src_grp[bid][()])
@@ -40,7 +38,7 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
seq_batch = seq_batch.to(mv_device, non_blocking=True)
seq_o = seq_o.to(mv_device, non_blocking=True)
seq_batch, seq_o = seq_batch.long(), seq_o.long()
-with autocast(enabled=use_amp):
+with torch_autocast(enabled=use_amp):
output = model(seq_batch)
loss = lossf(output, seq_o)
if multi_gpu:
@@ -55,12 +53,9 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
w = float(w)
return sum_loss / w, (w - r) / w * 100.0

-td = h5File(sys.argv[1], "r")
-
-ntest = td["ndata"][()].item()
nwordi = nwordt = vocab_size

-mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
+mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, act_drop=cnfg.act_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)

# it is important to load the pre-trained model here: load_plm not only loads parameters, it may also introduce new ones, which affects the parameter alignment.
pre_trained_m = cnfg.pre_trained_m
@@ -93,10 +88,12 @@ def eva(ed, nd, model, lossf, mv_device, multi_gpu, use_amp=False):
mymodel = DataParallelMT(mymodel, device_ids=cuda_devices, output_device=cuda_device.index, host_replicate=True, gather_output=False)
lossf = DataParallelCriterion(lossf, device_ids=cuda_devices, output_device=cuda_device.index, replicate_once=True)

+mymodel = torch_compile(mymodel, *torch_compile_args, **torch_compile_kwargs)
+lossf = torch_compile(lossf, *torch_compile_args, **torch_compile_kwargs)
+
use_amp = cnfg.use_amp and use_cuda

-vloss, vprec = eva(td, ntest, mymodel, lossf, cuda_device, multi_gpu, use_amp)
-
-td.close()
+with h5File(sys.argv[1], "r") as td:
+	vloss, vprec = eva(td, td["ndata"][()].item(), mymodel, lossf, cuda_device, multi_gpu, use_amp)

print("loss/error: %.3f %.2f" % (vloss, vprec,))
12 changes: 5 additions & 7 deletions adv/examples/plm/bart.py
@@ -1,26 +1,24 @@
#encoding: utf-8

import torch
+from transformers import BartModel

+from transformer.PLM.BART.NMT import NMT
from utils.fmt.plm.base import fix_parameter_name
from utils.fmt.plm.roberta.base import ldvocab
+from utils.torch.comp import torch_inference_mode

import cnfg.plm.bart.base as cnfg
from cnfg.plm.bart.ihyp import *
from cnfg.vocab.plm.roberta import vocab_size
-
-from transformer.PLM.BART.NMT import NMT
-
-from transformers import BartModel

def init_fixing(module):

if hasattr(module, "fix_init"):
module.fix_init()

print("load pre-trained models")

-tmod = NMT(cnfg.isize, vocab_size, vocab_size, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
+tmod = NMT(cnfg.isize, vocab_size, vocab_size, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, act_drop=cnfg.act_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
tmod.apply(init_fixing)
tmod.load_plm(fix_parameter_name(torch.load("plm/bart-base/pytorch_model.bin", map_location="cpu")))

@@ -34,7 +32,7 @@ def init_fixing(module):
tde = torch.as_tensor([0, 100, 50264, 15162, 4, 2], dtype=torch.long).unsqueeze(0)
tdo = torch.as_tensor([2, 100, 33, 41, 15162, 4, 2], dtype=torch.long).unsqueeze(0)

-with torch.no_grad():
+with torch_inference_mode():
ers = smod(input_ids=tde, decoder_input_ids=tdo).last_hidden_state
print("forward for test")
trs = tmod(tde, tdo)
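Each PLM example routes `init_fixing` through `nn.Module.apply`, which visits every submodule recursively, so any layer that defines a `fix_init` hook is re-initialized before the pre-trained weights are loaded. A minimal, self-contained illustration of the mechanism:

```python
from torch import nn

def init_fixing(module):
	# Module.apply calls this once per submodule (children first, then self),
	# so every module exposing fix_init gets the hook invoked.
	if hasattr(module, "fix_init"):
		module.fix_init()

model = nn.Sequential(nn.Linear(4, 4), nn.ReLU())
model.apply(init_fixing)  # a no-op here: stock torch modules have no fix_init
```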
12 changes: 5 additions & 7 deletions adv/examples/plm/bert.py
@@ -1,26 +1,24 @@
#encoding: utf-8

import torch
+from transformers import BertModel

+from transformer.PLM.BERT.NMT import NMT
from utils.fmt.plm.base import fix_parameter_name
from utils.fmt.plm.bert.base import ldvocab
+from utils.torch.comp import torch_inference_mode

import cnfg.plm.bert.base as cnfg
from cnfg.plm.bert.ihyp import *
from cnfg.vocab.plm.bert import vocab_size
-
-from transformer.PLM.BERT.NMT import NMT
-
-from transformers import BertModel

def init_fixing(module):

if hasattr(module, "fix_init"):
module.fix_init()

print("load pre-trained models")

-tmod = NMT(cnfg.isize, vocab_size, vocab_size, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
+tmod = NMT(cnfg.isize, vocab_size, vocab_size, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, act_drop=cnfg.act_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
tmod.apply(init_fixing)
tmod.load_plm(fix_parameter_name(torch.load("plm/bert-base-cased/pytorch_model.bin", map_location="cpu")))

@@ -33,7 +31,7 @@ def init_fixing(module):
print("forward with transformers")
td = torch.as_tensor([101, 146, 1138, 1126, 12075, 119, 102], dtype=torch.long).unsqueeze(0)

-with torch.no_grad():
+with torch_inference_mode():
ers = smod(td).last_hidden_state
print("forward for test")
trs = tmod(td)
47 changes: 47 additions & 0 deletions adv/examples/plm/mbart.py
@@ -0,0 +1,47 @@
#encoding: utf-8

import torch
from transformers import MBartForConditionalGeneration, MBartTokenizerFast as Tokenizer

from transformer.PLM.MBART.NMT import NMT
from utils.fmt.plm.base import fix_parameter_name
from utils.torch.comp import torch_inference_mode

import cnfg.plm.mbart.base as cnfg
from cnfg.plm.mbart.ihyp import *
from cnfg.vocab.plm.mbart import vocab_size

def init_fixing(module):

if hasattr(module, "fix_init"):
module.fix_init()

print("load pre-trained models")
tokenizer = Tokenizer(tokenizer_file="plm/mbart-large-cc25/tokenizer.json")

tmod = NMT(cnfg.isize, vocab_size, vocab_size, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, act_drop=cnfg.act_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
tmod.apply(init_fixing)
tmod.load_plm(fix_parameter_name(torch.load("plm/mbart-large-cc25/pytorch_model.bin", map_location="cpu")))
tmod.eval()

print("load models with transformers")
smod = MBartForConditionalGeneration.from_pretrained("plm/mbart-large-cc25")
smod.eval()

print("forward with transformers")
tde = torch.as_tensor([17, 765, 142, 108787, 5, 2, 250004], dtype=torch.long).unsqueeze(0)
tdo = torch.as_tensor([250004, 17, 765, 142, 108787, 5, 2], dtype=torch.long).unsqueeze(0)

print("forward for test")
with torch_inference_mode():
ers = smod(input_ids=tde, decoder_input_ids=tdo, output_hidden_states=True).decoder_hidden_states[-1]
trs = tmod(tde, tdo)

print(ers)
print(trs)

with torch_inference_mode():
ers = smod.generate(tde, decoder_start_token_id=250004)
trs = tmod.decode(tde, lang_id=250004)
print(tokenizer.convert_ids_to_tokens(ers.squeeze(0)))
print(tokenizer.convert_ids_to_tokens(trs.squeeze(0)))
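The new example prints both hidden-state tensors for visual comparison against `transformers`. For a programmatic parity check, a tolerance-based comparison is more reliable than eyeballing, since float32 round-off makes exact equality too strict; a possible helper (an addition of this write-up, not part of the commit):

```python
import torch

def check_parity(ref, out, atol=1e-4, rtol=1e-4):
	# ref/out: e.g. the ers/trs tensors computed above
	print("max abs diff: %.3e" % (ref - out).abs().max().item())
	return torch.allclose(ref, out, atol=atol, rtol=rtol)

assert check_parity(torch.ones(2, 3), torch.ones(2, 3) + 1e-6)
```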
12 changes: 5 additions & 7 deletions adv/examples/plm/roberta.py
@@ -1,26 +1,24 @@
#encoding: utf-8

import torch
+from transformers import RobertaModel

+from transformer.PLM.RoBERTa.NMT import NMT
from utils.fmt.plm.base import fix_parameter_name
from utils.fmt.plm.roberta.base import ldvocab
+from utils.torch.comp import torch_inference_mode

import cnfg.plm.roberta.base as cnfg
from cnfg.plm.roberta.ihyp import *
from cnfg.vocab.plm.roberta import vocab_size
-
-from transformer.PLM.RoBERTa.NMT import NMT
-
-from transformers import RobertaModel

def init_fixing(module):

if hasattr(module, "fix_init"):
module.fix_init()

print("load pre-trained models")

-tmod = NMT(cnfg.isize, vocab_size, vocab_size, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
+tmod = NMT(cnfg.isize, vocab_size, vocab_size, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, act_drop=cnfg.act_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
tmod.apply(init_fixing)
tmod.load_plm(fix_parameter_name(torch.load("plm/roberta-base/pytorch_model.bin", map_location="cpu")))

@@ -33,7 +31,7 @@ def init_fixing(module):
print("forward with transformers")
td = torch.as_tensor([0, 100, 33, 41, 15162, 4, 2], dtype=torch.long).unsqueeze(0)

-with torch.no_grad():
+with torch_inference_mode():
ers = smod(td).last_hidden_state
print("forward for test")
trs = tmod(td)
13 changes: 6 additions & 7 deletions adv/examples/plm/t5.py
@@ -1,17 +1,16 @@
#encoding: utf-8

import torch
+from transformers import T5ForConditionalGeneration, T5TokenizerFast as Tokenizer

+from transformer.PLM.T5.NMT import NMT
from utils.fmt.plm.base import fix_parameter_name
+from utils.torch.comp import torch_inference_mode

import cnfg.plm.t5.base as cnfg
from cnfg.plm.t5.ihyp import *
from cnfg.vocab.plm.t5 import vocab_size
-
-from transformer.PLM.T5.NMT import NMT
-
-from transformers import T5ForConditionalGeneration, T5TokenizerFast as Tokenizer

def init_fixing(module):

if hasattr(module, "fix_init"):
@@ -20,7 +19,7 @@ def init_fixing(module):
print("load pre-trained models")
tokenizer = Tokenizer(tokenizer_file="plm/t5-base/tokenizer.json")

-tmod = NMT(cnfg.isize, vocab_size, vocab_size, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
+tmod = NMT(cnfg.isize, vocab_size, vocab_size, cnfg.nlayer, fhsize=cnfg.ff_hsize, dropout=cnfg.drop, attn_drop=cnfg.attn_drop, act_drop=cnfg.act_drop, global_emb=cnfg.share_emb, num_head=cnfg.nhead, xseql=cache_len_default, ahsize=cnfg.attn_hsize, norm_output=cnfg.norm_output, bindDecoderEmb=cnfg.bindDecoderEmb, forbidden_index=cnfg.forbidden_indexes, model_name=cnfg.model_name)
tmod.apply(init_fixing)
tmod.load_plm(fix_parameter_name(torch.load("plm/t5-base/pytorch_model.bin", map_location="cpu")))

@@ -34,15 +33,15 @@ def init_fixing(module):
tde = torch.as_tensor([27, 43, 192, 16981, 5, 1], dtype=torch.long).unsqueeze(0)
tdo = torch.as_tensor([0, 531, 25, 241, 80, 58], dtype=torch.long).unsqueeze(0)

-with torch.no_grad():
+with torch_inference_mode():
ers = smod(input_ids=tde, decoder_input_ids=tdo, output_hidden_states=True).decoder_hidden_states[-1]
print("forward for test")
trs = tmod(tde, tdo)
print(ers)
print(trs)

tde = torch.as_tensor([27, 43, 32099, 16981, 5, 32098, 241, 80, 58, 1], dtype=torch.long).unsqueeze(0)
-with torch.no_grad():
+with torch_inference_mode():
ers = smod.generate(tde)
trs = tmod.decode(tde)
print(tokenizer.convert_ids_to_tokens(ers.squeeze(0)))
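The last T5 input mixes ordinary tokens with ids 32099 and 32098, which in the stock t5-base vocabulary are the span-corruption sentinels `<extra_id_0>` and `<extra_id_1>`; the decode call fills both spans in one pass. A quick check of that mapping (assuming the standard 32,100-entry vocabulary, with sentinels counting down from the top):

```python
from transformers import T5TokenizerFast

tok = T5TokenizerFast.from_pretrained("t5-base")
print(tok.convert_ids_to_tokens([32099, 32098]))  # ['<extra_id_0>', '<extra_id_1>']
print(tok.convert_tokens_to_ids("<extra_id_0>"))  # 32099
```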

