This repository has been archived by the owner on Aug 10, 2023. It is now read-only.

Commit 07250af: Hello World :-)
liuqiuhui2015 committed Apr 2, 2020
1 parent 6befd65 commit 07250af
Showing 66 changed files with 924 additions and 424 deletions.
55 changes: 4 additions & 51 deletions README.md
@@ -167,9 +167,6 @@ share_emb = False
# number of heads for multi-head attention.
nhead = max(1, isize // 64)
-# maximum steps cached for the positional embedding.
-cache_len = 256
# warm up steps for the training.
warm_step = 8000
# scalar of learning rate
@@ -182,6 +179,8 @@ attn_hsize = None
seed = 666666
```

+Configure advanced details with `cnfg/hyp.py`:
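The scripts changed below add `from cnfg.ihyp import *` and read `cache_len_default` where `cnfg.cache_len` used to be, so `cnfg/hyp.py` (with `cnfg/ihyp.py` apparently layered on top) centralizes advanced defaults. A minimal sketch of that layout; only `cache_len_default` is confirmed by this commit, the other names are placeholders:

```
# cnfg/hyp.py (sketch) -- advanced defaults shared across model configs.
# Only cache_len_default is grounded in this commit's diffs; the other
# entries are hypothetical examples of "advanced details".

# maximum steps cached for the positional embedding
cache_len_default = 256

# hypothetical further defaults
norm_output_default = True
seed_default = 666666
```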

## Training

Just execute the following command to launch the training:
@@ -227,7 +226,7 @@ where `rsf` is the result file and `h5f` is the HDF5-formatted input file of your corpus

Fundamental models needed for the construction of the transformer.

-### `loss.py`
+### `loss/`

Implementation of the label smoothing loss function required for training the transformer.
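A minimal sketch of label smoothing as usually formulated (Szegedy et al., 2016): the one-hot target is softened so the gold token gets `1 - smoothing` and the remaining mass is spread over the other classes. Class and method names here are illustrative, not the repository's `loss/` API:

```
import torch
import torch.nn as nn

class LabelSmoothingLossSketch(nn.Module):
	# illustrative re-implementation; the real loss/ package also handles
	# reduction modes and forbidden_index masking
	def __init__(self, nclass, smoothing=0.1, ignore_index=0):
		super().__init__()
		self.conf = 1.0 - smoothing
		self.smooth = smoothing / (nclass - 1)
		self.ignore_index = ignore_index

	def forward(self, logits, target):
		# logits: (batch, nclass), target: (batch,) of gold indices
		logp = torch.log_softmax(logits, dim=-1)
		true_dist = torch.full_like(logp, self.smooth)
		true_dist.scatter_(1, target.unsqueeze(1), self.conf)
		loss = -(true_dist * logp).sum(dim=-1)
		# zero out padding positions before reducing
		return loss.masked_fill(target == self.ignore_index, 0.0).sum()
```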

@@ -249,53 +248,7 @@ An example depends on Flask to provide simple Web service and REST API about how

### `transformer/`

-#### `NMT.py`
-
-The transformer model encapsulates encoder and decoder. Switch [the comment line](https://github.com/anoidgit/transformer/blob/master/transformer/NMT.py#L9-L11) to make a choice between the standard decoder and the average decoder.
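Though this commit deletes the per-file notes above in favor of a one-line summary, the documented switch is worth preserving as a sketch. Presumably lines 9-11 of `transformer/NMT.py` look something like this (exact imports assumed, not verified):

```
# transformer/NMT.py, around lines 9-11 (assumed layout):
# uncomment exactly one Decoder import to pick the decoder variant
from transformer.Decoder import Decoder	# standard decoder
#from transformer.AvgDecoder import Decoder	# average decoder
```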

-#### `Encoder.py`
-
-The encoder of transformer.
-
-#### `Decoder.py`
-
-The standard decoder of transformer.
-
-#### `AvgDecoder.py`
-
-The average decoder of transformer proposed by [Accelerating Neural Transformer via an Average Attention Network](https://www.aclweb.org/anthology/P18-1166/).
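The average decoder replaces masked self-attention with a cumulative average over previous positions, which makes each decoding step O(1) instead of O(seql). A sketch of the core operation from the paper, not of this repository's `AvgDecoder.py`:

```
import torch

def average_attention(x):
	# x: (batch, seql, isize); position j attends to the uniform mean of
	# steps 1..j, so no O(seql^2) attention matrix is needed
	csum = x.cumsum(dim=1)
	steps = torch.arange(1, x.size(1) + 1, dtype=x.dtype, device=x.device)
	return csum / steps.view(1, -1, 1)
```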

-#### `EnsembleNMT.py`
-
-A model encapsulates several NMT models to do ensemble decoding. Switch [the comment line](https://github.com/anoidgit/transformer/blob/master/transformer/EnsembleNMT.py#L9-L11) to make a choice between the standard decoder and the average decoder.
-
-#### `EnsembleEncoder.py`
-
-A model encapsulates several encoders for ensemble decoding.
-
-#### `EnsembleDecoder.py`
-
-A model encapsulates several standard decoders for ensemble decoding.
-
-#### `EnsembleAvgDecoder.py`
-
-A model encapsulates several average decoders proposed by [Accelerating Neural Transformer via an Average Attention Network](https://www.aclweb.org/anthology/P18-1166/) for ensemble decoding.
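The `Ensemble*` classes wire several trained models into one decoding pass. A common way to combine members, and a reasonable guess at the idea here (the actual combination rule is not shown in this diff), is to average their per-step distributions:

```
import torch

def ensemble_next_token_logprobs(step_logits):
	# step_logits: list of (batch, vocab) tensors, one per member model
	probs = torch.stack([torch.softmax(l, dim=-1) for l in step_logits], dim=0)
	return probs.mean(dim=0).log()	# combined scores for beam search
```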

-#### `AGG/`
-
-Implementation of aggregation models.
-
-##### `Hier*.py`
-
-Hierarchical aggregation proposed in [Exploiting Deep Representations for Neural Machine Translation](https://www.aclweb.org/anthology/D18-1457/).
-
-#### `TA/`
-
-Implementation of transparent attention proposed in [Training Deeper Neural Machine Translation Models with Transparent Attention](https://aclweb.org/anthology/D18-1338).
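Transparent attention lets the decoder attend to a learned mixture of all encoder layers rather than only the top one, easing gradient flow in deep encoders. A sketch of the paper's idea, not of the `TA/` code:

```
import torch
import torch.nn as nn

class TransparentMix(nn.Module):
	def __init__(self, nlayer):
		super().__init__()
		# one logit per encoder state (embeddings + nlayer layer outputs)
		self.weight = nn.Parameter(torch.zeros(nlayer + 1))

	def forward(self, layer_states):
		# layer_states: list of nlayer+1 tensors of shape (batch, seql, isize)
		a = torch.softmax(self.weight, dim=0)
		return sum(w * h for w, h in zip(a.unbind(0), layer_states))
```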

-#### `SC/`
-
-Implementation of sentential context proposed in [Exploiting Sentential Context for Neural Machine Translation](https://www.aclweb.org/anthology/P19-1624/).
+Implementations of seq2seq models.

### `parallel/`

6 changes: 4 additions & 2 deletions adv/predict/doc/para/predict_doc_para.py
@@ -9,6 +9,7 @@
import h5py

import cnfg.docpara as cnfg
+from cnfg.ihyp import *

from transformer.Doc.Para.Base.NMT import NMT
from transformer.EnsembleNMT import NMT as Ensemble
@@ -31,15 +32,15 @@ def load_fixing(module):
vcbt = reverse_dict(vcbt)

if len(sys.argv) == 4:
-mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_prev_sent, cnfg.num_layer_context)
+mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_prev_sent, cnfg.num_layer_context)

mymodel = load_model_cpu(sys.argv[3], mymodel)
mymodel.apply(load_fixing)

else:
models = []
for modelf in sys.argv[3:]:
-tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_prev_sent, cnfg.num_layer_context)
+tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_prev_sent, cnfg.num_layer_context)

tmp = load_model_cpu(modelf, tmp)
tmp.apply(load_fixing)
@@ -54,6 +55,7 @@ def load_fixing(module):

use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda_decode(cnfg.use_cuda, cnfg.gpuid, cnfg.multi_gpu_decoding)

+# Important to make cudnn methods deterministic
set_random_seed(cnfg.seed, use_cuda)

if use_cuda:
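The added `# Important to make cudnn methods deterministic` comment flags what `set_random_seed` is for. A sketch of what such a helper typically does (the actual `utils.base` implementation may differ):

```
import random

import torch

def set_random_seed_sketch(seed, use_cuda):
	random.seed(seed)
	torch.manual_seed(seed)
	if use_cuda:
		torch.cuda.manual_seed_all(seed)
		# the part the comment calls "important": trade kernel-selection
		# speed for reproducible cudnn results
		torch.backends.cudnn.deterministic = True
		torch.backends.cudnn.benchmark = False
```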
10 changes: 5 additions & 5 deletions adv/rank/doc/para/rank_loss_doc_para.py
@@ -13,13 +13,14 @@
import h5py

import cnfg.docpara as cnfg
+from cnfg.ihyp import *

from transformer.Doc.Para.Base.NMT import NMT
from transformer.EnsembleNMT import NMT as Ensemble
from parallel.parallelMT import DataParallelMT
from parallel.base import DataParallelCriterion

-from loss import LabelSmoothingLoss
+from loss.base import LabelSmoothingLoss

from utils.base import *
from utils.fmt.base4torch import parse_cuda
@@ -38,15 +39,15 @@ def load_fixing(module):
cuda_device = torch.device(cnfg.gpuid)

if len(sys.argv) == 4:
-mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_prev_sent, cnfg.num_layer_context)
+mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_prev_sent, cnfg.num_layer_context)

mymodel = load_model_cpu(sys.argv[3], mymodel)
mymodel.apply(load_fixing)

else:
models = []
for modelf in sys.argv[3:]:
-tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_prev_sent, cnfg.num_layer_context)
+tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes, cnfg.num_prev_sent, cnfg.num_layer_context)

tmp = load_model_cpu(modelf, tmp)
tmp.apply(load_fixing)
@@ -59,9 +60,8 @@ def load_fixing(module):
lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=0, reduction='none', forbidden_index=cnfg.forbidden_indexes)

use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(cnfg.use_cuda, cnfg.gpuid)
-# disable multi_gpu, not supported
-multi_gpu, cuda_devices = False, None

+# Important to make cudnn methods deterministic
set_random_seed(cnfg.seed, use_cuda)

if use_cuda:
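Note the `reduction='none'` in the `LabelSmoothingLoss` call above: it keeps per-token losses so each sentence can be scored and ranked individually. A sketch of that pattern; the (batch, seql) shape is an assumption:

```
def sentence_scores(loss_tok):
	# loss_tok: (batch, seql) per-token losses, padding already zeroed
	# via ignore_index=0; a lower total means a better-ranked sentence
	return loss_tok.sum(dim=-1)
```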
8 changes: 5 additions & 3 deletions adv/rank/doc/rank_loss_sent.py
@@ -13,13 +13,14 @@
import h5py

import cnfg.base as cnfg
+from cnfg.ihyp import *

from transformer.NMT import NMT
from transformer.EnsembleNMT import NMT as Ensemble
from parallel.parallelMT import DataParallelMT
from parallel.base import DataParallelCriterion

-from loss import LabelSmoothingLoss
+from loss.base import LabelSmoothingLoss

from utils.base import *
from utils.fmt.base4torch import parse_cuda
@@ -38,15 +39,15 @@ def load_fixing(module):
cuda_device = torch.device(cnfg.gpuid)

if len(sys.argv) == 4:
-mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
+mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

mymodel = load_model_cpu(sys.argv[3], mymodel)
mymodel.apply(load_fixing)

else:
models = []
for modelf in sys.argv[3:]:
-tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cnfg.cache_len, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
+tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

tmp = load_model_cpu(modelf, tmp)
tmp.apply(load_fixing)
@@ -60,6 +61,7 @@ def load_fixing(module):

use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(cnfg.use_cuda, cnfg.gpuid)

+# Important to make cudnn methods deterministic
set_random_seed(cnfg.seed, use_cuda)

if use_cuda: