Update the dev/v5.0_beta
byshiue committed Sep 30, 2021
1 parent dd4c071 commit 952a1f2
Showing 819 changed files with 159,828 additions and 47,203 deletions.
62 changes: 62 additions & 0 deletions .clang-format
@@ -0,0 +1,62 @@
Language: Cpp
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AllowShortEnumsOnASingleLine: false
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowAllArgumentsOnNextLine: true
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BreakBeforeBinaryOperators: NonAssignment
BreakBeforeBraces: Stroustrup
BreakBeforeTernaryOperators: false
BreakConstructorInitializers: AfterColon
BreakInheritanceList: AfterColon
BreakStringLiterals: false
ColumnLimit: 120
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth: 4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: true
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: false
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeCtorInitializerColon: false
SpaceBeforeInheritanceColon: false
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 4
UseTab: Never
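
The formatting rules above use clang-format's file-based style lookup. A minimal usage sketch, assuming clang-format is on the PATH and run from the repository root (the source path below is a placeholder):

# -style=file picks up the nearest .clang-format; -i rewrites the file in place
clang-format -i -style=file path/to/source.cc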
15 changes: 15 additions & 0 deletions .flake8
@@ -0,0 +1,15 @@
[flake8]
ignore = W292
exclude =
*migrations*,
# python related
*.pyc,
.git,
__pycache__,

max-line-length=120
max-complexity=12
format=pylint
show_source = True
statistics = True
count = True
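
These linter settings are picked up automatically when flake8 runs from the repository root. A minimal usage sketch, assuming flake8 is installed in the active Python environment:

# reads the [flake8] section above from .flake8 in the current directory
flake8 .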
6 changes: 5 additions & 1 deletion .gitignore
@@ -1,4 +1,8 @@
*~
*.o
*build*/
*.pyc
models/
__pycache__/
.vscode
./translation
.cache
234 changes: 234 additions & 0 deletions .gitlab-ci.yml
@@ -0,0 +1,234 @@
stages:
- build
- test

build_pyt_release:
image: nvcr.io/nvidia/pytorch:21.02-py3
tags:
- fastertransformer
stage: build
only:
- master
- v4.1
- main
artifacts:
paths:
- ${CI_PROJECT_DIR}/build/
expire_in: 1 week
script:
- cd ${CI_PROJECT_DIR} && mkdir build && cd build
- git submodule init && git submodule update
- cmake -DSM=xx -DCMAKE_BUILD_TYPE=Release -DBUILD_PYT=ON -DBUILD_GPT=ON ..
- make -j12

build_pyt_release_sparse:
image: nvcr.io/nvidia/pytorch:21.02-py3
tags:
- fastertransformer
stage: build
only:
- master
- v4.1
- main
artifacts:
paths:
- ${CI_PROJECT_DIR}/build/
expire_in: 1 week
script:
- cd ${CI_PROJECT_DIR} && mkdir build && cd build
- git submodule init && git submodule update
- wget https://developer.download.nvidia.com/compute/libcusparse-lt/0.1.0/local_installers/libcusparse_lt-linux-x86_64-0.1.0.2.tar.gz
- tar -xzvf libcusparse_lt-linux-x86_64-0.1.0.2.tar.gz
- cmake -DSM=xx -DCMAKE_BUILD_TYPE=Release -DBUILD_PYT=ON -DSPARSITY_SUPPORT=ON -DCUSPARSELT_PATH=${CI_PROJECT_DIR}/build/libcusparse_lt/ ..
- make -j12

build_tf_release:
image: nvcr.io/nvidia/tensorflow:21.02-tf1-py3
tags:
- fastertransformer
stage: build
only:
- master
- v4.1
- main
artifacts:
paths:
- ${CI_PROJECT_DIR}/build/
expire_in: 1 week
script:
- cd ${CI_PROJECT_DIR} && mkdir build && cd build
- git submodule init && git submodule update
- cmake -DSM=xx -DCMAKE_BUILD_TYPE=Release -DBUILD_TF=ON -DTF_PATH=/usr/local/lib/python3.8/dist-packages/tensorflow_core/ -DBUILD_GPT=ON ..
- make -j12
- apt-get update && apt-get install bc

# 1. Get accuracy on the LAMBADA dataset
# 2. Run the PyTorch GPT op as the baseline
# 3. Run PyTorch pipeline parallelism and compare the output with the baseline
# 4. Run PyTorch tensor parallelism and compare the output with the baseline
pyt_gpt_test:
image: nvcr.io/nvidia/pytorch:21.02-py3
tags:
- fastertransformer
stage: test
only:
- master
- v4.1
- main
needs:
- job: build_pyt_release
artifacts: true
script:
- cd ${CI_PROJECT_DIR}/build/
- git submodule init && git submodule update
- export PYTHONPATH="${CI_PROJECT_DIR}/:$PYTHONPATH"
- export NVIDIA_TF32_OVERRIDE=0 # Disable TF32
- export CUDA_VISIBLE_DEVICES=0
- wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json -P ../models
- wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt -P ../models
- wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_lm_345m/versions/v0.0/zip -O megatron_lm_345m_v0.0.zip
- wget https://github.com/cybertronai/bflm/raw/master/lambada_test.jsonl -P ../models/megatron-models
- unzip megatron_lm_345m_v0.0.zip -d ../models/megatron-models/345m
- python ../examples/pytorch/gpt/utils/megatron_ckpt_convert.py -head_num 16 -i ../models/megatron-models/345m/release/ -o ../models/megatron-models/c-model/345m/ -t_g 1 -i_g 1
- bash ../examples/pytorch/gpt/scripts/evaluate_zeroshot_gpt.sh
- python ../examples/pytorch/gpt/gpt_example.py --ckpt_path=../models/megatron-models/c-model/345m/1-gpu/ --top_p 0.5 --sample_output_file single-gpu-out.txt
- export CUDA_VISIBLE_DEVICES=0,1
- mpirun -n 2 --allow-run-as-root python ../examples/pytorch/gpt/multi_gpu_gpt_example.py --tensor_para_size=1 --pipeline_para_size=2 --ckpt_path=../models/megatron-models/c-model/345m/1-gpu/ --top_p 0.5 --sample_output_file pipeline-parallel-2-gpu-out.txt
- diff single-gpu-out.txt pipeline-parallel-2-gpu-out.txt
- python ../examples/pytorch/gpt/utils/megatron_ckpt_convert.py -head_num 16 -i ../models/megatron-models/345m/release/ -o ../models/megatron-models/c-model/345m/ -t_g 1 -i_g 2
- mpirun -n 2 --allow-run-as-root python ../examples/pytorch/gpt/multi_gpu_gpt_example.py --tensor_para_size=2 --pipeline_para_size=1 --ckpt_path=../models/megatron-models/c-model/345m/2-gpu/ --top_p 0.5 --sample_output_file tensor-parallel-2-gpu-out.txt
- diff single-gpu-out.txt tensor-parallel-2-gpu-out.txt
timeout: 4h 30m

tf_test:
image: nvcr.io/nvidia/tensorflow:21.02-tf1-py3
tags:
- fastertransformer
stage: test
only:
- master
- v4.1
- main
needs:
- job: build_tf_release
artifacts: true
script:
- cd ${CI_PROJECT_DIR}/build/
- apt-get update && apt-get install bc
- export PYTHONPATH="${CI_PROJECT_DIR}/:$PYTHONPATH"
- export NVIDIA_TF32_OVERRIDE=0 # Disable TF32
- export CUDA_VISIBLE_DEVICES=0
- bash ${CI_PROJECT_DIR}/examples/tensorflow/decoding/utils/translation/download_model_data.sh
- mkdir -p ${CI_PROJECT_DIR}/translation/ckpt_fp16
- python ${CI_PROJECT_DIR}/examples/tensorflow/ckpt_type_convert.py --init_checkpoint=${CI_PROJECT_DIR}/translation/ckpt/model.ckpt-500000 --fp16_checkpoint=${CI_PROJECT_DIR}/translation/ckpt_fp16/model.ckpt-500000
- python ${CI_PROJECT_DIR}/tests/decoding/tf_decoding_unit_test.py
timeout: 4h 30m

tf_xlnet_test:
image: nvcr.io/nvidia/tensorflow:21.02-tf1-py3
tags:
- fastertransformer
stage: test
only:
- master
- v4.1
- main
needs:
- job: build_tf_release
artifacts: true
script:
- cd ${CI_PROJECT_DIR}/examples/tensorflow/xlnet
- bash downloadModel.sh
- bash verifyCorrectness.sh # For FP32 model

pyt_sp_test:
image: nvcr.io/nvidia/pytorch:21.02-py3
tags:
- fastertransformer
stage: test
only:
- master
- v4.1
- main
needs:
- job: build_pyt_release_sparse
artifacts: true
script:
- cd ${CI_PROJECT_DIR}/build/
- export PYTHONPATH="${CI_PROJECT_DIR}/:$PYTHONPATH"
- export NVIDIA_TF32_OVERRIDE=0 # Disable TF32
- export CUDA_VISIBLE_DEVICES=0
- pip install transformers==2.5.1
# GOS has no Ampere GPU, so the sparse tests cannot be run; only some dense cases are tested
- ${CI_PROJECT_DIR}/build/bin/bert_gemm 32 64 12 64 1 0
- python ${CI_PROJECT_DIR}/examples/pytorch/bert/bert_example.py 32 12 64 12 64 --fp16
- ${CI_PROJECT_DIR}/build/bin/bert_gemm 32 64 12 64 1 1
- python ${CI_PROJECT_DIR}/examples/pytorch/bert/bert_example.py 32 12 64 12 64 --fp16 --int8_mode 1
- python ${CI_PROJECT_DIR}/examples/pytorch/bert/bert_example.py 32 12 64 12 64 --fp16 --int8_mode 2
- python ${CI_PROJECT_DIR}/examples/pytorch/bert/bert_example.py 32 12 64 12 64 --fp16 --int8_mode 3

pyt_longformer_test:
image: nvcr.io/nvidia/pytorch:21.02-py3
tags:
- fastertransformer
stage: test
only:
- master
- v4.1
- main
needs:
- job: build_pyt_release
artifacts: true
script:
- cd ${CI_PROJECT_DIR}/examples/pytorch/longformer
- apt-get update && apt-get install git-lfs
- git lfs install
- git config lfs.fetchinclude "pytorch_model.bin,config.json"
- git clone https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa
- cd ${CI_PROJECT_DIR}
- export PYTHONPATH="${CI_PROJECT_DIR}/:$PYTHONPATH"
- export NVIDIA_TF32_OVERRIDE=0 # Disable TF32
- export CUDA_VISIBLE_DEVICES=0
- pip install transformers==4.8.2
- python3 tests/longformer/py_longformer_unit_test.py

pyt_decoding_test:
image: nvcr.io/nvidia/pytorch:21.02-py3
tags:
- fastertransformer
stage: test
only:
- master
- v4.1
- main
needs:
- job: build_pyt_release
artifacts: true
script:
- cd ${CI_PROJECT_DIR}/build/
- export PYTHONPATH="${CI_PROJECT_DIR}/:$PYTHONPATH"
- export NVIDIA_TF32_OVERRIDE=0 # Disable TF32
- export CUDA_VISIBLE_DEVICES=0
- apt-get update && apt-get install bc
- pip install sacrebleu
- pip install opennmt-py==1.1.1
- bash ../examples/pytorch/decoding/utils/download_model.sh
- mkdir pytorch/translation/data -p
- cp ../examples/tensorflow/decoding/utils/translation/test* pytorch/translation/data
- python ../examples/pytorch/decoding/utils/recover_bpe.py pytorch/translation/data/test.de debpe_ref.txt
- echo "Run decoding fp32" # decoding fp32 testing
- python ../examples/pytorch/decoding/translate_example.py --batch_size 128 --beam_size 4 --model_type decoding_ext --decoding_ths_path ./lib/libth_decoding.so --data_type fp32 --output_file output.txt
- python ../examples/pytorch/decoding/utils/recover_bpe.py output.txt debpe_output.txt
- cat debpe_output.txt | sacrebleu debpe_ref.txt
- echo "Run decoder fp32" # decoder fp32 testing
- python ../examples/pytorch/decoding/translate_example.py --batch_size 128 --beam_size 4 --model_type torch_decoding_with_decoder_ext --decoder_ths_path ./lib/libth_decoder.so --data_type fp32 --output_file output.txt
- python ../examples/pytorch/decoding/utils/recover_bpe.py output.txt debpe_output.txt
- cat debpe_output.txt | sacrebleu debpe_ref.txt
- echo "Run decoding fp16" # decoding fp16 testing
- python ../examples/pytorch/decoding/translate_example.py --batch_size 128 --beam_size 4 --model_type decoding_ext --decoding_ths_path ./lib/libth_decoding.so --data_type fp16 --output_file output.txt
- python ../examples/pytorch/decoding/utils/recover_bpe.py output.txt debpe_output.txt
- cat debpe_output.txt | sacrebleu debpe_ref.txt
- echo "Run decoder fp16" # decoder fp16 testing
- python ../examples/pytorch/decoding/translate_example.py --batch_size 128 --beam_size 4 --model_type torch_decoding_with_decoder_ext --decoder_ths_path ./lib/libth_decoder.so --data_type fp16 --output_file output.txt
- python ../examples/pytorch/decoding/utils/recover_bpe.py output.txt debpe_output.txt
- cat debpe_output.txt | sacrebleu debpe_ref.txt
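
The decoding jobs above install bc and score detokenized translations with sacrebleu. A hedged sketch of how such a score could be turned into a pass/fail gate (the 25.0 threshold and the gating step are illustrative assumptions, not part of this commit):

# print only the BLEU score for the system output against the reference
BLEU=$(cat debpe_output.txt | sacrebleu -b debpe_ref.txt)
# fail the job if the score drops below an illustrative threshold
if [ "$(echo "$BLEU > 25.0" | bc -l)" -eq 1 ]; then echo "BLEU OK: $BLEU"; else echo "BLEU too low: $BLEU"; exit 1; fi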
7 changes: 7 additions & 0 deletions .gitmodules
@@ -0,0 +1,7 @@
[submodule "3rdparty/Megatron-LM"]
path = 3rdparty/Megatron-LM
url = https://github.com/NVIDIA/Megatron-LM.git
branch = v2.4
[submodule "examples/tensorflow/bert/tensorflow_bert/bert"]
path = examples/tensorflow/bert/tensorflow_bert/bert
url = https://github.com/google-research/bert.git
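
The CI jobs above populate these submodules with git submodule init && git submodule update. A hedged sketch of equivalent commands for a local checkout (standard git, nothing repository-specific):

# clone the repository together with its submodules
git clone --recurse-submodules <repository-url>
# or populate the submodules inside an existing clone
git submodule update --init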
(The remaining changed files in this commit are not shown.)
