Skip to content

Commit

Permalink
Use nvidia cuda image without cudnn for cudnn 8 and up
Browse files Browse the repository at this point in the history
Use nvidia cuda image without cudnn for cudnn 8 and up.
We want to decouple the CUDA and cuDNN versions so that we can evolve these versions separately.
We want to use cuDNN 8.3.2 for the following CUDA versions: 11.3, 11.5, and 11.6.
We use the official NVIDIA CUDA Ubuntu image and install cuDNN 8.3.2 on top of it.

Pull Request resolved: pytorch#74545
Approved by: https://github.com/malfet
  • Loading branch information
atalman authored and pytorchmergebot committed Mar 25, 2022
1 parent 66e07f2 commit ca96d1d
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 3 deletions.
8 changes: 8 additions & 0 deletions .circleci/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,13 @@ fi

tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

#when using cudnn version 8 install it separately from cuda
if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
if [[ ${CUDNN_VERSION} == 8 ]]; then
IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
fi
fi

# Build image
# TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
Expand Down Expand Up @@ -321,6 +328,7 @@ docker build \
--build-arg "KATEX=${KATEX:-}" \
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx900;gfx906}" \
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
-f $(dirname ${DOCKERFILE})/Dockerfile \
-t "$tmp_tag" \
"$@" \
Expand Down
18 changes: 18 additions & 0 deletions .circleci/docker/common/install_cudnn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash

if [[ ${CUDNN_VERSION} == 8 ]]; then
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
curl -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz
tar xf ${CUDNN_NAME}.tar.xz
cp -a ${CUDNN_NAME}/include/* /usr/include/
cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
cp -a ${CUDNN_NAME}/include/* /usr/include/x86_64-linux-gnu/

cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
cp -a ${CUDNN_NAME}/lib/* /usr/lib/x86_64-linux-gnu/
cd ..
rm -rf tmp_cudnn
ldconfig
fi
11 changes: 8 additions & 3 deletions .circleci/docker/ubuntu-cuda/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
ARG UBUNTU_VERSION
ARG CUDA_VERSION
ARG CUDNN_VERSION
ARG IMAGE_NAME

FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}
FROM ${IMAGE_NAME}

ARG UBUNTU_VERSION
ARG CUDA_VERSION
ARG CUDNN_VERSION

ENV DEBIAN_FRONTEND noninteractive

Expand Down Expand Up @@ -101,5 +100,11 @@ ENV CUDA_PATH /usr/local/cuda
# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm

# Install CUDNN
ARG CUDNN_VERSION
ADD ./common/install_cudnn.sh install_cudnn.sh
RUN if [ "${CUDNN_VERSION}" -eq 8 ]; then bash install_cudnn.sh; fi
RUN rm install_cudnn.sh

USER jenkins
CMD ["bash"]

0 comments on commit ca96d1d

Please sign in to comment.