[slides] chapter prelim
Mu Li authored and astonzhang committed Feb 17, 2021
1 parent dc7a27a commit 842481d
Showing 18 changed files with 388 additions and 456 deletions.
7 changes: 5 additions & 2 deletions Jenkinsfile
@@ -15,6 +15,8 @@ stage("Build and Publish") {

sh label: "Build Environment", script: """set -ex
conda env update -n ${ENV_NAME} -f static/build.yml
pip uninstall -y d2lbook
pip install git+https://github.com/d2l-ai/d2l-book
pip list
nvidia-smi
"""
@@ -35,6 +37,7 @@ stage("Build and Publish") {
conda activate ${ENV_NAME}
./static/cache.sh restore _build/eval_pytorch/data
d2lbook build eval --tab pytorch
d2lbook build slides --tab pytorch
./static/cache.sh store _build/eval_pytorch/data
"""

@@ -60,13 +63,13 @@ stage("Build and Publish") {
sh label:"Release", script:"""set -ex
conda activate ${ENV_NAME}
d2lbook build pkg
d2lbook deploy html pdf --s3 s3://zh-v2.d2l.ai
d2lbook deploy html pdf slides --s3 s3://zh-v2.d2l.ai
"""

} else {
sh label:"Publish", script:"""set -ex
conda activate ${ENV_NAME}
d2lbook deploy html pdf --s3 s3://preview.d2l.ai/${JOB_NAME}/
d2lbook deploy html pdf slides --s3 s3://preview.d2l.ai/${JOB_NAME}/
"""
if (env.BRANCH_NAME.startsWith("PR-")) {
pullRequest.comment("Job ${JOB_NAME}/${BUILD_NUMBER} is complete. \nCheck the results at http://preview.d2l.ai/${JOB_NAME}/")
19 changes: 6 additions & 13 deletions Jenkinsfile_origin
@@ -1,8 +1,3 @@
---
source: https://github.com/d2l-ai/d2l-en/blob/master/Jenkinsfile
commit: 9bf95b1
---

stage("Build and Publish") {
// such as d2l-en and d2l-zh
def REPO_NAME = env.JOB_NAME.split('/')[0]
@@ -17,12 +12,12 @@ stage("Build and Publish") {
checkout scm
// conda environment
def ENV_NAME = "${TASK}-${EXECUTOR_NUMBER}";
// assign two GPUs to each build
def EID = EXECUTOR_NUMBER.toInteger()
def CUDA_VISIBLE_DEVICES=(EID*2).toString() + ',' + (EID*2+1).toString();

sh label: "Build Environment", script: """set -ex
conda env update -n ${ENV_NAME} -f static/build.yml
conda activate ${ENV_NAME}
pip uninstall -y d2lbook
pip install git+https://github.com/d2l-ai/d2l-book
pip list
nvidia-smi
"""
@@ -34,23 +29,21 @@ stage("Build and Publish") {

sh label: "Execute Notebooks", script: """set -ex
conda activate ${ENV_NAME}
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
./static/cache.sh restore _build/eval/data
d2lbook build eval
./static/cache.sh store _build/eval/data
"""

sh label: "Execute Notebooks [PyTorch]", script: """set -ex
conda activate ${ENV_NAME}
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
./static/cache.sh restore _build/eval_pytorch/data
d2lbook build eval --tab pytorch
d2lbook build slides --tab pytorch
./static/cache.sh store _build/eval_pytorch/data
"""

sh label: "Execute Notebooks [TensorFlow]", script: """set -ex
conda activate ${ENV_NAME}
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
./static/cache.sh restore _build/eval_tensorflow/data
export TF_CPP_MIN_LOG_LEVEL=3
d2lbook build eval --tab tensorflow
@@ -71,7 +64,7 @@ stage("Build and Publish") {
sh label:"Release", script:"""set -ex
conda activate ${ENV_NAME}
d2lbook build pkg
d2lbook deploy html pdf pkg colab sagemaker --s3 s3://preview.d2l.ai/${JOB_NAME}/
d2lbook deploy html pdf pkg colab sagemaker slides --s3 s3://en.d2l.ai/
"""

sh label:"Release d2l", script:"""set -ex
@@ -83,7 +76,7 @@ stage("Build and Publish") {
} else {
sh label:"Publish", script:"""set -ex
conda activate ${ENV_NAME}
d2lbook deploy html pdf --s3 s3://preview.d2l.ai/${JOB_NAME}/
d2lbook deploy html pdf slides --s3 s3://preview.d2l.ai/${JOB_NAME}/
"""
if (env.BRANCH_NAME.startsWith("PR-")) {
pullRequest.comment("Job ${JOB_NAME}/${BUILD_NUMBER} is complete. \nCheck the results at http://preview.d2l.ai/${JOB_NAME}/")
37 changes: 15 additions & 22 deletions chapter_preliminaries/autograd.md
@@ -6,43 +6,36 @@
Deep learning frameworks speed up this work by computing derivatives automatically, i.e., by *automatic differentiation*. In practice, based on the model we have designed, the system builds a *computational graph* that tracks how data are combined through a series of operations to produce the output. Automatic differentiation then enables the system to backpropagate gradients.
Here, to *backpropagate* simply means to trace through the computational graph, filling in the partial derivatives with respect to each parameter.

```{.python .input}
from mxnet import autograd, np, npx
npx.set_np()
```

```{.python .input}
#@tab pytorch
import torch
```

```{.python .input}
#@tab tensorflow
import tensorflow as tf
```

## A Simple Example

As a toy example, say that we are interested in differentiating the function $y = 2\mathbf{x}^{\top}\mathbf{x}$ with respect to the column vector $\mathbf{x}$. To start, we create the variable `x` and assign it an initial value.
As a toy example, say that we are interested in (**differentiating the function $y = 2\mathbf{x}^{\top}\mathbf{x}$ with respect to the column vector $\mathbf{x}$**). To start, we create the variable `x` and assign it an initial value.

```{.python .input}
from mxnet import autograd, np, npx
npx.set_np()
x = np.arange(4.0)
x
```

```{.python .input}
#@tab pytorch
import torch
x = torch.arange(4.0)
x
```

```{.python .input}
#@tab tensorflow
import tensorflow as tf
x = tf.range(4, dtype=tf.float32)
x
```

Before we even calculate the gradient of $y$ with respect to $\mathbf{x}$, we need a place to store it.
[**Before we even calculate the gradient of $y$ with respect to $\mathbf{x}$, we need a place to store it.**]
It is important that we do not allocate new memory every time we take a derivative with respect to a parameter, because we often update the same parameters many thousands of times and could quickly run out of memory. Note that the gradient of a scalar-valued function with respect to a vector $\mathbf{x}$ is itself a vector with the same shape as $\mathbf{x}$.

```{.python .input}
@@ -64,7 +57,7 @@ x.grad # The default value is None
x = tf.Variable(x)
```
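
For the PyTorch tab, a minimal sketch of this step (an illustrative example, not necessarily the notebook's exact cell) is to mark the tensor as requiring gradients:

```python
import torch

x = torch.arange(4.0, requires_grad=True)  # tell autograd to store gradients for x
print(x.grad)  # no gradient has been computed yet, so this prints None
```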

Now let us calculate $y$.
(**Now let us calculate $y$.**)

```{.python .input}
# Place the code inside an `autograd.record` scope to build the computational graph
@@ -87,7 +80,7 @@ with tf.GradientTape() as t:
y
```

Since `x` is a vector of length 4, an inner product of `x` and `x` is performed, yielding the scalar output that we assign to `y`. Next, we can automatically calculate the gradient of `y` with respect to each component of `x` by calling the function for backpropagation, and then print these gradients.
Since `x` is a vector of length 4, an inner product of `x` and `x` is performed, yielding the scalar output that we assign to `y`. Next, we can [**automatically calculate the gradient of `y` with respect to each component of `x` by calling the function for backpropagation**], and then print these gradients.

```{.python .input}
y.backward()
@@ -122,7 +115,7 @@ x.grad == 4 * x
x_grad == 4 * x
```

Now let us calculate another function of `x`.
[**Now let us calculate another function of `x`.**]

```{.python .input}
with autograd.record():
@@ -151,7 +144,7 @@ t.gradient(y, x) # Overwritten by the newly calculated gradient

When `y` is not a scalar, the most natural interpretation of the derivative of the vector `y` with respect to the vector `x` is a matrix. For higher-order, higher-dimensional `y` and `x`, the result of differentiation could be a high-order tensor.

However, while these more exotic objects do show up in advanced machine learning (including in deep learning), more often, when we call backward on a vector, we are trying to calculate the derivatives of the loss function for each constituent of a batch of training examples. Here, our intent is not to calculate the differentiation matrix but rather the sum of the partial derivatives computed individually for each example in the batch.
However, while these more exotic objects do show up in advanced machine learning (including [**in deep learning**]), more often, when we call backward on a vector, we are trying to calculate the derivatives of the loss function for each constituent of a batch of training examples. Here, (**our intent is not to calculate the differentiation matrix but rather the sum of the partial derivatives computed individually for each example in the batch.**)

```{.python .input}
# When we invoke `backward` on a vector-valued variable `y` (a function of `x`),
@@ -167,7 +160,7 @@ x.grad # Equivalent to y = sum(x * x)
# Invoking `backward` on a non-scalar requires passing a `gradient` argument that specifies the gradient of the differentiated function w.r.t. `self`. In our case we only want the sum of the partial derivatives, so passing a gradient of ones is appropriate
x.grad.zero_()
y = x * x
# Equivalent to y.backward(torch.ones(len(x)))
# Equivalent to y.backward(torch.ones(len(x)))
y.sum().backward()
x.grad
```
@@ -181,7 +174,7 @@ t.gradient(y, x) # Same as `y = tf.reduce_sum(x * x)`

## Detaching Computation

Sometimes, we wish to move some calculations outside of the recorded computational graph.
Sometimes, we wish to [**move some calculations outside of the recorded computational graph**].
For example, say that `y` was calculated as a function of `x`, and that subsequently `z` was calculated as a function of both `y` and `x`.
Now, imagine that we want to calculate the gradient of `z` with respect to `x` but, for some reason, want to treat `y` as a constant and only take into account the role that `x` played after `y` was calculated.
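
A minimal PyTorch sketch of this idea, using `Tensor.detach()` (an illustrative example rather than the notebook's exact cell):

```python
import torch

x = torch.arange(4.0, requires_grad=True)
y = x * x
u = y.detach()         # u has the same values as y but is cut out of the graph
z = u * x              # z is computed as if u were a constant
z.sum().backward()
print(x.grad == u)     # the gradient of z w.r.t. x is u, not 3 * x**2
```

Because `u` is detached, backpropagation through `z` stops at `u` and only the direct dependence on `x` contributes to the gradient.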

70 changes: 29 additions & 41 deletions chapter_preliminaries/autograd_origin.md
@@ -1,8 +1,3 @@
---
source: https://github.com/d2l-ai/d2l-en/blob/master/chapter_preliminaries/autograd.md
commit: 9e55a9c
---

# Automatic Differentiation
:label:`sec_autograd`

@@ -24,49 +19,42 @@ Automatic differentiation enables the system to subsequently backpropagate gradients
Here, *backpropagate* simply means to trace through the computational graph,
filling in the partial derivatives with respect to each parameter.

```{.python .input}
from mxnet import autograd, np, npx
npx.set_np()
```

```{.python .input}
#@tab pytorch
import torch
```

```{.python .input}
#@tab tensorflow
import tensorflow as tf
```

## A Simple Example

As a toy example, say that we are interested
in differentiating the function
in (**differentiating the function
$y = 2\mathbf{x}^{\top}\mathbf{x}$
with respect to the column vector $\mathbf{x}$.
with respect to the column vector $\mathbf{x}$.**)
To start, let us create the variable `x` and assign it an initial value.

```{.python .input}
from mxnet import autograd, np, npx
npx.set_np()
x = np.arange(4.0)
x
```

```{.python .input}
#@tab pytorch
import torch
x = torch.arange(4.0)
x
```

```{.python .input}
#@tab tensorflow
import tensorflow as tf
x = tf.range(4, dtype=tf.float32)
x
```

Before we even calculate the gradient
[**Before we even calculate the gradient
of $y$ with respect to $\mathbf{x}$,
we will need a place to store it.
we will need a place to store it.**]
It is important that we do not allocate new memory
every time we take a derivative with respect to a parameter
because we will often update the same parameters
@@ -95,7 +83,7 @@ x.grad # The default value is None
x = tf.Variable(x)
```

Now let us calculate $y$.
(**Now let us calculate $y$.**)

```{.python .input}
# Place our code inside an `autograd.record` scope to build the computational
@@ -122,8 +110,8 @@ y
Since `x` is a vector of length 4,
an inner product of `x` and `x` is performed,
yielding the scalar output that we assign to `y`.
Next, we can automatically calculate the gradient of `y`
with respect to each component of `x`
Next, [**we can automatically calculate the gradient of `y`
with respect to each component of `x`**]
by calling the function for backpropagation and printing the gradient.

```{.python .input}
@@ -143,8 +131,8 @@ x_grad = t.gradient(y, x)
x_grad
```

The gradient of the function $y = 2\mathbf{x}^{\top}\mathbf{x}$
with respect to $\mathbf{x}$ should be $4\mathbf{x}$.
(**The gradient of the function $y = 2\mathbf{x}^{\top}\mathbf{x}$
with respect to $\mathbf{x}$ should be $4\mathbf{x}$.**)
Let us quickly verify that our desired gradient was calculated correctly.
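
As a quick check of this claim: since $\mathbf{x}^{\top}\mathbf{x} = \sum_i x_i^2$, we have

$$\frac{\partial}{\partial x_i}\left(2\sum_j x_j^2\right) = 4x_i, \qquad \text{so} \qquad \nabla_{\mathbf{x}}\, 2\mathbf{x}^{\top}\mathbf{x} = 4\mathbf{x}.$$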

```{.python .input}
@@ -161,7 +149,7 @@ x.grad == 4 * x
x_grad == 4 * x
```

Now let us calculate another function of `x`.
[**Now let us calculate another function of `x`.**]

```{.python .input}
with autograd.record():
@@ -172,9 +160,9 @@ x.grad # Overwritten by the newly calculated gradient

```{.python .input}
#@tab pytorch
# PyTorch accumulates gradients by default; we need to clear the previous
# PyTorch accumulates gradients by default; we need to clear the previous
# values
x.grad.zero_()
x.grad.zero_()
y = x.sum()
y.backward()
x.grad
@@ -196,13 +184,13 @@ For higher-order and higher-dimensional `y` and `x`,
the differentiation result could be a high-order tensor.

However, while these more exotic objects do show up
in advanced machine learning (including in deep learning),
more often when we are calling backward on a vector,
in advanced machine learning (including [**in deep learning**]),
more often (**when we are calling backward on a vector,**)
we are trying to calculate the derivatives of the loss functions
for each constituent of a *batch* of training examples.
Here, our intent is not to calculate the differentiation matrix
but rather the sum of the partial derivatives
computed individually for each example in the batch.
Here, (**our intent is**) not to calculate the differentiation matrix
but rather (**the sum of the partial derivatives
computed individually for each example**) in the batch.
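
A minimal PyTorch sketch of the two equivalent formulations (passing an all-ones `gradient` vector versus summing first); this is illustrative, assuming the same `x` as above:

```python
import torch

x = torch.arange(4.0, requires_grad=True)
y = x * x                                  # vector-valued y, one entry per example
y.backward(gradient=torch.ones_like(y))    # sum of the per-example partial derivatives
print(x.grad)                              # tensor([0., 2., 4., 6.])

x.grad.zero_()                             # clear the accumulated gradient
y = x * x
y.sum().backward()                         # equivalent: reduce to a scalar, then backpropagate
print(x.grad)                              # tensor([0., 2., 4., 6.])
```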

```{.python .input}
# When we invoke `backward` on a vector-valued variable `y` (function of `x`),
@@ -236,8 +224,8 @@ t.gradient(y, x) # Same as `y = tf.reduce_sum(x * x)`

## Detaching Computation

Sometimes, we wish to move some calculations
outside of the recorded computational graph.
Sometimes, we wish to [**move some calculations
outside of the recorded computational graph.**]
For example, say that `y` was calculated as a function of `x`,
and that subsequently `z` was calculated as a function of both `y` and `x`.
Now, imagine that we wanted to calculate
@@ -309,10 +297,10 @@ t.gradient(y, x) == 2 * x
## Computing the Gradient of Python Control Flow

One benefit of using automatic differentiation
is that even if building the computational graph of a function
required passing through a maze of Python control flow
is that [**even if**] building the computational graph of (**a function
required passing through a maze of Python control flow**)
(e.g., conditionals, loops, and arbitrary function calls),
we can still calculate the gradient of the resulting variable.
(**we can still calculate the gradient of the resulting variable.**)
In the following snippet, note that
the number of iterations of the `while` loop
and the evaluation of the `if` statement
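
A sketch of what such a function can look like (assuming PyTorch; the names `f` and `a` are illustrative, not necessarily the notebook's own):

```python
import torch

def f(a):
    b = a * 2
    while b.norm() < 1000:   # number of loop iterations depends on the value of a
        b = b * 2
    if b.sum() > 0:          # which branch runs also depends on a
        c = b
    else:
        c = 100 * b
    return c

a = torch.randn(size=(), requires_grad=True)
d = f(a)
d.backward()
print(a.grad == d / a)       # f is piecewise linear in a, so the gradient equals d / a
```

Even though the shape of the computation depends on the input through control flow, autograd records the operations actually executed for this particular `a` and differentiates through them.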