
remove mxnet from torch (d2l-ai#993)
goldmermaid committed Oct 25, 2021
1 parent 69308b2 commit b0dcd2f
Showing 3 changed files with 2 additions and 7 deletions.
3 changes: 1 addition & 2 deletions chapter_attention-mechanisms/nadaraya-waston.md
@@ -396,8 +396,7 @@ animator = d2l.Animator(xlabel='epoch', ylabel='loss', xlim=[1, 5])
 for epoch in range(5):
     trainer.zero_grad()
-    # Note: L2 Loss = 1/2 * MSE Loss.
-    # PyTorch's MSE Loss differs from MXNet's L2Loss by a factor of 2, hence the division by 2.
+    # L2 Loss = 1/2 * MSE Loss
     l = loss(net(x_train, keys, values), y_train) / 2
     l.sum().backward()
     trainer.step()
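To make the factor-of-2 relationship in this hunk concrete, here is a minimal check (not part of the commit; the tensor values are arbitrary) confirming that halving PyTorch's `nn.MSELoss` reproduces an MXNet-style L2 loss:

```python
import torch
from torch import nn

y_hat = torch.tensor([0.5, 1.0])
y = torch.tensor([0.0, 2.0])

mse = nn.MSELoss()(y_hat, y)        # mean((y_hat - y)**2) = 0.625
l2 = ((y_hat - y) ** 2 / 2).mean()  # MXNet-style L2 loss = 0.3125
assert torch.isclose(mse / 2, l2)
```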
2 changes: 0 additions & 2 deletions chapter_natural-language-processing-pretraining/bert.md
@@ -317,8 +317,6 @@ nsp_Y_hat.shape

 ```{.python .input}
 #@tab pytorch
-# By default, PyTorch does not flatten the tensor the way MXNet does.
-# With flatten=True, all input axes except the first are collapsed together.
 encoded_X = torch.flatten(encoded_X, start_dim=1)
 # input shape of NSP: (batch size, `num_hiddens`)
 nsp = NextSentencePred(encoded_X.shape[-1])
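For context on the `torch.flatten` call this hunk keeps, a small sketch (not from the commit; the shape `(2, 8, 128)` is an arbitrary stand-in for a BERT encoder output) of what `start_dim=1` does:

```python
import torch

# Stand-in for an encoder output: (batch size, sequence length, num_hiddens)
encoded_X = torch.randn(2, 8, 128)
flat = torch.flatten(encoded_X, start_dim=1)
print(flat.shape)  # torch.Size([2, 1024]): every axis after the batch axis is collapsed
```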
4 changes: 1 addition & 3 deletions chapter_optimization/minibatch-sgd.md
@@ -532,9 +532,7 @@ def train_concise_ch11(trainer_fn, hyperparams, data_iter, num_epochs=4):
     optimizer = trainer_fn(net.parameters(), **hyperparams)
     loss = nn.MSELoss()
-    # Note: L2 Loss = 1/2 * MSE Loss.
-    # PyTorch's MSE loss differs from MXNet's L2 loss by a factor of 2.
-    # Therefore, we halve the loss in PyTorch.
+    # L2 Loss = 1/2 * MSE Loss
     animator = d2l.Animator(xlabel='epoch', ylabel='loss',
                             xlim=[0, num_epochs], ylim=[0.22, 0.35])
     n, timer = 0, d2l.Timer()
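As a sanity check on the halving convention kept in this hunk, a short sketch (illustrative only, arbitrary values) showing that the gradients of `MSELoss / 2` match those of an explicit L2 loss:

```python
import torch
from torch import nn

p = torch.randn(4, 1, requires_grad=True)
y = torch.randn(4, 1)

# Gradient of the halved MSE loss
(nn.MSELoss()(p, y) / 2).backward()
grad_halved = p.grad.clone()

# Gradient of an explicit L2 loss, mean((p - y)**2 / 2)
p.grad.zero_()
(((p - y) ** 2 / 2).mean()).backward()
assert torch.allclose(grad_halved, p.grad)
```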
