Add network code for paper
HarleysZhang committed Sep 11, 2021
1 parent a77880c commit f50035e
Showing 9 changed files with 449 additions and 74 deletions.
6 changes: 5 additions & 1 deletion 5-deep_learning/3-经典 backbone 总结.md
@@ -80,7 +80,7 @@ A comparison of the model structure parameters of ResNeXt and ResNet is shown in the figure below.

## VoVNet

**One-Shot Aggregation (aggregate only once) means that the concat operation of the OSA module is performed only once, i.e., only the input of the last layer is the concat (stacking) of the feature maps of all preceding layers.** The structure of the `OSA` module is shown in Figure 1(b).
**One-Shot Aggregation (aggregate only once) means that the concat operation of the OSA module is performed only once, i.e., only the input of the last layer (the $1\times 1$ convolution) is the concat (stacking) of the feature maps of all preceding layers.** The structure of the `OSA` module is shown in Figure 1(b).

![VoVNet](../data/images/backbone/VoVNet.png)
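
A minimal PyTorch sketch of an OSA module as described above (the layer count, channel sizes, and the inclusion of the input in the final concat are illustrative assumptions, not the official VoVNet code):

```python
import torch
import torch.nn as nn

class OSAModule(nn.Module):
    """One-Shot Aggregation: stack 3x3 convs sequentially and concatenate
    all intermediate outputs only once, then fuse them with a 1x1 conv."""
    def __init__(self, in_ch, stage_ch, concat_ch, num_layers=5):
        super().__init__()
        self.layers = nn.ModuleList()
        ch = in_ch
        for _ in range(num_layers):
            self.layers.append(nn.Sequential(
                nn.Conv2d(ch, stage_ch, 3, padding=1, bias=False),
                nn.BatchNorm2d(stage_ch),
                nn.ReLU(inplace=True)))
            ch = stage_ch
        # the single aggregation: concat of input + all layer outputs -> 1x1 conv
        self.concat_conv = nn.Sequential(
            nn.Conv2d(in_ch + num_layers * stage_ch, concat_ch, 1, bias=False),
            nn.BatchNorm2d(concat_ch),
            nn.ReLU(inplace=True))

    def forward(self, x):
        outputs = [x]
        for layer in self.layers:
            x = layer(x)
            outputs.append(x)
        return self.concat_conv(torch.cat(outputs, dim=1))
```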

@@ -99,6 +99,10 @@ A comparison of the model structure parameters of ResNeXt and ResNet is shown in the figure below.
- Designing a network with a small number of `layer`s is the better choice.
- 1x1 convolutions reduce the amount of computation, but they are not friendly to GPU computation.

The CenterMask paper proposed VoVNetv2; the structure of its convolution module is shown below:

![VoVNetv2](../data/images/backbone/VoVNetv2.png)

## Some conclusions

- The memory access cost (`MAC`) of a convolutional layer is minimized when its input and output channel counts are equal (see the sketch below).
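
A sketch of the reasoning behind this conclusion, in the style of the ShuffleNetV2 analysis (the symbols $h, w, c_1, c_2$ are introduced here for illustration): for a $1\times 1$ convolution with $c_1$ input channels, $c_2$ output channels and an $h\times w$ feature map, the FLOPs are $B = hwc_1c_2$ and

$$\mathrm{MAC} = hw(c_1 + c_2) + c_1 c_2 \geq 2\sqrt{hwB} + \frac{B}{hw},$$

where the inequality follows from AM–GM ($c_1 + c_2 \geq 2\sqrt{c_1 c_2}$) and becomes an equality exactly when $c_1 = c_2$.
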
@@ -33,7 +33,7 @@ $[\mathrm{x_0},\mathrm{x_1},...,\mathrm{x_{l-1}}]$ denotes network layers $0,...

![densenet-block结构图](../../data/images/densenet/densenet-block结构图.png)

In the design of the `DenseBlock`, the authors highlight a parameter $k$, called the growth rate of the network. If every function $H_l(\cdot)$ outputs $k$ feature maps, then the number of input feature maps of the $l$-th layer is $k_0 + k\times (l-1)$, where $k_0$ is the number of input feature maps of the `DenseBlock` (i.e., the number of input channels of its first convolutional layer). The most notable difference between `DenseNet` and other networks is that $k$ can be made very small, e.g., $k=12$, i.e., the network becomes very "narrow" without hurting accuracy, as shown in Table 4.
In the design of the `DenseBlock`, the authors highlight a parameter $k$, called the growth rate of the network, which is in fact the number of filters (output channels) of every $3\times 3$ convolutional layer in the `DenseBlock`. If every function $H_l(\cdot)$ outputs $k$ feature maps, then the number of input feature maps of the $l$-th layer is $k_0 + k\times (l-1)$, where $k_0$ is the number of input feature maps of the `DenseBlock` (i.e., the number of input channels of its first convolutional layer). The most notable difference between `DenseNet` and other networks is that $k$ can be made very small, e.g., $k=12$, i.e., the network becomes very "narrow" without hurting accuracy, as shown in Table 4.

![densenet对比实验结果](../../data/images/densenet/densenet对比实验结果.png)
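
A small sketch of the channel bookkeeping implied by the formula above (the values $k_0=64$ and $k=32$ are illustrative assumptions, not taken from the table):

```python
# Input channels of dense layer l inside one DenseBlock: k0 + k * (l - 1);
# every layer adds k new feature maps to the running concatenation.
k0, k = 64, 32
for l in range(1, 7):
    print(f"layer {l}: {k0 + k * (l - 1)} input channels -> {k} output channels")
# layer 1: 64 -> 32, layer 2: 96 -> 32, ..., layer 6: 224 -> 32
```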

@@ -51,6 +51,8 @@ $[\mathrm{x_0},\mathrm{x_1},...,\mathrm{x_{l-1}}]$ denotes network layers $0,...

![densenet系列网络参数表](../../data/images/densenet/densenet系列网络参数表.png)

In every stage of the network, the number of `feature map`s produced by each convolutional layer is `32`.

## Advantages

1. **Fewer parameters**
156 changes: 156 additions & 0 deletions 5-deep_learning/Backbone 论文解读/densenet.py
@@ -0,0 +1,156 @@
# This implementation is based on the DenseNet-BC implementation in torchvision
# https://github.com/pytorch/vision/blob/master/torchvision/models/densenet.py

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
from collections import OrderedDict


def _bn_function_factory(norm, relu, conv):
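    # Concatenate all input feature maps along the channel dimension and apply the
    # BN -> ReLU -> 1x1 conv bottleneck. Returning this as a closure lets
    # torch.utils.checkpoint recompute it in the backward pass instead of storing
    # the concatenated tensor (the memory-efficient mode used in _DenseLayer).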
    def bn_function(*inputs):
        concated_features = torch.cat(inputs, 1)
        bottleneck_output = conv(relu(norm(concated_features)))
        return bottleneck_output

    return bn_function


class _DenseLayer(nn.Module):
    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, efficient=False):
        super(_DenseLayer, self).__init__()
        self.add_module('norm1', nn.BatchNorm2d(num_input_features))
        self.add_module('relu1', nn.ReLU(inplace=True))
        self.add_module('conv1', nn.Conv2d(num_input_features, bn_size * growth_rate,
                                           kernel_size=1, stride=1, bias=False))
        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate))
        self.add_module('relu2', nn.ReLU(inplace=True))
        self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
                                           kernel_size=3, stride=1, padding=1, bias=False))
        self.drop_rate = drop_rate
        self.efficient = efficient

    def forward(self, *prev_features):
        bn_function = _bn_function_factory(self.norm1, self.relu1, self.conv1)
        if self.efficient and any(prev_feature.requires_grad for prev_feature in prev_features):
            bottleneck_output = cp.checkpoint(bn_function, *prev_features)
        else:
            bottleneck_output = bn_function(*prev_features)
        new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
        if self.drop_rate > 0:  # add dropout to improve the model's generalization
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
        return new_features


class _Transition(nn.Sequential):
    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        self.add_module('norm', nn.BatchNorm2d(num_input_features))
        self.add_module('relu', nn.ReLU(inplace=True))
        self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
                                          kernel_size=1, stride=1, bias=False))
        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))


class _DenseBlock(nn.Module):
    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, efficient=False):
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _DenseLayer(
                num_input_features + i * growth_rate,
                growth_rate=growth_rate,
                bn_size=bn_size,
                drop_rate=drop_rate,
                efficient=efficient,
            )
            self.add_module('denselayer%d' % (i + 1), layer)

    def forward(self, init_features):
        # Each dense layer receives the list of all previous feature maps as
        # separate tensors; the concatenation happens inside the layer.
        features = [init_features]
        for name, layer in self.named_children():
            new_features = layer(*features)
            features.append(new_features)
        return torch.cat(features, 1)


class DenseNet(nn.Module):
r"""Densenet-BC model class, based on
`"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`
Args:
growth_rate (int) - how many filters to add each layer (`k` in paper)
block_config (list of 3 or 4 ints) - how many layers in each pooling block
num_init_features (int) - the number of filters to learn in the first convolution layer
bn_size (int) - multiplicative factor for number of bottle neck layers
(i.e. bn_size * k features in the bottleneck layer)
drop_rate (float) - dropout rate after each dense layer
num_classes (int) - number of classification classes
small_inputs (bool) - set to True if images are 32x32. Otherwise assumes images are larger.
efficient (bool) - set to True to use checkpointing. Much more memory efficient, but slower.
"""
def __init__(self, growth_rate=12, block_config=(16, 16, 16), compression=0.5,
num_init_features=24, bn_size=4, drop_rate=0,
num_classes=10, small_inputs=True, efficient=False):

super(DenseNet, self).__init__()
assert 0 < compression <= 1, 'compression of densenet should be between 0 and 1'

        # First convolution
        if small_inputs:
            self.features = nn.Sequential(OrderedDict([
                ('conv0', nn.Conv2d(3, num_init_features, kernel_size=3, stride=1, padding=1, bias=False)),
            ]))
        else:
            self.features = nn.Sequential(OrderedDict([
                ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
            ]))
            self.features.add_module('norm0', nn.BatchNorm2d(num_init_features))
            self.features.add_module('relu0', nn.ReLU(inplace=True))
            self.features.add_module('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1,
                                                           ceil_mode=False))

        # Each denseblock
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(
                num_layers=num_layers,
                num_input_features=num_features,
                bn_size=bn_size,
                growth_rate=growth_rate,
                drop_rate=drop_rate,
                efficient=efficient,
            )
            self.features.add_module('denseblock%d' % (i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                trans = _Transition(num_input_features=num_features,
                                    num_output_features=int(num_features * compression))
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = int(num_features * compression)

        # Final batch norm
        self.features.add_module('norm_final', nn.BatchNorm2d(num_features))

        # Linear layer
        self.classifier = nn.Linear(num_features, num_classes)

        # Initialization
        for name, param in self.named_parameters():
            if 'conv' in name and 'weight' in name:
                n = param.size(0) * param.size(2) * param.size(3)
                param.data.normal_().mul_(math.sqrt(2. / n))
            elif 'norm' in name and 'weight' in name:
                param.data.fill_(1)
            elif 'norm' in name and 'bias' in name:
                param.data.fill_(0)
            elif 'classifier' in name and 'bias' in name:
                param.data.fill_(0)

    def forward(self, x):
        features = self.features(x)
        out = F.relu(features, inplace=True)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out
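

if __name__ == "__main__":
    # Illustrative smoke test: build the default DenseNet-BC (growth_rate=12,
    # three blocks of 16 layers, 32x32 inputs) and run a dummy batch through it.
    model = DenseNet(growth_rate=12, block_config=(16, 16, 16), num_classes=10)
    x = torch.randn(2, 3, 32, 32)
    print(model(x).shape)  # expected: torch.Size([2, 10])
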
119 changes: 109 additions & 10 deletions 5-deep_learning/轻量级网络论文解析/CSPNet 论文详解.md
@@ -2,13 +2,14 @@
- [1, Introduction](#1介绍)
- [2, Related work](#2相关工作)
- [3, Proposed improvements](#3改进方法)
- [3.1, Cross Stage Partial Network](#31cross-stage-partial-network)
- [3.2, Exact Fusion Model](#32exact-fusion-model)
  - [3.1, Cross Stage Partial Network](#31cross-stage-partial-network)
  - [3.2, Exact Fusion Model](#32exact-fusion-model)
- [4, Experiments](#4实验)
- [4.1, Experimental details](#41实验细节)
- [4.2, Ablation studies](#42消融实验)
- [4.3, Experiment summary](#43实验总结)
  - [4.1, Experimental details](#41实验细节)
  - [4.2, Ablation studies](#42消融实验)
  - [4.3, Experiment summary](#43实验总结)
- [5, Conclusion](#5结论)
- [6, Code walkthrough](#6代码解读)
- [References](#参考资料)

## Abstract
@@ -31,7 +32,7 @@ The results of combining CSPNet with different backbones are shown in the figure below.
`CSPNet` was proposed mainly to solve three problems:

1. Strengthen the learning ability of a CNN, keeping accuracy while making the network lightweight.
2. Reduce computational bottlenecks
2. Reduce computational bottlenecks and the duplicated gradient information of DenseNet
3. Reduce memory cost.

## 2, Related work
@@ -48,12 +49,18 @@ The results of combining CSPNet with different backbones are shown in the figure below.

1,**DenseNet**

![DenseNet的密集层权重更新公式](../../data/images/CSPNet/DenseNet的密集层权重更新公式.png)

Here $f$ is the weight-update function and $g_i$ is the gradient propagated to the $i$-th dense layer. From Eq. (2) we can see that a large amount of gradient information is reused to update the weights of the different dense layers, which causes the different dense layers to repeatedly learn copied gradient information.
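
For reference, a hedged reconstruction of the dense-layer equations shown in the image above, following the CSPNet paper's notation (treat this as a sketch, not a verbatim copy), where $*$ denotes convolution and $[x_0, x_1, \dots]$ denotes concatenation:

$$
\begin{aligned}
x_1 &= w_1 * x_0 \\
x_2 &= w_2 * [x_0, x_1] \\
&\ \vdots \\
x_k &= w_k * [x_0, x_1, \dots, x_{k-1}]
\end{aligned}
\qquad
\begin{aligned}
w_1' &= f(w_1, g_0) \\
w_2' &= f(w_2, g_0, g_1) \\
&\ \vdots \\
w_k' &= f(w_k, g_0, g_1, \dots, g_{k-1})
\end{aligned}
$$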

2,**Cross Stage Partial DenseNet.**

The single-stage architecture of the proposed `CSPDenseNet` is shown in Figure 2(b). One stage of `CSPDenseNet` consists of a partial dense block and a partial transition layer.

![DenseNet和CSPDenseNet结构图](../../data/images/CSPNet/DenseNet和CSPDenseNet结构图.png)

Overall, the proposed CSPDenseNet keeps DenseNet's advantage of feature reuse, while preventing excessive duplicated gradient information by **truncating the gradient flow**. This idea is realized through a hierarchical feature-fusion strategy applied in the partial transition layer.

3,**Partial Dense Block.**

The partial dense block (`partial dense block`) is designed in order to
@@ -68,8 +75,13 @@ The results of combining CSPNet with different backbones are shown in the figure below.

![Figure3](../../data/images/CSPNet/Figure3.png)

- **Fusion First** first performs the `concatenation` of the feature maps of the two branches, so the gradient information can be reused.
- **Fusion Last** first applies the `transition` to the branch containing the Dense Block and then performs the concatenation; the gradient flow is truncated, so the gradient information is not reused.
The transition layer has the same meaning as in DenseNet: a 1x1 convolutional layer (without the `average pool`). The position of the `transition layer` in the figure above determines how the gradients are combined, and each placement has its own advantages:

- Figure (c), Fusion First: the two parts are concatenated first and then fed into the transition layer; with this choice a large amount of gradient information is reused, which benefits learning;
- Figure (d), Fusion Last: part of the features is passed through the transition layer first and then concatenated; the gradient flow is truncated, so some gradient reuse is lost, but since the input dimension of the transition layer is smaller than in (c), the computational complexity drops considerably.
- Figure (b) shows the structure adopted by the `CSPNet` paper; it combines the characteristics of (c) and (d), improving the learning ability at the cost of slightly higher computational complexity. The paper reports experimental results for the different partial transition layers, shown in the figure below. Which structure to use can be adjusted according to the constraints and the application scenario.

![不同Transition-layer的对比实验](../../data/images/CSPNet/不同Transition-layer的对比实验.png)

5,**Apply CSPNet to Other Architectures.**

@@ -101,7 +113,6 @@ Ablation results of EFM on the COCO dataset.

Judging from the experimental results: for classification, using `CSPNet` lowers the computation but brings only a small accuracy gain; for object detection, using `CSPNet` as the `Backbone` yields a larger accuracy improvement, effectively strengthening the learning ability of the `CNN` while also lowering the computation.


## 5, Conclusion

`CSPNet` is a lightweight network architecture that can run on mobile `gpu`s and `cpu`s.
@@ -112,6 +123,94 @@ Ablation results of EFM on the COCO dataset.

The experimental results show that the proposed `EFM`-based `CSPNet` clearly outperforms its competitors in accuracy and inference rate for real-time object detection on mobile `GPU`s or `CPU`s.

## 6, Code walkthrough

1, Implementation of the Partial Dense Block: the code can be obtained with only minor modifications to the Dense Block code, see the reference [here](). A simple Dense Block implementation is shown below:

```python
import torch
import torch.nn as nn


class conv2d_bn_relu(nn.Module):
    """
    CONV_BN_RELU
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                 dilation=1, groups=1, bias=False):
        super(conv2d_bn_relu, self).__init__()
        layers = [nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride,
                            padding=padding, dilation=dilation, groups=groups, bias=bias),
                  nn.BatchNorm2d(out_channels),
                  nn.ReLU(inplace=False)]

        self.seq = nn.Sequential(*layers)

    def forward(self, x):
        return self.seq(x)


class bn_relu_conv2d(nn.Module):
    """
    BN_RELU_CONV
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                 dilation=1, groups=1, bias=False):
        super(bn_relu_conv2d, self).__init__()
        layers = [nn.BatchNorm2d(in_channels),
                  nn.ReLU(inplace=False),
                  nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride,
                            padding=padding, dilation=dilation, groups=groups, bias=bias)]

        self.seq = nn.Sequential(*layers)

    def forward(self, x):
        return self.seq(x)


class DenseBlock(nn.Module):

    def __init__(self, input_channels, num_layers, growth_rate):
        super(DenseBlock, self).__init__()
        self.num_layers = num_layers
        self.k0 = input_channels
        self.k = growth_rate
        self.layers = self.__make_layers()

    def __make_layers(self):
        layer_list = []
        for i in range(self.num_layers):
            layer_list.append(nn.Sequential(
                bn_relu_conv2d(self.k0 + i * self.k, 4 * self.k, 1, 1, 0),  # 1x1 bottleneck
                bn_relu_conv2d(4 * self.k, self.k, 3, 1, 1)  # 3x3 conv outputs k feature maps
            ))
        # use nn.ModuleList so the sub-layers' parameters are registered
        return nn.ModuleList(layer_list)

    def forward(self, x):
        feature = self.layers[0](x)
        out = torch.cat((x, feature), 1)
        for i in range(1, len(self.layers)):
            feature = self.layers[i](out)
            out = torch.cat((feature, out), 1)
        return out


# Partial Dense Block implementation:
class CSP_DenseBlock(nn.Module):

    def __init__(self, in_channels, num_layers, k, part_ratio=0.5):
        super(CSP_DenseBlock, self).__init__()
        self.part1_chnls = int(in_channels * part_ratio)   # channels that bypass the dense block
        self.part2_chnls = in_channels - self.part1_chnls  # channels fed through the dense block
        self.dense = DenseBlock(self.part2_chnls, num_layers, k)
        trans_chnls = self.part2_chnls + k * num_layers
        self.transition = conv2d_bn_relu(trans_chnls, trans_chnls, 1, 1, 0)

    def forward(self, x):
        part1 = x[:, :self.part1_chnls, :, :]
        part2 = x[:, self.part1_chnls:, :, :]
        part2 = self.dense(part2)       # could also be a residual block unit
        part2 = self.transition(part2)  # Fusion Last: transition on the dense branch before the concat
        out = torch.cat((part1, part2), 1)
        return out
```
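
A quick shape check of the `CSP_DenseBlock` defined above (the input size and hyper-parameters are illustrative assumptions):

```python
x = torch.randn(2, 64, 56, 56)  # batch of 2, 64 input channels
block = CSP_DenseBlock(in_channels=64, num_layers=6, k=32)
y = block(x)
print(y.shape)  # torch.Size([2, 256, 56, 56]): 32 bypassed channels + 224 from the dense path
```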

## References

- [增强CNN学习能力的Backbone:CSPNet](https://www.cnblogs.com/pprp/p/12566116.html)
- [CSPNet——PyTorch实现CSPDenseNet和CSPResNeXt](https://zhuanlan.zhihu.com/p/263555330)