Paper address: https://arxiv.org/pdf/2207.14284.pdf
Source code address: https://github.com/raoyongming/HorNet.
High-quality reference article: https://blog.csdn.net/qq_42075634/article/details/126670347
The structure of the HorNet module is shown in the figure below:
This figure is from the paper. It shows that the HorNet block and the Swin Transformer block share a similar overall structure; the difference is that HorNet replaces self-attention with a new operator called gnConv (recursive gated convolution). The structure of gnConv is also shown in the figure above.
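As a quick illustration of the recursive design, gnConv splits the input channels into groups whose widths double at each order. The values below are just an example (dim=64 with the default order=5) and match the self.dims computation in the gnconv class added in step 2:

# Channel split used by gnConv (see the gnconv class in step 2)
dim, order = 64, 5
dims = [dim // 2 ** i for i in range(order)]  # [64, 32, 16, 8, 4]
dims.reverse()
print(dims)       # [4, 8, 16, 32, 64] -> coarse-to-fine groups, one per gating order
print(sum(dims))  # 124 channels pass through a single shared depth-wise convolution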
1. Add the HorBlock.yaml file
Create the HorBlock.yaml file in the ./models/ directory with the following content:
# parameters
nc: 80  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, HorBlock, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 6, HorBlock, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, HorBlock, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 3, HorBlock, [1024]],
   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3, [512, False]],  # 13
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
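For reference, YOLOv5's parse_model scales the "number" and channel columns of each row with depth_multiple and width_multiple before building the layer. A small worked example for the backbone entry [-1, 3, HorBlock, [128]] under this yaml's multipliers (the helper below mirrors the rounding rule of YOLOv5's make_divisible; it is only illustrative):

import math

def make_divisible(x, divisor=8):
    # same rounding rule YOLOv5 applies to channel counts
    return math.ceil(x / divisor) * divisor

gd, gw = 0.33, 0.50               # depth_multiple, width_multiple from the yaml above
n, c = 3, 128                     # "number" and channel argument of the HorBlock row
print(max(round(n * gd), 1))      # 1  -> the block is repeated once after depth scaling
print(make_divisible(c * gw, 8))  # 64 -> the channel argument after width scaling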
2. common.py configuration
Add the following template code to the ./models/common.py file
"""HorLayerNorm""" class gnconv(nn.Module): def __init__(self, dim, order=5, gflayer=None, h=14, w=8, s=1.0): super().__init__() self.order = order self.dims = [dim // 2 ** i for i in range(order)] self.dims.reverse() self.proj_in = nn.Conv2d(dim, 2 * dim, 1) if gflayer is None: self.dwconv = get_dwconv(sum(self.dims), 7, True) else: self.dwconv = gflayer(sum(self.dims), h=h, w=w) self.proj_out = nn.Conv2d(dim, dim, 1) self.pws = nn.ModuleList( [nn.Conv2d(self.dims[i], self.dims[i + 1], 1) for i in range(order - 1)] ) self.scale = s def forward(self, x, mask=None, dummy=False): # B, C, H, W = x.shape gnconv [512] by iscyy/air fused_x = self.proj_in(x) pwa, abc = torch.split(fused_x, (self.dims[0], sum(self.dims)), dim=1) dw_abc = self.dwconv(abc) * self.scale dw_list = torch.split(dw_abc, self.dims, dim=1) x = pwa * dw_list[0] for i in range(self.order - 1): x = self.pws[i](x) * dw_list[i + 1] x = self.proj_out(x) return x def get_dwconv(dim, kernel, bias): return nn.Conv2d(dim, dim, kernel_size=kernel, padding=(kernel - 1) // 2, bias=bias, groups=dim) class DropPath(nn.Module): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" def __init__(self,drop_prob=None): super(DropPath, self).__init__() self.drop_prob = drop_prob def forward(self, x): return drop_path_f(x, self.drop_prob, self.training) def drop_path_f(x, drop_prob: float = 0., training: bool = False): """ prop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...I've opted for changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the argument. """ if drop_prob == 0. 
or not training: return x keep_prob = 1 - drop_prob shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) random_tensor.floor_() # binarize output = x.div(keep_prob) * random_tensor return output class HorLayerNorm(nn.Module): def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): super().__init__() self.weight = nn.Parameter(torch.ones(normalized_shape)) self.bias = nn.Parameter(torch.zeros((normalized_shape))) self.eps = eps self.data_format = data_format if self.data_format not in ["channels_last", "channels_first"]: raise NotImplementedError # by iscyy/air self.normalized_shape = (normalized_shape,) def forward(self, x): if self.data_format == "channels_last": return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) elif self.data_format == "channels_first": u = x.mean(1, keepdim=True) s = (x - u).pow(2).mean(1, keepdim=True) x = (x - u) / torch.sqrt(s + self.eps) x = self.weight[:, None, None] * x + self.bias[:, None, None] return x class HorBlock(nn.Module): def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, gnconv=gnconv): super().__init__() self.norm1 = HorLayerNorm(dim, eps=1e-6, data_format='channels_first') self.gnconv = gnconv(dim) self.norm2 = HorLayerNorm(dim, eps=1e-6) self.pwconv1 = nn.Linear(dim, 4 * dim) self.act = nn.GELU() self.pwconv2 = nn.Linear(4 * dim, dim) self.gamma1 = nn.Parameter(layer_scale_init_value * torch.ones(dim), requires_grad=True) if layer_scale_init_value > 0 else None self.gamma2 = nn.Parameter(layer_scale_init_value * torch.ones(dim), requires_grad=True) if layer_scale_init_value > 0 else None self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() def forward(self, x): B, C, H, W = x.shape if self.gamma1 is not None: gamma1 = self.gamma1.view(C, 1, 1) else: gamma1 = 1 x = x + self.drop_path(gamma1 * self.gnconv(self.norm1(x))) input = x x = x.permute(0, 2, 3, 1) x = self.norm2(x) x = self.pwconv1(x) x = self.act(x) x = self.pwconv2(x) if self.gamma2 is not None: x = self.gamma2 * x x = x.permute(0, 3, 1, 2) x = input + self.drop_path(x) return x """HorLayerNorm"""
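Before wiring the block into yolo.py, it can be checked in isolation. A minimal sketch (run from the YOLOv5 repository root; the tensor shape is only an example):

import torch
from models.common import HorBlock  # assumes the code above was added to ./models/common.py

block = HorBlock(64)             # dim must equal the number of input channels
x = torch.randn(1, 64, 32, 32)   # (batch, channels, height, width)
y = block(x)
print(y.shape)                   # HorBlock keeps the shape: torch.Size([1, 64, 32, 32])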
3. yolo.py configuration
Find the parse_model() function in the models/yolo.py file and register the HorBlock class there, as shown below:
if m in {Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
         BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, HorBlock}:
Alternatively, inside the main loop of the parse_model function,

for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):

add the following code at the corresponding location. Choose one of the two methods.
elif m is HorBlock:
    c1, c2 = ch[f], args[0]
    if c2 != no:  # if not output
        c2 = make_divisible(c2 * gw, 8)
    args = [c1, c2, *args[1:]]
    if m is HorBlock:
        args.insert(2, n)
        n = 1
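Once the yaml, common.py, and yolo.py changes are all in place, building the model directly is a quick way to confirm that parse_model handles HorBlock. A minimal sketch (run from the repository root; adjust the path if HorBlock.yaml was saved elsewhere):

import torch
from models.yolo import Model  # YOLOv5's detection model class

model = Model('models/HorBlock.yaml', ch=3, nc=80)  # builds the network and prints the layer table
_ = model(torch.zeros(1, 3, 640, 640))              # dummy forward pass to check the shapes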
4. Train the model
python train.py --cfg HorBlock.yaml