AlexNet
Apr 30, 2024

AlexNet is the convolutional network from the original paper; see the paper and the reference code. AlexNet kicked off the deep-learning revolution in computer vision and laid the groundwork for subsequent neural-network architectures. Its principle is roughly as follows:

#=> AlexNet - CNN + MLP
#=> input img - (b, c, h, w)
#=> input y   - (b,)
feat = CNN(img)                    # (b, c_1, h_1, w_1)
logits = MLP(feat.reshape(b, -1))  # (b, n_class)
loss = cross_entropy(logits, y)

import torch
import torch.nn as nn
import torch.nn.functional as F


class AlexNet(nn.Module):
    def __init__(self, ch_in, d_out):
        super().__init__()
        # Convolutional feature extractor: five conv layers with ReLU,
        # interleaved with three max-pooling stages.
        self.features = nn.Sequential(
            nn.Conv2d(ch_in, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Adaptive pooling fixes the spatial size at 6x6 regardless of
        # the input resolution, so the classifier's input width is constant.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        # MLP classifier head with dropout regularization.
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, d_out),
        )

    def forward(self, x):
        x = self.features(x)                # (b, 256, h_1, w_1)
        x = self.avgpool(x)                 # (b, 256, 6, 6)
        x = x.view(x.size(0), 256 * 6 * 6)  # flatten to (b, 9216)
        logits = self.classifier(x)         # (b, d_out)
        # Return raw logits: F.cross_entropy applies log_softmax internally,
        # matching the pseudocode above. If log-probabilities are needed,
        # use F.log_softmax(logits, dim=1) paired with F.nll_loss instead.
        return logits


if __name__ == "__main__":
    ch_in, d_out = 1, 10
    x = torch.randn(2, 1, 64, 64)  # batch of two single-channel 64x64 images
    model = AlexNet(ch_in, d_out)
    y_pred = model(x)
    print(y_pred.shape)            # torch.Size([2, 10])
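To make the shapes in the feature extractor concrete, the sketch below runs a dummy batch through each stage of model.features and prints the intermediate sizes. It assumes the AlexNet class defined above; the 64x64 input resolution follows the smoke test, and the trace itself is an illustrative addition, not part of the original code.

import torch

model = AlexNet(ch_in=1, d_out=10)
x = torch.randn(2, 1, 64, 64)
for layer in model.features:
    x = layer(x)  # apply one conv / ReLU / pool stage at a time
    print(f"{layer.__class__.__name__:>12}: {tuple(x.shape)}")
print(f"  avgpool out: {tuple(model.avgpool(x).shape)}")

With a 64x64 input the final max-pool stage collapses the feature map to (2, 256, 1, 1), and the adaptive average pool expands it back to (2, 256, 6, 6); this is why the classifier's 256 * 6 * 6 input dimension matches for any reasonable input resolution.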
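The pseudocode line loss = cross_entropy(logits, y) corresponds to one training step like the following. This is a minimal sketch on random stand-in data, assuming the logits-returning forward above; the SGD optimizer and its hyperparameters are illustrative choices, not from the original post.

import torch
import torch.nn.functional as F

model = AlexNet(ch_in=1, d_out=10)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Random stand-in batch: images (b, c, h, w) and integer class labels (b,).
img = torch.randn(8, 1, 64, 64)
y = torch.randint(0, 10, (8,))

logits = model(img)                 # (b, n_class)
loss = F.cross_entropy(logits, y)   # log_softmax + NLL applied internally

optimizer.zero_grad()
loss.backward()
optimizer.step()
print(loss.item())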