AlexNet
The AlexNet convolutional network; see the original paper and the reference code. AlexNet kicked off the deep-learning revolution in computer vision and laid the groundwork for subsequent neural network architectures. Its principle is roughly as follows:
#=> AlexNet = CNN (feature extractor) + MLP (classifier)
#=> input img - (b, c, h, w)
#=> input y   - (b,)
feat = CNN(img)                    # (b, c_1, h_1, w_1)
logits = MLP(feat.reshape(b, -1))  # (b, n_class)
loss = cross_entropy(logits, y)
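Each conv/pool stage changes the spatial size according to out = floor((in + 2*padding - kernel) / stride) + 1. As a sanity check (assuming the canonical 224x224 ImageNet input, which is not part of the demo code below), tracing that formula through the feature stack gives:

#=> 224 -> conv(k=11, s=4, p=2)    -> 55 -> pool(k=3, s=2) -> 27
#=> 27  -> conv(k=5, s=1, p=2)     -> 27 -> pool(k=3, s=2) -> 13
#=> 13  -> 3 x conv(k=3, s=1, p=1) -> 13 -> pool(k=3, s=2) -> 6

so the flattened feature vector has 256 * 6 * 6 = 9216 entries, which is exactly the input width of the first Linear layer in the implementation below.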
import torch
import torch.nn as nn
import torch.nn.functional as F
class AlexNet(nn.Module):
    def __init__(self, ch_in, d_out):
        super(AlexNet, self).__init__()
        # Convolutional feature extractor: five conv layers with ReLU,
        # interleaved with three max-pooling stages.
        self.features = nn.Sequential(
            nn.Conv2d(ch_in, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Adaptive pooling fixes the spatial size at 6x6, so the classifier
        # works for any input resolution that survives the feature stack.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        # MLP classifier with dropout regularization.
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, d_out),
        )

    def forward(self, x):
        x = self.features(x)                 # (b, 256, h_1, w_1)
        x = self.avgpool(x)                  # (b, 256, 6, 6)
        x = x.view(x.size(0), 256 * 6 * 6)   # flatten to (b, 9216)
        logits = self.classifier(x)          # (b, d_out)
        # Return log-probabilities; pair with nn.NLLLoss for training.
        # (Returning raw logits and using nn.CrossEntropyLoss is equivalent.)
        return F.log_softmax(logits, dim=1)
if __name__ == "__main__":
    ch_in, d_out = 1, 10
    x = torch.randn(2, 1, 64, 64)  # batch of 2 single-channel 64x64 images
    model = AlexNet(ch_in, d_out)
    y_pred = model(x)
    print(y_pred.shape)  # torch.Size([2, 10])
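Since forward returns log-probabilities, training pairs the model with nn.NLLLoss, which is mathematically equivalent to cross-entropy on raw logits (the `loss = cross_entropy(logits, y)` line in the sketch above). Below is a minimal single-step training sketch using random stand-in data; the batch size, learning rate, and labels are illustrative assumptions, not from the original:

model = AlexNet(ch_in=1, d_out=10)
criterion = nn.NLLLoss()  # expects log-probabilities, as returned by forward()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

x = torch.randn(8, 1, 64, 64)   # stand-in batch of 8 grayscale 64x64 images
y = torch.randint(0, 10, (8,))  # stand-in integer class labels

log_probs = model(x)            # (8, 10)
loss = criterion(log_probs, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()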