RNN

May 2, 2024

An RNN (Recurrent Neural Network) is a class of neural networks for processing sequential data. Unlike traditional feedforward neural networks, an RNN has recurrent connections: at each time step it receives the current input together with the previous step's hidden state, and produces the hidden state for the current step. This structure lets an RNN capture temporal dependencies in sequence data.

The script below builds a small next-token language model around nn.RNN and runs a single training step:

```python
import torch
import torch.nn as nn
from torch.optim import Adam


class RNN(nn.Module):
    def __init__(self, d_emb, d_hid, vocab_size, n_layer, dropout=0.5):
        super().__init__()
        self.drop = nn.Dropout(dropout)
        self.emb = nn.Embedding(vocab_size, d_emb)
        self.enc = nn.RNN(d_emb, d_hid, n_layer, dropout=dropout)
        self.dec = nn.Linear(d_hid, vocab_size)
        self.init_weights()

    def init_weights(self):
        a = 0.1
        nn.init.uniform_(self.emb.weight, -a, a)
        nn.init.uniform_(self.dec.weight, -a, a)
        nn.init.zeros_(self.dec.bias)

    def forward(self, x, hid):
        # x: (seq_len, bsz) token ids; hid: (n_layer, bsz, d_hid)
        emb = self.drop(self.emb(x))  # (seq_len, bsz, d_emb)
        seq_len, bsz, d_emb = emb.shape
        # hs: (seq_len, bsz, d_hid) -- top-layer hidden state at every step
        # h : (n_layer, bsz, d_hid) -- last-step hidden state of every layer
        # hs[-1] == h[-1]
        hs, h = self.enc(emb, hid)
        hs = self.drop(hs)
        logits = self.dec(hs)  # (seq_len, bsz, vocab_size)
        logits = logits.view(seq_len * bsz, -1)
        return logits, h


if __name__ == "__main__":
    # config
    d_emb, d_hid, vocab_size, n_layer = 8, 4, 16, 5
    bsz, lr = 2, 1e-4

    # data: shape (seq_len, bsz); y is x shifted one step (next-token targets)
    x = torch.tensor([[1, 7], [2, 8], [3, 9]], dtype=torch.long)
    y = torch.tensor([[2, 8], [3, 9], [4, 10]], dtype=torch.long)

    # model
    model = RNN(d_emb, d_hid, vocab_size, n_layer)
    optimizer = Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    # training: one gradient step from a zero initial hidden state
    h = torch.zeros(n_layer, bsz, d_hid)
    optimizer.zero_grad()
    logits, h = model(x, h)
    loss = loss_fn(logits, y.reshape(-1))
    loss.backward()
    optimizer.step()
    print(loss.item())
```
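For reference, the recurrence that each nn.RNN layer computes (with its default tanh nonlinearity, as documented by PyTorch) can be written as

$$
h_t = \tanh\left(x_t W_{ih}^{\top} + b_{ih} + h_{t-1} W_{hh}^{\top} + b_{hh}\right)
$$

where $x_t$ is the layer's input at step $t$ (the token embedding for the first layer, the layer below's output for the others) and $h_{t-1}$ is that layer's hidden state from the previous step.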
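To make the recurrence concrete, here is a small sketch (not part of the original post) that unrolls a single-layer nn.RNN by hand and checks every step against the module's own output. The parameter names weight_ih_l0, weight_hh_l0, bias_ih_l0, and bias_hh_l0 are the ones PyTorch exposes on nn.RNN; the sizes are arbitrary.

```python
import torch
import torch.nn as nn

torch.manual_seed(0)
rnn = nn.RNN(input_size=3, hidden_size=2, num_layers=1)
x = torch.randn(4, 1, 3)   # (seq_len, bsz, input_size)
h0 = torch.zeros(1, 1, 2)  # (num_layers, bsz, hidden_size)

with torch.no_grad():
    hs_ref, _ = rnn(x, h0)  # reference: let the module run the whole sequence

    # manual unroll: h_t = tanh(x_t W_ih^T + b_ih + h_{t-1} W_hh^T + b_hh)
    ht = h0[0]              # (bsz, hidden_size)
    for t in range(x.shape[0]):
        ht = torch.tanh(
            x[t] @ rnn.weight_ih_l0.T + rnn.bias_ih_l0
            + ht @ rnn.weight_hh_l0.T + rnn.bias_hh_l0
        )
        assert torch.allclose(ht, hs_ref[t], atol=1e-6)

print("manual unroll matches nn.RNN")
```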
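The script above stops after a single optimizer step. As a usage sketch (again an assumption, not something the original covers), the trained model can be run autoregressively with greedy decoding: feed one token at a time, carry the hidden state forward, and take the argmax at each step. This snippet continues from the training script, reusing model, n_layer, and d_hid; the seed token and step count are arbitrary.

```python
# greedy-decoding sketch; assumes the training script above has already run
model.eval()  # disable dropout for inference
with torch.no_grad():
    h = torch.zeros(n_layer, 1, d_hid)  # fresh hidden state, batch of 1
    x = torch.tensor([[1]])             # arbitrary seed token, shape (seq_len=1, bsz=1)
    tokens = [1]
    for _ in range(5):
        logits, h = model(x, h)         # logits: (1, vocab_size)
        nxt = logits[-1].argmax().item()
        tokens.append(nxt)
        x = torch.tensor([[nxt]])
print(tokens)
```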