Recurrent Neural Networks (RNNs) are specialized neural networks designed for processing sequential data. In many ways, RNNs are the predecessors of transformers, and they are in an interesting position right now, as transformers have replaced them in many of their applications. Contrary to what some people will say, they still have their use cases - see the table further down on this page. RNNs maintain an internal memory of previous inputs, which makes them particularly effective for tasks involving time series, natural language processing, and speech recognition. You can find a good visual explanation of RNNs here.
RNNs are built on several key concepts that enable them to effectively process sequential data.
The structure of an RNN consists of:
- An input layer that receives one element of the sequence at each timestep
- A recurrent hidden layer whose hidden state is carried forward from one timestep to the next, acting as the network's memory
- An output layer that produces a prediction from the hidden state
The main operations in RNNs include:
- Updating the hidden state by combining the current input with the previous hidden state, typically through learned linear transformations followed by a tanh activation (sketched in code just below)
- Computing an output at each timestep, or only at the final timestep, from the hidden state
- Training with backpropagation through time, which unrolls the recurrence across the sequence
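To make the recurrence concrete, here is a minimal NumPy sketch of a single-layer RNN processing a sequence one timestep at a time. The weight names (W_xh, W_hh, b_h) and the toy dimensions are illustrative, not taken from any particular library.

import numpy as np

def rnn_step(x_t, h_prev, W_xh, W_hh, b_h):
    # Combine the current input with the previous hidden state,
    # then squash through tanh to produce the new hidden state
    return np.tanh(x_t @ W_xh + h_prev @ W_hh + b_h)

# Toy dimensions: 20 input features, 64 hidden units
rng = np.random.default_rng(0)
W_xh = rng.normal(size=(20, 64)) * 0.01  # input-to-hidden weights
W_hh = rng.normal(size=(64, 64)) * 0.01  # hidden-to-hidden (recurrent) weights
b_h = np.zeros(64)

h = np.zeros(64)                      # initial hidden state
sequence = rng.normal(size=(10, 20))  # 10 timesteps, 20 features each
for x_t in sequence:
    h = rnn_step(x_t, h, W_xh, W_hh, b_h)  # the hidden state carries memory forward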
import tensorflow as tf
from tensorflow.keras import layers, models

def create_rnn_model(input_shape, num_classes):
    model = models.Sequential([
        # Input layer
        layers.Input(shape=input_shape),

        # RNN layers
        layers.SimpleRNN(64, return_sequences=True),
        layers.SimpleRNN(32),

        # Dense layers
        layers.Dense(16, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Example usage
input_shape = (10, 20)  # Sequence length of 10, 20 features per timestep
num_classes = 5
model = create_rnn_model(input_shape, num_classes)
model.summary()
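As a quick sanity check, the compiled model can be fit on random placeholder data. The shapes below match input_shape and num_classes above, but the data itself, the epoch count, and the batch size are purely illustrative.

import numpy as np

# Random placeholder data: 100 sequences of 10 timesteps with 20 features,
# and 5 one-hot encoded classes (matching input_shape and num_classes above)
x_train = np.random.rand(100, 10, 20).astype('float32')
y_train = tf.keras.utils.to_categorical(np.random.randint(0, 5, size=100), num_classes=5)

model.fit(x_train, y_train, epochs=3, batch_size=16, validation_split=0.2)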
import torch
import torch.nn as nn
import torch.nn.functional as F

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size

        # RNN layers (nn.RNN returns the full output sequence by default,
        # so no Keras-style return_sequences argument is needed)
        self.rnn1 = nn.RNN(input_size, hidden_size, batch_first=True)
        self.rnn2 = nn.RNN(hidden_size, hidden_size, batch_first=True)

        # Dense layers
        self.fc1 = nn.Linear(hidden_size, 16)
        self.fc2 = nn.Linear(16, num_classes)

    def forward(self, x):
        # Initialize hidden state for the first RNN layer
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)

        # RNN layers (the second layer's hidden state defaults to zeros)
        out, _ = self.rnn1(x, h0)
        out, _ = self.rnn2(out)

        # Dense layers, applied to the last timestep only
        out = F.relu(self.fc1(out[:, -1, :]))
        out = self.fc2(out)

        # Softmax mirrors the Keras example; if training with
        # nn.CrossEntropyLoss, return the raw logits instead
        return F.softmax(out, dim=1)

# Example usage
input_size = 20    # Number of features
hidden_size = 64   # Number of hidden units
num_classes = 5    # Number of output classes
model = RNN(input_size, hidden_size, num_classes)
print(model)
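A quick forward pass on placeholder data confirms the expected output shape; the batch size and sequence length here are arbitrary and only meant to match the dimensions defined above.

# Placeholder batch: 8 sequences of 10 timesteps with 20 features each
dummy_batch = torch.randn(8, 10, input_size)
probabilities = model(dummy_batch)             # shape: (8, num_classes)
predicted_classes = probabilities.argmax(dim=1)
print(probabilities.shape, predicted_classes)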