Source code for botiverse.models.GRUClassifier.GRUClassifier

import torch
import torch.nn as nn

class BasicGRU(nn.Module):
    """A minimal GRU cell whose hidden size equals its input size.

    Unlike the textbook GRU, the input and the hidden state are *summed*
    before being fed to each gate, so a single ``Linear`` layer per gate is
    shared by both (there are no separate input/hidden weight matrices).
    """

    def __init__(self, input_size, dropout_p=0.1):
        """
        Constructs a BasicGRU instance with specific layer sizes and dropout probability.

        :param input_size: The size of the input to the model (also used as the hidden size).
        :type input_size: int
        :param dropout_p: Dropout probability applied to the returned hidden state.
        :type dropout_p: float

        :returns: None
        """
        super().__init__()

        # Legacy attribute kept for callers that read it. It only reflects
        # CUDA availability at construction time and is NOT updated by
        # ``.to()`` / ``.cuda()`` / ``.cpu()``; ``initHidden`` no longer relies on it.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.hidden_size = input_size

        # NOTE: layers are created in a fixed order (update, reset, new memory)
        # so that seeded parameter initialisation stays reproducible.
        # update gate
        self.W_z = nn.Linear(input_size, input_size)
        # reset gate
        self.W_r = nn.Linear(input_size, input_size)
        # new memory gate
        self.W_h = nn.Linear(input_size, input_size)

        # activation functions
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

        # dropout (for regularization)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input, hidden):
        """
        Performs one recurrent step.

        ``input`` and ``hidden`` are added element-wise, so they must be
        broadcast-compatible and their last dimension must equal
        ``hidden_size`` (the gates are ``Linear(hidden_size, hidden_size)``).

        :param input: The provided sequence input for the current step.
        :type input: Tensor
        :param hidden: The provided hidden state.
        :type hidden: Tensor

        :returns: New hidden state (with dropout applied in training mode).
        :rtype: Tensor
        """
        combined = input + hidden

        # update activation vector calculation
        z = self.sigmoid(self.W_z(combined))

        # reset activation vector calculation
        r = self.sigmoid(self.W_r(combined))

        # new memory vector calculation (the reset gate scales the old state)
        h = self.tanh(self.W_h(input + r * hidden))

        # set the memory with the weighted sum of old and new memory
        hidden = (1 - z) * hidden + z * h

        # apply dropout; the dropped-out state is what gets carried forward
        hidden = self.dropout(hidden)

        return hidden

    # initialize the hidden state with zeros (initial memory)
    def initHidden(self, batch_size):
        """
        Creates a tensor of zeros for the hidden state initialization.

        The tensor is allocated with the same device and dtype as the cell's
        parameters, so it stays compatible after the module has been moved or
        cast (``.to()``, ``.cuda()``, ``.cpu()``, ``.half()``, ``.double()``).

        :param batch_size: The size of the batch for which the hidden state is to be initialized.
        :type batch_size: int

        :returns: Tensor of zeros of the shape (batch_size, 1, hidden_size).
        :rtype: Tensor
        """
        reference = self.W_z.weight
        return torch.zeros(
            batch_size,
            1,
            self.hidden_size,
            dtype=reference.dtype,
            device=reference.device,
        )
class GRUTextClassifier(nn.Module):
    """Text classifier: embedding layer -> BasicGRU cell -> linear output layer.

    The GRU cell is unrolled over the token dimension and the final hidden
    state is projected to ``output_size`` class scores, which are returned as
    log-probabilities.
    """

    def __init__(self, vocabulary, embedding_size, output_size, dropout_p=0.1):
        """
        Builds the classifier layers.

        :param vocabulary: Number of entries in the embedding table (vocabulary size).
        :type vocabulary: int
        :param embedding_size: Dimension of each embedding vector; also the GRU hidden size.
        :type embedding_size: int
        :param output_size: Number of classes produced by the output layer.
        :type output_size: int
        :param dropout_p: Dropout probability forwarded to the GRU cell.
        :type dropout_p: float

        :returns: None
        """
        super().__init__()

        # token index -> dense vector
        self.embedding = nn.Embedding(vocabulary, embedding_size)

        # recurrent cell operating directly on the embedding dimension
        self.gru = BasicGRU(embedding_size, dropout_p)

        # hidden state -> class scores
        self.h2o = nn.Linear(embedding_size, output_size)

        # normalises class scores into log-probabilities along dim 1
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        """
        Runs the classifier on a batch of token sequences.

        :param input: Token indices; dimension 0 is used as the batch and
            dimension 1 as the sequence position.
        :type input: Tensor

        :returns: Log-probabilities over the classes (output of ``LogSoftmax``
            along dim 1), one row per batch element.
        :rtype: Tensor
        """
        # start from an all-zero memory, one row per batch element
        state = self.gru.initHidden(input.shape[0])

        # look up the embedding of every token
        embedded = self.embedding(input)

        # feed the tokens to the GRU cell one position at a time,
        # keeping the sequence axis so each slice has length 1 on dim 1
        n_steps = embedded.shape[1]
        for step in range(n_steps):
            token = embedded.narrow(1, step, 1)
            state = self.gru(token, state)

        # drop the length-1 sequence axis and project the final state
        logits = self.h2o(state.squeeze(1))

        return self.softmax(logits)