Source code for botiverse.models.GRUClassifier.GRUClassifier

import torch
import torch.nn as nn

class BasicGRU(nn.Module):
    """A minimal GRU cell whose input and hidden state share one feature size.

    The input and the previous hidden state are combined by element-wise
    addition (not concatenation) before each gate's linear layer, so both
    tensors must be broadcast-compatible and end in ``input_size`` features.
    """

    def __init__(self, input_size, dropout_p=0.1):
        """
        Constructs a BasicGRU instance with specific layer sizes and dropout probability.

        :param input_size: The size of the input to the model; also used as the hidden size.
        :type input_size: int

        :param dropout_p: Dropout probability applied to the hidden state after every step.
        :type dropout_p: float

        :returns: None
        """
        super(BasicGRU, self).__init__()
        # Kept as a public attribute for backward compatibility. It only
        # records whether CUDA is available; it does not track where the
        # module's weights actually live, so initHidden does not rely on it.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.hidden_size = input_size
        # update gate
        self.W_z = nn.Linear(input_size, input_size)
        # reset gate
        self.W_r = nn.Linear(input_size, input_size)
        # new memory gate
        self.W_h = nn.Linear(input_size, input_size)
        # activation functions
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        # dropout (for regularization)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input, hidden):
        """
        Performs a single recurrent step and returns the updated hidden state.

        :param input: The features of the current sequence element.
        :type input: Tensor

        :param hidden: The previous hidden state.
        :type hidden: Tensor

        :returns: New hidden state (after dropout).
        :rtype: Tensor
        """
        # input and hidden are summed, so they must have matching feature size
        combined = input + hidden
        # update activation vector calculation
        z = self.sigmoid(self.W_z(combined))
        # reset activation vector calculation
        r = self.sigmoid(self.W_r(combined))
        # new memory vector calculation (reset gate scales the old memory)
        h = self.tanh(self.W_h(input + r * hidden))
        # set the memory with the weighted sum of old and new memory
        hidden = (1 - z) * hidden + z * h
        # dropout is applied to the state that is carried to the next step
        return self.dropout(hidden)

    # initialize the hidden state with zeros (initial memory)
    def initHidden(self, batch_size):
        """
        Creates a tensor of zeros for the hidden state initialization.

        The tensor is created with the device and dtype of this module's
        weights, so it stays compatible with the module after ``.to(...)``,
        ``.cuda()``, ``.cpu()`` or ``.double()`` calls.

        :param batch_size: The size of the batch for which the hidden state is to be initialized.
        :type batch_size: int

        :returns: Tensor of zeros of the shape (batch_size, 1, hidden_size).
        :rtype: Tensor
        """
        reference = self.W_z.weight
        return torch.zeros(
            batch_size,
            1,
            self.hidden_size,
            dtype=reference.dtype,
            device=reference.device,
        )

class GRUTextClassifier(nn.Module):
    """A text classifier built from an embedding layer, a BasicGRU unit and a linear output layer.

    The GRU's hidden size equals ``embedding_size`` because the BasicGRU unit
    is constructed with the embedding size as its only layer size.
    """

    def __init__(self, vocabulary, embedding_size, output_size, dropout_p=0.1):
        """
        Constructs a GRUTextClassifier instance with specific hyperparameters.

        :param vocabulary: The size of vocabulary used in the Embedding layer.
        :type vocabulary: int

        :param embedding_size: The size of each embedding vector.
        :type embedding_size: int

        :param output_size: The size of the output from the model (number of classes).
        :type output_size: int

        :param dropout_p: A regularization parameter passed to the GRU unit.
        :type dropout_p: float

        :returns: None
        """
        super(GRUTextClassifier, self).__init__()
        # the embedding layer
        self.embedding = nn.Embedding(vocabulary, embedding_size)
        # the GRU layer
        self.gru = BasicGRU(embedding_size, dropout_p)
        # the output layer
        self.h2o = nn.Linear(embedding_size, output_size)
        # activation function (log of the softmax over the class dimension)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        """
        Defines the computation performed by the model.

        :param input: Token indices; indexed as (batch, sequence) by this method.
        :type input: Tensor

        :returns: Log-probabilities over the classes (LogSoftmax output), one row per batch item.
        :rtype: Tensor
        """
        batch_size = input.size(0)
        # embed the input
        embedded = self.embedding(input)
        # get the first hidden state and make sure it lives where the
        # embeddings live, so the element-wise sum inside the GRU cannot hit
        # a device or dtype mismatch
        hidden = self.gru.initHidden(batch_size).to(
            device=embedded.device, dtype=embedded.dtype
        )
        # pass the input through the GRU layer for each token in the sequence;
        # the slice keeps the middle dimension so it matches the hidden state
        for step in range(embedded.size(1)):
            hidden = self.gru(embedded[:, step:step + 1, :], hidden)
        # pass the last hidden state through the output layer
        output = self.h2o(hidden.squeeze(1))
        # apply log-softmax
        return self.softmax(output)