Source code for botiverse.models.NN.utils

import numpy as np

def split_data(X, y, val_ratio):
    '''
    Randomly split a dataset into training and validation subsets.

    The samples are shuffled with a single random permutation (drawn from
    numpy's global random state) that is applied to both ``X`` and ``y``, so
    every sample stays paired with its label.

    :param X: Array of samples, indexed along its first axis.
    :param y: Array of labels with one entry per sample in ``X``.
    :param val_ratio: Fraction of the samples to place in the validation set;
        must lie in ``[0, 1]``. The validation size is ``int(N * val_ratio)``,
        i.e. it is rounded down.
    :return: The tuple ``(X_t, y_t, X_v, y_v)``: training samples, training
        labels, validation samples and validation labels.
    :raises ValueError: If ``val_ratio`` is outside ``[0, 1]``.
    '''
    # A ratio above 1 would make the split point negative and the slices
    # below would silently return a meaningless partition, so reject it.
    if not 0 <= val_ratio <= 1:
        raise ValueError(
            f"val_ratio must be between 0 and 1, got {val_ratio!r}"
        )

    N = X.shape[0]

    # Shuffle samples and labels with the same permutation.
    index = np.random.permutation(N)
    X, y = X[index], y[index]

    # The last V shuffled samples form the validation set.
    V = int(N * val_ratio)
    cut = N - V
    return X[:cut], y[:cut], X[cut:], y[cut:]
def batchify(x_data, y_data, batch_size):
    '''
    Shuffle a dataset, one-hot encode its labels and group it into batches.

    With ``N`` samples, ``B = batch_size`` and ``K`` distinct labels, the data
    is shuffled, truncated to the largest multiple of ``B`` not exceeding
    ``N`` and cut into ``N // B`` batches of ``B`` samples each.

    :param x_data: Array of samples of shape ``(N, d)``.
    :param y_data: 1-D array of ``N`` integer class labels. The labels are
        used directly as indices into a ``K x K`` identity matrix, so they are
        expected to be ``0 .. K-1`` where ``K`` is the number of distinct
        values present in ``y_data``.
    :param batch_size: Number of samples per batch; must be between 1 and
        ``N``.
    :return: ``(x_batches, y_batches)`` where ``x_batches`` has shape
        ``(N // B, B, d, 1)`` and ``y_batches`` has shape
        ``(N // B, B, K, 1)``; each label is a one-hot column vector.
    :raises ValueError: If ``batch_size`` is not positive or exceeds the
        number of samples (no complete batch could be formed).
    '''
    N = x_data.shape[0]
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size!r}")
    if N < batch_size:
        # Previously this surfaced as a ZeroDivisionError inside np.split.
        raise ValueError(
            f"batch_size ({batch_size}) exceeds the number of samples ({N})"
        )

    # Shuffle samples and labels with the same permutation.
    index = np.random.permutation(N)
    x_data, y_data = x_data[index], y_data[index]

    # Add a trailing axis so every sample becomes a column vector.
    x_data = x_data[..., np.newaxis]

    # One-hot encode: row y of the identity matrix is the encoding of label y.
    # The identity matrix is built once rather than once per sample.
    num_classes = len(np.unique(y_data))
    y_data = np.identity(num_classes)[y_data][..., np.newaxis]

    # Drop the trailing samples that do not fill a complete batch.
    num_batches = N // batch_size
    usable = num_batches * batch_size
    x_data = x_data[:usable]
    y_data = y_data[:usable]

    # Group consecutive samples into batches along a new leading axis.
    x_batches = x_data.reshape((num_batches, batch_size) + x_data.shape[1:])
    y_batches = y_data.reshape((num_batches, batch_size) + y_data.shape[1:])
    return x_batches, y_batches