Source code for botiverse.models.SVM.SVM

#!/usr/bin/env python
# coding: utf-8

# # <font color="cyan">Support Vector Machine </font> From Scratch Implementation
# 
# In this notebook, we shall demonstrate implementing multiclass SVM from scratch using the dual formulation and quadratic programming.

# In[1]:


'''
This module implements and provides an interface for the multiclass Support Vector Machine (SVM) algorithm implemented from scratch.
'''
import numpy as np
from scipy.spatial import distance
import cvxopt
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import copy
import pickle


# ### SVM Class

# In[2]:


[docs]class SVM: ''' This class implements the kernelized soft-margin Support Vector Machine algorithm. ''' linear = lambda x, xࠤ , c=0: x @ xࠤ .T polynomial = lambda x, xࠤ , Q=5: (1 + x @ xࠤ.T)**Q rbf = lambda x, xࠤ , γ=10: np.exp(-γ * distance.cdist(x, xࠤ, 'sqeuclidean')) kernel_funs = {'linear': linear, 'polynomial': polynomial, 'rbf': rbf} def __init__(self, kernel='rbf', C=1, k=2): ''' Initialize an instance of the SVM model. :param kernel: The kernel function to use. Can be 'linear', 'polynomial', or 'rbf'. :type kernel: str :param C: The regularization parameter. :type C: float :param k: The hyperparameter for the polynomial and rbf kernels. Ignored for the linear kernel. :type k: int ''' self.kernel_str = kernel self.kernel = SVM.kernel_funs[kernel] self.C = C self.k = k self.X, y = None, None self.αs = None # for multi-class classification self.multiclass = False self.clfs = []
SVMClass = lambda func: setattr(SVM, func.__name__, func) or func # #### <font color="white">Solve the Dual Optimization Problem </font> # We want to find the vector of Lagrange multipliers $\alpha = (\alpha_1, \alpha_2, ..., \alpha_n)$ that maximizes the following dual objective function: # # $$ # \max_\alpha [ \sum_{i=1}^{n} \alpha_i - \frac{1}{2} \sum_{i=1}^{n} \sum_{j=1}^{n} \alpha_i \alpha_j y_i y_j (z_iz_j^T) ] # \quad # \text{subject to} \quad # # \sum_{i=1}^{n} \alpha_i y_i = 0, \; 0 \leq \alpha_i \leq C, \; \forall i = 1, 2, ..., n # $$ # # where $z_i = \phi(x_i)$ and $z_j = \phi(x_j)$ are higher dimensional representations of $x_i$ and $x_j$ respectively given by the transform $\phi$. # # <br> # In matrix form, this becomes # # $$\max_\alpha [ 1^Tα - \frac{1}{2} * α^T (YY^T * K) α ] \quad # \text{subject to} \quad # y^T \alpha = 0, \; 0 \leq \alpha_i \leq C, \; \forall i = 1, 2, \ldots, n # $$ # where $K[i, j] = z_i^T z_j$ and $X$, $y$ are $(N,d)$ and $(N,1)$ dimensional training data matrices respectively. # <br> # CVXOPT solves # # $$ # \min_x \frac{1}{2} x^T P x + q^T x \quad # \text{subject to} \; A x = b, \;G x ≤ h # $$ # # Hence, we have # <br> # $ x = \alpha $ # <br> # $ P = YY^T * K $ # <br> # $ q = -1^T $ # <br><br> # and for the constraints # <br> # $ A = y^T $ # <br> # $ b = 0 $ # <br> # $ G = \begin{bmatrix} -I \\ I \end{bmatrix} $ # <br> # $ h = \begin{bmatrix} 0 \\ C \end{bmatrix} $ # In[3]:
[docs]@SVMClass def fit(self, X, y, eval_train=False): ''' Fit the SVM model to the given data with N training examples and d features. :param X: The training data arranged as a float numpy array of shape (N, d) :type X: numpy.ndarray :param y: The training labels arranged as a numpy array of shape (N,) where each element is in {-1, 1} or {0, 1}. :type y: numpy.ndarray :param eval_train: Whether to print the training accuracy after training is done. :type eval_train: bool :note: This function assume C is not too small; otherwise, it becomes hard to distinguish support vectors. ''' if len(np.unique(y)) > 2: self.multiclass = True return self.multi_fit(X, y, eval_train) if set(np.unique(y)) == {0, 1}: y[y == 0] = -1 self.y = y.reshape(-1, 1).astype(np.double) # Has to be a column vector self.X = X m = X.shape[0] # compute the kernel over all possible pairs of (x, x') in the data self.K = self.kernel(X, X, self.k) # For 1/2 x^T P x + q^T x P = cvxopt.matrix(self.y @ self.y.T * self.K) q = cvxopt.matrix(-np.ones((m, 1))) # For Ax = b A = cvxopt.matrix(self.y.T) b = cvxopt.matrix(np.zeros(1)) # For Gx <= h G = cvxopt.matrix(np.vstack((-np.identity(m), np.identity(m)))) h = cvxopt.matrix(np.vstack((np.zeros((m,1)), np.ones((m,1)) * self.C))) # Solve cvxopt.solvers.options['show_progress'] = False sol = cvxopt.solvers.qp(P, q, G, h, A, b) self.αs = np.array(sol["x"]) # Maps into support vectors self.is_sv = ((self.αs > 1e-3) & (self.αs <= self.C)).squeeze() self.margin_sv = np.argmax((1e-3 < self.αs) & (self.αs < self.C - 1e-2)) if eval_train: print(f"Model finished training with accuracy {self.evaluate(X, y)}")
# #### Multiclass Case with OVR # In this, we train $K$ binary classifiers, where $K$ is the number of classes where each classifier perceives one class as +1 and all other classes as -1. # In[4]:
[docs]@SVMClass def multi_fit(self, X, y, eval_train=False): ''' Fit k classifier for k classes. :param X: The training data arranged as a float numpy array of shape (N, d) :type X: numpy.ndarray :param y: The training labels arranged as a numpy array of shape (N,) where each element is an integer between 0 and k-1 :type y: numpy.ndarray :param eval_train: Whether to print the training accuracy after training is done. :type eval_train: bool ''' self.k = len(np.unique(y)) # number of classes # for each pair of classes for i in range(self.k): # get the data for the pair Xs, Ys = X, copy.copy(y) # change the labels to -1 and 1 Ys[Ys!=i], Ys[Ys==i] = -1, +1 # fit the classifier clf = SVM(kernel=self.kernel_str, C=self.C, k=self.k) clf.fit(Xs, Ys) # save the classifier self.clfs.append(clf) if eval_train: print(f"Model finished training with accuracy {self.evaluate(X, y)}")
# #### SVM Prediction # Given a new data point $x$, we predict its label $y$ by computing: # # $$g(x) = \sum_{i=1}^{n} \alpha_i y_i k(x_i, x) + b$$ # # where $k(x_i, x)$ is the kernel function and $b= y_s - \sum_{i=1}^{n} \alpha_i y_i k(x_i, x_s)$ is the bias term. $s$ is the index of any margin support vector. # # # In[5]:
[docs]@SVMClass def predict(self, X_t): ''' Predict the labels for given test data. :param X_t: The test data arranged as a float numpy array of shape (N, d) :return: The predicted labels arranged as a numpy array of shape (N,) where each element is in {-1, 1} or {0, 1}. :return: The predicted labels arranged as a numpy array of shape (N,) and the scores for the positive class. :rtype: tuple ''' if self.multiclass: return self.multi_predict(X_t) xₛ, yₛ = self.X[self.margin_sv, np.newaxis], self.y[self.margin_sv] αs, y, X= self.αs[self.is_sv], self.y[self.is_sv], self.X[self.is_sv] b = yₛ - np.sum(αs * y * self.kernel(X, xₛ, self.k), axis=0) score = np.sum(αs * y * self.kernel(X, X_t, self.k), axis=0) + b return np.sign(score).astype(int), score
[docs]@SVMClass def evaluate(self, X,y): ''' Compare the predicted labels for given test data y with the actual labels by passing X to model. :param X: The test data arranged as a float numpy array of shape (N, d) :type X: numpy.ndarray :param y: The test labels arranged as a numpy array of shape (N,) where each element is an integer between 0 and k-1 :type y: numpy.ndarray :return: The accuracy of the model on the given test data. :rtype: float ''' outputs, _ = self.predict(X) accuracy = np.sum(outputs == y) / len(y) return round(accuracy, 2)
# #### Multiclass Case with OVR # Each classifier compute the score of a class against all other classes. The class with the highest score is the predicted class. # In[6]:
[docs]@SVMClass def multi_predict(self, X): ''' Predict the labels for given test data. :param X: The test data arranged as a float numpy array of shape (N, d) :type X: numpy.ndarray :return: The predicted labels arranged as a numpy array of shape (N,) where each element is an integer between 0 and k-1 and the corresponding highest scores. :rtype: tuple ''' # get the predictions from all classifiers preds = np.zeros((X.shape[0], self.k)) for i, clf in enumerate(self.clfs): _, preds[:, i] = clf.predict(X) # transform the predictions into probabilities using softmax preds = np.exp(preds) / np.sum(np.exp(preds), axis=1, keepdims=True) # get the argmax and the corresponding score return np.argmax(preds, axis=1), np.max(preds, axis=1)
# ### Example # In[7]: # if running from notebook if __name__ == '__main__': get_ipython().system('jupyter nbconvert --to script SVM.ipynb')