Skip to content

Files

Latest commit

590e50c · Jan 11, 2022

History

History
292 lines (226 loc) · 8.98 KB

11.md

File metadata and controls

292 lines (226 loc) · 8.98 KB

十一、附录 B:SMO 算法

代码清单 55

import numpy as np
  from random import randrange 
  # Written from the pseudo-code in :
# http://luthuli.cs.uiuc.edu/~daf/courses/optimization/Papers/smoTR.pdf
  class SmoAlgorithm:
    def  __init__(self, X, y, C, tol, kernel, use_linear_optim):
        self.X = X
        self.y = y
        self.m, self.n = np.shape(self.X)
        self.alphas = np.zeros(self.m)

        self.kernel = kernel
        self.C = C
        self.tol = tol

        self.errors = np.zeros(self.m)
        self.eps = 1e-3 # epsilon

        self.b = 0

        self.w = np.zeros(self.n)
        self.use_linear_optim = use_linear_optim

    # Compute the SVM output for example i
    # Note that Platt uses the convention w.x-b=0 
    # while we have been using w.x+b in the book.
    def  output(self, i):
        if  self.use_linear_optim:
            # Equation 1
            return  float(np.dot(self.w.T, self.X[i])) - self.b
        else:
            # Equation 10
            return  np.sum([self.alphas[j] * self.y[j]
                           * self.kernel(self.X[j], self.X[i])
                           for  j in  range(self.m)]) - self.b

        # Try to solve the problem analytically.
    def  take_step(self, i1, i2):

        if  i1 == i2:
            return False

          a1 = self.alphas[i1]
        y1 = self.y[i1]
        X1 = self.X[i1]
        E1 = self.get_error(i1)

        s = y1 * self.y2

        # Compute the bounds of the new alpha2.
        if  y1 != self.y2:
            # Equation 13
            L = max(0, self.a2 - a1)
            H = min(self.C, self.C + self.a2 - a1)
        else:
            # Equation 14
            L = max(0, self.a2 + a1 - self.C)
            H = min(self.C, self.a2 + a1)

        if  L == H:
            return False

        k11 = self.kernel(X1, X1)
        k12 = self.kernel(X1, self.X[i2])
        k22 = self.kernel(self.X[i2], self.X[i2])

        # Compute the second derivative of the
        # objective function along the diagonal.
        # Equation 15
        eta = k11 + k22 - 2 * k12

        if  eta > 0:
            # Equation 16
            a2_new = self.a2 + self.y2 * (E1 - self.E2) / eta

            # Clip the new alpha so that is stays at the end of the line.
            # Equation 17
            if  a2_new < L:
                a2_new = L
            elif a2_new > H:
                a2_new = H
        else:
            # Under unusual cicumstances, eta will not be positive.
            # Equation 19
            f1 = y1 * (E1 + self.b) - a1 * k11 - s * self.a2 * k12
            f2 = self.y2 * (self.E2 + self.b) - s * a1 * k12 \                 - self.a2 * k22
            L1 = a1 + s(self.a2 - L)
            H1 = a1 + s * (self.a2 - H)
            Lobj = L1 * f1 + L * f2 + 0.5 * (L1 ** 2) * k11 \
                   + 0.5 * (L ** 2) * k22 + s * L * L1 * k12
            Hobj = H1 * f1 + H * f2 + 0.5 * (H1 ** 2) * k11 \
                   + 0.5 * (H ** 2) * k22 + s * H * H1 * k12

            if  Lobj < Hobj - self.eps:
                a2_new = L
            elif Lobj > Hobj + self.eps:
                a2_new = H
            else:
                a2_new = self.a2

        # If alpha2 did not change enough the algorithm
        # returns without updating the multipliers.
        if  abs(a2_new - self.a2) < self.eps * (a2_new + self.a2 \                                               + self.eps):
            return False

          # Equation 18
        a1_new = a1 + s * (self.a2 - a2_new)

        new_b = self.compute_b(E1, a1, a1_new, a2_new, k11, k12, k22, y1)

        delta_b = new_b - self.b

        self.b = new_b

        # Equation 22
        if  self.use_linear_optim:
            self.w = self.w + y1*(a1_new - a1)*X1 \
                     + self.y2*(a2_new - self.a2) * self.X2

        # Update the error cache using the new Lagrange multipliers.
        delta1 = y1 * (a1_new - a1)
        delta2 = self.y2 * (a2_new - self.a2)

        # Update the error cache.
        for  i in  range(self.m):
            if  0 < self.alphas[i] < self.C:
                self.errors[i] += delta1 * self.kernel(X1, self.X[i]) + \
                                delta2 * self.kernel(self.X2,self.X[i]) \                                - delta_b

        self.errors[i1] = 0
        self.errors[i2] = 0

        self.alphas[i1] = a1_new
        self.alphas[i2] = a2_new

        return True

    def compute_b(self, E1, a1, a1_new, a2_new, k11, k12, k22, y1):
        # Equation 20
        b1 = E1 + y1 * (a1_new - a1) * k11 + \
             self.y2 * (a2_new - self.a2) * k12 + self.b

        # Equation 21
        b2 = self.E2 + y1 * (a1_new - a1) * k12 + \
             self.y2 * (a2_new - self.a2) * k22 + self.b

        if  (0 < a1_new) and (self.C > a1_new):
            new_b = b1
        elif (0 < a2_new) and (self.C > a2_new):
            new_b = b2
        else:
            new_b = (b1 + b2) / 2.0
        return  new_b

    def  get_error(self, i1):
        if  0 < self.alphas[i1] <self.C:
            return  self.errors[i1]
        else:
            return  self.output(i1) - self.y[i1]

    def  second_heuristic(self, non_bound_indices):
        i1 = -1
        if  len(non_bound_indices) > 1:
            max = 0

            for  j in  non_bound_indices:
                E1 = self.errors[j] - self.y[j]
                step = abs(E1 - self.E2) # approximation
                if  step > max:
                    max = step
                    i1 = j
        return  i1

    def  examine_example(self, i2):
        self.y2 = self.y[i2]
        self.a2 = self.alphas[i2]
        self.X2 = self.X[i2]
        self.E2 = self.get_error(i2)

        r2 = self.E2 * self.y2

        if not((r2 < -self.tol and self.a2 < self.C) or
                (r2 > self.tol and self.a2 > 0)):
            # The KKT conditions are met, SMO looks at another example.
            return  0

        # Second heuristic A: choose the Lagrange multiplier which
        # maximizes the absolute error.
        non_bound_idx = list(self.get_non_bound_indexes())
        i1 = self.second_heuristic(non_bound_idx)

        if  i1 = > 0 and self.take_step(i1, i2):
            return  1

        # Second heuristic B: Look for examples making positive        
  # progress by looping over all non-zero and non-C alpha,         # starting at a random point.
        if  len(non_bound_idx) > 0:
            rand_i = randrange(len(non_bound_idx))
            for  i1 in  non_bound_idx[rand_i:] + non_bound_idx[:rand_i]:
                if  self.take_step(i1, i2):
                    return  1

        # Second heuristic C: Look for examples making positive progress        
  # by looping over all possible examples, starting at a random         # point.
        rand_i = randrange(self.m)
        all_indices = list(range(self.m))
        for  i1 in  all_indices[rand_i:] + all_indices[:rand_i]:
            if  self.take_step(i1, i2):
                return  1

        # Extremely degenerate circumstances, SMO skips the first example.
        return  0

    def  error(self, i2):
        return  self.output(i2) - self.y2

    def  get_non_bound_indexes(self):
        return  np.where(np.logical_and(self.alphas > 0,
                                       self.alphas < self.C))[0]

    # First heuristic: loop over examples where alpha is not 0 and not C
    # they are the most likely to violate the KKT conditions
    # (the non-bound subset).
    def  first_heuristic(self):
        num_changed = 0
        non_bound_idx = self.get_non_bound_indexes()

        for  i in  non_bound_idx:
            num_changed += self.examine_example(i)
        return  num_changed

    def  main_routine(self):
        num_changed = 0
        examine_all = True

        while num_changed > 0 or examine_all:
            num_changed = 0

            if  examine_all:
                for  i in  range(self.m):
                    num_changed += self.examine_example(i)
            else:
                num_changed += self.first_heuristic()

            if  examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True

代码清单 56 演示了如何实例化一个 SmoAlgorithm 对象,运行算法,并打印结果。

代码清单 56

# Fixed: the original read `import numby as np` (typo), which raises
# ModuleNotFoundError before the script can run.
import numpy as np
from random import seed

from succinctly.datasets import linearly_separable, get_dataset
from succinctly.algorithms.smo_algorithm import SmoAlgorithm

  def  linear_kernel(x1, x2):
    return  np.dot(x1, x2)

  def  compute_w(multipliers, X, y):
    return  np.sum(multipliers[i] * y[i] * X[i] for  i in  range(len(y)))

  if  __name__ == '__main__':

    seed(5) # to have reproducible results

    X_data, y_data = get_dataset(linearly_separable.get_training_examples)

    smo = SmoAlgorithm(X_data, y_data, C=10, tol=0.001, kernel=linear_kernel, use_linear_optim=True)
    smo.main_routine()

    w = compute_w(smo.alphas, X_data, y_data)

    print ('w = {}'.format(w))

    # -smo.b because Platt uses the convention w.x-b=0
    print ('b = {}'.format(-smo.b))

    # w = [0.4443664  1.1105648]
    # b = -9.662686418%2