Code Listing 55
import numpy as np
from random import randrange


# Written from the pseudo-code in:
# http://luthuli.cs.uiuc.edu/~daf/courses/optimization/Papers/smoTR.pdf
class SmoAlgorithm:
    def __init__(self, X, y, C, tol, kernel, use_linear_optim):
        self.X = X
        self.y = y
        self.m, self.n = np.shape(self.X)
        self.alphas = np.zeros(self.m)

        self.kernel = kernel
        self.C = C
        self.tol = tol

        self.errors = np.zeros(self.m)
        self.eps = 1e-3  # epsilon

        self.b = 0

        self.w = np.zeros(self.n)
        self.use_linear_optim = use_linear_optim
    # Compute the SVM output for example i.
    # Note that Platt uses the convention w.x-b=0,
    # while we have been using w.x+b in the book.
    def output(self, i):
        if self.use_linear_optim:
            # Equation 1
            return float(np.dot(self.w.T, self.X[i])) - self.b
        else:
            # Equation 10
            return np.sum([self.alphas[j] * self.y[j]
                           * self.kernel(self.X[j], self.X[i])
                           for j in range(self.m)]) - self.b
    # Try to solve the problem analytically.
    def take_step(self, i1, i2):
        if i1 == i2:
            return False

        a1 = self.alphas[i1]
        y1 = self.y[i1]
        X1 = self.X[i1]
        E1 = self.get_error(i1)

        s = y1 * self.y2

        # Compute the bounds of the new alpha2.
        if y1 != self.y2:
            # Equation 13
            L = max(0, self.a2 - a1)
            H = min(self.C, self.C + self.a2 - a1)
        else:
            # Equation 14
            L = max(0, self.a2 + a1 - self.C)
            H = min(self.C, self.a2 + a1)

        if L == H:
            return False
        k11 = self.kernel(X1, X1)
        k12 = self.kernel(X1, self.X[i2])
        k22 = self.kernel(self.X[i2], self.X[i2])

        # Compute the second derivative of the
        # objective function along the diagonal.
        # Equation 15
        eta = k11 + k22 - 2 * k12

        if eta > 0:
            # Equation 16
            a2_new = self.a2 + self.y2 * (E1 - self.E2) / eta

            # Clip the new alpha so that it stays at the end of the line.
            # Equation 17
            if a2_new < L:
                a2_new = L
            elif a2_new > H:
                a2_new = H
        else:
            # Under unusual circumstances, eta will not be positive.
            # Equation 19
            f1 = y1 * (E1 + self.b) - a1 * k11 - s * self.a2 * k12
            f2 = self.y2 * (self.E2 + self.b) - s * a1 * k12 \
                 - self.a2 * k22
            L1 = a1 + s * (self.a2 - L)
            H1 = a1 + s * (self.a2 - H)
            Lobj = L1 * f1 + L * f2 + 0.5 * (L1 ** 2) * k11 \
                   + 0.5 * (L ** 2) * k22 + s * L * L1 * k12
            Hobj = H1 * f1 + H * f2 + 0.5 * (H1 ** 2) * k11 \
                   + 0.5 * (H ** 2) * k22 + s * H * H1 * k12
            if Lobj < Hobj - self.eps:
                a2_new = L
            elif Lobj > Hobj + self.eps:
                a2_new = H
            else:
                a2_new = self.a2

        # If alpha2 did not change enough, the algorithm
        # returns without updating the multipliers.
        if abs(a2_new - self.a2) < self.eps * (a2_new + self.a2 + self.eps):
            return False
        # Equation 18
        a1_new = a1 + s * (self.a2 - a2_new)

        new_b = self.compute_b(E1, a1, a1_new, a2_new, k11, k12, k22, y1)

        delta_b = new_b - self.b

        self.b = new_b

        # Equation 22
        if self.use_linear_optim:
            self.w = self.w + y1 * (a1_new - a1) * X1 \
                     + self.y2 * (a2_new - self.a2) * self.X2

        # Update the error cache using the new Lagrange multipliers.
        delta1 = y1 * (a1_new - a1)
        delta2 = self.y2 * (a2_new - self.a2)

        for i in range(self.m):
            if 0 < self.alphas[i] < self.C:
                self.errors[i] += delta1 * self.kernel(X1, self.X[i]) \
                                  + delta2 * self.kernel(self.X2, self.X[i]) \
                                  - delta_b

        self.errors[i1] = 0
        self.errors[i2] = 0

        self.alphas[i1] = a1_new
        self.alphas[i2] = a2_new

        return True
    def compute_b(self, E1, a1, a1_new, a2_new, k11, k12, k22, y1):
        # Equation 20
        b1 = E1 + y1 * (a1_new - a1) * k11 + \
             self.y2 * (a2_new - self.a2) * k12 + self.b

        # Equation 21
        b2 = self.E2 + y1 * (a1_new - a1) * k12 + \
             self.y2 * (a2_new - self.a2) * k22 + self.b

        if (0 < a1_new) and (self.C > a1_new):
            new_b = b1
        elif (0 < a2_new) and (self.C > a2_new):
            new_b = b2
        else:
            new_b = (b1 + b2) / 2.0
        return new_b

    def get_error(self, i1):
        if 0 < self.alphas[i1] < self.C:
            return self.errors[i1]
        else:
            return self.output(i1) - self.y[i1]
    def second_heuristic(self, non_bound_indices):
        i1 = -1
        if len(non_bound_indices) > 1:
            max_step = 0
            for j in non_bound_indices:
                E1 = self.errors[j] - self.y[j]
                step = abs(E1 - self.E2)  # approximation
                if step > max_step:
                    max_step = step
                    i1 = j
        return i1
    def examine_example(self, i2):
        self.y2 = self.y[i2]
        self.a2 = self.alphas[i2]
        self.X2 = self.X[i2]
        self.E2 = self.get_error(i2)

        r2 = self.E2 * self.y2

        if not ((r2 < -self.tol and self.a2 < self.C) or
                (r2 > self.tol and self.a2 > 0)):
            # The KKT conditions are met, SMO looks at another example.
            return 0

        # Second heuristic A: choose the Lagrange multiplier which
        # maximizes the step size |E1 - E2|.
        non_bound_idx = list(self.get_non_bound_indexes())
        i1 = self.second_heuristic(non_bound_idx)

        if i1 >= 0 and self.take_step(i1, i2):
            return 1

        # Second heuristic B: look for examples making positive
        # progress by looping over all non-zero and non-C alphas,
        # starting at a random point.
        if len(non_bound_idx) > 0:
            rand_i = randrange(len(non_bound_idx))
            for i1 in non_bound_idx[rand_i:] + non_bound_idx[:rand_i]:
                if self.take_step(i1, i2):
                    return 1

        # Second heuristic C: look for examples making positive progress
        # by looping over all possible examples, starting at a random
        # point.
        rand_i = randrange(self.m)
        all_indices = list(range(self.m))
        for i1 in all_indices[rand_i:] + all_indices[:rand_i]:
            if self.take_step(i1, i2):
                return 1

        # In extremely degenerate circumstances, SMO skips the first example.
        return 0
    def error(self, i2):
        return self.output(i2) - self.y2

    def get_non_bound_indexes(self):
        return np.where(np.logical_and(self.alphas > 0,
                                       self.alphas < self.C))[0]

    # First heuristic: loop over examples where alpha is not 0 and not C;
    # they are the most likely to violate the KKT conditions
    # (the non-bound subset).
    def first_heuristic(self):
        num_changed = 0
        non_bound_idx = self.get_non_bound_indexes()
        for i in non_bound_idx:
            num_changed += self.examine_example(i)
        return num_changed

    def main_routine(self):
        num_changed = 0
        examine_all = True

        while num_changed > 0 or examine_all:
            num_changed = 0

            if examine_all:
                for i in range(self.m):
                    num_changed += self.examine_example(i)
            else:
                num_changed += self.first_heuristic()

            if examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True
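For reference, the quantities computed in take_step restate the following equations from Platt's pseudo-code (the numbering matches the comments in the listing):

$$
\begin{aligned}
y_1 \neq y_2:\quad & L = \max(0,\, \alpha_2 - \alpha_1), \qquad H = \min(C,\, C + \alpha_2 - \alpha_1) && (13)\\
y_1 = y_2:\quad & L = \max(0,\, \alpha_2 + \alpha_1 - C), \qquad H = \min(C,\, \alpha_2 + \alpha_1) && (14)\\
& \eta = K(\mathbf{x}_1, \mathbf{x}_1) + K(\mathbf{x}_2, \mathbf{x}_2) - 2K(\mathbf{x}_1, \mathbf{x}_2) && (15)\\
& \alpha_2^{new} = \alpha_2 + \frac{y_2(E_1 - E_2)}{\eta}, \ \text{clipped to } [L, H] && (16,\,17)\\
& \alpha_1^{new} = \alpha_1 + s\,(\alpha_2 - \alpha_2^{new}), \qquad s = y_1 y_2 && (18)
\end{aligned}
$$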
Code Listing 56 demonstrates how to instantiate an SmoAlgorithm object, run the algorithm, and print the result.

Code Listing 56
import numpy as np
from random import seed

from succinctly.datasets import linearly_separable, get_dataset
from succinctly.algorithms.smo_algorithm import SmoAlgorithm


def linear_kernel(x1, x2):
    return np.dot(x1, x2)


def compute_w(multipliers, X, y):
    return np.sum([multipliers[i] * y[i] * X[i] for i in range(len(y))],
                  axis=0)


if __name__ == '__main__':
    seed(5)  # to have reproducible results

    X_data, y_data = get_dataset(linearly_separable.get_training_examples)

    smo = SmoAlgorithm(X_data, y_data, C=10, tol=0.001,
                       kernel=linear_kernel, use_linear_optim=True)

    smo.main_routine()

    w = compute_w(smo.alphas, X_data, y_data)

    print('w = {}'.format(w))

    # -smo.b because Platt uses the convention w.x-b=0
    print('b = {}'.format(-smo.b))

    # w = [0.4443664 1.1105648]
    # b = -9.662686418
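Because the constructor takes the kernel as a parameter, the same class can train a nonlinear classifier: pass another kernel and set use_linear_optim to False, so that the output method uses the kernel expansion of Equation 10 instead of the weight vector. The following is a minimal sketch, not part of the listings above: the rbf_kernel function and its gamma value are hypothetical choices, the predict helper is our own addition, and X_data and y_data are assumed to come from Code Listing 56.

import numpy as np

def rbf_kernel(x1, x2, gamma=0.1):
    # Gaussian (RBF) kernel; the value of gamma is a hypothetical choice.
    return np.exp(-gamma * np.linalg.norm(np.array(x1) - np.array(x2)) ** 2)

def predict(smo, x):
    # Same kernel expansion as the output method (Equation 10); examples
    # with a zero multiplier contribute nothing, so only support vectors
    # actually matter. Subtracting smo.b follows Platt's w.x-b convention.
    decision = sum(smo.alphas[j] * smo.y[j] * smo.kernel(smo.X[j], x)
                   for j in range(smo.m)) - smo.b
    return np.sign(decision)

smo = SmoAlgorithm(X_data, y_data, C=10, tol=0.001,
                   kernel=rbf_kernel, use_linear_optim=False)
smo.main_routine()
print(predict(smo, X_data[0]))  # should agree with y_data[0]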