支持向量机
本节将依SMO算法训练线性SVM, 核方法的使用可以很方便的进行扩展.
import numpy as np
import matplotlib.pyplot as plt
下面的Point的类中的:
[g(x) = sum_{i=1}^N alpha_i y_i k(x_i, x) + b,
]
[e = g - y, yg = y * g
]
class Point:
"""
因为在SM0算法中,我们需要不断更新以及判断数据点的
各种属性,所以创建了一个新的数据类型Point.
"""
def __init__(self, x, y, k, order,
g, alpha, c):
assert y == 1 or y == -1, "Invalid y"
self.x = x #数据向量
self.y = y # 类
self.order = order #序
self.k = k #距离矩阵
self.c = c
self.g = g
self.yg = g * y #衡量违背条件的指标
self.alpha = alpha
self.e = g - y
def kkt(self, epsilon=0.001):
"""
检查是否符合KKT条件
:epsilon: 条件判断时允许的误差
:return: 0 表示符合, 1 表示 不符合 其原因是便于统计不满足kkt的数据点的总量
"""
alpha = self.alpha
cond = self.y * self.g
if alpha == 0. and cond < 1 - epsilon:
self.satisfy_kkt = False
elif 0 < alpha < self.c and (cond <1 - epsilon or cond > 1 + epsilon):
self.satisfy_kkt = False
elif alpha == self.c and cond > 1 + epsilon:
self.satisfy_kkt = False
else:
self.satisfy_kkt = True
if self.satisfy_kkt:
return 0
else:
return 1
def update_g_e(self, dog1, dog2, oldb, newb, newalpha1, newalpha2):
"""
更新g, e参数
"""
k1 = self.k[self.order, dog1.order]
k2 = self.k[self.order, dog2.order]
self.g = self.g + (newalpha1- dog1.alpha) * dog1.y * k1
+ (newalpha2- dog2.alpha) * dog2.y * k2
+ newb - oldb
self.e = self.g - self.y
self.yg = self.g * self.y
下面是Points类的定义, 用训练和预测,值得一提的是在initial函数中对于alpha的初始化,从([0, self.c / self.size])中依照均匀分布选取,并且对最后一个样本的进行特别处理,以保证(sum alpha_i y_i=0), 这意味着(alpha)将是一个可行解, 一些文章中似乎是将alpha初始化为0的,不知道按照这种方式怎么进行更新迭代,因为:
[alpha_2^{new, unc} = alpha_2^{old} + frac{y_2(E_1-E_2)}{eta},
]
可(alpha^{old} = 0, b = 0, Rightarrow E=0 Rightarrow alpha^{new}=0). 意味着样本的参数(alpha)是不会变化的.
另外calc_func_value,计算的是:
[frac{1}{2} sum_{i=1}^Nsum_{j=1}^N alpha_i alpha_j y_i y_j k(x_i, x_j) - sum_{i=1}^N alpha_i
]
这个值越小收敛的越好
class Points:
"""
数据点的集合
"""
def __init__(self, points, labels, c=1.):
self.points = [] #用于保存Point对象
self.size = len(points)
self.initial(points, labels, c=c)
def distance(self, point1, point2):
"""
计算内积矩阵, 可以在这个函数中加入核方法而达到
核SVM的目的.
"""
return point1 @ point2
def initial(self, points, labels, b=0., c=1.):
"""
初始化 w, b
w: 权重
b: 截距
c: 正则化项的系数
:return:
"""
self.b = b
self.c = c
# alpha 进行初始化
alpha = np.random.uniform(0, self.c / self.size, size=self.size)
y = labels
the_sum = np.sum(y * alpha)
alpha[-1] = alpha[-1] - the_sum * y[-1]
while not (0 < alpha[-1] < self.c):
alpha = np.random.uniform(0, self.c / self.size, size=self.size)
the_sum = np.sum(y * alpha)
alpha[-1] = alpha[-1] - the_sum * y[-1]
alphay = alpha * y
k = np.zeros((self.size, self.size))
for i in range(self.size):
for j in range(i, self.size):
k[i, j] = self.distance(points[i], points[j])
k[j, i] = self.distance(points[j], points[i])
self.k = k #内积矩阵
g = k @ alphay + self.b
for i in range(self.size):
point = Point(points[i], labels[i], k, i,
g[i], alpha[i], c)
self.points.append(point)
def find_dog1(self, points):
"""
寻找第一个点
"""
lucky_dog1 = None
lucky_value1 = 99999999999.
dogs = points.copy()
#首先我们在不满足的kkt条件的支持向量中寻找
for dog in dogs:
if not dog.satisfy_kkt and dog.alpha > 0:
cond =np.abs(dog.yg - 1)
if cond < lucky_value1:
lucky_dog1 = dog
lucky_value1 = cond
#如果没找到,再在整个训练集中找违背kkt最严重的点.
if lucky_dog1 is None:
for dog in dogs:
if not dog.satisfy_kkt:
if dog.yg < lucky_value1:
lucky_dog1 = dog
lucky_value1 = dog.yg
return lucky_dog1
def find_dog2(self, points, dog1):
"""
寻找第二个点
"""
dogs = points.copy()
e1 = dog1.e
lucky_dog2 = None
lucky_value2 = -1.
#寻找使得|e2-e1|最大的点
for dog in dogs:
e2 = dog.e
value = np.abs(e2 - e1)
if value > lucky_value2:
lucky_dog2 = dog
lucky_value2 = value
return lucky_dog2
def calc_l_h(self, dogs):
"""
alpha需要裁剪,所以要计算L, H
:param dogs:
:return:
"""
dog1 = dogs[0]
dog2 = dogs[1]
if dog1.y != dog2.y:
l = max(0, dog2.alpha-dog1.alpha)
h = min(self.c, self.c + dog2.alpha-dog1.alpha)
else:
l = max(0, dog2.alpha+dog1.alpha-self.c)
h = min(self.c, dog2.alpha+dog1.alpha)
return (l, h)
def update(self, dogs, l, h):
"""
更新alpha, b, g, e
"""
dog1 = dogs[0]
dog2 = dogs[1]
e1 = dog1.e
e2 = dog2.e
k11 = self.k[dog1.order, dog1.order]
k12 = self.k[dog1.order, dog2.order]
k21 = self.k[dog2.order, dog1.order]
k22 = self.k[dog2.order, dog2.order]
eta = k11 + k22 - 2 * k12
alpha2_unc = dog2.alpha + dog2.y * (e1 - e2) / eta #未裁剪的alpha2
if alpha2_unc < l:
alpha2 = l
elif l < alpha2_unc < h:
alpha2 = alpha2_unc
else:
alpha2 = h
alpha1 = dog1.alpha + dog1.y * dog2.y * (dog2.alpha - alpha2)
b1 = -e1 - dog1.y * k11 * (alpha1 - dog1.alpha) -
dog2.y * k21 * (alpha2 - dog2.alpha) + self.b
b2 = -e2 - dog1.y * k12 * (alpha1 - dog1.alpha) -
dog2.y * k22 * (alpha2 - dog2.alpha) + self.b
b = (b1 + b2) / 2
#更新g, e
for point in self.points:
point.update_g_e(dog1, dog2, self.b, b, alpha1, alpha2)
#更新alpha, b
dog1.alpha = alpha1
dog2.alpha = alpha2
self.b = b
return (alpha1, alpha2, b)
def update_kkt(self, epsilon):
"""
检查每个Point是否符合kkt条件,并返回
不合格的Point的数量
"""
count = 0
for point in self.points:
count += point.kkt(epsilon)
return count
def calc_func_value(self):
"""
计算函数的值
"""
alpha = np.array([point.alpha for point in self.points])
y = np.array([point.y for point in self.points])
alphay = alpha * y
value = alphay @ self.k @ alphay - np.sum(alpha)
return value
def calc_w(self):
"""
计算w
"""
w = 0.0
for point in self.points:
w += point.alpha * point.y * point.x
self.w = w
return w
def smo(self, max_iter, x, y1, y2, epsilon=1e-3, enough_num=10):
"""
事实上,并不需要传入x, y1, y2, 这里只是做可视化用,
w 也仅在线性分类器时是有用的.
enough_num: 当不满足kkt条件的数目小于enough_num是退出
"""
iteration = 0
value_pre = None
points1 = self.points.copy()
points2 = self.points.copy()
while True:
iteration += 1
count = self.update_kkt(epsilon)
value = self.calc_func_value()
#print(iteration, value, count)
if count < enough_num or iteration > max_iter:
break
#下面部分的含义是如果我们所选的Point不能函数值下降,那么遍历其它的点
#直到目标函数有足够的下降,如果还不行,那么抛弃第一个Point,选择其它的Point
if value_pre == value:
points2.remove(dog2)
if len(points2) > 1:
dog2 = self.find_dog2(points2, dog1)
else:
points1.remove(dog1)
points2 = points1.copy()
dog1 = self.find_dog1(points1)
points2.remove(dog1)
dog2 = self.find_dog2(points2, dog1)
else:
points1 = self.points.copy()
points2 = self.points.copy()
dog1 = self.find_dog1(points1)
points2.remove(dog1)
dog2 = self.find_dog2(points2, dog1)
value_pre = value
l, h = self.calc_l_h((dog1, dog2))
self.update((dog1, dog2), l, h)
"""
可视化部分
if iteration % 10 == 9:
self.calc_w()
plt.cla()
plt.scatter(x, y1)
plt.scatter(x, y2)
y = -self.w[0] / self.w[1] * x - self.b
plt.title("SVM")
plt.plot(x, y, color="red")
plt.pause(0.01)
"""
self.calc_w()
return self.w, self.b
def pre(self, x):
"""
给定x 预测其分类, %%sh上,如果是线性分类器,
只需计算self.w,再计算self.w @ x + self.b即可,
为了方便扩展,这里做了一般化.
"""
value = self.b
for point in self.points:
value += point.alpha * point.y *
self.distance(x, point.x)
sign = 1 if value >= 0 else -1
return sign
准备数据
蓝色部分:
[y1 = 2x + 5 + epsilon_1, epsilon1 sim mathcal{N}(0, sigma^2).
]
红色部分
[y2 = 2x - 5 + epsilon_2, epsilon2 sim mathcal{N}(0, sigma^2)
]
np.random.seed(10086)
x = np.linspace(0, 10, 100)
e1 = np.random.randn(100) * 2
e2 = np.random.randn(100) * 2
y1 = 2 * x + 5 + e1
y2 = 2 * x - 5 + e2
data1 = np.hstack((x.reshape(-1, 1), y1.reshape(-1, 1)))
data2 = np.hstack((x.reshape(-1, 1), y2.reshape(-1, 1)))
data = np.vstack((data1, data2))
labels = np.hstack((np.ones(100), -np.ones(100)))
plt.scatter(x, y1)
plt.scatter(x, y2)
plt.show()
测试
test = Points(data, labels, c=10)
w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
print(w, b)
plt.scatter(x, y1)
plt.scatter(x, y2)
y = -w[0] / w[1] * x - b
plt.plot(x, y, color="red")
plt.show()
[-1.25243632 0.5887332 ] -1.1498779874793863
有些时候结果是不会那么好的, 在收敛过程中还是会有波动的, 而且数据越是混越是波动大
test.pre(np.array([10., 0.]))
-1
test.pre(np.array([0., 10.]))
1
test = Points(data, labels, c=1)
w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
print(w, b)
plt.scatter(x, y1)
plt.scatter(x, y2)
y = -w[0] / w[1] * x - b
plt.plot(x, y, color="red")
plt.show()
[-1.33721113 0.83955356] -1.6680372246340474
test = Points(data, labels, c=0.1)
w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
print(w, b)
plt.scatter(x, y1)
plt.scatter(x, y2)
y = -w[0] / w[1] * x - b
plt.plot(x, y, color="red")
plt.show()
[-0.78780019 0.44522279] -0.6061702858422205
实验中发现, c越大波动越大,或许在训练过程可以动态调整c