import numpy as np class GMM(object): """Gaussian Mixture Model """ def __init__(self, data, K): """ K: the number of gaussian models alpha: the weight for corresponding gaussian model mu: the vector of means sigma2: the vector of variances N: the number of samples K: the number of gaussian models """ self.data = data self.K = K self.alpha = (np.ones(K) / K) self.mu = (np.arange(K) - K // 2) * (data.max() - data.min()) / K self.sigma2 = (np.ones(K)) self.N = len(data) self.gamma = np.ones((self.N, K)) / K def phi(self): # phi.shape(K, N) phi = (1 / np.sqrt(2 * np.pi * self.sigma2.reshape(self.K, 1)) * np.exp(- (self.data - self.mu.reshape(self.K, 1)) ** 2 / (2 * self.sigma2.reshape(self.K, 1)))) return phi def fit(self): sigma2_ = self.sigma2 mu_ = self.mu while True: # gamma.shape(N, K) self.gamma = (0.1 * self.gamma + 0.9 * self.phi().T * self.alpha / (self.phi().T * self.alpha).sum(axis=1).reshape(self.N, 1)) # mu.shape(1, K) self.mu = (0.1 * self.mu + 0.9 * np.matmul(self.data, self.gamma) / self.gamma.sum(axis=0)) # sigma2.shape(1,K) self.sigma2 = (0.1 * self.sigma2 + 0.9 * (self.gamma * (data.reshape(self.N, 1) - self.mu) ** 2).sum(axis=0) / self.gamma.sum(axis=0)) # alpha.shape(1, K) self.alpha = (0.1 * self.alpha + 0.9 * self.gamma.sum(axis=0) / self.N) if (np.sum((self.mu - mu_) ** 2) + np.abs(self.sigma2 - sigma2_).sum()) < 10 ** (-10): break mu_ = self.mu sigma2_ = self.sigma2 print(self.gamma.argmax(axis=1)) return self.gamma.argmax(axis=1) data = np.concatenate((np.random.normal(-4, 1, 2000), np.random.normal(4, 1, 2000))) gmm = GMM(data, 2) label = gmm.fit()