Created
January 1, 2012 22:51
-
-
Save diogojc/1548571 to your computer and use it in GitHub Desktop.
Density estimation using multivariate gaussians
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
def params(X): | |
""" | |
Calculates the mean vector and covariance matrix for the given data. | |
Arguments | |
--------- | |
X: mxn matrix with m samples drawn from an unknown multivariate Gaussian | |
distribution. | |
Returns | |
--------- | |
(mu, Sigma) | |
mu: n sized vector with the means of the n independent variables. | |
Sigma: nxn covariance matrix of the n independent variables. | |
""" | |
return (np.mean(X, 0), np.cov(X.T)) | |
def density(X, mu, sigma): | |
""" | |
Calculates the probability of the data according to an estimated | |
multivariate gaussian distribution. | |
Arguments | |
--------- | |
X: mxn matrix with m samples drawn from an unknown multivariate Gaussian | |
distribution. | |
mu: n sized vector with the means of the n independent variables. | |
Sigma: nxn covariance matrix of the n independent variables. | |
Returns | |
--------- | |
m sized vector with probabilities of the data. | |
""" | |
assert X.shape[0] > X.shape[1], "Degenerate matrix, m must be greater than n." | |
detsigma = np.linalg.det(sigma) | |
n = len(mu) | |
a = 1 / (np.power(2 * np.pi, n / 2.) * np.sqrt(detsigma)) | |
invsigma = np.linalg.inv(sigma) | |
b = np.array([np.exp(-0.5 * np.dot(np.dot((x - mu), invsigma), (x - mu).T)) | |
for x in X]) | |
return a * b | |
class Classification(object): | |
""" | |
Model data as a mixture of multivariate gaussian distributions | |
""" | |
def fit(self, X, y): | |
self.classes = np.unique(y) | |
self.mus, self.sigmas = dict(), dict() | |
for i in self.classes: | |
self.mus[i], self.sigmas[i] = params(X[y == i]) | |
def predict_proba(self, X): | |
return np.array([density(X, self.mus[i], self.sigmas[i]) | |
for i in self.classes]).T | |
def predict(self, X): | |
Y = self.predict_proba(X) | |
return self.classes[np.argmax(Y, 1)] | |
class OutlierDetection(object): | |
def fit(self, X): | |
self.mu, self.sigma = params(X) | |
def predict_proba(self, X): | |
return density(X, self.mu, self.sigma) | |
if __name__ == '__main__': | |
# Create synthetic data | |
mux = np.array([2, 2]) | |
sigmax = np.array([[3, 0], [0, 3]]) | |
X = np.random.multivariate_normal(mux, sigmax, 100) | |
yx = 1 * np.ones(X.shape[0]) | |
muz = np.array([10, 10]) | |
sigmaz = np.array([[1, .8], [.8, 1]]) | |
Z = np.random.multivariate_normal(muz, sigmaz, 100) | |
yz = 2 * np.ones(Z.shape[0]) | |
muw = np.array([15, 0]) | |
sigmaw = np.array([[2, 0], [0, 1]]) | |
W = np.random.multivariate_normal(muw, sigmaw, 100) | |
yw = 3 * np.ones(W.shape[0]) | |
# Model data | |
c = Classification() | |
X = np.vstack((X, Z, W)) | |
y = np.hstack((yx, yz, yw)) | |
c.fit(X, y) | |
# Visualize data and model | |
fig = plt.figure() | |
ax1 = fig.add_subplot(211) | |
ax1.plot(X[:, 0], X[:, 1], 'rx') | |
ax1.plot(Z[:, 0], Z[:, 1], 'bx') | |
ax1.plot(W[:, 0], W[:, 1], 'gx') | |
ax2 = fig.add_subplot(212) | |
x, y = np.meshgrid(np.linspace(-5, 20, 100), np.linspace(-5, 15, 100)) | |
p = c.predict_proba(np.vstack((x.ravel(), y.ravel())).T) | |
z = np.reshape(np.max(p, 1), (x.shape[0], x.shape[1])) | |
ax2.imshow(z, extent=[np.min(x), np.max(x), np.min(y), np.max(y)], origin='lower') | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment