Skip to content
This repository has been archived by the owner on Feb 23, 2021. It is now read-only.

Commit

Permalink
Gah, git commit -am doesn't add new folders...
Browse files Browse the repository at this point in the history
  • Loading branch information
jackmaney committed Jan 13, 2014
1 parent 1d65317 commit 242cd8c
Showing 1 changed file with 32 additions and 0 deletions.
32 changes: 32 additions & 0 deletions examples/three_clusters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pandas import DataFrame,Series
import pandas as pd
import numpy as np
import sys
import os

# Put the parent of the current working directory first on the module search
# path so that the k_means_plus_plus import below is looked up there first.
sys.path = [os.path.abspath(os.pardir)] + sys.path

from k_means_plus_plus import *

np.random.seed(1234)  # For reproducibility

# We create a data set with three sets of 500 points each, chosen from normal
# distributions with a standard deviation of 10.
# The means of the distributions from which we sample are
# (25, 45), (-30, 5), and (5, -20).
POINTS_PER_CLUSTER = 500
CLUSTER_STD = 10
CLUSTER_MEANS = [(25, 45), (-30, 5), (5, -20)]

cluster_frames = []
for mean_x, mean_y in CLUSTER_MEANS:
    # Draw x before y for every cluster: the seeded random stream is consumed
    # in this order, so swapping the two lines would change the data set.
    xs = CLUSTER_STD * np.random.randn(POINTS_PER_CLUSTER) + mean_x
    ys = CLUSTER_STD * np.random.randn(POINTS_PER_CLUSTER) + mean_y
    cluster_frames.append(DataFrame({'x': xs, 'y': ys}, columns=list('xy')))

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to
# stack frames. No ignore_index is passed, so each frame keeps its own 0..499
# row labels, exactly as the chained append calls produced.
data = pd.concat(cluster_frames)

# Grab a scatterplot of the raw (not yet clustered) points
import matplotlib.pyplot as plt
raw_figure = plt.figure()
plt.scatter(data['x'], data['y'], s=5)
plt.savefig("three_clusters_scatterplot.png")
# Close the figure once it is on disk: pyplot keeps drawing on the current
# figure, so leaving it open would make any later plot land on top of this one.
plt.close(raw_figure)

# Cluster the points into 3 groups.
# NOTE(review): KMeansPlusPlus comes from k_means_plus_plus, which is not
# visible here; cluster() presumably fills in .centers and .clusters - confirm.
kmpp = KMeansPlusPlus(data, 3)
kmpp.cluster()
# A bare `kmpp.centers` expression only echoes in an interactive session;
# print it so the centers are also shown when this file is run as a script.
print(kmpp.centers)

# Get a scatterplot that's color-coded by cluster:
# label 0 -> red, label 1 -> blue, anything else -> green.
colors = ["red" if label == 0 else "blue" if label == 1 else "green" for label in kmpp.clusters]
# Start from a new figure so these points are not drawn over an earlier plot.
plt.figure()
plt.scatter(data['x'], data['y'], s=5, c=colors)
plt.savefig("three_clusters_clusters.png")

0 comments on commit 242cd8c

Please sign in to comment.