Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
Unsupervised learning in R

Step 1: Data preparation
Step 2: Data description
Step 3: Load required libraries
Step 4: Compute Gap Statistics to determine total number of clusters
Step 5: Compute hierarchical clustering
Step 6: Visualize the cluster dendrogram using factoextra R package
  • Loading branch information
MoganaD authored Sep 4, 2020
1 parent f7c2012 commit 98f3e36
Showing 1 changed file with 55 additions and 0 deletions.
55 changes: 55 additions & 0 deletions ul.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# 1. Import data ----
# Load the analysis dataset from a local CSV file; edit the path below so it
# points at the copy of the file on your own machine.
data1 <- read.csv(
  file = "C:/Users/Mogana Darshini/OneDrive/unsupervised learning/Manuscript/Analysis/data1.csv"
)

# 2. Describe data ----
dim(data1)  # number of rows, then number of columns
str(data1)  # structure of the data: column names, types and first values

#3.Load libraries
library(factoextra)
library(cluster)
library(magrittr)

# 4. Compute the gap statistic to choose the number of clusters ----
# The gap statistic is estimated from bootstrapped reference data sets and
# k-means starts from randomly chosen centres, so fix the RNG seed first;
# otherwise the suggested number of clusters can change from run to run.
# NOTE(review): the data are used unscaled here, whereas the hierarchical
# clustering step scales them first -- confirm this is intended.
set.seed(20)
fviz_nbclust(data1, kmeans, method = "gap_stat")

# 5. Compute hierarchical clustering ----
# Standardise every column, build the Euclidean dissimilarity matrix, then
# run agglomerative hierarchical clustering with the "ward.D2" method.
res.hc <- data1 %>%
  scale() %>%
  dist(method = "euclidean") %>%
  hclust(method = "ward.D2")

# Print the fitted clustering object
print(res.hc)

# 6. Visualize the dendrogram using factoextra ----
# Cut the tree into five groups and colour each group differently.
fviz_dend(
  res.hc,
  k = 5,                     # cut in five groups
  cex = 0.5,                 # label size
  # One colour per group; every entry must be a valid hex code starting
  # with "#" (the last one was previously missing it).
  k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07", "#1D08A5"),
  color_labels_by_k = TRUE,  # colour labels by groups
  rect = TRUE                # add rectangle around groups
)

# 7. K-means clustering ----
# Seed the RNG before clustering so the run can be repeated.
# NOTE(review): k-means is run on the unscaled data, unlike the hierarchical
# clustering step -- confirm this is intended.
set.seed(20)
kmeans_clustering <- kmeans(x = data1, centers = 5)

# Inspect 'kmeans_clustering'. Its main components are:
#   cluster      - integer vector (values in 1:k) giving the cluster to which
#                  each point is allocated.
#   centers      - matrix of cluster centers.
#   withinss     - within-cluster sum of squares, one value per cluster.
#   tot.withinss - total within-cluster sum of squares, i.e. sum(withinss).
#   size         - number of points in each cluster.
str(kmeans_clustering)

print(kmeans_clustering)
kmeans_clustering[["cluster"]]  # cluster assigned to each observation
kmeans_clustering[["size"]]     # number of observations per cluster

# Scatter plot of the k-means clusters ----
# Observations are drawn as points, each cluster is outlined with a convex
# hull, and the palette supplies one colour per cluster.
fviz_cluster(
  kmeans_clustering,
  data = data1,
  palette = c("#1D08A5", "#00AFBB", "#E7B800", "#08A508", "#A50822"),
  geom = "point",
  ellipse.type = "convex",
  ggtheme = theme_bw()
)

0 comments on commit 98f3e36

Please sign in to comment.