# Choosing the number of k-means clusters for the zoo dataset.
#
# Reads "zoo.csv", builds a numeric feature table, and then compares three
# heuristics for picking k: the elbow method, the average silhouette width,
# and the gap statistic.

library(cluster)

zoo_data <- read.csv("zoo.csv", header = TRUE, sep = ",")
print(head(zoo_data))

# Data preparation ----

# Exclude the 'type' and 'name' columns from the clustering features, and set
# 'legs' aside so it is not passed through the yes/no recoding below.
# Selecting by name with setdiff() is safe when a column is absent, whereas
# `df[, -which(names(df) == "x")]` drops *every* column in that case.
excluded_cols <- c("type", "name", "legs")
feature_cols <- setdiff(names(zoo_data), excluded_cols)
clustering_data <- zoo_data[, feature_cols, drop = FALSE]

# Recode the remaining columns to 1 ("yes") / 0 (anything else).
# NOTE(review): assumes these columns are coded "yes"/"no" in zoo.csv; any
# other coding (e.g. 0/1 or TRUE/FALSE) would be turned into all zeros here.
# Confirm against the data file.
clustering_data[] <- lapply(
  clustering_data,
  function(x) ifelse(x == "yes", 1, 0)
)

# Add 'legs' back unchanged, as taken from the raw data.
# NOTE(review): 'legs' is not rescaled, so if its range is wider than 0/1 it
# will weigh more heavily in the Euclidean distances than the binary
# features. Confirm this is intended.
clustering_data$legs <- zoo_data$legs

# 1. Elbow Method ----

# Seed the RNG and use several random starts so the curve is reproducible and
# each point reflects the best of 25 k-means runs rather than a single one.
set.seed(123)
elbow_k <- 1:10
wss <- vapply(
  elbow_k,
  function(k) {
    kmeans(clustering_data, centers = k, nstart = 25)$tot.withinss
  },
  numeric(1)
)

plot(
  elbow_k, wss,
  type = "b", pch = 19,
  xlab = "Number of clusters (k)",
  ylab = "Total within-clusters sum of squares",
  main = "Elbow Method for Choosing k",
  ylim = c(0, max(wss))
)

# 2. Silhouette Method ----

set.seed(123)
silhouette_k <- 2:10
# The pairwise distance matrix does not depend on k, so compute it once.
feature_dist <- dist(clustering_data)
sil_widths <- vapply(
  silhouette_k,
  function(k) {
    km <- kmeans(clustering_data, centers = k, nstart = 25)
    ss <- silhouette(km$cluster, feature_dist)
    mean(ss[, "sil_width"])
  },
  numeric(1)
)

plot(
  silhouette_k, sil_widths,
  type = "b", pch = 19,
  xlab = "Number of clusters (k)",
  ylab = "Average silhouette width",
  main = "Silhouette Method for Choosing k"
)

# 3. Gap Statistic ----

set.seed(123)
gap_stat <- clusGap(
  clustering_data,
  FUN = kmeans,
  nstart = 25,
  K.max = 10,
  B = 50
)
print(gap_stat)
plot(gap_stat)