# K-means clustering of the zoo dataset, compared against the true animal
# type and visualised on the first two principal components.
#
# Input: "zoo.csv" in the working directory. The script reads the columns
# `name`, `type` and `legs`; every other column is treated as a "yes"/other
# trait flag (NOTE(review): confirm the CSV really encodes traits as "yes").

library(ggplot2)

# result <- na.omit(dd)

# Load data ----
zoo_data <- read.csv("zoo.csv", header = TRUE, sep = ",")
head(zoo_data)

# Build the feature table ----
# Drop the label (`type`), the identifier (`name`) and `legs` by name.
# setdiff() is used instead of `-which(...)` because a negative empty index
# would silently drop every column if one of these names were missing.
trait_cols <- setdiff(names(zoo_data), c("type", "name", "legs"))
clustering_data <- zoo_data[, trait_cols, drop = FALSE]

# Recode the remaining trait columns to 1 ("yes") / 0 (anything else).
clustering_data[] <- lapply(
  clustering_data,
  function(x) ifelse(x == "yes", 1, 0)
)

# `legs` is numeric, so it is re-attached unchanged after the recoding.
clustering_data$legs <- zoo_data$legs

# Remember which columns are real features before any result columns are
# appended, so later steps can use the features only.
feature_cols <- names(clustering_data)

# K-means ----
# Seven clusters, matching the seven animal types listed in `type_order`.
set.seed(123)
kmeans_result <- kmeans(clustering_data, centers = 7)

# Add cluster assignments to the data.
clustering_data$cluster <- kmeans_result$cluster

# Compare clusters with the true type ----
contingency_table <- table(zoo_data$type, clustering_data$cluster)
contingency_table

# Same table with rows reordered for easier reading.
type_order <- c(
  "insect", "invertebrate", "bird", "amphibian", "fish", "reptile", "mammal"
)
contingency_table[type_order, ]

# Visualise the clusters with PCA ----
# PCA runs on the feature columns only: including the `cluster` column would
# leak the k-means labels into the projection used to display them.
pca <- prcomp(clustering_data[, feature_cols, drop = FALSE], scale. = TRUE)
pca_df <- data.frame(pca$x, cluster = factor(clustering_data$cluster))

ggplot(pca_df, aes(PC1, PC2, color = cluster)) +
  geom_point(size = 2) +
  labs(title = "Clustering of Zoo Data (PCA view)")

# Add the true type back onto the produced data frame.
clustering_data$true_label <- zoo_data$type