# Clustering demo on the built-in iris data set.
#
# Two independent sections:
#   1. Agglomerative hierarchical clustering (stats::hclust) on the raw
#      numeric features, cut into 3 clusters.
#   2. Density-based clustering (dbscan::dbscan) on the standardized
#      numeric features.
# Each section writes its labels to iris$Cluster (on a fresh copy of iris)
# and draws a Sepal.Length vs Sepal.Width scatter plot coloured by cluster.

## hclustering ----

# Load necessary libraries
library(ggplot2)

# Start from a pristine copy of iris so that re-running the script in the
# same session does not pick up a Cluster column left by a previous run
# (iris[, -5] would otherwise hand a factor column to dist()).
data(iris)

# Sample data: drop column 5 (Species) and keep only the numeric features.
# NOTE: `data` shadows the name of utils::data(); calls such as data(iris)
# still work because R skips non-function objects when resolving a call.
data <- iris[, -5]

# Step 1: Compute the distance matrix
distance_matrix <- dist(data, method = "euclidean")

# Step 2: Perform hierarchical clustering
# You can also try "average" or "single" linkage.
hc <- hclust(distance_matrix, method = "complete")

# Step 3: Plot the dendrogram
plot(
  hc,
  main = "Dendrogram of Hierarchical Clustering",
  xlab = "Samples",
  ylab = "Distance"
)

# Step 4: Cut the dendrogram into 3 clusters
clusters <- cutree(hc, k = 3)

# Step 5: Add cluster membership to the original data
iris$Cluster <- as.factor(clusters)

# Step 6: Visualize the clusters (optional)
# print() is explicit so the plot is also drawn when the file is source()d,
# where top-level values are not auto-printed.
print(
  ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Cluster)) +
    geom_point(size = 3) +
    labs(
      title = "Hierarchical Clustering of Iris Dataset",
      x = "Sepal Length",
      y = "Sepal Width"
    ) +
    theme_minimal()
)

## dbscan ----

# Install (if missing) and load necessary libraries
if (!requireNamespace("dbscan", quietly = TRUE)) {
  install.packages("dbscan")
}
library(dbscan)

# Reset iris, discarding the Cluster column added by the section above.
data(iris)

# Sample data: use only the numerical features of the iris dataset
data <- iris[, -5]

# Step 1: Scale the data (centre and standardize each column)
data_scaled <- scale(data)

# Step 2: Run DBSCAN
# Adjust eps and minPts according to your data.
dbscan_result <- dbscan(data_scaled, eps = 0.5, minPts = 5)

# Step 3: Attach the clustering results to the data frame
# Per the dbscan package documentation, cluster id 0 marks noise points.
iris$Cluster <- as.factor(dbscan_result$cluster)

# Step 4: Visualize the results
print(
  ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Cluster)) +
    geom_point(size = 3) +
    labs(
      title = "DBSCAN Clustering of Iris Dataset",
      x = "Sepal Length",
      y = "Sepal Width"
    ) +
    theme_minimal()
)