## Hierarchical clustering (hclust) ##

# Load necessary libraries
library(ggplot2)

# Sample data: keep only the numeric measurement columns of iris.
# Columns are selected by type rather than by position (`iris[, -5]`) so that
# re-running this section in the same session stays correct: Step 5 adds a
# factor column `Cluster` to the workspace copy of `iris`, and a positional
# drop of column 5 would let that factor leak into the distance computation.
data <- iris[, vapply(iris, is.numeric, logical(1)), drop = FALSE]

# Step 1: Compute the pairwise Euclidean distance matrix between samples
distance_matrix <- dist(data, method = "euclidean")

# Step 2: Perform agglomerative hierarchical clustering
# Complete linkage is used here; "average" or "single" are alternatives.
hc <- hclust(distance_matrix, method = "complete")

# Step 3: Plot the dendrogram (base graphics draw immediately)
plot(
  hc,
  main = "Dendrogram of Hierarchical Clustering",
  xlab = "Samples",
  ylab = "Distance"
)

# Step 4: Cut the dendrogram into clusters
# Here, we cut it into 3 clusters; `clusters` holds one integer label per row.
clusters <- cutree(hc, k = 3)

# Step 5: Add cluster membership to the original data
# NOTE: assigning into `iris` creates a modified copy in the workspace that
# masks the built-in data set until it is removed or reloaded with data(iris).
iris$Cluster <- as.factor(clusters)

# Step 6: Visualize the clusters (optional)
# The plot is printed explicitly: a ggplot object is only rendered when it is
# printed, and top-level auto-printing does not happen under source() with its
# default settings, so without print() the figure would silently not appear.
print(
  ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Cluster)) +
    geom_point(size = 3) +
    labs(
      title = "Hierarchical Clustering of Iris Dataset",
      x = "Sepal Length",
      y = "Sepal Width"
    ) +
    theme_minimal()
)



## DBSCAN (density-based clustering) ##


# Install (only if missing) and load the dbscan package
if (!requireNamespace("dbscan", quietly = TRUE)) {
  install.packages("dbscan")
}
library(dbscan)

# Reload the built-in iris data set so this section starts from the original
# columns, discarding anything previously added to a workspace copy of `iris`
# (such as a `Cluster` column).
data(iris)

# Sample data: keep only the numeric measurement columns of iris.
# Selecting by type (instead of dropping column 5 by position) does not depend
# on where the non-numeric columns happen to sit.
data <- iris[, vapply(iris, is.numeric, logical(1)), drop = FALSE]

# Step 1: Scale the data
# scale() centers each column and divides it by its standard deviation, so
# every feature contributes comparably to the distances `eps` is measured in.
data_scaled <- scale(data)

# Step 2: Run DBSCAN
# Adjust eps (neighborhood radius) and minPts (minimum neighborhood size)
# according to your data.
dbscan_result <- dbscan(data_scaled, eps = 0.5, minPts = 5)

# Step 3: Attach the clustering results to the data frame
# Per the dbscan package documentation, cluster id 0 marks noise points (rows
# assigned to no cluster); it appears as level "0" in the factor and legend.
iris$Cluster <- as.factor(dbscan_result$cluster)

# Step 4: Visualize the results
# The plot is printed explicitly: a ggplot object is only rendered when it is
# printed, and top-level auto-printing does not happen under source() with its
# default settings, so without print() the figure would silently not appear.
print(
  ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Cluster)) +
    geom_point(size = 3) +
    labs(
      title = "DBSCAN Clustering of Iris Dataset",
      x = "Sepal Length",
      y = "Sepal Width"
    ) +
    theme_minimal()
)