##########
# Example 4 (using the tidymodels library)
##########
#
# Fits a classification decision tree (rpart engine) to the iris data through
# a tidymodels workflow, plots the fitted tree, and evaluates predictions on
# a held-out test set (confusion matrix, accuracy, scatter plot).

# Setup ----

# In case the necessary libraries are not installed yet:
# install.packages("tidymodels")
# install.packages("rpart.plot")
# install.packages("ggplot2")

# Load necessary libraries
library(tidymodels)
library(rpart.plot)
library(ggplot2)

# Set seed for reproducibility (the train/test split below is random)
set.seed(123)

# Data ----

# Load the dataset
data(iris)

# Split into training and testing sets (80/20 split)
iris_split <- initial_split(iris, prop = 0.8)
iris_train <- training(iris_split)
iris_test <- testing(iris_split)

# Model ----

# Define the decision tree model specification
dt_spec <- decision_tree() %>%
  set_engine("rpart") %>%
  set_mode("classification")

# Create the workflow: model specification plus a formula using every other
# column as a predictor of Species
iris_wf <- workflow() %>%
  add_model(dt_spec) %>%
  add_formula(Species ~ .)

# Fit the model on the training set only
iris_fit <- fit(iris_wf, data = iris_train)

# Extract the fitted rpart object for visualization.
# extract_fit_engine() replaces the deprecated pull_workflow_fit(...)$fit.
rpart_obj <- extract_fit_engine(iris_fit)

# Plot the decision tree.
# roundint = FALSE: rpart.plot cannot retrieve the training data from a
# model fitted through parsnip and would otherwise emit a warning.
rpart.plot(
  rpart_obj,
  roundint = FALSE,
  main = "Decision Tree for Iris (tidymodels)"
)

# Evaluation ----

# Make predictions on the test set and attach the original test columns so
# the predicted class (.pred_class) sits next to the true Species
iris_preds <- predict(iris_fit, iris_test) %>%
  bind_cols(iris_test)

# Show first few predictions
print(head(iris_preds))

# Generate confusion matrix
conf_mat_obj <- conf_mat(iris_preds, truth = Species, estimate = .pred_class)

# Plot confusion matrix
autoplot(conf_mat_obj) +
  ggtitle("Confusion Matrix for Iris Classification")

# Calculate accuracy
accuracy_val <- accuracy(iris_preds, truth = Species, estimate = .pred_class)
print(accuracy_val)

# Visualize misclassified points: colour encodes the true species and shape
# encodes the predicted class, so a point whose shape does not match its
# colour group is a misclassification
ggplot(
  iris_preds,
  aes(
    x = Sepal.Length,
    y = Sepal.Width,
    color = Species,
    shape = .pred_class
  )
) +
  geom_point(size = 3) +
  labs(
    title = "Misclassified Points in Iris Dataset",
    shape = "Predicted Class"
  ) +
  theme_minimal()