# Wine classification and clustering demo:
# k-NN, decision tree, single-layer perceptron, cross-validation, k-means.

if (!requireNamespace("doBy", quietly = TRUE)) {
  install.packages("doBy")
}
library(doBy)

data(wine)  # NOTE(review): assumes a `wine` dataset is provided by doBy -- confirm
write.csv(wine, "wine.csv", row.names = FALSE)  # simply to export it as a csv locally

# Keep the original colnames, then switch to generic names for modelling
orig_colnames <- colnames(wine)
colnames(wine) <- c("Class", paste0("Feature_", 1:13))
head(wine)

# Visualize pairwise relationships among a few features, colored by class
pairs(wine[, 2:5], col = wine$Class,
      main = "Pairwise Plot of Selected Features")

# Summary statistics
summary(wine)

# Train/test split (70/30), stratified on the outcome
set.seed(123)
library(caret)
train_idx <- createDataPartition(wine$Class, p = 0.7, list = FALSE)
train <- wine[train_idx, ]
test <- wine[-train_idx, ]

# k-NN (k = 5) on all 13 features
# NOTE(review): features are unscaled; k-NN is distance-based, so consider
# standardizing columns 2:14 before fitting -- left as-is here.
library(class)
knn_pred <- knn(train[, 2:14], test[, 2:14], train$Class, k = 5)
cat("k-NN Accuracy:", mean(knn_pred == test$Class), "\n")

# Decision tree. Class is numeric (1/2/3), so force a classification tree;
# without method = "class", rpart fits a regression (anova) tree and
# predict(..., type = "class") errors.
library(rpart)
tree_model <- rpart(Class ~ ., data = train, method = "class")
library(rpart.plot)
rpart.plot(tree_model)
tree_pred <- predict(tree_model, test, type = "class")
cat("Decision Tree Accuracy:", mean(tree_pred == test$Class), "\n")

# Perceptron (binary: class 1 vs. classes 2 & 3).
# hidden = 0 with linear.output = FALSE gives a single-layer logistic unit.
library(neuralnet)
wine$BinaryClass <- ifelse(wine$Class == 1, 1, 0)
train_bin <- wine[train_idx, ]
test_bin <- wine[-train_idx, ]
perceptron_model <- neuralnet(
  BinaryClass ~ Feature_1 + Feature_2 + Feature_3 + Feature_4 + Feature_5 +
    Feature_6 + Feature_7 + Feature_8 + Feature_9 + Feature_10 +
    Feature_11 + Feature_12 + Feature_13,
  data = train_bin, hidden = 0, linear.output = FALSE
)
preds <- compute(perceptron_model, test_bin[, 2:14])$net.result
pred_class <- ifelse(preds > 0.5, 1, 0)
accuracy_perceptron <- mean(pred_class == test_bin$BinaryClass)
cat("Perceptron Accuracy (class 1 vs. others):", accuracy_perceptron, "\n")

## Validation ----
# 5-fold cross-validation for k-NN and the decision tree.
# Restrict to columns 1:14 -- wine now also carries BinaryClass, which is a
# deterministic function of Class; including it as a predictor leaks the
# label and inflates CV accuracy.
train_control <- trainControl(method = "cv", number = 5)
knn_cv <- train(as.factor(Class) ~ ., data = wine[, 1:14], method = "knn",
                tuneGrid = data.frame(k = 5), trControl = train_control)
print(knn_cv)
dt_cv <- train(as.factor(Class) ~ ., data = wine[, 1:14], method = "rpart",
               trControl = train_control)
print(dt_cv)

# Approximate 95% interval for k-NN CV accuracy from its fold-to-fold SD
knn_cv$results$Accuracy + c(-1, 1) * 1.96 * knn_cv$results$AccuracySD

## Clustering ----
set.seed(123)  # for reproducibility

# Apply k-means clustering with k = 3 clusters on the 13 features
kmeans_res <- kmeans(wine[, 2:14], centers = 3)

# Visualize the clustering results using two features (filled = cluster)
plot(wine$Feature_1, wine$Feature_2, col = kmeans_res$cluster,
     main = "k-means Clustering of Wine Data",
     xlab = "Feature 1", ylab = "Feature 2", pch = 19)

# Optionally, overlay true class labels for comparison (open = true class)
points(wine$Feature_1, wine$Feature_2, col = as.numeric(wine$Class), pch = 1)
legend("topright",
       legend = c("Cluster 1", "Cluster 2", "Cluster 3",
                  "True Class 1", "True Class 2", "True Class 3"),
       col = c(1, 2, 3, 1, 2, 3), pch = c(19, 19, 19, 1, 1, 1))