# Introductory R walkthrough: workspace setup, basic data types, exploring
# built-in datasets, base-graphics plots, importing a CSV file and generating
# random data. Run it top to bottom, or step through it section by section.

# -----------------------------------
# 1. Setting up our workspace
# -----------------------------------

# Check current working directory
getwd()

# Set your working directory to your project folder.
# The path below is machine-specific: edit it to point at your own folder.
# When the folder does not exist we keep the current directory and warn,
# instead of aborting the rest of the script.
project_dir <- "C:/Users/kgi062/ML_2025"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  warning(
    "Project folder not found, keeping working directory: ", getwd(),
    call. = FALSE
  )
}

# -----------------------------------
# 2. Basic R Commands and Data Types
# -----------------------------------

# Printing
print("Hello, world!")

# Simple arithmetic
2 + 2

# Variables and Data Types
x <- 10          # Numeric variable
name <- "Alice"  # Character variable
flag <- TRUE     # Logical variable

# Creating a numeric vector
numbers <- c(1, 2, 3, 4, 5)

# Calculating mean and sum of the vector
mean_numbers <- mean(numbers)
sum_numbers <- sum(numbers)

# Display results
print(paste("Mean:", mean_numbers))
print(paste("Sum:", sum_numbers))

# -----------------------------------
# 3. Exploring Built-in Datasets
# -----------------------------------

# Accessing dataset help
?CO2

# Viewing the dataset in RStudio's viewer.
# View() needs an interactive session, so skip it when the script is run
# non-interactively (e.g. with Rscript).
if (interactive()) {
  View(CO2)
}

# Exploring dataset structure
head(CO2)              # First few rows
tail(CO2)              # Last few rows
colnames(CO2)          # Column names
dim(CO2)               # Dimensions (rows, columns)
str(CO2)               # Structure of the dataset
summary(CO2)           # Summary statistics
summary(CO2$uptake)    # Summary of a specific column
median(CO2$uptake)     # Median value of 'uptake'
class(CO2$uptake)      # Data type of 'uptake'
unique(CO2$Treatment)  # Unique treatment groups

# Subset for minimum concentration
subset(CO2, conc == min(conc))

# -----------------------------------
# 4. Basic Data Visualization
# -----------------------------------

# Histogram of 'uptake' (selected by name rather than by column position)
hist(
  CO2$uptake,
  main = "Histogram of CO2 Uptake",
  xlab = "Uptake x axis",
  ylab = "Freq",
  col = "red"
)

# Scatter plot: Orange dataset
my_title <- "My alternative title"
plot(
  x = Orange$age,
  y = Orange$circumference,
  main = my_title,
  xlab = "Age (days)",
  ylab = "Circumference (mm)"
)

# Customized scatter plot
plot(
  Orange$age,
  Orange$circumference,
  xlab = "Age",
  ylab = "Circumference",
  main = "Circumference vs. Age in Orange Trees",
  col = "blue",
  pch = 20
)

# -----------------------------------
# 5. Importing External Data
# -----------------------------------

# Reading CSV file (update the file name accordingly).
# Make sure 'external_file (1).csv' exists in your working directory;
# download it via opencourses.
csv_file <- "external_file (1).csv"
if (file.exists(csv_file)) {
  data <- read.csv(csv_file)

  # Visualize the imported data.
  # read.csv() returns a data.frame, not a vector, so plot its first column.
  hist(data[, 1])
} else {
  warning(
    "Skipping the import example, file not found in ", getwd(), ": ",
    csv_file,
    call. = FALSE
  )
}

# -----------------------------------
# 6. Generating Random Data
# -----------------------------------

# Generate 100 normally distributed random points with mean=5 and sd=1
d <- rnorm(100, mean = 5, sd = 1)
hist(d)

# Plot the generated data
plot(d, main = "Random Normal Data", xlab = "Index", ylab = "Value")

# Access help for functions
# e.g., ?mean, ?plot