# Load necessary libraries
library(readr)
library(dplyr)

# Read the data file (replace "data.csv" with your actual file name)
data <- read_csv("data.csv")

#' Detect outliers using Tukey's fences
#'
#' A value is reported as an outlier when it lies below Q1 - 1.5 * IQR or
#' above Q3 + 1.5 * IQR, where Q1/Q3 are the first and third quartiles.
#'
#' @param vector A numeric vector. Missing values are dropped before the
#'   quartiles are computed (quantile() would otherwise raise an error).
#' @return The non-missing elements of `vector` that fall outside the fences,
#'   in their original order; a zero-length vector when there are none.
detect_outliers <- function(vector) {
  values <- vector[!is.na(vector)]
  if (length(values) == 0) {
    return(values)
  }

  quartiles <- quantile(values, probs = c(0.25, 0.75), names = FALSE)
  # Named `spread` rather than `IQR` so stats::IQR() is not shadowed.
  spread <- quartiles[2] - quartiles[1]
  lower_bound <- quartiles[1] - 1.5 * spread
  upper_bound <- quartiles[2] + 1.5 * spread

  values[values < lower_bound | values > upper_bound]
}

#' Moment coefficient of skewness (g1 = m3 / m2^1.5)
#'
#' Base-R replacement for the `skewness()` call, which none of the packages
#' loaded by this script provide.
#'
#' @param values A numeric vector without missing values.
#' @return A single number, or `NA_real_` when skewness is undefined
#'   (no observations, or zero variance).
compute_skewness <- function(values) {
  if (length(values) == 0) {
    return(NA_real_)
  }
  centered <- values - mean(values)
  second_moment <- mean(centered^2)
  if (second_moment == 0) {
    # Constant data: dividing by zero variance would give NaN.
    return(NA_real_)
  }
  mean(centered^3) / second_moment^1.5
}

#' Classify a numeric vector by the sign and magnitude of its skewness
#'
#' @param vector A numeric vector. Missing values are ignored.
#' @return One of "Strongly Left Skewed", "Strongly Right Skewed",
#'   "Slightly Left Skewed", "Slightly Right Skewed", "Normal", or
#'   "Undetermined" when skewness cannot be computed (empty, all-missing or
#'   constant input).
determine_distribution <- function(vector) {
  skew <- compute_skewness(vector[!is.na(vector)])

  if (is.na(skew)) {
    # Guard: comparing NA in `if` would abort the whole script.
    "Undetermined"
  } else if (skew < -1) {
    "Strongly Left Skewed"
  } else if (skew > 1) {
    "Strongly Right Skewed"
  } else if (skew < 0) {
    "Slightly Left Skewed"
  } else if (skew > 0) {
    "Slightly Right Skewed"
  } else {
    # Skewness of exactly zero only shows symmetry; the label is kept for
    # compatibility with the original output.
    "Normal"
  }
}

# Report the distribution shape and any outliers for every numeric column
for (col in names(data)) {
  if (is.numeric(data[[col]])) {
    outliers <- detect_outliers(data[[col]])
    distribution <- determine_distribution(data[[col]])

    cat("Column:", col, "\n")
    cat("Distribution:", distribution, "\n")
    if (length(outliers) > 0) {
      cat("Outliers:", outliers, "\n")
    } else {
      cat("No outliers found.\n")
    }
    cat("\n")
  }
}