Last active
June 20, 2025 16:17
-
-
Save thinkphp/62867e03840b9f8bf08308d5a1bf0dd1 to your computer and use it in GitHub Desktop.
part3 #question 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse)
library(readxl)

# Load the training participation survey. Every later step in this script
# reads from `training_data`. The path is relative to the working directory.
training_data <- read_excel("training_participation_survey.xlsx")
# Question 1: error rates in variables ----
# Each rate is the percentage of ALL survey rows whose value falls outside the
# accepted set or range for that variable.
cat("\nPART 3 - Question 1: Error Rates\n")

# Program status must match one of these labels exactly. `%in%` never returns
# NA, so a missing status is counted as an error here.
valid_program_status <- c(
  "Completed and Passed",
  "Completed but Failed",
  "Dropped Out"
)
program_status_errors <-
  sum(!training_data$program_status %in% valid_program_status) /
  nrow(training_data) * 100
cat("Program Status Error Rate: ", round(program_status_errors, 1), "%\n")

# Satisfaction score is on a 1-5 scale; anything outside it is an error.
# The argument must be spelled `na.rm`: an unknown name such as `na_rm` is
# silently summed as an extra value and does not drop NAs. With `na.rm = TRUE`
# missing scores are simply not counted as errors.
satisfaction_errors <- sum(
  training_data$satisfaction_score < 1 | training_data$satisfaction_score > 5,
  na.rm = TRUE
) / nrow(training_data) * 100
cat("Satisfaction Score Error Rate: ", round(satisfaction_errors, 1), "%\n")

# 0-2000 hours is treated as the reasonable interval; anything outside this
# interval is considered an error. Missing hours are not counted as errors.
training_hours_errors <- sum(
  training_data$training_hours < 0 | training_data$training_hours > 2000,
  na.rm = TRUE
) / nrow(training_data) * 100
cat("Training Hours Error Rate: ", round(training_hours_errors, 1), "%\n")
# vec <- c("Completed and Passed","Completed but Failed","Dropped Out") | |
# Question 2 ----
# Clean the data for questions 2 - 4. The cleaned values are written to new
# `*_clean` columns; the raw columns are left untouched.
# NOTE: the object name `traning_clean` (sic) is kept because later code in
# this script refers to it by that spelling.
traning_clean <- training_data %>%
  mutate(
    # Map misspelt / differently-cased status labels onto the three canonical
    # labels. Matching is case-insensitive; values that match none of the
    # patterns (including NA) are passed through unchanged.
    program_status_clean = case_when(
      str_detect(tolower(program_status), "completed.*passed") ~
        "Completed and Passed",
      str_detect(tolower(program_status), "completed.*failed") ~
        "Completed but Failed",
      str_detect(tolower(program_status), "dropped") ~ "Dropped Out",
      TRUE ~ program_status
    ),
    # Satisfaction score (1-5 scale): out-of-range values are replaced by the
    # median of the in-range scores. NA scores stay NA, because `ifelse()`
    # propagates an NA condition.
    satisfaction_score_clean = ifelse(
      satisfaction_score >= 1 & satisfaction_score <= 5,
      satisfaction_score,
      median(
        satisfaction_score[satisfaction_score >= 1 & satisfaction_score <= 5],
        na.rm = TRUE
      )
    ),
    # Training hours (0-2000 accepted): out-of-range values are replaced by
    # the median of the in-range hours. The median is taken from
    # `training_hours` itself, the same column the range check is applied to.
    training_hours_clean = ifelse(
      training_hours >= 0 & training_hours <= 2000,
      training_hours,
      median(
        training_hours[training_hours >= 0 & training_hours <= 2000],
        na.rm = TRUE
      )
    )
  )
# Question 2: mean cleaned satisfaction score for every combination of
# program status and job-offer outcome, shown as one row per program status
# and one `job_offer_*` column per distinct value of `job_offer_within_3mo`.
cat("PART 3 - Question 2: Mean Satisfaction by Program Status and Job Offer\n")
satisfaction_analysis <- traning_clean %>%
  # Group first: without this, summarise() collapses everything to a single
  # row and the job-offer column needed by pivot_wider() no longer exists.
  group_by(program_status_clean, job_offer_within_3mo) %>%
  summarise(
    mean_satisfaction = mean(satisfaction_score_clean, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = job_offer_within_3mo,
    values_from = mean_satisfaction,
    names_prefix = "job_offer_"
  )
print(satisfaction_analysis)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment