Skip to content

Instantly share code, notes, and snippets.

@thinkphp
Last active June 20, 2025 16:17
Show Gist options
  • Save thinkphp/62867e03840b9f8bf08308d5a1bf0dd1 to your computer and use it in GitHub Desktop.
Save thinkphp/62867e03840b9f8bf08308d5a1bf0dd1 to your computer and use it in GitHub Desktop.
part3 #question 2
library(tidyverse)
library(readxl)
# Load the training data; the workbook is read relative to the working
# directory.
training_data <- read_excel("training_participation_survey.xlsx")

# Question 1: error rates in variables ----
cat("\nPART 3 - Question 1: Error Rates\n")

# Every error rate below is expressed as a percentage of all rows.
n_rows <- nrow(training_data)

# Program status: only these three labels are accepted. `%in%` never returns
# NA, so a missing status is counted as an error as well.
valid_program_status <- c(
  "Completed and Passed",
  "Completed but Failed",
  "Dropped Out"
)
program_status_errors <-
  sum(!training_data$program_status %in% valid_program_status) / n_rows * 100
cat("Program Status Error Rate: ", round(program_status_errors, 1),"%\n")

# Satisfaction score is on a 1-5 scale; values outside it are errors.
# The check must look at `satisfaction_score` (not `training_hours`), and the
# argument is `na.rm`: `na_rm = TRUE` would be summed as an extra 1 and would
# leave NAs in place.
satisfaction_errors <- sum(
  training_data$satisfaction_score < 1 | training_data$satisfaction_score > 5,
  na.rm = TRUE
) / n_rows * 100
cat("Satisfaction Score Error Rate: ", round(satisfaction_errors, 1),"%\n")

# Training hours: 0-2000 is treated as the reasonable interval; anything
# outside it is considered an error. Missing values are not counted.
training_hours_errors <- sum(
  training_data$training_hours < 0 | training_data$training_hours > 2000,
  na.rm = TRUE
) / n_rows * 100
cat("Training Hours Error Rate: ", round(training_hours_errors, 1),"%\n")
# vec <- c("Completed and Passed","Completed but Failed","Dropped Out")
# Question 2 ----
# Clean the data for questions 2 - 4.
# NOTE: the object name `traning_clean` (sic) is kept as-is because the
# analysis code further down refers to it by this name.
traning_clean <- training_data %>%
  mutate(
    # Normalise program status spelling/case variants to the canonical labels.
    # Each clause must use `~` (formula); values matching none of the patterns
    # are passed through unchanged.
    program_status_clean = case_when(
      str_detect(tolower(program_status), "completed.*passed") ~
        "Completed and Passed",
      str_detect(tolower(program_status), "completed.*failed") ~
        "Completed but Failed",
      str_detect(tolower(program_status), "dropped") ~ "Dropped Out",
      TRUE ~ program_status
    ),
    # Satisfaction score (1-5 scale): out-of-range values are replaced with
    # the median of the in-range values. Missing scores stay NA because the
    # range test itself evaluates to NA for them.
    satisfaction_score_clean = ifelse(
      satisfaction_score >= 1 & satisfaction_score <= 5,
      satisfaction_score,
      median(
        satisfaction_score[satisfaction_score >= 1 & satisfaction_score <= 5],
        na.rm = TRUE
      )
    ),
    # Training hours (0-2000 accepted): out-of-range values are replaced with
    # the median of the in-range `training_hours` values.
    training_hours_clean = ifelse(
      training_hours >= 0 & training_hours <= 2000,
      training_hours,
      median(
        training_hours[training_hours >= 0 & training_hours <= 2000],
        na.rm = TRUE
      )
    )
  )
cat("PART 3 - Question 2: Mean Satisfaction by Program Status and Job Offer\n")

# Mean cleaned satisfaction score for every (program status, job offer)
# combination, reshaped so that each program status is one row and each
# job-offer value becomes its own `job_offer_*` column.
satisfaction_analysis <- traning_clean %>%
  # Grouping is required: without it summarise() collapses everything to a
  # single row and drops `job_offer_within_3mo`, which pivot_wider() needs.
  group_by(program_status_clean, job_offer_within_3mo) %>%
  summarise(
    mean_satisfaction = mean(satisfaction_score_clean, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = job_offer_within_3mo,
    values_from = mean_satisfaction,
    names_prefix = "job_offer_"
  )
print(satisfaction_analysis)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment