HR Analytics: Exploring Employee Data with R
  • AI Chat
  • Code
  • Report
  • Beta
    Spinner

    HR Analytics: Exploring Employee Data with R

    # Import libraries needed
    library(dplyr)
    library(tidyr)
    library(readr)
    library(broom)
    library(ggplot2)
    # Each dataset is fetched from its remote CSV location and parsed with
    # readr; show_col_types = FALSE silences the column-specification message.

    # hr_data.csv
    hr_data_url <- "https://assets.datacamp.com/production/course_5977/datasets/hr_data.csv"
    hr_data <- hr_data_url %>% read_csv(show_col_types = FALSE)

    # fair_pay_data.csv
    fair_pay_data_url <- "https://assets.datacamp.com/production/course_5977/datasets/fair_pay_data.csv"
    fair_pay_data <- fair_pay_data_url %>% read_csv(show_col_types = FALSE)

    # survey_data.csv
    survey_data_url <- "https://assets.datacamp.com/production/course_5977/datasets/survey_data.csv"
    survey_data <- survey_data_url %>% read_csv(show_col_types = FALSE)
    
    # Preview the first rows, then the column names and types, of each dataset
    hr_data %>% head()
    hr_data %>% glimpse()

    fair_pay_data %>% head()
    fair_pay_data %>% glimpse()

    survey_data %>% head()
    survey_data %>% glimpse()
    # Attach the fair-pay records to the survey responses. all.x = TRUE keeps
    # every survey row even when no fair-pay row shares the same employee id,
    # department and salary.
    merged <- merge(
      x = survey_data,
      y = fair_pay_data,
      by = c("employee_id", "department", "salary"),
      all.x = TRUE
    )

    merged %>% head()
    merged %>% glimpse()

    # Combine with the HR records. merge() defaults to all = FALSE, so only rows
    # matching on employee id, department and job level in both inputs are kept.
    data <- merge(
      x = merged,
      y = hr_data,
      by = c("employee_id", "department", "job_level")
    )

    data %>% head()
    data %>% glimpse()

    # Total number of NA cells in the combined data frame
    data %>% is.na() %>% sum()
    # Average salary within each department
    mean_salary_for_dep <- summarise(
      group_by(data, department),
      mean_salary = mean(salary)
    )

    # One bar per department, filled from a fixed three-colour palette. The
    # legend is hidden because the x axis already names the departments.
    ggplot(
      data = mean_salary_for_dep,
      mapping = aes(x = department, y = mean_salary, fill = department)
    ) +
      geom_col() +
      scale_fill_manual(values = c("darkred", "darkgreen", "darkblue")) +
      theme(legend.position = "none")
    # Are employees in finance earning the same as others?

    # Flag finance employees (1) versus everyone else (0); the flag is the
    # two-level grouping variable for the t-test below.
    data <- data %>%
      mutate(is_finance = ifelse(department == "Finance", 1, 0))

    # t-test of salary by finance flag, tidied by broom into a one-row tibble
    test <- t.test(salary ~ is_finance, data = data) %>%
      tidy()

    # Show the p-value and the test statistic as labelled columns.
    # pull() extracts a single column and treats its second argument as the
    # element *name*, so it would print the statistic as a label on the p-value
    # rather than as a value of its own; select() reports both.
    test %>%
      select(p.value, statistic)

    # Decision at the 5% significance level
    if (test$p.value < 0.05) {
      print("Null hypothesis rejected at 95% CL")
    } else {
      print("Failed to reject null hypothesis at 95% CL")
    }
    # Average salary for every department/gender combination.
    # .groups = "drop" returns an ungrouped tibble; without it summarize() keeps
    # the result grouped by department and prints a message about the grouping.
    mean_salary_for_dep_and_gender <- data %>%
      group_by(department, gender) %>%
      summarize(mean_salary = mean(salary), .groups = "drop")

    # Bars of mean salary per department, one facet per gender. The legend is
    # hidden because the x axis already names the departments.
    ggplot(
      mean_salary_for_dep_and_gender,
      aes(x = department, y = mean_salary, fill = department)
    ) +
      geom_col() +
      facet_wrap(~gender) +
      scale_fill_manual(values = c("darkred", "darkgreen", "darkblue")) +
      theme(legend.position = "none")
    # Are gender and department independent?

    # Chi-squared test on the department x gender contingency table, tidied by
    # broom into a one-row tibble
    test <- chisq.test(data$department, data$gender) %>%
      tidy()

    # Show the p-value and the test statistic as labelled columns.
    # pull() extracts a single column and treats its second argument as the
    # element *name*, so it would print the statistic as a label on the p-value
    # rather than as a value of its own; select() reports both.
    test %>%
      select(p.value, statistic)

    # Decision at the 5% significance level
    if (test$p.value < 0.05) {
      print("Null hypothesis rejected at 95% CL")
    } else {
      print("Failed to reject null hypothesis at 95% CL")
    }
    # Average salary for every department/job-level combination.
    # .groups = "drop" returns an ungrouped tibble; without it summarize() keeps
    # the result grouped by department and prints a message about the grouping.
    mean_salary_for_dep_and_level <- data %>%
      group_by(department, job_level) %>%
      summarize(mean_salary = mean(salary), .groups = "drop")

    # Bars of mean salary per job level, one facet per department. The legend
    # is hidden because the x axis already names the job levels.
    ggplot(
      mean_salary_for_dep_and_level,
      aes(x = job_level, y = mean_salary, fill = job_level)
    ) +
      geom_col() +
      facet_wrap(~department) +
      scale_fill_manual(values = c("lightblue", "darkolivegreen", "darkcyan")) +
      theme(legend.position = "none")
    # Are salaried employees earning the same as hourly ones?

    # Drop managers so that only the remaining job levels are compared
    salaried_hourly <- data %>%
      filter(job_level != "Manager")

    # t-test of salary by job level, tidied by broom into a one-row tibble
    test <- t.test(salary ~ job_level, data = salaried_hourly) %>%
      tidy()

    # Show the p-value and the test statistic as labelled columns.
    # pull() extracts a single column and treats its second argument as the
    # element *name*, so it would print the statistic as a label on the p-value
    # rather than as a value of its own; select() reports both.
    test %>%
      select(p.value, statistic)

    # Decision at the 5% significance level
    if (test$p.value < 0.05) {
      print("Null hypothesis rejected at 95% CL")
    } else {
      print("Failed to reject null hypothesis at 95% CL")
    }
    # Row counts per gender, each bar split into segments filled by job level
    # from a fixed three-colour palette
    ggplot(data = data, mapping = aes(x = gender, fill = job_level)) +
      geom_bar() +
      scale_fill_manual(values = c("lightblue", "darkolivegreen", "darkcyan"))
    # Are gender and job level independent?

    # Chi-squared test on the gender x job-level contingency table, tidied by
    # broom into a one-row tibble
    test <- chisq.test(data$gender, data$job_level) %>%
      tidy()

    # Show the p-value and the test statistic as labelled columns.
    # pull() extracts a single column and treats its second argument as the
    # element *name*, so it would print the statistic as a label on the p-value
    # rather than as a value of its own; select() reports both.
    test %>%
      select(p.value, statistic)

    # Decision at the 5% significance level
    if (test$p.value < 0.05) {
      print("Null hypothesis rejected at 95% CL")
    } else {
      print("Failed to reject null hypothesis at 95% CL")
    }