# Hypothesis testing notes on late_shipments: chi-square tests and simulation-based inference
# NOTE(review): assumes dplyr, ggplot2, tibble and infer are attached and that
# `late_shipments` is already loaded -- confirm against the calling environment.

# Box plot ----
# Using late_shipments, plot pack_price vs. shipment_mode
# as a box plot with flipped x and y coordinates
ggplot(late_shipments, aes(shipment_mode, pack_price)) +
  geom_boxplot() +
  coord_flip()

# Chi-square test of independence ----
# Perform a chi-square test of independence on freight_cost_group and
# vendor_inco_term
test_results <- late_shipments %>%
  chisq_test(freight_cost_group ~ vendor_inco_term)

# Chi-square goodness of fit ----
# The test above compared proportions of two discrete variables. Now we
# compare one discrete variable to its hypothesized values (which we
# construct ourselves).

# Using late_shipments, count the vendor incoterms
vendor_inco_term_counts <- late_shipments %>%
  count(vendor_inco_term)

# Get the number of rows in the whole sample
n_total <- nrow(late_shipments)

hypothesized <- tribble(
  ~vendor_inco_term, ~prop,
  "EXW", 0.75,
  "CIP", 0.05,
  "DDP", 0.1,
  "FCA", 0.1
) %>%
  # Add a column of hypothesized counts for the incoterms
  mutate(n = prop * n_total)

# Using vendor_inco_term_counts, plot n vs. vendor_inco_term
ggplot(vendor_inco_term_counts, aes(vendor_inco_term, n)) +
  # Make it a (precalculated) bar plot
  geom_col() +
  # Add points from hypothesized (inherits the x = vendor_inco_term, y = n
  # mapping, so `hypothesized` must carry both columns)
  geom_point(data = hypothesized, color = "purple")

# Construct hypothesized props
hypothesized_props <- c(
  EXW = 0.75,
  CIP = 0.05,
  DDP = 0.1,
  FCA = 0.1
)

# Run chi-square goodness of fit test on vendor_inco_term
test_results <- late_shipments %>%
  chisq_test(
    response = vendor_inco_term,
    p = hypothesized_props
  )

# See the results
test_results

# Simulation-based testing ----
# Hypothesis testing process when assumptions (e.g. sample size across groups)
# are not met. This constructs a null distribution (with hypothesize and
# generate) and compares it to the actual values.
# Then it calculates (via calculate()) the test statistics of the null
# distribution
null_distn <- late_shipments %>%
  specify(
    late ~ freight_cost_group,
    success = "Yes"
  ) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 2000, type = "permute") %>%
  calculate(
    stat = "diff in props",
    order = c("expensive", "reasonable")
  )

# Visualize the null distribution
visualize(null_distn)

# Copy, paste, and modify the pipeline to get the observed statistic.
# (The hypothesize and generate steps are commented out below so that the
# pipeline computes the observed statistic, which is then compared with the
# null distribution.)
obs_stat <- late_shipments %>%
  specify(
    late ~ freight_cost_group,
    success = "Yes"
  ) %>%
  # hypothesize(null = "independence") %>%
  # generate(reps = 2000, type = "permute") %>%
  calculate(
    stat = "diff in props",
    order = c("expensive", "reasonable")
  )

# Visualize the null dist'n, adding a vertical line at the observed statistic
visualize(null_distn) +
  geom_vline(
    aes(xintercept = stat),
    data = obs_stat,
    color = "red"
  )

# Get the p-value
p_value <- get_p_value(null_distn, obs_stat, direction = "two sided")

# See the result
p_value

# Non-parametric tests ----
# The Wilcoxon-Mann-Whitney and Kruskal-Wallis tests are useful when you
# cannot satisfy the assumptions for parametric tests, and don't want the
# computational expense of simulation-based tests. So they are an alternative
# to the simulation approach (generate, hypothesize) from the previous step.
# They are rank-based, and they are the non-parametric alternatives to the
# t-test and ANOVA respectively.