[R] EDA: Motorcycle parts (Competition)
  • AI Chat
  • Code
  • Report
  • Beta
    Spinner
    # Global chunk options: echo the source code of every chunk and attach the
    # "code-background" CSS class to chunk output.
    knitr::opts_chunk$set(
      echo = TRUE,
      class.output = "code-background"
    )
    /* Look of elements carrying the "code-background" class:
       bold text on a light green background inside a brown frame. */
    .code-background {
      font-weight: bold;
      border: 3px solid brown;
      background-color: lightgreen;
    }
    
    # Attach the packages used throughout the report.
    library(tidyverse)
    library(viridisLite)

    # Default plot theme for the session: theme_bw() with centred title and
    # subtitle, larger axis text, and the legend below the panel.
    theme_set(
      theme_bw() +
        theme(
          plot.title = element_text(hjust = 0.5, size = 20),
          plot.subtitle = element_text(hjust = 0.5, size = 15),
          axis.text = element_text(size = 18),
          axis.title = element_text(size = 18),
          legend.position = "bottom"
        )
    )

    # Load the sales data (path is relative to the working directory).
    df <- readr::read_csv("./data/sales_data.csv")

    # Preview the first rows.
    head(df)

    1. What are the total sales for each payment method?

    # Per payment method: number of rows (transactions) and the summed `total`,
    # sorted from the largest to the smallest sum.
    sales_per_payment <- df %>%
      group_by(payment) %>%
      summarise(
        transactions = n(),
        total_sales_usd = sum(total)
      ) %>%
      arrange(desc(total_sales_usd))

    # Print the summary table.
    sales_per_payment
    

    2. What is the average unit price for each product line?

    # Mean `unit_price` for every product line, highest mean first.
    unit_price_per_line <- df %>%
      group_by(product_line) %>%
      summarise(
        mean_price = mean(unit_price)
      ) %>%
      arrange(desc(mean_price))

    # Print the summary table.
    unit_price_per_line
    

    3. Visualizing findings for questions 1 and 2

    # Lollipop chart of total sales per payment method. Point size encodes the
    # number of transactions; each point is labelled with the total rounded to
    # the nearest thousand.
    ggplot(sales_per_payment, aes(x = payment, y = total_sales_usd)) +
      # Stems: drawn from y = 0 up to each total.
      geom_segment(
        aes(xend = payment, yend = total_sales_usd, col = payment),
        y = 0,
        lwd = 1.5
      ) +
      # Heads: filled circles sized by transaction count.
      geom_point(
        aes(size = transactions, fill = payment),
        shape = 21
      ) +
      # Labels drawn on top of the heads.
      geom_text(
        aes(label = round(total_sales_usd, -3)),
        size = 5
      ) +
      # Payment methods on the vertical axis.
      coord_flip() +
      scale_size(name = "Number of\ntransactions", range = c(18, 40)) +
      scale_y_continuous(limits = c(10000, 170000)) +
      # Only the size legend is kept.
      guides(fill = "none", colour = "none") +
      theme(legend.key.size = unit(0.1, "cm")) +
      labs(
        x = "Payment method",
        y = "Total purchase value ($)"
      )
     
    # Unit price per product line: one jittered point per row of `df`, with the
    # mean of each product line overlaid as a red point.
    #
    # Product lines are ordered along the x axis by their mean unit price.
    # fct_reorder() orders by the median unless `.fun` is given, which could
    # disagree with the red mean markers this plot is meant to highlight.
    ggplot(
      df,
      aes(
        x = fct_reorder(product_line, unit_price, .fun = mean),
        y = unit_price,
        fill = product_line
      )
    ) +
      geom_point(
        pch = 21, col = "black", alpha = 0.5,
        # Horizontal jitter only; the fixed seed keeps the figure identical
        # from one render of the report to the next.
        position = position_jitter(width = 0.2, height = 0, seed = 1)
      ) +
      # Per-line mean, computed by ggplot2 from the plotted data.
      stat_summary(fun = mean, geom = "point", col = "red", size = 3) +
      scale_fill_viridis_d() +
      labs(x = "Product line", y = "Unit price ($)") +
      # The x axis already names each product line, so the fill legend is
      # dropped; labels are rotated to avoid overlap.
      theme(legend.position = "none",
            axis.text.x = element_text(angle = 45, hjust = 1))
    

    4. Optional questions

    What is the total purchase value by product line and customer type?