Skip to content
Data Manipulation with dplyr
  • AI Chat
  • Code
  • Report
  • Spinner

    Data Manipulation with dplyr

    Run the hidden code cell below to import the data used in this course.

    Take Notes

    Add notes about the concepts you've learned and code cells with code you want to keep.

    Add your notes here

    # Add your code snippets here
    
    library(dplyr)
    
    glimpse(df) # shows the first few values of each variable, along with its data type
    
    
    # Keep only the rows for Steven, Thomas, and Matthew.
    # %in% tests membership against the whole set at once, so adding a name
    # means extending the vector rather than chaining another `==` with `|`.
    names_filtered <- names_normalized %>%
      filter(name %in% c("Steven", "Thomas", "Matthew"))
    
    # Plot fraction_max against year, drawing one colored line per name.
    # The calls are namespace-qualified because the visible code attaches only
    # dplyr; this form works whether or not ggplot2 has been attached elsewhere.
    ggplot2::ggplot(
      names_filtered,
      ggplot2::aes(x = year, y = fraction_max, color = name)
    ) +
      ggplot2::geom_line()
    
    
    # Rename unemployment to unemployment_rate; every other column is unchanged
    rename(counties, unemployment_rate = unemployment)
    
    # State and county, plus every column whose name contains "poverty"
    select(counties, state, county, contains("poverty"))
    
    # Add fraction_women (women / population) while keeping the existing columns
    mutate(counties, fraction_women = women / population)
    
    # Compute employment_rate (employed / population), then keep only the state,
    # county, and employment_rate columns, in that order.
    # mutate() + select() replaces transmute(), which is superseded in current
    # dplyr; this form yields the same columns on any dplyr version.
    counties %>%
      mutate(employment_rate = employed / population) %>%
      select(state, county, employment_rate)
    
    # Total population for each combination of state and metro.
    counties_selected %>%
      group_by(state, metro) %>%
      summarize(total_pop = sum(population)) %>%
      # summarize() only peels off the last grouping level, so the result would
      # otherwise stay grouped by state; ungroup so later verbs see a plain table.
      ungroup()
    
    
    # For every name, attach the sum and the maximum of number across that
    # name's rows, then express each row's number as a share of that maximum
    babynames %>%
      group_by(name) %>%
      mutate(
        name_total = sum(number),
        name_max = max(number)
      ) %>%
      # Remove the grouping so the next step is no longer computed per name
      ungroup() %>%
      # fraction_max = number divided by the name's maximum
      mutate(fraction_max = number / name_max)