Data Manipulation with dplyr
    Data Manipulation with dplyr

    Run the hidden code cell below to import the data used in this course.

    # Print a compact overview of df: each variable with its data type and its
    # first few values
    df %>%
      glimpse()
    # Keep only the rows for the names Steven, Thomas, and Matthew
    names_filtered <- filter(
      names_normalized,
      name %in% c("Steven", "Thomas", "Matthew")
    )
    # Visualize these names over time: year on the x axis, fraction_max on the
    # y axis, one colored line per name. The geom_line() layer is required;
    # a ggplot() call ending in a bare `+` draws nothing and instead consumes
    # whatever expression follows it as its right-hand operand.
    ggplot(names_filtered, aes(x = year, y = fraction_max, color = name)) +
      geom_line()
    # Rename the unemployment column to unemployment_rate; every other column
    # is left as it is
    rename(counties, unemployment_rate = unemployment)
    # Select state and county, plus every column whose name contains "poverty"
    select(counties, state, county, contains("poverty"))
    # Add fraction_women (women divided by population) while keeping all of the
    # existing columns
    mutate(counties, fraction_women = women / population)
    # Return only state, county, and a newly computed employment_rate
    # (employed divided by population); all other columns are dropped
    transmute(
      counties,
      state,
      county,
      employment_rate = employed / population
    )
    # Total population for each combination of state and metro: group on both
    # columns, then sum population within each group
    counties_selected %>%
      group_by(state, metro) %>%
      summarise(
        total_pop = sum(population)
      )
    # For each name, compute its total (name_total) and its peak (name_max)
    # across all rows, then express each row's number as a fraction of that
    # peak (fraction_max). All three columns are built while grouped by name;
    # the grouping is removed afterwards so the result is an ungrouped table.
    babynames %>%
      group_by(name) %>%
      mutate(
        name_total = sum(number),
        name_max = max(number),
        fraction_max = number / name_max
      ) %>%
      ungroup()