Anna Baranowska-Clarke/

Data Manipulation with dplyr


Data Manipulation with dplyr

Run the hidden code cell below to import the data used in this course.

Take Notes

Add notes about the concepts you've learned and code cells with code you want to keep.

Add your notes here

# Add your code snippets here


glimpse(df) # allows to view the first few values from each variable, along with the data type

names_filtered <- names_normalized %>%
  # Filter for the names Steven, Thomas, and Matthew
  filter(name == "Steven"| name == "Thomas" | name == "Matthew")

# Visualize these names over time
ggplot(names_filtered, aes(x = year, y = fraction_max, color = name)) + 

# Change the name of the unemployment column
counties %>%
  rename(unemployment_rate = unemployment)

# Keep the state and county columns, and the columns containing poverty
counties %>%
  select(state, county, contains("poverty"))

# Calculate the fraction_women column without dropping the other columns
counties %>%
  mutate(fraction_women = women / population)

# Keep only the state, county, and employment_rate columns
counties %>%
  transmute(state, county, employment_rate = employed / population)

counties_selected %>%
  # Find the total population for each combination of state and metro
  group_by(state, metro) %>%
  summarize(total_pop = sum(population))

babynames %>%
  # Add columns name_total and name_max for each name
  group_by(name) %>%
  mutate(name_total = sum(number),
         name_max = max(number)) %>%
  # Ungroup the table 
  ungroup() %>%
  # Add the fraction_max column containing the number by the name maximum 
  mutate(fraction_max = number / name_max)
  • AI Chat
  • Code