Skip to content
# Project: Wrangling and Visualizing Music Data from the Billboard Chart Dataset
# Loading individual Tidyverse packages
# .... YOUR CODE FOR TASK 1 ....
library(dplyr)
library(readr)
library(ggplot2)
# Load the McGill Billboard chord data from its CSV file
bb <- read_csv(file = "datasets/bb_chords.csv")

# Preview the first rows of the chord data
bb

# Store a copy of the parsed data as an RDS file in the working directory
saveRDS(object = bb, file = "bb.rds")
Hidden output
Run cancelled
# Tally how often each chord occurs, most frequent chord first
bb_count <- count(bb, chord, sort = TRUE)

# Show the 20 most common chords
head(bb_count, n = 20)
Run cancelled
# Creating a horizontal bar plot of the 20 most common chords.
# bb_count is sorted by descending count, so its first 20 rows are the top 20.
bb_count %>%
  # Compute the percentage over ALL chords before trimming to the top 20;
  # computing it after slice() would give each chord's share of the top 20
  # only, contradicting the "Share of total chords" axis label.
  mutate(share = (n / sum(n)) * 100) %>%
  slice(1:20) %>%
  # Order the factor by share so the most common chord is drawn at the top
  # once the axes are flipped
  mutate(chord = reorder(chord, share)) %>%
  ggplot(aes(x = chord, y = share, fill = chord)) +
  # geom_col() is the direct form of geom_bar(stat = "identity"); the legend
  # is hidden because the fill colour only repeats the axis labels
  geom_col(show.legend = FALSE) +
  coord_flip() +
  xlab("Chord") +
  ylab("Share of total chords")
Run cancelled
# Wrangling and counting bigrams (pairs of consecutive chords).
# NOTE(review): assumes the rows of bb are stored in playing order within each
# song, since lead() simply looks at the next row -- confirm against the data.
bb_bigram_count <- bb %>%
  # Pair every row with the chord and the song title found on the next row
  mutate(
    next_chord = lead(chord),
    next_title = lead(title),
    bigram = paste(chord, next_chord, sep = " ")
  ) %>%
  # Keep only pairs whose two chords carry the same title, so a bigram never
  # spans two songs; this also drops the last row, where lead() yields NA
  filter(title == next_title) %>%
  # One row per distinct bigram, most frequent first
  count(bigram, sort = TRUE)

# Displaying the first 20 rows of bb_bigram_count
head(bb_bigram_count, 20)
Hidden output
Run cancelled
# Creating a horizontal column plot of the 20 most common bigrams.
# bb_bigram_count is sorted by descending count, so its first 20 rows are the
# top 20.
bb_bigram_count %>%
  # Compute the percentage over ALL bigrams before trimming to the top 20;
  # computing it after slice() would give each bigram's share of the top 20
  # only, contradicting the "Share of total bigrams" axis label.
  mutate(share = (n / sum(n)) * 100) %>%
  slice(1:20) %>%
  # Order the factor by share so the most common bigram is drawn at the top
  # once the axes are flipped
  mutate(bigram = reorder(bigram, share)) %>%
  ggplot(aes(x = bigram, y = share, fill = bigram)) +
  # geom_col() is the direct form of geom_bar(stat = "identity"); the legend
  # is hidden because the fill colour only repeats the axis labels
  geom_col(show.legend = FALSE) +
  coord_flip() +
  xlab("Bigram") +
  ylab("Share of total bigrams")
Run cancelled
# Finding the artists with the most songs in the corpus.
# bb_30_artists holds every artist, sorted by song count; only the printout
# below is limited to the top 30.
bb_30_artists <- bb %>%
  # Collapse bb to one row per artist/title pair so each song is counted once.
  # distinct() states the deduplication key explicitly; the previous
  # unique(by = "title") used a data.table-style argument that base unique()
  # does not act on for data frames, so it already deduplicated on both columns.
  distinct(artist, title) %>%
  # Number of songs per artist, largest first
  count(artist, sort = TRUE)

# Displaying 30 artists with the most songs in the corpus
print(bb_30_artists, n = 30)
Run cancelled
# Lookup table tagging seven artists with an instrument:
# the first four are tagged "piano", the last three "guitar"
tags <- tibble(
  artist = c(
    "Abba", "Billy Joel", "Elton John", "Stevie Wonder",
    "The Rolling Stones", "The Beatles", "Eric Clapton"
  ),
  instrument = rep(c("piano", "guitar"), times = c(4, 3))
)
# Build bb_tagged: the rows of bb whose artist appears in tags, with the
# matching instrument column from tags attached
bb_tagged <- inner_join(x = bb, y = tags, by = "artist")

# Print the tagged data frame
bb_tagged
# .... YOUR CODE FOR TASK 7 ....