Skip to content
Wrangling and Visualizing Musical Data
  • AI Chat
  • Code
  • Report
  • Spinner

    Project: Wrangling and Visualizing Musical Data
    from the Billboard Chart Dataset

    # Loading individual Tidyverse packages
    # .... YOUR CODE FOR TASK 1 ....
    
    library(dplyr)
    library(readr)
    library(ggplot2)
    # Import the McGill Billboard chord data from the datasets folder
    bb <- read_csv(file = "datasets/bb_chords.csv")

    # Print the tibble to inspect its first rows
    bb

    # Write a copy of the imported data to bb.rds in the working directory
    saveRDS(object = bb, file = "bb.rds")
    Hidden output
    Run cancelled
    # Tally how often each chord occurs, most frequent chord first
    bb_count <- count(bb, chord, sort = TRUE)

    # Show the 20 most common chords
    bb_count %>%
      head(n = 20)
    Run cancelled
    # Bar plot of the 20 most common chords.
    # bb_count is expected to be sorted by descending n, so slice(1:20)
    # keeps the top 20 chords.
    bb_count %>%
      # Compute each chord's share (in percent) against ALL chords before
      # slicing; computing it after slice() would only give the share within
      # the top 20, which contradicts the "Share of total chords" axis label.
      mutate(share = (n / sum(n)) * 100) %>%
      slice(1:20) %>%
      # Order the bars by share so the plot reads from most to least common
      mutate(chord = reorder(chord, share)) %>%
      ggplot(aes(x = chord, y = share, fill = chord)) +
      geom_col(show.legend = FALSE) +
      coord_flip() +
      xlab("Chord") +
      ylab("Share of total chords")
    Run cancelled
    # Wrangling and counting bigrams (pairs of consecutive chords)
    bb_bigram_count <- bb %>%
      # Pair every row with the chord and song title of the row that follows it
      mutate(next_chord = lead(chord),
             next_title = lead(title),
             bigram = paste(chord, next_chord, sep = " ")) %>%
      # Keep only pairs whose two chords belong to the same title; this also
      # drops the final row, where lead() yields NA and the comparison is NA
      filter(title == next_title) %>%
      count(bigram, sort = TRUE)

    # Displaying the first 20 rows of bb_bigram_count
    head(bb_bigram_count, 20)
    Hidden output
    Run cancelled
    # Column plot of the 20 most common chord bigrams.
    # bb_bigram_count is expected to be sorted by descending n, so slice(1:20)
    # keeps the top 20 bigrams.
    bb_bigram_count %>%
      # Compute each bigram's share (in percent) against ALL bigrams before
      # slicing; computing it after slice() would only give the share within
      # the top 20, which contradicts the "Share of total bigrams" axis label.
      mutate(share = (n / sum(n)) * 100) %>%
      slice(1:20) %>%
      # Order the bars by share so the plot reads from most to least common
      mutate(bigram = reorder(bigram, share)) %>%
      ggplot(aes(x = bigram, y = share, fill = bigram)) +
      geom_col(show.legend = FALSE) +
      coord_flip() +
      xlab("Bigram") +
      ylab("Share of total bigrams")
    Run cancelled
    # Finding 30 artists with the most songs in the corpus
    bb_30_artists <- bb %>%
      # bb has many rows per song, so reduce it to one row per distinct
      # artist/title pair. distinct() replaces the earlier
      # unique(by = "title"), whose `by` argument is ignored for data frames.
      distinct(artist, title) %>%
      # Number of songs per artist, artists with the most songs first
      count(artist, sort = TRUE)

    # Displaying 30 artists with the most songs in the corpus
    print(bb_30_artists, n = 30)
    Run cancelled
    # Lookup table assigning an instrument tag to seven selected artists:
    # the first four are tagged "piano", the remaining three "guitar"
    tags <- tibble(
      artist = c(
        "Abba", "Billy Joel", "Elton John", "Stevie Wonder",
        "The Rolling Stones", "The Beatles", "Eric Clapton"
      ),
      instrument = rep(c("piano", "guitar"), times = c(4, 3))
    )

    # Build bb_tagged: the rows of bb whose artist appears in tags, with the
    # matching instrument column attached (inner join drops all other artists)
    bb_tagged <- inner_join(bb, tags, by = "artist")

    # Show the tagged data frame
    bb_tagged