library(jsmp)
library(janitor)
# Export the ranking columns of the two Excel workbooks under files/ as TSV.
#
# Reads the top-songs and top-albums spreadsheets, keeps the rank column
# (renamed to Critic_rank) together with Artist, the title column and Year,
# and writes files/acclaimed_songs.tsv and files/acclaimed_albums.tsv.
# The rank column's header contains a CR/LF line break, hence the escaped
# name. Takes no arguments; returns the value of the final write_tsv() call.
excel_to_tsv <- function() {
  song_sheet <- read_excel("files/top_10000_songs_180715.xlsx")
  song_ranks <- select(
    song_sheet,
    Critic_rank = "PLACE\r\n2018-JUL-15", Artist, Song, Year
  )
  write_tsv(song_ranks, "files/acclaimed_songs.tsv")

  album_sheet <- read_excel("files/top_3000_albums_180715.xlsx")
  album_ranks <- select(
    album_sheet,
    Critic_rank = "PLACE\r\n2018-JUL-15", Artist, Album, Year
  )
  write_tsv(album_ranks, "files/acclaimed_albums.tsv")
}
# Correction tables, one TSV per field. The correct_*() helpers read their
# `old` and `new` columns.
artist_c <- read_tsv("files/artist_corrections.tsv")
album_c <- read_tsv("files/album_corrections.tsv")
song_c <- read_tsv("files/song_corrections.tsv")

# Album titles that correct_albums() drops from any table it processes.
exclude_albums <- c(
  "1",
  "Saturday Night Fever",
  "The Bodyguard",
  "Grease: The Original Soundtrack"
)
# Apply the song-title corrections to a table, then title-case the titles.
#
# df: data frame with a `song` column.
# Returns df with `song` passed through str_replace_all() using the song_c
# lookup and then through tools::toTitleCase().
#
# NOTE(review): str_replace_all() treats the *names* of the lookup as regex
# patterns and the values as replacements, so matches of song_c$new are
# replaced by song_c$old -- confirm the columns are named with that in mind,
# and that no pattern contains unescaped regex metacharacters.
correct_songs <- function(df) {
  song_lookup <- setNames(song_c$old, song_c$new)
  df |>
    mutate(
      song = stringr::str_replace_all(song, song_lookup),
      song = tools::toTitleCase(as.character(song))
    )
}
# Apply the album-title corrections, title-case the titles and drop the
# excluded albums.
#
# df: data frame with an `album` column.
# Returns df with `album` passed through str_replace_all() using the album_c
# lookup and then tools::toTitleCase(), minus every row whose resulting
# title appears in exclude_albums.
#
# NOTE(review): str_replace_all() treats the *names* of the lookup as regex
# patterns and the values as replacements, so matches of album_c$new are
# replaced by album_c$old -- confirm the columns are named with that in mind.
correct_albums <- function(df) {
  album_lookup <- setNames(album_c$old, album_c$new)
  retitled <- df |>
    mutate(
      album = stringr::str_replace_all(album, album_lookup),
      album = tools::toTitleCase(as.character(album))
    )
  # The exclusion is matched against the corrected, title-cased names.
  filter(retitled, !album %in% exclude_albums)
}
# Apply the artist-name corrections to a table, then title-case the names.
#
# df: data frame with an `artist` column.
# Returns df with `artist` passed through str_replace_all() using the
# artist_c lookup and then through tools::toTitleCase().
#
# NOTE(review): str_replace_all() treats the *names* of the lookup as regex
# patterns and the values as replacements, so matches of artist_c$new are
# replaced by artist_c$old -- confirm the columns are named with that in mind.
correct_artists <- function(df) {
  artist_lookup <- setNames(artist_c$old, artist_c$new)
  df |>
    mutate(
      artist = stringr::str_replace_all(artist, artist_lookup),
      artist = tools::toTitleCase(as.character(artist))
    )
}
# Input tables. Each one goes through fix_names() (defined outside this
# file; presumably normalises column names -- confirm) and then through the
# correction helpers for the fields it contains.

# Best-selling songs: artist, title and chart position (as public_rank).
selling_songs <- read_csv("files/selling_songs.csv") |>
  fix_names() |>
  select(artist, song = name, public_rank = position) |>
  correct_artists() |>
  correct_songs()

# Critics' song list, the TSV written by excel_to_tsv().
acclaimed_songs <- read_tsv("files/acclaimed_songs.tsv") |>
  fix_names() |>
  correct_artists() |>
  correct_songs()

# Best-selling albums: artist, title, year and chart position.
# NOTE(review): this is the only table read with base read.csv() rather
# than readr -- confirm that is intentional.
selling_albums <- read.csv("files/selling_albums.csv") |>
  fix_names() |>
  select(artist, album = name, year, public_rank = position) |>
  correct_albums() |>
  correct_artists()

# Critics' album list, the TSV written by excel_to_tsv().
acclaimed_albums <- read_tsv("files/acclaimed_albums.tsv") |>
  fix_names() |>
  correct_albums() |>
  correct_artists()

# Semicolon-delimited genre table; every column is read as character.
genres <- read_delim(
  "files/genres.csv",
  delim = ";",
  col_types = cols(.default = "c")
) |>
  fix_names()
# Rank that rankify() substitutes for entries missing from a list; with the
# default arguments get_rank() scores this rank as exactly 0.
high_n <- 7000

# Convert ranks to scores on a double-log scale.
#
# col:      numeric vector of ranks; smaller ranks score higher.
# max_rank: the rank that scores exactly 0 (defaults to high_n).
# Returns log(log(10 + max_rank)) - log(log(10 + col)), elementwise.
# Ranks smaller than max_rank score positive, larger ones negative.
#
# NOTE(review): the songs workbook name suggests ranks run past high_n, in
# which case those entries score below an unlisted one -- confirm intended.
get_rank <- function(col, max_rank = high_n) {
  log(log(10 + max_rank)) - log(log(10 + col))
}
# Add rank-based scores to a joined critic/public ranking table.
#
# df: data frame with numeric `public_rank` and `critic_rank` columns; NA
#     marks an entry that is absent from that list.
# Returns df with four added columns:
#   public_value, critic_value  get_rank() of each rank, with a missing rank
#                               scored as if it were high_n
#   dif                         critic_value - public_value
#   sum                         critic_value + public_value
# The rank columns themselves are returned unchanged.
rankify <- function(df) {
  # Substitute the sentinel only inside the score computation. Writing it
  # into the rank columns and converting it back to NA afterwards would also
  # blank out any entry whose genuine rank happens to equal high_n.
  fill_missing <- function(rank) replace(rank, is.na(rank), high_n)
  df |>
    mutate(
      public_value = get_rank(fill_missing(public_rank)),
      critic_value = get_rank(fill_missing(critic_rank)),
      dif = critic_value - public_value,
      sum = critic_value + public_value
    )
}
# Combine the critic and sales tables. A full join keeps entries that appear
# in only one of the two lists; rankify() then adds the score columns.
songs <- acclaimed_songs |>
  full_join(selling_songs, by = c("artist", "song")) |>
  rankify()
albums <- acclaimed_albums |>
  full_join(selling_albums, by = c("artist", "album")) |>
  rankify()

# Total critic score per year. For albums the year is taken from year.x,
# the left-hand (acclaimed_albums) side of the join.
albums_year <- albums |>
  mutate(year = year.x) |>
  group_by(year) |>
  summarise(acclaim_albums = sum(critic_value))
songs_year <- songs |>
  group_by(year) |>
  summarise(acclaim_songs = sum(critic_value))

# Years present in both summaries, restricted to 1950 onwards.
acclaim <- albums_year |>
  inner_join(songs_year, by = "year") |>
  mutate(acclaim = acclaim_songs + acclaim_albums) |>
  filter(year > 1949)
# Bar chart of acclaim per year, with bars coloured by decade.
#
# df: data frame with numeric `year` and `acclaim` columns.
# Returns the ggplot object.
plot_acclaim <- function(df) {
  df |>
    # Left-closed bins of width 10, used only to pick the fill colour.
    mutate(decade = cut_interval(year, length = 10, right = FALSE)) |>
    ggplot(aes(x = year, y = acclaim, fill = decade)) +
    geom_col() +
    theme(
      axis.text.x = element_text(face = "bold", color = "#993333", size = 11),
      text = element_text(size = 15)
    ) +
    # The axis labels already name the decades, so the fill legend is hidden.
    guides(fill = "none") +
    scale_x_continuous(
      breaks = c(1955, 1965, 1975, 1985, 1995, 2005, 2015),
      labels = c("1950s", "1960s", "1970s", "1980s", "1990s", "2000s", "2010s")
    ) +
    labs(title = "Critical acclaim of music by year", x = "") +
    # gg_y_zero() and gg_y_remove() are defined outside this file; presumably
    # they anchor the y axis at zero and hide it -- confirm.
    gg_y_zero() +
    gg_y_remove()
}
# Draw the yearly acclaim chart.
plot_acclaim(acclaim)