Which names are more cat-like, dog-like, and human-like?
The TidyTuesday data set for 2019-03-26 contains a large number of pet names from Seattle. A data set of human names is readily available in the R package babynames.
I investigate which names are more typical of pets compared to humans, and which names are more typical of humans compared to pets.
library(pacman)
p_load(tidyverse, babynames, scales, glue)
source('../../src/extra.R', echo = F, encoding="utf-8")
# Location of the Seattle pet data (TidyTuesday, 2019-03-26).
fn <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-26/seattle_pets.csv"

# One row per pet. `name` is a copy of `animals_name`, and `n = 1` makes
# every row count once when names are summed in later steps. Rows without a
# name are dropped.
pets <- read_csv(fn) %>%
  mutate(
    name = animals_name,
    n = 1
  ) %>%
  drop_na(name)
#' Score every name in a data frame by how common it is.
#'
#' Sums `n` per `name`, orders names from most to least common, and gives the
#' name at position `i` (1 = most common) out of `N` distinct names the score
#' `log(log(10 + N)) - log(log(10 + i))`. The most common name therefore gets
#' the highest score and the least common name gets exactly 0.
#'
#' @param df A data frame with a `name` column and a numeric count column `n`.
#' @param c  A string used as the prefix of the output score column.
#' @return A data frame with two columns: `name` and `<c>_rank`.
get_rank <- function(df, c) {
  df %>%
    group_by(name) %>%
    summarise(n = sum(n)) %>%
    arrange(desc(n)) %>%
    mutate(
      rank = log(log(10 + nrow(.))) - log(log(10 + row_number()))
    ) %>%
    # Keep only the name and the score, renaming the score to "<c>_rank".
    select(name, !!glue("{c}_rank") := rank)
}
# One row per name with a popularity score for humans, dogs and cats, plus
# the derived sums and differences used for plotting.
ranks <- babynames %>%
  get_rank("human") %>%
  full_join(
    pets %>% filter(species == "Dog") %>% get_rank("dog"),
    by = "name"
  ) %>%
  full_join(
    pets %>% filter(species == "Cat") %>% get_rank("cat"),
    by = "name"
  ) %>%
  mutate(
    # A name absent from one population gets score 0 there, the same score
    # get_rank() assigns to that population's least common name.
    human_rank = replace_na(human_rank, 0),
    cat_rank = replace_na(cat_rank, 0),
    dog_rank = replace_na(dog_rank, 0),
    cat_human_diff = cat_rank - human_rank,
    # Combined pet score: the mean of the cat and dog scores.
    catdog_rank = (cat_rank + dog_rank) / 2,
    dog_human_diff = dog_rank - human_rank,
    #dog_human_sum = dog_rank + human_rank,
    dog_cat_diff = dog_rank - cat_rank,
    catdog_human_diff = catdog_rank - human_rank,
    dog_cat_sum = dog_rank + cat_rank
  )
#' Plot names as text labels on two score axes.
#'
#' Each row of `df` is drawn as its `name` label at position (`a`, `b`),
#' coloured by `diff` on a diverging red-to-blue scale. A dashed diagonal
#' marks where both scores are equal. Axis tick labels and ticks are hidden,
#' and no colour legend is drawn.
#'
#' @param df   Data frame holding the columns referenced below.
#' @param a    Unquoted column mapped to the x axis.
#' @param b    Unquoted column mapped to the y axis.
#' @param diff Unquoted column mapped to label colour.
#' @param name Unquoted column holding the label text.
#' @return A ggplot object; add `labs()` etc. to it as needed.
plotg <- function(df, a, b, diff, name) {
  ggplot(df, aes({{ a }}, {{ b }}, label = {{ name }}, color = {{ diff }})) +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
    # Labels that would overlap an already drawn label are skipped.
    geom_text(check_overlap = TRUE) +
    scale_colour_gradient2(low = muted("red"), high = muted("blue")) +
    guides(color = "none", size = "none") +
    # Fixed axis ranges; rows outside them are not drawn.
    ylim(0, 1.57) +
    xlim(0, 1.45) +
    theme(
      axis.title.y = element_text(margin = margin(t = 0, r = 30, b = 0, l = 0)),
      axis.title.x = element_text(margin = margin(t = 30, r = 0, b = 0, l = 0)),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      text = element_text(size = 13),
      plot.title = element_text(size = 15, hjust = 0.5)
    )
}
# Combined pet score (x) against human score (y).
plotg(ranks, catdog_rank, human_rank, catdog_human_diff, name) +
  labs(
    title = "Pet names vs human names",
    x = "More pet-like name",
    y = "More human-like name"
  )

# Dog score (x) against cat score (y).
plotg(ranks, dog_rank, cat_rank, dog_cat_diff, name) +
  labs(
    title = "Dog names vs cat names",
    x = "More dog-like name",
    y = "More cat-like name"
  )