Assortative mating

Assortative mating on years of education.

Jonatan Pallesen
05-23-2019

This analysis is based on data of couples from the Health and Retirement Study. Data preparation is performed here. The table is fitted with iterative proportional fitting.

Quite extreme results - a low proportion of couples have a large difference in years of education.


read data


library(pacman)

p_load(tidyverse, magrittr, glue, feather, janitor, cowplot, naniar)

source('../../src/extra.R', echo = F, encoding="utf-8")

df <- read_feather("data/hrs_cog.feather") %>% 
  clean_names() %>% 
  replace_with_na(list(schlyrs = 99))

analysis


get_assortative <- function(df){
  couples <- df %>% group_by(hhid) %>% 
    filter(pn %in% c(10, 20), sum(pn) == 30, sum(gender) == 3) %>%
    mutate(gender = ifelse(gender == 1, "male", "female"))
  school_years <- couples %>% 
    select(hhid, gender, schlyrs) %>% 
    spread(gender, schlyrs) %>% 
    na.omit() %>% 
    ungroup() %>% 
    select(female, male) %>% 
    mutate(
      male_cat = case_when(
        male < 12 ~ "< 12",
        male == 12 ~ "12",
        between(male, 13, 15) ~ "13-15",
        male >= 16 ~ "16+"
        ),
      female_cat = case_when(
        female < 12 ~ "< 12",
        female == 12 ~ "12",
        between(female, 13, 15) ~ "13-15",
        female >= 16 ~ "16+"
        )
    )
  rake_male <- function(df){
    df %>% group_by(male_cat, female_cat) %>% 
    summarise(s = sum(freq)) %>% 
    mutate(freq = s / sum(s)) 
  }
  rake_female <- function(df){
    df %>% group_by(female_cat, male_cat) %>% 
    summarise(s = sum(freq)) %>% 
    mutate(freq = s / sum(s)) 
  }
  school_years %>% 
    group_by(female_cat, male_cat) %>% 
    summarise(n = n()) %>% 
    mutate(freq = n / sum(n)) %>% 
    rake_male() %>% rake_female() %>% rake_male() %>% 
    rake_female() %>% rake_male() %>% rake_female() %>% 
    rake_male() %>% rake_female()
}

a <- get_assortative(df)

a1 <- get_assortative(df %>% filter(birthyr < 1938))

a2 <- get_assortative(df %>% filter(birthyr >= 1938))

Plot of full sample


plotit <- function(df, title, s){
  df %>% ggplot(aes(x = female_cat, y = male_cat, fill = freq)) + 
    geom_tile() +
    geom_text(aes(label = round(freq, 2)), size = s / 3.5) +
    labs(x = "years of education - wife", 
         y = "years of education - husband",
         title = title) +
    theme(legend.position = "none",
          text = element_text(size = s))
}

plotit(a, "", 14)


Increase in assortative mating over time


plot_grid(plotit(a1, "born 1920-1937", 24), plotit(a2, "born 1938-1955", 24))