Fantasy Premier League

Forecasting FPL player scores

Jonatan Pallesen
01-04-2020

Introduction

I play in a premier league fantasy league, but I don’t watch soccer at all. So I’m interested in making a model that can predict player scores in the upcoming weeks.

Stats from fpl are accesible through the fpl package.


library(pacman)

p_load(tidyverse, magrittr, pander, fplr, goalmodel, rap)

source('../../src/extra.R', echo = F, encoding="utf-8")

current_week <- 22

players <- fpl_get_player_all()

fixtures <- fpl_get_fixtures()

teams <- fpl_get_teams()

players <- full_join(players, teams %>% select(code, team_name = name), by = c("team_code" = "code"))


By summing up the previous fixtures, we can get the number of goals scored by team.


get_goals_scored <-  function(week){
  away_scores <- fixtures %>% 
    filter(event < week) %>%
    group_by(team_a) %>%
    summarise(score_a = sum(team_a_score, na.rm = T)) %>% 
    rename(team = team_a)
  home_scores <- fixtures %>%
    filter(event < week) %>%
    group_by(team_h) %>%
    summarise(score_h = sum(team_h_score, na.rm = T)) %>% 
    rename(team = team_h)
  inner_join(home_scores, away_scores, by = "team") %>% 
    mutate(score = score_h + score_a, goals_per_week = score / current_week)
}

goals_scored <- get_goals_scored(current_week)


I use the goalmodel package to predict goals scored in specific matchups. A multiplier is calculated, which is the predicted number of goals divided by the average number of goals scored by the team in a gameweek.


get_predictions_gameweek <- function(week){
  f <- fixtures %>% filter(event < current_week)
  gm_res <- goalmodel(goals1 = f$team_h_score, goals2 = f$team_a_score, 
                      team1 = f$team_h, team2 = f$team_a)
  week <- fixtures %>% filter(event == week)
  predv <- predict_expg(gm_res, team1=week$team_h, team2=week$team_a, return_df = T)
  bind_rows(
    predv %>% select(team = team1, exp = expg1, goals_against = expg2), 
    predv %>% select(team = team2, exp = expg2, goals_against = expg1)
    ) %>% 
    mutate(team = as.numeric(team)) %>% 
    inner_join(goals_scored, by = "team") %>% 
    mutate(multiplier = exp / goals_per_week)
}


Now we calculate the predicted points for a player in a specific fixture.

I make a simple model for the the upcoming fixtures based on the following principles:

Note that two important elements are ignored in this simple model: Chance to score goals for defenders, and form of players.

Defenders and goalkeepers score 4 points for a clean sheet, and get -1 for each two goals scored against them. I ignore the risk of conceding 4+ goals, and thus only 4 and -1 are modelled, using a poisson distribution based on the expected goal score against.

The estimate of the defender’s bonus points are calculated by making the simplifying assumption that it is related only to the number of clean sheets. If the player has at least three clean sheets, use an individualized bonus per clean sheet value for that player, otherwise the average is used. There is no need to make a seperate consideration for bonus points for forwards and midfielders, since this is already included in the total score.


get_mean_bonus_per_clean_sheet <- function(position){
  players %>% 
  filter(clean_sheets >= 3, bonus > 1, element_type == position) %>% 
  mutate(bonus_per_clean_sheet = clean_sheets / bonus) %>% 
  summarise(a = mean(bonus_per_clean_sheet)) %>% pull(a)
}

mean_goalkeeper_bonus_per_clean_sheet <- get_mean_bonus_per_clean_sheet(1)

mean_defender_bonus_per_clean_sheet <- get_mean_bonus_per_clean_sheet(2)

players %<>% mutate(
  bonus_per_clean_sheet = ifelse(
      clean_sheets >= 3 & bonus > 1, 
      clean_sheets / bonus,
      ifelse(element_type  == 1,
             mean_goalkeeper_bonus_per_clean_sheet,
             mean_defender_bonus_per_clean_sheet))
  )

players_get_points <-  function(week){
  week_predictions <- get_predictions_gameweek(week)
  inner_join(week_predictions, players, by = "team") %>% 
  filter(minutes > 500) %>% 
  mutate(expected_score = case_when(
    element_type %in% c(3, 4) ~ points_per_game * multiplier,
    element_type %in% c(1, 2) ~ 2 + ppois(0, lambda = goals_against) * (4 + bonus_per_clean_sheet) - 
      1 * ppois(2, lambda = goals_against, lower = F))) %>% 
  group_by(id, second_name) %>% 
  summarise(expected_score = sum(expected_score)) %>% 
  rename(!!quo_name(glue("gw_{week}")) := expected_score) %>% 
  ungroup()
}


Use the model to list the best players to get over the upcoming gameweeks


get_forecast <- function(week){
  players_get_points(week) %>% 
  inner_join(players_get_points(week + 1), by = c("id", "second_name")) %>% 
  inner_join(players_get_points(week + 2), by = c("id", "second_name")) %>%
  inner_join(players_get_points(week + 3), by = c("id", "second_name")) %>% 
  mutate(average = (!!sym(glue("gw_{week}")) + !!sym(glue("gw_{week + 1}")) +
           !!sym(glue("gw_{week + 2}")) + !!sym(glue("gw_{week + 3}"))) / 4) %>% 
  left_join(players, by = c("id", "second_name")) %>% 
  select(second_name, team_name, points_per_game, !!sym(glue("gw_{week}")), !!sym(glue("gw_{week + 1}")),
         !!sym(glue("gw_{week + 2}")), !!sym(glue("gw_{week + 3}")), average)
}

get_forecast(current_week) %>% 
  arrange(desc(average)) %>% head(30)
second_name team_name points_per_game gw_22 gw_23 gw_24 gw_25 average
Mané Liverpool 7.4 7.91 7.7 16.4 11.2 10.8
Salah Liverpool 7.1 7.59 7.38 15.7 10.7 10.3
Vardy Leicester 7.6 10.8 8.69 10.6 8.62 9.67
Maddison Leicester 5.2 7.39 5.95 7.23 5.9 6.62
De Bruyne Man City 7 8.64 6.13 4.69 6.99 6.61
Firmino Liverpool 4.5 4.81 4.68 9.95 6.78 6.55
Rashford Man Utd 5.8 9.19 2.89 7.59 5.81 6.37
Abraham Chelsea 5.8 7.59 6.05 6.72 3.63 6
Martial Man Utd 5.4 8.56 2.69 7.07 5.41 5.93
Son Spurs 5.6 3.2 6.16 8.83 5.4 5.9
Pérez Leicester 4.6 6.53 5.26 6.4 5.22 5.85
Alli Spurs 5.4 3.08 5.94 8.51 5.21 5.69
Gomez Liverpool 3.5 4 4.51 8.98 4.94 5.61
Ramses Becker Liverpool 3.5 3.99 4.5 8.96 4.93 5.6
San Miguel del Castillo Liverpool 2.5 3.99 4.5 8.96 4.93 5.6
Schmeichel Leicester 4 5.86 5.62 5.75 4.79 5.5
Matip Liverpool 4.6 3.92 4.42 8.79 4.84 5.49
Lovren Liverpool 2 3.92 4.42 8.79 4.84 5.49
Kane Spurs 5.2 2.97 5.72 8.2 5.01 5.48
de Andrade Everton 4.7 5.06 5.66 5.66 5.22 5.4
Aubameyang Arsenal 5.9 4.51 4.57 5.78 6.72 5.39
van Dijk Liverpool 4.6 3.85 4.33 8.61 4.73 5.38
Agüero Man City 5.5 6.79 4.82 3.68 5.49 5.2
Sterling Man City 5.5 6.79 4.82 3.68 5.49 5.2
Shelvey Newcastle 4.4 3.93 5.1 4.57 7.17 5.19
Robertson Liverpool 4.9 3.7 4.14 8.25 4.52 5.15
Alexander-Arnold Liverpool 5.6 3.7 4.14 8.25 4.52 5.15
Grealish Aston Villa 4.9 4.7 4.52 6.18 5.1 5.12
Pukki Norwich 4.8 4.13 5.91 4.89 5.09 5.01
Mahrez Man City 5.2 6.42 4.56 3.48 5.19 4.91


Use the model to recommend who should be benched and captained this gameweek


week_forecast <- players_get_points(current_week) %>% 
  rename(expected_score = !!sym(glue("gw_{current_week}")))

my_players <- tibble(second_name = c("Ramsdale", "Alexander-Arnold", "Pereira", "Tarkowski", "Martial", "De Bruyne",
             "Grealish", "Mount", "Rashford", "Vardy", "Jiménez", "Martin", "Cantwell", "Doherty", "Bednarek"))

team_score <- inner_join(week_forecast, my_players, by = "second_name") %>% 
  inner_join(players, by = c("id", "second_name")) %>% 
  select(second_name, element_type, expected_score) %>% 
  # For some reason Pereira is listed as a midfielders, which is wrong.
  mutate(element_type = ifelse(second_name == "Pereira", 2, element_type))

goalkeeper <- team_score %>% 
  filter(element_type == 1) %>% top_n(1, expected_score)

t3_defenders <- team_score %>% 
  filter(element_type == 2) %>% top_n(3, expected_score)

b2_defenders <- team_score %>% 
  filter(element_type == 2) %>% top_n(-2, expected_score)

t3_midfielders <- team_score %>% 
  filter(element_type == 3) %>% top_n(3, expected_score)

b2_midfielders <- team_score %>% 
  filter(element_type == 3) %>% top_n(-2, expected_score)

t1_forwards <- team_score %>% 
  filter(element_type == 4) %>% top_n(1, expected_score)

b2_forwards <- team_score %>% 
  filter(element_type == 4) %>% top_n(-2, expected_score)

remaining_attack <- bind_rows(b2_midfielders, b2_forwards)

t2_attack <- remaining_attack %>% top_n(2, expected_score)

b2_attack <- remaining_attack %>% top_n(-2, expected_score)

remaining_players <- bind_rows(b2_defenders, b2_attack)

t1_remaining <- remaining_players %>% top_n(1, expected_score)

b3_remaining <- remaining_players %>% top_n(-3, expected_score) %>% 
  arrange(desc(expected_score)) %>% 
  rowid_to_column("bench_position")

bind_rows(goalkeeper, t3_defenders, t3_midfielders, t1_forwards, t2_attack, t1_remaining) %>% 
  mutate(captain = ifelse(expected_score == max(team_score$expected_score), T, NA),
         on_team = T) %>% 
  full_join(team_score, by = c("second_name", "element_type", "expected_score")) %>% 
  full_join(b3_remaining, by = c("second_name", "element_type", "expected_score")) %>% 
  arrange(element_type)
second_name element_type expected_score captain on_team bench_position
Ramsdale 1 3.86 - TRUE -
Alexander-Arnold 2 3.7 - TRUE -
Pereira 2 4.28 - TRUE -
Doherty 2 3.95 - TRUE -
Tarkowski 2 2.2 - - 2
Bednarek 2 1.7 - - 3
De Bruyne 3 8.64 - TRUE -
Martial 3 8.56 - TRUE -
Mount 3 5.11 - TRUE -
Grealish 3 4.7 - TRUE -
Cantwell 3 3.44 - - 1
Vardy 4 10.8 TRUE TRUE -
Rashford 4 9.19 - TRUE -
Jiménez 4 5.99 - TRUE -