Load data and libraries

tuesdata <- tidytuesdayR::tt_load(2021, week = 22)
## --- Compiling #TidyTuesday Information for 2021-05-25 ----
## --- There are 2 files available ---
## --- Starting Download ---
## 
##  Downloading file 1 of 2: `drivers.csv`
##  Downloading file 2 of 2: `records.csv`
## --- Download complete ---
records <- tuesdata$records
drivers <- tuesdata$drivers
library(tidyverse)
library(ggrepel)
library(lubridate)
theme_set(theme_light())

Exploration

knitr::kable(head(records))
track type shortcut player system_played date time_period time record_duration
Luigi Raceway Three Lap No Salam NTSC 1997-02-15 2M 12.99S 132.99 1
Luigi Raceway Three Lap No Booth NTSC 1997-02-16 2M 9.99S 129.99 0
Luigi Raceway Three Lap No Salam NTSC 1997-02-16 2M 8.99S 128.99 12
Luigi Raceway Three Lap No Salam NTSC 1997-02-28 2M 6.99S 126.99 7
Luigi Raceway Three Lap No Gregg G NTSC 1997-03-07 2M 4.51S 124.51 54
Luigi Raceway Three Lap No Rocky G NTSC 1997-04-30 2M 2.89S 122.89 0

Records by track over time

records %>%
    filter(type == "Three Lap") %>%
    filter(shortcut == "No") %>%
    ggplot(aes(x=date, y=time, color=track)) + geom_line() +
    facet_wrap(~ track, scale="free_y") + theme(legend.position="none") +
    #geom_text_repel(aes(label=player)) +
    NULL

Number of record entries by player

records %>%
    mutate(player=fct_lump(player,20)) %>%
    mutate(player=fct_rev(fct_infreq(player))) %>%
    ggplot(aes(y=player, fill=shortcut)) + geom_bar() +
    facet_wrap(~type) + scale_fill_brewer(palette="Set1")

How many records have been broken since March 2020?

records %>%
    filter(date>ymd('2020-02-29')) %>%
    count(track, sort=T)
## # A tibble: 15 x 2
##    track                     n
##    <chr>                 <int>
##  1 Toad's Turnpike          50
##  2 Sherbet Land             20
##  3 Luigi Raceway            19
##  4 Rainbow Road             17
##  5 Choco Mountain           12
##  6 Banshee Boardwalk        10
##  7 Frappe Snowland          10
##  8 Kalimari Desert          10
##  9 Mario Raceway             9
## 10 Royal Raceway             9
## 11 Yoshi Valley              9
## 12 Koopa Troopa Beach        7
## 13 Wario Stadium             7
## 14 Bowser's Castle           5
## 15 D.K.'s Jungle Parkway     2

Only for one track there was no new record since then:

tracks_with_new_record <- records %>%
    filter(date>ymd('2020-02-29')) %>%
    pull(track) %>%
    unique
records %>% select(track) %>% unique %>% filter(!(track %in% tracks_with_new_record))
## # A tibble: 1 x 1
##   track       
##   <chr>       
## 1 Moo Moo Farm

Better way to get there is sorting by date:

records %>%
    group_by(track) %>%
    top_n(1, date) %>%
    ungroup %>%
    arrange(date)
## # A tibble: 24 x 9
##    track     type   shortcut player system_played date       time_period  time record_duration
##    <chr>     <chr>  <chr>    <chr>  <chr>         <date>     <chr>       <dbl>           <dbl>
##  1 Moo Moo … Three… No       MR     PAL           2020-02-18 1M 25.93S    85.9             374
##  2 Royal Ra… Three… No       Dan    PAL           2020-09-07 2M 51.25S   171.              172
##  3 Koopa Tr… Singl… No       Dan    PAL           2020-10-18 30.78S       30.8             131
##  4 D.K.'s J… Singl… No       Dan    PAL           2020-10-26 42.04S       42.0             123
##  5 D.K.'s J… Singl… Yes      Dan    PAL           2020-10-26 42.04S       42.0             123
##  6 Kalimari… Singl… No       Dan    PAL           2020-11-07 38.97S       39.0               0
##  7 Kalimari… Singl… No       Dan    PAL           2020-11-07 38.96S       39.0             111
##  8 Kalimari… Singl… Yes      Dan    PAL           2020-11-07 38.97S       39.0               0
##  9 Kalimari… Singl… Yes      Dan    PAL           2020-11-07 38.96S       39.0             111
## 10 Frappe S… Three… Yes      abney… PAL           2021-01-08 23.61S       23.6              49
## # … with 14 more rows

Answer some of the questions (with plots)

  • Which track is the fastest?
  • For which track did the world record improve the most?
  • For how many tracks have shortcuts been discovered?

Fastest track (with and without shortcuts)

records %>%
    group_by(track, shortcut) %>%
    summarize(record=min(time)) %>%
    ungroup %>%
    mutate(
        nosc_record = if_else(shortcut=="No", record, 0),
        track = fct_reorder(track, nosc_record, max)
    ) %>%
    ggplot(aes(y=track, x=record)) +
        geom_col() +
        facet_wrap(~shortcut)
## `summarise()` has grouped output by 'track'. You can override using the `.groups` argument.

There are shortcuts for most tracks (all except 4)

records %>%
    group_by(track) %>%
    count(shortcut) %>%
    pivot_wider(names_from=shortcut, values_from=n, values_fill=0) %>%
    filter(Yes==0)
## # A tibble: 4 x 3
## # Groups:   track [4]
##   track                 No   Yes
##   <chr>              <int> <int>
## 1 Banshee Boardwalk     83     0
## 2 Bowser's Castle       69     0
## 3 Koopa Troopa Beach    89     0
## 4 Moo Moo Farm          81     0

Largest improvement without shortcuts

#records %>%
#   filter(shortcut=="No") %>%

Number of new records per month

records %>%
    mutate(month=floor_date(date, unit="month")) %>%
    count(month) %>%
    arrange(month) %>%
    mutate(cum = cumsum(n)) %>%
    ggplot(aes(x=month, y=cum)) + geom_line()