Workspace
Samuel Verevis/

Exploring UK's Fatal Traffic Accidents

0
Beta
Spinner

Exploring UK's Fatal Traffic Accidents

knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)

install.packages(c("leaflet", "dygraphs", "tidygeocoder", "leaflet.extras", "wesanderson", "viridis", "ggmap", "gganimate", "gifski"))

library(tidyverse)
library(lubridate)
library(leaflet)
library(dygraphs)
library(xts) 
library(tidygeocoder)
library(viridis)
library(leaflet.extras)
library(wesanderson)
library(kableExtra)
#library(mapdeck)
library(scales)
library(ggmap)
library(gganimate)
library(gifski)



theme_set(theme_minimal(base_family = "Lato") +
            theme(text = element_text(color = "gray12"),
    
    # Customize the text in the title, subtitle, and caption
    plot.title = element_text(face = "bold", size = 14, hjust = 0.05),
    plot.subtitle = element_text(size = 10, hjust = 0.05),
    plot.caption = element_text(size = 10, hjust = .5),
    
    # Make the background white and remove extra grid lines
    panel.background = element_rect(fill = "white", color = "white"),
    panel.grid = element_blank(),
    panel.grid.major.x = element_blank()
  ))

accidents <- readr::read_csv('./data/accident-data.csv')
lookup <- readr::read_csv('./data/road-safety-lookups.csv')
region <- read_csv("https://gist.githubusercontent.com/radiac/d91d2ed1b971c03d49e9b7bd85e23f1c/raw/1e51ebb467b809ea8dcf1d7d429581e95ac48e3d/uk-counties-to-regions.csv")
## Firstly, keep variables of interest, flatten, and then join across look-ups
##
  acc_df <- accidents %>% 
              unite(date_time, c(date, time)) %>% 
              mutate(date_time = dmy_hms(date_time)) %>% 
              relocate(c(date_time, number_of_casualties), .after = latitude) %>% 
              select(-c(accident_index, accident_year, first_road_number, 
                        second_road_class, second_road_number, special_conditions_at_site, 
                        carriageway_hazards, day_of_week)) %>% 
              mutate(fatal_grp = case_when(accident_severity == 1 & number_of_casualties >=3 ~ 
                                             "high fatal",
                                           accident_severity == 2 & number_of_casualties >=3 ~ 
                                             "high serious",
                                           accident_severity == 3 & number_of_casualties >=3 ~ 
                                             "high slight",
                                           accident_severity == 1 & number_of_casualties < 3 ~ 
                                             "low fatal",
                                            TRUE                                             ~ "Other"))
## Load geoodes from tidygeocoder package
##
  geo_codes <- readRDS("geo_codes.RDS")
## Takes up too much RAM error 137 in knitting :( # ms = mapdeck_style("dark") # token <- "pk.eyJ1Ijoic2FtanZzIiwiYSI6ImNrdzE4cXQ2ajAxOHUzMnA4OGloOTQ5Z2QifQ.Bhl6GXUuotw6eZz1Ws-xMg" # # test_map <- accidents %>% # drop_na() %>% # select(latitude, longitude) # # mapdeck( style = mapdeck_style("dark"), location = c(-1.5,53), pitch = 45, zoom = 5.5) %>% # add_hexagon( # data = test_map # , lat = "latitude" # , lon = "longitude" # , layer_id = "hex_layer" # , elevation_scale = 100 # ,update_view = FALSE # , colour_range = colourvalues::colour_values(1:6, palette = colourvalues::get_palette("viridis")[70:256,]) # )

/* Add some styles to the document to polish a little */

h1 { color: #111; font-family: 'Helvetica Neue', sans-serif; font-size: 45px; font-weight: bold; letter-spacing: -1px; line-height: 1; text-align: center; }
    
h2 { color: #111; font-family: 'Open Sans', sans-serif; font-size: 30px; font-weight: 300; line-height: 32px; margin: 0 0 52px; text-align: center; }
    
h4 {color: #111; font-family: 'Helvetica Neue', sans-serif; font-size: 20px; line-height: 24px; margin: 0 0 24px; text-align: justify; text-justify: inter-word;}
    
p { color: #685206; font-family: 'Helvetica Neue', sans-serif; font-size: 14px; line-height: 24px; margin: 0 0 24px; text-align: justify; text-justify: inter-word; }    
    
p.b { color: #685206; font-family: 'Helvetica Neue', sans-serif; font-size: 14px; line-height: 24px; margin: 0 0 0px; text-align: justify; text-justify: inter-word; }  
    
ul {color: #685206; font-family: 'Helvetica Neue', sans-serif; font-size: 14px; line-height: 24px; margin: 0 0 24px; text-align: justify; text-justify: inter-word; }
    
ol {color: #685206; font-family: 'Helvetica Neue', sans-serif; font-size: 14px; line-height: 24px; margin: 0 0 24px; text-align: justify; text-justify: inter-word; }
    

Insights

  • There were over 90,000 traffic accidents in 2020, with 202 (0.2%) of these considered fatal accidents.
  • Traffic accidents peaked in February following a large dip in April, which coincides with UK's bank holiday periods. However, this feature isn't as salient for fatal accidents.
  • In general, traffic accidents are lowest on a Monday and increase through the working week, peaking on Fridays - for fatal accidents the peak is on a Saturday.
  • Both fatal and non-fatal accidents occur late in the day, with most accidents happening around 5pm (after-work rush).
  • In conjunction with this, fatal accidents tend to occur most in fair weather conditions, in speed zones of 60-70mph, and on Class A single carriageway roads.
  • Fatal accidents have a high (75%) tendency to occur in rural UK regions, relative to other accident groups.
  • Regionally, South East England has the highest accident rate overall, however Midlands has the highest fatal accident rate (as a proportion of the regions traffic accidents).
  • In general, the aim of this report is stimulate more questions than answers.

Future Research

These high level findings underline several avenues for future research, such as:

  • Do bank holiday period causally reduce traffic accidents throughout the year, and if so by how much?
  • Is work place fatigue and rush hour timing the strongest predictor of fatal traffic accident and general accidents?
  • Will reducing speed limits (particularly for rural regions) or transforming, single to duel carriage ways reduce fatal traffic accidents?

Future research in this area would work well for building modelling capability in the average accident rate (lambda) over time, both to understand how policy interventions have historically reduced fatal accident rates and how/what potential policy intervention[s] may reduce future fatal traffic accidents.

Policy Implications

  1. Reducing fatal accidents could be done via two differing types of policy initiatives. Firstly, policies that aim to reduce overall traffic accidents,and thereby lower the overall potential for fatal accident to occur. By way of example, these policies could look like:
    a. Introducing more bank holiday during the May - November period, where traffic accidents tend to rise again.
    b. Implementing increased traffic safety controls during 5pm rush hour, particularity for Fridays, due to the higher rates of traffic accidents, and/or
    c. Increased safety messaging in regions such as South East, and East of England where traffic accident rates are highest.
  2. Secondly, targeted policy response aimed at reducing fatal traffic accidents, such as:
    a. Increased policing in the late evening, where fatal crashes spike again,
    b. Further develop public transport through rural areas, where fatal accidents rates are highest,
    c. Targeted safety messaging in regions and cities that have the highest proportion of fatal accident rates.

The UK's traffic accident report for 2020

There were over 90,000 traffic accidents recorded in 2020, of which, 0.2% were considered fatal

There were about 91,200 recorded road accidents in 2020 across the UK. Of these accidents, around 0.2% (202) were considered fatal crashes ^[Fatal crashes are considered fatal if the crash severity is high and there are three or more casualties]. The lions share of car accident severity is typically considered slight (about 78%), then serious (20%). In general, most traffic accidents involve one person (81%), and then two people (13%), with three or more people having the lowest likelihood of occurring (about 5%). Given the interest in reducing fatal accidents most of the analysis will focus on comparing fatal crashes with other accidents groups of interest.

Peak traffic accidents occur in February, with a significant drop in April, coinciding with bank holidays. Across weekdays, traffic accidents typically increase over the working week, peaking on Fridays.

Notwithstanding the specific focus on fatal crashes, looking at the aggregate trend of crashes across 2020 reveals some notable features within the data. Firstly, a peak in accidents occurs in February, which follows a large dip in around April (about 3,300 accidents), coinciding with bank holidays. Following the dip, the crash rate rises steadily through the following months, to peak again around September (9,250 accidents).

## polyfreq graph
##
  acc_df %>% 
    ggplot(aes(date_time)) + 
    geom_freqpoly(binwidth = 86400) +
    labs(title = "",
         subtitle = "") +
    ylab("Number of accidents") +
    xlab("") +
  geom_vline(xintercept = dmy_hms(c("01/01/2020 00:00:00", "02/04/2020 00:00:00", "05/04/2020 00:00:00",
                                    "03/05/2020 00:00:00", "31/05/2020 00:00:00", "30/08/2020 00:00:00",
                                    "27/12/2020 00:00:00", "28/12/2020 00:00:00")), 
             colour = wes_palette("GrandBudapest2", n = 3)[[3]], linetype = "dashed") +
  annotate("text", x = dmy_hms("01/03/2020 00:00:00") , y = 450, label = "UK holidays", 
           colour = wes_palette("GrandBudapest2", n = 3)[[3]])

## Week graph
##
 acc_df %>% 
  select(date_time, accident_severity, number_of_casualties) %>% 
    pivot_longer(accident_severity, 
                 names_to = "Severity",
                 values_to =  "value") %>% 
          left_join(lookup %>% 
                      filter(`field name` == "accident_severity") %>% 
                      select(label, `code/format`) %>% 
                      mutate(`code/format` = as.numeric(`code/format`)),
                      by = c("value" = "code/format")) %>% 
  mutate(weekday = wday(date_time, label = TRUE)) %>% 
    ggplot(aes(x = weekday, fill = label)) +
      geom_bar(position = "stack", alpha = 0.6) +
   scale_fill_manual(values = wes_palette("GrandBudapest1", n = 3))+
   ylab("Number of accidents") +
   xlab("weekday") +
   theme(
     legend.title = element_blank()
   )

To better understand the characteristics of the fatal crashes, three other comparison groups are constructed - The groups of interest are define as:

  1. High fatal - this is the primary group of interest, as describe above,
  2. low fatal - less than three casualties, whose severity is considered fatal
  3. High serious - three or more casualties, whose severity is considered serious, and
  4. High slight - three or more casualties, whose severity is considered slight.

In addition to those considered high fatal accidents, three other accident groups are used to highlight the similarities and differences to the high fatal accident group of interest.

This leaves 6,006 data points that will be used by way of highlighting similarities and differences to our population of interest - high fatal crashes. In general, each of the four groups have two vehicles involved in accidents on average. High fatal accidents occur almost 4 times each week over the course of the year, high slight accidents occur 62 times a week. However, high fatal accidents are almost 6 times less likely to occur than the next lowest accident group, high serious (about 26 accidents per week).

# Insert Table


table01 <- acc_df %>% 
            filter(fatal_grp != "Other") %>% 
            group_by(fatal_grp) %>% 
            summarise(n=n(),
                      av_vehicles = mean(number_of_vehicles)) %>% 
            mutate(accidents_pw = n / 52)

kable(table01,
      col.names = c("Accident group", "Number of Accidents", "Average vehicles involved", "Average accidents per week"),
      caption = "Table 1: A breakdown of the chosen accident groups",
      digits = 2,
      align = "lccc") %>% 
      kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

Furthermore, this report considers a select amount of variables for exploring fatal traffic accidents, which are as follows:

  • Time of day, week and yearly patterns,
  • The speed limit of the area,
  • Weather Conditions,
  • Road types - namely, the road classification system ^[The road classification system in the UK defines the type of roads and their primary purpose - more information can be found here], and
  • Regional information.

Although the overall number of high fatal accidents are low, they typically follow the trend on aggregate, with similar peaks and dip in February and April respectively.

Figure 2 displays traffic accidents over time by the specified groups. Importantly, each group shares in similarities relative to the aggregate trend (see Figure 1), which are: A peak in February followed by a major dip in April, following steady a rise through the year, to then dip again around December (Christmas).

## Dygraph plot
##

  dy_data <- acc_df %>% 
           filter(fatal_grp != "Other") %>% 
            group_by(date_time, fatal_grp) %>% 
            summarise(fatal = sum(number_of_casualties)) %>% 
            arrange(date_time) %>% 
            pivot_wider(names_from = fatal_grp,
                        values_from = fatal,
                        values_fill = 0)
## split data
  dy_data1 <- dy_data[,1:2]
  dy_data2 <- dy_data[,c(1,3)]
  dy_data3 <- dy_data[,c(1,4)]
  dy_data4 <- dy_data[,c(1,5)]
  
## make xts   
   don1 <- apply.daily(xts(x = dy_data1[,2], order.by = dy_data1$date_time), FUN=sum)
   don2 <- apply.daily(xts(x = dy_data2[,2], order.by = dy_data2$date_time), FUN=sum)
   don3 <- apply.daily(xts(x = dy_data3[,2], order.by = dy_data3$date_time), FUN=sum)
   don4 <- apply.daily(xts(x = dy_data4[,2], order.by = dy_data4$date_time), FUN=sum)
   don <- cbind(don1, don2, don3, don4)
  
## create dy graph  
   dygraph(don) %>%
    dyOptions(labelsUTC = TRUE, colors = wes_palette("GrandBudapest1", n = 4),
               fillGraph=TRUE, fillAlpha=0.1, drawGrid = FALSE) %>%
    dyRangeSelector() %>%
    dyCrosshair(direction = "vertical") %>%
    dyHighlight(highlightCircleSize = 5, highlightSeriesBackgroundAlpha = 0.2, hideOnMouseOut = TRUE) %>% 
    dySeries("low.fatal", label = "Low fatal") %>% 
    dySeries("high.fatal", label  = "High fatal") %>% 
    dySeries("high.slight", label = "High slight") %>% 
    dySeries("high.serious", label = "High serious") %>% 
    dyRoller(rollPeriod = 14)

# gif_gp <- acc_df %>%
#            filter(fatal_grp != "Other") %>%
#             mutate(day_time = round_date(date_time,
#               unit = "weeks"
#               )) %>%
#             group_by(day_time, fatal_grp) %>%
#             summarise(fatal = sum(number_of_casualties))  %>%
#   ggplot(aes(day_time, fatal,  col = fatal_grp)) +
#     geom_line() +
#         scale_colour_manual(values = wes_palette("GrandBudapest1", n = 4)) +
#       xlab("") +
#   ylab("Number of Accidents") +
#   theme(
#         legend.position = "bottom",
#         legend.title = element_blank()) +
#       labs(title = "")  +
#     transition_reveal(day_time)


#anim_save("goo.gif", gif_gp)

#![](goo.gif)


# acc_df %>% 
#            filter(fatal_grp != "Other") %>%
#             mutate(day_time = round_date(date_time,
#               unit = "weeks"
#               )) %>% 
#             group_by(day_time, fatal_grp) %>% 
#             summarise(fatal = sum(number_of_casualties))  %>% 
#   ggplot(aes(day_time, fatal,  col = fatal_grp)) +
#     geom_line() +
#         scale_colour_manual(values = wes_palette("GrandBudapest1", n = 4)) +
#       xlab("") +
#   ylab("Number of Accidents") +
#   theme(
#         legend.position = "bottom",
#         legend.title = element_blank()) +
#       labs(title = "")



  • AI Chat
  • Code