Finding Patterns in Missing Data

Code
# Generating some data
#' Render a piece of text to a PNG image and read the pixels back
#'
#' The text is drawn twice on throw-away PNG devices: once on a fixed
#' 500 px wide canvas purely to measure how wide the string is, and once on a
#' canvas sized to 1.2 times that measured width. The second image is read
#' back with `png::readPNG()`.
#'
#' Devices are always closed and temporary files always removed, even when
#' drawing or reading fails, and the graphics device that was active when the
#' function was called is made active again before returning.
#'
#' @param text Text to draw, centred on the canvas.
#' @param height Canvas height in pixels.
#' @param cex Character expansion factor used for measuring and drawing.
#' @param font Font face passed to `strwidth()` and `text()`.
#' @param col Text colour.
#' @param res Nominal resolution (ppi) of the PNG device; also used to
#'   convert the measured width from inches to pixels.
#' @return The pixel array returned by `png::readPNG()` for the rendered
#'   image. (Channel layout depends on how the PNG device writes the file;
#'   the caller in this document treats it as rows x columns x channels.)
create_text_matrix <- function(text, height = 100, cex = 5, font = 2,
                               col = "black", res = 96) {
  # dev.off() activates the *next* open device rather than the previously
  # active one, so remember the caller's device and reinstate it on exit.
  caller_dev <- dev.cur()
  on.exit(
    if (caller_dev != 1L && caller_dev %in% dev.list()) dev.set(caller_dev),
    add = TRUE
  )

  # Pass 1: open a scratch device only to measure the string width in inches.
  probe_file <- tempfile(fileext = ".png")
  on.exit(unlink(probe_file), add = TRUE)
  png(probe_file, width = 500, height = height, res = res)
  w_inch <- tryCatch(
    strwidth(text, units = "inches", cex = cex, font = font),
    finally = dev.off()
  )

  # Inches -> pixels, with 20% horizontal padding around the text.
  width_px <- ceiling(w_inch * res * 1.2)

  # Pass 2: draw the text centred on a canvas of the computed width.
  render_file <- tempfile(fileext = ".png")
  on.exit(unlink(render_file), add = TRUE)
  png(render_file, width = width_px, height = height, units = "px", res = res)
  tryCatch(
    {
      # par() only affects the scratch device, which is closed right after.
      par(mar = c(0.1, 0.1, 0.1, 0.1))
      plot(0, 0, type = "n", xlim = c(0, 1), ylim = c(0, 1),
           xlab = "", ylab = "", axes = FALSE)
      # Namespace-qualified because the argument `text` shadows the function.
      graphics::text(0.5, 0.5, text, cex = cex, font = font, col = col)
    },
    finally = dev.off()
  )

  png::readPNG(render_file)
}
library(tidyverse)
# Fix the RNG so the simulated values and the row shuffle are reproducible.
set.seed(42)

# Render the message, then mark a pixel as "missing" when more than half of
# its channel values are darker than 0.5.
img_data <- create_text_matrix("oh no")
missingness_matrix <- apply(img_data < 0.5, c(1, 2), mean) > 0.5

# One standard-normal draw per pixel, laid out with the image's dimensions.
dat <- matrix(
  rnorm(n = nrow(missingness_matrix) * ncol(missingness_matrix)),
  nrow = nrow(missingness_matrix),
  ncol = ncol(missingness_matrix)
)

# Order rows by the first column *before* punching the holes, so the first
# column carries the original row order.
dat <- dat[order(dat[, 1]), ]
is.na(dat) <- missingness_matrix

# Attach a date that also encodes the original row order (placed as the first
# column), then shuffle the rows so the pattern is hidden until re-sorted.
dat <- as.data.frame(dat) |>
  mutate(date = as.Date("2001-01-01") + row_number(), .before = 1) |>
  slice_sample(prop = 1)

The data

# Number of rows and columns (the `date` column is counted too)
dat |>
  dim()
[1] 100 263
# Preview the top-left corner: first 10 columns, first 10 rows
dat |>
  select(1:10) |>
  head(n = 10) |>
  knitr::kable(digits = 2)
date V1 V2 V3 V4 V5 V6 V7 V8 V9
2001-03-01 0.36 -1.00 1.17 0.04 0.06 0.00 0.99 -1.00 0.22
2001-03-07 0.50 1.44 -0.27 0.99 -0.04 -0.27 1.39 -0.38 -2.01
2001-04-06 1.51 -0.42 -0.71 0.48 0.00 -1.03 -1.62 -1.21 -0.45
2001-04-04 1.40 -0.36 -1.85 -0.34 -1.08 0.08 0.73 0.12 0.20
2001-02-10 -0.17 0.12 -0.52 0.05 0.10 0.60 -0.63 -0.36 -0.26
2001-02-18 0.08 1.82 0.10 -0.26 0.12 -2.17 -0.29 0.11 1.28
2001-04-07 1.51 0.15 0.82 2.42 -0.23 0.05 0.00 -0.23 -0.90
2001-03-13 0.64 -0.38 0.04 -0.42 -0.72 2.04 0.84 -0.49 -0.95
2001-02-27 0.32 -0.04 -1.10 0.72 -1.30 -1.56 0.84 -1.61 -1.03
2001-03-08 0.58 -0.15 1.31 -0.69 -0.39 1.05 -0.92 -0.32 -0.97

Visualising missing values

# Missingness map with rows and columns in their current order
visdat::vis_miss(dat)

Sorted by missingness

# Same map, with vis_miss()'s sort-by-missingness option switched on
visdat::vis_miss(dat, sort_miss = TRUE)

Hierarchical clustering

# Same map, with vis_miss()'s clustering option switched on
visdat::vis_miss(dat, cluster = TRUE)

Sorted by date column

# Put the rows back in date order before drawing the missingness map
visdat::vis_miss(arrange(dat, date))