---
title: "Finding Patterns in Missing Data"
format: html
code-tools: true
embed-resources: true
---
```{r}
#| echo: false
# Document-wide chunk defaults: hide messages and warnings in the rendered
# output, and let a chunk error stop rendering instead of being printed.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  error = FALSE
)
```
```{r}
#| code-fold: true
# Generating some data
# Render a string to a PNG image and return its pixel data.
#
# The text is drawn in two passes: first on a scratch device to measure its
# width in inches, then on a device whose pixel width is that measurement
# times `res`, padded by 20%.
#
# Arguments:
#   text   String to draw, centred on the canvas.
#   height Canvas height in pixels.
#   cex    Character expansion passed to `strwidth()` and `text()`.
#   font   Font face passed to `strwidth()` and `text()`.
#   col    Text colour passed to `text()`.
#   res    Device resolution in pixels per inch.
#
# Returns the value of `png::readPNG()` for the rendered image.
create_text_matrix <- function(text, height = 100, cex = 5, font = 2,
                               col = "black", res = 96) {
  probe_file <- tempfile(fileext = ".png")
  image_file <- tempfile(fileext = ".png")
  # Remove both scratch files however the function exits (normally or on error).
  on.exit(unlink(c(probe_file, image_file)), add = TRUE)

  # Evaluate `draw()` on a PNG device writing to `path` and return its value.
  # The device is closed even when `draw()` fails, so an error (for example an
  # invalid colour) cannot leave a stray graphics device open.
  with_png_device <- function(path, width, draw) {
    png(path, width = width, height = height, units = "px", res = res)
    device <- dev.cur()
    on.exit(dev.off(device), add = TRUE)
    draw()
  }

  # Pass 1: measure the text on a fixed 500 px wide scratch device.
  w_inch <- with_png_device(probe_file, 500, function() {
    strwidth(text, units = "inches", cex = cex, font = font)
  })
  width_px <- ceiling(w_inch * res * 1.2)

  # Pass 2: draw the text centred on an empty, axis-free plot sized to fit.
  with_png_device(image_file, width_px, function() {
    par(mar = c(0.1, 0.1, 0.1, 0.1))
    plot(0, 0, type = "n", xlim = c(0, 1), ylim = c(0, 1),
         xlab = "", ylab = "", axes = FALSE)
    text(0.5, 0.5, text, cex = cex, font = font, col = col)
  })

  png::readPNG(image_file)
}
library(tidyverse)
# Fix the seed so the simulated values and the row shuffle are reproducible.
set.seed(42)

# Rasterise the phrase; its dark pixels become the missing-value pattern.
img_data <- create_text_matrix("oh no")

# A cell is flagged as missing when more than half of the values along the
# remaining dimension(s) of the image array are darker than 0.5.
dark_fraction <- apply(img_data < 0.5, c(1, 2), mean)
missingness_matrix <- dark_fraction > 0.5

# Standard-normal data with the same shape as the mask, rows ordered by the
# first column before the mask is applied.
n_obs <- nrow(missingness_matrix)
n_vars <- ncol(missingness_matrix)
dat <- matrix(rnorm(n = n_obs * n_vars), nrow = n_obs, ncol = n_vars)
dat <- dat[order(dat[, 1]), ]
dat[missingness_matrix] <- NA

# Attach a date that records the original row order, then shuffle the rows so
# the pattern is hidden until the data are sorted by date again.
dat <- as.data.frame(dat) |>
  mutate(date = as.Date("2001-01-01") + row_number()) |>
  slice_sample(prop = 1) |>
  relocate(date)
```
### The data
```{r}
# Number of rows and columns in the simulated data
dat |>
  dim()
```
```{r}
# Preview the top-left corner of the data: first ten rows and ten columns,
# rounded to two decimals.
dat_preview <- select(head(dat, n = 10), 1:10)
knitr::kable(dat_preview, digits = 2)
```
### Visualising missing values
```{r}
#| out-width: 100%
# Missingness map with rows and columns in their current (shuffled) order
visdat::vis_miss(dat)
```
### Sorted by missingness
```{r}
#| out-width: 100%
# Missingness map with `sort_miss = TRUE`
visdat::vis_miss(dat, sort_miss = TRUE)
```
### Hierarchical clustering
```{r}
#| out-width: 100%
# Missingness map with `cluster = TRUE`
visdat::vis_miss(dat, cluster = TRUE)
```
### Sorted by date column
```{r}
#| out-width: 100%
# Order the rows by the date column before drawing the missingness map
dat_by_date <- arrange(dat, date)
visdat::vis_miss(dat_by_date)
```