[This note was last updated using data downloaded on 2020-10-03. Here is the source of this analysis. Click here to show / hide the R code used. ]
Prediction is of course difficult, especially about the future.
As we see here, a plot of the case counts today, one week ago, and two weeks ago gives us a good sense of how different countries are managing the spread of COVID-19. The following plot is similar, showing the latest counts, and also shows how much the counts have increased in the last 4 days, and the 4 days before that. These are on a logarithmic scale, which means that the 4-day increases are a measure of the percentage increase; larger means faster rate of growth.
total.row <- apply(xcovid.row, 2, tail, 1)
total.row.1 <- apply(xcovid.row, 2, function(x) tail(x, 5)[1])
total.row.2 <- apply(xcovid.row, 2, function(x) tail(x, 9)[1])
torder <- tail(order(total.row), 60)
dotplot(total.row[torder], total.1 = total.row.1[torder], total.2 = total.row.2[torder],
xlab = "Total cases (NOTE: log scale)",
xlim = c(100, NA),
panel = function(x, y, ..., total.1, total.2, col) {
col <- trellis.par.get("superpose.line")$col
dot.line <- trellis.par.get("dot.line")
panel.abline(h = unique(y), col = dot.line$col, lwd = dot.line$lwd)
panel.segments(log10(total.2), y, log10(total.1), y, col = col[3], lwd = 2)
panel.segments(log10(total.1), y, x, y, col = col[2], lwd = 3)
panel.points(x, y, pch = 16, col = col[1])
},
scales = list(x = list(alternating = 3, log = 10, equispaced.log = FALSE)))
We can use this for a crude prediction of the number of cases 4 days further on, by assuming that on the log scale, the “increase” changes (actually decreases, one hopes) in a geometric progression. The countries are sorted according to their case counts 4 days ago.
predictCases <- function(x, days = 7, go.back = TRUE, smooth = TRUE)
## go.back = TRUE: use -days, -2*days to predict current
## go.back = FALSE: use -days, current to predict +days
## smooth: use loess() to smooth noise in data
{
u <- if (go.back) head(x, -days) else x # used for prediction
N <- nrow(u)
if (smooth)
u <- apply(u, 2,
function(v) predict(loess(v ~ seq_len(N),
span = 0.35)))
if (N - 2 * days > nrow(x)) stop("Not enough data")
if (any(u[N - 2 * days, ] <= 0))
{
warning(sprintf("dropping zero counts at %g days", N - 2 * days))
keep <- u[N - 2 * days, ] > 0
u <- u[, keep, drop = FALSE]
x <- x[, keep, drop = FALSE]
}
total.0 <- u[N, , drop = TRUE]
total.1 <- u[N - days, , drop = TRUE]
total.2 <- u[N - 2 * days, , drop = TRUE]
## predict assuming linear change in diff(log(count))
d2 <- log(total.1) - log(total.2)
d1 <- log(total.0) - log(total.1)
## assume these are in geometric progression
## d0 <- d1 * d1 / d2
## or assume these are in arithmetic progression (can go negative)
d0 <- 2 * d1 - d2
total.predict <- exp(log(total.0) + d0)
total.observed <- if (go.back) x[N + days, , drop = TRUE] else NA
data.frame(region = colnames(x),
total2 = total.2,
total1 = total.1,
total0 = total.0,
predicted = total.predict,
observed = total.observed)
}
This is how well these predictions performed when applied on data from 4 days ago to predict the current counts (for the 30 countries with highest current count). The predictions are sometimes bad when counts are low (not surprisingly), but generally reasonable.
torder <- tail(order(total.row), 30)
pred.past <- predictCases(xcovid.row[, torder, drop = FALSE], days = 4, go.back = TRUE)
dotplot(reorder(region, total0) ~ predicted + observed, data = pred.past,
xlab = "Predicted current number of cases based on data four days ago",
par.settings = simpleTheme(pch = 16, col = c(1, 2)), auto.key = list(columns = 2),
scales = list(x = list(alternating = 3, log = TRUE, equispaced.log = FALSE)))
Here are the predictions 4 days into the future ( 2020-10-07 ) using current data.
torder <- tail(order(total.row), 60)
pred.current <- predictCases(xcovid.row[, torder, drop = FALSE], days = 4, go.back = FALSE)
plot.col <- trellis.par.get("plot.symbol")$col
with(pred.current,
dotplot(reorder(region, predicted) ~ total0,
predicted = predicted,
xlab = sprintf("Predicted number of cases as on %s",
as.Date(file.mtime(TARGET)) + 4),
xlim = c(NA, exp(1.07 * max(log(predicted)))),
par.settings = simpleTheme(pch = c(16, 1), col = plot.col),
panel = function(x, y, ..., predicted) {
dot.line <- trellis.par.get("dot.line")
panel.abline(h = unique(y), col = dot.line$col, lwd = dot.line$lwd)
panel.points(x, y, ...)
panel.points(log10(predicted), y, pch = 1, ...)
panel.text(current.panel.limits()$xlim[2],
y, labels = round(predicted), pos = 2)
},
auto.key = list(columns = 2, text = c("current", "predicted")),
scales = list(x = list(log = 10, equispaced.log = FALSE))))