Computing and visualizing functional residuals with the funres package
intro.Rmd
Currently supported models
Model type | Family | Package | Function |
---|---|---|---|
GLM | Binomial | stats | glm() |
GLM | Poisson | stats | glm() |
GLM | Quasi-Poisson | stats | glm() |
GAM | Binomial | mgcv | gam() |
GAM | Poisson | mgcv | gam() |
GAM | Quasi-Poisson | mgcv | gam() |
Ordinal | NA | VGAM |
vgam() and vgam()
|
Logistic regression
We’ll start by generating data from a quadratic logistic regression (LR) model:
# Generate data from a logistic regression model with quadratic form
set.seed(1217)
n <- 1000
x <- rnorm(n)
# x[1] <- 10 # add an outlier
z <- 1 - 2*x + 3*x^2 + rlogis(n)
y <- ifelse(z > 0, 1, 0)
# Fit a couple of LR models
fit.bad <- glm(y ~ x, family = binomial) # wrong
fit.good <- glm(y ~ x + I(x^2), family = binomial) # right
#> Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Functional residuals
fres.bad <- fresiduals(fit.bad)
fres.good <- fresiduals(fit.good)
par(mfrow = c(1, 2), las = 1)
plot(fres.bad[[1]], xlab = "t", ylab = "F(t)")
plot(fres.good[[1]], xlab = "t", ylab = "F(t)") # plot FRs for first observation
Does plotting them all tell us anything interesting?
plot(fres.bad[[1]], xlab = "t", ylab = "F(t)", las = 1, type = "n")
tt <- 0:100/100
for (i in seq_along(fres.bad)) {
lines(tt, fres.bad[[i]](tt), col = adjustcolor(1, alpha.f = 0.05))
}
Surrogate and probability-scale residuals
sr.bad <- fresiduals(fit.bad, type = "surrogate")
sr.good <- fresiduals(fit.good, type = "surrogate")
par(mfrow = c(1, 2))
col <- adjustcolor(1, alpha.f = 0.2)
plot(x, y = sr.bad, col = col, las = 1, ylab = "Surrogate residual")
lines(lowess(x, y = sr.bad), lwd = 2, col = "red2")
plot(x, y = sr.good, col = col, las = 1, ylab = "Surrogate residual")
lines(lowess(x, y = sr.good), lwd = 2, col = "red2")
# Probability-scale residuals
ps.bad <- fresiduals(fit.bad, type = "probscale")
ps.good <- fresiduals(fit.good, type = "probscale")
par(mfrow = c(1, 2))
plot(x, y = ps.bad, col = col, las = 1, ylab = "Surrogate residual")
lines(lowess(x, y = sr.bad), lwd = 2, col = "red2")
plot(x, y = ps.good, col = col, las = 1, ylab = "Surrogate residual")
lines(lowess(x, y = sr.good), lwd = 2, col = "red2")
Functional residual density (FRED) plots
The FRED plots in R are based on the Trellis framework (e.g.,
lattice), which rely on grid graphics.
The gridExtra::grid.arrange()
function is the most
convenient approach to arranging several plots here. These graphs are
quicker to produce compared to ggplot2.
# Two-dimensional kernel density estimation
gridExtra::grid.arrange(
fredplot(fit.bad, x = x),
fredplot(fit.good, x = x),
nrow = 1
)
# Hexagonal binning
gridExtra::grid.arrange(
fredplot(fit.bad, x = x, type = "hex", aspect = 1),
fredplot(fit.good, x = x, type = "hex", aspect = 1),
nrow = 1
)
Ordinal model example with VGAM package
library(VGAM)
#> Loading required package: stats4
#> Loading required package: splines
# Helper functions to simulate quadratic ordinal response data
ordinalize <- function(z, threshold) {
sapply(z, FUN = function(x) {
ordinal.value <- 1
index <- 1
while(index <= length(threshold) && x > threshold[index]) {
ordinal.value <- ordinal.value + 1
index <- index + 1
}
ordinal.value
})
}
simoqd <- function(n = 2000) {
threshold <- c(0, 4, 8)
x <- runif(n, min = 1, max = 7)
z <- 16 - 8 * x + 1 * x ^ 2 + rnorm(n) # rlnorm(n)
y <- sapply(z, FUN = function(zz) {
ordinal.value <- 1
index <- 1
while(index <= length(threshold) && zz > threshold[index]) {
ordinal.value <- ordinal.value + 1
index <- index + 1
}
ordinal.value
})
data.frame("y" = as.ordered(y), "x" = x)
}
# Simulate data
set.seed(977)
oqdf <- simoqd(n = 2000)
# Fit models to simulated ordinal quadratice response data
fit1 <- vglm(y ~ x, data = oqdf, family = acat(reverse = TRUE, parallel = TRUE))
fit2 <- vglm(y ~ poly(x, degree = 2), data = oqdf, family = acat(reverse = TRUE, parallel = TRUE))
#> Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control$wzepsilon): 1
#> diagonal elements of the working weights variable 'wz' have been replaced by
#> 1.819e-12
#> Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control$wzepsilon): 6
#> diagonal elements of the working weights variable 'wz' have been replaced by
#> 1.819e-12
#> Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control$wzepsilon): 6
#> diagonal elements of the working weights variable 'wz' have been replaced by
#> 1.819e-12
fit3 <- vgam(y ~ s(x), data = oqdf, family = acat(reverse = TRUE, parallel = TRUE))
#> Warning in vgam.fit(x = x, y = y, w = w, mf = mf, Xm2 = Xm2, Ym2 = Ym2, :
#> convergence not obtained in 30 IRLS iterations
# Residual plots
gridExtra::grid.arrange(
fredplot(fit1, x = oqdf$x), # linear term (wrong)
fredplot(fit2, x = oqdf$x), # quadratic term
fredplot(fit3, x = oqdf$x), # smooth term
nrow = 1
)
Zero-inflated Poisson (ZIP) model
# library(VGAM)
# library(pscl)
# Simulate ZIP data
set.seed(3)
x <- rnorm(1000, 0, 0.8)
lp <- 1 + 1 * x # linear predictor
lambda <- exp(lp)
p0 <- exp(1 + 0.2 * x) / (exp( 1 + 0.2 * x) + 1)
y <- VGAM::rzipois(1000, lambda = lambda, pstr0 = p0)
df <- cbind.data.frame(x, y)
fit1 <- glm(y ~ x, family = poisson, data = df)
fit2 <- pscl::zeroinfl(y ~ x, data = df)
gridExtra::grid.arrange(
fredplot(fit1, x = x),
fredplot(fit2, x = x),
ncol = 2
)