Wednesday, June 30, 2021

Bootstrap Confidence Interval for Data with Repeated Measures

stat.bootstrap.cluster <- function(id, val, func = mean, boot.size = 10000)
{
    ## Purpose: Calculate bootstrap-based 95% confidence interval for data with repeated measures
    ## Arguments:
    ##   id: uniquely identifies a subject (patient)
    ##   val: a numeric vector to be summarized.  It contains repeated measures per subject.
    ##   func: a function for calculating the summary statistic from a numeric vector.  Default to "mean". 
    ##   boot.size: number of bootstrap samples.  Default to 10000. 
    ## Return: Point estimate, bootstrap percentile 95% CI, histogram for the bootstrap distribution of the target statistic
    ## Author: Feiming Chen
    ## ________________________________________________

    fname <- deparse(substitute(func))
    ans <- func(val)
    cat("Summary Statistic:", fname, "=", ans, "\n")

    unique.ID <- unique(id)

    set.seed(1)
    replicate(n = boot.size, {
        s <- sample(unique.ID, replace = TRUE)  # a bootstrap sample of patient ID's
        ## find all rows with the ID's in the bootstrap sample of ID's 
        v <- c()
        for (j in s) v <- c(v, val[id == j]) # a new bootstrap sample
        func(v)             #  and its statistic
    }) -> est.boot

    hist(est.boot, xlab = fname, main = paste("Bootstrap Distribution: ", fname))
    cat("95% Bootstrap Percentile Confidence Interval:\n")
    quantile(est.boot, c(0.025, 0.975))
}
if (F) {                                # Unit Test
    id <- c(1, 1, 1, 2, 2, 3)
    val <- c(3, 3, 3, 4, 4, 5)
    stat.bootstrap.cluster(id, val)          # compare with t-test based CI: (2.8098 4.5235)
    ## Summary Statistic: mean = 3.6667 
    ## 95% Bootstrap Percentile Confidence Interval:
    ##  2.5% 97.5% 
    ##     3     5 
    stat.bootstrap.cluster(id, val, func = sd)
    ## Summary Statistic: sd = 0.8165 
    ## 95% Bootstrap Percentile Confidence Interval:
    ##   2.5%  97.5% 
    ## 0.0000 1.095
}