Thursday, April 29, 2021

Calculate bootstrap confidence intervals for a statistic of a numeric vector

Using the library "boot"

my.boot <- function(x, func = mean, R = 10000) {
    ## Purpose: Calculate bootstrap confidence interval for a statistic of a numeric vector
    ## Arguments:
    ##   x: a vector of numerical values
    ##   func: a function to calculate the statistic (default to "mean")
    ##   R: number of replicate bootstrap samples (default to 10000)
    ## Return: the bootstrap confidence intervals (percentile and BCa)
    ## Author: Feiming Chen
    ## ________________________________________________

    library(boot)
    b <- boot(x, function(x, i) func(x[i]), R = R)
    print(boot.ci(b, type = c("perc", "bca")))
}
if (F) {
    x <- rnorm(100)
    my.boot(x)
    my.boot(x, func = median)
    ## Level     Percentile            BCa          
    ## 95%   (-0.1284,  0.2726 )   (-0.1371,  0.2533 )  
}

Not using the library "boot"

stat.bootstrap.independent <- function(val, func = mean, N.bootstrap = 10000)
{
    ## Purpose: Calculate bootstrap-based 95% confidence interval for independent univariate data
    ## Arguments:
    ##   val: a numeric vector to be summarized. 
    ##   func: a function for calculating the summary statistic from a numeric vector.  Default to "mean". 
    ##   N.bootstrap: number of bootstrap samples.  Default to 10000. 
    ## Return: Point estimate, bootstrap percentile 95% CI, histogram for the bootstrap distribution of the target statistic
    ## Author: Feiming Chen
    ## ________________________________________________

    fname <- deparse(substitute(func))
    ans <- func(val)
    cat("Summary Statistic:", fname, "=", ans, "\n")

    N <- length(val)
    r <- rep(NA, N.bootstrap)
    set.seed(1)
    for (i in 1:N.bootstrap) {
        s <- sample(val, size = N, replace = TRUE)
        r[i] <- func(s)
    }

    hist(r, xlab = fname, main = paste("Bootstrap Distribution: ", fname))

    cat("95% Bootstrap Percentile Confidence Interval:\n")
    quantile(r, c(0.025, 0.975))
}
if (F) {                                # Unit Test
    val <- c(3, 3, 3, 4, 4, 5)
    stat.bootstrap.independent(val)          # compare with t-test based CI: (2.8098 4.5235)
    ## Summary Statistic: mean = 3.6667 
    ## 95% Bootstrap Percentile Confidence Interval:
    ##   2.5%  97.5% 
    ## 3.1667 4.3333 
}



No comments:

Post a Comment