Thursday, June 30, 2022

Use simulation to estimate the coverage probability of the quantile rank-based confidence interval

The original code comes from https://stats.stackexchange.com/questions/99829/how-to-obtain-a-confidence-interval-for-a-percentile/284970#284970

test.coverage.via.simulation <- function(lu, n, p, alpha = 0.1)
{
    ## Purpose: 
    ##   Use simulation to estimate the coverage probability of the quantile rank-based CI.
    ##   
    ## Reference (code origin):
    ##   https://stats.stackexchange.com/questions/99829/how-to-obtain-a-confidence-interval-for-a-percentile/284970#284970
    ## 
    ## Arguments:
    ##   - lu: a vector of two numbers for the lower and upper limit of the CI in ranks. 
    ##   - n: sample size
    ##   - p: quantile probability cutpoint (between 0 and 1)
    ##   - alpha: type I error level (default to 0.1 so a 90% CI is calculated)
    ## 
    ## Return: 
    ##   Average coverage probability from 10,000 simulated samples.
    ## ________________________________________________

    ## Generate many random samples of size "n" from a known distribution
    ## and compute actual CI's from those samples using the given rank-based CI. 
    set.seed(1)
    n.sim <- 1e4
    index <- function(x, i) ifelse(i == Inf, Inf, ifelse(i == -Inf, -Inf, x[i]))
    sim <- replicate(n.sim, index(sort(rnorm(n)), lu))

    ## Compute the actual proportion of those intervals that cover the theoretical quantile.
    F.q <- qnorm(p)
    covers <- sim[1, ] <= F.q & F.q <= sim[2, ]
    mean.coverage <- mean(covers)
    message("Mean Coverage Over 10,000 Simulated Samples = ", signif(mean.coverage, 4))
}
if (F) {                                # Unit Test
    lu <- c(85, 97)
    n <- 100
    p <- 0.9
    alpha <- 0.05
    test.coverage.via.simulation(lu, n, p, alpha)
    ## Mean Coverage Over 10,000 Simulated Samples = 0.9528
}

Rank-based Confidence Interval for a Quantile

The original code comes from https://stats.stackexchange.com/questions/99829/how-to-obtain-a-confidence-interval-for-a-percentile/284970#284970


quantile.CI.ranks <- function(n, p, alpha = 0.1) {
    ## Purpose:
    ##   Calculate a two-sided near-symmetric distribution-free
    ##   confidence interval with confidence level of (1 - alpha) for
    ##   a quantile, by searching over a small range of upper and
    ##   lower order statistics for the closest coverage to (1 -
    ##   alpha) (but not less than it, if possible).
    ##
    ## Reference (code origin):
    ##   https://stats.stackexchange.com/questions/99829/how-to-obtain-a-confidence-interval-for-a-percentile/284970#284970
    ## 
    ## Arguments:
    ##   - n: sample size
    ##   - p: quantile probability cutpoint (between 0 and 1)
    ##   - alpha: type I error level (default to 0.1 so a 90% CI is calculated)
    ##
    ## Return:
    ##   - CI: two indices into the order statistics for the two-sided CI of the quantile
    ##   - coverage: theoretical coverage probability of the CI, which should be close to (1 - alpha).

    ## a small candidate list of order statistics for the lower/upper limits of the CI of a quantile
    l <- qbinom(alpha/2, n, p) + (-2:2) + 1 # lower limit candidates
    u <- qbinom(1 - alpha/2, n, p) + (-2:2)  # upper limit candidates

    ## out-of-bound order statistics correspond to no limit
    l[l < 0] <- -Inf                    # no lower limit
    u[u > n] <- Inf                     # no upper limit

    ## for each pair of candidate lower/upper limit, calculate the coverage probability
    coverage <- outer(l, u, function(l, u) pbinom(u - 1, n, p) - pbinom(l - 1, n, p))

    ## if no coverage is above (1 - alpha), choose the max coverage; 
    ## otherwise, look for the smallest coverage that is above (1 - alpha). 
    if (max(coverage) < 1 - alpha) {
        i <- which(coverage == max(coverage)) 
    } else {                            
        i <- which(coverage == min(coverage[coverage >= 1 - alpha]))
    }

    j <- i[1]                           # in case there are multiple candidates

    ## identify the order statistics and its coverage.
    L <- rep(l, 5)[j]
    U <- rep(u, each = 5)[j]
    return(list(CI = c(L, U), coverage = coverage[j]))
}
if (F) {                                # Unit Test
    alpha <- 0.05
    n <- 100
    p <- 0.9
    quantile.CI(n, p, alpha)
    ## $CI
    ## [1] 85 97

    ## $coverage
    ## [1] 0.95227
}

Bootstrap Confidence Interval for a Quantile

quantile.CI.via.bootstrap <- function(x, p, alpha = 0.1) {
    ## Purpose:
    ##   Calculate a two-sided confidence interval with confidence level of (1 - alpha) for
    ##   a quantile, based on the (computing intensive) bootstrap resampling method. 
    ##
    ## Arguments:
    ##   - x: a vector of values, representing a data sample. 
    ##   - p: probability cutpoint for the quantile (between 0 and 1).
    ##   - alpha: type I error level (default to 0.1 so a 90% CI is calculated)
    ##
    ## Return:
    ##   - CI: the lower and upper limits of the two-sided CI. 

    q <- quantile(x, probs = p)         
    message("Quantile Point Estimate = ", q, " (Probability Cutpoint = ", p, ")\n")

    ## Bootstrap resampling with 2000 replications
    library(boot)
    set.seed(1)
    b <- boot(x, function(x, i) quantile(x[i], probs = p), R = 2000)

    boot.ci(b, conf = 1 - alpha, type = c("norm", "basic", "perc", "bca"))

}
if (F) {                                # Unit Test
    x <- 1:100
    p <- 0.9
    alpha <- 0.05
    quantile.CI.via.bootstrap(x, p, alpha)
    ## Intervals : 
    ## Level      Normal              Basic         
    ## 95%   (84.50, 96.34 )   (85.10, 97.10 )  

    ## Level     Percentile            BCa          
    ## 95%   (83.1, 95.1 )   (83.3, 95.1 )  
}