Thursday, June 30, 2022

Rank-based Confidence Interval for a Quantile

The original code comes from https://stats.stackexchange.com/questions/99829/how-to-obtain-a-confidence-interval-for-a-percentile/284970#284970


quantile.CI.ranks <- function(n, p, alpha = 0.1) {
    ## Purpose:
    ##   Calculate a two-sided near-symmetric distribution-free
    ##   confidence interval with confidence level of (1 - alpha) for
    ##   a quantile, by searching over a small range of upper and
    ##   lower order statistics for the closest coverage to (1 -
    ##   alpha) (but not less than it, if possible).
    ##
    ## Reference (code origin):
    ##   https://stats.stackexchange.com/questions/99829/how-to-obtain-a-confidence-interval-for-a-percentile/284970#284970
    ## 
    ## Arguments:
    ##   - n: sample size
    ##   - p: quantile probability cutpoint (between 0 and 1)
    ##   - alpha: type I error level (default to 0.1 so a 90% CI is calculated)
    ##
    ## Return:
    ##   - CI: two indices into the order statistics for the two-sided CI of the quantile
    ##   - coverage: theoretical coverage probability of the CI, which should be close to (1 - alpha).

    ## a small candidate list of order statistics for the lower/upper limits of the CI of a quantile
    l <- qbinom(alpha/2, n, p) + (-2:2) + 1 # lower limit candidates
    u <- qbinom(1 - alpha/2, n, p) + (-2:2)  # upper limit candidates

    ## out-of-bound order statistics correspond to no limit
    l[l < 0] <- -Inf                    # no lower limit
    u[u > n] <- Inf                     # no upper limit

    ## for each pair of candidate lower/upper limit, calculate the coverage probability
    coverage <- outer(l, u, function(l, u) pbinom(u - 1, n, p) - pbinom(l - 1, n, p))

    ## if no coverage is above (1 - alpha), choose the max coverage; 
    ## otherwise, look for the smallest coverage that is above (1 - alpha). 
    if (max(coverage) < 1 - alpha) {
        i <- which(coverage == max(coverage)) 
    } else {                            
        i <- which(coverage == min(coverage[coverage >= 1 - alpha]))
    }

    j <- i[1]                           # in case there are multiple candidates

    ## identify the order statistics and its coverage.
    L <- rep(l, 5)[j]
    U <- rep(u, each = 5)[j]
    return(list(CI = c(L, U), coverage = coverage[j]))
}
if (F) {                                # Unit Test
    alpha <- 0.05
    n <- 100
    p <- 0.9
    quantile.CI(n, p, alpha)
    ## $CI
    ## [1] 85 97

    ## $coverage
    ## [1] 0.95227
}

No comments:

Post a Comment