Tuesday, April 6, 2021

Calculate sample proportion and its confidence interval

sample.prop.CI <- function(x, n)
{
    ## Purpose: Calculate a sample proportion and its 95% confidence interval,
    ##          print the result for two methods, and hand the numbers back
    ##          to the caller.
    ## 
    ## Arguments:
    ##   x: a single count in the numerator (0 <= x <= n)
    ##   n: a single total count in the denominator (n > 0)
    ## 
    ## Return (invisibly): a list with components
    ##    estimate: sample proportion x / n
    ##    AC: c(lower, upper) from the Agresti-Coull "Plus Two" method
    ##    WS: c(lower, upper) from the Wilson Score method
    ##    Interval limits are proportions (not percent), restricted to [0, 1].
    ## 
    ## Side effect: prints the estimate and both intervals as percentages.
    ## 
    ## Author: Feiming Chen
    ## ________________________________________________

    ## Fail early on inputs for which the formulas below are undefined
    ## (n = 0 or x > n produce NaN) or would recycle against c(-1, 1).
    stopifnot(is.numeric(x), is.numeric(n),
              length(x) == 1, length(n) == 1,
              !is.na(x), !is.na(n),
              n > 0, x >= 0, x <= n)

    p <- x / n                          # point estimate
    z <- qnorm(0.975)                   # 1.96

    ## A proportion cannot lie outside [0, 1]; the Agresti-Coull limits can
    ## overshoot near the boundaries (e.g. x = 0), so both intervals are clamped.
    clamp <- function(ci) pmin(pmax(ci, 0), 1)

    ## Print "estimate%  (lower% , upper%)" for one interval.
    show_ci <- function(ci) {
        cat(paste0(round(p * 100, 2), "%  (", round(ci[1]*100, 2), "% , ", round(ci[2]*100, 2), "%)\n\n"))
    }

    ## Agresti-Coull "Plus Two" Shortcut (for 95% CI):
    ## it is a close approximation of the Wilson score interval,
    ## essentially adding two successes and two failures to the observed data to
    ## pull the estimate away from the boundaries.
    p_adj <- (x + 2) / (n + 4)
    se_adj <- sqrt(p_adj * (1 - p_adj) / (n + 4))
    AC <- clamp(p_adj + c(-1, 1) * z * se_adj)
    cat("Agresti-Coull (AC) method:\n")
    show_ci(AC)

    ## Wilson Score method
    WS <- clamp((p + z^2/2/n + c(-1, 1) * z * sqrt(p*(1-p)/n + z^2/(4*n^2))) / (1 + z^2 / n))
    cat("Wilson Score method:\n")
    show_ci(WS)

    ## For comparison, prop.test(x, n) gives the Wilson interval with
    ## continuity correction.

    invisible(list(estimate = p, AC = AC, WS = WS))
}
## Unit Test: never executed when the file is sourced; run the calls by hand
## and compare the printed output with the expected lines shown below each call.
## FALSE is spelled out because the shorthand F is an ordinary variable that
## can be reassigned.
if (FALSE) {
    sample.prop.CI(5, 25)
    ## Agresti-Coull (AC) method:
    ## 20%  (8.56% , 39.71%)
    
    ## Wilson Score method:
    ## 20%  (8.86% , 39.13%)

    sample.prop.CI(11, 20)
    ## Agresti-Coull (AC) method:
    ## 55%  (34.23% , 74.1%)
    
    ## Wilson Score method:
    ## 55%  (34.21% , 74.18%)
}

No comments:

Post a Comment