Thursday, May 20, 2021

Calculate the 95% CI of the ratio of two proportions, with application to diagnostic likelihood ratios

ratio.of.two.prop <- function(x, n)
{
    ## Purpose: Calculate the 95% CI of the ratio of two proportions, with application to diagnostic likelihood ratios. 
    ## Arguments:
    ##   x: a vector of two integers (numerator of the two proportions). 
    ##   n: a vector of two integers (denominator of the two proportions).
    ## Return: The point estimate and the 95% confidence interval of the ratio of two proportions, where the ratio is the first proportion divided by the second proportion. 
    ## Author: Feiming Chen
    ## ________________________________________________

    if (x[1] == 0 || x[1] == n[1] || x[2] == 0 || x[2] == n[2]) {
        x <- x + 0.5
        n <- n + 1
    }

    p <- x / n

    se <- sqrt((1 - p[1]) / (p[1] * n[1]) + (1 - p[2]) / (p[2] * n[2]))
    
    pp <- p[1] / p[2]                   # point estimate
    LB <- exp(log(pp) - 1.96 * se)
    UB <- exp(log(pp) + 1.96 * se)

    c(pp, LB, UB)
}
if (F) {                                # Unit Test
    ratio.of.two.prop(c(431, 30), c(460, 146))
    ## [1] 4.5599 3.3116 6.2786
    ratio.of.two.prop(c(0, 146), c(460, 146))
    ## [1] 0.00108830 0.00006817 0.01737423   
}

Tuesday, May 4, 2021

Calculate the correlation within subjects

repeated.measures.correlation <- function(subject, x, y)
{
    ## Purpose: Calculate the correlation within subjects based on the paper:
    ##    Bland, J. Martin, and Douglas G. Altman. "Statistics notes: Calculating
    ##    correlation coefficients with repeated observations: Part 1—correlation
    ##    within subjects." Bmj 310.6977 (1995): 446.
    ## Arguments:
    ##    subject: ID for subject.  Each subject has multiple correlated measurements (x, y)
    ##    x: the variable that correlates with y.
    ##    y: the variable that correlates with x. 
    ## Return: 
    ## Author: Feiming Chen
    ## ________________________________________________

    dat <- data.frame(subject = factor(subject), x = x, y = y)
    b <- summary(aov(y ~ subject + x, dat))
    print(b)
    s <- b[[1]][[2]][-1]    
    r0 <- sqrt(s[1]/sum(s))             # within-subject correlation
    cat("\nWithin-Subject Correlation =", round(r0, 4), "\n")
    r0
}
if (F) {                                # Unit Test
    ## simulated data
    r <- 0.7                            # True Within-Subject Correlation
    z <- atanh(r)
    N <- 30
    se <- 1/sqrt(N-3)
    zz <- rnorm(N, z, se)
    rr <- tanh(zz)

    library(MASS)
    set.seed(1)
    dat <- data.frame(subject = factor(rep(1:N, each = 100)))
    dat$x <- dat$y <- NA
    for (i in 1:N) {
        a <- mvrnorm(n = 100, mu = c(0, 0), Sigma = matrix(c(1, rr[i], rr[i], 1), 2))
        dat[dat$subject == i, c(2, 3)] <- a
    }

    ## ANOVA analysis
    b <- summary(aov(y ~ subject + x, dat))
    s <- b[[1]][[2]][-1]    
    r0 <- sqrt(s[1]/sum(s))             # 0.70471

    ## using average of correlations
    d1 <- as.numeric(by(dat, dat$subject, function(d) cor(d$x, d$y)))
    d2 <- atanh(d1)
    d3 <- sample.mean.CI(d2)
    tanh(d3)                            # mean = 0.72 (0.68,0.75) (with Fisher's Z)
    sample.mean.CI(d1)                  # mean = 0.71 (0.67,0.74) (without Fisher's Z)
    repeated.measures.correlation(dat$subject, dat$x, dat$y) # 0.7047
}