Tuesday, May 4, 2021

Calculate the correlation within subjects

repeated.measures.correlation <- function(subject, x, y)
{
    ## Purpose: Calculate the correlation within subjects based on the paper:
    ##    Bland, J. Martin, and Douglas G. Altman. "Statistics notes: Calculating
    ##    correlation coefficients with repeated observations: Part 1—correlation
    ##    within subjects." Bmj 310.6977 (1995): 446.
    ## Arguments:
    ##    subject: ID for subject.  Each subject has multiple correlated measurements (x, y)
    ##    x: the variable that correlates with y.
    ##    y: the variable that correlates with x. 
    ## Return: 
    ## Author: Feiming Chen
    ## ________________________________________________

    dat <- data.frame(subject = factor(subject), x = x, y = y)
    b <- summary(aov(y ~ subject + x, dat))
    print(b)
    s <- b[[1]][[2]][-1]    
    r0 <- sqrt(s[1]/sum(s))             # within-subject correlation
    cat("\nWithin-Subject Correlation =", round(r0, 4), "\n")
    r0
}
if (F) {                                # Unit Test
    ## simulated data
    r <- 0.7                            # True Within-Subject Correlation
    z <- atanh(r)
    N <- 30
    se <- 1/sqrt(N-3)
    zz <- rnorm(N, z, se)
    rr <- tanh(zz)

    library(MASS)
    set.seed(1)
    dat <- data.frame(subject = factor(rep(1:N, each = 100)))
    dat$x <- dat$y <- NA
    for (i in 1:N) {
        a <- mvrnorm(n = 100, mu = c(0, 0), Sigma = matrix(c(1, rr[i], rr[i], 1), 2))
        dat[dat$subject == i, c(2, 3)] <- a
    }

    ## ANOVA analysis
    b <- summary(aov(y ~ subject + x, dat))
    s <- b[[1]][[2]][-1]    
    r0 <- sqrt(s[1]/sum(s))             # 0.70471

    ## using average of correlations
    d1 <- as.numeric(by(dat, dat$subject, function(d) cor(d$x, d$y)))
    d2 <- atanh(d1)
    d3 <- sample.mean.CI(d2)
    tanh(d3)                            # mean = 0.72 (0.68,0.75) (with Fisher's Z)
    sample.mean.CI(d1)                  # mean = 0.71 (0.67,0.74) (without Fisher's Z)
    repeated.measures.correlation(dat$subject, dat$x, dat$y) # 0.7047
}

No comments:

Post a Comment