Saturday, March 18, 2023

Accuracy Analysis for Binary Data with Repeated Measurements

accuracy.metrics <- function(dat)
{
    ## Purpose: 
    ##   Calculate binary accuracy metrics (sensitivity, specificity, PPV, NPV, PLR, NLR). 
    ## 
    ## Author: Feiming Chen
    ## 
    ## Arguments: 
    ##   - dat : a data frame with the following variable names
    ##           device - binary response from device (1 = Positive; 0 = Negative)
    ##           reference - binary response from reference (1 = Positive; 0 = Negative)
    ##           
    ## Return: 
    ##   - a list of binary accuracy metrics. 
    ## 
    ## ________________________________________________

    d <- dat$device
    r <- dat$reference

    sensitivity = mean(d[r == 1])
    specificity = 1 - mean(d[r == 0])
    PPV  = mean(r[d == 1])
    NPV = 1 - mean(r[d == 0])
    PLR = sensitivity / (1 - specificity)
    NLR = (1 - sensitivity) / specificity

    c(Sensitivity = sensitivity, Specificity = specificity, PPV = PPV, NPV = NPV, PLR = PLR, NLR = NLR)
}
if (F) {
    accuracy.metrics(data.frame(device = rep(c(0,1), 5), reference = rep(c(1, 0), 5)))
    ## [1]   0   0   0   0   0 Inf
    accuracy.metrics(data.frame(device = rep(c(0,1), 5), reference = rep(c(0, 1), 5)))
    ## [1]   1   1   1   1 Inf   0             
}

accuracy.analysis <- function(dat, boot.size = 2000)
{
    ## Purpose: 
    ##   Calculate binary accuracy metrics (sensitivity, specificity, PPV, NPV, PLR, NLR), 
    ##   with their CI's, even in the presence of repeated measurements. 
    ## 
    ## Author: Feiming Chen
    ## 
    ## Arguments: 
    ##   - dat : a data frame with the following variable names
    ##           subject - subject identifier
    ##           device - binary response from device
    ##           reference - binary response from reference
    ##   - boot.size : bootstrap sample size. 
    ## 
    ## Return: 
    ##   - Point estimates and 95% CI's for a list of binary accuracy metrics (sensitivity, specificity, PPV, NPV, PLR, NLR).
    ## 
    ## ________________________________________________

    id <- dat$subject
    unique.ID <- unique(id)
    N <- length(unique.ID)
    cat("Number of Unique Patients =", N, "\n")

    K <- nrow(dat) / N 
    cat("Total number of repeated measurements =", nrow(dat), "\n")
    cat("Average number of repeated measurements per patient =", K, "\n")

    ## Point Estimates
    est <- accuracy.metrics(dat)
    cat("\nPoint Estimates of Accuracy Metrics:\n")
    print(round(est, 4))

    ## Make Bootstrap Samples
    cat("\nBootstrap Sample Size =", boot.size, "\n")
    set.seed(1)
    replicate(n = boot.size, {
        s <- sample(unique.ID, replace = TRUE)  # a bootstrap sample of patient ID's
        ## find all rows with the ID's in the bootstrap sample of ID's 
        v <- c()
        for (j in s) v <- c(v, which(id == j)) # a new bootstrap sample
        accuracy.metrics(dat[v,])
    }) -> est.boot

    CI <- apply(est.boot, 1, function(x) quantile(x, c(0.025, 0.975)))
    cat("95% Bootstrap Percentile Confidence Intervals of Accuracy Metrics:\n")
    print(round(CI, 4))

    invisible(list(est = est, CI = CI))}

if (F) {                                # Unit Test
    ## ans <- accuracy.analysis(dat, boot.size = 10)
}