Thursday, August 31, 2023

Diagnostic Evaluation from Data Presented in a Confusion Matrix

confusion.matrix.diagnostic.accuracy <- function(x)
{
    ## Purpose:
    ##   Calculate sensitivity, specificity, and other metrics. 
    ## 
    ## Arguments:
    ##   x: a numeric vector of length 4 for the raw confusion matrix.
    ##      First 2 numbers: count of (Test+, Test-) with reference positive results
    ##      Last 2 numbers: count of (Test+, Test-) with reference negative results
    ##                   Ref+, Ref-
    ##            Test+   x1    x3
    ##            Test-   x2    x4
    ## 
    ## Return:
    ##   Sensitivity, Specificity, PPV, NPV, PLR, NLR and their CI's
    ## 
    ## Author: Feiming Chen
    ## ________________________________________________

    d <- as.data.frame(addmargins(matrix(x, ncol = 2)))
    dimnames(d) <- list(c("Test.Pos", "Test.Neg", "Sum"), c("Ref.Pos", "Ref.Neg", "Sum"))

    d$Likelihood.Ratio <- sapply(1:3, function(i) convert.to.CI.text(ratio.of.two.prop(x=c(d[i, 1], d[i, 2]), n=c(d[3, 1], d[3, 2])), digits = 1)) 

    d$"Performance" <- c(paste(c("PPV =", "NPV ="), my.proportion(c(d[1,1], d[2,2]), c(d[1,3], d[2,3]), digits = 1)),
                                    paste("Prevalence =", my.proportion(d[3,1], d[3,3], digits = 1)))

    sens <- d[1,1] / d[3, 1]
    spec <- d[2,2] / d[3, 2]
    PPV <- d[1,1] / d[1, 3]
    NPV <- d[2,2] / d[2, 3]

    J <- sens + spec - 1                # Youden's index in percentage
    ## J2 <-  d[1,1] / d[3, 1] -  d[1,2] / d[3, 2] # identical in the form of risk difference 
    J.ci <- prop.test(c(d[1,1], d[1,2]), c(d[3,1], d[3,2]))$conf.int
    J.str <- convert.to.CI.text(c(J, J.ci))

    NND <- 1 / (PPV + NPV - 1)          # Number Needed to Diagnose
    ## NND2 <- 1 / (d[1,1] / d[1, 3] -  d[2,1] / d[2, 3])  # identical in the form of risk difference 
    NND.ci <- 1 / prop.test(c(d[1,1], d[2,1]), c(d[1,3], d[2,3]))$conf.int
    NND.str <- convert.to.CI.text(ceiling(c(NND, rev(NND.ci))))

    b <- c(paste(c("Sensitivity =", "Specificity ="), my.proportion(c(d[1,1], d[2,2]), c(d[3,1], d[3,2]), digits = 1)), "", "",
           paste0(paste0("J = ", J.str), ", ", paste0("NND = ", NND.str)))

    d2 <- rbind(d, b)
    rownames(d2)[4] <- "Performance"

    tex.print(d2, file = "~/temp/confusion-matrix-performance", caption = "Performance Evaluation from Confusion Matrix")

    d2
}
if (F) {
    ## bowel cancer diagnostic test (https://en.wikipedia.org/wiki/Sensitivity_and_specificity#Confusion_matrix)
    x <- c(20, 10, 180, 1820)
    confusion.matrix.diagnostic.accuracy(x)
    confusion.matrix.diagnostic.accuracy(c(95, 5, 5, 95))
    confusion.matrix.diagnostic.accuracy(c(95, 5, 500, 9500))
    confusion.matrix.diagnostic.accuracy(c(90, 10, 1000, 9000))
}