Thursday, April 27, 2023

Calculate the discriminatory power of a model prediction index using the concordance metric c-statistic (ROC-AUC)

c.statistic <- function(R, D)
{
    ## Purpose: Calculate the c-statistic (ROC-AUC), which is a
    ##   concordance measure.  If we take all possible pairs of
    ##   patients where one has disease (1) and the other has no
    ##   disease (0), we can calculate the proportion of all such
    ##   pairs where the diseased one has higher score (device output)
    ##   than the normal one (if they have the same value, we count
    ##   this as ‘half a victory’).
    ## 
    ## Author: Feiming Chen
    ## 
    ## Arguments: 
    ##   R: Clinical Reference Standard (0 = Negative, 1 = Positive)
    ##   D: Device Diagnostic Output (a continuous variable)
    ## 
    ## Return: 
    ##   - c : the c-statistic
    ## ________________________________________________

    f <- split(D, R)
    o <- outer(f$"1", f$"0", "-")
    c.statistic <- mean((o > 0) + (o == 0)/2)
    cat("c-statistic (ROC-AUC) =", c.statistic, "\n")
    c.statistic
}
if (F) {                                # Unit Test
    R <- rep(0:1, each = 5)
    c.statistic(R, 1:10)                # 1
    c.statistic(R, 10:1)                # 0
    c.statistic(R, rep(1, 10))          # 0.5
    neg <- rep(1:5, times=c(33,6,6,11,2))
    pos <- rep(1:5, times=c(3,2,2,11,33))
    R <- c(rep(0, length(neg)), rep(1, length(pos)))
    D <- c(neg, pos)
    c.statistic(R, D)                   # 0.89317
}

Import a text block (with only numeric values) and convert it to data frame

scan.text <- function(b, n.col = 1, v.names = "")
{
    ## Purpose: Import a text block (with only numeric values) and convert it to data frame. 
    ## 
    ## Author: Feiming Chen
    ## 
    ## Arguments: 
    ##   - b : a block of text containing only numeric values separated by spaces
    ##   - n.col : number of columns in the data frame
    ##   - v.names : variable names 
    ## Return: 
    ##   a data frame containing imported numbers
    ## 
    ## ________________________________________________

    d1 <- scan(textConnection(b))
    d2 <- as.data.frame(matrix(d1, ncol = n.col, byrow = T))
    names(d2) <- v.names
    d2
}
if (F) {                                # Unit Test
    scan.text("1 2 3 4 5 6", n.col = 3, v.names = c("ID", "V1", "V2"))
    ##   ID V1 V2
    ## 1  1  2  3
    ## 2  4  5  6
}