Thursday, June 11, 2020

ROC Curve Analysis

ROC.curve <- function(R, D, n.thres = 100)
{
    ## Purpose: Perform ROC Curve Analysis
    ## Arguments:
    ##   R: Clinical Reference Standard (0 = Negative, 1 = Positive)
    ##   D: Device Diagnostic Output (a continuous variable)
    ##   n.thres: Number of Thresholds
    ## Return: ROC Curve and a Plot of Sensitivity and Specificity by Thresholds
    ## Author: Feiming Chen
    ## ________________________________________________

    N <- length(R)                      # sample size

    thres <- quantile(D, probs = seq(0, 1, 1 / n.thres)) # list of thresholds
    M <- length(thres)                            # number of thresholds
    sens <- spec <- accu <- rep(0, M)
    for (i in 1:M) {
        D1 <- ifelse(D > thres[i], 1, 0) # convert continuous output to binary output 0-1
        sens[i] <- sum(D1[R == 1]) / sum(R==1)
        spec[i] <- sum(D1[R == 0] == 0) / sum(R==0)
        accu[i] <- (sum(D1[R == 0] == 0) + sum(D1[R == 1])) / N # accuracy
    }
    J <- sens + spec - 1                # Youden's Index

    ## Calculate AUC (Area Under the Curve)
    f <- approxfun(1 - spec, sens, yleft = 0, yright = 1)
    AUC <- integrate(f, lower = 0, upper = 1)$value # c-statistic
    Gini <- 2 * AUC - 1

    plot(1 - spec, sens, type = "l", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "blue",
         xlab = "1 - Specificity (False Positive Rate)", ylab = "Sensitivity (True Positive Rate)")

    title(main = paste0("ROC Curve (AUC = ", round(AUC, 3), ", Gini = ", round(Gini, 3), ")"))

    ## Random Test
    abline(0, 1)                        # uninformative line
    abline(h=c(0, 1), col = "gray")
    abline(v=c(0, 1), col = "gray")

    ## Plot of Sensitivity and Specificity by Thresholds
    dev.new()
    plot(thres, sens, ylim = c(0, 1), main = "Sensitivity/Specificity by Thresholds", type = "l", lwd = 2, col = "blue", xlab = "Thresholds", ylab = "Performance")
    lines(thres, spec, lwd = 2, col = "red")
    lines(thres, J, lwd = 2, col = "orange")
    lines(thres, accu, lwd = 2, lty = 2, col = "black")
    abline(h=c(0,1), col = "gray")
    legend("right", legend= c("Sensitivity", "Specificity", "Youden's Index", "Accuracy"), bg="lightyellow", col= c("blue", "red", "orange", "black"), title="Performance Metrics", lwd=2, lty=c(rep(1, 3), 2))

    res <- data.frame(Threshold = round(thres, 2), Sensitivity = round(sens, 3), Specificity = round(spec, 3), J = round(J, 3), Accuracy = round(accu, 3))
    invisible(res)
}
if (F) {                                # Unit Test
    D <- runif(10000)
    R <- sapply(D, function(p) rbinom(1, size = 1, prob = p)) # perfect probability prediction
    ## R <- sapply(D, function(p) rbinom(1, size = 1, prob = 0.5)) # random probability prediction
    ROC.curve(R, D)
    ## (ROC.curve(R, D, n.thres = 4))
}
## Random Probability Prediction


## Perfect Probability Prediction