Monday, September 14, 2020

Reading From and Writing To the Clipboard (usage: paste a data table from R into a spreadsheet, or copy a table out of a spreadsheet into R)

wc <- wc.linux <- function(x) {
    ## Write to Clipboard (Linux; requires the "xclip" utility)
    ## Write a table/data frame "x" to the clipboard, tab-separated, for pasting into a spreadsheet.
    ff <- pipe("xclip -i -selection clipboard", "w")
    utils::write.table(x, file = ff, sep = "\t", col.names = TRUE, row.names = FALSE, na = "")
    close(ff)
}
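
A quick usage sketch for wc() (this assumes a Linux desktop session with the xclip utility installed; the example data set is only for illustration):

if (F) {                                # Usage sketch (assumes xclip is installed)
    wc(head(mtcars))                    # copy the first rows of mtcars to the clipboard
    ## then paste (Ctrl+V) into a spreadsheet; the tab separator places each column in its own cell
}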


rc <- function(header, p=TRUE, ...){
    ## Read from Clipboard
    ## Check if a header line exists:
    ## if the first element of the first line is numeric, assume there is no header.
    ## If p = TRUE, print each column as a vector definition (via pvec) for copying.
    if (missing(header)) {
        if (is.numeric(unlist(utils::read.delim("clipboard", nrows=1, header=FALSE))[1]))
            header <- FALSE
        else
            header <- TRUE
    }

    a <- utils::read.delim("clipboard", header=header, as.is = TRUE, ...)
    if (p) {
        for (i in seq(ncol(a))) pvec(a[[i]], var=letters[(22+i) %% 26 + 1]) # variable names cycle x, y, z, a, ...
        return(invisible(a))
    } else {
        if (ncol(a) == 1 || nrow(a) == 1) {    # convert to a vector if there is only one column or one row
            a <- unlist(a)
            cat("\nClipboard is read into a vector of length:", length(a), "\n")
        } else cat("Clipboard is read into a data.frame of dimension:", dim(a), "\n")
        print(head(a, n=3))
        return(invisible(a))
    }
}
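
rc() relies on a helper pvec() that is not defined in this post; it presumably prints a column as a copy-pasteable R vector definition. A minimal sketch of such a helper (the name, signature, and output format are assumptions), plus a usage sketch for rc() (the "clipboard" connection it reads works out of the box on Windows):

pvec <- function(x, var = "x") {
    ## Print vector "x" as a copy-pasteable assignment, e.g.  x <- c(1, 2, 3)
    if (is.character(x)) x <- paste0('"', x, '"') # quote character elements
    cat(var, " <- c(", paste(x, collapse = ", "), ")\n", sep = "")
}
if (F) {                                # Usage sketch
    pvec(c(1.5, 2, 3), var = "y")       # prints: y <- c(1.5, 2, 3)
    d <- rc()                           # after copying a cell range from a spreadsheet
}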

Thursday, June 11, 2020

ROC Curve Analysis

ROC.curve <- function(R, D, n.thres = 100)
{
    ## Purpose: Perform ROC Curve Analysis
    ## Arguments:
    ##   R: Clinical Reference Standard (0 = Negative, 1 = Positive)
    ##   D: Device Diagnostic Output (a continuous variable)
    ##   n.thres: Number of Thresholds
    ## Return: ROC Curve and a Plot of Sensitivity and Specificity by Thresholds
    ## Author: Feiming Chen
    ## ________________________________________________

    N <- length(R)                      # sample size

    thres <- quantile(D, probs = seq(0, 1, 1 / n.thres)) # vector of candidate thresholds (quantiles of D)
    M <- length(thres)                            # number of thresholds (n.thres + 1)
    sens <- spec <- accu <- rep(0, M)
    for (i in 1:M) {
        D1 <- ifelse(D > thres[i], 1, 0) # convert continuous output to binary output 0-1
        sens[i] <- sum(D1[R == 1]) / sum(R==1)      # sensitivity (true positive rate)
        spec[i] <- sum(D1[R == 0] == 0) / sum(R==0) # specificity (true negative rate)
        accu[i] <- (sum(D1[R == 0] == 0) + sum(D1[R == 1])) / N # accuracy
    }
    J <- sens + spec - 1                # Youden's Index

    ## Calculate AUC (Area Under the Curve)
    f <- approxfun(1 - spec, sens, yleft = 0, yright = 1, ties = mean) # interpolate the ROC curve (average over tied x values)
    AUC <- integrate(f, lower = 0, upper = 1)$value # c-statistic
    Gini <- 2 * AUC - 1

    plot(1 - spec, sens, type = "l", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "blue",
         xlab = "1 - Specificity (False Positive Rate)", ylab = "Sensitivity (True Positive Rate)")

    title(main = paste0("ROC Curve (AUC = ", round(AUC, 3), ", Gini = ", round(Gini, 3), ")"))

    ## Reference line for a random (uninformative) test
    abline(0, 1)
    abline(h=c(0, 1), col = "gray")
    abline(v=c(0, 1), col = "gray")

    ## Plot of Sensitivity and Specificity by Thresholds
    dev.new()
    plot(thres, sens, ylim = c(0, 1), main = "Sensitivity/Specificity by Thresholds", type = "l", lwd = 2, col = "blue", xlab = "Thresholds", ylab = "Performance")
    lines(thres, spec, lwd = 2, col = "red")
    lines(thres, J, lwd = 2, col = "orange")
    lines(thres, accu, lwd = 2, lty = 2, col = "black")
    abline(h=c(0,1), col = "gray")
    legend("right", legend= c("Sensitivity", "Specificity", "Youden's Index", "Accuracy"), bg="lightyellow", col= c("blue", "red", "orange", "black"), title="Performance Metrics", lwd=2, lty=c(rep(1, 3), 2))

    res <- data.frame(Threshold = round(thres, 2), Sensitivity = round(sens, 3), Specificity = round(spec, 3), J = round(J, 3), Accuracy = round(accu, 3))
    invisible(res)
}
if (F) {                                # Unit Test
    D <- runif(10000)
    R <- sapply(D, function(p) rbinom(1, size = 1, prob = p)) # perfect probability prediction
    ## R <- sapply(D, function(p) rbinom(1, size = 1, prob = 0.5)) # random probability prediction
    ROC.curve(R, D)
    ## (ROC.curve(R, D, n.thres = 4))
}
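
As an independent check on the AUC reported in the plot title, the c-statistic can also be computed from the Mann-Whitney U statistic, since AUC equals the probability that a randomly chosen positive case scores higher than a randomly chosen negative case. A sketch reusing the R and D from the unit test above:

if (F) {                                # AUC cross-check via the Mann-Whitney U statistic
    U <- wilcox.test(D[R == 1], D[R == 0])$statistic # number of (positive, negative) pairs ranked correctly
    as.numeric(U) / (sum(R == 1) * sum(R == 0))      # should be close to the AUC from ROC.curve()
}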
[Figure: ROC curve for the random probability prediction scenario]

[Figure: ROC curve for the perfect probability prediction scenario]


Friday, May 15, 2020

Decision Curve Analysis


Net.Benefit <- function(R, D, p.grid)
{
    ## Purpose: Calculate Net Benefit for Decision Curve 
    ## Arguments:
    ##   R: Clinical Reference Standard (0 = Negative, 1 = Positive)
    ##   D: Device Diagnostic Output (0 = Negative, 1 = Positive; OR D = Probability, 0 < D < 1)
    ##   p.grid: The probability levels at which net benefits are to be calculated. 
    ## Return: Net Benefits
    ## Author: Feiming Chen
    ## ________________________________________________

    N <- length(R)                      # sample size
    Net.Benefit <- rep(0, length(p.grid))
    for (i in seq_along(p.grid)) {
        p <- p.grid[i]
        N.TP <- sum( D > p & R == 1 )
        N.FP <- sum( D > p & R == 0 )
        Net.Benefit[i] <- (N.TP - N.FP * p / (1 - p)) / N
    }
    Net.Benefit
}
if (F) {                                # Unit Test
    D <- runif(100)
    R <- sapply(D, function(p) rbinom(1, size = 1, prob = p)) # perfect probability prediction
    p.grid <- seq(0, 0.99, 0.01)           # Grid of indifference probabilities
    Net.Benefit(R, D, p.grid)
}
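
To make the net benefit formula concrete with made-up numbers: for N = 100 subjects, threshold probability p = 0.2, 30 true positives, and 10 false positives, the net benefit is (30 - 10 * 0.2 / 0.8) / 100 = 0.275.

if (F) {                                # Hand check of the net benefit formula (illustrative numbers)
    (30 - 10 * 0.2 / (1 - 0.2)) / 100   # = 0.275
}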


decision.curve <- function(R, D)
{
    ## Purpose: Perform Decision Curve Analysis
    ## Arguments:
    ##   R: Clinical Reference Standard (0 = Negative, 1 = Positive)
    ##   D: Device Diagnostic Output (0 = Negative, 1 = Positive; OR D = Probability, 0 < D < 1)
    ## Return: Decision Curve
    ## Author: Feiming Chen
    ## ________________________________________________

    N <- length(R)                      # sample size
    p.grid <- seq(0, 0.99, 0.01)        # Grid of indifference probabilities
    NB <- Net.Benefit(R, D, p.grid)
    prevalence <- sum(R == 1) / N
    plot(p.grid, NB, type = "l", xlim = c(0, 1), ylim = c(0, prevalence), lwd = 2, col = "blue", main = "Decision Curve",
         xlab = "Preference (Indifference Probability)", ylab = "Net Benefit",
         sub = paste("Prevalence =", round(100*prevalence, 1), "%"))

    ## Intervention for all
    NB.all <- Net.Benefit(R, rep(1, N), p.grid)
    lines(p.grid, NB.all, type = "l", col = "red", lwd = 1.5)

    ## Perfect Binary Test
    NB.perfect.binary <- Net.Benefit(R, R, p.grid)
    lines(p.grid, NB.perfect.binary, type = "l", col = "orange", lwd = 1.5)
}
if (F) {                                # Unit Test
    D <- runif(100000)
    R <- sapply(D, function(p) rbinom(1, size = 1, prob = p)) # perfect probability prediction
    decision.curve(R, D)
}
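
A possible extension (not part of decision.curve() above) is to also show the treat-none strategy, whose net benefit is zero at every threshold, and to label the curves; a sketch to run after decision.curve(), matching the colors used in the function:

if (F) {                                # Optional additions after calling decision.curve()
    abline(h = 0, col = "gray")         # "treat none" strategy: never intervene, net benefit = 0
    legend("topright", legend = c("Model", "Treat All", "Perfect Binary Test", "Treat None"),
           col = c("blue", "red", "orange", "gray"), lwd = c(2, 1.5, 1.5, 1), bg = "lightyellow")
}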

if (F) {                                # Simulation Code
    ## Perfect Probability Prediction (D0)
    D <- runif(100000)
    R <- sapply(D, function(p) rbinom(1, size = 1, prob = p)) 
    decision.curve(R, D)

    ## Binary test (B1) with 50% sensitivity and 100% specificity.
    p.grid <- seq(0, 0.99, 0.01)           # Grid of indifference probabilities
    B1 <- sapply(R, function(x) ifelse(x == 1, rbinom(1, 1, 0.5), 0))
    NB.high.spec <- Net.Benefit(R, B1, p.grid)
    lines(p.grid, NB.high.spec, type = "l", col = "orange", lwd = 1.5)

    ## Binary test (B2) with 100% sensitivity and 50% specificity.
    B2 <- sapply(R, function(x) ifelse(x == 0, rbinom(1, 1, 0.5), 1))
    NB.high.sens <- Net.Benefit(R, B2, p.grid)
    lines(p.grid, NB.high.sens, type = "l", col = "orange", lwd = 1.5)

    ## Random prediction model (D1): D ~ Uniform(0, 1), R ~ Bernoulli(0.5)
    NB.random <- Net.Benefit(rbinom(100000, 1, 0.5), D, p.grid)
    lines(p.grid, NB.random, type = "l", col = "red", lwd = 1.5)
}