Monday, March 18, 2024

Make a Sankey diagram to illustrate Diagnostic Performance using R package "NetworkD3"

make.Sankey.diagram.for.Dx.performance <- function(Sensitivity = 0.8, Specificity = 0.8, Prevalence = 0.2, N = 1000, by.prediction = FALSE)
{
    ## Purpose: Make a Sankey diagram to illustrate Diagnostic Performance using R package "NetworkD3"
    ## 
    ## Reference: https://r-graph-gallery.com/sankey-diagram.html
    ##            https://christophergandrud.github.io/networkD3
    ## 
    ## Author: Feiming Chen (Copyright 2024; GPLv3; No Warranty; gnu.org/licenses)
    ## 
    ## Arguments: 
    ##   - Prevalence : disease prevalence assumed in the target population
    ##   - Sensitivity : sensitivity estimate
    ##   - Specificity : specificity estimate
    ##   - N : assumed number of patients in a sample from the target population
    ##   - by.prediction: if T, make main branches by device predictions.
    ## 
    ## Return: 
    ##   - p1 : a HTML widget to be displayed in the browser. Main branches by disease status. 
    ##   - p2 : a HTML widget to be displayed in the browser. Main branches by device predictions.
    ## ________________________________________________

    np <- round(N * Prevalence)         # number of positives
    nn <- N - np                        # number of negatives
    npp <- round(np * Sensitivity)      # true positives
    npn <- np - npp                     # false negatives
    nnn <- round(nn * Specificity)      # true negatives
    nnp <- nn - nnn                     # false positives
    ndp <- npp + nnp                    # device positives
    ndn <- nnn + npn                    # device negatives


    cat("Simulation Assumptions:\n")
    cat("Prevalence =", Prevalence, "\n")
    cat("Sensitivity =", Sensitivity, "\n")
    cat("Specificity =", Specificity, "\n\n")

    cat("Among", N, "patients in the target population:\n")
    cat("Number of Positives =", N, "*", Prevalence, "=", np, "\n")
    cat("Number of True Positives =", np, "*", Sensitivity, "=", npp, "\n")
    cat("Number of False Negatives =", np, "-", npp, "=", npn, "\n\n")

    cat("Number of Negatives =", N, "*", (1 - Prevalence), "=", nn, "\n")
    cat("Number of True Negatives =", nn, "*", Specificity, "=", nnn, "\n")
    cat("Number of False Positives =", nn, "-", nnn, "=", nnp, "\n\n")

    cat("Number of Device Positives =", npp, "+", nnp, "=", ndp, "\n")
    cat("Number of Device Negatives =", nnn, "+", npn, "=", ndn, "\n\n")

    cat("Sensitivity =", npp, "/", np, "=", Sensitivity, "\n")
    cat("Specificity =", nnn, "/", nn, "=", Specificity, "\n")
    cat("PPV =", npp, "/", ndp, "=", round(npp / ndp, 2), "\n")
    cat("NPV =", nnn, "/", ndn, "=", round(nnn / ndn, 2), "\n")

    library(networkD3)

    if (!by.prediction) {
############################# Disease Branches ###########################################

        ## A connection data frame is a list of flows with intensity for each flow
        links <- data.frame(
            source=c("n","n", "np", "np", "nn", "nn"), 
            target=c("np","nn", "npp", "npn", "nnn", "nnp"), 
            value=c(np, nn, npp, npn, nnn, nnp)
        )

        ## From these flows we need to create a node data frame: it lists every entities involved in the flow
        nodes <- data.frame(id = unique(c(as.character(links$source), as.character(links$target))), 
                            name = c(paste0("Patients (", N, ")"),
                                     paste0("Disease Positives (", np, ")"), 
                                     paste0("Disease Negatives (", nn, ")"), 
                                     paste0("True Positives (", npp, ")"), 
                                     paste0("False Negatives (", npn, ")"), 
                                     paste0("True Negatives (", nnn, ")"), 
                                     paste0("False Positives (", nnp, ")")))


        ## With networkD3, connection must be provided using id, not using real name like in the links dataframe.
        links$IDsource <- match(links$source, nodes$id) - 1 
        links$IDtarget <- match(links$target, nodes$id) - 1

        ## Make the Sankey Diagram with Disease Branches
        p1 <- sankeyNetwork(Links = links, Nodes = nodes,
                            Source = "IDsource", Target = "IDtarget",
                            Value = "value", NodeID = "name", 
                            fontSize = 50, 
                            nodeWidth = 30, nodePadding = 200)
        print(p1)

        ## save the widget
        htmlwidgets::saveWidget(p1, file="~/temp/sankey1.html")
    } else {
############################# Prediction Branches ###########################################

        ## A connection data frame is a list of flows with intensity for each flow
        links <- data.frame(
            source=c("n","n", "ndp", "ndp", "ndn", "ndn"), 
            target=c("ndp","ndn", "npp", "nnp", "nnn", "npn"), 
            value=c(ndp, ndn, npp, nnp, nnn, npn)
        )

        ## From these flows we need to create a node data frame: it lists every entities involved in the flow
        nodes <- data.frame(id = unique(c(as.character(links$source), as.character(links$target))), 
                            name = c(paste0("Patients (", N, ")"),
                                     paste0("Device Positives (", ndp, ")"), 
                                     paste0("Device Negatives (", ndn, ")"), 
                                     paste0("True Positives (", npp, ")"), 
                                     paste0("False Positives (", nnp, ")"), 
                                     paste0("True Negatives (", nnn, ")"), 
                                     paste0("False Negatives (", npn, ")"))) 

        ## With networkD3, connection must be provided using id, not using real name like in the links dataframe.
        links$IDsource <- match(links$source, nodes$id) - 1 
        links$IDtarget <- match(links$target, nodes$id) - 1

        ## Make the Sankey Diagram 
        p2 <- sankeyNetwork(Links = links, Nodes = nodes,
                            Source = "IDsource", Target = "IDtarget",
                            Value = "value", NodeID = "name", 
                            fontSize = 50, 
                            nodeWidth = 30, nodePadding = 200)
        print(p2)
        htmlwidgets::saveWidget(p2, file="~/temp/sankey2.html")
    }
}
if (F) {                                # Unit Test
    make.Sankey.diagram.for.Dx.performance(Prevalence = 0.1, Sensitivity = 0.8, Specificity = 0.8)
}