make.Sankey.diagram.for.Dx.performance <- function(Sensitivity = 0.8, Specificity = 0.8, Prevalence = 0.2, N = 1000, by.prediction = FALSE) { ## Purpose: Make a Sankey diagram to illustrate Diagnostic Performance using R package "NetworkD3" ## ## Reference: https://r-graph-gallery.com/sankey-diagram.html ## https://christophergandrud.github.io/networkD3 ## ## Author: Feiming Chen (Copyright 2024; GPLv3; No Warranty; gnu.org/licenses) ## ## Arguments: ## - Prevalence : disease prevalence assumed in the target population ## - Sensitivity : sensitivity estimate ## - Specificity : specificity estimate ## - N : assumed number of patients in a sample from the target population ## - by.prediction: if T, make main branches by device predictions. ## ## Return: ## - p1 : a HTML widget to be displayed in the browser. Main branches by disease status. ## - p2 : a HTML widget to be displayed in the browser. Main branches by device predictions. ## ________________________________________________ np <- round(N * Prevalence) # number of positives nn <- N - np # number of negatives npp <- round(np * Sensitivity) # true positives npn <- np - npp # false negatives nnn <- round(nn * Specificity) # true negatives nnp <- nn - nnn # false positives ndp <- npp + nnp # device positives ndn <- nnn + npn # device negatives cat("Simulation Assumptions:\n") cat("Prevalence =", Prevalence, "\n") cat("Sensitivity =", Sensitivity, "\n") cat("Specificity =", Specificity, "\n\n") cat("Among", N, "patients in the target population:\n") cat("Number of Positives =", N, "*", Prevalence, "=", np, "\n") cat("Number of True Positives =", np, "*", Sensitivity, "=", npp, "\n") cat("Number of False Negatives =", np, "-", npp, "=", npn, "\n\n") cat("Number of Negatives =", N, "*", (1 - Prevalence), "=", nn, "\n") cat("Number of True Negatives =", nn, "*", Specificity, "=", nnn, "\n") cat("Number of False Positives =", nn, "-", nnn, "=", nnp, "\n\n") cat("Number of Device Positives =", npp, "+", nnp, "=", ndp, "\n") cat("Number of Device Negatives =", nnn, "+", npn, "=", ndn, "\n\n") cat("Sensitivity =", npp, "/", np, "=", Sensitivity, "\n") cat("Specificity =", nnn, "/", nn, "=", Specificity, "\n") cat("PPV =", npp, "/", ndp, "=", round(npp / ndp, 2), "\n") cat("NPV =", nnn, "/", ndn, "=", round(nnn / ndn, 2), "\n") library(networkD3) if (!by.prediction) { ############################# Disease Branches ########################################### ## A connection data frame is a list of flows with intensity for each flow links <- data.frame( source=c("n","n", "np", "np", "nn", "nn"), target=c("np","nn", "npp", "npn", "nnn", "nnp"), value=c(np, nn, npp, npn, nnn, nnp) ) ## From these flows we need to create a node data frame: it lists every entities involved in the flow nodes <- data.frame(id = unique(c(as.character(links$source), as.character(links$target))), name = c(paste0("Patients (", N, ")"), paste0("Disease Positives (", np, ")"), paste0("Disease Negatives (", nn, ")"), paste0("True Positives (", npp, ")"), paste0("False Negatives (", npn, ")"), paste0("True Negatives (", nnn, ")"), paste0("False Positives (", nnp, ")"))) ## With networkD3, connection must be provided using id, not using real name like in the links dataframe. links$IDsource <- match(links$source, nodes$id) - 1 links$IDtarget <- match(links$target, nodes$id) - 1 ## Make the Sankey Diagram with Disease Branches p1 <- sankeyNetwork(Links = links, Nodes = nodes, Source = "IDsource", Target = "IDtarget", Value = "value", NodeID = "name", fontSize = 50, nodeWidth = 30, nodePadding = 200) print(p1) ## save the widget htmlwidgets::saveWidget(p1, file="~/temp/sankey1.html") } else { ############################# Prediction Branches ########################################### ## A connection data frame is a list of flows with intensity for each flow links <- data.frame( source=c("n","n", "ndp", "ndp", "ndn", "ndn"), target=c("ndp","ndn", "npp", "nnp", "nnn", "npn"), value=c(ndp, ndn, npp, nnp, nnn, npn) ) ## From these flows we need to create a node data frame: it lists every entities involved in the flow nodes <- data.frame(id = unique(c(as.character(links$source), as.character(links$target))), name = c(paste0("Patients (", N, ")"), paste0("Device Positives (", ndp, ")"), paste0("Device Negatives (", ndn, ")"), paste0("True Positives (", npp, ")"), paste0("False Positives (", nnp, ")"), paste0("True Negatives (", nnn, ")"), paste0("False Negatives (", npn, ")"))) ## With networkD3, connection must be provided using id, not using real name like in the links dataframe. links$IDsource <- match(links$source, nodes$id) - 1 links$IDtarget <- match(links$target, nodes$id) - 1 ## Make the Sankey Diagram p2 <- sankeyNetwork(Links = links, Nodes = nodes, Source = "IDsource", Target = "IDtarget", Value = "value", NodeID = "name", fontSize = 50, nodeWidth = 30, nodePadding = 200) print(p2) htmlwidgets::saveWidget(p2, file="~/temp/sankey2.html") } } if (F) { # Unit Test make.Sankey.diagram.for.Dx.performance(Prevalence = 0.1, Sensitivity = 0.8, Specificity = 0.8) }
Monday, March 18, 2024
Make a Sankey diagram to illustrate Diagnostic Performance using R package "NetworkD3"
Subscribe to:
Posts (Atom)