confusion.matrix.diagnostic.accuracy <- function(x) { ## Purpose: ## Calculate sensitivity, specificity, and other metrics. ## ## Arguments: ## x: a numeric vector of length 4 for the raw confusion matrix. ## First 2 numbers: count of (Test+, Test-) with reference positive results ## Last 2 numbers: count of (Test+, Test-) with reference negative results ## Ref+, Ref- ## Test+ x1 x3 ## Test- x2 x4 ## ## Return: ## Sensitivity, Specificity, PPV, NPV, PLR, NLR and their CI's ## ## Author: Feiming Chen ## ________________________________________________ d <- as.data.frame(addmargins(matrix(x, ncol = 2))) dimnames(d) <- list(c("Test.Pos", "Test.Neg", "Sum"), c("Ref.Pos", "Ref.Neg", "Sum")) d$Likelihood.Ratio <- sapply(1:3, function(i) convert.to.CI.text(ratio.of.two.prop(x=c(d[i, 1], d[i, 2]), n=c(d[3, 1], d[3, 2])), digits = 1)) d$"Performance" <- c(paste(c("PPV =", "NPV ="), my.proportion(c(d[1,1], d[2,2]), c(d[1,3], d[2,3]), digits = 1)), paste("Prevalence =", my.proportion(d[3,1], d[3,3], digits = 1))) sens <- d[1,1] / d[3, 1] spec <- d[2,2] / d[3, 2] PPV <- d[1,1] / d[1, 3] NPV <- d[2,2] / d[2, 3] J <- sens + spec - 1 # Youden's index in percentage ## J2 <- d[1,1] / d[3, 1] - d[1,2] / d[3, 2] # identical in the form of risk difference J.ci <- prop.test(c(d[1,1], d[1,2]), c(d[3,1], d[3,2]))$conf.int J.str <- convert.to.CI.text(c(J, J.ci)) NND <- 1 / (PPV + NPV - 1) # Number Needed to Diagnose ## NND2 <- 1 / (d[1,1] / d[1, 3] - d[2,1] / d[2, 3]) # identical in the form of risk difference NND.ci <- 1 / prop.test(c(d[1,1], d[2,1]), c(d[1,3], d[2,3]))$conf.int NND.str <- convert.to.CI.text(ceiling(c(NND, rev(NND.ci)))) b <- c(paste(c("Sensitivity =", "Specificity ="), my.proportion(c(d[1,1], d[2,2]), c(d[3,1], d[3,2]), digits = 1)), "", "", paste0(paste0("J = ", J.str), ", ", paste0("NND = ", NND.str))) d2 <- rbind(d, b) rownames(d2)[4] <- "Performance" tex.print(d2, file = "~/temp/confusion-matrix-performance", caption = "Performance Evaluation from Confusion Matrix") d2 } if (F) { ## bowel cancer diagnostic test (https://en.wikipedia.org/wiki/Sensitivity_and_specificity#Confusion_matrix) x <- c(20, 10, 180, 1820) confusion.matrix.diagnostic.accuracy(x) confusion.matrix.diagnostic.accuracy(c(95, 5, 5, 95)) confusion.matrix.diagnostic.accuracy(c(95, 5, 500, 9500)) confusion.matrix.diagnostic.accuracy(c(90, 10, 1000, 9000)) }
Thursday, August 31, 2023
Diagnostic Evaluation from Data Presented in a Confusion Matrix
Thursday, April 27, 2023
Calculate the discriminatory power of a model prediction index using the concordance metric c-statistic (ROC-AUC)
c.statistic <- function(R, D) { ## Purpose: Calculate the c-statistic (ROC-AUC), which is a ## concordance measure. If we take all possible pairs of ## patients where one has disease (1) and the other has no ## disease (0), we can calculate the proportion of all such ## pairs where the diseased one has higher score (device output) ## than the normal one (if they have the same value, we count ## this as ‘half a victory’). ## ## Author: Feiming Chen ## ## Arguments: ## R: Clinical Reference Standard (0 = Negative, 1 = Positive) ## D: Device Diagnostic Output (a continuous variable) ## ## Return: ## - c : the c-statistic ## ________________________________________________ f <- split(D, R) o <- outer(f$"1", f$"0", "-") c.statistic <- mean((o > 0) + (o == 0)/2) cat("c-statistic (ROC-AUC) =", c.statistic, "\n") c.statistic } if (F) { # Unit Test R <- rep(0:1, each = 5) c.statistic(R, 1:10) # 1 c.statistic(R, 10:1) # 0 c.statistic(R, rep(1, 10)) # 0.5 neg <- rep(1:5, times=c(33,6,6,11,2)) pos <- rep(1:5, times=c(3,2,2,11,33)) R <- c(rep(0, length(neg)), rep(1, length(pos))) D <- c(neg, pos) c.statistic(R, D) # 0.89317 }
Import a text block (with only numeric values) and convert it to data frame
scan.text <- function(b, n.col = 1, v.names = "") { ## Purpose: Import a text block (with only numeric values) and convert it to data frame. ## ## Author: Feiming Chen ## ## Arguments: ## - b : a block of text containing only numeric values separated by spaces ## - n.col : number of columns in the data frame ## - v.names : variable names ## Return: ## a data frame containing imported numbers ## ## ________________________________________________ d1 <- scan(textConnection(b)) d2 <- as.data.frame(matrix(d1, ncol = n.col, byrow = T)) names(d2) <- v.names d2 } if (F) { # Unit Test scan.text("1 2 3 4 5 6", n.col = 3, v.names = c("ID", "V1", "V2")) ## ID V1 V2 ## 1 1 2 3 ## 2 4 5 6 }
Saturday, March 18, 2023
Accuracy Analysis for Binary Data with Repeated Measurements
accuracy.metrics <- function(dat) { ## Purpose: ## Calculate binary accuracy metrics (sensitivity, specificity, PPV, NPV, PLR, NLR). ## ## Author: Feiming Chen ## ## Arguments: ## - dat : a data frame with the following variable names ## device - binary response from device (1 = Positive; 0 = Negative) ## reference - binary response from reference (1 = Positive; 0 = Negative) ## ## Return: ## - a list of binary accuracy metrics. ## ## ________________________________________________ d <- dat$device r <- dat$reference sensitivity = mean(d[r == 1]) specificity = 1 - mean(d[r == 0]) PPV = mean(r[d == 1]) NPV = 1 - mean(r[d == 0]) PLR = sensitivity / (1 - specificity) NLR = (1 - sensitivity) / specificity c(Sensitivity = sensitivity, Specificity = specificity, PPV = PPV, NPV = NPV, PLR = PLR, NLR = NLR) } if (F) { accuracy.metrics(data.frame(device = rep(c(0,1), 5), reference = rep(c(1, 0), 5))) ## [1] 0 0 0 0 0 Inf accuracy.metrics(data.frame(device = rep(c(0,1), 5), reference = rep(c(0, 1), 5))) ## [1] 1 1 1 1 Inf 0 } accuracy.analysis <- function(dat, boot.size = 2000) { ## Purpose: ## Calculate binary accuracy metrics (sensitivity, specificity, PPV, NPV, PLR, NLR), ## with their CI's, even in the presence of repeated measurements. ## ## Author: Feiming Chen ## ## Arguments: ## - dat : a data frame with the following variable names ## subject - subject identifier ## device - binary response from device ## reference - binary response from reference ## - boot.size : bootstrap sample size. ## ## Return: ## - Point estimates and 95% CI's for a list of binary accuracy metrics (sensitivity, specificity, PPV, NPV, PLR, NLR). ## ## ________________________________________________ id <- dat$subject unique.ID <- unique(id) N <- length(unique.ID) cat("Number of Unique Patients =", N, "\n") K <- nrow(dat) / N cat("Total number of repeated measurements =", nrow(dat), "\n") cat("Average number of repeated measurements per patient =", K, "\n") ## Point Estimates est <- accuracy.metrics(dat) cat("\nPoint Estimates of Accuracy Metrics:\n") print(round(est, 4)) ## Make Bootstrap Samples cat("\nBootstrap Sample Size =", boot.size, "\n") set.seed(1) replicate(n = boot.size, { s <- sample(unique.ID, replace = TRUE) # a bootstrap sample of patient ID's ## find all rows with the ID's in the bootstrap sample of ID's v <- c() for (j in s) v <- c(v, which(id == j)) # a new bootstrap sample accuracy.metrics(dat[v,]) }) -> est.boot CI <- apply(est.boot, 1, function(x) quantile(x, c(0.025, 0.975))) cat("95% Bootstrap Percentile Confidence Intervals of Accuracy Metrics:\n") print(round(CI, 4)) invisible(list(est = est, CI = CI))} if (F) { # Unit Test ## ans <- accuracy.analysis(dat, boot.size = 10) }
Tuesday, February 28, 2023
Agreement Analysis for Continuous Data with Repeated Measurements
agreement.metrics <- function(dat) { ## Purpose: ## Calculate agreement metrics. ## ## Author: Feiming Chen ## ## Arguments: ## - dat : a data frame with the following variable names and with no NA values ## subject - subject identifier ## reference - continuous response from reference ## device - continuous response from device ## ## Return: ## - a list of agreement metrics (MAE, MAE%, RMSE, LOA, Bias, Average Correlation). ## ## ________________________________________________ reference.mean <- mean(dat$reference) dat$difference <- dat$device - dat$reference ## Bland-Altman Analysis with Repeated Measures BA.analysis <- bland.altman.repeated.vary(difference ~ factor(subject), dat, silent = TRUE) ## For MAE and RMSE MAE <- mean(abs(dat$difference)) # Mean Absolute Error MAE.pct <- MAE / reference.mean RMSE <- sqrt(mean(dat$difference^2)) # Root Mean Squared Error ## For Correlation: calculate patient-level summary metrics from repeated measures, then average across patients. ds <- split(dat, dat$subject) sapply(ds, function(d) cor(d$reference, d$device, method = "pearson")) -> ds2 Correlation <- mean(ds2) c(ref.mean = reference.mean, MAE = MAE, MAE.pct = MAE.pct, RMSE = RMSE, Correlation = Correlation, Bias = BA.analysis$m, SD = BA.analysis$s, LoA.L = BA.analysis$loa[1], LoA.U = BA.analysis$loa[2], SD0 = BA.analysis$s0, # no adjustment for repeated measurements LoA.L0 = BA.analysis$loa0[1], # no adjustment for repeated measurements LoA.U0 = BA.analysis$loa0[2]) # no adjustment for repeated measurements } if (F) { agreement.metrics(data.frame(subject = rep(1:30, 10), reference = 5 + rnorm(300), device = 6 + rnorm(300))) } agreement.analysis <- function(dat, boot.size = 2000) { ## Purpose: ## Calculate agreement metrics (MAE, MAE%, RMSE, LOA, Bias, Average Correlation) ## with their CI's, even in the presence of repeated measurements. ## ## Author: Feiming Chen ## ## Arguments: ## - dat : a data frame with the following variable names ## subject - subject identifier ## reference - continuous response from reference ## device - continuous response from device ## - boot.size : bootstrap sample size. ## ## Return: ## - Point estimates and 95% CI's for a list of agreement metrics (MAE, MAE%, RMSE, LOA, Bias, Average Correlation). ## ## ________________________________________________ id <- dat$subject unique.ID <- unique(id) N <- length(unique.ID) cat("Number of Unique Patients =", N, "\n") K <- nrow(dat) / N cat("Total number of repeated measurements =", nrow(dat), "\n") cat("Average number of repeated measurements per patient =", K, "\n") ## Point Estimates est <- agreement.metrics(dat) cat("\nPoint Estimates of Agreement Metrics:\n") print(round(est, 4)) ## 2000 Bootstrap Samples cat("\nBootstrap Sample Size =", boot.size, "\n") set.seed(1) replicate(n = boot.size, { s <- sample(unique.ID, replace = TRUE) # a bootstrap sample of patient ID's ## find all rows with the ID's in the bootstrap sample of ID's v <- c() for (j in s) v <- c(v, which(id == j)) # a new bootstrap sample agreement.metrics(dat[v,]) }) -> est.boot CI <- apply(est.boot, 1, function(x) quantile(x, c(0.025, 0.975))) cat("95% Bootstrap Percentile Confidence Intervals of Agreement Metrics:\n") print(round(CI, 4)) invisible(list(est = est, CI = CI))} if (F) { # Unit Test ## ans <- agreement.analysis(dat, boot.size = 10) }
Calculate the mean and 95% confidence interval of a sample of numbers under a transform
sample.mean.CI.transform <- function(x, transform.func = identity, anti.transform.func = identity) { ## Purpose: ## Calculate the mean and 95% CI of a sample of numbers under a transform. ## ## Author: Feiming Chen ## ## Arguments: ## - x : a vector of positive numerical values ## - transform.func: a function for transforming the original values to a new scale. ## - anti.transform.func: a function for back-transforming the values to the original scale. ## ## Return: ## - m : mean based on a particular transform function. ## ## ________________________________________________ y <- transform.func(x) # transform to a new scale a <- t.test(y) # estimate the mean and 95% CI based on t-distribution c(anti.transform.func(a$estimate), CI = anti.transform.func(a$conf.int)) } if (F) { # Unit Test x <- exp(1:10) sample.mean.CI.transform(x) # no transform ## estimate the mean under the log-normal distributional assumption sample.mean.CI.transform(x, transform.func = log, anti.transform.func = exp) }
Subscribe to:
Posts (Atom)