Wednesday, February 15, 2017

Look up value in a reference table (Similar to Excel's VLOOKUP function)

vlookup <- function(x, x.vector, y.vector = seq(x.vector), exact = FALSE, na.action = FALSE) 
{
    ## PURPOSE: Look up value in a reference table (Similar to Excel's VLOOKUP function)
    ## ARGUMENT:
    ##    x: a vector of Look Up Value (should not contain NA values)
    ##    x.vector: a vector where the "x" value can be located (approximately).
    ##    y.vector: a vector containing the return value that correponds the identified location in the "x.vector".
    ##              Default to the index to the x.vector.
    ##    exact: if T, the numeric values have to match exactly. 
    ##    na.action: If T and if cannot find the lookup value, return the input value. 
    ## RETURN: a scale value that corresponds to the "x" lookup value. 
    ## DATE: 20 Oct 2016, 16:30
    ## -----------------------------------------------------
    
    ans <- NULL
    if (is.numeric(x)) {
        if (exact) {
            ans <- y.vector[match(x, x.vector)]
        } else {
            ans <- y.vector[match.approx(x, x.vector)]
        }
    } else if (is.character(x)) {
        ans <- y.vector[match(x, x.vector)]
    }      

    if (na.action) {
        idx <- is.na(ans)
        ans[idx] <- x[idx]
    }
    ans
}
if (F) {                                # Unit Test 
    vlookup(c(68.2, 73), c(54, 60, 68.1, 72, 80), LETTERS[1:5]) # "C" "D"
    vlookup(c(2, 3.1), 1:5)                                     # 2 3
    vlookup(c(2, 3.1), 1:5, exact = T)                          # 2 NA
    vlookup(c("C", "E"), LETTERS[1:5], LETTERS[11:15]) #  "M" "O"
    vlookup(c("C", "X"), LETTERS[1:5], LETTERS[11:15]) #  "M" NA
    vlookup(c("C", "X"), LETTERS[1:5], LETTERS[11:15], na.action = T) #  "M" "X"
}


match.approx <- function(x, y) {
    ## Purpose: Match Approximately for Numerical Data
    ## Arguments:
    ##   "x":  a vector of numeric values.
    ##   "y":  a vector of numeric values. 
    ## RETURN:
    ##   The index in "y" that indicates the closest y value to each of "x" value. 
    ## Author: Feiming Chen, Date: 15 Feb 2017, 10:41
    ## ________________________________________________
    
    sapply(x, function(x0) which.min(abs(x0 - y)))
}
if (F) {
  match.approx(c(4.2, 1.2, 15), 1:10)                #  4  1 10
}

Simple Principal Component Analysis


my.pca  <- function(dat.feature)
{ 
    ## Purpose: Principal Component Analysis
    ## Arguments:
    ##   dat.feature: a data feature matrix, where each column is a feature vector. 
    ## Return: PCA plots and result. 
    ## Author: Feiming Chen, Date: 15 Feb 2017, 09:59
    ## ________________________________________________
    p <- prcomp(dat.feature)
    plot(p, main = "Scree Plot", xlab = "Principal Components")
    dev.new()
    biplot(p)
    print(summary(p))
    invisible(p)
}
if (F) {                                # Unit Test
    d <- matrix(rnorm(1000), 20)
    my.pca(d)
}



Tuesday, February 14, 2017

Making Barplot with Confidence Intervals


my.barplot  <- function(h, hl, hu, ...)
{ 
    ## Purpose: Template function for making Barplot with Confidence Interval.
    ##          Require R package "gplots". Refer to documentation of "gplots::barplots2". 
    ## Arguments:
    ##   h: Height Matrix. Each column is a group of heights. The dimnames will be used for labeling.
    ##   hl: Lower CI. Same dimension as "h". 
    ##   hu: Upper CI. Same dimension as "h".
    ##   ...: passed to "gplots::barplot2"
    ## Return: 
    ## Author: Feiming Chen, Date: 14 Feb 2017, 14:22
    ## ________________________________________________

    old.par <- par("mar")
    par(mar = c(5, 10, 4, 5))
    gplots::barplot2(h, beside = T, legend.text = T, horiz = T, col=rainbow(ncol(h)), border = NA, 
                     cex.axis = 2, cex.names = 1.5, cex.lab=2, las = 1, xpd = F,
                     plot.ci = TRUE, ci.l = hl, ci.u = hu, ci.width = 0.3, ci.lwd = 2, 
                     plot.grid = TRUE, 
                     ...) -> mp

    ## Put numbers on the bars 
    for (i in seq(ncol(mp))) text(1, mp[,i], labels=round(h[,i], 2), cex=2)
    par(old.par)
}
if (F) {                                # Unit Test
    x <- structure(c(9.5, 10.5, 1.2, 0.4, 7, 12, 9.8, 11.2, 8.3, 9.6, 0.8, 0.3, 6.7, 9.8, 9, 10.2), .Dim = c(8L, 2L), .Dimnames = list(c("Type R", "Type B", "R.SD", "B.SD", "Rl", "Ru", "Bl", "Bu"), c("Class 1", "Class 2")))
    h <- x[1:2,]
    hl <- x[c(5,7),]
    hu <- x[c(6,8),]
    my.barplot(h, hl, hu, main="Comparison", xlab="Average Result", xlim = c(0, 20))
}


Generate HTML or PNG representation of a data / table / model object


tex.print <- function(x, type=c("PNG", "HTML"), file=NULL, caption = file, digits=0) {
    ## Purpose: Generate HTML or PNG representation of a data/table/model object.
    ##          Require R "memisc" package. 
    ## Arguments:
    ##   x: an object (table, ftable, data frame, model object, etc. that are acceptable in R "memisc" package.
    ##   type: what to generate -- 
    ##         "HTML" shows a HTML table in a web browser. 
    ##         "PNG" shows a PNG (image) file (require linux commands: latex, dvipng, display).
    ##   file: make a copy of the PNG file with this name.  Also used for the table name.
    ##   caption: table caption. 
    ##   digits: number of decimal places to use.
    ## Return: Display the data representation and an image file (type="PNG") for insertion. 
    ## Author: Feiming Chen, Date: 14 Feb 2017, 11:59
    ## ________________________________________________

    type <- match.arg(type)

    is.tabular.data <- "tabular" %in% class(x) # "x" is from the "tables" package output
    is.tbl.data <- "tbl" %in% class(x)         # "x" is from the "dplyr" package output

    if (is.tbl.data) {
        x <- as.data.frame(x)
    }
    
    switch(type,
           HTML = {
               ## Show HTML table in a Web Browser
               if (is.null(file)) {
                   memisc::show_html(x, digits = digits)
               } else {
                   memisc::write_html(x, file = paste0(file,".html"), digits = digits)
               }
           }, 
           PNG = {
               ## Generate LaTeX and PNG Image file
               
               if (is.tabular.data) {
                   library(tables)
                   booktabs()
               }

               if (is.tabular.data || is.tbl.data) {

                   w <- Hmisc::latex(x, file="X.tex", ctable=TRUE, caption = caption, digits = digits)
                   if (is.tabular.data) w$style <- c("booktabs", "dcolumn")
                   d <- Hmisc::dvi(w)                         # convert to DVI file 
                   system(paste("dvipng -q* -o X.png -T tight -D 200", d$file)) # convert to PNG file with Tight box and Resolution of 200
                   file.remove("X.tex")

               } else {

                   r <- memisc:::toLatex.default(x, digits=digits, show.vars=TRUE)
                   
                   f <- file("X.tex", "w")
                   writeLines(c("\\documentclass{article}",
                                "\\usepackage{booktabs}",
                                "\\usepackage{dcolumn}",
                                "\\begin{document}", 
                                "\\pagenumbering{gobble}",
                                "\\begin{table}", 
                                "\\centering"),  f)

                   if (!is.null(caption)) writeLines(paste0("\\caption{", caption, "}"), f)

                   writeLines(r, f)

                   writeLines(c("\\end{table}", "\\end{document}"), f)

                   close(f)

                   system("latex X.tex >/dev/null") # Generate DVI file from LaTeX file (X.tex -=> X.dvi)
                   system("dvipng -q* -o X.png -T tight -D 200 X.dvi >/dev/null") # convert to PNG file with Tight box and Resolution of 200
                   file.remove("X.tex", "X.log", "X.aux", "X.dvi")
               }

               if (is.null(file)) {
                   if (!grepl("/tmp", getwd())) {
                       file.copy("X.png", "~/tmp/X.png", overwrite = T)
                   }
                   system("display ~/tmp/X.png &")                                     # display the PNG file. 
               } else {
                   file.rename("X.png", paste0(file,".png"))
               }
           }
           )
    invisible(NULL)
}
if (F) {                                # Unit Test
    x <- data.frame(age=c(1,1,1,5,5,5), height=c(10,12,9,20,23,18))
    tex.print(x)
    tex.print(ftable(x))
    x %>% group_by(age) %>% summarise(heights = mean(height)) -> y
    tex.print(y)
    tex.print(x, file="Test")
    tex.print(y, type="HTML")
    tex.print(y, type="HTML", file = "Test")

    library(tables)
    a <- tabular( (Species + 1) ~ (n=1) + Format(digits=2) * (Sepal.Length + Sepal.Width) * (mean + sd), data=iris )
    tex.print(a)
    tex.print(a, file = "test")

}







Apply a function to a vector in an accumulative manner (like cumsum)

cum.func <- function(x, func, ...)
{ 
    ## Purpose: Apply a function to a vector in an accumulative manner (like cumsum)
    ## Arguments:
    ##   x: a vector
    ##   func: a function
    ##   ...: passed to "func"
    ## Return: a vector that is the result of:
    ##   c( func(x[1]),  func(x[1:2]),  func(x[1:3]),  func(x[1:4]),  func(x[1:5]), ... )
    ## Author: Feiming Chen, Date: 30 Jan 2017, 14:06
    ## ________________________________________________
    
    N <- length(x)
    y <- rep_len(NA, N)
    for (i in 1:N) y[i] <- func(x[1:i], ...)
    y[is.na(x)] <- NA
    y
}
if (F) {                                # Unit Test
    cum.func(5:1, mean)                 #  c(5.0, 4.5, 4.0, 3.5, 3.0))
    cum.func(c(NA, 5:1, NA), mean, na.rm=T)                 #  c(NA, 5.0, 4.5, 4.0, 3.5, 3.0, NA)
    cum.func(c(NA, 5:1, NA), sd, na.rm=T)        # NA NA 0.70711 1.00000 1.29099 1.58114 NA
}

Overlay a new plot on an existing plot


plot.overlay <- function(..., label="2nd Y", col="blue")
{ 
    ## Purpose: Overlay a new plot on an existing plot. 
    ## Arguments:
    ##   ...: Passed to the "plot" function for the new plot.
    ##   label: The secondary Y-axis label.
    ##   col: The color of the ticks and tick labels for the new plot. 
    ## Return: A modified plot. 
    ## Author: Feiming Chen, Date:  6 Feb 2017, 10:19
    ## ________________________________________________

    par(new=T)
    plot(..., axes=F, xlab="", ylab="", col=col)
    axis(side=4, col.ticks = col, col.axis=col)
    mtext(label, side=4, line=-1, col=col)
}
if (F) {                                # Unit Test
    plot(rnorm(100))
    plot.overlay(rnorm(100), type="l", label="Test")
}


Apply a function to each consecutive pairs in a vector.

diff.func <- function(x, func)
{ 
    ## Purpose: Apply a function to each consecutive pairs in a vector. 
    ## Arguments:
    ##   x: a vector
    ##   func: a function that operature on two elements (of "x")
    ## Return: a vector, whose length is length(x) - 1, that looks like (where N=length(x))
    ##    func(x[2], x[1]), func(x[3], x[2]), func(x[4], x[3]), ..., func(x[N], x[N-1]).
    ## Author: Feiming Chen, Date:  9 Feb 2017, 11:25
    ## ________________________________________________
    
    ## embed in 2-dim space to make consecutive pairs
    x2 <- embed(x, 2)                  # N - 1 pairs
    ## apply a function to each consecutive paris (reversely)
    x3 <- apply(x2, 1, func)
    x3
}
if (F) {                                # Unit Test
    diff.func(1:5, diff)                # -1 -1 -1 -1   
    diff.func(1:5, mean)                # 1.5 2.5 3.5 4.5
}

Plot multiple lines together using "matplot"


my.line.plot  <- function(x, y, ...)
{ 
    ## Purpose: Template function for making lines by group. 
    ## Arguments:
    ##   x: X-coords
    ##   y: a matrix with Y-coords (each column has a set of Y values)
    ##   ...: passed to "plot"
    ## Return: A plot assuming log X-axis with axes tickes given by "x". 
    ## Author: Feiming Chen, Date:  9 Feb 2017, 14:02
    ## ________________________________________________n
    N <- ncol(y)                        # number of lines
    matplot(x, y, type="b", log="x", xaxt="n", lwd=2, lty=seq(N), col = rainbow(N), ...)  
    matpoints(x, y, log="x", xaxt="n", col = "black", ...)
    axis(1, at=x)
}
if (F) {                                # Unit Test
    x <- c(0.1, 1, 10)
    y <- matrix(rnorm(9), 3)
    my.line.plot(x, y, xlab="Tiime", ylab="Signal", main="Line Plot by Groups")
}


Monday, February 6, 2017

R Code: Apply a function to each element of a list (or data frame) or to each element of a list within a list of data while retaining the names attribute of each nested list


lf.compute.data.column <- function(dat.list, func, ...)
{
## Purpose: Apply a function to each column of a list (e.g. data frame). Retains the names information.
## Basis for processing more complex data (e.g. a list of a list)
## Arguments:
## dat.list: a list of data (e.g. data frame)
## func: a function that computes a vector (may have an argument "name")
## ...: passed to "func"
## Return:
## a list of outputs.
## Author: Feiming Chen, Date: 30 Jan 2017, 13:21
## ________________________________________________

this.name <- null2na(names(dat.list))

mapply(func,
dat.list,
name = this.name,
...,
SIMPLIFY = FALSE
)
}
if (F) { # Unit Test
dat.list = data.frame(x=rnorm(10), y=rnorm(10))
lf.compute.data.column(dat.list, mean)
lf.compute.data.column(dat.list, function(x, ...) { c(a=mean(x), b=sd(x)) })
lf.compute.data.column(dat.list, function(x, name) { list(a=mean(x), n=name) })
}


lf.compute.data.column.2 <- function(dat.list, func, ...)
{
## Purpose: Apply a function to a list of a list of data. Retains the names information.
## Arguments:
## dat.list: a list of a list
## func: a function that computes a vector. (may have an argument "name")
## Return: a list of list of results
## Author: Feiming Chen, Date: 6 Feb 2017, 12:50
## ________________________________________________

this.name <- null2na(names(dat.list))

mapply(
function(d, name) {
if (!is.na(name)) names(d) <- paste0(name, ".", names(d))
lf.compute.data.column(d, func)
},
dat.list,
name = this.name,
...,
SIMPLIFY = FALSE
)
}
if (F) { # Unit Test
dat.list.2 = list(test1 = data.frame(x=1:5, y=6:10), test2= data.frame(x=11:15, y=16:20))
lf.compute.data.column.2(dat.list.2, mean)
lf.compute.data.column.2(dat.list.2, function(x, ...) { c(m=mean(x), s=sd(x)) })
lf.compute.data.column.2(dat.list.2, function(x, name) { list(m=mean(x), n=name) })
}

null2na <- function(x) {
## if the value of x is NULL(as determined by length test since is.null may
## not always work), turn it into NA so that an object
## made of it will always exist.
if (length(x)==0)
x <- NA
x
}

R Code: Extract a fixed component of a list within a list and optionally apply a function


lf <- function(obj, comp, fun=identity, wrap=identity,
return.vector=TRUE, verbose=FALSE,
...) {
## WARNING: "comp" have to be a real string instead of a variable
## Functional Apply
## "comp": a character string, indicated a list component.
## "obj" is a list. "comp" is a component of each element of the
## list "obj". Apply a funcion (just an unquoted name) to each "comp" of the list
## "obj". "..." is passed to "fun". use "fun=identity" if no
## processing is needed. "wrap" is applied to the final result.
## "return.vector" will force the result to be a vector.

comp <- substitute(comp)
e <- paste(deparse(substitute(wrap)), "(lapply(obj,",
deparse(substitute(function(x) fun(x[[comp]], ...))), "))")
ans <- eval(parse(text=e))
if (verbose) cat("expression:", e, "\n")
if (return.vector)
ans <- sapply(ans, function(x) if (is.null(x)) NA else x)

ans
}
if (F) {
a <- list(x=c(1:3), y=c(4:6))
lf(a, 2) # c(x=2, y=5)

a <- list(x=list(median=3, mean=4), y=list(median=5, mean=6))
lf(a, "mean") # c(x=4, y=6)
lf(a, "mean", sqrt, verbose=T) # c(x=2, y=2.4495)
lf(a, "mean", sqrt, wrap=length, verbose=T) # 2
}

R Code: Apply a function to a vector in an accumulative manner


cum.func <- function(x, func)
{
## Purpose: Apply a function to a vector in an accumulative manner (like cumsum)
## Arguments:
## x: a vector
## func: a function
## Return: a vector that is the result of:
## c( func(x[1]), func(x[1:2]), func(x[1:3]), func(x[1:4]), func(x[1:5]), ... )
## Author: Feiming Chen, Date: 30 Jan 2017, 14:06
## ________________________________________________

N <- length(x)
y <- rep_len(NA, N)
for (i in 1:N) y[i] <- func(x[1:i])
y
}
if (F) { # Unit Test
identical(cum.func(5:1, mean), c(5.0, 4.5, 4.0, 3.5, 3.0))
}

R Function for Overlaying a New Plot on an Existing Plot


plot.overlay <- function(..., label="2nd Y", col="blue")
{
## Purpose: Overlay a new plot on an existing plot.
## Arguments:
## ...: Passed to the "plot" function for the new plot.
## label: The secondary Y-axis label.
## col: The color of the ticks and tick labels for the new plot.
## Return: A modified plot.
## Author: Feiming Chen, Date: 6 Feb 2017, 10:19
## ________________________________________________

par(new=T)
plot(..., axes=F, xlab="", ylab="", col=col)
axis(side=4, col.ticks = col, col.axis=col)
mtext(label, side=4, line=-1, col=col)
}
if (F) { # Unit Test
plot(rnorm(100))
plot.overlay(rnorm(100), type="l", label="Test")
}