Tuesday, December 12, 2017

Compare two tables with the same structure by merging them


compare.tables <- function(d1, d2, by.idx = 1)
{ 
    ## Purpose: Compare two tables with the same structure by merging them. 
    ## Arguments:
    ##    d1: table A
    ##    d2: table B
    ##    by.idx: an index vector to common variables.
    ## Return: a full joined table for comparision

    b <- names(d1)[by.idx]
    d <- full_join(d1, d2, by = b)
    tex.print(d, type = "HTML")
    invisible(d)
}
if (F) {                                # Unit Test
    d1 <- rc()
    d2 <- rc()
    by.idx <- 1:4
    compare.tables(d1, d2, by.idx)
}

List duplicated files in a directory


list.dup.files <- function(p, file.name.head = 20)
{ 
    ## Purpose: List duplicated files in a directory. 
    ## Arguments:
    ##   file.name.head: number of starting characters in the file name to use for duplication detection
    ## Return: a list of file names that are duplicated (by sorted order). 
    ## Author: Feiming Chen, Date: 13 Nov 2017, 16:36
    ## ________________________________________________
    
    p1 <- dir(p)
    p2 <- sort(p1, decreasing = T)
    p3 <- as.character(sapply(p2, function(x) substr(x, 1, file.name.head)))
    p4 <- p2[duplicated(p3)]
    if (length(p4) > 0) {
        cat("Duplicated Older Files:\n")
        print(p4)
    } else {
        cat("NO Duplicated Files.\n")
    }
    file.path(p, p4)
}
if (F) {                                # Unit Test
    p <- "~/tmp/Updated-Databases"
    file.name.head <- 25
    x <- list.dup.files(p, file.name.head)
    file.remove(x)
}

Wednesday, October 18, 2017

Convert all character variables in your data.frame to factors


df.make.factors <- function(df)
{ 
    ## Purpose: Convert all character variables in your data.frame to factors 
    ## Arguments:
    ##   df: a data frame
    ## Return: a data frame with converted factors on those character variables.
    ## Author: Feiming Chen, Date: 17 Oct 2017, 14:26
    ## ________________________________________________

    character_vars <- sapply(df, class) == "character"
    df[, character_vars] <- lapply(df[, character_vars], factor)
    df
}
if (F) {                                # Unit Test
    df <- data.frame(x=c("x", "x", "y"), y=1:3, z=c("1", "3", "3"))
    df.make.factors(df)
}

Monday, October 2, 2017

Display a (numeric) vector in a pretty format


display.vector <- function(x)
{ 
    ## Purpose: Display a vector in a pretty format
    ## Arguments:
    ##   x: a vector
    ## Return: a png picture for the vector. (each row has 10 elements)
    ## Author: Feiming Chen, Date:  2 Oct 2017, 14:54
    ## ________________________________________________

    N <- length(x)
    n1 <- N %% 10
    if (n1 > 0) {
        n2 <- 10 - n1
        x.add <- rep(NA, n2)
        x <- c(x, x.add)
    }

    x1 <- matrix(x, ncol = 10, byrow = T)
    n2 <- nrow(x1)
    nr <- paste0(10 * (seq.int(n2) - 1) + 1, ": ")
    dimnames(x1) <- list(nr, 1:10)

    tex.print(x1)
}
if (F) {                                # Unit Test
    x <- c(402.6615584,402.1343365,402.5467634,401.835917,402.9268321,404.5185276,406.0735012,408.7713965,408.2033098,411.043676,413.953578,420.6021333,426.6352944,440.9007592,471.5614004,523.8426555,619.173643,782.3175971,1033.289048,1321.267143,1615.935774,1898.979548,2173.03472,2442.864835,2682.25644,2884.575978,3064.278857,3252.58248,3413.084683,3588.447246,3701.244129,3796.824264,3840.251429,3943.184319,4084.131835,4037.30822,4116.338229,4191.71438,4173.699815,4110.847391,4197.181334,4221.081422,4207.343855,4209.140892,4263.463393,4306.872737,4312.087451,4269.522631,4313.924265,4265.306511) 
    display.vector(x)
}


tex.print <- function(x, type=c("PNG", "HTML"), file=NULL, caption = file, digits=0, ...) {
    ## Purpose: Generate HTML or PNG representation of a data/table/model object.
    ##          Require R "memisc" package. 
    ## Arguments:
    ##   x: an object (table, ftable, data frame, model object, etc. that are acceptable in R "memisc" package.
    ##   type: what to generate -- 
    ##         "HTML" shows a HTML table in a web browser. 
    ##         "PNG" shows a PNG (image) file (require linux commands: latex, dvipng, display).
    ##   file: make a copy of the PNG file with this name.  Also used for the table name.
    ##   caption: table caption. 
    ##   digits: number of decimal places to use. 
    ##    ...: passed to "toLatex"
    ## Return: Display the data representation and an image file (type="PNG") for insertion. 
    ## Author: Feiming Chen, Date: 14 Feb 2017, 11:59
    ## ________________________________________________

    require(memisc)
    type <- match.arg(type)
    
    switch(type,
           HTML = {
               ## Show HTML table in a Web Browser
               memisc::show_html(x)      
           }, 
           PNG = {
               ## Generate LaTeX and PNG Image file
               r <- toLatex(x, ..., digits=digits, show.vars=TRUE)
               f <- file("X.tex", "w")
               writeLines(c("\\documentclass{article}",
                            "\\usepackage{booktabs}",
                            "\\usepackage{dcolumn}",
                            "\\begin{document}", 
                            "\\pagenumbering{gobble}"), f)
               if (!is.null(file)) writeLines("\\begin{table}  \\centering", f)

               writeLines(r, f)

               if (!is.null(caption)) {
                   writeLines(paste0("\\caption{", caption, "} \\end{table}"), f)
               }

               writeLines("\\end{document}", f)
               close(f)

               system("latex X.tex >/dev/null") # Generate DVI file from LaTeX file (X.tex -=> X.dvi)
               system("dvipng -q* -o X.png -T tight -D 200 X.dvi >/dev/null") # convert to PNG file with Tight box and Resolution of 200
               system("display X.png &")                                     # display the PNG file. 
               file.remove("X.tex", "X.log", "X.aux", "X.dvi")

               if (!is.null(file)) file.copy("X.png", paste0(file,".png"), overwrite = T) 
               if (!grepl("/tmp", getwd())) {
                   file.copy("X.png", "~/tmp/X.png", overwrite = T)
               }
           }
           )
    invisible(NULL)
}

Thursday, September 7, 2017

Extracts all calls to a function (except for those in standard packages)

my.fun_calls <- function(f)
{ 
    ## Purpose: Extracts all calls to a function (except for those in standard packages) in a function 
    ##          Differs from pryr::fun_calls by excluding basic functions. 
    ## Arguments:
    ##   f: a function
    ## Return: All calls to a function (that are not in standard packages, e.g. base, stats, etc.)
    ## Author: Feiming Chen, Date:  7 Sep 2017, 11:23
    ## ________________________________________________

    if (is.function(f)) {
        my.fun_calls(body(f))
    } else if (is.call(f)) {
        fname <- as.character(f[[1]])
        if (identical(fname, ".Internal")) fname3 <- NULL
        f1 <- try(pryr::where(fname), silent = TRUE)
        if (is.tryerror(f1)) {
            if (fname[1] == "::") {   # Example: pryr::where  fname = c("::", "pryr", "where")
                fname3 <- paste0(fname[2], fname[1], fname[3])
            } else {
                fname3 <- NULL
            }
        } else {
            fname2 <- environmentName(f1)
            if (fname2 %in% c("base", "package:stats", "package:graphics", "package:grDevices", "package:utils", "package:datasets", "package:methods")) {
                fname3 <- NULL
            } else {
                if (fname2 == "R_GlobalEnv") {
                    fname3 <- fname
                } else {                # attach package name if other than Global Environment
                    fname2m <- sub("package:", "", fname2) # remove "package:" prefix. 
                    if (fname2m == "") {
                        fname3 <- paste0(fname)
                    } else {                         
                        fname3 <- paste0(fname2m, "::", fname) 
                    }
                }
            }
        }

        if (F) fun_calls(lm)            # for testing purpose only

        unique(c(fname3, unlist(lapply(f[-1], my.fun_calls), use.names = FALSE)))
    }
}
if (F) {                                # Unit Test
    my.fun_calls(lm)                    # NULL

    sort(my.fun_calls(my.fun_calls))
    ## "is.tryerror"     "my.fun_calls"    "pryr::fun_calls" "pryr::where"    
}


## Test for the try-error class (from "try" function)
is.tryerror <- function(x) inherits(x, "try-error")

Wednesday, June 28, 2017

Span: A Predicate Functional that finds the location of the longest sequential run of elements where the predicate is true

Span <- function(f, x)
{ 
    ## Purpose: A Predicate Functional that finds the location of the longest sequential run
    ##          of elements where the predicate is true. 
    ## Arguments:
    ##   f: a predicate (a function that returns a single TRUE or FALSE) 
    ##   x: a list or data frame
    ## Return: The location of the longest sequential run of elements where the predicate is true. 
    ## Author: Feiming Chen, Date: 28 Jun 2017, 09:45
    ## ________________________________________________

    y <- rle(Where(f, x))
    z <- y$lengths
    w <- which.max(z)          # position of the segment with longest sequential run of TRUE
    pos.end <- sum(z[1:w])
    pos.start <- pos.end - z[w] + 1
    c(start=pos.start, end=pos.end)
}
if (F) {                                # Unit Test
    x <- c(3, 1, NA, 4, NA, NA, NA, 5, 9, NA, NA)
    Span(is.na, x)
    ## start   end 
    ##     5     7 
}

Where <- function(f, x)
{ 
    ## Purpose: A Predicate Functional
    ## See other Predicate Functionals: Filter(), Find(), Position()
    ## See other Functionals: Map(), Reduce() 
    ## Arguments:
    ##   f: a predicate (a function that returns a single TRUE or FALSE) (e.g. is.character, all, is.NULL)
    ##   x: a list or data frame
    ## Return: a logical vector 
    ## Author: Feiming Chen, Date: 28 Jun 2017, 09:45
    ## ________________________________________________

    vapply(x, f, logical(1))
}
if (F) {                                # Unit Test
    df <- data.frame(x = 1:3, y = c("a", "b", "c"), stringsAsFactors = T)
    Where(is.factor, df)
    ## Compare with: 
    Filter(is.factor, df)
    Find(is.factor, df)
    Position(is.factor, df)
}

Thursday, June 22, 2017

Draw a data frame as a R plot


my.table.plot0 <- function(x, 
                           highlight.matrix = NULL,
                           main = "Table", title2 = NULL, size = 20,
                           bold.lastrow = FALSE, bold.lastcol = FALSE
                           ) 
{
    ## PURPOSE: Draw a data frame as a R plot. Require R package "gridExtra". 
    ## ARGUMENT:
    ##    x: a data frame
    ##    highlight.matrix: if provided a logical matrix (same dimension as "x"). Then "TRUE" cells will be highlighted. 
    ##    main: title for the plot
    ##    title2: additional title with more verbose text
    ##    size: text size of table text
    ##    bold.lastrow: if T, make the last row bold font.
    ##    bold.lastcol: if T, make the last column bold font.
    ## RETURN: a plot
    ## DATE: 21 Nov 2016, 15:46
    ## -----------------------------------------------------

    x <- as.data.frame(x)
    n1 <- nrow(x)
    n2 <- ncol(x)
    cl <- matrix("black", n1, n2)
    cl[is.na(x)] <- "gray"

    ## Bold Total Column and Row
    cl2 <- cl
    cl2[] <- "plain"
    if (bold.lastrow) {
        cl2[n1,] <- "bold"
    }
    if (bold.lastcol) {
        cl2[, n2] <- "bold"
    }
    
    require(gridExtra)
    table_theme <- ttheme_default(base_size = size,
                                  core = list(fg_params=list(col=cl, fontface = cl2)))
    ## Highlight Cells
    if (!is.null(highlight.matrix)) {
        cl3 <- cl
        z <- ttheme_default()$core$bg_params$fill # default color
        z1 <- rep(z, length.out = nrow(cl3))
        for (i in 1:ncol(cl3)) cl3[,i] <- z1
        cl3[highlight.matrix] <- "yellow"
        table_theme$core$bg_params$fill <- cl3
    }

    plot.new()
    gridExtra::grid.table(as.data.frame(x), theme=table_theme)
    ## Title Text
    t2 <- ifelse(is.null(title2), "", paste0("\n", graph.title.split.lines(title2, sep=" ", num.chars = 40)))
    title(main = paste0(main, t2))

}
if (F) {                                # Unit Test 
    x <- data.frame(Type=c('Car', 'Plane'), Speed=c(NA, 200))
    my.table.plot0(x)
    my.table.plot0(x, main='Test', title2='a very complex test', size=30, bold.lastcol = T, highlight.matrix = matrix(c(NA, F, F, T) , 2, 2))
    x <- data.frame(Type=rep('Car', 20), Speed=rep(100, 20))
    my.table.plot0(x)
}

my.table.plot <- function(x, highlight.matrix = NULL, bold.lastrow = FALSE, n = 20, ...)
{ 
    ## Purpose: Split a data frame to fit in each page and call "my.table.plot0". 
    ## Arguments:
    ##   x: a data frame or matrix
    ##   highlight.matrix: if provided a logical matrix (same dimension as "x"). Then "TRUE" cells will be highlighted. 
    ##   bold.lastrow: if T, make the last row bold font.
    ##   n: how many rows to fit into each page.
    ##   ...: pass to "my.table.plot0". 
    ## Return: multiple plots to plot the data matrix. 
    ## Author: Feiming Chen, Date:  5 Jul 2017, 15:16
    ## ________________________________________________

    x <- as.data.frame(x)
    N <- nrow(x)
    if ( N <= n ) {
        my.table.plot0(x, highlight.matrix = highlight.matrix, bold.lastrow = bold.lastrow, ...)
    } else {
        s <- rep(seq.int(N %/% 20 + 1), each = n, length.out = N)
        xs <- split(x, s)
        
        ng <- length(xs)                # number of pages
        bold.lastrow.list <- c(rep(FALSE, ng-1), bold.lastrow)
        
        if (is.null(highlight.matrix)) {
            mapply(my.table.plot0, xs, bold.lastrow = bold.lastrow.list, MoreArgs = list(...))
        } else {
            ys <- split(highlight.matrix, s)
            mapply(my.table.plot0, xs, highlight.matrix = ys, bold.lastrow = bold.lastrow.list, MoreArgs = list(...))
        }
    }
}
if (F) {                                # Unit Test
     x <- data.frame(Type=rep('Car', 50), Speed=rep(100, 50))
     h <- matrix(rep(c(F, T), 50), ncol=2)
     my.table.plot(x, highlight.matrix = h, bold.lastrow = T)
}

Wednesday, June 21, 2017

Add a "Total" level to each specified categorical variable in the data frame


df.add.total.category  <- function(dat, var.list, new.level.label = "Total")
{ 
    ## Purpose: Add a "Total" level to each specified categorical variable in the data frame. 
    ## Arguments:
    ##   dat: a data frame with several categorical variables for making tables.
    ##   var.list: a list of variable names in "dat".
    ##   new.level.label: the name for the new "total" level for the specified variable. 
    ## Return: 
    ## Author: Feiming Chen, Date: 21 Jun 2017, 13:02
    ## ________________________________________________

    if ( missing(var.list) )  {
        var.list <- names(dat)
    }

    d0 <- dat                           # to make new label across all variables
    lapply(var.list,  function( v )  {
        d <- dat
        d[[v]] <- new.level.label
        d0[[v]] <<- new.level.label
        d
    }) -> dd

    do.call(rbind, c(list(dat), dd, list(d0)))
}
if (F) {                                # Unit Test
    dat <- data.frame(A=gl(3,3), B=gl(3, 1, 9), C=11:19)
    df.add.total.category(dat, var.list=c("A", "B"))
}

Monday, June 19, 2017

Put unique elements of a string vector into one long string

paste.unique.string  <- function(x, sep = "/")
{ 
    ## Purpose: Put unique elements of a string vector into one long string. 
    ## Arguments:
    ##    x: a string vector
    ##    sep: what separator to use when pasting all unique elements together. 
    ## Return:  a single string with unique elements of "x". 
    ## Author: Feiming Chen, Date: 19 Jun 2017, 14:00
    ## ________________________________________________
    
    paste0(unique(x), collapse = sep)
}
if (F) {                                # Unit Test
    paste.unique.string(c("a", "b", "a", "b")) # "a/b"
}

Add "0" to the front of numbers to make n-digit string

prepend.zero.to.numbers  <- function(x, n = NULL, pad.char = "0")
{ 
    ## Purpose: Add "0" to the front of numbers to make n-digit string. 
    ## Arguments:
    ##   x: a vector of integers/characters
    ##   n: Number of total digits after padding with zero. Default to
    ##      using the largest number to calculate "n".
    ##   pad.char: what character to pad.  Default to "0". 
    ## Return: a character vector with nuumbers padded with zero's in the head.
    ## Author: Feiming Chen, Date: 19 Jun 2017, 13:19
    ## ________________________________________________

    x1 <- as.character(x)
    y <- nchar(x1)
    if (is.null(n)) n <- nchar(as.character(max(x)))
    y1 <- n - y                         # how long is the padding for each number
    pad.vector <- sapply(y1, function(w) paste(rep(pad.char, w), collapse = ""))
    paste0(pad.vector, x1)
}
if (F) {                                # Unit Test
    x <- c(2, 14, 156, 1892)
    prepend.zero.to.numbers(x)          # "0002" "0014" "0156" "1892"
    prepend.zero.to.numbers(x, pad.char = "X") # "XXX2" "XX14" "X156" "1892"
    prepend.zero.to.numbers(x, n = 6)          # "000002" "000014" "000156" "001892"
    y <- c("A", "BC", "DEF")
    prepend.zero.to.numbers(y, 3) # "00A" "0BC" "DEF"
}

Parse a string to generate a data frame


parse2data.frame  <- function(x)
{ 
    ## Purpose: Parse a string to generate a data frame.
    ## Arguments:
    ##   x: a string with header and multiple rows of text that resembles a data frame.
    ##      Fields are separated by comma. 
    ## Return: a data frame
    ## Author: Feiming Chen, Date: 19 Jun 2017, 10:12
    ## ________________________________________________

    d <- read.csv(textConnection(x), as.is = T)
    i <- sapply(d, is.character)
    d[i] <- lapply(d[i], trimws)
    d
}
if (F) {                                # Unit Test
    a = 
"

  Test, ID, Name
  t1 ,  3,  Sun
  t2 ,  5,  Moon
  t3,   2,  Earth

"    
    b <- parse2data.frame(a)
    str(b)
    ## 'data.frame':    3 obs. of  2 variables:
    ##  $ ID  : int  3 5 2
    ##  $ Name: Factor w/ 3 levels " Earth"," Moon",..: 3 2 1
}

Thursday, June 15, 2017

Tokenize a string into a vector of tokens

tokenize.string <- function(x, split = "[ ,:;]+")
{ 
    ## Purpose: Tokenize a string into a vector of tokens
    ## Arguments:
    ##   x: a string or a vector of strings
    ##   split: split characters (regular expression)
    ## Return: a character vector (if "x" is a string) or a list of character vectors. 
    ## Author: Feiming Chen, Date: 15 Jun 2017, 15:01
    ## ________________________________________________
    
    ans <- strsplit(x, split="[ ,:;]+", fixed=F)
    if (length(x) == 1) ans <- ans[[1]]
    ans
}
if (F) {                                # Unit Test
    x <- "IND,  UNR   INC ; TCU"
    tokenize.string(x)
    ## [1] "IND" "UNR" "INC" "TCU"
    tokenize.string(rep(x, 3))
    ## [[1]]
    ## [1] "IND" "UNR" "INC" "TCU"

    ## [[2]]
    ## [1] "IND" "UNR" "INC" "TCU"

    ## [[3]]
    ## [1] "IND" "UNR" "INC" "TCU"
}

Wednesday, June 14, 2017

Count the number of distinct values in a vector or distinct rows in a data frame

N.levels <- function(x)
{ 
    ## Purpose: Count the number of distinct values in a vector or distinct rows in a data frame
    ## Arguments:
    ##    x: a vector (numeric or string), or a data frame. 
    ## Return: a count for the distinct values/rows in the vector or data frame. 
    ## Author: Feiming Chen, Date: 20 Mar 2017, 10:58
    ## ________________________________________________

    sum(!duplicated(x))
}
if (F) {                                # Unit Test
    N.levels(c(1,2,1,3))                # 3
    N.levels(c("a", "b", "a"))          # 2
    x <- data.frame(a=c(1,2,1), b=c(1, 2, 1))
    N.levels(x) # 2
}

Tuesday, June 6, 2017

Write data tables to Excel (.xlsx) file


wxls <- function(x, file = "test", ...)
{ 
    ## Purpose: Write data tables to Excel (.xlsx) file.
    ##          Require package "openxlsx". 
    ## Arguments:
    ##   x: a data frame or a list of data frames. 
    ##   file: a naked file name with no extension.
    ##   ...: passed to "write.xlsx". 
    ## Return: Generate an Excel (.xlsx) file.
    ## Author: Feiming Chen, Date:  6 Jun 2017, 15:11
    ## ________________________________________________
    
    require(openxlsx)
    f <- paste0(file, ".xlsx")

    write.xlsx(x, file = f, asTable = TRUE,
               creator = "Feiming Chen", 
               tableStyle = "TableStyleMedium2",   ...)
}
if (F) {                                # Unit Test
    df <- data.frame("Date" = Sys.Date()-0:4,
                     "Logical" = c(TRUE, FALSE, TRUE, TRUE, FALSE),
                     "Currency" = paste("$",-2:2),
                     "Accounting" = -2:2,
                     "hLink" = "https://CRAN.R-project.org/",
                     "Percentage" = seq(-1, 1, length.out=5),
                     "TinyNumber" = runif(5) / 1E9, stringsAsFactors = FALSE)
    class(df$Currency) <- "currency"
    class(df$Accounting) <- "accounting"
    class(df$hLink) <- "hyperlink"
    class(df$Percentage) <- "percentage"
    class(df$TinyNumber) <- "scientific"

    wxls(df)

    wxls(list(A = df, B = df, C= df))   # write to 3 separate tabs with corresponding list names. 
}

Monday, June 5, 2017

Plot "y" against "x" where "x" is a vector of labels


my.plot.vs.label <- function(x, y, ...)
{ 
    ## Purpose: Plot "y" against "x" where "x" is a vector of labels (string)
    ## Arguments:
    ##   x: a string vector (labels)
    ##   y: a numeric vector or matrix
    ##   ...: passed to "matplot"
    ## Return: a plot
    ## Author: Feiming Chen, Date:  5 Jun 2017, 14:46
    ## ________________________________________________

    s <- seq(x)
    matplot(s, y, type="b", axes = F, xlab ="", ylab="", ...)
    axis(side=2)
    axis(side=1, at=s, labels=x, las=2)

    yn <- ncol(y)
    yna <- names(y)
    if (!is.null(yn) && !is.null(yna)) {
        legend("topright", legend=yna, col=1:yn, lwd=1.5, lty=1:yn)
    }
}
if (F) {                                # Unit Test
    x <- LETTERS
    y <- matrix(rnorm(260), ncol=10)
    my.plot.vs.label(x, y)
    y2 <- as.data.frame(y)
    my.plot.vs.label(x, y2)
}

Friday, June 2, 2017

Read/Import all CSV files in a specified directory

batch.read.csv.files <- function(p)
{ 
    ## Purpose: Read/Import all CSV files in a specified directory.
    ##          Require package "readr". 
    ## Arguments:
    ##   p: a file directory with CSV files. 
    ## Return:
    ##   a list, each element is a data frame (imported from the file) and its name is the corresponding file name. 
    ## Author: Feiming Chen, Date:  2 Jun 2017, 11:09
    ## ________________________________________________

    file.list <- list.files(p, pattern=".csv", full.names = TRUE, ignore.case = TRUE)

    if (length(file.list) > 0) { 
        dat <- lapply(file.list, readr::read_csv)
        names(dat) <- sapply(file.list, basename)
    } else dat <- list()

    cat("Import", length(dat), "CSV Files.\n")
    dat
}

Wednesday, May 3, 2017

Feature Filtering with Natural B-Splines for Functional Modeling Application


feature.filtering.ns <- function(X, df = 10, graph = FALSE)
{ 
    ## Purpose: Feature Filtering with Natural B-Splines for Functional Modeling Application.
    ##          Preprocess the feature data (design matrix) to enforce a natural regularization.
    ##          In other words, project each observation to each of "df" axis represented by B-spline Basis. 
    ## Arguments:
    ##   X: a design matrix (predictors) of dimension N x p (N observations, p predictors).
    ##      If "X" is a vector, convert it to a matrix with one row. 
    ##   df: Degrees of Freedom (number of natural B-splines basis functions to approximate the (continuous) coefficient function.
    ##       Beta (p x 1) = H (p x df) Theta (df x 1), where H represents "df" number of continuous basis functions
    ##       in the coefficient space, and Theta represents the linear combination parameters for constructing Beta.
    ##   graph: if TRUE, plot each observation (each row of X) as a time series,
    ##          its projection to the reduced space (formed by the natural B-spline basis),
    ##          and the natural B-spline basis. 
    ## Return: Preprocessed Features (X H) of dimension N x df, which can be used as "df" input into a predictive model.
    ##         X Beta = X H Theta = (X H) Theta
    ## Author: Feiming Chen, Date:  3 May 2017, 12:12
    ## ________________________________________________

    if (is.vector(X)) X <- matrix(X, nrow = 1)

    p <- ncol(X)                        # number of predictors
    require(splines)
    ## B-splines for Natural Cubic Spline: a p x df  matrix.
    H <- ns(1:p, df=df)   

    ## Filtered Features.  Dimensions: N x p  p x df = N x df
    ## Project each observation "x" into one of "df" Basis (each column of H matrix) so that
    ## each "x" is represented by "df" coefficient (coordinates in the coordinate system defined by "H")
    ans <- X %*% H                             

    if (graph) {
        par(mfrow=c(3,1))
        matplot(t(X), type = "b", xlab = "Feature Index", ylab = "Observation Value",
                main = "Original Features (High-Dimensional Space)")
        matplot(H, type="b", xlab = "Index", ylab ="Value", main="Natural B-spline Basis")
        matplot(t(ans), type = "b", xlab = "Feature Index", ylab = "Observation Value",
                main = "Filtered Features (Low-Dimensional Space)")
        par(mfrow=c(1,1))
    }

    ans
}
if (F) {                                # Unit Test
    X <- matrix(rnorm(1000), 20, 50)
    y <- feature.filtering.ns(X)
    y <- feature.filtering.ns(X, df = 20)    

    x <- rnorm(100)
    y <- feature.filtering.ns(x, graph=T)
}

Tuesday, April 25, 2017

Apply a stamp (text string) to each page of a PDF file

my.pdf.stamp <- function(stamp.text="Stamp", pos=c(0.4, 0.9), pdf.in.file,  pdf.out.file)
{ 
    ## Purpose: Apply a stamp (text string) to each page of a PDF file.
    ##          Requires Linux program "pdftk". 
    ## Arguments:
    ##   pdf.in.file: Input PDF file. 
    ##   stamp.text: a vector of strings (stamps). Each value will be stamped on the corresponding page of the PDF file.
    ##               Its values will be recycled to match the total number of pages in the input PDF file.
    ##   pos: position of the stamp text (located between [0,1] X [0,1])
    ##   pdf.out.file: Output PDF file with the stamps. 
    ## Return: PDF file with the stamps. 
    ## Author: Feiming Chen, Date: 25 Apr 2017, 10:42
    ## ________________________________________________
    
    p1 <- pdf.file.page.number(pdf.in.file)
    p2 <- length(stamp.text)
    if (p1 != p2) stamp.text <- rep(stamp.text, length.out = p1)

    ## burst the PDF file into single page PDF's
    f <- "page_%05d.pdf"
    fs <- "stamp_%05d"
    fs2 <- "stamp_%05d-Plot.pdf"
    fo <- "out_%05d.pdf"
    system(paste("pdftk", pdf.in.file, "burst output", f))
    
    for (i in 1:p1) {
        X(text.in.plot(stamp.text[i], cex = 2, pos = pos), file =  sprintf(fs, i), open.pdf=F)
        system(paste("pdftk", sprintf(f, i), "stamp", sprintf(fs2, i), "output", sprintf(fo, i)))
    }

    merge.file.list <- paste(sapply(1:p1, function(i) sprintf(fo, i)), collapse = " ")

    system(paste("pdftk", merge.file.list, "cat output", pdf.out.file))
}
if (F) {                                # Unit Test
    dir.create("tmp"); setwd("tmp")
    pdf("test.pdf"); replicate(3, plot(rnorm(100))); dev.off()

    pdf.in.file <- "test.pdf"
    pdf.out.file <- "result.pdf"
    stamp.text <- LETTERS[1:3]
    pos <- c(0.4, 0.9)

    my.pdf.stamp(stamp.text, pos=pos, pdf.in.file,  pdf.out.file)
}
pdf.file.page.number <- function(fname) {
    ## Return the total number of pages in a PDF file.
    ## Require Linux program "pdfinfo"
    a <- pipe(paste0("pdfinfo '", fname, "' | grep Pages | cut -d: -f2"))
    page.number <- as.numeric(readLines(a))
    close(a)
    page.number
}
if (F) {
    ## pdf.file.page.number("ALL-Plots.pdf")
}
text.in.plot <- function(x, cex=1, pos=c(0.5, 0.5)) {
    ## "x" is a string.  Put "x" in the center of a plot.  For putting
    ## text summary in a PDF file that is mostly plots.
    ## "pos" is the position of the text string
    ## "cex" is the expansion ratio of the text string
  plot(c(0, 1), c(0, 1), type = "n", main = "", xlab = "", ylab = "", xaxt="n", yaxt="n", axes = F, mar=c(0,0,0,0), oma=c(0,0,0,0))
  text(pos[1], pos[2], x, cex=cex)
}
if (F) {
    text.in.plot("good", cex=5)
    text.in.plot("bad", pos=c(0.1, 0.1))
}
X <- function(expr, file="test", open.pdf = TRUE) {
    ## USED IN UNIX!
    ## Generate a PDF file based on evaluation of expr and open it
    ## Put multiple plot commands like "{ cmd1; cmd2; cmd3; cmd4 }".  
    ## USAGE: xpdf(plot(sig1))
    graphics.off()
    ## png("~/tmp/plot.png", width=600, height=600)
    r <- my.pdf(file)
    ans <- eval(expr)
    dev.off()
    ## system("convert ~/tmp/plot.pdf ~/tmp/plot.png")
    if (open.pdf) open.pdf(r$plot)
    invisible(ans)
}
if (F) {
    X(plot(rnorm(100)))
    a <- X({plot(a <- rnorm(100)); a})
    X({plot(a <- rnorm(100)); hist(a); boxplot(a); dotchart(a); qqnorm(a); plot(lm(a~seq(a)))})
}
open.pdf <- function(file) {
    ## Open a PDF file.
    system(paste("evince ", file, " &", sep=""))
    cat("See Plot In: ", file, "\n")
}

Return the total number of pages in a PDF file.

pdf.file.page.number <- function(fname) {
    ## Return the total number of pages in a PDF file.
    ## Require Linux program "pdfinfo"
    a <- pipe(paste0("pdfinfo '", fname, "' | grep Pages | cut -d: -f2"))
    page.number <- as.numeric(readLines(a))
    close(a)
    page.number
}
if (F) {
    ## pdf.file.page.number("ALL-Plots.pdf")
}

Friday, April 21, 2017

Rename all files in a directory according to a pattern (regular expression)

batch.rename.file <- function(pattern, replacement, path=".")
{ 
    ## Purpose: Rename all files in a directory according to a pattern (regular expression). 
    ## Arguments:
    ##   pattern: a regular expression (as is used by R base function "sub") to match the file names. 
    ##   replacement: replacement text for the new file name (as is used by R base function "gsub")
    ##   path: path to a directory containing the files to be renamed.  Default to the current directory. 
    ## Return: Side Effect (all files in the directory will be renamed)
    ## Author: Feiming Chen, Date: 21 Apr 2017, 13:32
    ## ________________________________________________
    
    f1 <- dir(path, pattern, full.names = T, recursive = T) # Current file names. Include all files in subdirectories. 
    f1.base <- basename(f1)
    f2.base <-  sub(pattern, replacement, f1.base)                   # Corresponding New file names
    f2 <- file.path(dirname(f1), f2.base)
    ans <- file.rename(f1, f2)
    idx <- which(!ans)                  # which operation (file renaming) fails
    if (length(idx) == 0) {
        cat("All", length(ans), "files are renamed.\n")
    } else {
        cat(length(idx), "out of", length(ans), "files are not renamed.\n")
    }
}
if (F) {                                # Unit Test
    dir.create("tmp")
    setwd("tmp")
    file.create("a.b.html", "c.d.html")
    batch.rename.file("\\.", "_")
    dir()                               # "a_b.html" "c_d.html"
}

Thursday, April 20, 2017

Abbreivate a binomial scientific name

abbreviate.species.names <- function(x)
{ 
    ## Purpose: Abbreivate a binomial scientific name. 
    ## Arguments:
    ##   x: a vector of strings for the names of the species. 
    ## Return: a vector of strings with the abbreviated names
    ## Author: Feiming Chen, Date: 20 Apr 2017, 14:01
    ## ________________________________________________

    x1 <- gsub(" +", " ", x)
    x2 <- sub("^ +", "", x1)
    x3 <- sub(" +$", "", x2)
    x4 <- strsplit(x3, " ")

    sapply(x4, function(y) {
        if (length(y) == 2) {
            ans <- paste0(toupper(substr(y[1], 1, 1)), ".", tolower(y[2]))
        } else ans <- y
        ans
    })
}
if (F) {                                # Unit Test
    x = c("Escherichia coli", "Tyrannosaurus rex", "N/A", " Canis   lupus  ")
    abbreviate.species.names(x)         # "E.coli"  "T.rex"   "N/A"     "C.lupus"
}

Tuesday, April 18, 2017

Extract a specific "pattern" from each element of a character vector

extract.pattern <- function(x, pattern = "([[:alnum:]]+)")
{ 
    ## Purpose: Extract a specific "pattern" from each element of a character vector.
    ## Arguments:
    ##   x: a character vector
    ##   pattern: a regular expression which specifies the pattern (inside brackets) to be extracted. 
    ## Return: a vector with the extracted pattern.  NA is filled in where no match is found. 
    ## Author: Feiming Chen, Date: 18 Apr 2017, 14:34
    ## ________________________________________________

    r <- paste0(".*", pattern, ".*")
    sub(r, "\\1", x)
}
if (F) {                                # Unit Test
    x = c(NA, "a-b", "a-d", "b-c", "d-e")
    extract.pattern(x)                  # extract alpha-numeric pattern
    ## [1] NA  "b" "d" "c" "e"

    x = c("ab(A)x", "dc(B)y")
    extract.pattern(x, "(\\(.*\\))")      # extract the string inside brackets
    ## [1] "(A)" "(B)"
    extract.pattern(x, "\\)(.*)")      # extract the string after ")"
    ## [1] "x" "y"

    x = c("V167.G56", "V166.R56", "V122.G41", "V163.R55", "V165.B55", "V175.R59")
    extract.pattern(x, "(R|G|B)")       # extract R or G or B character only. 
    ## [1] "G" "R" "G" "R" "B" "R"
    extract.pattern(x, "[RGB]{1}([0-9]+)") # extract the numbers following the R, G, B character. 
    ## [1] "56" "56" "41" "55" "55" "59"
}

Format a confusion matrix (removing zero entry for visual clarity)


format.confusion.matrix <- function(x)
{ 
    ## Purpose: Format a confusion matrix (removing zero entry for visual clarity)
    ##          Requires package "formattable". 
    ## Arguments:
    ##    x: a confusion matrix (nrow = ncol, with counts in each cell) 
    ## Return: a formated confusion matrix in html browser and in a CSV file. 
    ## Author: Feiming Chen, Date: 17 Apr 2017, 15:45
    ## ________________________________________________

    x1 <- as.character(as.matrix(x))
    x1[x1 == "0"] <- ""
    x2 <- matrix(x1, nrow(x), ncol(x), dimnames=dimnames((x)))
    write.csv(x2, file="Confusion-Matrix.csv")
    formattable::formattable(as.data.frame(x2))
}
if (F) {                                # Unit Test
    x <- matrix(c(3,1,0,1,4,0,0,1,6), 3, 3, dimnames = list(c("A", "B", "C"), c("A", "B", "C")))
    format.confusion.matrix(x)
}

Sort a Data Frame

sort.df <-  function(d)
{ 
    ## Purpose: Sort a Data Frame along 1st column, ties along 2nd, ..., until its last column. 
    ## Arguments:
    ##   d: a data frame
    ## Return: a sorted data frame
    ## Author: Feiming Chen, Date: 18 Apr 2017, 13:49
    ## ________________________________________________
    d[ do.call(order, d), ]    
}
if (F) {                                # Unit Test
    df <- data.frame(X = c("b", "a", "a", "c"), Y = c(4, 2, 1, 3))
    sort.df(df)
    ##   X Y
    ## 3 a 1
    ## 2 a 2
    ## 1 b 4
    ## 4 c 3

    sort.df(df[2:1])
    ##   Y X
    ## 3 1 a
    ## 2 2 a
    ## 4 3 c
    ## 1 4 b
}

Thursday, April 13, 2017

Check if a vector's values are all the same

is.constant  <- function(x)
{ 
    ## Purpose: Check if a vector's values are all the same. 
    ## Arguments:
    ##   x: a vector (numeric, character, logical, etc.)
    ## Return: a logical value tha is TRUE if and only if all the values are the same (constant). 
    ## Author: Feiming Chen, Date: 13 Apr 2017, 14:59
    ## ________________________________________________
    y <- rep.int(x[1], length(x))
    isTRUE(all.equal(x, y))
}
if (F) {                                # Unit Test
    is.constant(c(T, T, F))             # F
    is.constant(c(F, F, F))             # T
    is.constant(c(1, 1, 2))             # F
    is.constant(c(2, 2, 2))             # T
    is.constant(c("a", "a", "b"))       # F
    is.constant(c("b", "b", "b"))       # T
}

Tuesday, April 11, 2017

Find the value that is the maximum in absolute value

my.abs.max <- function(x)
{ 
    ## Purpose: Find the value that is the maximum in absolute value
    ## Arguments:
    ##   x: a numeric vector
    ## Return:  a value in the "x" vector that is the largest in absolute value (keep the sign). 
    ## Author: Feiming Chen, Date: 11 Apr 2017, 15:19
    ## ________________________________________________

    i <- which.max(abs(x))
    x[i]
}
if (F) {                                # Unit Test
    my.abs.max(c(1, -3, 2))             # -3
    my.abs.max(c(1, 3, -2))             # 3
}

Test Time Series Second Order Curvature Significance


test.ts.2nd.order.sig <- function(x, graph=F)
{ 
    ## Purpose: Test Time Series 2nd Order Significance. 
    ## Arguments:
    ##    x: a numeric vector (time series)
    ## Return:  P-value for testing the 2nd order significance. 
    ## Author: Feiming Chen, Date: 10 Apr 2017, 14:57
    ## ________________________________________________

    d <- data.frame(Response = x, Time = seq(x))
    m1 <- lm(Response ~ Time, d)
    m2 <- lm(Response ~ Time + I(Time^2), d)
    a <- anova(m1, m2)
    model.compare.P.value <- a[["Pr(>F)"]][2]
    
    if (graph) {
        plot(d$Time, d$Response, xlab="Time", ylab="Response", type="b")
        lines(d$Time, fitted(m1), col="red")
        lines(d$Time, fitted(m2), col="blue")
        title(main=paste("Model Comparison P-value =", round(model.compare.P.value, 3)))
    }

    model.compare.P.value
}
if (F) {                                # Unit Test
    x <- rnorm(100)
    test.ts.2nd.order.sig(x, graph = T)
    r <- replicate(10000, test.ts.2nd.order.sig(rnorm(20))) # should follow a uniform distribution. 
    ## summary(r)
    ##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    ## 0.00011 0.25800 0.50600 0.50500 0.75500 1.00000 
}

Wednesday, April 5, 2017

Convert RGB Color Value to Spherical Coordinates


rgb2sph <- function(rgb, nbits)
{ 
    ## Purpose: Convert RGB Color Value to Spherical Coordinates
    ## Arguments:
    ##   rgb: a (N X 3) matrix of RGB values (each row is a triplet of R, G, B).
    ##          Assume each integer value range from 0 to (2^nbits - 1).
    ##   nbits: number of bits used to code each of R, G, B channel. 
    ## Return: a (N X 3) matrix of Shperical Coordinates (each row is a triplet of M, Theta, Phi), which are scaled to be within (0, 1).
    ## Author: Feiming Chen, Date:  5 Apr 2017, 10:21
    ## ________________________________________________
    
    max.scale <- 2^nbits
    M.scale <- sqrt(3 * max.scale^2)  
    angle.scale <- pi / 2

    t(apply(rgb + 1, 1, function(x) {
        M <- sqrt(sum(x^2))             # Color Intensity/Luminosity (0-1) 
        Theta <- atan(x[2] / x[1])      # Azimuthal Angle
        Phi <- acos(x[3] / M)           # Zenith Angle 
        c(M / M.scale, Theta / angle.scale, Phi / angle.scale)
    }))
}
if (F) {                                # Unit Test
    rgb <- matrix(c(0, 0, 0,  4095,4095,4095, 3527, 3513, 3504, 2470, 3034,3218), ncol=3, byrow = T)
    rgb2sph(rgb, nbits = 12)
##            [,1]    [,2]    [,3]
## [1,] 0.00024414 0.50000 0.60817
## [2,] 1.00000000 0.50000 0.60817
## [3,] 0.85832017 0.49873 0.60954
## [4,] 0.71428036 0.56498 0.56181
}

Wednesday, February 15, 2017

Look up value in a reference table (Similar to Excel's VLOOKUP function)

vlookup <- function(x, x.vector, y.vector = seq(x.vector), exact = FALSE, na.action = FALSE) 
{
    ## PURPOSE: Look up value in a reference table (Similar to Excel's VLOOKUP function)
    ## ARGUMENT:
    ##    x: a vector of Look Up Value (should not contain NA values)
    ##    x.vector: a vector where the "x" value can be located (approximately).
    ##    y.vector: a vector containing the return value that correponds the identified location in the "x.vector".
    ##              Default to the index to the x.vector.
    ##    exact: if T, the numeric values have to match exactly. 
    ##    na.action: If T and if cannot find the lookup value, return the input value. 
    ## RETURN: a scale value that corresponds to the "x" lookup value. 
    ## DATE: 20 Oct 2016, 16:30
    ## -----------------------------------------------------
    
    ans <- NULL
    if (is.numeric(x)) {
        if (exact) {
            ans <- y.vector[match(x, x.vector)]
        } else {
            ans <- y.vector[match.approx(x, x.vector)]
        }
    } else if (is.character(x)) {
        ans <- y.vector[match(x, x.vector)]
    }      

    if (na.action) {
        idx <- is.na(ans)
        ans[idx] <- x[idx]
    }
    ans
}
if (F) {                                # Unit Test 
    vlookup(c(68.2, 73), c(54, 60, 68.1, 72, 80), LETTERS[1:5]) # "C" "D"
    vlookup(c(2, 3.1), 1:5)                                     # 2 3
    vlookup(c(2, 3.1), 1:5, exact = T)                          # 2 NA
    vlookup(c("C", "E"), LETTERS[1:5], LETTERS[11:15]) #  "M" "O"
    vlookup(c("C", "X"), LETTERS[1:5], LETTERS[11:15]) #  "M" NA
    vlookup(c("C", "X"), LETTERS[1:5], LETTERS[11:15], na.action = T) #  "M" "X"
}


match.approx <- function(x, y) {
    ## Purpose: Match Approximately for Numerical Data
    ## Arguments:
    ##   "x":  a vector of numeric values.
    ##   "y":  a vector of numeric values. 
    ## RETURN:
    ##   The index in "y" that indicates the closest y value to each of "x" value. 
    ## Author: Feiming Chen, Date: 15 Feb 2017, 10:41
    ## ________________________________________________
    
    sapply(x, function(x0) which.min(abs(x0 - y)))
}
if (F) {
  match.approx(c(4.2, 1.2, 15), 1:10)                #  4  1 10
}

Simple Principal Component Analysis


my.pca  <- function(dat.feature)
{ 
    ## Purpose: Principal Component Analysis
    ## Arguments:
    ##   dat.feature: a data feature matrix, where each column is a feature vector. 
    ## Return: PCA plots and result. 
    ## Author: Feiming Chen, Date: 15 Feb 2017, 09:59
    ## ________________________________________________
    p <- prcomp(dat.feature)
    plot(p, main = "Scree Plot", xlab = "Principal Components")
    dev.new()
    biplot(p)
    print(summary(p))
    invisible(p)
}
if (F) {                                # Unit Test
    d <- matrix(rnorm(1000), 20)
    my.pca(d)
}



Tuesday, February 14, 2017

Making Barplot with Confidence Intervals


my.barplot  <- function(h, hl, hu, ...)
{ 
    ## Purpose: Template function for making Barplot with Confidence Interval.
    ##          Require R package "gplots". Refer to documentation of "gplots::barplots2". 
    ## Arguments:
    ##   h: Height Matrix. Each column is a group of heights. The dimnames will be used for labeling.
    ##   hl: Lower CI. Same dimension as "h". 
    ##   hu: Upper CI. Same dimension as "h".
    ##   ...: passed to "gplots::barplot2"
    ## Return: 
    ## Author: Feiming Chen, Date: 14 Feb 2017, 14:22
    ## ________________________________________________

    old.par <- par("mar")
    par(mar = c(5, 10, 4, 5))
    gplots::barplot2(h, beside = T, legend.text = T, horiz = T, col=rainbow(ncol(h)), border = NA, 
                     cex.axis = 2, cex.names = 1.5, cex.lab=2, las = 1, xpd = F,
                     plot.ci = TRUE, ci.l = hl, ci.u = hu, ci.width = 0.3, ci.lwd = 2, 
                     plot.grid = TRUE, 
                     ...) -> mp

    ## Put numbers on the bars 
    for (i in seq(ncol(mp))) text(1, mp[,i], labels=round(h[,i], 2), cex=2)
    par(old.par)
}
if (F) {                                # Unit Test
    x <- structure(c(9.5, 10.5, 1.2, 0.4, 7, 12, 9.8, 11.2, 8.3, 9.6, 0.8, 0.3, 6.7, 9.8, 9, 10.2), .Dim = c(8L, 2L), .Dimnames = list(c("Type R", "Type B", "R.SD", "B.SD", "Rl", "Ru", "Bl", "Bu"), c("Class 1", "Class 2")))
    h <- x[1:2,]
    hl <- x[c(5,7),]
    hu <- x[c(6,8),]
    my.barplot(h, hl, hu, main="Comparison", xlab="Average Result", xlim = c(0, 20))
}


Generate HTML or PNG representation of a data / table / model object


tex.print <- function(x, type=c("PNG", "HTML"), file=NULL, caption = file, digits=0) {
    ## Purpose: Generate HTML or PNG representation of a data/table/model object.
    ##          Require R "memisc" package. 
    ## Arguments:
    ##   x: an object (table, ftable, data frame, model object, etc. that are acceptable in R "memisc" package.
    ##   type: what to generate -- 
    ##         "HTML" shows a HTML table in a web browser. 
    ##         "PNG" shows a PNG (image) file (require linux commands: latex, dvipng, display).
    ##   file: make a copy of the PNG file with this name.  Also used for the table name.
    ##   caption: table caption. 
    ##   digits: number of decimal places to use.
    ## Return: Display the data representation and an image file (type="PNG") for insertion. 
    ## Author: Feiming Chen, Date: 14 Feb 2017, 11:59
    ## ________________________________________________

    type <- match.arg(type)

    is.tabular.data <- "tabular" %in% class(x) # "x" is from the "tables" package output
    is.tbl.data <- "tbl" %in% class(x)         # "x" is from the "dplyr" package output

    if (is.tbl.data) {
        x <- as.data.frame(x)
    }
    
    switch(type,
           HTML = {
               ## Show HTML table in a Web Browser
               if (is.null(file)) {
                   memisc::show_html(x, digits = digits)
               } else {
                   memisc::write_html(x, file = paste0(file,".html"), digits = digits)
               }
           }, 
           PNG = {
               ## Generate LaTeX and PNG Image file
               
               if (is.tabular.data) {
                   library(tables)
                   booktabs()
               }

               if (is.tabular.data || is.tbl.data) {

                   w <- Hmisc::latex(x, file="X.tex", ctable=TRUE, caption = caption, digits = digits)
                   if (is.tabular.data) w$style <- c("booktabs", "dcolumn")
                   d <- Hmisc::dvi(w)                         # convert to DVI file 
                   system(paste("dvipng -q* -o X.png -T tight -D 200", d$file)) # convert to PNG file with Tight box and Resolution of 200
                   file.remove("X.tex")

               } else {

                   r <- memisc:::toLatex.default(x, digits=digits, show.vars=TRUE)
                   
                   f <- file("X.tex", "w")
                   writeLines(c("\\documentclass{article}",
                                "\\usepackage{booktabs}",
                                "\\usepackage{dcolumn}",
                                "\\begin{document}", 
                                "\\pagenumbering{gobble}",
                                "\\begin{table}", 
                                "\\centering"),  f)

                   if (!is.null(caption)) writeLines(paste0("\\caption{", caption, "}"), f)

                   writeLines(r, f)

                   writeLines(c("\\end{table}", "\\end{document}"), f)

                   close(f)

                   system("latex X.tex >/dev/null") # Generate DVI file from LaTeX file (X.tex -=> X.dvi)
                   system("dvipng -q* -o X.png -T tight -D 200 X.dvi >/dev/null") # convert to PNG file with Tight box and Resolution of 200
                   file.remove("X.tex", "X.log", "X.aux", "X.dvi")
               }

               if (is.null(file)) {
                   if (!grepl("/tmp", getwd())) {
                       file.copy("X.png", "~/tmp/X.png", overwrite = T)
                   }
                   system("display ~/tmp/X.png &")                                     # display the PNG file. 
               } else {
                   file.rename("X.png", paste0(file,".png"))
               }
           }
           )
    invisible(NULL)
}
if (F) {                                # Unit Test
    x <- data.frame(age=c(1,1,1,5,5,5), height=c(10,12,9,20,23,18))
    tex.print(x)
    tex.print(ftable(x))
    x %>% group_by(age) %>% summarise(heights = mean(height)) -> y
    tex.print(y)
    tex.print(x, file="Test")
    tex.print(y, type="HTML")
    tex.print(y, type="HTML", file = "Test")

    library(tables)
    a <- tabular( (Species + 1) ~ (n=1) + Format(digits=2) * (Sepal.Length + Sepal.Width) * (mean + sd), data=iris )
    tex.print(a)
    tex.print(a, file = "test")

}







Apply a function to a vector in an accumulative manner (like cumsum)

cum.func <- function(x, func, ...)
{ 
    ## Purpose: Apply a function to a vector in an accumulative manner (like cumsum)
    ## Arguments:
    ##   x: a vector
    ##   func: a function
    ##   ...: passed to "func"
    ## Return: a vector that is the result of:
    ##   c( func(x[1]),  func(x[1:2]),  func(x[1:3]),  func(x[1:4]),  func(x[1:5]), ... )
    ## Author: Feiming Chen, Date: 30 Jan 2017, 14:06
    ## ________________________________________________
    
    N <- length(x)
    y <- rep_len(NA, N)
    for (i in 1:N) y[i] <- func(x[1:i], ...)
    y[is.na(x)] <- NA
    y
}
if (F) {                                # Unit Test
    cum.func(5:1, mean)                 #  c(5.0, 4.5, 4.0, 3.5, 3.0))
    cum.func(c(NA, 5:1, NA), mean, na.rm=T)                 #  c(NA, 5.0, 4.5, 4.0, 3.5, 3.0, NA)
    cum.func(c(NA, 5:1, NA), sd, na.rm=T)        # NA NA 0.70711 1.00000 1.29099 1.58114 NA
}

Overlay a new plot on an existing plot


plot.overlay <- function(..., label="2nd Y", col="blue")
{ 
    ## Purpose: Overlay a new plot on an existing plot. 
    ## Arguments:
    ##   ...: Passed to the "plot" function for the new plot.
    ##   label: The secondary Y-axis label.
    ##   col: The color of the ticks and tick labels for the new plot. 
    ## Return: A modified plot. 
    ## Author: Feiming Chen, Date:  6 Feb 2017, 10:19
    ## ________________________________________________

    par(new=T)
    plot(..., axes=F, xlab="", ylab="", col=col)
    axis(side=4, col.ticks = col, col.axis=col)
    mtext(label, side=4, line=-1, col=col)
}
if (F) {                                # Unit Test
    plot(rnorm(100))
    plot.overlay(rnorm(100), type="l", label="Test")
}


Apply a function to each consecutive pairs in a vector.

diff.func <- function(x, func)
{ 
    ## Purpose: Apply a function to each consecutive pairs in a vector. 
    ## Arguments:
    ##   x: a vector
    ##   func: a function that operature on two elements (of "x")
    ## Return: a vector, whose length is length(x) - 1, that looks like (where N=length(x))
    ##    func(x[2], x[1]), func(x[3], x[2]), func(x[4], x[3]), ..., func(x[N], x[N-1]).
    ## Author: Feiming Chen, Date:  9 Feb 2017, 11:25
    ## ________________________________________________
    
    ## embed in 2-dim space to make consecutive pairs
    x2 <- embed(x, 2)                  # N - 1 pairs
    ## apply a function to each consecutive paris (reversely)
    x3 <- apply(x2, 1, func)
    x3
}
if (F) {                                # Unit Test
    diff.func(1:5, diff)                # -1 -1 -1 -1   
    diff.func(1:5, mean)                # 1.5 2.5 3.5 4.5
}

Plot multiple lines together using "matplot"


my.line.plot  <- function(x, y, ...)
{ 
    ## Purpose: Template function for making lines by group. 
    ## Arguments:
    ##   x: X-coords
    ##   y: a matrix with Y-coords (each column has a set of Y values)
    ##   ...: passed to "plot"
    ## Return: A plot assuming log X-axis with axes tickes given by "x". 
    ## Author: Feiming Chen, Date:  9 Feb 2017, 14:02
    ## ________________________________________________n
    N <- ncol(y)                        # number of lines
    matplot(x, y, type="b", log="x", xaxt="n", lwd=2, lty=seq(N), col = rainbow(N), ...)  
    matpoints(x, y, log="x", xaxt="n", col = "black", ...)
    axis(1, at=x)
}
if (F) {                                # Unit Test
    x <- c(0.1, 1, 10)
    y <- matrix(rnorm(9), 3)
    my.line.plot(x, y, xlab="Tiime", ylab="Signal", main="Line Plot by Groups")
}


Monday, February 6, 2017

R Code: Apply a function to each element of a list (or data frame) or to each element of a list within a list of data while retaining the names attribute of each nested list


lf.compute.data.column <- function(dat.list, func, ...)
{
## Purpose: Apply a function to each column of a list (e.g. data frame). Retains the names information.
## Basis for processing more complex data (e.g. a list of a list)
## Arguments:
## dat.list: a list of data (e.g. data frame)
## func: a function that computes a vector (may have an argument "name")
## ...: passed to "func"
## Return:
## a list of outputs.
## Author: Feiming Chen, Date: 30 Jan 2017, 13:21
## ________________________________________________

this.name <- null2na(names(dat.list))

mapply(func,
dat.list,
name = this.name,
...,
SIMPLIFY = FALSE
)
}
if (F) { # Unit Test
dat.list = data.frame(x=rnorm(10), y=rnorm(10))
lf.compute.data.column(dat.list, mean)
lf.compute.data.column(dat.list, function(x, ...) { c(a=mean(x), b=sd(x)) })
lf.compute.data.column(dat.list, function(x, name) { list(a=mean(x), n=name) })
}


lf.compute.data.column.2 <- function(dat.list, func, ...)
{
## Purpose: Apply a function to a list of a list of data. Retains the names information.
## Arguments:
## dat.list: a list of a list
## func: a function that computes a vector. (may have an argument "name")
## Return: a list of list of results
## Author: Feiming Chen, Date: 6 Feb 2017, 12:50
## ________________________________________________

this.name <- null2na(names(dat.list))

mapply(
function(d, name) {
if (!is.na(name)) names(d) <- paste0(name, ".", names(d))
lf.compute.data.column(d, func)
},
dat.list,
name = this.name,
...,
SIMPLIFY = FALSE
)
}
if (F) { # Unit Test
dat.list.2 = list(test1 = data.frame(x=1:5, y=6:10), test2= data.frame(x=11:15, y=16:20))
lf.compute.data.column.2(dat.list.2, mean)
lf.compute.data.column.2(dat.list.2, function(x, ...) { c(m=mean(x), s=sd(x)) })
lf.compute.data.column.2(dat.list.2, function(x, name) { list(m=mean(x), n=name) })
}

null2na <- function(x) {
## if the value of x is NULL(as determined by length test since is.null may
## not always work), turn it into NA so that an object
## made of it will always exist.
if (length(x)==0)
x <- NA
x
}

R Code: Extract a fixed component of a list within a list and optionally apply a function


lf <- function(obj, comp, fun=identity, wrap=identity,
return.vector=TRUE, verbose=FALSE,
...) {
## WARNING: "comp" have to be a real string instead of a variable
## Functional Apply
## "comp": a character string, indicated a list component.
## "obj" is a list. "comp" is a component of each element of the
## list "obj". Apply a funcion (just an unquoted name) to each "comp" of the list
## "obj". "..." is passed to "fun". use "fun=identity" if no
## processing is needed. "wrap" is applied to the final result.
## "return.vector" will force the result to be a vector.

comp <- substitute(comp)
e <- paste(deparse(substitute(wrap)), "(lapply(obj,",
deparse(substitute(function(x) fun(x[[comp]], ...))), "))")
ans <- eval(parse(text=e))
if (verbose) cat("expression:", e, "\n")
if (return.vector)
ans <- sapply(ans, function(x) if (is.null(x)) NA else x)

ans
}
if (F) {
a <- list(x=c(1:3), y=c(4:6))
lf(a, 2) # c(x=2, y=5)

a <- list(x=list(median=3, mean=4), y=list(median=5, mean=6))
lf(a, "mean") # c(x=4, y=6)
lf(a, "mean", sqrt, verbose=T) # c(x=2, y=2.4495)
lf(a, "mean", sqrt, wrap=length, verbose=T) # 2
}

R Code: Apply a function to a vector in an accumulative manner


cum.func <- function(x, func)
{
## Purpose: Apply a function to a vector in an accumulative manner (like cumsum)
## Arguments:
## x: a vector
## func: a function
## Return: a vector that is the result of:
## c( func(x[1]), func(x[1:2]), func(x[1:3]), func(x[1:4]), func(x[1:5]), ... )
## Author: Feiming Chen, Date: 30 Jan 2017, 14:06
## ________________________________________________

N <- length(x)
y <- rep_len(NA, N)
for (i in 1:N) y[i] <- func(x[1:i])
y
}
if (F) { # Unit Test
identical(cum.func(5:1, mean), c(5.0, 4.5, 4.0, 3.5, 3.0))
}

R Function for Overlaying a New Plot on an Existing Plot


plot.overlay <- function(..., label="2nd Y", col="blue")
{
## Purpose: Overlay a new plot on an existing plot.
## Arguments:
## ...: Passed to the "plot" function for the new plot.
## label: The secondary Y-axis label.
## col: The color of the ticks and tick labels for the new plot.
## Return: A modified plot.
## Author: Feiming Chen, Date: 6 Feb 2017, 10:19
## ________________________________________________

par(new=T)
plot(..., axes=F, xlab="", ylab="", col=col)
axis(side=4, col.ticks = col, col.axis=col)
mtext(label, side=4, line=-1, col=col)
}
if (F) { # Unit Test
plot(rnorm(100))
plot.overlay(rnorm(100), type="l", label="Test")
}