R Function Library: 2017

Tuesday, December 12, 2017

Compare two tables with the same structure by merging them

compare.tables <- function(d1, d2, by.idx = 1)
{ 
    ## Purpose: Compare two tables with the same structure by merging them. 
    ## Arguments:
    ##    d1: table A
    ##    d2: table B
    ##    by.idx: an index vector to common variables.
    ## Return: a full joined table for comparision

    b <- names(d1)[by.idx]
    d <- full_join(d1, d2, by = b)
    tex.print(d, type = "HTML")
    invisible(d)
}
if (F) {                                # Unit Test
    d1 <- rc()
    d2 <- rc()
    by.idx <- 1:4
    compare.tables(d1, d2, by.idx)
}

List duplicated files in a directory

list.dup.files <- function(p, file.name.head = 20)
{ 
    ## Purpose: List duplicated files in a directory. 
    ## Arguments:
    ##   file.name.head: number of starting characters in the file name to use for duplication detection
    ## Return: a list of file names that are duplicated (by sorted order). 
    ## Author: Feiming Chen, Date: 13 Nov 2017, 16:36
    ## ________________________________________________
    
    p1 <- dir(p)
    p2 <- sort(p1, decreasing = T)
    p3 <- as.character(sapply(p2, function(x) substr(x, 1, file.name.head)))
    p4 <- p2[duplicated(p3)]
    if (length(p4) > 0) {
        cat("Duplicated Older Files:\n")
        print(p4)
    } else {
        cat("NO Duplicated Files.\n")
    }
    file.path(p, p4)
}
if (F) {                                # Unit Test
    p <- "~/tmp/Updated-Databases"
    file.name.head <- 25
    x <- list.dup.files(p, file.name.head)
    file.remove(x)
}

Wednesday, October 18, 2017

Convert all character variables in your data.frame to factors

df.make.factors <- function(df)
{ 
    ## Purpose: Convert all character variables in your data.frame to factors 
    ## Arguments:
    ##   df: a data frame
    ## Return: a data frame with converted factors on those character variables.
    ## Author: Feiming Chen, Date: 17 Oct 2017, 14:26
    ## ________________________________________________

    character_vars <- sapply(df, class) == "character"
    df[, character_vars] <- lapply(df[, character_vars], factor)
    df
}
if (F) {                                # Unit Test
    df <- data.frame(x=c("x", "x", "y"), y=1:3, z=c("1", "3", "3"))
    df.make.factors(df)
}

Monday, October 2, 2017

Display a (numeric) vector in a pretty format

display.vector <- function(x)
{ 
    ## Purpose: Display a vector in a pretty format
    ## Arguments:
    ##   x: a vector
    ## Return: a png picture for the vector. (each row has 10 elements)
    ## Author: Feiming Chen, Date:  2 Oct 2017, 14:54
    ## ________________________________________________

    N <- length(x)
    n1 <- N %% 10
    if (n1 > 0) {
        n2 <- 10 - n1
        x.add <- rep(NA, n2)
        x <- c(x, x.add)
    }

    x1 <- matrix(x, ncol = 10, byrow = T)
    n2 <- nrow(x1)
    nr <- paste0(10 * (seq.int(n2) - 1) + 1, ": ")
    dimnames(x1) <- list(nr, 1:10)

    tex.print(x1)
}
if (F) {                                # Unit Test
    x <- c(402.6615584,402.1343365,402.5467634,401.835917,402.9268321,404.5185276,406.0735012,408.7713965,408.2033098,411.043676,413.953578,420.6021333,426.6352944,440.9007592,471.5614004,523.8426555,619.173643,782.3175971,1033.289048,1321.267143,1615.935774,1898.979548,2173.03472,2442.864835,2682.25644,2884.575978,3064.278857,3252.58248,3413.084683,3588.447246,3701.244129,3796.824264,3840.251429,3943.184319,4084.131835,4037.30822,4116.338229,4191.71438,4173.699815,4110.847391,4197.181334,4221.081422,4207.343855,4209.140892,4263.463393,4306.872737,4312.087451,4269.522631,4313.924265,4265.306511) 
    display.vector(x)
}

tex.print <- function(x, type=c("PNG", "HTML"), file=NULL, caption = file, digits=0, ...) {
    ## Purpose: Generate HTML or PNG representation of a data/table/model object.
    ##          Require R "memisc" package. 
    ## Arguments:
    ##   x: an object (table, ftable, data frame, model object, etc. that are acceptable in R "memisc" package.
    ##   type: what to generate -- 
    ##         "HTML" shows a HTML table in a web browser. 
    ##         "PNG" shows a PNG (image) file (require linux commands: latex, dvipng, display).
    ##   file: make a copy of the PNG file with this name.  Also used for the table name.
    ##   caption: table caption. 
    ##   digits: number of decimal places to use. 
    ##    ...: passed to "toLatex"
    ## Return: Display the data representation and an image file (type="PNG") for insertion. 
    ## Author: Feiming Chen, Date: 14 Feb 2017, 11:59
    ## ________________________________________________

    require(memisc)
    type <- match.arg(type)
    
    switch(type,
           HTML = {
               ## Show HTML table in a Web Browser
               memisc::show_html(x)      
           }, 
           PNG = {
               ## Generate LaTeX and PNG Image file
               r <- toLatex(x, ..., digits=digits, show.vars=TRUE)
               f <- file("X.tex", "w")
               writeLines(c("\\documentclass{article}",
                            "\\usepackage{booktabs}",
                            "\\usepackage{dcolumn}",
                            "\\begin{document}", 
                            "\\pagenumbering{gobble}"), f)
               if (!is.null(file)) writeLines("\\begin{table}  \\centering", f)

               writeLines(r, f)

               if (!is.null(caption)) {
                   writeLines(paste0("\\caption{", caption, "} \\end{table}"), f)
               }

               writeLines("\\end{document}", f)
               close(f)

               system("latex X.tex >/dev/null") # Generate DVI file from LaTeX file (X.tex -=> X.dvi)
               system("dvipng -q* -o X.png -T tight -D 200 X.dvi >/dev/null") # convert to PNG file with Tight box and Resolution of 200
               system("display X.png &")                                     # display the PNG file. 
               file.remove("X.tex", "X.log", "X.aux", "X.dvi")

               if (!is.null(file)) file.copy("X.png", paste0(file,".png"), overwrite = T) 
               if (!grepl("/tmp", getwd())) {
                   file.copy("X.png", "~/tmp/X.png", overwrite = T)
               }
           }
           )
    invisible(NULL)
}

Thursday, September 7, 2017

Extracts all calls to a function (except for those in standard packages)

my.fun_calls <- function(f)
{ 
    ## Purpose: Extracts all calls to a function (except for those in standard packages) in a function 
    ##          Differs from pryr::fun_calls by excluding basic functions. 
    ## Arguments:
    ##   f: a function
    ## Return: All calls to a function (that are not in standard packages, e.g. base, stats, etc.)
    ## Author: Feiming Chen, Date:  7 Sep 2017, 11:23
    ## ________________________________________________

    if (is.function(f)) {
        my.fun_calls(body(f))
    } else if (is.call(f)) {
        fname <- as.character(f[[1]])
        if (identical(fname, ".Internal")) fname3 <- NULL
        f1 <- try(pryr::where(fname), silent = TRUE)
        if (is.tryerror(f1)) {
            if (fname[1] == "::") {   # Example: pryr::where  fname = c("::", "pryr", "where")
                fname3 <- paste0(fname[2], fname[1], fname[3])
            } else {
                fname3 <- NULL
            }
        } else {
            fname2 <- environmentName(f1)
            if (fname2 %in% c("base", "package:stats", "package:graphics", "package:grDevices", "package:utils", "package:datasets", "package:methods")) {
                fname3 <- NULL
            } else {
                if (fname2 == "R_GlobalEnv") {
                    fname3 <- fname
                } else {                # attach package name if other than Global Environment
                    fname2m <- sub("package:", "", fname2) # remove "package:" prefix. 
                    if (fname2m == "") {
                        fname3 <- paste0(fname)
                    } else {                         
                        fname3 <- paste0(fname2m, "::", fname) 
                    }
                }
            }
        }

        if (F) fun_calls(lm)            # for testing purpose only

        unique(c(fname3, unlist(lapply(f[-1], my.fun_calls), use.names = FALSE)))
    }
}
if (F) {                                # Unit Test
    my.fun_calls(lm)                    # NULL

    sort(my.fun_calls(my.fun_calls))
    ## "is.tryerror"     "my.fun_calls"    "pryr::fun_calls" "pryr::where"    
}


## Test for the try-error class (from "try" function)
is.tryerror <- function(x) inherits(x, "try-error")

Wednesday, June 28, 2017

Span: A Predicate Functional that finds the location of the longest sequential run of elements where the predicate is true

Span <- function(f, x)
{ 
    ## Purpose: A Predicate Functional that finds the location of the longest sequential run
    ##          of elements where the predicate is true. 
    ## Arguments:
    ##   f: a predicate (a function that returns a single TRUE or FALSE) 
    ##   x: a list or data frame
    ## Return: The location of the longest sequential run of elements where the predicate is true. 
    ## Author: Feiming Chen, Date: 28 Jun 2017, 09:45
    ## ________________________________________________

    y <- rle(Where(f, x))
    z <- y$lengths
    w <- which.max(z)          # position of the segment with longest sequential run of TRUE
    pos.end <- sum(z[1:w])
    pos.start <- pos.end - z[w] + 1
    c(start=pos.start, end=pos.end)
}
if (F) {                                # Unit Test
    x <- c(3, 1, NA, 4, NA, NA, NA, 5, 9, NA, NA)
    Span(is.na, x)
    ## start   end 
    ##     5     7 
}

Where <- function(f, x)
{ 
    ## Purpose: A Predicate Functional
    ## See other Predicate Functionals: Filter(), Find(), Position()
    ## See other Functionals: Map(), Reduce() 
    ## Arguments:
    ##   f: a predicate (a function that returns a single TRUE or FALSE) (e.g. is.character, all, is.NULL)
    ##   x: a list or data frame
    ## Return: a logical vector 
    ## Author: Feiming Chen, Date: 28 Jun 2017, 09:45
    ## ________________________________________________

    vapply(x, f, logical(1))
}
if (F) {                                # Unit Test
    df <- data.frame(x = 1:3, y = c("a", "b", "c"), stringsAsFactors = T)
    Where(is.factor, df)
    ## Compare with: 
    Filter(is.factor, df)
    Find(is.factor, df)
    Position(is.factor, df)
}

Thursday, June 22, 2017

Draw a data frame as a R plot

my.table.plot0 <- function(x, 
                           highlight.matrix = NULL,
                           main = "Table", title2 = NULL, size = 20,
                           bold.lastrow = FALSE, bold.lastcol = FALSE
                           ) 
{
    ## PURPOSE: Draw a data frame as a R plot. Require R package "gridExtra". 
    ## ARGUMENT:
    ##    x: a data frame
    ##    highlight.matrix: if provided a logical matrix (same dimension as "x"). Then "TRUE" cells will be highlighted. 
    ##    main: title for the plot
    ##    title2: additional title with more verbose text
    ##    size: text size of table text
    ##    bold.lastrow: if T, make the last row bold font.
    ##    bold.lastcol: if T, make the last column bold font.
    ## RETURN: a plot
    ## DATE: 21 Nov 2016, 15:46
    ## -----------------------------------------------------

    x <- as.data.frame(x)
    n1 <- nrow(x)
    n2 <- ncol(x)
    cl <- matrix("black", n1, n2)
    cl[is.na(x)] <- "gray"

    ## Bold Total Column and Row
    cl2 <- cl
    cl2[] <- "plain"
    if (bold.lastrow) {
        cl2[n1,] <- "bold"
    }
    if (bold.lastcol) {
        cl2[, n2] <- "bold"
    }
    
    require(gridExtra)
    table_theme <- ttheme_default(base_size = size,
                                  core = list(fg_params=list(col=cl, fontface = cl2)))
    ## Highlight Cells
    if (!is.null(highlight.matrix)) {
        cl3 <- cl
        z <- ttheme_default()$core$bg_params$fill # default color
        z1 <- rep(z, length.out = nrow(cl3))
        for (i in 1:ncol(cl3)) cl3[,i] <- z1
        cl3[highlight.matrix] <- "yellow"
        table_theme$core$bg_params$fill <- cl3
    }

    plot.new()
    gridExtra::grid.table(as.data.frame(x), theme=table_theme)
    ## Title Text
    t2 <- ifelse(is.null(title2), "", paste0("\n", graph.title.split.lines(title2, sep=" ", num.chars = 40)))
    title(main = paste0(main, t2))

}
if (F) {                                # Unit Test 
    x <- data.frame(Type=c('Car', 'Plane'), Speed=c(NA, 200))
    my.table.plot0(x)
    my.table.plot0(x, main='Test', title2='a very complex test', size=30, bold.lastcol = T, highlight.matrix = matrix(c(NA, F, F, T) , 2, 2))
    x <- data.frame(Type=rep('Car', 20), Speed=rep(100, 20))
    my.table.plot0(x)
}

my.table.plot <- function(x, highlight.matrix = NULL, bold.lastrow = FALSE, n = 20, ...)
{ 
    ## Purpose: Split a data frame to fit in each page and call "my.table.plot0". 
    ## Arguments:
    ##   x: a data frame or matrix
    ##   highlight.matrix: if provided a logical matrix (same dimension as "x"). Then "TRUE" cells will be highlighted. 
    ##   bold.lastrow: if T, make the last row bold font.
    ##   n: how many rows to fit into each page.
    ##   ...: pass to "my.table.plot0". 
    ## Return: multiple plots to plot the data matrix. 
    ## Author: Feiming Chen, Date:  5 Jul 2017, 15:16
    ## ________________________________________________

    x <- as.data.frame(x)
    N <- nrow(x)
    if ( N <= n ) {
        my.table.plot0(x, highlight.matrix = highlight.matrix, bold.lastrow = bold.lastrow, ...)
    } else {
        s <- rep(seq.int(N %/% 20 + 1), each = n, length.out = N)
        xs <- split(x, s)
        
        ng <- length(xs)                # number of pages
        bold.lastrow.list <- c(rep(FALSE, ng-1), bold.lastrow)
        
        if (is.null(highlight.matrix)) {
            mapply(my.table.plot0, xs, bold.lastrow = bold.lastrow.list, MoreArgs = list(...))
        } else {
            ys <- split(highlight.matrix, s)
            mapply(my.table.plot0, xs, highlight.matrix = ys, bold.lastrow = bold.lastrow.list, MoreArgs = list(...))
        }
    }
}
if (F) {                                # Unit Test
     x <- data.frame(Type=rep('Car', 50), Speed=rep(100, 50))
     h <- matrix(rep(c(F, T), 50), ncol=2)
     my.table.plot(x, highlight.matrix = h, bold.lastrow = T)
}

Wednesday, June 21, 2017

Add a "Total" level to each specified categorical variable in the data frame

df.add.total.category  <- function(dat, var.list, new.level.label = "Total")
{ 
    ## Purpose: Add a "Total" level to each specified categorical variable in the data frame. 
    ## Arguments:
    ##   dat: a data frame with several categorical variables for making tables.
    ##   var.list: a list of variable names in "dat".
    ##   new.level.label: the name for the new "total" level for the specified variable. 
    ## Return: 
    ## Author: Feiming Chen, Date: 21 Jun 2017, 13:02
    ## ________________________________________________

    if ( missing(var.list) )  {
        var.list <- names(dat)
    }

    d0 <- dat                           # to make new label across all variables
    lapply(var.list,  function( v )  {
        d <- dat
        d[[v]] <- new.level.label
        d0[[v]] <<- new.level.label
        d
    }) -> dd

    do.call(rbind, c(list(dat), dd, list(d0)))
}
if (F) {                                # Unit Test
    dat <- data.frame(A=gl(3,3), B=gl(3, 1, 9), C=11:19)
    df.add.total.category(dat, var.list=c("A", "B"))
}

Monday, June 19, 2017

Put unique elements of a string vector into one long string

paste.unique.string  <- function(x, sep = "/")
{ 
    ## Purpose: Put unique elements of a string vector into one long string. 
    ## Arguments:
    ##    x: a string vector
    ##    sep: what separator to use when pasting all unique elements together. 
    ## Return:  a single string with unique elements of "x". 
    ## Author: Feiming Chen, Date: 19 Jun 2017, 14:00
    ## ________________________________________________
    
    paste0(unique(x), collapse = sep)
}
if (F) {                                # Unit Test
    paste.unique.string(c("a", "b", "a", "b")) # "a/b"
}

Add "0" to the front of numbers to make n-digit string

prepend.zero.to.numbers  <- function(x, n = NULL, pad.char = "0")
{ 
    ## Purpose: Add "0" to the front of numbers to make n-digit string. 
    ## Arguments:
    ##   x: a vector of integers/characters
    ##   n: Number of total digits after padding with zero. Default to
    ##      using the largest number to calculate "n".
    ##   pad.char: what character to pad.  Default to "0". 
    ## Return: a character vector with nuumbers padded with zero's in the head.
    ## Author: Feiming Chen, Date: 19 Jun 2017, 13:19
    ## ________________________________________________

    x1 <- as.character(x)
    y <- nchar(x1)
    if (is.null(n)) n <- nchar(as.character(max(x)))
    y1 <- n - y                         # how long is the padding for each number
    pad.vector <- sapply(y1, function(w) paste(rep(pad.char, w), collapse = ""))
    paste0(pad.vector, x1)
}
if (F) {                                # Unit Test
    x <- c(2, 14, 156, 1892)
    prepend.zero.to.numbers(x)          # "0002" "0014" "0156" "1892"
    prepend.zero.to.numbers(x, pad.char = "X") # "XXX2" "XX14" "X156" "1892"
    prepend.zero.to.numbers(x, n = 6)          # "000002" "000014" "000156" "001892"
    y <- c("A", "BC", "DEF")
    prepend.zero.to.numbers(y, 3) # "00A" "0BC" "DEF"
}

Parse a string to generate a data frame

parse2data.frame  <- function(x)
{ 
    ## Purpose: Parse a string to generate a data frame.
    ## Arguments:
    ##   x: a string with header and multiple rows of text that resembles a data frame.
    ##      Fields are separated by comma. 
    ## Return: a data frame
    ## Author: Feiming Chen, Date: 19 Jun 2017, 10:12
    ## ________________________________________________

    d <- read.csv(textConnection(x), as.is = T)
    i <- sapply(d, is.character)
    d[i] <- lapply(d[i], trimws)
    d
}
if (F) {                                # Unit Test
    a = 
"

  Test, ID, Name
  t1 ,  3,  Sun
  t2 ,  5,  Moon
  t3,   2,  Earth

"    
    b <- parse2data.frame(a)
    str(b)
    ## 'data.frame':    3 obs. of  2 variables:
    ##  $ ID  : int  3 5 2
    ##  $ Name: Factor w/ 3 levels " Earth"," Moon",..: 3 2 1
}

Thursday, June 15, 2017

Tokenize a string into a vector of tokens

tokenize.string <- function(x, split = "[ ,:;]+")
{ 
    ## Purpose: Tokenize a string into a vector of tokens
    ## Arguments:
    ##   x: a string or a vector of strings
    ##   split: split characters (regular expression)
    ## Return: a character vector (if "x" is a string) or a list of character vectors. 
    ## Author: Feiming Chen, Date: 15 Jun 2017, 15:01
    ## ________________________________________________
    
    ans <- strsplit(x, split="[ ,:;]+", fixed=F)
    if (length(x) == 1) ans <- ans[[1]]
    ans
}
if (F) {                                # Unit Test
    x <- "IND,  UNR   INC ; TCU"
    tokenize.string(x)
    ## [1] "IND" "UNR" "INC" "TCU"
    tokenize.string(rep(x, 3))
    ## [[1]]
    ## [1] "IND" "UNR" "INC" "TCU"

    ## [[2]]
    ## [1] "IND" "UNR" "INC" "TCU"

    ## [[3]]
    ## [1] "IND" "UNR" "INC" "TCU"
}

Wednesday, June 14, 2017

Count the number of distinct values in a vector or distinct rows in a data frame

N.levels <- function(x)
{ 
    ## Purpose: Count the number of distinct values in a vector or distinct rows in a data frame
    ## Arguments:
    ##    x: a vector (numeric or string), or a data frame. 
    ## Return: a count for the distinct values/rows in the vector or data frame. 
    ## Author: Feiming Chen, Date: 20 Mar 2017, 10:58
    ## ________________________________________________

    sum(!duplicated(x))
}
if (F) {                                # Unit Test
    N.levels(c(1,2,1,3))                # 3
    N.levels(c("a", "b", "a"))          # 2
    x <- data.frame(a=c(1,2,1), b=c(1, 2, 1))
    N.levels(x) # 2
}

Tuesday, June 6, 2017

Write data tables to Excel (.xlsx) file

wxls <- function(x, file = "test", ...)
{ 
    ## Purpose: Write data tables to Excel (.xlsx) file.
    ##          Require package "openxlsx". 
    ## Arguments:
    ##   x: a data frame or a list of data frames. 
    ##   file: a naked file name with no extension.
    ##   ...: passed to "write.xlsx". 
    ## Return: Generate an Excel (.xlsx) file.
    ## Author: Feiming Chen, Date:  6 Jun 2017, 15:11
    ## ________________________________________________
    
    require(openxlsx)
    f <- paste0(file, ".xlsx")

    write.xlsx(x, file = f, asTable = TRUE,
               creator = "Feiming Chen", 
               tableStyle = "TableStyleMedium2",   ...)
}
if (F) {                                # Unit Test
    df <- data.frame("Date" = Sys.Date()-0:4,
                     "Logical" = c(TRUE, FALSE, TRUE, TRUE, FALSE),
                     "Currency" = paste("$",-2:2),
                     "Accounting" = -2:2,
                     "hLink" = "https://CRAN.R-project.org/",
                     "Percentage" = seq(-1, 1, length.out=5),
                     "TinyNumber" = runif(5) / 1E9, stringsAsFactors = FALSE)
    class(df$Currency) <- "currency"
    class(df$Accounting) <- "accounting"
    class(df$hLink) <- "hyperlink"
    class(df$Percentage) <- "percentage"
    class(df$TinyNumber) <- "scientific"

    wxls(df)

    wxls(list(A = df, B = df, C= df))   # write to 3 separate tabs with corresponding list names. 
}

Monday, June 5, 2017

Plot "y" against "x" where "x" is a vector of labels

my.plot.vs.label <- function(x, y, ...)
{ 
    ## Purpose: Plot "y" against "x" where "x" is a vector of labels (string)
    ## Arguments:
    ##   x: a string vector (labels)
    ##   y: a numeric vector or matrix
    ##   ...: passed to "matplot"
    ## Return: a plot
    ## Author: Feiming Chen, Date:  5 Jun 2017, 14:46
    ## ________________________________________________

    s <- seq(x)
    matplot(s, y, type="b", axes = F, xlab ="", ylab="", ...)
    axis(side=2)
    axis(side=1, at=s, labels=x, las=2)

    yn <- ncol(y)
    yna <- names(y)
    if (!is.null(yn) && !is.null(yna)) {
        legend("topright", legend=yna, col=1:yn, lwd=1.5, lty=1:yn)
    }
}
if (F) {                                # Unit Test
    x <- LETTERS
    y <- matrix(rnorm(260), ncol=10)
    my.plot.vs.label(x, y)
    y2 <- as.data.frame(y)
    my.plot.vs.label(x, y2)
}

Friday, June 2, 2017

Read/Import all CSV files in a specified directory

batch.read.csv.files <- function(p)
{ 
    ## Purpose: Read/Import all CSV files in a specified directory.
    ##          Require package "readr". 
    ## Arguments:
    ##   p: a file directory with CSV files. 
    ## Return:
    ##   a list, each element is a data frame (imported from the file) and its name is the corresponding file name. 
    ## Author: Feiming Chen, Date:  2 Jun 2017, 11:09
    ## ________________________________________________

    file.list <- list.files(p, pattern=".csv", full.names = TRUE, ignore.case = TRUE)

    if (length(file.list) > 0) { 
        dat <- lapply(file.list, readr::read_csv)
        names(dat) <- sapply(file.list, basename)
    } else dat <- list()

    cat("Import", length(dat), "CSV Files.\n")
    dat
}

Wednesday, May 3, 2017

Feature Filtering with Natural B-Splines for Functional Modeling Application

feature.filtering.ns <- function(X, df = 10, graph = FALSE)
{ 
    ## Purpose: Feature Filtering with Natural B-Splines for Functional Modeling Application.
    ##          Preprocess the feature data (design matrix) to enforce a natural regularization.
    ##          In other words, project each observation to each of "df" axis represented by B-spline Basis. 
    ## Arguments:
    ##   X: a design matrix (predictors) of dimension N x p (N observations, p predictors).
    ##      If "X" is a vector, convert it to a matrix with one row. 
    ##   df: Degrees of Freedom (number of natural B-splines basis functions to approximate the (continuous) coefficient function.
    ##       Beta (p x 1) = H (p x df) Theta (df x 1), where H represents "df" number of continuous basis functions
    ##       in the coefficient space, and Theta represents the linear combination parameters for constructing Beta.
    ##   graph: if TRUE, plot each observation (each row of X) as a time series,
    ##          its projection to the reduced space (formed by the natural B-spline basis),
    ##          and the natural B-spline basis. 
    ## Return: Preprocessed Features (X H) of dimension N x df, which can be used as "df" input into a predictive model.
    ##         X Beta = X H Theta = (X H) Theta
    ## Author: Feiming Chen, Date:  3 May 2017, 12:12
    ## ________________________________________________

    if (is.vector(X)) X <- matrix(X, nrow = 1)

    p <- ncol(X)                        # number of predictors
    require(splines)
    ## B-splines for Natural Cubic Spline: a p x df  matrix.
    H <- ns(1:p, df=df)   

    ## Filtered Features.  Dimensions: N x p  p x df = N x df
    ## Project each observation "x" into one of "df" Basis (each column of H matrix) so that
    ## each "x" is represented by "df" coefficient (coordinates in the coordinate system defined by "H")
    ans <- X %*% H                             

    if (graph) {
        par(mfrow=c(3,1))
        matplot(t(X), type = "b", xlab = "Feature Index", ylab = "Observation Value",
                main = "Original Features (High-Dimensional Space)")
        matplot(H, type="b", xlab = "Index", ylab ="Value", main="Natural B-spline Basis")
        matplot(t(ans), type = "b", xlab = "Feature Index", ylab = "Observation Value",
                main = "Filtered Features (Low-Dimensional Space)")
        par(mfrow=c(1,1))
    }

    ans
}
if (F) {                                # Unit Test
    X <- matrix(rnorm(1000), 20, 50)
    y <- feature.filtering.ns(X)
    y <- feature.filtering.ns(X, df = 20)    

    x <- rnorm(100)
    y <- feature.filtering.ns(x, graph=T)
}

Tuesday, April 25, 2017

Apply a stamp (text string) to each page of a PDF file

my.pdf.stamp <- function(stamp.text="Stamp", pos=c(0.4, 0.9), pdf.in.file,  pdf.out.file)
{ 
    ## Purpose: Apply a stamp (text string) to each page of a PDF file.
    ##          Requires Linux program "pdftk". 
    ## Arguments:
    ##   pdf.in.file: Input PDF file. 
    ##   stamp.text: a vector of strings (stamps). Each value will be stamped on the corresponding page of the PDF file.
    ##               Its values will be recycled to match the total number of pages in the input PDF file.
    ##   pos: position of the stamp text (located between [0,1] X [0,1])
    ##   pdf.out.file: Output PDF file with the stamps. 
    ## Return: PDF file with the stamps. 
    ## Author: Feiming Chen, Date: 25 Apr 2017, 10:42
    ## ________________________________________________
    
    p1 <- pdf.file.page.number(pdf.in.file)
    p2 <- length(stamp.text)
    if (p1 != p2) stamp.text <- rep(stamp.text, length.out = p1)

    ## burst the PDF file into single page PDF's
    f <- "page_%05d.pdf"
    fs <- "stamp_%05d"
    fs2 <- "stamp_%05d-Plot.pdf"
    fo <- "out_%05d.pdf"
    system(paste("pdftk", pdf.in.file, "burst output", f))
    
    for (i in 1:p1) {
        X(text.in.plot(stamp.text[i], cex = 2, pos = pos), file =  sprintf(fs, i), open.pdf=F)
        system(paste("pdftk", sprintf(f, i), "stamp", sprintf(fs2, i), "output", sprintf(fo, i)))
    }

    merge.file.list <- paste(sapply(1:p1, function(i) sprintf(fo, i)), collapse = " ")

    system(paste("pdftk", merge.file.list, "cat output", pdf.out.file))
}
if (F) {                                # Unit Test
    dir.create("tmp"); setwd("tmp")
    pdf("test.pdf"); replicate(3, plot(rnorm(100))); dev.off()

    pdf.in.file <- "test.pdf"
    pdf.out.file <- "result.pdf"
    stamp.text <- LETTERS[1:3]
    pos <- c(0.4, 0.9)

    my.pdf.stamp(stamp.text, pos=pos, pdf.in.file,  pdf.out.file)
}

pdf.file.page.number <- function(fname) {
    ## Return the total number of pages in a PDF file.
    ## Require Linux program "pdfinfo"
    a <- pipe(paste0("pdfinfo '", fname, "' | grep Pages | cut -d: -f2"))
    page.number <- as.numeric(readLines(a))
    close(a)
    page.number
}
if (F) {
    ## pdf.file.page.number("ALL-Plots.pdf")
}

text.in.plot <- function(x, cex=1, pos=c(0.5, 0.5)) {
    ## "x" is a string.  Put "x" in the center of a plot.  For putting
    ## text summary in a PDF file that is mostly plots.
    ## "pos" is the position of the text string
    ## "cex" is the expansion ratio of the text string
  plot(c(0, 1), c(0, 1), type = "n", main = "", xlab = "", ylab = "", xaxt="n", yaxt="n", axes = F, mar=c(0,0,0,0), oma=c(0,0,0,0))
  text(pos[1], pos[2], x, cex=cex)
}
if (F) {
    text.in.plot("good", cex=5)
    text.in.plot("bad", pos=c(0.1, 0.1))
}

X <- function(expr, file="test", open.pdf = TRUE) {
    ## USED IN UNIX!
    ## Generate a PDF file based on evaluation of expr and open it
    ## Put multiple plot commands like "{ cmd1; cmd2; cmd3; cmd4 }".  
    ## USAGE: xpdf(plot(sig1))
    graphics.off()
    ## png("~/tmp/plot.png", width=600, height=600)
    r <- my.pdf(file)
    ans <- eval(expr)
    dev.off()
    ## system("convert ~/tmp/plot.pdf ~/tmp/plot.png")
    if (open.pdf) open.pdf(r$plot)
    invisible(ans)
}
if (F) {
    X(plot(rnorm(100)))
    a <- X({plot(a <- rnorm(100)); a})
    X({plot(a <- rnorm(100)); hist(a); boxplot(a); dotchart(a); qqnorm(a); plot(lm(a~seq(a)))})
}

open.pdf <- function(file) {
    ## Open a PDF file.
    system(paste("evince ", file, " &", sep=""))
    cat("See Plot In: ", file, "\n")
}

Return the total number of pages in a PDF file.

pdf.file.page.number <- function(fname) {
    ## Return the total number of pages in a PDF file.
    ## Require Linux program "pdfinfo"
    a <- pipe(paste0("pdfinfo '", fname, "' | grep Pages | cut -d: -f2"))
    page.number <- as.numeric(readLines(a))
    close(a)
    page.number
}
if (F) {
    ## pdf.file.page.number("ALL-Plots.pdf")
}

Friday, April 21, 2017

Rename all files in a directory according to a pattern (regular expression)

batch.rename.file <- function(pattern, replacement, path=".")
{ 
    ## Purpose: Rename all files in a directory according to a pattern (regular expression). 
    ## Arguments:
    ##   pattern: a regular expression (as is used by R base function "sub") to match the file names. 
    ##   replacement: replacement text for the new file name (as is used by R base function "gsub")
    ##   path: path to a directory containing the files to be renamed.  Default to the current directory. 
    ## Return: Side Effect (all files in the directory will be renamed)
    ## Author: Feiming Chen, Date: 21 Apr 2017, 13:32
    ## ________________________________________________
    
    f1 <- dir(path, pattern, full.names = T, recursive = T) # Current file names. Include all files in subdirectories. 
    f1.base <- basename(f1)
    f2.base <-  sub(pattern, replacement, f1.base)                   # Corresponding New file names
    f2 <- file.path(dirname(f1), f2.base)
    ans <- file.rename(f1, f2)
    idx <- which(!ans)                  # which operation (file renaming) fails
    if (length(idx) == 0) {
        cat("All", length(ans), "files are renamed.\n")
    } else {
        cat(length(idx), "out of", length(ans), "files are not renamed.\n")
    }
}
if (F) {                                # Unit Test
    dir.create("tmp")
    setwd("tmp")
    file.create("a.b.html", "c.d.html")
    batch.rename.file("\\.", "_")
    dir()                               # "a_b.html" "c_d.html"
}

Thursday, April 20, 2017

Abbreivate a binomial scientific name

abbreviate.species.names <- function(x)
{ 
    ## Purpose: Abbreivate a binomial scientific name. 
    ## Arguments:
    ##   x: a vector of strings for the names of the species. 
    ## Return: a vector of strings with the abbreviated names
    ## Author: Feiming Chen, Date: 20 Apr 2017, 14:01
    ## ________________________________________________

    x1 <- gsub(" +", " ", x)
    x2 <- sub("^ +", "", x1)
    x3 <- sub(" +$", "", x2)
    x4 <- strsplit(x3, " ")

    sapply(x4, function(y) {
        if (length(y) == 2) {
            ans <- paste0(toupper(substr(y[1], 1, 1)), ".", tolower(y[2]))
        } else ans <- y
        ans
    })
}
if (F) {                                # Unit Test
    x = c("Escherichia coli", "Tyrannosaurus rex", "N/A", " Canis   lupus  ")
    abbreviate.species.names(x)         # "E.coli"  "T.rex"   "N/A"     "C.lupus"
}

Tuesday, April 18, 2017

Extract a specific "pattern" from each element of a character vector

extract.pattern <- function(x, pattern = "([[:alnum:]]+)")
{ 
    ## Purpose: Extract a specific "pattern" from each element of a character vector.
    ## Arguments:
    ##   x: a character vector
    ##   pattern: a regular expression which specifies the pattern (inside brackets) to be extracted. 
    ## Return: a vector with the extracted pattern.  NA is filled in where no match is found. 
    ## Author: Feiming Chen, Date: 18 Apr 2017, 14:34
    ## ________________________________________________

    r <- paste0(".*", pattern, ".*")
    sub(r, "\\1", x)
}
if (F) {                                # Unit Test
    x = c(NA, "a-b", "a-d", "b-c", "d-e")
    extract.pattern(x)                  # extract alpha-numeric pattern
    ## [1] NA  "b" "d" "c" "e"

    x = c("ab(A)x", "dc(B)y")
    extract.pattern(x, "(\\(.*\\))")      # extract the string inside brackets
    ## [1] "(A)" "(B)"
    extract.pattern(x, "\\)(.*)")      # extract the string after ")"
    ## [1] "x" "y"

    x = c("V167.G56", "V166.R56", "V122.G41", "V163.R55", "V165.B55", "V175.R59")
    extract.pattern(x, "(R|G|B)")       # extract R or G or B character only. 
    ## [1] "G" "R" "G" "R" "B" "R"
    extract.pattern(x, "[RGB]{1}([0-9]+)") # extract the numbers following the R, G, B character. 
    ## [1] "56" "56" "41" "55" "55" "59"
}

Format a confusion matrix (removing zero entry for visual clarity)

format.confusion.matrix <- function(x)
{ 
    ## Purpose: Format a confusion matrix (removing zero entry for visual clarity)
    ##          Requires package "formattable". 
    ## Arguments:
    ##    x: a confusion matrix (nrow = ncol, with counts in each cell) 
    ## Return: a formated confusion matrix in html browser and in a CSV file. 
    ## Author: Feiming Chen, Date: 17 Apr 2017, 15:45
    ## ________________________________________________

    x1 <- as.character(as.matrix(x))
    x1[x1 == "0"] <- ""
    x2 <- matrix(x1, nrow(x), ncol(x), dimnames=dimnames((x)))
    write.csv(x2, file="Confusion-Matrix.csv")
    formattable::formattable(as.data.frame(x2))
}
if (F) {                                # Unit Test
    x <- matrix(c(3,1,0,1,4,0,0,1,6), 3, 3, dimnames = list(c("A", "B", "C"), c("A", "B", "C")))
    format.confusion.matrix(x)
}

Sort a Data Frame

sort.df <-  function(d)
{ 
    ## Purpose: Sort a Data Frame along 1st column, ties along 2nd, ..., until its last column. 
    ## Arguments:
    ##   d: a data frame
    ## Return: a sorted data frame
    ## Author: Feiming Chen, Date: 18 Apr 2017, 13:49
    ## ________________________________________________
    d[ do.call(order, d), ]    
}
if (F) {                                # Unit Test
    df <- data.frame(X = c("b", "a", "a", "c"), Y = c(4, 2, 1, 3))
    sort.df(df)
    ##   X Y
    ## 3 a 1
    ## 2 a 2
    ## 1 b 4
    ## 4 c 3

    sort.df(df[2:1])
    ##   Y X
    ## 3 1 a
    ## 2 2 a
    ## 4 3 c
    ## 1 4 b
}

Thursday, April 13, 2017

Check if a vector's values are all the same

is.constant  <- function(x)
{ 
    ## Purpose: Check if a vector's values are all the same. 
    ## Arguments:
    ##   x: a vector (numeric, character, logical, etc.)
    ## Return: a logical value tha is TRUE if and only if all the values are the same (constant). 
    ## Author: Feiming Chen, Date: 13 Apr 2017, 14:59
    ## ________________________________________________
    y <- rep.int(x[1], length(x))
    isTRUE(all.equal(x, y))
}
if (F) {                                # Unit Test
    is.constant(c(T, T, F))             # F
    is.constant(c(F, F, F))             # T
    is.constant(c(1, 1, 2))             # F
    is.constant(c(2, 2, 2))             # T
    is.constant(c("a", "a", "b"))       # F
    is.constant(c("b", "b", "b"))       # T
}

Tuesday, April 11, 2017

Find the value that is the maximum in absolute value

my.abs.max <- function(x)
{ 
    ## Purpose: Find the value that is the maximum in absolute value
    ## Arguments:
    ##   x: a numeric vector
    ## Return:  a value in the "x" vector that is the largest in absolute value (keep the sign). 
    ## Author: Feiming Chen, Date: 11 Apr 2017, 15:19
    ## ________________________________________________

    i <- which.max(abs(x))
    x[i]
}
if (F) {                                # Unit Test
    my.abs.max(c(1, -3, 2))             # -3
    my.abs.max(c(1, 3, -2))             # 3
}

Test Time Series Second Order Curvature Significance

test.ts.2nd.order.sig <- function(x, graph=F)
{ 
    ## Purpose: Test Time Series 2nd Order Significance. 
    ## Arguments:
    ##    x: a numeric vector (time series)
    ## Return:  P-value for testing the 2nd order significance. 
    ## Author: Feiming Chen, Date: 10 Apr 2017, 14:57
    ## ________________________________________________

    d <- data.frame(Response = x, Time = seq(x))
    m1 <- lm(Response ~ Time, d)
    m2 <- lm(Response ~ Time + I(Time^2), d)
    a <- anova(m1, m2)
    model.compare.P.value <- a[["Pr(>F)"]][2]
    
    if (graph) {
        plot(d$Time, d$Response, xlab="Time", ylab="Response", type="b")
        lines(d$Time, fitted(m1), col="red")
        lines(d$Time, fitted(m2), col="blue")
        title(main=paste("Model Comparison P-value =", round(model.compare.P.value, 3)))
    }

    model.compare.P.value
}
if (F) {                                # Unit Test
    x <- rnorm(100)
    test.ts.2nd.order.sig(x, graph = T)
    r <- replicate(10000, test.ts.2nd.order.sig(rnorm(20))) # should follow a uniform distribution. 
    ## summary(r)
    ##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    ## 0.00011 0.25800 0.50600 0.50500 0.75500 1.00000 
}

Wednesday, April 5, 2017

Convert RGB Color Value to Spherical Coordinates

rgb2sph <- function(rgb, nbits)
{ 
    ## Purpose: Convert RGB Color Value to Spherical Coordinates
    ## Arguments:
    ##   rgb: a (N X 3) matrix of RGB values (each row is a triplet of R, G, B).
    ##          Assume each integer value range from 0 to (2^nbits - 1).
    ##   nbits: number of bits used to code each of R, G, B channel. 
    ## Return: a (N X 3) matrix of Shperical Coordinates (each row is a triplet of M, Theta, Phi), which are scaled to be within (0, 1).
    ## Author: Feiming Chen, Date:  5 Apr 2017, 10:21
    ## ________________________________________________
    
    max.scale <- 2^nbits
    M.scale <- sqrt(3 * max.scale^2)  
    angle.scale <- pi / 2

    t(apply(rgb + 1, 1, function(x) {
        M <- sqrt(sum(x^2))             # Color Intensity/Luminosity (0-1) 
        Theta <- atan(x[2] / x[1])      # Azimuthal Angle
        Phi <- acos(x[3] / M)           # Zenith Angle 
        c(M / M.scale, Theta / angle.scale, Phi / angle.scale)
    }))
}
if (F) {                                # Unit Test
    rgb <- matrix(c(0, 0, 0,  4095,4095,4095, 3527, 3513, 3504, 2470, 3034,3218), ncol=3, byrow = T)
    rgb2sph(rgb, nbits = 12)
##            [,1]    [,2]    [,3]
## [1,] 0.00024414 0.50000 0.60817
## [2,] 1.00000000 0.50000 0.60817
## [3,] 0.85832017 0.49873 0.60954
## [4,] 0.71428036 0.56498 0.56181
}

Wednesday, February 15, 2017

Look up value in a reference table (Similar to Excel's VLOOKUP function)

vlookup <- function(x, x.vector, y.vector = seq(x.vector), exact = FALSE, na.action = FALSE) 
{
    ## PURPOSE: Look up value in a reference table (Similar to Excel's VLOOKUP function)
    ## ARGUMENT:
    ##    x: a vector of Look Up Value (should not contain NA values)
    ##    x.vector: a vector where the "x" value can be located (approximately).
    ##    y.vector: a vector containing the return value that correponds the identified location in the "x.vector".
    ##              Default to the index to the x.vector.
    ##    exact: if T, the numeric values have to match exactly. 
    ##    na.action: If T and if cannot find the lookup value, return the input value. 
    ## RETURN: a scale value that corresponds to the "x" lookup value. 
    ## DATE: 20 Oct 2016, 16:30
    ## -----------------------------------------------------
    
    ans <- NULL
    if (is.numeric(x)) {
        if (exact) {
            ans <- y.vector[match(x, x.vector)]
        } else {
            ans <- y.vector[match.approx(x, x.vector)]
        }
    } else if (is.character(x)) {
        ans <- y.vector[match(x, x.vector)]
    }      

    if (na.action) {
        idx <- is.na(ans)
        ans[idx] <- x[idx]
    }
    ans
}
if (F) {                                # Unit Test 
    vlookup(c(68.2, 73), c(54, 60, 68.1, 72, 80), LETTERS[1:5]) # "C" "D"
    vlookup(c(2, 3.1), 1:5)                                     # 2 3
    vlookup(c(2, 3.1), 1:5, exact = T)                          # 2 NA
    vlookup(c("C", "E"), LETTERS[1:5], LETTERS[11:15]) #  "M" "O"
    vlookup(c("C", "X"), LETTERS[1:5], LETTERS[11:15]) #  "M" NA
    vlookup(c("C", "X"), LETTERS[1:5], LETTERS[11:15], na.action = T) #  "M" "X"
}


match.approx <- function(x, y) {
    ## Purpose: Match Approximately for Numerical Data
    ## Arguments:
    ##   "x":  a vector of numeric values.
    ##   "y":  a vector of numeric values. 
    ## RETURN:
    ##   The index in "y" that indicates the closest y value to each of "x" value. 
    ## Author: Feiming Chen, Date: 15 Feb 2017, 10:41
    ## ________________________________________________
    
    sapply(x, function(x0) which.min(abs(x0 - y)))
}
if (F) {
  match.approx(c(4.2, 1.2, 15), 1:10)                #  4  1 10
}

Simple Principal Component Analysis

my.pca  <- function(dat.feature)
{ 
    ## Purpose: Principal Component Analysis
    ## Arguments:
    ##   dat.feature: a data feature matrix, where each column is a feature vector. 
    ## Return: PCA plots and result. 
    ## Author: Feiming Chen, Date: 15 Feb 2017, 09:59
    ## ________________________________________________
    p <- prcomp(dat.feature)
    plot(p, main = "Scree Plot", xlab = "Principal Components")
    dev.new()
    biplot(p)
    print(summary(p))
    invisible(p)
}
if (F) {                                # Unit Test
    d <- matrix(rnorm(1000), 20)
    my.pca(d)
}

Tuesday, February 14, 2017

Making Barplot with Confidence Intervals

my.barplot  <- function(h, hl, hu, ...)
{ 
    ## Purpose: Template function for making Barplot with Confidence Interval.
    ##          Require R package "gplots". Refer to documentation of "gplots::barplots2". 
    ## Arguments:
    ##   h: Height Matrix. Each column is a group of heights. The dimnames will be used for labeling.
    ##   hl: Lower CI. Same dimension as "h". 
    ##   hu: Upper CI. Same dimension as "h".
    ##   ...: passed to "gplots::barplot2"
    ## Return: 
    ## Author: Feiming Chen, Date: 14 Feb 2017, 14:22
    ## ________________________________________________

    old.par <- par("mar")
    par(mar = c(5, 10, 4, 5))
    gplots::barplot2(h, beside = T, legend.text = T, horiz = T, col=rainbow(ncol(h)), border = NA, 
                     cex.axis = 2, cex.names = 1.5, cex.lab=2, las = 1, xpd = F,
                     plot.ci = TRUE, ci.l = hl, ci.u = hu, ci.width = 0.3, ci.lwd = 2, 
                     plot.grid = TRUE, 
                     ...) -> mp

    ## Put numbers on the bars 
    for (i in seq(ncol(mp))) text(1, mp[,i], labels=round(h[,i], 2), cex=2)
    par(old.par)
}
if (F) {                                # Unit Test
    x <- structure(c(9.5, 10.5, 1.2, 0.4, 7, 12, 9.8, 11.2, 8.3, 9.6, 0.8, 0.3, 6.7, 9.8, 9, 10.2), .Dim = c(8L, 2L), .Dimnames = list(c("Type R", "Type B", "R.SD", "B.SD", "Rl", "Ru", "Bl", "Bu"), c("Class 1", "Class 2")))
    h <- x[1:2,]
    hl <- x[c(5,7),]
    hu <- x[c(6,8),]
    my.barplot(h, hl, hu, main="Comparison", xlab="Average Result", xlim = c(0, 20))
}

Generate HTML or PNG representation of a data / table / model object

tex.print <- function(x, type=c("PNG", "HTML"), file=NULL, caption = file, digits=0) {
    ## Purpose: Generate HTML or PNG representation of a data/table/model object.
    ##          Require R "memisc" package. 
    ## Arguments:
    ##   x: an object (table, ftable, data frame, model object, etc. that are acceptable in R "memisc" package.
    ##   type: what to generate -- 
    ##         "HTML" shows a HTML table in a web browser. 
    ##         "PNG" shows a PNG (image) file (require linux commands: latex, dvipng, display).
    ##   file: make a copy of the PNG file with this name.  Also used for the table name.
    ##   caption: table caption. 
    ##   digits: number of decimal places to use.
    ## Return: Display the data representation and an image file (type="PNG") for insertion. 
    ## Author: Feiming Chen, Date: 14 Feb 2017, 11:59
    ## ________________________________________________

    type <- match.arg(type)

    is.tabular.data <- "tabular" %in% class(x) # "x" is from the "tables" package output
    is.tbl.data <- "tbl" %in% class(x)         # "x" is from the "dplyr" package output

    if (is.tbl.data) {
        x <- as.data.frame(x)
    }
    
    switch(type,
           HTML = {
               ## Show HTML table in a Web Browser
               if (is.null(file)) {
                   memisc::show_html(x, digits = digits)
               } else {
                   memisc::write_html(x, file = paste0(file,".html"), digits = digits)
               }
           }, 
           PNG = {
               ## Generate LaTeX and PNG Image file
               
               if (is.tabular.data) {
                   library(tables)
                   booktabs()
               }

               if (is.tabular.data || is.tbl.data) {

                   w <- Hmisc::latex(x, file="X.tex", ctable=TRUE, caption = caption, digits = digits)
                   if (is.tabular.data) w$style <- c("booktabs", "dcolumn")
                   d <- Hmisc::dvi(w)                         # convert to DVI file 
                   system(paste("dvipng -q* -o X.png -T tight -D 200", d$file)) # convert to PNG file with Tight box and Resolution of 200
                   file.remove("X.tex")

               } else {

                   r <- memisc:::toLatex.default(x, digits=digits, show.vars=TRUE)
                   
                   f <- file("X.tex", "w")
                   writeLines(c("\\documentclass{article}",
                                "\\usepackage{booktabs}",
                                "\\usepackage{dcolumn}",
                                "\\begin{document}", 
                                "\\pagenumbering{gobble}",
                                "\\begin{table}", 
                                "\\centering"),  f)

                   if (!is.null(caption)) writeLines(paste0("\\caption{", caption, "}"), f)

                   writeLines(r, f)

                   writeLines(c("\\end{table}", "\\end{document}"), f)

                   close(f)

                   system("latex X.tex >/dev/null") # Generate DVI file from LaTeX file (X.tex -=> X.dvi)
                   system("dvipng -q* -o X.png -T tight -D 200 X.dvi >/dev/null") # convert to PNG file with Tight box and Resolution of 200
                   file.remove("X.tex", "X.log", "X.aux", "X.dvi")
               }

               if (is.null(file)) {
                   if (!grepl("/tmp", getwd())) {
                       file.copy("X.png", "~/tmp/X.png", overwrite = T)
                   }
                   system("display ~/tmp/X.png &")                                     # display the PNG file. 
               } else {
                   file.rename("X.png", paste0(file,".png"))
               }
           }
           )
    invisible(NULL)
}
if (F) {                                # Unit Test
    x <- data.frame(age=c(1,1,1,5,5,5), height=c(10,12,9,20,23,18))
    tex.print(x)
    tex.print(ftable(x))
    x %>% group_by(age) %>% summarise(heights = mean(height)) -> y
    tex.print(y)
    tex.print(x, file="Test")
    tex.print(y, type="HTML")
    tex.print(y, type="HTML", file = "Test")

    library(tables)
    a <- tabular( (Species + 1) ~ (n=1) + Format(digits=2) * (Sepal.Length + Sepal.Width) * (mean + sd), data=iris )
    tex.print(a)
    tex.print(a, file = "test")

}

Apply a function to a vector in an accumulative manner (like cumsum)

cum.func <- function(x, func, ...)
{ 
    ## Purpose: Apply a function to a vector in an accumulative manner (like cumsum)
    ## Arguments:
    ##   x: a vector
    ##   func: a function
    ##   ...: passed to "func"
    ## Return: a vector that is the result of:
    ##   c( func(x[1]),  func(x[1:2]),  func(x[1:3]),  func(x[1:4]),  func(x[1:5]), ... )
    ## Author: Feiming Chen, Date: 30 Jan 2017, 14:06
    ## ________________________________________________
    
    N <- length(x)
    y <- rep_len(NA, N)
    for (i in 1:N) y[i] <- func(x[1:i], ...)
    y[is.na(x)] <- NA
    y
}
if (F) {                                # Unit Test
    cum.func(5:1, mean)                 #  c(5.0, 4.5, 4.0, 3.5, 3.0))
    cum.func(c(NA, 5:1, NA), mean, na.rm=T)                 #  c(NA, 5.0, 4.5, 4.0, 3.5, 3.0, NA)
    cum.func(c(NA, 5:1, NA), sd, na.rm=T)        # NA NA 0.70711 1.00000 1.29099 1.58114 NA
}

Overlay a new plot on an existing plot

plot.overlay <- function(..., label="2nd Y", col="blue")
{ 
    ## Purpose: Overlay a new plot on an existing plot. 
    ## Arguments:
    ##   ...: Passed to the "plot" function for the new plot.
    ##   label: The secondary Y-axis label.
    ##   col: The color of the ticks and tick labels for the new plot. 
    ## Return: A modified plot. 
    ## Author: Feiming Chen, Date:  6 Feb 2017, 10:19
    ## ________________________________________________

    par(new=T)
    plot(..., axes=F, xlab="", ylab="", col=col)
    axis(side=4, col.ticks = col, col.axis=col)
    mtext(label, side=4, line=-1, col=col)
}
if (F) {                                # Unit Test
    plot(rnorm(100))
    plot.overlay(rnorm(100), type="l", label="Test")
}

Apply a function to each consecutive pairs in a vector.

diff.func <- function(x, func)
{ 
    ## Purpose: Apply a function to each consecutive pairs in a vector. 
    ## Arguments:
    ##   x: a vector
    ##   func: a function that operature on two elements (of "x")
    ## Return: a vector, whose length is length(x) - 1, that looks like (where N=length(x))
    ##    func(x[2], x[1]), func(x[3], x[2]), func(x[4], x[3]), ..., func(x[N], x[N-1]).
    ## Author: Feiming Chen, Date:  9 Feb 2017, 11:25
    ## ________________________________________________
    
    ## embed in 2-dim space to make consecutive pairs
    x2 <- embed(x, 2)                  # N - 1 pairs
    ## apply a function to each consecutive paris (reversely)
    x3 <- apply(x2, 1, func)
    x3
}
if (F) {                                # Unit Test
    diff.func(1:5, diff)                # -1 -1 -1 -1   
    diff.func(1:5, mean)                # 1.5 2.5 3.5 4.5
}

Plot multiple lines together using "matplot"

my.line.plot  <- function(x, y, ...)
{ 
    ## Purpose: Template function for making lines by group. 
    ## Arguments:
    ##   x: X-coords
    ##   y: a matrix with Y-coords (each column has a set of Y values)
    ##   ...: passed to "plot"
    ## Return: A plot assuming log X-axis with axes tickes given by "x". 
    ## Author: Feiming Chen, Date:  9 Feb 2017, 14:02
    ## ________________________________________________n
    N <- ncol(y)                        # number of lines
    matplot(x, y, type="b", log="x", xaxt="n", lwd=2, lty=seq(N), col = rainbow(N), ...)  
    matpoints(x, y, log="x", xaxt="n", col = "black", ...)
    axis(1, at=x)
}
if (F) {                                # Unit Test
    x <- c(0.1, 1, 10)
    y <- matrix(rnorm(9), 3)
    my.line.plot(x, y, xlab="Tiime", ylab="Signal", main="Line Plot by Groups")
}

Monday, February 6, 2017

R Code: Apply a function to each element of a list (or data frame) or to each element of a list within a list of data while retaining the names attribute of each nested list


lf.compute.data.column <- function(dat.list, func, ...)
{ 
    ## Purpose: Apply a function to each column of a list (e.g. data frame). Retains the names information.  
    ##          Basis for processing more complex data (e.g. a list of a list)
    ## Arguments:
    ##     dat.list: a list of data (e.g. data frame)
    ##     func: a function that computes a vector (may have an argument "name")
    ##     ...: passed to "func"
    ## Return:
    ##    a list of outputs. 
    ## Author: Feiming Chen, Date: 30 Jan 2017, 13:21
    ## ________________________________________________

    this.name <- null2na(names(dat.list))

    mapply(func, 
           dat.list, 
           name = this.name, 
           ..., 
           SIMPLIFY = FALSE
           )
}
if (F) {                                # Unit Test
    dat.list = data.frame(x=rnorm(10), y=rnorm(10))
    lf.compute.data.column(dat.list, mean)
    lf.compute.data.column(dat.list, function(x, ...) { c(a=mean(x), b=sd(x)) })
    lf.compute.data.column(dat.list, function(x, name) { list(a=mean(x), n=name) })
}


lf.compute.data.column.2 <- function(dat.list, func, ...)
{ 
    ## Purpose: Apply a function to a list of a list of data. Retains the names information.  
    ## Arguments:
    ##   dat.list: a list of a list
    ##   func: a function that computes a vector.  (may have an argument "name")
    ## Return: a list of list of results
    ## Author: Feiming Chen, Date:  6 Feb 2017, 12:50
    ## ________________________________________________
    
    this.name <- null2na(names(dat.list))

    mapply(
        function(d, name) {
            if (!is.na(name)) names(d) <- paste0(name, ".", names(d))
            lf.compute.data.column(d, func)
        },
        dat.list, 
        name = this.name, 
        ..., 
        SIMPLIFY = FALSE
    )
}
if (F) {                                # Unit Test
    dat.list.2 = list(test1 = data.frame(x=1:5, y=6:10), test2= data.frame(x=11:15, y=16:20))
    lf.compute.data.column.2(dat.list.2, mean)
    lf.compute.data.column.2(dat.list.2, function(x, ...) { c(m=mean(x), s=sd(x)) })
    lf.compute.data.column.2(dat.list.2, function(x, name) { list(m=mean(x), n=name) })
}


null2na <- function(x) {
    ## if the value of x is NULL(as determined by length test since is.null may
    ## not always work), turn it into NA so that an object
    ## made of it will always exist.
    if (length(x)==0)
        x <- NA
    x
}

R Code: Extract a fixed component of a list within a list and optionally apply a function


lf <- function(obj, comp, fun=identity, wrap=identity,
               return.vector=TRUE, verbose=FALSE,
               ...) {
    ## WARNING: "comp" have to be a real string instead of a variable
    ## Functional Apply
    ## "comp": a character string, indicated a list component.
    ## "obj" is a list. "comp" is a component of each element of the
    ## list "obj".  Apply a funcion (just an unquoted name) to each "comp" of the list
    ## "obj".  "..." is passed to "fun".  use "fun=identity" if no
    ## processing is needed.  "wrap" is applied to the final result.
    ## "return.vector" will force the result to be a vector.

    comp <- substitute(comp)
    e <- paste(deparse(substitute(wrap)), "(lapply(obj,",
               deparse(substitute(function(x) fun(x[[comp]], ...))), "))")
    ans <- eval(parse(text=e))
    if (verbose) cat("expression:", e, "\n")
    if (return.vector)
        ans <- sapply(ans, function(x) if (is.null(x)) NA else x)

    ans
}
if (F) {
    a <- list(x=c(1:3), y=c(4:6))
    lf(a, 2)                            # c(x=2, y=5)

    a <- list(x=list(median=3, mean=4), y=list(median=5, mean=6))
    lf(a, "mean")                       # c(x=4, y=6)
    lf(a, "mean", sqrt, verbose=T)               # c(x=2, y=2.4495)
    lf(a, "mean", sqrt, wrap=length, verbose=T)  # 2
}

R Code: Apply a function to a vector in an accumulative manner


cum.func <- function(x, func)
{ 
    ## Purpose: Apply a function to a vector in an accumulative manner (like cumsum)
    ## Arguments:
    ##   x: a vector
    ##   func: a function 
    ## Return: a vector that is the result of:
    ##   c( func(x[1]),  func(x[1:2]),  func(x[1:3]),  func(x[1:4]),  func(x[1:5]), ... )
    ## Author: Feiming Chen, Date: 30 Jan 2017, 14:06
    ## ________________________________________________
    
    N <- length(x)
    y <- rep_len(NA, N)
    for (i in 1:N) y[i] <- func(x[1:i])
    y
}
if (F) {                                # Unit Test
    identical(cum.func(5:1, mean),  c(5.0, 4.5, 4.0, 3.5, 3.0))
}

R Function for Overlaying a New Plot on an Existing Plot


plot.overlay <- function(..., label="2nd Y", col="blue")
{ 
    ## Purpose: Overlay a new plot on an existing plot. 
    ## Arguments:
    ##   ...: Passed to the "plot" function for the new plot.
    ##   label: The secondary Y-axis label.
    ##   col: The color of the ticks and tick labels for the new plot. 
    ## Return: A modified plot. 
    ## Author: Feiming Chen, Date:  6 Feb 2017, 10:19
    ## ________________________________________________

    par(new=T)
    plot(..., axes=F, xlab="", ylab="", col=col)
    axis(side=4, col.ticks = col, col.axis=col)
    mtext(label, side=4, line=-1, col=col)
}
if (F) {                                # Unit Test
    plot(rnorm(100))
    plot.overlay(rnorm(100), type="l", label="Test")
}

Tuesday, December 12, 2017

Wednesday, October 18, 2017

Monday, October 2, 2017

Thursday, September 7, 2017

Wednesday, June 28, 2017

Thursday, June 22, 2017

Wednesday, June 21, 2017

Monday, June 19, 2017

Thursday, June 15, 2017

Wednesday, June 14, 2017

Tuesday, June 6, 2017

Monday, June 5, 2017

Friday, June 2, 2017

Wednesday, May 3, 2017

Tuesday, April 25, 2017

Friday, April 21, 2017

Thursday, April 20, 2017

Tuesday, April 18, 2017

Thursday, April 13, 2017

Tuesday, April 11, 2017

Wednesday, April 5, 2017

Wednesday, February 15, 2017

Tuesday, February 14, 2017

Monday, February 6, 2017

Blog Archive