R Function Library: June 2017

Wednesday, June 28, 2017

Span: A Predicate Functional that finds the location of the longest sequential run of elements where the predicate is true

Span <- function(f, x)
{ 
    ## Purpose: A Predicate Functional that finds the location of the longest sequential run
    ##          of elements where the predicate is true. 
    ## Arguments:
    ##   f: a predicate (a function that returns a single TRUE or FALSE) 
    ##   x: a list or data frame
    ## Return: The location of the longest sequential run of elements where the predicate is true. 
    ## Author: Feiming Chen, Date: 28 Jun 2017, 09:45
    ## ________________________________________________

    y <- rle(Where(f, x))
    z <- y$lengths
    w <- which.max(z)          # position of the segment with longest sequential run of TRUE
    pos.end <- sum(z[1:w])
    pos.start <- pos.end - z[w] + 1
    c(start=pos.start, end=pos.end)
}
if (F) {                                # Unit Test
    x <- c(3, 1, NA, 4, NA, NA, NA, 5, 9, NA, NA)
    Span(is.na, x)
    ## start   end 
    ##     5     7 
}

Where <- function(f, x)
{ 
    ## Purpose: A Predicate Functional
    ## See other Predicate Functionals: Filter(), Find(), Position()
    ## See other Functionals: Map(), Reduce() 
    ## Arguments:
    ##   f: a predicate (a function that returns a single TRUE or FALSE) (e.g. is.character, all, is.NULL)
    ##   x: a list or data frame
    ## Return: a logical vector 
    ## Author: Feiming Chen, Date: 28 Jun 2017, 09:45
    ## ________________________________________________

    vapply(x, f, logical(1))
}
if (F) {                                # Unit Test
    df <- data.frame(x = 1:3, y = c("a", "b", "c"), stringsAsFactors = T)
    Where(is.factor, df)
    ## Compare with: 
    Filter(is.factor, df)
    Find(is.factor, df)
    Position(is.factor, df)
}

Thursday, June 22, 2017

Draw a data frame as a R plot

my.table.plot0 <- function(x, 
                           highlight.matrix = NULL,
                           main = "Table", title2 = NULL, size = 20,
                           bold.lastrow = FALSE, bold.lastcol = FALSE
                           ) 
{
    ## PURPOSE: Draw a data frame as a R plot. Require R package "gridExtra". 
    ## ARGUMENT:
    ##    x: a data frame
    ##    highlight.matrix: if provided a logical matrix (same dimension as "x"). Then "TRUE" cells will be highlighted. 
    ##    main: title for the plot
    ##    title2: additional title with more verbose text
    ##    size: text size of table text
    ##    bold.lastrow: if T, make the last row bold font.
    ##    bold.lastcol: if T, make the last column bold font.
    ## RETURN: a plot
    ## DATE: 21 Nov 2016, 15:46
    ## -----------------------------------------------------

    x <- as.data.frame(x)
    n1 <- nrow(x)
    n2 <- ncol(x)
    cl <- matrix("black", n1, n2)
    cl[is.na(x)] <- "gray"

    ## Bold Total Column and Row
    cl2 <- cl
    cl2[] <- "plain"
    if (bold.lastrow) {
        cl2[n1,] <- "bold"
    }
    if (bold.lastcol) {
        cl2[, n2] <- "bold"
    }
    
    require(gridExtra)
    table_theme <- ttheme_default(base_size = size,
                                  core = list(fg_params=list(col=cl, fontface = cl2)))
    ## Highlight Cells
    if (!is.null(highlight.matrix)) {
        cl3 <- cl
        z <- ttheme_default()$core$bg_params$fill # default color
        z1 <- rep(z, length.out = nrow(cl3))
        for (i in 1:ncol(cl3)) cl3[,i] <- z1
        cl3[highlight.matrix] <- "yellow"
        table_theme$core$bg_params$fill <- cl3
    }

    plot.new()
    gridExtra::grid.table(as.data.frame(x), theme=table_theme)
    ## Title Text
    t2 <- ifelse(is.null(title2), "", paste0("\n", graph.title.split.lines(title2, sep=" ", num.chars = 40)))
    title(main = paste0(main, t2))

}
if (F) {                                # Unit Test 
    x <- data.frame(Type=c('Car', 'Plane'), Speed=c(NA, 200))
    my.table.plot0(x)
    my.table.plot0(x, main='Test', title2='a very complex test', size=30, bold.lastcol = T, highlight.matrix = matrix(c(NA, F, F, T) , 2, 2))
    x <- data.frame(Type=rep('Car', 20), Speed=rep(100, 20))
    my.table.plot0(x)
}

my.table.plot <- function(x, highlight.matrix = NULL, bold.lastrow = FALSE, n = 20, ...)
{ 
    ## Purpose: Split a data frame to fit in each page and call "my.table.plot0". 
    ## Arguments:
    ##   x: a data frame or matrix
    ##   highlight.matrix: if provided a logical matrix (same dimension as "x"). Then "TRUE" cells will be highlighted. 
    ##   bold.lastrow: if T, make the last row bold font.
    ##   n: how many rows to fit into each page.
    ##   ...: pass to "my.table.plot0". 
    ## Return: multiple plots to plot the data matrix. 
    ## Author: Feiming Chen, Date:  5 Jul 2017, 15:16
    ## ________________________________________________

    x <- as.data.frame(x)
    N <- nrow(x)
    if ( N <= n ) {
        my.table.plot0(x, highlight.matrix = highlight.matrix, bold.lastrow = bold.lastrow, ...)
    } else {
        s <- rep(seq.int(N %/% 20 + 1), each = n, length.out = N)
        xs <- split(x, s)
        
        ng <- length(xs)                # number of pages
        bold.lastrow.list <- c(rep(FALSE, ng-1), bold.lastrow)
        
        if (is.null(highlight.matrix)) {
            mapply(my.table.plot0, xs, bold.lastrow = bold.lastrow.list, MoreArgs = list(...))
        } else {
            ys <- split(highlight.matrix, s)
            mapply(my.table.plot0, xs, highlight.matrix = ys, bold.lastrow = bold.lastrow.list, MoreArgs = list(...))
        }
    }
}
if (F) {                                # Unit Test
     x <- data.frame(Type=rep('Car', 50), Speed=rep(100, 50))
     h <- matrix(rep(c(F, T), 50), ncol=2)
     my.table.plot(x, highlight.matrix = h, bold.lastrow = T)
}

Wednesday, June 21, 2017

Add a "Total" level to each specified categorical variable in the data frame

df.add.total.category  <- function(dat, var.list, new.level.label = "Total")
{ 
    ## Purpose: Add a "Total" level to each specified categorical variable in the data frame. 
    ## Arguments:
    ##   dat: a data frame with several categorical variables for making tables.
    ##   var.list: a list of variable names in "dat".
    ##   new.level.label: the name for the new "total" level for the specified variable. 
    ## Return: 
    ## Author: Feiming Chen, Date: 21 Jun 2017, 13:02
    ## ________________________________________________

    if ( missing(var.list) )  {
        var.list <- names(dat)
    }

    d0 <- dat                           # to make new label across all variables
    lapply(var.list,  function( v )  {
        d <- dat
        d[[v]] <- new.level.label
        d0[[v]] <<- new.level.label
        d
    }) -> dd

    do.call(rbind, c(list(dat), dd, list(d0)))
}
if (F) {                                # Unit Test
    dat <- data.frame(A=gl(3,3), B=gl(3, 1, 9), C=11:19)
    df.add.total.category(dat, var.list=c("A", "B"))
}

Monday, June 19, 2017

Put unique elements of a string vector into one long string

paste.unique.string  <- function(x, sep = "/")
{ 
    ## Purpose: Put unique elements of a string vector into one long string. 
    ## Arguments:
    ##    x: a string vector
    ##    sep: what separator to use when pasting all unique elements together. 
    ## Return:  a single string with unique elements of "x". 
    ## Author: Feiming Chen, Date: 19 Jun 2017, 14:00
    ## ________________________________________________
    
    paste0(unique(x), collapse = sep)
}
if (F) {                                # Unit Test
    paste.unique.string(c("a", "b", "a", "b")) # "a/b"
}

Add "0" to the front of numbers to make n-digit string

prepend.zero.to.numbers  <- function(x, n = NULL, pad.char = "0")
{ 
    ## Purpose: Add "0" to the front of numbers to make n-digit string. 
    ## Arguments:
    ##   x: a vector of integers/characters
    ##   n: Number of total digits after padding with zero. Default to
    ##      using the largest number to calculate "n".
    ##   pad.char: what character to pad.  Default to "0". 
    ## Return: a character vector with nuumbers padded with zero's in the head.
    ## Author: Feiming Chen, Date: 19 Jun 2017, 13:19
    ## ________________________________________________

    x1 <- as.character(x)
    y <- nchar(x1)
    if (is.null(n)) n <- nchar(as.character(max(x)))
    y1 <- n - y                         # how long is the padding for each number
    pad.vector <- sapply(y1, function(w) paste(rep(pad.char, w), collapse = ""))
    paste0(pad.vector, x1)
}
if (F) {                                # Unit Test
    x <- c(2, 14, 156, 1892)
    prepend.zero.to.numbers(x)          # "0002" "0014" "0156" "1892"
    prepend.zero.to.numbers(x, pad.char = "X") # "XXX2" "XX14" "X156" "1892"
    prepend.zero.to.numbers(x, n = 6)          # "000002" "000014" "000156" "001892"
    y <- c("A", "BC", "DEF")
    prepend.zero.to.numbers(y, 3) # "00A" "0BC" "DEF"
}

Parse a string to generate a data frame

parse2data.frame  <- function(x)
{ 
    ## Purpose: Parse a string to generate a data frame.
    ## Arguments:
    ##   x: a string with header and multiple rows of text that resembles a data frame.
    ##      Fields are separated by comma. 
    ## Return: a data frame
    ## Author: Feiming Chen, Date: 19 Jun 2017, 10:12
    ## ________________________________________________

    d <- read.csv(textConnection(x), as.is = T)
    i <- sapply(d, is.character)
    d[i] <- lapply(d[i], trimws)
    d
}
if (F) {                                # Unit Test
    a = 
"

  Test, ID, Name
  t1 ,  3,  Sun
  t2 ,  5,  Moon
  t3,   2,  Earth

"    
    b <- parse2data.frame(a)
    str(b)
    ## 'data.frame':    3 obs. of  2 variables:
    ##  $ ID  : int  3 5 2
    ##  $ Name: Factor w/ 3 levels " Earth"," Moon",..: 3 2 1
}

Thursday, June 15, 2017

Tokenize a string into a vector of tokens

tokenize.string <- function(x, split = "[ ,:;]+")
{ 
    ## Purpose: Tokenize a string into a vector of tokens
    ## Arguments:
    ##   x: a string or a vector of strings
    ##   split: split characters (regular expression)
    ## Return: a character vector (if "x" is a string) or a list of character vectors. 
    ## Author: Feiming Chen, Date: 15 Jun 2017, 15:01
    ## ________________________________________________
    
    ans <- strsplit(x, split="[ ,:;]+", fixed=F)
    if (length(x) == 1) ans <- ans[[1]]
    ans
}
if (F) {                                # Unit Test
    x <- "IND,  UNR   INC ; TCU"
    tokenize.string(x)
    ## [1] "IND" "UNR" "INC" "TCU"
    tokenize.string(rep(x, 3))
    ## [[1]]
    ## [1] "IND" "UNR" "INC" "TCU"

    ## [[2]]
    ## [1] "IND" "UNR" "INC" "TCU"

    ## [[3]]
    ## [1] "IND" "UNR" "INC" "TCU"
}

Wednesday, June 14, 2017

Count the number of distinct values in a vector or distinct rows in a data frame

N.levels <- function(x)
{ 
    ## Purpose: Count the number of distinct values in a vector or distinct rows in a data frame
    ## Arguments:
    ##    x: a vector (numeric or string), or a data frame. 
    ## Return: a count for the distinct values/rows in the vector or data frame. 
    ## Author: Feiming Chen, Date: 20 Mar 2017, 10:58
    ## ________________________________________________

    sum(!duplicated(x))
}
if (F) {                                # Unit Test
    N.levels(c(1,2,1,3))                # 3
    N.levels(c("a", "b", "a"))          # 2
    x <- data.frame(a=c(1,2,1), b=c(1, 2, 1))
    N.levels(x) # 2
}

Tuesday, June 6, 2017

Write data tables to Excel (.xlsx) file

wxls <- function(x, file = "test", ...)
{ 
    ## Purpose: Write data tables to Excel (.xlsx) file.
    ##          Require package "openxlsx". 
    ## Arguments:
    ##   x: a data frame or a list of data frames. 
    ##   file: a naked file name with no extension.
    ##   ...: passed to "write.xlsx". 
    ## Return: Generate an Excel (.xlsx) file.
    ## Author: Feiming Chen, Date:  6 Jun 2017, 15:11
    ## ________________________________________________
    
    require(openxlsx)
    f <- paste0(file, ".xlsx")

    write.xlsx(x, file = f, asTable = TRUE,
               creator = "Feiming Chen", 
               tableStyle = "TableStyleMedium2",   ...)
}
if (F) {                                # Unit Test
    df <- data.frame("Date" = Sys.Date()-0:4,
                     "Logical" = c(TRUE, FALSE, TRUE, TRUE, FALSE),
                     "Currency" = paste("$",-2:2),
                     "Accounting" = -2:2,
                     "hLink" = "https://CRAN.R-project.org/",
                     "Percentage" = seq(-1, 1, length.out=5),
                     "TinyNumber" = runif(5) / 1E9, stringsAsFactors = FALSE)
    class(df$Currency) <- "currency"
    class(df$Accounting) <- "accounting"
    class(df$hLink) <- "hyperlink"
    class(df$Percentage) <- "percentage"
    class(df$TinyNumber) <- "scientific"

    wxls(df)

    wxls(list(A = df, B = df, C= df))   # write to 3 separate tabs with corresponding list names. 
}

Monday, June 5, 2017

Plot "y" against "x" where "x" is a vector of labels

my.plot.vs.label <- function(x, y, ...)
{ 
    ## Purpose: Plot "y" against "x" where "x" is a vector of labels (string)
    ## Arguments:
    ##   x: a string vector (labels)
    ##   y: a numeric vector or matrix
    ##   ...: passed to "matplot"
    ## Return: a plot
    ## Author: Feiming Chen, Date:  5 Jun 2017, 14:46
    ## ________________________________________________

    s <- seq(x)
    matplot(s, y, type="b", axes = F, xlab ="", ylab="", ...)
    axis(side=2)
    axis(side=1, at=s, labels=x, las=2)

    yn <- ncol(y)
    yna <- names(y)
    if (!is.null(yn) && !is.null(yna)) {
        legend("topright", legend=yna, col=1:yn, lwd=1.5, lty=1:yn)
    }
}
if (F) {                                # Unit Test
    x <- LETTERS
    y <- matrix(rnorm(260), ncol=10)
    my.plot.vs.label(x, y)
    y2 <- as.data.frame(y)
    my.plot.vs.label(x, y2)
}

Friday, June 2, 2017

Read/Import all CSV files in a specified directory

batch.read.csv.files <- function(p)
{ 
    ## Purpose: Read/Import all CSV files in a specified directory.
    ##          Require package "readr". 
    ## Arguments:
    ##   p: a file directory with CSV files. 
    ## Return:
    ##   a list, each element is a data frame (imported from the file) and its name is the corresponding file name. 
    ## Author: Feiming Chen, Date:  2 Jun 2017, 11:09
    ## ________________________________________________

    file.list <- list.files(p, pattern=".csv", full.names = TRUE, ignore.case = TRUE)

    if (length(file.list) > 0) { 
        dat <- lapply(file.list, readr::read_csv)
        names(dat) <- sapply(file.list, basename)
    } else dat <- list()

    cat("Import", length(dat), "CSV Files.\n")
    dat
}

Wednesday, June 28, 2017

Thursday, June 22, 2017

Wednesday, June 21, 2017

Monday, June 19, 2017

Thursday, June 15, 2017

Wednesday, June 14, 2017

Tuesday, June 6, 2017

Monday, June 5, 2017

Friday, June 2, 2017

Blog Archive