vlookup <- function(x, x.vector, y.vector = seq(x.vector), exact = FALSE, na.action = FALSE)
{
## PURPOSE: Look up value in a reference table (Similar to Excel's VLOOKUP function)
## ARGUMENT:
## x: a vector of Look Up Value (should not contain NA values)
## x.vector: a vector where the "x" value can be located (approximately).
## y.vector: a vector containing the return value that correponds the identified location in the "x.vector".
## Default to the index to the x.vector.
## exact: if T, the numeric values have to match exactly.
## na.action: If T and if cannot find the lookup value, return the input value.
## RETURN: a scale value that corresponds to the "x" lookup value.
## DATE: 20 Oct 2016, 16:30
## -----------------------------------------------------
ans <- NULL
if (is.numeric(x)) {
if (exact) {
ans <- y.vector[match(x, x.vector)]
} else {
ans <- y.vector[match.approx(x, x.vector)]
}
} else if (is.character(x)) {
ans <- y.vector[match(x, x.vector)]
}
if (na.action) {
idx <- is.na(ans)
ans[idx] <- x[idx]
}
ans
}
if (F) { # Unit Test
vlookup(c(68.2, 73), c(54, 60, 68.1, 72, 80), LETTERS[1:5]) # "C" "D"
vlookup(c(2, 3.1), 1:5) # 2 3
vlookup(c(2, 3.1), 1:5, exact = T) # 2 NA
vlookup(c("C", "E"), LETTERS[1:5], LETTERS[11:15]) # "M" "O"
vlookup(c("C", "X"), LETTERS[1:5], LETTERS[11:15]) # "M" NA
vlookup(c("C", "X"), LETTERS[1:5], LETTERS[11:15], na.action = T) # "M" "X"
}
match.approx <- function(x, y) {
## Purpose: Match Approximately for Numerical Data
## Arguments:
## "x": a vector of numeric values.
## "y": a vector of numeric values.
## RETURN:
## The index in "y" that indicates the closest y value to each of "x" value.
## Author: Feiming Chen, Date: 15 Feb 2017, 10:41
## ________________________________________________
sapply(x, function(x0) which.min(abs(x0 - y)))
}
if (F) {
match.approx(c(4.2, 1.2, 15), 1:10) # 4 1 10
}
Wednesday, February 15, 2017
Look up value in a reference table (Similar to Excel's VLOOKUP function)
Simple Principal Component Analysis
my.pca <- function(dat.feature)
{
## Purpose: Principal Component Analysis
## Arguments:
## dat.feature: a data feature matrix, where each column is a feature vector.
## Return: PCA plots and result.
## Author: Feiming Chen, Date: 15 Feb 2017, 09:59
## ________________________________________________
p <- prcomp(dat.feature)
plot(p, main = "Scree Plot", xlab = "Principal Components")
dev.new()
biplot(p)
print(summary(p))
invisible(p)
}
if (F) { # Unit Test
d <- matrix(rnorm(1000), 20)
my.pca(d)
}
Tuesday, February 14, 2017
Making Barplot with Confidence Intervals
my.barplot <- function(h, hl, hu, ...)
{
## Purpose: Template function for making Barplot with Confidence Interval.
## Require R package "gplots". Refer to documentation of "gplots::barplots2".
## Arguments:
## h: Height Matrix. Each column is a group of heights. The dimnames will be used for labeling.
## hl: Lower CI. Same dimension as "h".
## hu: Upper CI. Same dimension as "h".
## ...: passed to "gplots::barplot2"
## Return:
## Author: Feiming Chen, Date: 14 Feb 2017, 14:22
## ________________________________________________
old.par <- par("mar")
par(mar = c(5, 10, 4, 5))
gplots::barplot2(h, beside = T, legend.text = T, horiz = T, col=rainbow(ncol(h)), border = NA,
cex.axis = 2, cex.names = 1.5, cex.lab=2, las = 1, xpd = F,
plot.ci = TRUE, ci.l = hl, ci.u = hu, ci.width = 0.3, ci.lwd = 2,
plot.grid = TRUE,
...) -> mp
## Put numbers on the bars
for (i in seq(ncol(mp))) text(1, mp[,i], labels=round(h[,i], 2), cex=2)
par(old.par)
}
if (F) { # Unit Test
x <- structure(c(9.5, 10.5, 1.2, 0.4, 7, 12, 9.8, 11.2, 8.3, 9.6, 0.8, 0.3, 6.7, 9.8, 9, 10.2), .Dim = c(8L, 2L), .Dimnames = list(c("Type R", "Type B", "R.SD", "B.SD", "Rl", "Ru", "Bl", "Bu"), c("Class 1", "Class 2")))
h <- x[1:2,]
hl <- x[c(5,7),]
hu <- x[c(6,8),]
my.barplot(h, hl, hu, main="Comparison", xlab="Average Result", xlim = c(0, 20))
}
Generate HTML or PNG representation of a data / table / model object
tex.print <- function(x, type=c("PNG", "HTML"), file=NULL, caption = file, digits=0) {
## Purpose: Generate HTML or PNG representation of a data/table/model object.
## Require R "memisc" package.
## Arguments:
## x: an object (table, ftable, data frame, model object, etc. that are acceptable in R "memisc" package.
## type: what to generate --
## "HTML" shows a HTML table in a web browser.
## "PNG" shows a PNG (image) file (require linux commands: latex, dvipng, display).
## file: make a copy of the PNG file with this name. Also used for the table name.
## caption: table caption.
## digits: number of decimal places to use.
## Return: Display the data representation and an image file (type="PNG") for insertion.
## Author: Feiming Chen, Date: 14 Feb 2017, 11:59
## ________________________________________________
type <- match.arg(type)
is.tabular.data <- "tabular" %in% class(x) # "x" is from the "tables" package output
is.tbl.data <- "tbl" %in% class(x) # "x" is from the "dplyr" package output
if (is.tbl.data) {
x <- as.data.frame(x)
}
switch(type,
HTML = {
## Show HTML table in a Web Browser
if (is.null(file)) {
memisc::show_html(x, digits = digits)
} else {
memisc::write_html(x, file = paste0(file,".html"), digits = digits)
}
},
PNG = {
## Generate LaTeX and PNG Image file
if (is.tabular.data) {
library(tables)
booktabs()
}
if (is.tabular.data || is.tbl.data) {
w <- Hmisc::latex(x, file="X.tex", ctable=TRUE, caption = caption, digits = digits)
if (is.tabular.data) w$style <- c("booktabs", "dcolumn")
d <- Hmisc::dvi(w) # convert to DVI file
system(paste("dvipng -q* -o X.png -T tight -D 200", d$file)) # convert to PNG file with Tight box and Resolution of 200
file.remove("X.tex")
} else {
r <- memisc:::toLatex.default(x, digits=digits, show.vars=TRUE)
f <- file("X.tex", "w")
writeLines(c("\\documentclass{article}",
"\\usepackage{booktabs}",
"\\usepackage{dcolumn}",
"\\begin{document}",
"\\pagenumbering{gobble}",
"\\begin{table}",
"\\centering"), f)
if (!is.null(caption)) writeLines(paste0("\\caption{", caption, "}"), f)
writeLines(r, f)
writeLines(c("\\end{table}", "\\end{document}"), f)
close(f)
system("latex X.tex >/dev/null") # Generate DVI file from LaTeX file (X.tex -=> X.dvi)
system("dvipng -q* -o X.png -T tight -D 200 X.dvi >/dev/null") # convert to PNG file with Tight box and Resolution of 200
file.remove("X.tex", "X.log", "X.aux", "X.dvi")
}
if (is.null(file)) {
if (!grepl("/tmp", getwd())) {
file.copy("X.png", "~/tmp/X.png", overwrite = T)
}
system("display ~/tmp/X.png &") # display the PNG file.
} else {
file.rename("X.png", paste0(file,".png"))
}
}
)
invisible(NULL)
}
if (F) { # Unit Test
x <- data.frame(age=c(1,1,1,5,5,5), height=c(10,12,9,20,23,18))
tex.print(x)
tex.print(ftable(x))
x %>% group_by(age) %>% summarise(heights = mean(height)) -> y
tex.print(y)
tex.print(x, file="Test")
tex.print(y, type="HTML")
tex.print(y, type="HTML", file = "Test")
library(tables)
a <- tabular( (Species + 1) ~ (n=1) + Format(digits=2) * (Sepal.Length + Sepal.Width) * (mean + sd), data=iris )
tex.print(a)
tex.print(a, file = "test")
}
Apply a function to a vector in an accumulative manner (like cumsum)
cum.func <- function(x, func, ...)
{
## Purpose: Apply a function to a vector in an accumulative manner (like cumsum)
## Arguments:
## x: a vector
## func: a function
## ...: passed to "func"
## Return: a vector that is the result of:
## c( func(x[1]), func(x[1:2]), func(x[1:3]), func(x[1:4]), func(x[1:5]), ... )
## Author: Feiming Chen, Date: 30 Jan 2017, 14:06
## ________________________________________________
N <- length(x)
y <- rep_len(NA, N)
for (i in 1:N) y[i] <- func(x[1:i], ...)
y[is.na(x)] <- NA
y
}
if (F) { # Unit Test
cum.func(5:1, mean) # c(5.0, 4.5, 4.0, 3.5, 3.0))
cum.func(c(NA, 5:1, NA), mean, na.rm=T) # c(NA, 5.0, 4.5, 4.0, 3.5, 3.0, NA)
cum.func(c(NA, 5:1, NA), sd, na.rm=T) # NA NA 0.70711 1.00000 1.29099 1.58114 NA
}
Overlay a new plot on an existing plot
plot.overlay <- function(..., label="2nd Y", col="blue")
{
## Purpose: Overlay a new plot on an existing plot.
## Arguments:
## ...: Passed to the "plot" function for the new plot.
## label: The secondary Y-axis label.
## col: The color of the ticks and tick labels for the new plot.
## Return: A modified plot.
## Author: Feiming Chen, Date: 6 Feb 2017, 10:19
## ________________________________________________
par(new=T)
plot(..., axes=F, xlab="", ylab="", col=col)
axis(side=4, col.ticks = col, col.axis=col)
mtext(label, side=4, line=-1, col=col)
}
if (F) { # Unit Test
plot(rnorm(100))
plot.overlay(rnorm(100), type="l", label="Test")
}
Apply a function to each consecutive pairs in a vector.
diff.func <- function(x, func)
{
## Purpose: Apply a function to each consecutive pairs in a vector.
## Arguments:
## x: a vector
## func: a function that operature on two elements (of "x")
## Return: a vector, whose length is length(x) - 1, that looks like (where N=length(x))
## func(x[2], x[1]), func(x[3], x[2]), func(x[4], x[3]), ..., func(x[N], x[N-1]).
## Author: Feiming Chen, Date: 9 Feb 2017, 11:25
## ________________________________________________
## embed in 2-dim space to make consecutive pairs
x2 <- embed(x, 2) # N - 1 pairs
## apply a function to each consecutive paris (reversely)
x3 <- apply(x2, 1, func)
x3
}
if (F) { # Unit Test
diff.func(1:5, diff) # -1 -1 -1 -1
diff.func(1:5, mean) # 1.5 2.5 3.5 4.5
}
Plot multiple lines together using "matplot"
my.line.plot <- function(x, y, ...)
{
## Purpose: Template function for making lines by group.
## Arguments:
## x: X-coords
## y: a matrix with Y-coords (each column has a set of Y values)
## ...: passed to "plot"
## Return: A plot assuming log X-axis with axes tickes given by "x".
## Author: Feiming Chen, Date: 9 Feb 2017, 14:02
## ________________________________________________n
N <- ncol(y) # number of lines
matplot(x, y, type="b", log="x", xaxt="n", lwd=2, lty=seq(N), col = rainbow(N), ...)
matpoints(x, y, log="x", xaxt="n", col = "black", ...)
axis(1, at=x)
}
if (F) { # Unit Test
x <- c(0.1, 1, 10)
y <- matrix(rnorm(9), 3)
my.line.plot(x, y, xlab="Tiime", ylab="Signal", main="Line Plot by Groups")
}
Monday, February 6, 2017
R Code: Apply a function to each element of a list (or data frame) or to each element of a list within a list of data while retaining the names attribute of each nested list
lf.compute.data.column <- function(dat.list, func, ...)
{
## Purpose: Apply a function to each column of a list (e.g. data frame). Retains the names information.
## Basis for processing more complex data (e.g. a list of a list)
## Arguments:
## dat.list: a list of data (e.g. data frame)
## func: a function that computes a vector (may have an argument "name")
## ...: passed to "func"
## Return:
## a list of outputs.
## Author: Feiming Chen, Date: 30 Jan 2017, 13:21
## ________________________________________________
this.name <- null2na(names(dat.list))
mapply(func,
dat.list,
name = this.name,
...,
SIMPLIFY = FALSE
)
}
if (F) { # Unit Test
dat.list = data.frame(x=rnorm(10), y=rnorm(10))
lf.compute.data.column(dat.list, mean)
lf.compute.data.column(dat.list, function(x, ...) { c(a=mean(x), b=sd(x)) })
lf.compute.data.column(dat.list, function(x, name) { list(a=mean(x), n=name) })
}
lf.compute.data.column.2 <- function(dat.list, func, ...)
{
## Purpose: Apply a function to a list of a list of data. Retains the names information.
## Arguments:
## dat.list: a list of a list
## func: a function that computes a vector. (may have an argument "name")
## Return: a list of list of results
## Author: Feiming Chen, Date: 6 Feb 2017, 12:50
## ________________________________________________
this.name <- null2na(names(dat.list))
mapply(
function(d, name) {
if (!is.na(name)) names(d) <- paste0(name, ".", names(d))
lf.compute.data.column(d, func)
},
dat.list,
name = this.name,
...,
SIMPLIFY = FALSE
)
}
if (F) { # Unit Test
dat.list.2 = list(test1 = data.frame(x=1:5, y=6:10), test2= data.frame(x=11:15, y=16:20))
lf.compute.data.column.2(dat.list.2, mean)
lf.compute.data.column.2(dat.list.2, function(x, ...) { c(m=mean(x), s=sd(x)) })
lf.compute.data.column.2(dat.list.2, function(x, name) { list(m=mean(x), n=name) })
}
null2na <- function(x) {
## if the value of x is NULL(as determined by length test since is.null may
## not always work), turn it into NA so that an object
## made of it will always exist.
if (length(x)==0)
x <- NA
x
}
R Code: Extract a fixed component of a list within a list and optionally apply a function
lf <- function(obj, comp, fun=identity, wrap=identity,
return.vector=TRUE, verbose=FALSE,
...) {
## WARNING: "comp" have to be a real string instead of a variable
## Functional Apply
## "comp": a character string, indicated a list component.
## "obj" is a list. "comp" is a component of each element of the
## list "obj". Apply a funcion (just an unquoted name) to each "comp" of the list
## "obj". "..." is passed to "fun". use "fun=identity" if no
## processing is needed. "wrap" is applied to the final result.
## "return.vector" will force the result to be a vector.
comp <- substitute(comp)
e <- paste(deparse(substitute(wrap)), "(lapply(obj,",
deparse(substitute(function(x) fun(x[[comp]], ...))), "))")
ans <- eval(parse(text=e))
if (verbose) cat("expression:", e, "\n")
if (return.vector)
ans <- sapply(ans, function(x) if (is.null(x)) NA else x)
ans
}
if (F) {
a <- list(x=c(1:3), y=c(4:6))
lf(a, 2) # c(x=2, y=5)
a <- list(x=list(median=3, mean=4), y=list(median=5, mean=6))
lf(a, "mean") # c(x=4, y=6)
lf(a, "mean", sqrt, verbose=T) # c(x=2, y=2.4495)
lf(a, "mean", sqrt, wrap=length, verbose=T) # 2
}
R Code: Apply a function to a vector in an accumulative manner
cum.func <- function(x, func)
{
## Purpose: Apply a function to a vector in an accumulative manner (like cumsum)
## Arguments:
## x: a vector
## func: a function
## Return: a vector that is the result of:
## c( func(x[1]), func(x[1:2]), func(x[1:3]), func(x[1:4]), func(x[1:5]), ... )
## Author: Feiming Chen, Date: 30 Jan 2017, 14:06
## ________________________________________________
N <- length(x)
y <- rep_len(NA, N)
for (i in 1:N) y[i] <- func(x[1:i])
y
}
if (F) { # Unit Test
identical(cum.func(5:1, mean), c(5.0, 4.5, 4.0, 3.5, 3.0))
}
R Function for Overlaying a New Plot on an Existing Plot
plot.overlay <- function(..., label="2nd Y", col="blue")
{
## Purpose: Overlay a new plot on an existing plot.
## Arguments:
## ...: Passed to the "plot" function for the new plot.
## label: The secondary Y-axis label.
## col: The color of the ticks and tick labels for the new plot.
## Return: A modified plot.
## Author: Feiming Chen, Date: 6 Feb 2017, 10:19
## ________________________________________________
par(new=T)
plot(..., axes=F, xlab="", ylab="", col=col)
axis(side=4, col.ticks = col, col.axis=col)
mtext(label, side=4, line=-1, col=col)
}
if (F) { # Unit Test
plot(rnorm(100))
plot.overlay(rnorm(100), type="l", label="Test")
}
Subscribe to:
Posts (Atom)