Tuesday, December 12, 2017

Compare two tables with the same structure by merging them


compare.tables <- function(d1, d2, by.idx = 1) {
    ## Purpose: Put two tables that share the same structure side by side
    ##   by full-joining them on a set of common key columns.
    ## Arguments:
    ##    d1: table A; its column names supply the join keys.
    ##    d2: table B; joined against d1 on those same column names.
    ##    by.idx: index vector selecting the key columns out of names(d1)
    ##            (defaults to the first column).
    ## Return: the full-joined table for comparison, returned invisibly.
    ## Side effect: the joined table is passed to tex.print() with
    ##   type = "HTML" (helper defined elsewhere; presumably renders the
    ##   table as HTML -- confirm against its definition).
    ## NOTE(review): full_join() is presumably dplyr's; it must already be
    ##   attached by the caller.

    ## Translate the positional index into the key column names.
    key.names <- names(d1)[by.idx]

    joined <- full_join(d1, d2, by = key.names)

    tex.print(joined, type = "HTML")

    invisible(joined)
}
## Unit Test (manual scratch block).
## Guarded by the reserved constant FALSE so it never runs when the file is
## sourced; `F` is an ordinary variable that can be reassigned, which would
## silently enable the block. Run the lines by hand to exercise the function.
if (FALSE) {
    d1 <- rc()                          # rc() is defined elsewhere; presumably builds a sample table
    d2 <- rc()
    by.idx <- 1:4                       # join on the first four columns
    compare.tables(d1, d2, by.idx)
}

List duplicated files in a directory


list.dup.files <- function(p, file.name.head = 20)
{
    ## Purpose: List duplicated files in a directory. Two files count as
    ##   duplicates of each other when the first `file.name.head` characters
    ##   of their names are identical.
    ## Arguments:
    ##   p: path of the directory to scan (handed to dir()).
    ##   file.name.head: number of starting characters in the file name to
    ##                   use for duplication detection.
    ## Return: character vector of full paths, file.path(p, name), of the
    ##   duplicated files, in decreasing sorted order of file name. Within
    ##   each group sharing a prefix, the name that sorts last is kept and
    ##   every other member is returned. character(0) when nothing is
    ##   duplicated.
    ## Side effect: prints the duplicated names, or a "no duplicates"
    ##   message, to the console.
    ## Author: Feiming Chen, Date: 13 Nov 2017, 16:36
    ## ________________________________________________

    ## Decreasing sort puts the highest-sorting name of each prefix group
    ## first, so duplicated() below flags only the remaining ("older",
    ## assuming names embed a sortable date/version) members.
    file.names <- sort(dir(p), decreasing = TRUE)

    ## substr() is vectorized: no sapply()/as.character() round trip needed.
    name.heads <- substr(file.names, 1, file.name.head)

    dup.names <- file.names[duplicated(name.heads)]

    if (length(dup.names) > 0) {
        cat("Duplicated Older Files:\n")
        print(dup.names)
    } else {
        cat("NO Duplicated Files.\n")
    }

    file.path(p, dup.names)
}
## Unit Test (manual scratch block).
## Guarded by the reserved constant FALSE rather than the reassignable `F`:
## the body DELETES files, so it must never run by accident when the file is
## sourced. Run the lines by hand, and inspect `x` before removing anything.
if (FALSE) {
    p <- "~/tmp/Updated-Databases"
    file.name.head <- 25
    x <- list.dup.files(p, file.name.head)
    file.remove(x)                      # destructive: removes every path returned above
}