Tuesday, December 12, 2017

List duplicated files in a directory


list.dup.files <- function(p, file.name.head = 20)
{
    ## Purpose: List duplicated files in a directory.  Two files count as
    ##   duplicates when the first `file.name.head` characters of their
    ##   names are identical.
    ## Arguments:
    ##   p: path of the directory to scan (passed to dir()).
    ##   file.name.head: number of starting characters in the file name to use for duplication detection
    ## Return: a character vector of paths, file.path(p, <name>), one per
    ##   duplicated file; character(0) when nothing is duplicated.
    ##   Names are sorted in decreasing order before detection, so within
    ##   each group the name that sorts last is kept and every other member
    ##   of the group is reported.
    ## Side effect: prints the duplicated file names, or a message saying
    ##   that there are none.
    ## Author: Feiming Chen, Date: 13 Nov 2017, 16:36
    ## ________________________________________________

    ## TRUE (a reserved word) instead of T: T is an ordinary variable that
    ## can be reassigned, which would silently flip the sort order and
    ## therefore which file of each group gets reported.
    file.names <- sort(dir(p), decreasing = TRUE)

    ## substr() is vectorised, so no sapply() wrapper is needed.
    name.heads <- substr(file.names, 1, file.name.head)

    ## duplicated() flags every occurrence after the first one in the
    ## sorted vector, i.e. everything except the name that sorts last.
    dup.names <- file.names[duplicated(name.heads)]

    if (length(dup.names) > 0) {
        cat("Duplicated Older Files:\n")
        print(dup.names)
    } else {
        cat("NO Duplicated Files.\n")
    }
    file.path(p, dup.names)
}
if (FALSE) {                            # Unit Test (disabled; run by hand)
    ## FALSE (a reserved word) instead of F: F is an ordinary variable, and
    ## if it were ever rebound to a true value this block would run on
    ## source() and delete files.
    ## CAUTION: file.remove() permanently deletes every path that
    ## list.dup.files() returns.
    p <- "~/tmp/Updated-Databases"
    file.name.head <- 25
    x <- list.dup.files(p, file.name.head)
    file.remove(x)
}

No comments:

Post a Comment