Tuesday, April 18, 2017

Extract a specific "pattern" from each element of a character vector

extract.pattern <- function(x, pattern = "([[:alnum:]]+)") {
    ## Purpose: Extract a specific "pattern" from each element of a character vector.
    ## Arguments:
    ##   x: a character vector (NA elements are allowed and stay NA).
    ##   pattern: a regular expression containing a capture group, i.e. a part
    ##            wrapped in parentheses "(...)".  The text matched by the FIRST
    ##            capture group is what gets extracted.
    ## Return: a character vector, same length as x, holding the extracted text.
    ##         NA is filled in where no match is found.
    ## Note: the pattern is wrapped as ".*<pattern>.*" and the leading ".*" is
    ##       greedy, so when several parts of a string could match, the capture
    ##       is taken as far to the right as possible (e.g. "a-b" gives "b"
    ##       with the default pattern).
    ## Author: Feiming Chen, Date: 18 Apr 2017, 14:34
    ## ________________________________________________

    r <- paste0(".*", pattern, ".*")
    out <- sub(r, "\\1", x)

    ## sub() returns elements that do not match unchanged; replace those with
    ## NA so the result honours the documented contract.  grepl() yields FALSE
    ## for NA input, so NA elements stay NA as well.
    out[!grepl(r, x)] <- NA_character_
    out
}
## Unit Test: guarded by a literal FALSE so it never runs when the file is
## sourced; copy the lines into a console to try them.  (FALSE rather than F,
## because F is an ordinary variable that can be reassigned.)
if (FALSE) {
    x <- c(NA, "a-b", "a-d", "b-c", "d-e")
    extract.pattern(x)                  # extract alpha-numeric pattern
    ## [1] NA  "b" "d" "c" "e"

    x <- c("ab(A)x", "dc(B)y")
    extract.pattern(x, "(\\(.*\\))")      # extract the parenthesised string, parentheses included
    ## [1] "(A)" "(B)"
    extract.pattern(x, "\\)(.*)")      # extract the string after ")"
    ## [1] "x" "y"

    x <- c("V167.G56", "V166.R56", "V122.G41", "V163.R55", "V165.B55", "V175.R59")
    extract.pattern(x, "(R|G|B)")       # extract R or G or B character only.
    ## [1] "G" "R" "G" "R" "B" "R"
    extract.pattern(x, "[RGB]{1}([0-9]+)") # extract the numbers following the R, G, B character.
    ## [1] "56" "56" "41" "55" "55" "59"
}

No comments:

Post a Comment