tokenize.string <- function(x, split = "[ ,:;]+")
{
    ## Purpose: Tokenize a string into a vector of tokens
    ## Arguments:
    ##   x: a string or a vector of strings
    ##   split: regular expression matching the separators between tokens;
    ##          by default, any run of spaces, commas, colons or semicolons
    ## Return: a character vector (if "x" is a single string), otherwise a
    ##         list with one character vector of tokens per element of "x".
    ## Note: a string that starts with a separator yields a leading "" token
    ##       (standard strsplit() behaviour).
    ## Author: Feiming Chen, Date: 15 Jun 2017, 15:01
    ## ________________________________________________
    ## Pass the caller's "split" through instead of a hard-coded pattern, so
    ## that a custom separator is actually honoured.
    ans <- strsplit(x, split = split, fixed = FALSE)
    ## Unwrap the one-element list so a single string gives a plain vector.
    if (length(x) == 1) ans <- ans[[1]]
    ans
}
if (FALSE) { # Unit Test -- never executed; run the lines by hand
    ## Guarded with the literal FALSE (not the reassignable shorthand F) so
    ## that the block stays disabled whatever F is bound to.
    x <- "IND, UNR INC ; TCU"

    ## A single string returns a plain character vector.
    tokenize.string(x)
    ## [1] "IND" "UNR" "INC" "TCU"

    ## A vector of strings returns a list, one token vector per element.
    tokenize.string(rep(x, 3))
    ## [[1]]
    ## [1] "IND" "UNR" "INC" "TCU"
    ## [[2]]
    ## [1] "IND" "UNR" "INC" "TCU"
    ## [[3]]
    ## [1] "IND" "UNR" "INC" "TCU"
}
## Thursday, June 15, 2017
## Tokenize a string into a vector of tokens
## Subscribe to:
## Post Comments (Atom)
## No comments:
## Post a Comment