Update
借鉴 @benson23 的思路:先在字符串中插入一个特殊字符(例如 `@`)作为分隔标记,然后可以尝试使用 `strsplit` 加嵌套的 `(g)sub`,代码如下:
# Tokenise `example` with a marker trick: "@" is inserted around the
# parenthesised parts, the string is cut at every "@", and every piece that
# does not start with "(" is broken into single characters.
# Note: a literal "@" already present in `example` also acts as a cut point,
# because the final split is done on "@".
local({
  # ")text(" -> ")@text@(" : wrap each run of non-parenthesis characters that
  # sits between a ")" and a "(" in markers
  marked <- gsub("(\\))([^()]+)(\\()", "\\1@\\2@\\3", example)
  # put a marker in front of the first "("
  marked <- sub("\\(", "@(", marked)
  # put a marker behind the last ")" (the greedy ".*" runs to the final one)
  marked <- sub("(.*)\\)", "\\1)@", marked)

  pieces <- unlist(strsplit(marked, "@"))
  tokens <- lapply(pieces, function(piece) {
    if (!startsWith(piece, "(")) {
      # plain text: one element per character
      return(strsplit(piece, ""))
    }
    # parenthesised piece: kept whole
    piece
  })
  unlist(tokens)
})
下面是一个较为冗长的实现,用于查找成对的括号并据此进行拆分:
# Tokenise `example`: every outermost "(...)" group (including anything nested
# inside it) becomes one element of `res`; every character outside such a
# group becomes its own element. `res` is NULL for an empty string.

# split string by characters
v <- unlist(strsplit(example, ""))

# +1 for every "(", -1 for every ")", 0 for anything else
step <- (v == "(") - (v == ")")

# Running nesting depth, clamped at zero so that a stray ")" cannot push it
# negative. The leading 0 is the depth before the first character, so the
# vector has length(v) + 1 entries.
depth <- Reduce(function(d, s) max(d + s, 0), step, accumulate = TRUE, 0)
depth_before <- depth[-length(depth)]
depth_after <- depth[-1]

# A character belongs to a group if the depth is positive on either side of
# it; this covers the outermost "(" (0 -> 1) and its matching ")" (1 -> 0).
in_group <- depth_before > 0 | depth_after > 0
# An outermost "(" is the only character that lifts the depth from zero.
opens_group <- depth_before == 0 & depth_after > 0

# A new token starts at every outermost "(" and at every character outside a
# group. Tracking depth (rather than counting preceding "(") keeps sibling
# nested groups such as "(a (b) (c))" in one piece.
token_start <- opens_group | !in_group

# output
res <- unlist(
  lapply(split(v, cumsum(token_start)), paste0, collapse = ""),
  use.names = FALSE
)
Test
让我们用一个更复杂的例子 `example <- c("_(Acetyl (Protein (N-term)) XXX) DDDIAAM(Oxidation (M))CK_")` 来试一试,得到的 `res` 如下:
[1] "_" "(Acetyl (Protein (N-term)) XXX)"
[3] " " "D"
[5] "D" "D"
[7] "I" "A"
[9] "A" "M"
[11] "(Oxidation (M))" "C"
[13] "K" "_"