一个使用 dplyr 函数的可行解决方案:
library(dplyr)
# Example data: one row per patient id, three medication columns, and an
# unrelated `other` column whose last value is also "paracetamol".
df <- data.frame(
  id = c("ID1", "ID2", "ID3", "ID4"),
  Medication1 = c("paracetamol", "ibuprofen", "opiate", "sertraline"),
  Medication2 = c("Lipitor", "ketamine", "zoloft", "xanax"),
  Medication3 = c("ibuprofen", "paracetamol", "Zocor", "Zestril"),
  other = c("A", "B", "C", "paracetamol")
)
df
#> id Medication1 Medication2 Medication3 other
#> 1 ID1 paracetamol Lipitor ibuprofen A
#> 2 ID2 ibuprofen ketamine paracetamol B
#> 3 ID3 opiate zoloft Zocor C
#> 4 ID4 sertraline xanax Zestril paracetamol
# Search only the "Medication*" columns for "paracetamol"; the `other`
# column is not part of the selection, so a match there does not keep a row.
filter(
  df,
  if_any(
    starts_with("Medication"),
    function(col) grepl("paracetamol", col)
  )
)
#> id Medication1 Medication2 Medication3 other
#> 1 ID1 paracetamol Lipitor ibuprofen A
#> 2 ID2 ibuprofen ketamine paracetamol B
创建于 2023-03-10,使用 reprex v2.0.2 生成
要同时匹配大小写不同的写法(例如 "paracetamol" 和 "Paracetamol"),您可以使用 ignore.case = TRUE:
# Same search over the "Medication*" columns, but case-insensitive.
filter(
  df,
  if_any(
    starts_with("Medication"),
    function(col) grepl("paracetamol", col, ignore.case = TRUE)
  )
)
#> id Medication1 Medication2 Medication3 other
#> 1 ID1 paracetamol Lipitor ibuprofen A
#> 2 ID2 ibuprofen ketamine paracetamol B
如果您还想匹配有效成分相同但名称不同的药品(例如同时匹配 "paracetamol" 和 "Tylenol"),可以在正则表达式中用 | 列出多个名称:
# Case-insensitive search of the "Medication*" columns for either name;
# the regex alternation `|` matches "paracetamol" or "Tylenol".
filter(
  df,
  if_any(
    starts_with("Medication"),
    function(col) grepl("paracetamol|Tylenol", col, ignore.case = TRUE)
  )
)
#> id Medication1 Medication2 Medication3 other
#> 1 ID1 paracetamol Lipitor ibuprofen A
#> 2 ID2 ibuprofen ketamine paracetamol B
如果同一个 ID 出现在多行中(例如 ID1),情况会稍微复杂一些,但有一种可行的做法是:
library(dplyr)
library(tidyr)
# Example data where one patient id ("ID1") appears on more than one row.
df2 <- data.frame(
  id = c("ID1", "ID2", "ID1", "ID3"),
  Medication1 = c("paracetamol", "ibuprofen", "opiate", "sertraline"),
  Medication2 = c("Lipitor", "ketamine", "zoloft", "xanax"),
  Medication3 = c("ibuprofen", "paracetamol", "Zocor", "Zestril"),
  other = c("A", "B", "C", "paracetamol")
)
df2
#> id Medication1 Medication2 Medication3 other
#> 1 ID1 paracetamol Lipitor ibuprofen A
#> 2 ID2 ibuprofen ketamine paracetamol B
#> 3 ID1 opiate zoloft Zocor C
#> 4 ID3 sertraline xanax Zestril paracetamol
# Keep every row belonging to an id that has "paracetamol" in at least one
# "Medication*" column on at least one of its rows.
#
# if_any() flags the rows that contain a match, any() lifts that flag to the
# whole id group, and ungroup() drops the grouping so later steps are not
# silently computed per id. No long/wide reshaping is involved, so the
# original column order is kept and rows that share the same `id` and
# `other` values stay as separate rows (a pivot_wider() round trip would
# instead warn and return list-columns for such duplicates).
df2 %>%
  group_by(id) %>%
  filter(any(if_any(starts_with("Medication"), ~ .x == "paracetamol"))) %>%
  ungroup()
#> # A tibble: 3 × 5
#>   id    Medication1 Medication2 Medication3 other
#>   <chr> <chr>       <chr>       <chr>       <chr>
#> 1 ID1   paracetamol Lipitor     ibuprofen   A
#> 2 ID2   ibuprofen   ketamine    paracetamol B
#> 3 ID1   opiate      zoloft      Zocor       C