以下是data.table
%的解决方案
Input
library(data.table)
dat1 <- data.table(id = c(1, 2, 34, 99),
class = c("sports", "", "music, sports", ""),
hobby = c("knitting, music, sports", "", "", "music"))
dat1
#> id class hobby
#> 1: 1 sports knitting, music, sports
#> 2: 2
#> 3: 34 music, sports
#> 4: 99 music
Dataprep
# in long format
dt_melted <- melt.data.table(dat1, id.vars = "id", variable.name = "type", value.name = "value")
dt_melted
#> id type value
#> 1: 1 class sports
#> 2: 2 class
#> 3: 34 class music, sports
#> 4: 99 class
#> 5: 1 hobby knitting, music, sports
#> 6: 2 hobby
#> 7: 34 hobby
#> 8: 99 hobby music
# split values by comma
dt_splitted <- dt_melted[, .(input = unlist(data.table::tstrsplit(value, ","))), by = .(id, type)]
dt_splitted
#> id type input
#> 1: 1 class sports
#> 2: 34 class music
#> 3: 34 class sports
#> 4: 1 hobby knitting
#> 5: 1 hobby music
#> 6: 1 hobby sports
#> 7: 99 hobby music
Last Step 1
# bring back to desired wide format
dt_casted <- dcast.data.table(dt_splitted,
formula = "input ~ type",
value.var = "id",
fun.aggregate = paste,
collapse = ", ")
dt_casted
#> input class hobby
#> 1: knitting 1
#> 2: music 34 1, 99
#> 3: sports 1, 34 1
Last Step 2 (more verbose)
# combine ids by class/hobby
dt_splitted[, .(class = paste(id[type == "class"], collapse = ", "),
hobby = paste(id[type == "hobby"], collapse = ", ")),
by = .(input = trimws(input))]
#> input class hobby
#> 1: sports 1, 34 1
#> 2: music 34 1, 99
#> 3: knitting 1