我有一个数据框,其中包含50种中介技术和4种不同的类别。每个人都可以把一种技术归入多个类别。我想统计每种技术被归入某个特定类别(1、2、3或4)的次数,并用这些计数创建一个新的数据框。

当前数据帧的示例:

Techniques_1 Techniques_2 Techniques_3 Techniques_4 Techniques_5 Techniques_6
1,3 2 4 2,3 1 1
2,3 3 4 2,3 1 1
2 2 3 3 1,2 1,2,3
1 3 3 1 2 2
2 2,3 2,3 1,2,3 1 1,2

所需的新数据帧:

Category_Count Technique_1 Technique_2 Technique_3 Technique_4 Technique_5 Technique_6
Category_1 2 0 0 2 4 4
Category_2 3 3 1 3 2 3
Category_3 2 3 3 4 0 1
Category_4 0 0 2 0 0 0
Here is my data set. The counts differ from my example above, because the example values were made up. I had a go at using dput; hopefully this is correct:

# dput() output of the asker's real data set, reproduced verbatim.
# The object has 6 rows (row.names = c(NA, -6L)) and 50 character columns
# named Techniques_1 ... Techniques_50, and carries the tibble classes
# "tbl_df"/"tbl" in addition to "data.frame".
# Each non-missing cell is a comma-separated string of category codes
# (e.g. "1,2,3"); the fourth row is NA in every column.
Med_Tech_Structure <- structure(list(Techniques_1 = c("2", "2", "2", 
NA, "1", "1"), 
Techniques_2 = c("2", "2", "2", NA, "1,2", "2"), Techniques_3 = 
c("2", 
"2", "2", NA, "1", "1"), Techniques_4 = c("2", "2", "2", 
NA, "1", "2"), Techniques_5 = c("2,3", "4", "3", NA, "4", 
"1"), Techniques_6 = c("3", "3", "3", NA, "4", "3"), Techniques_7 = 
c("2", 
"2", "2", NA, "1", "1"), Techniques_8 = c("1", "4", "1,3", 
NA, "4", "2"), Techniques_9 = c("2", "2", "2", NA, "2", "2"
), Techniques_10 = c("1", "4", "1", NA, "2", "1"), Techniques_11 = 
c("2", 
"4", "1,2", NA, "4", "4"), Techniques_12 = c("1", "4", "2", 
NA, "4", "4"), Techniques_13 = c("2,3", "4", "1,2", NA, "4", 
"4"), Techniques_14 = c("2", "4", "1,2", NA, "2", "4"), Techniques_15 
= c("2", 
"4", "2,3", NA, "4", "4"), Techniques_16 = c("1", "1", "3", 
NA, "1", "1"), Techniques_17 = c("3", "3", "3", NA, "4", 
"4"), Techniques_18 = c("2", "4", "3", NA, "1", "1"), Techniques_19 = 
c("2", 
"2", "2", NA, "2", "2"), Techniques_20 = c("2", "4", "1", 
NA, "4", "4"), Techniques_21 = c("1,2", "4", "1,2", NA, "4", 
"4"), Techniques_22 = c("1,2", "1", "1,2", NA, "2", "4"), 
Techniques_23 = c("2", "4", "2", NA, "2", "4"), Techniques_24 = 
c("2", 
"2", "2", NA, "2", "2"), Techniques_25 = c("2", "4", "2", 
NA, "4", "4"), Techniques_26 = c("1,2", "4", "1,3", NA, "4", 
"3"), Techniques_27 = c("2", "4", "3", NA, "1", "1,2"), Techniques_28 
= c("2", 
"4", "2", NA, "3", "4"), Techniques_29 = c("1,2", "4", "1,2", 
NA, "4", "4"), Techniques_30 = c("1,2", "4", "2", NA, "4", 
"4"), Techniques_31 = c("2", "4", "2,3", NA, "1,2", "1"), 
Techniques_32 = c("2,3", "4", "1,3", NA, "4", "4"), Techniques_33 = 
c("1,2", 
"4", "1,2", NA, "4", "4"), Techniques_34 = c("1,2", "4", 
"2,3", NA, "4", "4"), Techniques_35 = c("1,2", "4", "2,3", 
NA, "4", "4"), Techniques_36 = c("2", "4", "2,3", NA, "4", 
"4"), Techniques_37 = c("1,2", "4", "2,3", NA, "4", "4"), 
Techniques_38 = c("1", "1", "2", NA, "1", "1"), Techniques_39 = 
c("1,2", 
"4", "2", NA, "1", "4"), Techniques_40 = c("1,2", "4", "2", 
NA, "4", "4"), Techniques_41 = c("1,2", "4", "2", NA, "4", 
"4"), Techniques_42 = c("1,2", "4", "1,2", NA, "4", "4"), 
Techniques_43 = c("1,2,3", "4", "1,2", NA, "4", "4"), Techniques_44 = 
c("1,2,3", 
"3", "1,2,3", NA, "4", "4"), Techniques_45 = c("1,2", "1", 
"2,3", NA, "4", "4"), Techniques_46 = c("1", "4", "1,2", 
NA, "1,2", "4"), Techniques_47 = c("2", "4", "2", NA, "4", 
"4"), Techniques_48 = c("1", "2", "2", NA, "1", "2"), Techniques_49 = 
c("2", 
"4", "2", NA, "1", "4"), Techniques_50 = c("1,2", "4", "2,3", 
NA, "4", "4")), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))
    # Small worked example: 5 rows by 6 technique columns. Every cell is a
    # comma-separated string of the category codes chosen for that technique
    # (each row is presumably one respondent -- confirm against the real data).
    CurrentFrame <- data.frame(
      Techniques_1 = c("1,3", "2,3", "2", "1", "2"),
      Techniques_2 = c("2", "3", "2", "3", "2,3"),
      Techniques_3 = c("4", "4", "3", "3", "2,3"),
      Techniques_4 = c("2,3", "2,3", "3", "1", "1,2,3"),
      Techniques_5 = c("1", "1", "1,2", "2", "1"),
      Techniques_6 = c("1", "1", "1,2,3", "2", "1,2"),
      stringsAsFactors = FALSE
    )

    # Emit a copy-pasteable text representation of the frame.
    dput(CurrentFrame)

Console output from dput

    structure(list(Techniques_1 = c("1,3", "2,3", "2", "1", "2"), 
    Techniques_2 = c("2", "3", "2", "3", "2,3"), Techniques_3 = c("4", 
    "4", "3", "3", "2,3"), Techniques_4 = c("2,3", "2,3", "3", 
    "1", "1,2,3"), Techniques_5 = c("1", "1", "1,2", "2", "1"
    ), Techniques_6 = c("1", "1", "1,2,3", "2", "1,2")), class = 
    "data.frame", row.names = c(NA, 
    -5L))

推荐答案

# Example data: 5 rows by 6 technique columns. Each cell is a comma-separated
# string listing the categories assigned to that technique in that row.
dt <- data.frame(
  Techniques_1 = c("1,3", "2,3", "2", "1", "2"),
  Techniques_2 = c("2", "3", "2", "3", "2,3"),
  Techniques_3 = c("4", "4", "3", "3", "2,3"),
  Techniques_4 = c("2,3", "2,3", "3", "1", "1,2,3"),
  Techniques_5 = c("1", "1", "1,2", "2", "1"),
  Techniques_6 = c("1", "1", "1,2,3", "2", "1,2"),
  stringsAsFactors = FALSE
)

# Categories to tally; one output row is produced per entry.
categories <- 1:4

# Count the cells of `column` that list `category` among their
# comma-separated entries.
#
# The cell is split on "," and compared token-by-token instead of using
# grepl(), because a substring match would make category 1 also hit "10",
# "11", ... once more than nine categories exist. NA cells split to NA and
# therefore never match, so they contribute 0.
#
# column:   vector of comma-separated category strings (may contain NA).
# category: a single category code (number or string).
# Returns a single integer count.
count_category <- function(column, category) {
  tokens <- strsplit(as.character(column), ",", fixed = TRUE)
  wanted <- as.character(category)
  sum(vapply(
    tokens,
    function(cell) wanted %in% trimws(cell),
    logical(1)
  ))
}

# Build one single-row data.frame per category: a label column followed by
# the per-technique counts (column names are inherited from `dt`).
category_rows <- lapply(categories, function(category) {
  counts <- lapply(dt, count_category, category = category)
  data.frame(Category_Count = paste0("Category_", category), counts)
})

# Stack the per-category rows into the final summary table.
result <- do.call(rbind, category_rows)
result
  Category_Count Techniques_1 Techniques_2 Techniques_3 Techniques_4 Techniques_5 Techniques_6
1     Category_1            2            0            0            2            4            4
2     Category_2            3            3            1            3            2            3
3     Category_3            2            3            3            4            0            1
4     Category_4            0            0            2            0            0            0

R相关问答推荐

咕噜中的元素列表:map

r替换lme S4对象的字符串的一部分

提取第一个下划线和最后一个下划线之间的任何内容,例外情况除外

如何在分组条形图中移动相关列?

为了网络分析目的,将数据框转换为长格式列联表

如何在R中描绘#符号?

DEN扩展包中的RECT树形图出现异常行为

以NA为通配符的R中的FULL_JOIN以匹配其他数据中的任何值.Frame

在保留列表元素属性的同时替换列表元素

将全局环境变量的名称分配给列表中的所有元素

在不对R中的变量分组的情况下取两行的平均值

快速合并R内的值

对R中的列表列执行ROW Mean操作

使用LAG和dplyr执行计算,以便按行和按组迭代

识别部分重复行,其中一行为NA,其重复行为非NA

从单个html段落中提取键-值对

对数据帧中的列进行子集设置以通过迭代创建新的数据帧

Ggplot2水平线和垂直线的图例图标不匹配

基于日期输入的子集数据集,其中应包括NAS作为 Select

看似重叠的剧情