我有一个数据框 apcd_hud_ex。我想取出其中一些列(例如 x2014_03_15),并根据该列的当前值、从列名中解析出的日期以及数据框中的另一列(无烟日期 SMOKEFREE_DATE)动态更改该列的值。我可以通过对列进行循环来完成,但我很想知道如何使用 dplyr 和 mutate 以编程方式实现。任何帮助都将不胜感激!

# Example data: one row per study participant, the participant's smoke-free
# date, and one 0/1 indicator column per mid-month date (x<YYYY>_<MM>_<DD>).
apcd_hud_ex <- structure(
  list(
    studyid = 1:5,
    SMOKEFREE_DATE = as.Date(c(
      "2014-08-01", "2014-08-01", "2014-01-01", "2014-01-01", "2015-08-01"
    )),
    x2014_03_15 = c(1, 1, 1, 0, 1),
    x2014_04_15 = rep(1, 5),
    x2014_05_15 = rep(1, 5),
    x2014_06_15 = rep(1, 5),
    x2014_07_15 = rep(1, 5),
    x2014_08_15 = rep(1, 5),
    x2014_09_15 = rep(1, 5),
    x2014_10_15 = rep(1, 5),
    x2014_11_15 = rep(1, 5),
    x2014_12_15 = rep(1, 5),
    x2015_01_15 = rep(1, 5)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

> apcd_hud_ex
# A tibble: 5 x 13
  studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15
    <int> <date>               <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
1       1 2014-08-01               1           1           1           1           1           1           1           1
2       2 2014-08-01               1           1           1           1           1           1           1           1
3       3 2014-01-01               1           1           1           1           1           1           1           1
4       4 2014-01-01               0           1           1           1           1           1           1           1
5       5 2015-08-01               1           1           1           1           1           1           1           1
# ... with 3 more variables: x2014_11_15 <dbl>, x2014_12_15 <dbl>, x2015_01_15 <dbl>
>


# Recode an insurance indicator relative to the smoke-free date.
#
# Arguments:
#   SFdate   - smoke-free date(s); passed through as.Date() with "%Y-%m-%d".
#   insValue - current indicator value(s); 0 is kept as 0.
#   insDate  - date the indicator column refers to.
# Returns 0 where insValue is 0, otherwise 1 when insDate falls before the
# smoke-free date and 2 when it does not.
assign_PHRes_enrollIns_fn <- function(SFdate, insValue, insDate) {
  before_smokefree <- as.Date(insDate) < as.Date(SFdate, "%Y-%m-%d")
  nonzero_code <- if_else(before_smokefree, 1, 2)
  if_else(insValue == 0, 0, nonzero_code)
}

# Vectorize() wrapper around the recoding function, used by the loop version.
assign_PHRes_enrollIns_fn_vec <- Vectorize(FUN = assign_PHRes_enrollIns_fn)

# Names of the contiguous run of columns from x2014_03_15 to x2015_01_15,
# in the order they appear in the data frame.
dateCols <- names(apcd_hud_ex)[
  seq.int(
    from = which(names(apcd_hud_ex) == "x2014_03_15"),
    to = which(names(apcd_hud_ex) == "x2015_01_15")
  )
]

对列名(dateCols)进行循环是可行的:

# Recode every date column in place, one column per iteration.
# - seq_along() is used instead of 1:length() so an empty `dateCols` runs
#   zero iterations rather than iterating over c(1, 0).
# - Columns are extracted with [[ ]] so plain vectors (not one-column
#   tibbles) are passed on and assigned back.
# - assign_PHRes_enrollIns_fn() is built from if_else(), which already works
#   element-wise, so it is called directly on the whole column.
for (i in seq_along(dateCols)) {
  dateCol <- dateCols[i]
  # Column names look like "x2014_03_15": drop the leading "x" and parse the
  # remainder as year_month_day.
  insDate <- as.Date(substring(dateCol, 2L), format = "%Y_%m_%d")
  apcd_hud_ex[[dateCol]] <- assign_PHRes_enrollIns_fn(
    apcd_hud_ex[["SMOKEFREE_DATE"]],
    apcd_hud_ex[[dateCol]],
    insDate
  )
}

现在被操纵的数据框看起来像这样,这就是我想要的:

> apcd_hud_ex
# A tibble: 5 x 13
  studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15 x2014_07_15 x2014_08_15 x2014_09_15 x2014_10_15
    <int> <date>               <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
1       1 2014-08-01               1           1           1           1           1           2           2           2
2       2 2014-08-01               1           1           1           1           1           2           2           2
3       3 2014-01-01               2           2           2           2           2           2           2           2
4       4 2014-01-01               0           2           2           2           2           2           2           2
5       5 2015-08-01               1           1           1           1           1           1           1           1
# ... with 3 more variables: x2014_11_15 <dbl>, x2014_12_15 <dbl>, x2015_01_15 <dbl>

然而,我想学习如何使用动态编程和dplyr实现这一点。我尝试了两个函数:

# First attempt (does NOT work): tries to recode all date columns in a single
# mutate() call by embracing `dateCols` on both sides of `:=`.
#
# Arguments:
#   df       - data frame containing SMOKEFREE_DATE and the date columns.
#   dateCols - character vector of column names such as "x2014_03_15".
#
# The call at the bottom stops with:
#   Error: The LHS of `:=` must be a string or a symbol
# NOTE(review): presumably because `dateCols` is a character vector holding
# several names, while `:=` expects a single name on its left-hand side.
newInsValCols_fn1 <- function(df,dateCols){
  # Build a Date from each column name: characters 2-5 are the year,
  # 7-8 the month and 10-11 the day.
  insDate = as.Date(paste0(str_sub(dateCols,2,5),"/",str_sub(dateCols,7,8),"/",str_sub(dateCols,10,11)),"%Y/%m/%d")

  df1 <- df %>%
    mutate({{dateCols}} := if_else({{dateCols}} == 0,
                                   0,
                                   if_else(as.Date(insDate) < as.Date(SMOKEFREE_DATE,"%Y-%m-%d"),
                                           1,
                                           2)))
 return(df1)
} 
newInsValCols_fn1(apcd_hud_ex,dateCols)

这就产生了错误:

 Error: The LHS of `:=` must be a string or a symbol

所以我试着使用符号:

# Second attempt (does NOT work either): converts the column names to symbols
# with syms() and unquotes the result with `!!` on the left of `:=`.
#
# Arguments:
#   df       - data frame containing SMOKEFREE_DATE and the date columns.
#   dateCols - character vector of column names such as "x2014_03_15".
#
# The call at the bottom stops with the same error as the first attempt:
#   Error: The LHS of `:=` must be a string or a symbol
# NOTE(review): presumably because syms() yields a list of symbols (one per
# name), so `!!dateCols_syms` is still not a single string or symbol.
newInsValCols_fn2 <- function(df,dateCols){
  dateCols_syms = syms(dateCols)
  # Build a Date from each column name: characters 2-5 are the year,
  # 7-8 the month and 10-11 the day.
  insDate = as.Date(paste0(str_sub(dateCols,2,5),"/",str_sub(dateCols,7,8),"/",str_sub(dateCols,10,11)),"%Y/%m/%d")
  df1 <- df %>%
    mutate(!!dateCols_syms := if_else({{dateCols}} == 0,
                                      0,
                                      if_else(as.Date(insDate) < as.Date(SMOKEFREE_DATE,"%Y-%m-%d"),
                                              1,
                                              2)))
  return(df1)
} 
newInsValCols_fn2(apcd_hud_ex,dateCols)

这就产生了同样的错误:

Error: The LHS of `:=` must be a string or a symbol

我也试过用 `!!!` 而不是 `!!`,但这导致了以下错误:

 Error: The LHS of `:=` can't be spliced with `!!!`

我的理解有些欠缺.

推荐答案

可以先用 pivot_longer 把数据转换成长格式,这样只需要修改一列;这是 mutate(across()) 的一种替代方案。

可以使用 case_when 来设置多个条件,因此不需要嵌套多个 if 语句。结果取第一个为 TRUE 的条件所对应的值。

library(tidyverse)

# Example data: one row per study participant, the participant's smoke-free
# date, and one 0/1 indicator column per mid-month date (x<YYYY>_<MM>_<DD>).
apcd_hud_ex <- structure(
  list(
    studyid = 1:5,
    SMOKEFREE_DATE = as.Date(c(
      "2014-08-01", "2014-08-01", "2014-01-01", "2014-01-01", "2015-08-01"
    )),
    x2014_03_15 = c(1, 1, 1, 0, 1),
    x2014_04_15 = rep(1, 5),
    x2014_05_15 = rep(1, 5),
    x2014_06_15 = rep(1, 5),
    x2014_07_15 = rep(1, 5),
    x2014_08_15 = rep(1, 5),
    x2014_09_15 = rep(1, 5),
    x2014_10_15 = rep(1, 5),
    x2014_11_15 = rep(1, 5),
    x2014_12_15 = rep(1, 5),
    x2015_01_15 = rep(1, 5)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Reshape to long form so a single `value` column holds every indicator,
# recode that one column, then spread back to the original wide layout.
apcd_hud_ex %>%
  pivot_longer(
    cols = starts_with("x"),
    names_to = "name",
    values_to = "value"
  ) %>%
  mutate(
    # "x2014_03_15" -> "2014_03_15" -> "2014-03-15" -> Date
    insDate = as.Date(str_replace_all(str_remove(name, "^x"), "_", "-")),
    # case_when() takes the value of the first condition that is TRUE.
    value = case_when(
      value == 0 ~ 0,
      insDate < SMOKEFREE_DATE ~ 1,
      insDate >= SMOKEFREE_DATE ~ 2
    )
  ) %>%
  select(!insDate) %>%
  pivot_wider(names_from = name, values_from = value)
#> # A tibble: 5 × 13
#>   studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15
#>     <int> <date>               <dbl>       <dbl>       <dbl>       <dbl>
#> 1       1 2014-08-01               1           1           1           1
#> 2       2 2014-08-01               1           1           1           1
#> 3       3 2014-01-01               2           2           2           2
#> 4       4 2014-01-01               0           2           2           2
#> 5       5 2015-08-01               1           1           1           1
#> # … with 7 more variables: x2014_07_15 <dbl>, x2014_08_15 <dbl>,
#> #   x2014_09_15 <dbl>, x2014_10_15 <dbl>, x2014_11_15 <dbl>, x2014_12_15 <dbl>,
#> #   x2015_01_15 <dbl>

由 reprex 包(v2.0.0)创建于 2022-05-05

R相关问答推荐

在ggplot的注释表格中突出显示最大值

查找多个列中的最大值并返回其列名,出现平局时返回 NA 值

如何使用shinyChatR包配置聊天机器人

ggplot geom_smooth()用于线性回归虚拟变量-没有回归线

R Tidymodels textercipes-使用spacyR进行标记化-如何从生成的标记列表中删除标点符号

带有叠加饼图系列的Highmap

使用tidyverse方法绑定行并从一组管道列表执行左连接

使用data.table::fcase()而不是dplyr::case_When()时保持值

SHINY:使用JS函数应用的CSS样式显示HTML表格

更新R中的数据表(使用data.table)

派生程序包 | 无法检索 'return()' 的正文

来自程序包AFEX和amp;的类/函数和NICE_TABLE&冲突

如何在R中创建条形图,使条形图在y轴上围绕0.5而不是0构建条形图?

长/纬点继续在堪萨斯-SF结束,整齐的人口普查

隐藏基于 case 总数的值

根据排名的顶点属性调整曲线图布局(&Q)

使用列名和r中的前缀 Select 列的CREATE函数

我有2011-2022年的年度数据.如何计算最低年份和最高年份之间的差额?

创建由三个单独的shapefile组成的单个 map

R Bupar:获取每个 case 的踪迹