# Sample data: one row per subject, with up to six HIV viral-load (VL)
# results. Each result is a pair of columns, HIV_VL_result_date_<n> and
# VL_results_<n>. Every column is kept as character; dates are ISO
# "YYYY-MM-DD" strings, and NA marks a result that does not exist.
df <- data.frame(
  subject_id = c(
    "232-5467", "232-6784", "232-3457", "232-0987", "232-1245", "232-1945"
  ),
  HIV_VL_result_date_1 = c(
    "2015-10-11", "2015-10-10", "2015-11-06",
    "2016-02-02", "2017-12-04", "2019-02-15"
  ),
  VL_results_1 = c("LDL", "LDL", "LDL", "<100", "44405", "2322"),
  HIV_VL_result_date_2 = c(
    "2017-05-21", "2022-04-07", "2016-08-21", "2016-11-01", "2018-02-26", NA
  ),
  VL_results_2 = c("LDL", "5613", "LDL", "LDL", "93356", NA),
  HIV_VL_result_date_3 = c(
    "2018-06-27", "2022-07-15", "2022-04-13", "2017-03-01", "2018-05-19", NA
  ),
  VL_results_3 = c("LDL", "6590", "LDL", "LDL", "19078", NA),
  HIV_VL_result_date_4 = c(
    "2020-04-16", "2022-08-15", NA, "2022-06-07", "2020-01-16", NA
  ),
  VL_results_4 = c("LDL", "375", NA, "36", "44", NA),
  HIV_VL_result_date_5 = c("2021-03-25", "2023-01-28", NA, NA, "2022-05-03", NA),
  VL_results_5 = c("LDL", "9125", NA, NA, "LDL", NA),
  HIV_VL_result_date_6 = c("2022-03-07", NA, NA, NA, "2022-11-15", NA),
  VL_results_6 = c("LDL", NA, NA, NA, "<20", NA),
  # Reference date each subject's results are compared against
  preg_date = c(
    "2022-03-04", "2022-08-13", "2022-05-04", "2022-06-02", "2022-04-14", NA
  ),
  # Keep strings as character vectors on every R version
  stringsAsFactors = FALSE
)

我需要执行以下操作:

  1. 在全部 6 个 HIV_VL_result_date 列中,选出与 preg_date 最接近的 HIV_VL_result_date 以及对应的 VL_results.
  2. 将所选日期和结果分别写入新列 vl_closest_date 和 vl_results_closest.

我尝试过的做法是: 首先计算每个 HIV_VL_result_date 与 preg_date 之间的日期差,得到 6 个新列.

# Names of every column whose name contains "HIV_VL_result_date_".
date_columns <- names(df)[grepl("HIV_VL_result_date_", names(df))]

# Add one "<column>_diff_from_pregnancy_outcome" column per date column.
# NOTE(review): `diff_lambda_list` is not defined in this snippet; it must
# already exist in the calling environment for this call to run.
df <- mutate(
  df,
  across(
    all_of(date_columns),
    .fns = diff_lambda_list,
    .names = "{.col}_diff_from_pregnancy_outcome"
  )
)

然后我打算在每一行中选出绝对值最小的差值,再设法取出与该最小值对应的原始日期.但我在这一步卡住了,肯定有更简单的做法.我尝试搜索过类似的问题,但没有找到数据结构相似的问题.

推荐答案

我们可以使用 rowwise() 和 c_across(),但我认为把数据转换成长格式来处理会更好.

试试这个:

# library(tidyverse)

# For every subject, keep the single result whose date lies nearest to
# `preg_date`, regardless of whether it falls before or after that date.
aux <- df %>%
  # Long format: one row per original date/result column
  pivot_longer(cols = contains("VL_result")) %>%
  mutate(
    # Trailing digits of the column name identify the result number
    result_id = as.integer(str_extract(name, "\\d+$")),
    # Drop the "_<n>" suffix so date and result columns pair up again
    name = str_remove(name, "_\\d+$")
  ) %>%
  # One row per subject and result number, with date and value side by side
  pivot_wider(names_from = name, values_from = value) %>%
  # Parse every "*_date*" column (including `preg_date`) into Date
  mutate(across(contains("_date"), ymd)) %>%
  # Absolute distance, in seconds, between the result date and `preg_date`
  mutate(
    gap_seconds = abs(int_length(interval(preg_date, HIV_VL_result_date)))
  ) %>%
  # Smallest distance first. A subject without a `preg_date` has only NA
  # distances; in the sample data the row kept for such a subject is
  # result 1, so treat its "closest" values with care.
  arrange(gap_seconds) %>%
  # First (nearest) row of each subject
  slice_head(n = 1, by = subject_id) %>%
  # Keep and rename only the columns that go back onto `df`
  select(
    subject_id,
    closest_result_id = result_id,
    vl_closest_date = HIV_VL_result_date,
    vl_results_closest = VL_results
  )

# Attach the closest-result columns to the original wide table
df <- left_join(df, aux, by = "subject_id")

输出:

# Closest results (after or before `preg_date`)
> aux
# A tibble: 6 × 4
  subject_id closest_result_id vl_closest_date vl_results_closest
  <chr>                  <int> <date>          <chr>             
1 232-6784                   4 2022-08-15      375               
2 232-5467                   6 2022-03-07      LDL               
3 232-0987                   4 2022-06-07      36                
4 232-1245                   5 2022-05-03      LDL               
5 232-3457                   3 2022-04-13      LDL               
6 232-1945                   1 2019-02-15      2322 

# Not all columns - just for convenience
> select(df, 1:3, contains("closest"))
  subject_id HIV_VL_result_date_1 VL_results_1 closest_result_id vl_closest_date vl_results_closest
1   232-5467           2015-10-11          LDL                 6      2022-03-07                LDL
2   232-6784           2015-10-10          LDL                 4      2022-08-15                375
3   232-3457           2015-11-06          LDL                 3      2022-04-13                LDL
4   232-0987           2016-02-02         <100                 4      2022-06-07                 36
5   232-1245           2017-12-04        44405                 5      2022-05-03                LDL
6   232-1945           2019-02-15         2322                 1      2019-02-15               2322

R相关问答推荐

以R表示的gglikert地块调整总数

行式dppr中的变量列名

如何删除R中除某些特定名称外的所有字符串?

如何根据条件计算时差(天)

R:更新后无法运行控制台

警告:lmdif:info = 0. nls. lm()函数的输入参数不正确

如何将dygraph调用到R Markdown作为一个shiny 的react 对象的参数?

如何在ggplot中标记qqplot上的点?

对于变量的每个值,仅 Select 包含列表中所有值的值.R

使用列/行匹配将两个不同维度的矩阵相加

矩阵的堆叠条形图,条形图上有数字作为标签

以相同的方式对每个表进行排序

如何通过匹配R中所有可能的组合来从宽到长旋转多个列?

判断函数未加载R中的库

如何在 ggalt 包的函数 geom_xspline 中调整线宽

如何在AER::ivreg中指定工具变量?

有没有办法将勾选/审查标记添加到R中的累积关联图中?

在GT()中的列之间添加空格

从矩阵创建系数图

如何从矩阵绘制环弦图