我有两个生物标志物(分析物)的值,在两个不同的时期(从2022-06-03到2022-06-06的4天内用两种不同的"方法"测量,而在2023-06-01到2023-06-07的7天内用"新"方法测量).

Question: How to count exactly the same (maximal) number of values for 'old' and 'new' methods in a new column, by analyte, starting from the earliest common period, considering month-day regardless of the year?
For example, in the desired output data below, the new 'count' column shows:
n=16 values for old and new methods of the first analyte 'chol', and
n=18 values for old and new methods of the second analyte 'ldh'.

Input data:

# Input data (dput output): 107 measurements of two analytes with two methods.
# Columns:
#   analyte - factor with 12 levels; only "chol" (rows 1-55) and "ldh"
#             (rows 56-107) occur in this data.
#   method  - factor "new"/"old". "old" rows are dated 2022-06-03..2022-06-06
#             (4 days); "new" rows are dated 2023-06-01..2023-06-07 (7 days).
#   date    - POSIXct timestamp (UTC), stored as seconds since the epoch;
#             all times are midnight (date-only measurements).
#   value   - numeric measurement result for the analyte.
dat0 <-
structure(list(analyte = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L), .Label = c("amy", "auu", "chol", "ggt", 
"iron", "ldh", "pal", "prot", "trig", "uree", "ureeu", "uric"
), class = "factor"), method = structure(c(2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("new", "old"), class = "factor"), 
    date = structure(c(1654214400, 1654214400, 1654214400, 1654214400, 
    1654214400, 1654300800, 1654300800, 1654300800, 1654300800, 
    1654387200, 1654387200, 1654387200, 1654473600, 1654473600, 
    1654473600, 1654473600, 1685577600, 1685577600, 1685577600, 
    1685577600, 1685577600, 1685577600, 1685664000, 1685664000, 
    1685664000, 1685664000, 1685664000, 1685664000, 1685750400, 
    1685750400, 1685750400, 1685750400, 1685750400, 1685836800, 
    1685836800, 1685836800, 1685836800, 1685923200, 1685923200, 
    1685923200, 1685923200, 1685923200, 1685923200, 1686009600, 
    1686009600, 1686009600, 1686009600, 1686009600, 1686009600, 
    1686096000, 1686096000, 1686096000, 1686096000, 1686096000, 
    1686096000, 1654214400, 1654214400, 1654214400, 1654214400, 
    1654214400, 1654300800, 1654300800, 1654300800, 1654300800, 
    1654300800, 1654300800, 1654387200, 1654387200, 1654387200, 
    1654387200, 1654387200, 1654473600, 1654473600, 1654473600, 
    1654473600, 1654473600, 1654473600, 1654473600, 1654473600, 
    1685577600, 1685577600, 1685577600, 1685577600, 1685577600, 
    1685664000, 1685664000, 1685664000, 1685664000, 1685664000, 
    1685750400, 1685750400, 1685750400, 1685836800, 1685836800, 
    1685836800, 1685923200, 1685923200, 1685923200, 1685923200, 
    1686009600, 1686009600, 1686009600, 1686009600, 1686096000, 
    1686096000, 1686096000, 1686096000), tzone = "UTC", class = c("POSIXct", 
    "POSIXt")), value = c(2.84, 2.79, 2.82, 2.84, 2.84, 2.81, 
    2.8, 2.83, 2.82, 2.82, 2.8, 2.82, 2.81, 2.79, 2.82, 2.84, 
    2.74, 2.76, 2.77, 2.79, 2.76, 2.81, 2.73, 2.75, 2.77, 2.78, 
    2.76, 2.86, 2.73, 2.78, 2.84, 2.82, 2.8, 2.74, 2.76, 2.79, 
    2.82, 2.8, 2.76, 2.77, 2.83, 2.85, 2.83, 2.78, 2.74, 2.8, 
    2.8, 2.79, 2.85, 2.75, 2.76, 2.78, 2.82, 2.76, 2.86, 121, 
    119, 113, 117, 119, 123, 120, 117, 118, 125, 121, 121, 124, 
    123, 117, 115, 113, 120, 113, 113, 115, 117, 119, 115, 111, 
    111, 113, 116, 115, 111, 115, 112, 118, 115, 113, 106, 118, 
    113, 113, 113, 110, 114, 113, 117, 111, 111, 115, 120, 112, 
    113, 114, 108)), row.names = c(NA, -107L), class = c("tbl_df", 
"tbl", "data.frame"))  

Desired output data:

# Desired output (dput output): the same 107 rows as dat0 (analyte/method as
# character instead of factor) plus a `count` column. Within each analyte,
# observations falling in the common month-day window of both methods are
# numbered 1..n per method (n = 16 for "chol", n = 18 for "ldh"); rows with
# no matching counterpart in the other method have count = NA.
dat1 <-
structure(list(analyte = c("chol", "chol", "chol", "chol", "chol", 
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol", 
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol", 
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol", 
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol", 
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol", 
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol", 
"chol", "chol", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", 
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", 
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", 
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", 
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", 
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh"
), method = c("old", "old", "old", "old", "old", "old", "old", 
"old", "old", "old", "old", "old", "old", "old", "old", "old", 
"new", "new", "new", "new", "new", "new", "new", "new", "new", 
"new", "new", "new", "new", "new", "new", "new", "new", "new", 
"new", "new", "new", "new", "new", "new", "new", "new", "new", 
"new", "new", "new", "new", "new", "new", "new", "new", "new", 
"new", "new", "new", "old", "old", "old", "old", "old", "old", 
"old", "old", "old", "old", "old", "old", "old", "old", "old", 
"old", "old", "old", "old", "old", "old", "old", "old", "old", 
"new", "new", "new", "new", "new", "new", "new", "new", "new", 
"new", "new", "new", "new", "new", "new", "new", "new", "new", 
"new", "new", "new", "new", "new", "new", "new", "new", "new", 
"new"), date = structure(c(1654214400, 1654214400, 1654214400, 
1654214400, 1654214400, 1654300800, 1654300800, 1654300800, 1654300800, 
1654387200, 1654387200, 1654387200, 1654473600, 1654473600, 1654473600, 
1654473600, 1685577600, 1685577600, 1685577600, 1685577600, 1685577600, 
1685577600, 1685664000, 1685664000, 1685664000, 1685664000, 1685664000, 
1685664000, 1685750400, 1685750400, 1685750400, 1685750400, 1685750400, 
1685836800, 1685836800, 1685836800, 1685836800, 1685923200, 1685923200, 
1685923200, 1685923200, 1685923200, 1685923200, 1686009600, 1686009600, 
1686009600, 1686009600, 1686009600, 1686009600, 1686096000, 1686096000, 
1686096000, 1686096000, 1686096000, 1686096000, 1654214400, 1654214400, 
1654214400, 1654214400, 1654214400, 1654300800, 1654300800, 1654300800, 
1654300800, 1654300800, 1654300800, 1654387200, 1654387200, 1654387200, 
1654387200, 1654387200, 1654473600, 1654473600, 1654473600, 1654473600, 
1654473600, 1654473600, 1654473600, 1654473600, 1685577600, 1685577600, 
1685577600, 1685577600, 1685577600, 1685664000, 1685664000, 1685664000, 
1685664000, 1685664000, 1685750400, 1685750400, 1685750400, 1685836800, 
1685836800, 1685836800, 1685923200, 1685923200, 1685923200, 1685923200, 
1686009600, 1686009600, 1686009600, 1686009600, 1686096000, 1686096000, 
1686096000, 1686096000), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    value = c(2.84, 2.79, 2.82, 2.84, 2.84, 2.81, 2.8, 2.83, 
    2.82, 2.82, 2.8, 2.82, 2.81, 2.79, 2.82, 2.84, 2.74, 2.76, 
    2.77, 2.79, 2.76, 2.81, 2.73, 2.75, 2.77, 2.78, 2.76, 2.86, 
    2.73, 2.78, 2.84, 2.82, 2.8, 2.74, 2.76, 2.79, 2.82, 2.8, 
    2.76, 2.77, 2.83, 2.85, 2.83, 2.78, 2.74, 2.8, 2.8, 2.79, 
    2.85, 2.75, 2.76, 2.78, 2.82, 2.76, 2.86, 121, 119, 113, 
    117, 119, 123, 120, 117, 118, 125, 121, 121, 124, 123, 117, 
    115, 113, 120, 113, 113, 115, 117, 119, 115, 111, 111, 113, 
    116, 115, 111, 115, 112, 118, 115, 113, 106, 118, 113, 113, 
    113, 110, 114, 113, 117, 111, 111, 115, 120, 112, 113, 114, 
    108), count = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
    14, 15, 16, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, 3, 4, 5, 6, 
    7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, 
    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -107L))

谢谢你的帮助

推荐答案

我不是百分之百确定,但我认为这可以帮助你继续推进.我的输出与期望结果略有差异,但应该是合理的.此方法假设日期序列不会跨年环绕,也没有更复杂的日期分布情况.

首先,需要把日期拆分出 month 和 day,以便在忽略 year 的情况下进行比较.然后可以按 month 和 day 对每个 analyte 的数据进行 arrange 排序.对于给定的 analyte,如果某个月日同时有 "new" 和 "old" 两种方法的数据,就在名为 overlap 的新列中标记为 TRUE.

然后,对每个 analyte 与 method 的组合,在 overlap 为 TRUE 的行上用累计和计算 count.

最后,按 analyte 和 count 分组,用 filter 只保留两种方法在该 analyte 的同一 count 值上都存在的行,再按 analyte、method、date 排序.

我认为输出的不同之处在于,以下仅包括两种方法的14个"LDH"测试,尽管其中一个在6月5日结束,另一个在6月6日结束.如果您希望它们继续使用相同的结束日期,可以对其进行修改.

library(tidyverse)

# For each analyte, number the observations that fall in the common
# month-day window shared by the "old" (2022) and "new" (2023) periods,
# so both methods end up with exactly the same (maximal) number of
# counted values, ignoring the year.
dat0 |>
  # Extract calendar parts so 2022-06-03 and 2023-06-03 align on month/day.
  # FIX: separate(date, c('year','month','day'), sep = "-", convert = TRUE)
  # on a POSIXct column leaves the time tail in the last piece
  # ("03 00:00:00"), so `day` cannot be converted to an integer;
  # lubridate's accessors extract the parts reliably.
  mutate(
    year  = lubridate::year(date),
    month = lubridate::month(date),
    day   = lubridate::day(date)
  ) |>
  arrange(analyte, month, day) |>
  # A month-day "overlaps" when, for this analyte, both methods have data
  # on that calendar day (in any year).
  mutate(overlap = all(c("old", "new") %in% method),
         .by = c(analyte, month, day)) |>
  # Running index of overlapping observations within analyte x method.
  mutate(count = cumsum(overlap), .by = c(analyte, method)) |>
  # Keep a count value only when BOTH methods reached it for the analyte,
  # which guarantees equal n for "old" and "new".
  filter(overlap & all(c("old", "new") %in% method),
         .by = c(analyte, count)) |>
  arrange(analyte, method, date)

Output

   analyte method date                 year month   day  value overlap count
   <fct>   <fct>  <dttm>              <int> <int> <int>  <dbl> <lgl>   <int>
 1 chol    new    2023-06-03 00:00:00  2023     6     3   2.73 TRUE        1
 2 chol    new    2023-06-03 00:00:00  2023     6     3   2.78 TRUE        2
 3 chol    new    2023-06-03 00:00:00  2023     6     3   2.84 TRUE        3
 4 chol    new    2023-06-03 00:00:00  2023     6     3   2.82 TRUE        4
 5 chol    new    2023-06-03 00:00:00  2023     6     3   2.8  TRUE        5
 6 chol    new    2023-06-04 00:00:00  2023     6     4   2.74 TRUE        6
 7 chol    new    2023-06-04 00:00:00  2023     6     4   2.76 TRUE        7
 8 chol    new    2023-06-04 00:00:00  2023     6     4   2.79 TRUE        8
 9 chol    new    2023-06-04 00:00:00  2023     6     4   2.82 TRUE        9
10 chol    new    2023-06-05 00:00:00  2023     6     5   2.8  TRUE       10
11 chol    new    2023-06-05 00:00:00  2023     6     5   2.76 TRUE       11
12 chol    new    2023-06-05 00:00:00  2023     6     5   2.77 TRUE       12
13 chol    new    2023-06-05 00:00:00  2023     6     5   2.83 TRUE       13
14 chol    new    2023-06-05 00:00:00  2023     6     5   2.85 TRUE       14
15 chol    new    2023-06-05 00:00:00  2023     6     5   2.83 TRUE       15
16 chol    new    2023-06-06 00:00:00  2023     6     6   2.78 TRUE       16
17 chol    old    2022-06-03 00:00:00  2022     6     3   2.84 TRUE        1
18 chol    old    2022-06-03 00:00:00  2022     6     3   2.79 TRUE        2
19 chol    old    2022-06-03 00:00:00  2022     6     3   2.82 TRUE        3
20 chol    old    2022-06-03 00:00:00  2022     6     3   2.84 TRUE        4
21 chol    old    2022-06-03 00:00:00  2022     6     3   2.84 TRUE        5
22 chol    old    2022-06-04 00:00:00  2022     6     4   2.81 TRUE        6
23 chol    old    2022-06-04 00:00:00  2022     6     4   2.8  TRUE        7
24 chol    old    2022-06-04 00:00:00  2022     6     4   2.83 TRUE        8
25 chol    old    2022-06-04 00:00:00  2022     6     4   2.82 TRUE        9
26 chol    old    2022-06-05 00:00:00  2022     6     5   2.82 TRUE       10
27 chol    old    2022-06-05 00:00:00  2022     6     5   2.8  TRUE       11
28 chol    old    2022-06-05 00:00:00  2022     6     5   2.82 TRUE       12
29 chol    old    2022-06-06 00:00:00  2022     6     6   2.81 TRUE       13
30 chol    old    2022-06-06 00:00:00  2022     6     6   2.79 TRUE       14
31 chol    old    2022-06-06 00:00:00  2022     6     6   2.82 TRUE       15
32 chol    old    2022-06-06 00:00:00  2022     6     6   2.84 TRUE       16
33 ldh     new    2023-06-03 00:00:00  2023     6     3 113    TRUE        1
34 ldh     new    2023-06-03 00:00:00  2023     6     3 106    TRUE        2
35 ldh     new    2023-06-03 00:00:00  2023     6     3 118    TRUE        3
36 ldh     new    2023-06-04 00:00:00  2023     6     4 113    TRUE        4
37 ldh     new    2023-06-04 00:00:00  2023     6     4 113    TRUE        5
38 ldh     new    2023-06-04 00:00:00  2023     6     4 113    TRUE        6
39 ldh     new    2023-06-05 00:00:00  2023     6     5 110    TRUE        7
40 ldh     new    2023-06-05 00:00:00  2023     6     5 114    TRUE        8
41 ldh     new    2023-06-05 00:00:00  2023     6     5 113    TRUE        9
42 ldh     new    2023-06-05 00:00:00  2023     6     5 117    TRUE       10
43 ldh     new    2023-06-06 00:00:00  2023     6     6 111    TRUE       11
44 ldh     new    2023-06-06 00:00:00  2023     6     6 111    TRUE       12
45 ldh     new    2023-06-06 00:00:00  2023     6     6 115    TRUE       13
46 ldh     new    2023-06-06 00:00:00  2023     6     6 120    TRUE       14
47 ldh     old    2022-06-03 00:00:00  2022     6     3 121    TRUE        1
48 ldh     old    2022-06-03 00:00:00  2022     6     3 119    TRUE        2
49 ldh     old    2022-06-03 00:00:00  2022     6     3 113    TRUE        3
50 ldh     old    2022-06-03 00:00:00  2022     6     3 117    TRUE        4
51 ldh     old    2022-06-03 00:00:00  2022     6     3 119    TRUE        5
52 ldh     old    2022-06-04 00:00:00  2022     6     4 123    TRUE        6
53 ldh     old    2022-06-04 00:00:00  2022     6     4 120    TRUE        7
54 ldh     old    2022-06-04 00:00:00  2022     6     4 117    TRUE        8
55 ldh     old    2022-06-04 00:00:00  2022     6     4 118    TRUE        9
56 ldh     old    2022-06-04 00:00:00  2022     6     4 125    TRUE       10
57 ldh     old    2022-06-04 00:00:00  2022     6     4 121    TRUE       11
58 ldh     old    2022-06-05 00:00:00  2022     6     5 121    TRUE       12
59 ldh     old    2022-06-05 00:00:00  2022     6     5 124    TRUE       13
60 ldh     old    2022-06-05 00:00:00  2022     6     5 123    TRUE       14

R相关问答推荐

创建重复删除的唯一数据集组合列表

在值和NA的行顺序中寻找中断模式

如何使用shinyChatR包配置聊天机器人

使用ggarrange减少图块之间的空间

如何将旋转后的NetCDF转换回正常的纬度/经度网格,并使用R?

使用较长的查询提取具有部分匹配的列表中的较短目标,

在rpart. plot或fancyRpartPlot中使用带有下标的希腊字母作为标签?

在R ggplot2中是否有一种方法将绘图轴转换成连续的颜色尺度?

是否有新方法来更改facet_wrap(ggplot2)中条带文本的文本颜色?

安全地测试文件是否通过R打开

如何使用For-R循环在向量中找到一系列数字

R中Gamma回归模型均方误差的两种计算方法不一致

R代码,用于在线条图下显示观测表

删除数据帧中特定行号之间的每第三行和第四行

使用列中的值来调用函数调用中应使用的其他列

删除在R中的write.table()函数期间创建的附加行

如何根据其他列中的两个条件来计算数据帧中的行之间的差异?

使用函数从R中的列中删除标高

position_dodge在geom_point图中不起作用

在子图内和子图之间对齐行数不均匀的tableGrob对