我有以下名为 df 的数据框(其 dput 输出见文末):
# A tibble: 14 × 5
group date indicator value diff_hours
<chr> <dttm> <lgl> <dbl> <dbl>
1 A 2022-11-01 01:00:00 FALSE 2 4
2 A 2022-11-01 02:00:00 FALSE 1 3
3 A 2022-11-01 03:00:00 FALSE 4 2
4 A 2022-11-01 04:00:00 FALSE 1 1
5 A 2022-11-01 05:00:00 TRUE 3 0
6 A 2022-11-01 06:00:00 FALSE 1 1
7 A 2022-11-01 07:00:00 FALSE 3 2
8 B 2022-11-01 01:00:00 FALSE 1 4
9 B 2022-11-01 02:00:00 FALSE 2 3
10 B 2022-11-01 03:00:00 FALSE 3 2
11 B 2022-11-01 04:00:00 FALSE 1 1
12 B 2022-11-01 05:00:00 TRUE 4 0
13 B 2022-11-01 06:00:00 FALSE 1 1
14 B 2022-11-01 07:00:00 FALSE 5 2
我想以条件行(indicator == TRUE)为基准,在每个分组内对每 n 行计算一次斜率,即 lm(value ~ diff_hours) 中 diff_hours 的系数。indicator 为 TRUE 的行,其斜率应为 NA。下面是 n = 2 时的期望输出 df_desired(其 dput 输出见文末):
# A tibble: 14 × 6
# Groups: group [2]
group date indicator value diff_hours slope
<chr> <dttm> <lgl> <dbl> <dbl> <dbl>
1 A 2022-11-01 01:00:00 FALSE 2 4 1
2 A 2022-11-01 02:00:00 FALSE 1 3 1
3 A 2022-11-01 03:00:00 FALSE 4 2 3
4 A 2022-11-01 04:00:00 FALSE 1 1 3
5 A 2022-11-01 05:00:00 TRUE 3 0 NA
6 A 2022-11-01 06:00:00 FALSE 1 1 2
7 A 2022-11-01 07:00:00 FALSE 3 2 2
8 B 2022-11-01 01:00:00 FALSE 1 4 -1
9 B 2022-11-01 02:00:00 FALSE 2 3 -1
10 B 2022-11-01 03:00:00 FALSE 3 2 2
11 B 2022-11-01 04:00:00 FALSE 1 1 2
12 B 2022-11-01 05:00:00 TRUE 4 0 NA
13 B 2022-11-01 06:00:00 FALSE 1 1 4
14 B 2022-11-01 07:00:00 FALSE 5 2 4
例如,第 1 行和第 2 行的斜率就是 lm(c(2, 1) ~ c(4, 3)) 的斜率系数,即 1。所以我想知道,是否有人知道如何在每个分组内,以条件行为基准计算每 n 行的斜率?
df 和 df_desired 的 dput 输出:
# Reproducible input `df`: 14 rows in two groups ("A", "B") of 7 hourly
# observations each. Built inside local() so the helper vectors do not leak
# into the workspace; only `df` is created.
df <- local({
  n_per_group <- 7L
  # POSIXct payload: seconds since the epoch, one step per hour.
  hourly_secs <- 1667260800 + 3600 * (0:6)

  columns <- list(
    group = rep(c("A", "B"), each = n_per_group),
    date = structure(
      rep(hourly_secs, times = 2),
      class = c("POSIXct", "POSIXt"),
      tzone = ""
    ),
    # Only the 5th row of each group is the condition row.
    indicator = rep(seq_len(n_per_group) == 5L, times = 2),
    value = c(
      2, 1, 4, 1, 3, 1, 3,
      1, 2, 3, 1, 4, 1, 5
    ),
    # Absolute distance in hours from the condition row of the group.
    diff_hours = rep(c(4, 3, 2, 1, 0, 1, 2), times = 2)
  )

  # Grouping metadata kept in the "groups" attribute: one row per group
  # together with the row indices that belong to it.
  group_index <- structure(
    list(
      group = c("A", "B"),
      .rows = structure(
        list(1:7, 8:14),
        ptype = integer(0),
        class = c("vctrs_list_of", "vctrs_vctr", "list")
      )
    ),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -2L),
    .drop = TRUE
  )

  structure(
    columns,
    class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -14L),
    groups = group_index
  )
})
# Expected result `df_desired`: the same 14 rows as the input plus a `slope`
# column (NA on the condition rows). Built inside local() so the helper
# vectors do not leak into the workspace; only `df_desired` is created.
df_desired <- local({
  n_per_group <- 7L
  # POSIXct payload: seconds since the epoch, one step per hour.
  hourly_secs <- 1667260800 + 3600 * (0:6)

  columns <- list(
    group = rep(c("A", "B"), each = n_per_group),
    date = structure(
      rep(hourly_secs, times = 2),
      class = c("POSIXct", "POSIXt"),
      tzone = ""
    ),
    # Only the 5th row of each group is the condition row.
    indicator = rep(seq_len(n_per_group) == 5L, times = 2),
    value = c(
      2, 1, 4, 1, 3, 1, 3,
      1, 2, 3, 1, 4, 1, 5
    ),
    # Absolute distance in hours from the condition row of the group.
    diff_hours = rep(c(4, 3, 2, 1, 0, 1, 2), times = 2),
    # One slope per pair of rows, repeated on both rows of the pair.
    slope = c(
      1, 1, 3, 3, NA, 2, 2,
      -1, -1, 2, 2, NA, 4, 4
    )
  )

  # Grouping metadata kept in the "groups" attribute: one row per group
  # together with the row indices that belong to it.
  group_index <- structure(
    list(
      group = c("A", "B"),
      .rows = structure(
        list(1:7, 8:14),
        ptype = integer(0),
        class = c("vctrs_list_of", "vctrs_vctr", "list")
      )
    ),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -2L),
    .drop = TRUE
  )

  structure(
    columns,
    row.names = c(NA, -14L),
    class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
    groups = group_index
  )
})