我认为这可以用游程编码(`base::rle`、`dplyr::consecutive_id` 或 `data.table::rleid` 三者之一)加上一些比较简单的过滤组合起来实现。
下面您会看到 `2*365+2` 被用作判别阈值:`difftime`(用于 POSIXt 时间差)的 `units` 参数没有 "year" 这个选项,所以我们需要以天为单位来比较,并且 2002–2007 年之间有一个闰年。
dplyr
library(dplyr)
# Label each run of consecutive rows by whether `value <= -50`, keep the last
# row of every run that is at or below -50, then keep the first of those rows
# plus any row that lies at least 2 * 365 + 2 days after that first row.
# `difftime()` has no "year" unit, so the gap is expressed in days.
dat %>%
  group_by(grp = consecutive_id(value <= -50)) %>%
  filter(any(value <= -50), row_number() == n()) %>%
  ungroup() %>%
  filter(
    row_number() == 1L |
      difftime(time, time[1L], units = "days") >= (2 * 365 + 2)
  )
# # A tibble: 2 × 3
# time value grp
# <date> <dbl> <int>
# 1 2002-12-31 -52.4 1
# 2 2007-12-31 -52.4 5
data.table
library(data.table)
# Per run of `value <= -50` (run ids from `rleid`), keep only the last row of
# runs that are at or below -50; then keep the first of those rows plus any
# row that lies at least 2 * 365 + 2 days after that first row.
# `seq_len(.N)` is used for the row index so an empty result stays empty, and
# the `difftime()` unit is spelled out in full ("days").
as.data.table(dat)[
  ,
  .SD[any(value <= -50), .(time, value)][.N],
  by = .(grp = rleid(value <= -50))
][
  seq_len(.N) == 1L |
    difftime(time, time[1L], units = "days") >= (2 * 365 + 2)
]
# grp time value
# <int> <Date> <num>
# 1: 1 2002-12-31 -52.44
# 2: 5 2007-12-31 -52.40
base R
(base R 的写法需要多做一点工作。)
# A home-grown base-R version of `rleid` and `consecutive_id` above.
#
# Args:
#   ...: one or more vectors; shorter ones are recycled to the longest length.
# Returns:
#   An integer vector of run ids that starts at 1 and increases by one each
#   time the value of any input differs from the previous position. Returns
#   `integer(0)` when there is nothing to label.
#
# Inputs are compared column by column (not pasted into one string key), so
# values containing "_" (e.g. "a_b", "c" versus "a", "b_c") or the literal
# string "NA" next to a real NA cannot collapse into the same run, and doubles
# are compared exactly. Consecutive missing values count as one run.
my_rleid <- function(...) {
  cols <- list(...)
  n <- if (length(cols) > 0L) max(lengths(cols)) else 0L
  if (n == 0L) {
    return(integer(0))
  }
  # changed[i] is TRUE when position i + 1 differs from position i.
  changed <- logical(n - 1L)
  for (col in cols) {
    col <- rep_len(col, n)
    cur <- col[-1L]
    prev <- col[-n]
    both_na <- is.na(cur) & is.na(prev)
    equal <- !is.na(cur) & !is.na(prev) & cur == prev
    changed <- changed | !(both_na | equal)
  }
  # The first position always opens run 1; every change opens the next run.
  cumsum(c(TRUE, changed))
}
# Base-R equivalent of the pipelines above: tag each run of `value <= -50`
# with `my_rleid`, keep the last row of every run that is at or below -50
# (`ave` evaluates the test within each run), then keep the first of those
# rows plus any row that lies at least 2 * 365 + 2 days after that first row.
# `difftime()` has no "year" unit, so the gap is expressed in days.
dat |>
  transform(grp = my_rleid(value <= -50)) |>
  subset(
    ave(
      value <= -50, grp,
      FUN = function(z) any(z) & seq_along(z) == length(z)
    )
  ) |>
  subset(
    seq_along(time) == 1L |
      difftime(time, time[1L], units = "days") >= (2 * 365 + 2)
  )
# time value grp
# 2 2002-12-31 -52.44 1
# 7 2007-12-31 -52.40 5