结论先行(BLUF):`sample_frac` 使用 `round`,而 `slice_sample` 使用 `floor`。由于 `volcano` 中有五个组的行数为奇数,这些组在后者(`slice_sample`)的采样结果中各少了一行,因此总共相差 5 行。
`sample_frac`(已被取代),截至提交 `5735f03` 的源码,请注意其中 `round` 的用法:
#' Sample a fraction of the rows of a data frame (superseded interface)
#'
#' Picks row positions with `sample.int()` and hands them to `slice()`.
#' The number of rows drawn is `round(n() * fraction)`, i.e. the fractional
#' row count is rounded rather than truncated.
#'
#' @param tbl Data frame to sample from.
#' @param size Fraction of rows to draw; captured unevaluated and passed
#'   through `check_frac()` together with `replace`.
#' @param replace Passed to `check_frac()` and to `sample.int()`.
#' @param weight Optional sampling weights; captured unevaluated and supplied
#'   as `prob` to `sample.int()`.
#' @param .env Deprecated. A non-NULL value only triggers a message.
#' @param ... Not used in this method body.
#' @return The result of `slice()` on `tbl` with the sampled row positions.
sample_frac.data.frame <- function(tbl, size = 1, replace = FALSE,
weight = NULL, .env = NULL, ...) {
# `.env` is accepted for backward compatibility only; it is never read below.
if (!is_null(.env)) {
inform("`.env` is deprecated and no longer has any effect")
}
# Capture the user's expressions so they can be unquoted (`!!`) inside the
# expression given to `slice()`.
size <- enquo(size)
weight <- enquo(weight)
# NOTE(review): presumably makes errors raised below point at this call;
# the helper is defined outside this snippet, so confirm there.
dplyr_local_error_call()
slice(tbl, local({
# `size` here is a new local inside `local()`: the row count to draw.
# `round()` is what distinguishes this from `slice_sample()`'s `floor()`.
size <- round(n() * check_frac(!!size, replace = replace))
sample.int(n(), size, replace = replace, prob = !!weight)
}))
}
`slice_sample`(当前版本),同样截至提交 `5735f03`:
#' Sample rows of a data frame
#'
#' Obtains a size function from `get_slice_size()`, then passes `slice()`
#' row positions drawn by `sample_int()`.
#'
#' @param .data Data frame to sample from.
#' @param ... Not used in this method body.
#' @param n,prop Forwarded to `get_slice_size()` to determine how many rows
#'   to draw.
#' @param weight_by Optional sampling weights, embraced with `{{ }}` inside
#'   the expression given to `slice()`. When non-NULL it is checked with
#'   `vec_assert()` to have size equal to `dplyr::n()`.
#' @param replace Passed to `sample_int()`; also forwarded to
#'   `get_slice_size()` as `allow_outsize`.
#' @return The result of `slice()` on `.data` with the sampled row positions.
slice_sample.data.frame <- function(.data, ..., n, prop, weight_by = NULL, replace = FALSE) {
# `size` is called below as `size(n)`, i.e. it is a function of the row count.
size <- get_slice_size(n = n, prop = prop, allow_outsize = replace)
# NOTE(review): presumably makes errors raised below point at this call;
# the helper is defined outside this snippet, so confirm there.
dplyr_local_error_call()
slice(.data, local({
weight_by <- {{ weight_by }}
# Shadows the `n` argument: from here on `n` is the value of `dplyr::n()`.
n <- dplyr::n()
if (!is.null(weight_by)) {
weight_by <- vec_assert(weight_by, size = n, arg = "weight_by")
}
sample_int(n, size(n), replace = replace, wt = weight_by)
}))
}
以及它的辅助函数(位于同一文件中),请注意这里 `floor` 的用法:
#' Build the function that turns a row count into a slice size
#'
#' Validates `n`/`prop` through `check_slice_n_prop()` and returns a closure
#' taking a row count `n` and producing the number of rows to keep.
#'
#' * Non-negative `n`: `floor()` of the requested count.
#' * Negative `n`: `ceiling()` of the row count plus the (negative) request.
#' * Non-negative `prop`: `floor()` of `prop * n`.
#' * Negative `prop`: `ceiling()` of `n + prop * n`.
#'
#' Every result goes through `clamp()` with `0` as the first argument and,
#' as the last argument, either the row count or `Inf` (the latter only in
#' the non-negative branches when `allow_outsize` is `TRUE`).
#' NOTE(review): `clamp()` is defined outside this snippet; presumably its
#' signature is (lower, value, upper) - confirm.
#'
#' @param n,prop Passed to `check_slice_n_prop()`.
#' @param allow_outsize If `TRUE`, the non-negative branches use `Inf`
#'   instead of the row count as the last `clamp()` argument.
#' @param error_call Forwarded to `check_slice_n_prop()`.
#' @return A function of one argument `n`. If the validated type is neither
#'   `"n"` nor `"prop"`, `NULL` is returned invisibly.
get_slice_size <- function(n,
                           prop,
                           allow_outsize = FALSE,
                           error_call = caller_env()) {
  spec <- check_slice_n_prop(n, prop, error_call = error_call)

  if (spec$type == "n") {
    if (spec$n >= 0) {
      return(
        function(n) clamp(0, floor(spec$n), if (allow_outsize) Inf else n)
      )
    }
    # Negative count: the request is added to the row count.
    return(function(n) clamp(0, ceiling(n + spec$n), n))
  }

  if (spec$type == "prop") {
    if (spec$prop >= 0) {
      return(
        function(n) clamp(0, floor(spec$prop * n), if (allow_outsize) Inf else n)
      )
    }
    # Negative proportion: note `ceiling()` here versus `floor()` above.
    return(function(n) clamp(0, ceiling(n + spec$prop * n), n))
  }
}
附注:`slice_sample` 允许使用负的 `prop`,此时结果又有所不同(负 `prop` 分支使用的是 `ceiling` 而不是 `floor`):`slice_sample(prop = -0.5)` 返回 48 行,而不是 `sample_frac` 得到的 44 行。