对于带替换的抽样(参见下文以获取参考和模拟判断):
frexp <- function(m, R, x) {
y <- rexp(R)
for (r in 1:R) {
cs <- cumsum(rexp(m))
sample.data <- x[ceiling(cs*length(x)/(cs[m] + y[r]))]
}
}
对于不更换的抽样:
fbool2 <- function(m, R, x) {
n <- length(x)
b <- logical(n)
for (r in 1:R) {
b[i <- sample(n, m)] <- TRUE
sample.data <- x[b]
b[i] <- FALSE
}
}
用于基准测试的其他功能,包括不需要排序的功能:
fNoSort <- function(m, R, x) for (r in 1:R) sample.data <- sample(x, m)
fsort <- function(m, R, x) for (r in 1:R) sample.data <- sort(sample(x, m))
fbool1 <- function(m, R, x) { # from @lotus
for (r in 1:R) sample.data <- x[sample(rep.int(!0:1, c(m, length(x) - m)))]
}
基准:
microbenchmark::microbenchmark(
fNoSort = fNoSort(m, R, all.data),
fsort = fsort(m, R, all.data),
fbool1 = fbool1(m, R, all.data),
fbool2 = fbool2(m, R, all.data),
frexp = frexp(m, R, all.data)
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> fNoSort 8.3386 8.52895 9.746159 8.80505 9.37925 19.4995 100
#> fsort 35.6090 38.05500 40.689749 39.67485 41.73925 79.2346 100
#> fbool1 29.2213 29.64240 31.838542 30.65475 32.70145 40.3151 100
#> fbool2 10.2221 10.43100 11.616368 10.79390 12.22930 19.7826 100
#> frexp 5.0672 5.16110 6.028450 5.30465 5.81935 16.2895 100
frexp
甚至比sample
without the sort还快.
快速模拟以判断frexp
中使用的指数是否构成带有替换的(排序的)均匀随机样本:
(seed <- sample(.Machine$integer.max, 1))
#> [1] 904968452
set.seed(seed)
n <- 20 # sample the numbers 1 through 20
N <- 1e6 # number of samples
tabulate(sample(n, N, 1), n)
#> [1] 50176 50058 50270 50279 50208 49722 50005 49668 49856 50196 49890 49798
#> [13] 50163 49489 49918 49698 50052 50240 50333 49981
tabulate(ceiling((cs <- cumsum(rexp(N)))*n/(cs[N] + rexp(1))), n)
#> [1] 49918 49883 49957 50063 49815 50226 49792 49970 49879 49788 50119 49965
#> [13] 50417 49832 50059 50281 49930 49904 50416 49786
参考资料:
https://math.stackexchange.com/questions/74218/relations-between-order-statistics-of-uniform-rvs-and-exponential-rvs
https://stackoverflow.com/a/63430876/9463489
https://djalil.chafai.net/blog/2014/06/03/back-to-basics-order-statistics-of-exponential-distribution/