我必须根据1e6点的网格计算出的三个条件对许多农作物进行分类.我正在try 优化下面的函数(希望不转移到C或Rust).有什么 idea 吗?

如有必要,可以重新格式化输入数据.我已经试过data.table次了,但表现更差.

这是我最好的镜头:

condtion1 <- letters[1:8]
condtion2 <- letters[9:15]
condtion3 <- letters[16:24]

crop <- sample(0:1, 24, replace = T)
names(crop) <- letters[1:24]

n <- 1e6

condtions1 <- sample(condtion1, n, replace = T)
condtions2 <- sample(condtion2, n, replace = T)
condtions3 <- sample(condtion3, n, replace = T)

get_suitability <- function(){
  result <- character(n)
  
  for (i in seq_along(result)) {
    if (crop[[condtions1[[i]]]] == 0 | crop[[condtions2[[i]]]] == 0) result[[i]] <- "not suitable"
    else if(crop[[condtions1[[i]]]] == 1 & crop[[condtions2[[i]]]] == 1 & crop[[condtions3[[i]]]] == 1) result[[i]] <- "suitable"
    else if(crop[[condtions1[[i]]]] == 1 & crop[[condtions2[[i]]]] == 1 & crop[[condtions3[[i]]]] == 0) result[[i]] <- "suitable with irrigation"
  }
  result
}


microbenchmark::microbenchmark(
  get_suitability(),
  times = 5
)
#> Unit: seconds
#>               expr      min       lq     mean   median       uq      max neval
#>  get_suitability() 2.402434 2.408322 2.568981 2.641211 2.667943 2.724993     5

创建于2024-03-24,共reprex v2.1.0

推荐答案

Vectorise over the condtions getting rid of for/if. The logical indices take care of both for and if.
In a comment to the question I write:

你可以初始化result <- rep("not suitable", n)并从循环中删除其中一个if.

注:

  • get_suitability2是我对这个问题的 comments ,结果是一个坏主意;
  • get_suitability3bget_suitability3的简化版本,也是最快的.
  • get_suitability4user2554330的最后一个功能,比原来的问题代码快.
condtion1 <- letters[1:8]
condtion2 <- letters[9:15]
condtion3 <- letters[16:24]

crop <- sample(0:1, 24, replace = T)
names(crop) <- letters[1:24]

n <- 1e6

condtions1 <- sample(condtion1, n, replace = T)
condtions2 <- sample(condtion2, n, replace = T)
condtions3 <- sample(condtion3, n, replace = T)

get_suitability <- function(){
  result <- character(n)
  
  for (i in seq_along(result)) {
    if (crop[[condtions1[[i]]]] == 0 | crop[[condtions2[[i]]]] == 0) result[[i]] <- "not suitable"
    else if(crop[[condtions1[[i]]]] == 1 & crop[[condtions2[[i]]]] == 1 & crop[[condtions3[[i]]]] == 1) result[[i]] <- "suitable"
    else if(crop[[condtions1[[i]]]] == 1 & crop[[condtions2[[i]]]] == 1 & crop[[condtions3[[i]]]] == 0) result[[i]] <- "suitable with irrigation"
  }
  result
}
get_suitability2 <- function(){
  result <- rep("not suitable", n)
  for (i in seq_along(result)) {
    if(crop[[condtions1[[i]]]] == 1 & crop[[condtions2[[i]]]] == 1 & crop[[condtions3[[i]]]] == 1) result[[i]] <- "suitable"
    else if(crop[[condtions1[[i]]]] == 1 & crop[[condtions2[[i]]]] == 1 & crop[[condtions3[[i]]]] == 0) result[[i]] <- "suitable with irrigation"
  }
  result
}
get_suitability3 <- function(){
  result <- rep("not suitable", n)
  i1 <- crop[ condtions1 ] == 1 
  i2 <- crop[ condtions2 ] == 1
  i3 <- crop[ condtions3 ] == 1
  result[i1 & i2 & i3] <- "suitable"
  result[i1 & i2 & !i3] <- "suitable with irrigation"
  result
}
get_suitability3b <- function(){
  result <- rep("not suitable", n)
  i1 <- crop[ condtions1 ] == 1 & crop[ condtions2 ] == 1
  i3 <- crop[ condtions3 ] == 1
  result[i1 & i3] <- "suitable"
  result[i1 & !i3] <- "suitable with irrigation"
  result
}
get_suitability4 <- function(){
  result <- ifelse(crop[condtions1] == 0 | 
                     crop[condtions2] == 0, "not suitable",
                   ifelse(crop[condtions3] == 1, "suitable", 
                          "suitable with irrigation"))
  names(result) <- NULL
  result
}

library(microbenchmark)

res <- get_suitability()
res2 <- get_suitability2()
res3 <- get_suitability3()
res3b <- get_suitability3b()
res4 <- get_suitability4()

identical(res, res2)
#> [1] TRUE
identical(res, res3)
#> [1] TRUE
identical(res, res3b)
#> [1] TRUE
identical(res, res4)
#> [1] TRUE

mb <- microbenchmark(
  get_suitability(),
  get_suitability2(),
  get_suitability3(),
  get_suitability3b(),
  get_suitability4(),
  times = 5L
)
print(mb, order = "median")
#> Unit: milliseconds
#>                 expr       min        lq      mean    median        uq
#>  get_suitability3b()  120.5004  123.8272  144.3827  137.7121  158.9400
#>   get_suitability3()  130.9886  141.4570  158.9099  154.2719  179.9035
#>   get_suitability4()  630.0646  651.2294  677.3693  687.7445  703.8762
#>    get_suitability() 1496.4989 1522.9126 1540.5882 1535.8001 1566.6336
#>   get_suitability2() 2999.3825 3008.2696 3064.8530 3083.5560 3102.7165
#>        max neval  cld
#>   180.9339     5   c 
#>   187.9287     5   c 
#>   713.9316     5    d
#>  1581.0956     5 a   
#>  3130.3405     5  b

创建于2024—03—24,reprex v2.1.0

R相关问答推荐

给定R中另一行中的值,如何插补缺失值

提取R中值和列名的所有可能组合

为什么观察不会被无功值变化触发?

自动变更列表

当月份额减go 当月份额

在组中添加值增加和减少的行

Ggplot2中的重复注记

在df中保留原始变量和新变量

移除仪表板Quarto中顶盖和车身之间的白色区域

将饼图插入条形图

在rpart. plot或fancyRpartPlot中使用带有下标的希腊字母作为标签?

迭代到DataFrame列并获得成对的值列表(col1->;col2、col2->;col3、col3->;col4等)的正确方法.

跨列查找多个时间报告

在多页PDF中以特定布局排列的绘图列表不起作用

如何将一列中的值拆分到R中各自的列中

当每个变量值只能 Select 一次时,如何从数据框中 Select 两个变量的组合?

为什么我对圆周率图的蒙特卡罗估计是空的?

我如何使用循环来编写冗余的Rmarkdown脚本?

使用来自嵌套列和非嵌套列的输入的PURRR:MAP和dplyr::Mariate

reshape 数据帧-基于组将行转换为列