如何在 csv 中导出多个随机森林回归的多个 r2

发布于08月24日

我有一个CSV，它包含响应变量(称为NTL)和预测值.我正在运行随机森林(RF)回归来预测NTL值.因为我多次执行RF算法，所以我在预测器名称后面添加了一个数字(030,040)，以便在执行for循环时更方便(参见下面的代码).CSV的前六行如下所示:

head(block.data)
                 ntl  agbh030  agbh040   blue030   blue040    nir030    nir040   pop030   pop040  road030  road040  tirs030
1 52.94 1.773147 1.778323 0.7179366 0.7178904 0.9911458 0.9909238 33.98462 33.99507 152.4204 153.0130 25.39851
2 49.45 2.895677 2.893922 0.9956004 0.9937335 1.3766788 1.3741488 62.45861 62.34375 435.8899 434.7412 35.85143
3 49.12 3.996930 3.992193 1.0742660 1.0725741 1.4975094 1.4952157 78.47204 78.32093 412.6563 411.8447 37.86271
4 44.37 4.146727 4.142556 1.1628517 1.1606724 1.5424725 1.5395863 64.81855 64.78107 295.8405 295.5381 38.16120
5 39.97 4.324530 4.319309 1.1125427 1.1113752 1.4355043 1.4340464 69.18198 68.98098 289.2147 288.5684 36.89496
6 38.44 3.946339 3.943006 1.0540453 1.0531118 1.4150538 1.4135966 47.66867 47.65901 194.1575 193.8339 34.88610
   tirs040
1 25.39134
2 35.77946
3 37.79913
4 38.09968
5 36.84203
6 34.84804

如你所见，当我执行第一个循环时，我使用的是名称中有030的预测值，而在第二个循环中，名称中有040的预测值等等.

library("randomForest")
library("doFuture") 

wd <- "test/"

# Load the data
block.data <- read.csv(paste0(wd, "block.data.psf.csv"))
eq1 <- ntl ~ .

# for reproduciblity
set.seed(123)

foreach (i = seq(030, 040, by = 10)) %do% {
  std <- sprintf("%03.0f", i)
  
  popCol <- paste0("pop", std)
  tirsCol <- paste0("tirs", std)
  agbhCol <- paste0("agbh", std)
  roadCol <- paste0("road", std)
  blueCol <- paste0("blue", std)
  nirCol <- paste0("nir", std)

  testVect = c("ntl", 
               popCol,
               tirsCol,
               agbhCol,
               roadCol,
               blueCol,
               nirCol)
  
  subBlockData <- subset(block.data, select = testVect)
  
  # default RF model
  m1 <- randomForest(
    formula = eq1,
    data    = subBlockData)
  
  # number of trees with highest r2
  btree = which.max(m1$rsq)
  
  features <- subBlockData[, 2:7]
  
  m1 <- randomForest(
    formula = ntl ~ .,
    data    = subBlockData,
    ntree = btree,
    mtry = ncol(features)/3,
    importance = FALSE)
  
  m1 # "% Var explained" is the number I want in the csv
}

我想要的是创建一个CSV(称为r2.csv)，它将有两列(称为std和r2).第一列将在预测值名称之后包含数字(030,040等)，第二列将包含RF的r平方(在我的代码中称为M1).我怎么能做到这一点？

以下是我的CSV:

block.data = structure(list(ntl = c(52.93999863, 49.45000076, 49.11999893, 
44.36999893, 39.97000122, 38.43999863, 38.99000168, 40.88000107, 
37.47999954, 40.65000153, 44.79999924, 50.38000107, 49.38000107, 
68.80000305, 59.88000107), agbh030 = c(1.773146749, 2.89567709, 
3.996930361, 4.146727085, 4.324529648, 3.946338654, 3.430744886, 
2.554599762, 2.052541256, 1.614771366, 1.991117001, 2.35665369, 
0.801872492, 3.310310841, 5.005721092), agbh040 = c(1.778323054, 
2.893922091, 3.992192984, 4.14255619, 4.319309235, 3.943005562, 
3.425927639, 2.553015947, 2.049137354, 1.619293809, 1.997152209, 
2.3637712, 0.81288743, 3.306827068, 4.997349739), blue030 = c(0.717936635, 
0.995600402, 1.074265957, 1.162851691, 1.112542748, 1.05404532, 
0.972470403, 0.864918828, 0.797388136, 0.780449748, 0.717492938, 
0.659686863, 0.544718802, 0.954298615, 1.098036289), blue040 = c(0.717890441, 
0.993733525, 1.072574139, 1.160672426, 1.111375213, 1.053111792, 
0.971249402, 0.863858879, 0.796197593, 0.780405819, 0.717574954, 
0.661465645, 0.546935678, 0.95236063, 1.097277403), nir030 = c(0.99114579, 
1.376678824, 1.49750936, 1.542472482, 1.435504317, 1.415053844, 
1.312831402, 1.290184379, 1.151046991, 1.062780142, 0.961566031, 
0.851697564, 0.746279657, 1.258480549, 1.483649731), nir040 = c(0.990923822, 
1.374148846, 1.495215654, 1.539586306, 1.434046388, 1.41359663, 
1.31127429, 1.288556933, 1.149601102, 1.062715054, 0.961549222, 
0.854078174, 0.749200106, 1.256223083, 1.48290813), pop030 = c(33.98461914, 
62.45861053, 78.47203827, 64.81855011, 69.18198395, 47.66866684, 
85.6471405, 63.21319962, 17.80360603, 57.13486862, 88.91275024, 
58.64352036, 34.46327209, 54.0263176, 61.17818451), pop040 = c(33.99507141, 
62.34374619, 78.32093048, 64.78107452, 68.98097992, 47.65901184, 
85.38433838, 63.14036942, 17.84538078, 57.06893158, 88.97719574, 
58.98400879, 34.64493942, 53.96364212, 61.17742538), road030 = c(152.4203644, 
435.8898926, 412.6563416, 295.8404541, 289.2147217, 194.1574554, 
136.9489441, 213.915741, 195.634903, 275.4249573, 129.0567017, 
110.0097885, 35.32381439, 138.662796, 365.485321), road040 = c(153.013031, 
434.7412109, 411.8447266, 295.538147, 288.5683899, 193.8339233, 
137.1492004, 213.4812012, 196.2715302, 276.1116638, 129.8981934, 
110.5063858, 35.86360931, 139.2144165, 364.8994446), tirs030 = c(25.39851189, 
35.85142899, 37.86270905, 38.16120148, 36.8949585, 34.88610458, 
33.1179924, 29.98550034, 26.51055908, 26.58303452, 25.10464287, 
22.69356346, 18.32171249, 30.8216877, 36.87643814), tirs040 = c(25.39133835, 
35.77946091, 37.79912567, 38.09967804, 36.84203339, 34.84803772, 
33.07061386, 29.94636917, 26.48450851, 26.57940674, 25.10551834, 
22.75337219, 18.39633369, 30.77775002, 36.85960007)), class = "data.frame", row.names = c(NA, 
-15L))

如何在 csv 中导出多个随机森林回归的多个 r2

推荐答案

R相关问答推荐

在水平条形图中zoom x_轴

DT：：可数据的正规表达OR运算符问题

self_函数无法工作--无法子集结束后的列

如何判断某列中由某些行组成的百分比

如何将具有重复名称的收件箱合并到R中的另一列中，而结果不同？

使用lapply的重新定位功能

提取R中值和列名的所有可能组合

通过使用str_detect对具有相似字符串的组进行分组

迭代通过1个长度的字符串长字符R

如何优化向量的以下条件赋值？

如何对2个列表元素的所有组合进行操作？

将二进制数据库转换为频率表

从R中的对数正态分布生成随机数的正确方法

有没有可能用shiny 的书签恢复手风琴面板？

使用不同的定性属性定制主成分分析中点的 colored颜色和形状

`-`是否也用于数据帧，有时使用引用调用？

快速合并R内的值

抽样变换-REXP与RWEIBUR

位置_道奇在geom_point图中不躲避

从两个数据帧中，有没有办法计算R中一列的唯一值？