R 列名具有特殊字符时的循环回归

发布于02月23日

虚拟数据如下所示。我尝试修改我的循环回归代码以处理列名中的空格，但仍然报错。此外，我也不确定如何调整 `.x` 以处理自变量列名中的空格。代码的目的是让第一列依次对后续的每一列做回归，即 Canada Price ~ Global Price，然后是 Canada Price ~ Canada 2Y Bond Price，依此类推。

# Attempt to regress the first column on each column of `df`, one predictor
# at a time, and stack the tidied coefficient tables into one data frame whose
# "predictor" column holds the name of the column used as regressor.
# NOTE(review): `tidy` is presumably broom::tidy and `map_dfr`/`set_names`
# come from purrr -- the library() calls are not shown here.
# NOTE(review): the example data on this page is named `df1`, not `df` --
# confirm which object is intended.
lm.test <- map_dfr(
  # names(df) also contains the response column itself, so the first
  # iteration would regress the response on itself.
  set_names(names(df)),
  # paste() separates its arguments with a space by default, so the response
  # becomes "` Canada Price `" (spaces inside the backticks), which does not
  # match the real column name. `.x` is not wrapped in backticks at all, so a
  # predictor name containing spaces (e.g. Global Price) cannot be parsed by
  # as.formula().
  ~ tidy(lm(as.formula(paste("`",colnames(df[1]),"`","~", .x)),
            data = df)),
  .id = "predictor")

Canada Price	Global Price	Canada 2Y Bond Price	US - Canada Inflation	Oil Price
-0.030661468	-0.000207995	0.084	-0.075	0.002116
-0.034269304	-0.140212141	0.363	-1.062	-0.06122
0.02371464	0.019987	-0.781	0.273	0.209895
-0.007971484	0.090597341	-0.221	0.068	-0.10657
0.005342716	0.060627149	0.151	0.234	-0.22191
-0.002210376	0.046010712	-0.106	0.13	0.073975
0.018338299	0.019799534	0.036	0.005	0.058091
0.000662471	-0.025356047	0.523	0.045	-0.03765
0.002124411	0.045979342	-0.433	0.282	0.365933
0.034191555	0.043211347	-0.08	0.147	0.113365
-0.011470069	-0.033773176	0.396	-0.037	-0.09753
0.007039847	0.050327089	-0.052	0.151	0.145487
-0.028872959	-0.004501371	0.324	-0.214	0.064282
0.009244373	-0.00092413	0.166	-0.066	0.076961
0.017448201	-0.010638258	-0.179	0.053	0.108548
-0.002379819	0.050111674	0.352	-0.161	-0.11261
-0.002103406	0.03094893	0.121	0.106	0.130575
0.019154969	0.08307375	0.051	0.149	0.041074
-0.00124318	-0.054526954	0.345	0.032	0.079687
-0.001241636	0.003319001	-0.289	0.219	0.100941

Edit

dput格式的数据.

# Example data: 20 observations of five numeric series.
# The column names contain spaces (and a hyphen), so they are written with
# backticks and check.names = FALSE keeps them exactly as typed instead of
# converting them to syntactic names.
df1 <- data.frame(
  `Canada Price` = c(
    -0.030661468, -0.034269304, 0.02371464, -0.007971484, 0.005342716,
    -0.002210376, 0.018338299, 0.000662471, 0.002124411, 0.034191555,
    -0.011470069, 0.007039847, -0.028872959, 0.009244373, 0.017448201,
    -0.002379819, -0.002103406, 0.019154969, -0.00124318, -0.001241636
  ),
  `Global Price` = c(
    -0.000207995, -0.140212141, 0.019987, 0.090597341, 0.060627149,
    0.046010712, 0.019799534, -0.025356047, 0.045979342, 0.043211347,
    -0.033773176, 0.050327089, -0.004501371, -0.00092413, -0.010638258,
    0.050111674, 0.03094893, 0.08307375, -0.054526954, 0.003319001
  ),
  `Canada 2Y Bond Price` = c(
    0.084, 0.363, -0.781, -0.221, 0.151, -0.106, 0.036, 0.523, -0.433, -0.08,
    0.396, -0.052, 0.324, 0.166, -0.179, 0.352, 0.121, 0.051, 0.345, -0.289
  ),
  `US - Canada Inflation` = c(
    -0.075, -1.062, 0.273, 0.068, 0.234, 0.13, 0.005, 0.045, 0.282, 0.147,
    -0.037, 0.151, -0.214, -0.066, 0.053, -0.161, 0.106, 0.149, 0.032, 0.219
  ),
  `Oil Price` = c(
    0.002116, -0.06122, 0.209895, -0.10657, -0.22191, 0.073975, 0.058091,
    -0.03765, 0.365933, 0.113365, -0.09753, 0.145487, 0.064282, 0.076961,
    0.108548, -0.11261, 0.130575, 0.041074, 0.079687, 0.100941
  ),
  check.names = FALSE
)

# Fit one simple regression of the first column of `df1` on each of the other
# columns, then print the coefficient table of every fit.
suppressPackageStartupMessages({
  library(magrittr)
  library(purrr)
})

# Create the formula with 'value' as regressor;
# it will be the column name after pivoting to long format.
# The response is passed as a symbol (as.name) rather than as text, so the
# column name containing a space does not have to be pasted into a string.
fmla <- reformulate("value", response = names(df1)[1L] |> as.name())

# Run the regressions: pivot every column except the first into name/value
# pairs, split the long table by the original column name, and fit the same
# formula on each piece.
lm_fit_list <- df1 %>%
  tidyr::pivot_longer(-1L) %>%
  split(.$name) %>%
  map(\(df) lm(fmla, data = df))

# This gives some statistics.
lm_smry_list <- lm_fit_list %>% map(summary)
lm_smry_list %>% map(coef)
#> $`Canada 2Y Bond Price`
#>                 Estimate  Std. Error    t value   Pr(>|t|)
#> (Intercept)  0.001736073 0.003667346  0.4733866 0.64162781
#> value       -0.025789455 0.011690588 -2.2060015 0.04061964
#>
#> $`Global Price`
#>                 Estimate  Std. Error    t value   Pr(>|t|)
#> (Intercept) -0.001515084 0.003712875 -0.4080622 0.68804100
#> value        0.164831106 0.070018144  2.3541199 0.03012634
#>
#> $`Oil Price`
#>                 Estimate  Std. Error    t value  Pr(>|t|)
#> (Intercept) -0.001424249 0.004112478 -0.3463238 0.7331147
#> value        0.046411939 0.030404220  1.5264966 0.1442663
#>
#> $`US - Canada Inflation`
#>                 Estimate  Std. Error    t value    Pr(>|t|)
#> (Intercept) 0.0001923356 0.003176245 0.06055441 0.952381362
#> value       0.0393945114 0.011325922 3.47826091 0.002683133

Edit

在输出列表上运行summary之后，您可以从第二个列表(摘要)中提取所需的值.

# Fit one simple regression of the first column of `df1` on each of the other
# columns and collect all coefficient tables into a single data frame.
suppressPackageStartupMessages({
  library(magrittr)
  library(purrr)
})

# Create the formula with 'value' as regressor;
# it will be the column name after pivoting to long format.
# The response is passed as a symbol (as.name) rather than as text, so the
# column name containing a space does not have to be pasted into a string.
fmla <- reformulate("value", response = names(df1)[1L] |> as.name())

# Run the regressions: one fit per original predictor column.
lm_fit_list <- df1 %>%
  tidyr::pivot_longer(-1L) %>%
  split(.$name) %>%
  map(\(df) lm(fmla, data = df))

lm_smry_list <- lm_fit_list %>% map(summary)

# Prefix each coefficient matrix with the name of its predictor column and
# stack the per-model tables row-wise.
stats <- lm_smry_list %>%
  map(coef) %>%
  map2(names(lm_smry_list), \(x, y) {
    data.frame(Variable = y) %>% cbind(x)
  }) %>%
  list_rbind()

# Dropping the row names removes the "(Intercept)" / "value" labels: within
# each Variable the first row is the intercept and the second is the slope.
row.names(stats) <- NULL
stats
#>                Variable      Estimate  Std. Error     t value    Pr(>|t|)
#> 1  Canada 2Y Bond Price  0.0017360725 0.003667346  0.47338661 0.641627809
#> 2  Canada 2Y Bond Price -0.0257894550 0.011690588 -2.20600149 0.040619640
#> 3          Global Price -0.0015150839 0.003712875 -0.40806217 0.688041004
#> 4          Global Price  0.1648311056 0.070018144  2.35411990 0.030126342
#> 5             Oil Price -0.0014242490 0.004112478 -0.34632381 0.733114717
#> 6             Oil Price  0.0464119390 0.030404220  1.52649664 0.144266337
#> 7 US - Canada Inflation  0.0001923356 0.003176245  0.06055441 0.952381362
#> 8 US - Canada Inflation  0.0393945114 0.011325922  3.47826091 0.002683133

创建于 2024-02-23，使用 reprex v2.0.2 生成

R 列名具有特殊字符时的循环回归

Edit

推荐答案

Edit

R相关问答推荐

按崩溃类别分类的指数

如何在ggplot 2线性图的每个方面显示每个组的误差条？

r中的stat_difference函数不起作用

ggplot geom_smooth()用于线性回归虚拟变量-没有回归线

dplyr mutate case_when grepl不能在R中正确返回值

根据日期从参考帧中创建不同的帧

将文件保存到新文件夹时，切换r设置以不必创建目录

从圆到R中的多边形的标绘雷达图

根据r中另一个文本列中给定的范围对各列求和

如何提取R中其他字符串和数字之间的字符串？

如何筛选截止年份之前最后一个测量年度的所有观测值以及截止年份之后所有年份的所有观测值

根据r中每行中的日期序列，使用列名序列创建新列

在ggplot中创建GEV分布时出错

整理曲线图、曲线图和点图

按镜像列值自定义行顺序

按组使用dummy r获取高于标准的行的平均值

抽样变换-rexp与rweibull

对数据帧中的列进行子集设置以通过迭代创建新的数据帧

通过分析特定列中的字符串在数据框中创建新的行和列

如何在分组蜂群小区中正确定位标签