我有一个很大的数据集,需要为其预测一些值。下面是一个可重现的小例子。我想使用 GAM 或 GLM 对未来几年进行预测,但由于没有统计学背景,我不太明白该怎么做。有人可以指导我如何进行预测,并将预测值添加到我的原始数据集中吗?

library(mgcv)
library(tidyverse)
# Reproducible sample: 31 `turb` readings dated between August and December
# of the years 2003-2007. Assembled as a plain list carrying the tibble
# classes, so the object can be created without any package.
m <- structure(
  list(
    # Rows per year, in order: 9, 9, 3, 5, 5.
    year = rep(c(2003, 2004, 2005, 2006, 2007), times = c(9, 9, 3, 5, 5)),
    month = c(
      # 2003
      "August", "August", "September", "September", "October", "October",
      "November", "November", "December",
      # 2004
      "August", "August", "September", "September", "October", "October",
      "November", "November", "December",
      # 2005
      "August", "August", "September",
      # 2006
      "October", "October", "November", "November", "December",
      # 2007
      "August", "August", "September", "September", "October"
    ),
    # Date values are stored as day counts since 1970-01-01.
    date = structure(
      c(
        12265, 12281, 12296, 12311, 12326, 12342, 12357, 12372, 12387,
        12631, 12647, 12662, 12677, 12692, 12708, 12723, 12738, 12753,
        12996, 13012, 13027,
        13422, 13438, 13453, 13468, 13483,
        13726, 13742, 13757, 13772, 13787
      ),
      class = "Date"
    ),
    turb = c(
      12.5, 11.8, 8.9, 6.2, 5.1, 3.4, -0.1, 0, -0.1,
      12.5, 10.4, 8.6, 6.1, 4.7, 1.4, 0.1, -0.1, -0.1,
      12.5, 11, 8,
      4.4, 1.1, 0.5, -0.2, -0.2,
      12.5, 11.7, 10, 5.9, 3.6
    )
  ),
  row.names = c(NA, -31L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Fit a GAM of `turb` on a smooth of time. The smooth needs a numeric
# predictor, so the day count is stored in a separate `datenum` column:
# overwriting `date` itself would drop its Date class, which any later date
# arithmetic (e.g. generating future dates) depends on.
m$datenum <- as.numeric(m$date)
mod1 <- gam(turb ~ s(datenum), data = m)
summary(mod1)

#How can I predict 3 more years (to 2010) and have those predictions
#added to the bottom of the original dataset(m)?

推荐答案

也许我理解错了,但我认为你是想根据 `date` 的平滑项来预测 `turb`。如果是这样,可以这样做:

# `%m+%` and the period constructor months() come from lubridate, which
# library(tidyverse) only attaches from tidyverse 2.0.0 onwards.
library(lubridate)

# The date arithmetic below needs `date` as a Date. If it was previously
# overwritten with its numeric day count, turn it back into a Date first.
if (!inherits(m$date, "Date")) {
  m$date <- as.Date(m$date, origin = "1970-01-01")
}

# Numeric copy of the date for the smooth; `date` itself stays unchanged.
m$datenum <- as.numeric(m$date)
mod1 <- mgcv::gam(turb ~ s(datenum), data = m)

# Prediction grid: the last observed date shifted by 0..36 months, each
# combined with an offset of 0 and 15 days. The first element is the last
# observed date itself, so it is dropped with [-1].
last_obs <- max(m$date)
month_steps <- last_obs %m+% months(0:36)
new_m <- tibble(date = c(outer(month_steps, c(0, 15), `+`))[-1]) %>%
  mutate(datenum = as.numeric(date))
new_m
# # A tibble: 73 × 2
#    date       datenum
#    <date>       <dbl>
#  1 2007-11-01   13818
#  2 2007-12-01   13848
#  3 2008-01-01   13879
#  4 2008-02-01   13910
#  5 2008-03-01   13939
#  6 2008-04-01   13970
#  7 2008-05-01   14000
#  8 2008-06-01   14031
#  9 2008-07-01   14061
# 10 2008-08-01   14092
# # ℹ 63 more rows
# # ℹ Use `print(n = ...)` to see more rows
# Predicted `turb` for every row of `new_m`, printed with one name per row.
# All of these dates lie after the last observation used to fit `mod1`, so
# the values are extrapolations of the fitted smooth.
stats::predict(object = mod1, newdata = new_m)
#           1           2           3           4           5           6           7           8           9          10          11          12          13 
#   -1.246062   -6.392131  -11.709736  -17.027341  -22.001874  -27.319479  -32.465548  -37.783153  -42.929222  -48.246827  -53.564432  -58.710501  -64.028106 
#          14          15          16          17          18          19          20          21          22          23          24          25          26 
#  -69.174175  -74.491780  -79.809385  -84.612383  -89.929987  -95.076057 -100.393661 -105.539731 -110.857335 -116.174940 -121.321010 -126.638614 -131.784684 
#          27          28          29          30          31          32          33          34          35          36          37          38          39 
# -137.102288 -142.419893 -147.222891 -152.540496 -157.686565 -163.004170 -168.150239 -173.467844 -178.785449 -183.931518    1.498509   -3.819096   -8.965165 
#          40          41          42          43          44          45          46          47          48          49          50          51          52 
#  -14.282770  -19.600375  -24.574909  -29.892514  -35.038583  -40.356188  -45.502257  -50.819862  -56.137466  -61.283536  -66.601140  -71.747210  -77.064814 
#          53          54          55          56          57          58          59          60          61          62          63          64          65 
#  -82.382419  -87.185417  -92.503022  -97.649091 -102.966696 -108.112765 -113.430370 -118.747975 -123.894044 -129.211649 -134.357718 -139.675323 -144.992928 
#          66          67          68          69          70          71          72          73 
# -149.795926 -155.113531 -160.259600 -165.577205 -170.723274 -176.040879 -181.358483 -186.504553 

然后可以将这些预测值与原始数据合并:

# Predict `turb` for the new dates, derive the same `year` / `month` columns
# that `m` has, then stack the predictions with the observations and sort
# everything by date.
combined <- new_m %>%
  mutate(
    # The prediction comes back as a 1-d array; as.numeric() turns it into a
    # plain double vector so `turb` has the same type as in `m`.
    # pick(everything()) (dplyr >= 1.1.0) replaces the deprecated cur_data().
    turb = as.numeric(predict(mod1, newdata = pick(everything()))),
    year = as.integer(format(date, format = "%Y")),
    # month.name is locale-independent, unlike format(date, "%B"), so the
    # labels always match the English month names stored in `m`.
    month = month.name[as.integer(format(date, format = "%m"))]
  ) %>%
  bind_rows(m) %>%
  arrange(date)
combined
# # A tibble: 104 × 5
#    date       datenum  turb  year month    
#    <date>       <dbl> <dbl> <dbl> <chr>    
#  1 2003-08-01   12265  12.5  2003 August   
#  2 2003-08-17   12281  11.8  2003 August   
#  3 2003-09-01   12296   8.9  2003 September
#  4 2003-09-16   12311   6.2  2003 September
#  5 2003-10-01   12326   5.1  2003 October  
#  6 2003-10-17   12342   3.4  2003 October  
#  7 2003-11-01   12357  -0.1  2003 November 
#  8 2003-11-16   12372   0    2003 November 
#  9 2003-12-01   12387  -0.1  2003 December 
# 10 2004-08-01   12631  12.5  2004 August   
# # ℹ 94 more rows
# # ℹ Use `print(n = ...)` to see more rows
tail(combined)
# # A tibble: 6 × 5
#   date       datenum  turb  year month    
#   <date>       <dbl> <dbl> <dbl> <chr>    
# 1 2010-08-01   14822 -173.  2010 August   
# 2 2010-08-16   14837 -176.  2010 August   
# 3 2010-09-01   14853 -179.  2010 September
# 4 2010-09-16   14868 -181.  2010 September
# 5 2010-10-01   14883 -184.  2010 October  
# 6 2010-10-16   14898 -187.  2010 October  

R相关问答推荐

高质量地将R格式的图表从Word中输出

根据收件箱中的特定值提取列名

用值序列对行进行子集化,并标识序列开始的列

根据文本字符串中的值粘贴新列

如何得到每四个元素向量R?

如何同时从多个列表中获取名字?

使用带有OR条件的grepl过滤字符串

以相同的方式对每个表进行排序

在GG图中绘制射线的自动程序

在R中创建连续的期间

R:如果为NA,则根据条件,使用列名模式将缺少的值替换为另一列中的值

将多个变量组合成宽格式

在点图上绘制置信度或预测区间ggplot2

在R中,如何从一系列具有索引名的变量快速创建数据帧?

多元正态分布的计算

使用列中的值来调用函数调用中应使用的其他列

是否有一个R函数可以输出在输入的字符向量中找到的相应正则表达式模式?

将边列表转换为路径长度列表

geom_sf 图中的图例显示了错误的颜色

创建两个变量组合的索引矩阵