我正试着把年度数据插入月度数据.我在网上看过几个例子,但我不能让它们工作 Here's onehere's another,我得到了各种错误.

我的数据是这样的

structure(list(countryname = c("Aruba", "Aruba", "Aruba", "Aruba", 
"Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
"Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
"Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
"Aruba", "Aruba", "Aruba", "Aruba", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Anguilla", 
"Anguilla", "Anguilla", "Anguilla", "Anguilla", "Anguilla", "Anguilla", 
"Anguilla", "Anguilla", "Anguilla"), year = c(1986, 1987, 1988, 
1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 
2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 
2011, 2012, 2013, 2014, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 
1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 
1976, 1977, 1978, 1979, 1980, 1981, 1982, 1984, 1985, 1986, 1987, 
1988, 1989, 1990, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 
2014, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 1990, 
1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999), total = c(286.511, 
344.328, 467.524, 539.787, 609.052, 690.367, 742.735, 793.093, 
946.521, 1017.982, 1121.778, 1185.47, 1248.148, 1376.097, 1502.349, 
1581.668, 1783.635, 2012.886, 2090.128, 2283.757, 2382.169, 2479.276, 
2578.016, 2555.033, 2582.489, 2656.015, 2747.218, 2888.874, 3009.389, 
1527, 2037, 2244, 2162, 1760, 1713, 1867, 2252, 2395, 2449, 2488.4, 
2872.3, 2853.1, 2928, 3660.3, 4280.4, 5214.8, 5016.4, 6243.1, 
5493.6, 6770.4, 9789.5, 9603.1, 10053, 4185.38, 4787.87, 6728.06, 
8773.05, 12698.5, 17579.82, 19972.779, 26547.45, 16855.486058, 
33202.5772169922, 48354.2870012583, 64742.29199, 84061.57373, 
42170.25521, 46008.7436596187, 49215.7766771853, 46365.3341990244, 
0.718514, 26.547404, 90.561909, 90.0346783, 441.87615963, 1845.82841034, 
7453.3254978, 23556.243282338, 54332.057918413, 89699.02591742, 
138396.82327341, 273708.10979785, 492519.2833002, 801048.13352684, 
1285702.23670599, 1532272.06111931, 1973567.61658344, 2451384.32434691, 
2819717.17874511, 2852043.16889253, 135.972994003056, 165.698, 
190, 200.98, 199.009, 212.701, 240.183, 282.136, 308.716, 358.751
), hh = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
12409.359888, 2669.05197895018, 34449.662, 3595.39872, 10187.97902, 
10224.02617, 11409.47128, 9743.61473, 8246.96039, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 50.331, 55.832, 79.786, 85.04, 93.316, 104.006, 126.771, 
147.306, 171.725), corp = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 4446.12617, 30533.525238042, 13904.6250012583, 
61146.89327, 73873.59471, 31946.22904, 34599.2723796187, 39472.1619471853, 
38118.3738090244, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 115.367, 134.168, 121.194, 
113.969, 119.385, 136.177, 155.365, 161.41, 187.026), agr = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1.08, 2.09, 1.415, 1.276, 
1.703, 1.85, 2.434, 2.385, 2.161), manufac = c(NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 0.103556200987511, 0.241631135637525, 
0.691812953819343, 1.02980888759802, 4.50900958466454, 5.02391402846355, 
4.91891954690677, 4.36805808887598, 9.904), real_estate = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13.327, 13.813, 6.75, 
13.223, 11.223, 13.284, 13.509, 11.515, 11.234), whole_sale = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 77.119, 92.189, 91.358, 
79.837, 76.949, 79.75, 92.241, 95.118, 109.549), transport_comm = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 14.572, 13.047, 12.774, 
11.398, 8.998, 10.174, 10.226, 10.132, 8.803), others = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 9.16544379901249, 12.7873688643625, 
8.20518704618064, 7.20519111240198, 16.0029904153354, 26.0950859715364, 
32.0360804530932, 37.891941911124, 45.375), error = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_)), row.names = c(NA, 
-100L), class = c("tbl_df", "tbl", "data.frame"))

请注意,这是一个国家的面板,因此插值应分别 for each 国家执行.

到目前为止,我最好的try 是改编第二个环节中的例子,

expand_data <- function(x) {
  years <- min(x$year):max(x$year)
  months_data <- 1:12
  grid <- expand.grid(month=months_data, year=years)
  x$quarter <- 1
  merged <- merge(grid, x, by=c('year', 'month'), all.x=TRUE)  # Use merge instead of left_join
  merged$country <- x$country[1]
  return(merged)
}

interpolate_data <- function(data) {
  xout <- 1:nrow(data)
  y <- data$value
  interpolation <- approx(x=xout[!is.na(y)], y=y[!is.na(y)], xout=xout)
  data$value_interpolated <- interpolation$y
  return(data)
}

expand_and_interpolate <- function(x) interpolate_data(expand_data(x))

monthly_data <- credit_data %>%
  group_by(countryname) %>%
  do(expand_and_interpolate(.))

但我得到了这个错误

Error in fix.by(by.y, y) : 'by' must specify a uniquely valid column

推荐答案

此过程将year转换为Date类列,然后将其扩展为第一个和最后一个观察日期之间的每个月.从那里开始,它在所有其他领域使用approx.

library(dplyr)
out <- credit_data |>
  mutate(date = as.Date(paste0(year, "-01-01"))) |>
  reframe(
    .by = countryname, 
    month = seq(min(date), max(date), by = "month"), 
    year = as.integer(format(month, format = "%Y")), 
    across(-c(year, date, month), 
           ~ if (any(!is.na(.x))) approx(date, .x, xout = month, na.rm = TRUE)$y else rep(.x[1], length(month)))
  )

filter(out, countryname == "Anguilla")[20:30,]
# # A tibble: 11 × 13
#    countryname month       year total    hh  corp   agr manufac real_estate whole_sale transport_comm others error
#    <chr>       <date>     <int> <dbl> <dbl> <dbl> <dbl>   <dbl>       <dbl>      <dbl>          <dbl>  <dbl> <chr>
#  1 Anguilla    1991-08-01  1991  180.  53.5  126.  1.67   0.184        13.6       85.9           13.7   11.3 NA   
#  2 Anguilla    1991-09-01  1991  182.  54.0  128.  1.75   0.195        13.7       87.2           13.6   11.6 NA   
#  3 Anguilla    1991-10-01  1991  184.  54.4  129.  1.84   0.207        13.7       88.4           13.4   11.9 NA   
#  4 Anguilla    1991-11-01  1991  186.  54.9  131.  1.92   0.219        13.7       89.7           13.3   12.2 NA   
#  5 Anguilla    1991-12-01  1991  188.  55.4  133.  2.00   0.230        13.8       90.9           13.2   12.5 NA   
#  6 Anguilla    1992-01-01  1992  190   55.8  134.  2.09   0.242        13.8       92.2           13.0   12.8 NA   
#  7 Anguilla    1992-02-01  1992  191.  57.9  133.  2.03   0.280        13.2       92.1           13.0   12.4 NA   
#  8 Anguilla    1992-03-01  1992  192.  59.8  132.  1.98   0.315        12.7       92.1           13.0   12.0 NA   
#  9 Anguilla    1992-04-01  1992  193.  61.8  131.  1.92   0.354        12.1       92.0           13.0   11.6 NA   
# 10 Anguilla    1992-05-01  1992  194.  63.8  130.  1.87   0.390        11.5       91.9           13.0   11.3 NA   
# 11 Anguilla    1992-06-01  1992  195.  65.8  129.  1.81   0.429        10.9       91.8           12.9   10.9 NA   

使用.by=需要dplyr_1.1.0或更高版本;如果您使用的是旧版本,请从mutate(.by=c(..), stuff)更改为group_by(..) |> mutate(stuff) |> ungroup().

R相关问答推荐

是否有R代码来判断一个组中的所有值是否与另一个组中的所有值相同?

给定R中另一行中的值,如何插补缺失值

从API中抓取R数据SON

对lme 4对象运行summary()时出错(diag中的错误(from,names = RST):对象unpackedMatrix_diag_get找不到)

如何求解arg必须为NULL或deSolve包的ode函数中的字符向量错误

有没有一种方法可以从函数中创建一个值的列表,然后将这些值变成R中的直方图?我一直觉得不行

次级y轴R gggplot2

pickerInput用于显示一条或多条geom_hline,这些线在图中具有不同 colored颜色

任意列的欧几里得距离

如何从当前行上方找到符合特定条件的最接近值?

修改用R编写的用户定义函数

筛选出以特定顺序患病的个体

从BRM预测价值

无法正确设置动态创建的Quarto标注的格式

如何在R中描绘#符号?

函数可以跨多个列搜索多个字符串并创建二进制输出变量

在gggraph中显示来自不同数据帧的单个值

使用来自嵌套列和非嵌套列的输入的PURRR:MAP和dplyr::Mariate

在使用SliderInput In Shiny(R)设置输入数据的子集时,保留一些情节痕迹

使用LAG和dplyr执行计算,以便按行和按组迭代