为了重现该问题,我使用了以下数据框:
library(tidyverse)
library(lubridate)
#Step 1. Load data frame and libraries
df <- data.frame(
stringsAsFactors = FALSE,
check.names = FALSE,
Date = c("01/11/1876","01/12/1876",
"01/01/1877","01/02/1877","01/03/1877",
"01/04/1877","01/05/1877","01/06/1877",
"01/07/1877","01/08/1877","01/09/1877",
"01/10/1877","01/11/1877","01/12/1877",
"01/01/1878"),
`Att-Bissen P [mm]` = c(48.5,111.2,29.7,139.4,90.1,25.9,
216,94.6,40.5,NA,64.4,68.8,44.7,
34.8,71.9),
`Att-Bissen PET [mm]` = c(88.4,88.3,80.5,53.4,36.7,20.2,
21.6,21.7,21.3,37.6,46.1,66.5,89.8,
121.5,87.7),
`Att-Bissen Q [mm]` = c(13.5,12.6,11.3,12.9,44.6,21.3,
194.9,NA,49.1,46.7,63.6,25.4,19.8,
15.3,16),
`Rau. Merl P [mm]` = c(43.7,104.2,25.5,131.3,83.7,21.9,
205.2,88.1,35.9,61,59,63.2,40,
30.4,66.2),
`Rau. Merl PET [mm]` = c(91.4,91.3,83.2,54.9,37.5,20.3,
21.8,21.8,21.4,38.4,47.3,68.6,NA,
125.9,90.7),
`Rau. Merl Q [mm]` = c(8.7,10.6,8.4,14.3,23.7,14.1,
131.6,106.7,40.1,42.4,50.3,24.6,16.7,
11.3,13.7),
`Syre Felsmuhle/Mertert P [mm]` = c(37.8,89.5,22.3,112.7,72,19.2,
175.8,75.8,31.2,52.6,50.9,54.5,34.7,
26.5,57.1),
`Syre Felsmuhle/Mertert PET [mm]` = c(95.6,95.6,86.9,57.2,38.8,20.7,
22.3,22.3,21.9,39.8,49.2,71.6,97.2,
132,94.9),
`Syre Felsmuhle/Mertert Q [mm]` = c(16,22,17.9,24,23.1,11.4,91,NA,
NA,45.2,65.6,NA,NA,NA,NA),
`Wiltz-Winseler P [mm]` = c(50.1,106.9,33,132.4,87.7,29.7,
201.8,91.8,42.8,66.4,64.5,68.5,46.7,
37.7,71.3),
`Wiltz-Winseler PET [mm]` = c(87.4,87.3,79.5,52.5,35.8,19.4,
20.8,20.8,20.4,36.7,NA,NA,88.8,
120.4,86.7),
`Wiltz-Winseler Q [mm]` = c(7.2,6.3,5,8.6,33.9,32.2,234.2,
148.1,68.5,51.5,101.4,25.7,18.7,
14.3,12.1))
数据框由四个站点组成,每个站点有三个参数:P、PET和Q. 在步骤2中,我已经创建了具有三个公式的函数,这些公式需要应用于每个站点.请记住,这些公式适用于每个时间点.
# Step 2: Create Anomalies
# Calculate anomalies for P, PET, and Q
formula_1 <- function(P, PET, Q) {
Anomaly_P = P - mean(P, na.rm = TRUE)
Anomaly_PET = PET - mean(PET, na.rm = TRUE)
Anomaly_Q = Q - mean(Q, na.rm = TRUE)
return(list(Anomaly_P = Anomaly_P, Anomaly_PET = Anomaly_PET, Anomaly_Q = Anomaly_Q))
}
第3步将每个站点的名称子集
#Step 3: Extract the site names from the column names
site_names <- sub(" P \\[mm\\]| PET \\[mm\\]| Q \\[mm\\]", "", names(df)[-1]) |>
unique()
site_names
#Step 4: Loop through each site and calculate the formula
results <- list()
for (site in site_names) {
site_data <- df[, grepl(site, names(df))]
results[[site]] <- formula_1(site_data[[paste0(site, " P [mm]")]],
site_data[[paste0(site, " PET [mm]")]],
site_data[[paste0(site, " Q [mm]")]])
}
#Step 5: unlist results
results_sum <- data.frame(Site = names(results), unlist(results))
我不知道我在哪里犯了错.该代码生成了一个只有2列和180个条目的数据框.我想得到的是一个数据框,其中每个站点添加了三个多列,其中包含P
、PET
和Q
(每个时间点)的异常.
任何帮助都将不胜感激.
EDIT The following is what I would like to end up with: a data frame where the anomalies of P, PET and Q (per time step) are added after each site. (The brown/red columns are the result of the anomalies calculation = x-mean(xn)