很抱歉重复提问,但我始终没弄明白如何编写自定义函数并在分组数据上逐组应用它.如果有人能顺便推荐一些深入讲解这一主题的资料,那就更好了.

这样做的目的是:

library(dplyr)

# Example input: a single row for team "A". `cluster` is a list-column whose
# only element is the numeric vector later passed on as `shot_xg_list`.
a_df <- tibble(
  team = "A",
  cluster = list(c(
    0.01, 0.01, 0.09, 0.03, 0.14, 0.28, 0.09, 0.25,
    0.18, 0.17, 0.54, 0.41, 0.16, 0.18, 0.25, 0.02,
    0.2, 0.69, 0.02, 0.01, 0.02, 0.07, 0.07, 0.21
  )),
  tot_matches = 20,
  tot_ck = 121
)

#' Simulate the distribution of goals scored from a set of shot xG values
#'
#' Every simulation treats each shot as an independent trial that is scored
#' when a uniform random draw is less than or equal to the shot's xG. Before
#' simulating, the number of shots is rescaled from `n_matches` matches to
#' `norm_matches` matches:
#' * equal: the shots are used as they are;
#' * fewer matches played than the norm: the shots are kept and extra shots
#'   are drawn at random from them;
#' * more matches played than the norm: a random subset of the shots is used.
#'
#' @param shot_xg_list Numeric vector of per-shot xG values.
#' @param n_matches Number of matches the shots were collected over (> 0).
#' @param norm_matches Number of matches to normalise the shot count to.
#' @param n_sims Number of simulations to run. Defaults to 10000, the value
#'   that used to be hard-coded.
#' @return A data frame with one row per simulated goal count, sorted by
#'   `goals`: `goals` is the goal count and `prob` the percentage of
#'   simulations (rounded to one decimal) that produced it.
calculate_goals_norm <- function(shot_xg_list, n_matches, norm_matches,
                                 n_sims = 10000) {
  stopifnot(
    is.numeric(shot_xg_list),
    length(n_matches) == 1, n_matches > 0,
    length(norm_matches) == 1, norm_matches >= 0,
    length(n_sims) == 1, n_sims >= 1
  )

  # set seed only for stackoverflow question!!!!
  # NOTE: this resets the global RNG on every call, so repeated calls with
  # the same arguments return identical results.
  set.seed(123)

  n_shots <- length(shot_xg_list)
  # Shot count expected over `norm_matches` matches.
  n_target <- round(n_shots / n_matches * norm_matches, 0)

  # Draw `size` shots at random. Index-based sampling consumes the RNG the
  # same way `sample(x, size)` does, without `sample()`'s special handling of
  # a length-one numeric `x`.
  draw_shots <- function(size, replace = FALSE) {
    shot_xg_list[sample.int(n_shots, size, replace = replace)]
  }

  # One simulated goal count: a shot is scored when its uniform draw is at or
  # below its xG. One `runif()` call yields the same draws, in the same order,
  # as calling `runif(1)` once per shot.
  simulate_goals <- function(shot_xg) {
    as.numeric(sum(runif(length(shot_xg)) <= shot_xg))
  }

  sim_goals <- vapply(seq_len(n_sims), function(i) {
    if (n_matches == norm_matches) {
      shots <- shot_xg_list
    } else if (n_matches < norm_matches) {
      n_extra <- n_target - n_shots
      # Sampling without replacement cannot return more shots than exist, so
      # fall back to sampling with replacement only when that many are needed
      # (previously this case stopped with an error).
      shots <- c(shot_xg_list, draw_shots(n_extra, replace = n_extra > n_shots))
    } else {
      shots <- draw_shots(n_target)
    }
    simulate_goals(shots)
  }, numeric(1))

  # Tabulate with base R: `summarise()` returning several rows per group is
  # deprecated since dplyr 1.1.0, and the percentage now follows `n_sims`
  # instead of assuming 10000 simulations.
  goal_values <- sort(unique(sim_goals))
  goal_counts <- tabulate(match(sim_goals, goal_values),
                          nbins = length(goal_values))

  data.frame(
    goals = goal_values,
    prob = round(100 * goal_counts / n_sims, 1)
  )
}

# Run the simulation for the single-team data frame (1 obs. of 4 variables)
calculate_goals_norm(
  shot_xg_list = unlist(a_df$cluster),
  n_matches = a_df$tot_matches,
  norm_matches = 20
)

# A tibble: 12 x 2
   goals  prob
   <dbl> <dbl>
 1     0   0.6
 2     1   4  
 3     2  12.4
 4     3  20.6
 5     4  23.8
 6     5  19.1
 7     6  11.6
 8     7   5.6
 9     8   1.8
10     9   0.4
11    10   0.1
12    12   0 

添加第二支球队得到 full_df,以演示这个自定义函数的预期用法:

# Append a second team ("B") to `a_df` so there is more than one group to
# iterate over.
full_df <- add_row(
  a_df,
  team = "B",
  cluster = list(c(
    0.06, 0.01, 0.11, 0.18, 0.75, 0.04, 0.23, 0.07,
    0.1, 0.05, 0.24, 0.12, 0.28, 0.02, 0.09, 0.16,
    0.64, 0.03, 0.1, 0.19, 0.09, 0.01, 0.02, 0.12,
    0.01, 0.11, 0.18, 0.05, 0.02, 0.8, 0.08
  )),
  tot_matches = 19,
  tot_ck = 83
)

#final function should look like this?
# NOTE: this attempt does not work as written. `calculate_goals_norm()` is a
# plain function, so `cluster` and `tot_matches` are looked up as ordinary
# variables rather than as columns of `full_df`, and the pipe inserts the
# grouped data frame as an additional first positional argument instead of
# supplying those columns.
full_df %>% 
  group_by(team) %>% 
  calculate_goals_norm(shot_xg_list = unlist(cluster), n_matches = tot_matches, norm_matches = 20)

我对上面例子中这类自定义函数还不太熟悉.我知道apply()/sapply()通常用于在数据框上进行迭代,但我还不清楚在这里应该如何使用它们.

谢谢你的帮助.

推荐答案

我们这里可以用group_modify

library(dplyr)
# Run the simulation once per team; `group_modify()` hands each group's rows
# to the function and prepends the `team` key to the returned data frame.
out <- full_df %>%
  group_by(team) %>%
  group_modify(function(team_rows, team_key) {
    calculate_goals_norm(
      shot_xg_list = unlist(team_rows$cluster),
      n_matches = team_rows$tot_matches,
      norm_matches = 20
    )
  }) %>%
  ungroup()

-输出

> as.data.frame(out)
   team goals prob
1     A     0  0.6
2     A     1  4.0
3     A     2 12.4
4     A     3 20.6
5     A     4 23.8
6     A     5 19.0
7     A     6 11.6
8     A     7  5.6
9     A     8  1.8
10    A     9  0.4
11    A    10  0.1
12    A    12  0.0
13    B     0  0.0
14    B     1  0.8
15    B     2  3.7
16    B     3 11.1
17    B     4 18.6
18    B     5 22.4
19    B     6 19.6
20    B     7 12.4
21    B     8  6.9
22    B     9  2.9
23    B    10  1.1
24    B    11  0.3
25    B    12  0.1
26    B    14  0.0

注意,原函数calculate_goals_norm中的下面这一行会报错:

# Line quoted from the asker's original function; the answer reports that it
# raised an error and replaces it (see the data.frame() version of this step).
sim_goal_list <- as_tibble(sim_goal_list)

因此,它被修改为

# Flatten the simulated goal counts into a one-column data frame (`value`).
sim_goal_dat <- data.frame(value = unlist(sim_goal_list))

-完整函数


#' Simulate the distribution of goals scored from a set of shot xG values
#'
#' Every simulation treats each shot as an independent trial that is scored
#' when a uniform random draw is less than or equal to the shot's xG. Before
#' simulating, the number of shots is rescaled from `n_matches` matches to
#' `norm_matches` matches:
#' * equal: the shots are used as they are;
#' * fewer matches played than the norm: the shots are kept and extra shots
#'   are drawn at random from them;
#' * more matches played than the norm: a random subset of the shots is used.
#'
#' @param shot_xg_list Numeric vector of per-shot xG values.
#' @param n_matches Number of matches the shots were collected over (> 0).
#' @param norm_matches Number of matches to normalise the shot count to.
#' @param n_sims Number of simulations to run. Defaults to 10000, the value
#'   that used to be hard-coded.
#' @return A data frame with one row per simulated goal count, sorted by
#'   `goals`: `goals` is the goal count and `prob` the percentage of
#'   simulations (rounded to one decimal) that produced it.
calculate_goals_norm <- function(shot_xg_list, n_matches, norm_matches,
                                 n_sims = 10000) {
  stopifnot(
    is.numeric(shot_xg_list),
    length(n_matches) == 1, n_matches > 0,
    length(norm_matches) == 1, norm_matches >= 0,
    length(n_sims) == 1, n_sims >= 1
  )

  # set seed only for stackoverflow question!!!!
  # NOTE: this resets the global RNG on every call, so repeated calls with
  # the same arguments return identical results.
  set.seed(123)

  n_shots <- length(shot_xg_list)
  # Shot count expected over `norm_matches` matches.
  n_target <- round(n_shots / n_matches * norm_matches, 0)

  # Draw `size` shots at random. Index-based sampling consumes the RNG the
  # same way `sample(x, size)` does, without `sample()`'s special handling of
  # a length-one numeric `x`.
  draw_shots <- function(size, replace = FALSE) {
    shot_xg_list[sample.int(n_shots, size, replace = replace)]
  }

  # One simulated goal count: a shot is scored when its uniform draw is at or
  # below its xG. One `runif()` call yields the same draws, in the same order,
  # as calling `runif(1)` once per shot.
  simulate_goals <- function(shot_xg) {
    as.numeric(sum(runif(length(shot_xg)) <= shot_xg))
  }

  sim_goals <- vapply(seq_len(n_sims), function(i) {
    if (n_matches == norm_matches) {
      shots <- shot_xg_list
    } else if (n_matches < norm_matches) {
      n_extra <- n_target - n_shots
      # Sampling without replacement cannot return more shots than exist, so
      # fall back to sampling with replacement only when that many are needed
      # (previously this case stopped with an error).
      shots <- c(shot_xg_list, draw_shots(n_extra, replace = n_extra > n_shots))
    } else {
      shots <- draw_shots(n_target)
    }
    simulate_goals(shots)
  }, numeric(1))

  # Tabulate with base R: `summarise()` returning several rows per group is
  # deprecated since dplyr 1.1.0, and the percentage now follows `n_sims`
  # instead of assuming 10000 simulations.
  goal_values <- sort(unique(sim_goals))
  goal_counts <- tabulate(match(sim_goals, goal_values),
                          nbins = length(goal_values))

  data.frame(
    goals = goal_values,
    prob = round(100 * goal_counts / n_sims, 1)
  )
}

R相关问答推荐

如何将Rmarkdown中包含图像和文本的行的两个单元格与.PDF输出垂直对齐?

在之前合并的数据.tables中分配新列后.internal.selfref无效

pivot_longer:names_to和names_pattern

使用case_when和Mutate搜索多个列以寻找条件

在值和NA的行顺序中寻找中断模式

dplyr summarise每个组返回多行?

筛选出以特定顺序患病的个体

lightgbm引擎在tidymodels中的L1正则化

如何从R ggplot图片中获取SVG字符串?

矩阵的堆叠条形图,条形图上有数字作为标签

线性模型斜率在减少原始数据时提供NA

以相同的方式对每个表进行排序

使用范围和单个数字将数字与字符串进行比较

在R函数中使用加号

当我添加美学时,geom_point未对齐

如何使这些react 表对象相互独立?

变长向量的矢量化和

向R中的数据帧添加一列,该列统计另一列中每个唯一值的二进制观测值的数量

按组跨多列创建伪变量

为什么不能使用lApply在包装函数中调用子集