R 将每个组扩展到最大 n 行

发布于08月14日

如何将组扩展到最大组的长度:

# Sample data: three ID groups of unequal size (2, 1 and 3 rows).
df <- data.frame(
  ID = c(1L, 1L, 2L, 3L, 3L, 3L),
  col1 = c("A", "B", "O", "U", "L", "R"),
  stringsAsFactors = FALSE
)


ID col1
1 A
1 B
2 O
3 U
3 L
3 R

期望输出:

1 A
1 B
NA NA
2 O
NA NA
NA NA
3 U
3 L
3 R

推荐答案

您可以利用这样一个事实:当行索引超出数据框的行数时,df[n_bigger_than_nrow, ] 会返回一整行 NA。

dplyr 方案

# Requires dplyr >= 1.1.0 (pick() and reframe()).
library(dplyr)

# Size of the largest ID group; every group is padded up to this many rows.
max_n <- max(count(df, ID)$n)

df %>%
  group_by(ID) %>%
  # pick(everything()) is the current group's non-grouping columns (it replaces
  # the deprecated cur_data()); row positions past the end of the group come
  # back as all-NA rows. reframe() replaces summarise() because each group
  # yields max_n rows rather than exactly one, and it returns an ungrouped
  # tibble.
  reframe(pick(everything())[seq_len(max_n), ])
#> # A tibble: 9 × 2
#>      ID col1 
#>   <int> <chr>
#> 1     1 A    
#> 2     1 B    
#> 3     1 <NA> 
#> 4     2 O    
#> 5     2 <NA> 
#> 6     2 <NA> 
#> 7     3 U    
#> 8     3 L    
#> 9     3 R

base R 方案

# Split into one data frame per ID. split() orders the pieces by ID value, so
# the rows of df do not have to be pre-sorted or contiguous per group.
# drop = TRUE skips unused factor levels; rows whose ID is NA are not kept.
groups <- split(df, df$ID, drop = TRUE)
n <- vapply(groups, nrow, integer(1L))
max_n <- max(n)

df <- do.call(rbind, lapply(groups, \(g) {
  # Row positions beyond nrow(g) are returned as all-NA rows.
  padded <- g[seq_len(max_n), , drop = FALSE]
  # Carry the group's ID onto the padding rows as well.
  padded$ID <- g$ID[1L]
  padded
}))
# rbind() on a named list prefixes the row names; reset them to 1..n.
rownames(df) <- NULL

df
#>   ID col1
#> 1  1    A
#> 2  1    B
#> 3  1 <NA>
#> 4  2    O
#> 5  2 <NA>
#> 6  2 <NA>
#> 7  3    U
#> 8  3    L
#> 9  3    R