我有一个包含地区数据的数据框(data frame),希望让每个国家内某个变量各取值的样本数相等。下面是我的例子:我有一张按性别(Gender)和国家(COUNTRY)划分的样本计数表,希望通过删除部分样本,使每个国家中 Gender 为 0 和为 1 的样本数相等。
> table(df$Gender , df$COUNTRY)
      1   2   3
  0  86  81 282
  1  21   7  23
是否有任何包或函数可以删除 Gender 等于 0 的部分行,使每个国家中保留下来的行数与 Gender 等于 1 的行数相同?
预期的结果如下:
> table(df$Gender , df$COUNTRY)
     1  2  3
  0 21  7 23
  1 21  7 23
如果有更基本的方法可以做到这一点,也会很有帮助。例如,从满足 df$COUNTRY == 1 & df$Gender == 0 的行中随机删除 65 个样本(86 − 21 = 65),这样我就可以对每个国家手动重复这一操作。
应大家的要求,下面给出示例数据(dput 的输出)。上面的各个表格已根据该数据作了相应更新。
# Reproducible sample data (dput-style output) for the question above.
# Builds a data frame `df` with two numeric columns of 500 values each:
#   Gender  - coded 0 / 1
#   COUNTRY - coded 1 / 2 / 3
# Row names are given in R's compact form c(NA, 500L), i.e. rows 1..500.
df <- structure(list(Gender = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 1, 0, 0), COUNTRY = c(2, 3, 2, 1, 3, 3, 3, 2, 2, 3, 3,
3, 3, 3, 3, 2, 3, 3, 1, 3, 3, 3, 3, 1, 2, 3, 2, 3, 1, 3, 2, 3,
3, 3, 2, 2, 3, 3, 3, 2, 3, 2, 2, 1, 3, 3, 3, 2, 2, 3, 1, 1, 2,
2, 1, 3, 3, 1, 2, 1, 3, 3, 3, 1, 1, 3, 3, 3, 1, 3, 2, 1, 3, 2,
3, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 1, 1, 1, 1, 3, 1, 2, 1, 3,
2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 1, 1, 3, 1, 1, 2,
2, 3, 3, 1, 2, 3, 3, 3, 2, 3, 3, 1, 3, 3, 1, 3, 1, 1, 3, 3, 2,
3, 1, 1, 1, 3, 3, 3, 2, 3, 3, 2, 3, 2, 1, 3, 2, 3, 1, 3, 2, 2,
2, 3, 3, 2, 1, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 2, 2,
1, 1, 3, 1, 1, 1, 3, 3, 1, 2, 1, 1, 1, 1, 3, 3, 3, 1, 3, 3, 2,
3, 3, 3, 3, 3, 1, 3, 3, 2, 1, 1, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3,
3, 3, 1, 3, 2, 2, 1, 3, 2, 1, 3, 2, 3, 2, 3, 3, 2, 3, 2, 3, 3,
3, 1, 3, 2, 1, 1, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 1, 2, 3, 1, 2,
3, 2, 1, 2, 1, 3, 1, 3, 3, 3, 3, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1,
3, 3, 1, 3, 1, 1, 1, 2, 3, 2, 2, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3,
3, 3, 3, 3, 2, 3, 1, 3, 3, 3, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1, 3, 3, 1, 2, 3, 1, 3, 3, 3, 1, 3, 1, 3, 3, 3, 1, 1, 3, 2, 3,
1, 3, 3, 3, 1, 2, 3, 3, 3, 3, 1, 3, 1, 3, 1, 1, 3, 3, 3, 3, 1,
3, 3, 3, 3, 3, 1, 1, 3, 3, 2, 3, 3, 3, 3, 1, 3, 3, 2, 3, 3, 1,
3, 3, 3, 2, 3, 1, 3, 3, 1, 3, 2, 1, 2, 3, 3, 3, 3, 3, 3, 2, 2,
2, 3, 3, 2, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 2, 1, 3, 3, 3, 3, 3,
3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3,
1, 1, 3, 3, 3, 1, 3, 3, 1, 3, 3, 3, 3, 3, 1, 3, 3, 1, 3, 3, 3,
3, 3, 2, 3, 2, 3)), row.names = c(NA, 500L), class = "data.frame")