我一直在try 在MongoDB中使用MapReduce来完成我认为简单的过程.我不知道这是否是正确的方法,甚至不知道我是否应该使用MapReduce.我在谷歌上搜索我想到的关键词,并试图点击我认为最成功的文档,但什么都没有.也许我想得太多了?
我有两个系列:details
和gpas
details
是由一大堆文件(3+百万)组成的.studentid
元素可以重复两次,每个year
重复一次,如下所示:
{ "_id" : ObjectId("4d49b7yah5b6d8372v640100"), "classes" : [1,17,19,21], "studentid" : "12345a", "year" : 1}
{ "_id" : ObjectId("4d76b7oij7s2d8372v640100"), "classes" : [2,12,19,22], "studentid" : "98765a", "year" : 1}
{ "_id" : ObjectId("4d49b7oij7s2d8372v640100"), "classes" : [32,91,101,217], "studentid" : "12345a", "year" : 2}
{ "_id" : ObjectId("4d76b7rty7s2d8372v640100"), "classes" : [1,11,18,22], "studentid" : "24680a", "year" : 1}
{ "_id" : ObjectId("4d49b7oij7s2d8856v640100"), "classes" : [32,99,110,215], "studentid" : "98765a", "year" : 2}
...
gpas
的元素与details
中的studentid
相同.每studentid
个只有一个条目,如下所示:
{ "_id" : ObjectId("4d49b7yah5b6d8372v640111"), "studentid" : "12345a", "overall" : 97, "subscore": 1}
{ "_id" : ObjectId("4f76b7oij7s2d8372v640213"), "studentid" : "98765a", "overall" : 85, "subscore": 5}
{ "_id" : ObjectId("4j49b7oij7s2d8372v640871"), "studentid" : "24680a", "overall" : 76, "subscore": 2}
...
最后,我想用这种格式 for each 学生收集一行:
{ "_id" : ObjectId("4d49b7yah5b6d8372v640111"), "studentid" : "12345a", "classes_1": [1,17,19,21], "classes_2": [32,91,101,217], "overall" : 97, "subscore": 1}
{ "_id" : ObjectId("4f76b7oij7s2d8372v640213"), "studentid" : "98765a", "classes_1": [2,12,19,22], "classes_2": [32,99,110,215], "overall" : 85, "subscore": 5}
{ "_id" : ObjectId("4j49b7oij7s2d8372v640871"), "studentid" : "24680a", "classes_1": [1,11,18,22], "classes_2": [], "overall" : 76, "subscore": 2}
...
我要做的就是这样运行MapReduce:
var mapDetails = function() {
emit(this.studentid, {studentid: this.studentid, classes: this.classes, year: this.year, overall: 0, subscore: 0});
};
var mapGpas = function() {
emit(this.studentid, {studentid: this.studentid, classes: [], year: 0, overall: this.overall, subscore: this.subscore});
};
var reduce = function(key, values) {
var outs = { studentid: "0", classes_1: [], classes_2: [], overall: 0, subscore: 0};
values.forEach(function(value) {
if (value.year == 0) {
outs.overall = value.overall;
outs.subscore = value.subscore;
}
else {
if (value.year == 1) {
outs.classes_1 = value.classes;
}
if (value.year == 2) {
outs.classes_2 = value.classes;
}
outs.studentid = value.studentid;
}
});
return outs;
};
res = db.details.mapReduce(mapDetails, reduce, {out: {reduce: 'joined'}})
res = db.gpas.mapReduce(mapGpas, reduce, {out: {reduce: 'joined'}})
但当我运行它时,这是我的最终Collection :
{ "_id" : "12345a", "value" : { "studentid" : "12345a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 97, "subscore" : 1 } }
{ "_id" : "98765a", "value" : { "studentid" : "98765a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 85, "subscore" : 5 } }
{ "_id" : "24680a", "value" : { "studentid" : "24680a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 76, "subscore" : 2 } }
我错过了课程array.
另外,顺便问一下,如何访问生成的MapReduce value
元素中的元素?MapReduce是否总是将输出设置为value
或其他任何名称?