我一直在尝试在MongoDB中使用MapReduce来完成一个我认为很简单的过程.我不知道这是否是正确的方法,甚至不知道我是否应该使用MapReduce.我用能想到的关键词在谷歌上搜索,也查阅了我认为最有可能有帮助的文档,但一无所获.也许我想得太复杂了?

我有两个集合:details 和 gpas

details由大量文档(300多万个)组成.同一个studentid最多可以出现两次,每个year出现一次,如下所示:

{ "_id" : ObjectId("4d49b7yah5b6d8372v640100"), "classes" : [1,17,19,21], "studentid" : "12345a", "year" : 1}
{ "_id" : ObjectId("4d76b7oij7s2d8372v640100"), "classes" : [2,12,19,22], "studentid" : "98765a", "year" : 1}
{ "_id" : ObjectId("4d49b7oij7s2d8372v640100"), "classes" : [32,91,101,217], "studentid" : "12345a", "year" : 2}
{ "_id" : ObjectId("4d76b7rty7s2d8372v640100"), "classes" : [1,11,18,22], "studentid" : "24680a", "year" : 1}
{ "_id" : ObjectId("4d49b7oij7s2d8856v640100"), "classes" : [32,99,110,215], "studentid" : "98765a", "year" : 2}
...

gpas中的文档使用与details中相同的studentid.每个studentid只有一个条目,如下所示:

{ "_id" : ObjectId("4d49b7yah5b6d8372v640111"), "studentid" : "12345a", "overall" : 97, "subscore": 1}
{ "_id" : ObjectId("4f76b7oij7s2d8372v640213"), "studentid" : "98765a", "overall" : 85, "subscore": 5}
{ "_id" : ObjectId("4j49b7oij7s2d8372v640871"), "studentid" : "24680a", "overall" : 76, "subscore": 2}
...

最后,我想为每个学生得到一行如下格式的文档:

{ "_id" : ObjectId("4d49b7yah5b6d8372v640111"), "studentid" : "12345a", "classes_1": [1,17,19,21], "classes_2": [32,91,101,217], "overall" : 97, "subscore": 1}
{ "_id" : ObjectId("4f76b7oij7s2d8372v640213"), "studentid" : "98765a", "classes_1": [2,12,19,22], "classes_2": [32,99,110,215], "overall" : 85, "subscore": 5}
{ "_id" : ObjectId("4j49b7oij7s2d8372v640871"), "studentid" : "24680a", "classes_1": [1,11,18,22], "classes_2": [], "overall" : 76, "subscore": 2}
...

我的做法是像这样运行MapReduce:

/**
 * Map function for the `details` collection.
 * Emits one value per document, keyed by studentid. `overall` and `subscore`
 * are zero-filled placeholders so the value has the same fields as the one
 * emitted by the gpas map function.
 */
var mapDetails = function() {
    var detailValue = {
        studentid: this.studentid,
        classes: this.classes,
        year: this.year,
        overall: 0,
        subscore: 0
    };
    emit(this.studentid, detailValue);
};

/**
 * Map function for the `gpas` collection.
 * Emits one value per document, keyed by studentid. `year` is set to 0 and
 * `classes` to an empty array; the reduce function uses year == 0 to
 * recognise a value that carries the scores.
 */
var mapGpas = function() {
    var gpaValue = {
        studentid: this.studentid,
        classes: [],
        year: 0,
        overall: this.overall,
        subscore: this.subscore
    };
    emit(this.studentid, gpaValue);
};

/**
 * Reduce function: folds all values emitted for one studentid into a single
 * object with classes_1, classes_2, overall and subscore.
 *
 * NOTE: the returned object has neither a `year` nor a `classes` key. If it is
 * later passed back in as one of `values`, it takes the non-zero-year path but
 * matches neither year 1 nor year 2, so classes_1/classes_2 stay empty.
 *
 * @param key    the studentid the values were emitted under (unused here)
 * @param values array of emitted values for that key
 * @returns the merged object
 */
var reduce = function(key, values) {
    var merged = { studentid: "0", classes_1: [], classes_2: [], overall: 0, subscore: 0};

    values.forEach(function(entry) {
        // year == 0 marks a value that carries the scores.
        if (entry.year == 0) {
            merged.overall = entry.overall;
            merged.subscore = entry.subscore;
            return;
        }

        // Any other value supplies the student id and, by year, the classes.
        merged.studentid = entry.studentid;
        if (entry.year == 1) {
            merged.classes_1 = entry.classes;
        }
        if (entry.year == 2) {
            merged.classes_2 = entry.classes;
        }
    });

    return merged;

};

// First pass: map `details` with mapDetails and write the results to the
// 'joined' collection using the `reduce` output action.
res = db.details.mapReduce(mapDetails, reduce, {out: {reduce: 'joined'}})
// Second pass: map `gpas` with mapGpas into the same 'joined' collection.
// Per MongoDB's documented `reduce` output action, a key that already exists
// in 'joined' has its stored value and the new value passed through `reduce`
// again.
res = db.gpas.mapReduce(mapGpas, reduce, {out: {reduce: 'joined'}})

但当我运行它时,这是我的最终Collection :

{ "_id" : "12345a", "value" : { "studentid" : "12345a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 97, "subscore" : 1 } }
{ "_id" : "98765a", "value" : { "studentid" : "98765a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 85, "subscore" : 5 } }
{ "_id" : "24680a", "value" : { "studentid" : "24680a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 76, "subscore" : 2 } }

结果中的classes数组丢失了.

另外,顺便问一下,如何访问生成的MapReduce value元素中的元素?MapReduce是否总是将输出设置为value或其他任何名称?

推荐答案

This is similar to a question that was asked on the MongoDB-users Google Groups.
https://groups.google.com/group/mongodb-user/browse_thread/thread/60a8b683e2626ada?pli=1

答案参考了一个与您的示例类似的在线教程:

有关MongoDB中MapReduce的更多信息,请参阅文档:

此外,在MongoDB Cookbook文章"使用版本化文档查找最大值和最小值"的"Extras"部分,有一个关于MapReduce操作如何工作的有用的分步演练:

如果您已经阅读了一些参考文件,请原谅.我把它们包括进来是为了其他可能正在阅读本文的用户,以及刚开始在MongoDB中使用MapReduce的用户

映射函数中"emit"语句的输出必须与Reduce函数的输出相匹配,这一点很重要.如果Map函数只输出一个文档,那么Reduce函数可能根本不会运行,那么输出集合中的文档将不匹配.

我稍微修改了map语句,以使用两个独立的"类"数组以所需输出的格式发送文档

/**
 * Map function for the `details` collection.
 * Emits a value already shaped like the reduce output: the document's classes
 * go into classes_1 when year == 1, into classes_2 when year == 2, and the
 * other array stays empty. `overall` and `subscore` are zero placeholders.
 */
var mapDetails = function(){
    var isFirstYear = this.year == 1;
    var isSecondYear = this.year == 2;

    emit(this.studentid, {
        studentid: this.studentid,
        classes_1: isFirstYear ? this.classes : [],
        classes_2: isSecondYear ? this.classes : [],
        year: this.year,
        overall: 0,
        subscore: 0
    });
};

/**
 * Map function for the `gpas` collection.
 * Emits a value shaped like the reduce output, with empty class arrays and
 * year set to 0 so the reduce function can recognise it as the value that
 * carries overall/subscore.
 */
var mapGpas = function() {
    var gpaValue = {
        studentid: this.studentid,
        classes_1: [],
        classes_2: [],
        year: 0,
        overall: this.overall,
        subscore: this.subscore
    };
    emit(this.studentid, gpaValue);
};

/**
 * Reduce function: merges every value emitted for one studentid into a single
 * object. classes_1 and classes_2 are unioned across all values (duplicates
 * dropped, first-seen order kept); overall/subscore are taken from values
 * whose year == 0.
 *
 * The inner callbacks originally named their parameter `class`, which is a
 * reserved word and a SyntaxError in ES2015+ engines; it is now `classId`.
 *
 * NOTE: the returned object has no `year` key, so if it is passed back in as
 * one of `values`, its classes are merged but its overall/subscore are not
 * copied (the `v.year == 0` test is false for it).
 *
 * @param key    the studentid the values were emitted under (unused here)
 * @param values array of values shaped like the map output
 * @returns the merged object
 */
var r = function(key, values) {
    var outs = { studentid: "0", classes_1: [], classes_2: [], overall: 0, subscore: 0};

    // Appends each id from `source` to `target` unless it is already there.
    var appendMissing = function(source, target) {
        source.forEach(function(classId) {
            if (target.indexOf(classId) === -1) {
                target.push(classId);
            }
        });
    };

    values.forEach(function(v){
        outs.studentid = v.studentid;
        appendMissing(v.classes_1, outs.classes_1);
        appendMissing(v.classes_2, outs.classes_2);

        // Only the gpas-derived value (year 0) carries real scores.
        if (v.year == 0) {
            outs.overall = v.overall;
            outs.subscore = v.subscore;
        }
    });
    return outs;
};

// First pass: map `details` with mapDetails, reduce with r, and write the
// results to the 'joined' collection using the `reduce` output action.
res = db.details.mapReduce(mapDetails, r, {out: {reduce: 'joined'}})
// Second pass: map `gpas` with mapGpas into the same 'joined' collection.
// Per MongoDB's documented `reduce` output action, a key that already exists
// in 'joined' has its stored value and the new value passed through r again.
res = db.gpas.mapReduce(mapGpas, r, {out: {reduce: 'joined'}})

运行两个MapReduce操作将生成以下集合,该集合与所需的格式匹配:

> db.joined.find()
{ "_id" : "12345a", "value" : { "studentid" : "12345a", "classes_1" : [ 1, 17, 19, 21 ], "classes_2" : [ 32, 91, 101, 217 ], "overall" : 97, "subscore" : 1 } }
{ "_id" : "24680a", "value" : { "studentid" : "24680a", "classes_1" : [ 1, 11, 18, 22 ], "classes_2" : [ ], "overall" : 76, "subscore" : 2 } }
{ "_id" : "98765a", "value" : { "studentid" : "98765a", "classes_1" : [ 2, 12, 19, 22 ], "classes_2" : [ 32, 99, 110, 215 ], "overall" : 85, "subscore" : 5 } }
>

MapReduce总是以{_id: "id", value: "value"}的形式输出文档

如果希望MapReduce的输出以不同的格式显示,则必须在应用程序中以编程方式进行.

希望这将提高您对MapReduce的理解,并使您离生成所需的输出集合更近一步.祝你好运

Mongodb相关问答推荐

使用mongosh将大型json文件插入到mongo集合中

如何在MongoDB中正确解析佛年?

无法配置数据源:未指定url属性,无法为 MongoDb 配置嵌入数据源

MongoDB - 将对象转换为数组

MongoDB查询优化

Mongodb聚合中基于其他字段值的多个条件的动态新字段值

创建索引需要很长时间

当属性确实存在时,为什么mongoose模型的 hasOwnProperty 返回 false?

Java MongoDB FindOne 获取最后插入的记录

将 MongoDB 地理空间索引与 3d 数据结合使用

Express 无法 PUT/DELETE 方法.出了什么问题?

Node + Mongoose:获取最后插入的 ID?

如何在mongoose中加入两个集合

当前的 URL 字符串解析器已弃用

MongoDB限制内存

使用 mongodb 在数组中查找子文档

Mongo:匹配聚合查询中的日期似乎被忽略了

更新时提示Field name duplication not allowed with modifiers

Mongoose 为所有嵌套对象添加 _id

MongoDB - $set 更新或推送数组元素