MongoDB中的MapReduce其实更类似关系型数据库中的GroupBy。
刚做了下这样试验,对于大数据量的GroupBy(MapReduce)还是比较理想的,生成100W条3位随机字符串
- for (var i=0; i<1000000; i++)
- {
- var x = "0123456789";
- var tmp="";
- for (var j=0; j<3; j++)
- {
- tmp += x.charAt(Math.ceil(Math.random()*100000000)%x.length);
- }
- var u = {_id:i,v1:tmp};
- db.RandomNum.insert(u);
- }
然后进行对相同的随机数取Count数 所以必须GroupBy
- var m = function(){emit(this.v1,{count:1}); };
- //map key类似关系型数据的group by 第二个是value 就是要进行聚合的字段(sum...)
- var r = function (key,values) { var total = 0;for (var i=0; i<values.length; i++) { total += values[i].count; } return {count : total}; };
- //reduce
- var res = db.RandomNum.mapReduce(m, r, {out:{replace:'Result'}});
- db[res.result].find()
测试了下时间:
- var startTime = new Date();
- var m = function(){emit(this.v1,{count:1}); };
- var r = function (key,values) { var total = 0;for (var i=0; i<values.length; i++) { total += values[i].count; } return {count : total}; };
- var res = db.RandomNum.mapReduce(m, r, {out:{replace:'Result'}});
- db[res.result].find()
- (new Date().getTime()-startTime.getTime())/1000
- > db[res.result].find()
- { "_id" : "000", "value" : { "count" : 1075 } }
- { "_id" : "001", "value" : { "count" : 1045 } }
- { "_id" : "002", "value" : { "count" : 1022 } }
- { "_id" : "003", "value" : { "count" : 968 } }
- { "_id" : "004", "value" : { "count" : 994 } }
- { "_id" : "005", "value" : { "count" : 1009 } }
- { "_id" : "006", "value" : { "count" : 948 } }
- { "_id" : "007", "value" : { "count" : 1003 } }
- { "_id" : "008", "value" : { "count" : 983 } }
- { "_id" : "009", "value" : { "count" : 993 } }
- { "_id" : "010", "value" : { "count" : 987 } }
- { "_id" : "011", "value" : { "count" : 982 } }
- { "_id" : "012", "value" : { "count" : 957 } }
- { "_id" : "013", "value" : { "count" : 1031 } }
- { "_id" : "014", "value" : { "count" : 971 } }
- { "_id" : "015", "value" : { "count" : 1053 } }
- { "_id" : "016", "value" : { "count" : 974 } }
- { "_id" : "017", "value" : { "count" : 975 } }
- { "_id" : "018", "value" : { "count" : 978 } }
- { "_id" : "019", "value" : { "count" : 1010 } }
- has more
- >
- > (new Date().getTime()-startTime.getTime())/1000
- 63.335s
- > bye
测试机的性能: