我如何(在MongoDB)结合数据从多个集合到一个集合?
我可以使用地图减少,如果是,然后如何?
我非常感谢一些例子,因为我是一个新手。
我如何(在MongoDB)结合数据从多个集合到一个集合?
我可以使用地图减少,如果是,然后如何?
我非常感谢一些例子,因为我是一个新手。
当前回答
非常基本的$lookup示例。
db.getCollection('users').aggregate([
{
$lookup: {
from: "userinfo",
localField: "userId",
foreignField: "userId",
as: "userInfoData"
}
},
{
$lookup: {
from: "userrole",
localField: "userId",
foreignField: "userId",
as: "userRoleData"
}
},
{ $unwind: { path: "$userInfoData", preserveNullAndEmptyArrays: true }},
{ $unwind: { path: "$userRoleData", preserveNullAndEmptyArrays: true }}
])
这里用到了
{ $unwind: { path: "$userInfoData", preserveNullAndEmptyArrays: true }},
{ $unwind: { path: "$userRoleData", preserveNullAndEmptyArrays: true }}
而不是
{ $unwind:"$userRoleData"}
{ $unwind:"$userRoleData"}
因为{$unwind:"$userRoleData"}如果在$lookup中没有找到匹配的记录,将返回空或0结果。
其他回答
如果mongodb没有批量插入,我们循环small_collection中的所有对象,并将它们逐个插入到big_collection中:
db.small_collection.find().forEach(function(obj){
db.big_collection.insert(obj)
});
你必须在你的应用层做这些。如果您正在使用ORM,它可以使用注释(或类似的东西)来提取存在于其他集合中的引用。我只使用过Morphia, @Reference注释在查询时获取被引用的实体,因此我能够避免自己在代码中这样做。
从Mongo 4.4开始,我们可以通过将新的$unionWith聚合阶段与$group的新的$accumulator操作符耦合在聚合管道中实现这种连接:
// > db.users.find()
// [{ user: 1, name: "x" }, { user: 2, name: "y" }]
// > db.books.find()
// [{ user: 1, book: "a" }, { user: 1, book: "b" }, { user: 2, book: "c" }]
// > db.movies.find()
// [{ user: 1, movie: "g" }, { user: 2, movie: "h" }, { user: 2, movie: "i" }]
db.users.aggregate([
{ $unionWith: "books" },
{ $unionWith: "movies" },
{ $group: {
_id: "$user",
user: {
$accumulator: {
accumulateArgs: ["$name", "$book", "$movie"],
init: function() { return { books: [], movies: [] } },
accumulate: function(user, name, book, movie) {
if (name) user.name = name;
if (book) user.books.push(book);
if (movie) user.movies.push(movie);
return user;
},
merge: function(userV1, userV2) {
if (userV2.name) userV1.name = userV2.name;
userV1.books.concat(userV2.books);
userV1.movies.concat(userV2.movies);
return userV1;
},
lang: "js"
}
}
}}
])
// { _id: 1, user: { books: ["a", "b"], movies: ["g"], name: "x" } }
// { _id: 2, user: { books: ["c"], movies: ["h", "i"], name: "y" } }
$unionWith combines records from the given collection within documents already in the aggregation pipeline. After the 2 union stages, we thus have all users, books and movies records within the pipeline. We then $group records by $user and accumulate items using the $accumulator operator allowing custom accumulations of documents as they get grouped: the fields we're interested in accumulating are defined with accumulateArgs. init defines the state that will be accumulated as we group elements. the accumulate function allows performing a custom action with a record being grouped in order to build the accumulated state. For instance, if the item being grouped has the book field defined, then we update the books part of the state. merge is used to merge two internal states. It's only used for aggregations running on sharded clusters or when the operation exceeds memory limits.
对聚合中的多个集合使用多个$查找
查询:
db.getCollection('servicelocations').aggregate([
{
$match: {
serviceLocationId: {
$in: ["36728"]
}
}
},
{
$lookup: {
from: "orders",
localField: "serviceLocationId",
foreignField: "serviceLocationId",
as: "orders"
}
},
{
$lookup: {
from: "timewindowtypes",
localField: "timeWindow.timeWindowTypeId",
foreignField: "timeWindowTypeId",
as: "timeWindow"
}
},
{
$lookup: {
from: "servicetimetypes",
localField: "serviceTimeTypeId",
foreignField: "serviceTimeTypeId",
as: "serviceTime"
}
},
{
$unwind: "$orders"
},
{
$unwind: "$serviceTime"
},
{
$limit: 14
}
])
结果:
{
"_id" : ObjectId("59c3ac4bb7799c90ebb3279b"),
"serviceLocationId" : "36728",
"regionId" : 1.0,
"zoneId" : "DXBZONE1",
"description" : "AL HALLAB REST EMIRATES MALL",
"locationPriority" : 1.0,
"accountTypeId" : 1.0,
"locationType" : "SERVICELOCATION",
"location" : {
"makani" : "",
"lat" : 25.119035,
"lng" : 55.198694
},
"deliveryDays" : "MTWRFSU",
"timeWindow" : [
{
"_id" : ObjectId("59c3b0a3b7799c90ebb32cde"),
"timeWindowTypeId" : "1",
"Description" : "MORNING",
"timeWindow" : {
"openTime" : "06:00",
"closeTime" : "08:00"
},
"accountId" : 1.0
},
{
"_id" : ObjectId("59c3b0a3b7799c90ebb32cdf"),
"timeWindowTypeId" : "1",
"Description" : "MORNING",
"timeWindow" : {
"openTime" : "09:00",
"closeTime" : "10:00"
},
"accountId" : 1.0
},
{
"_id" : ObjectId("59c3b0a3b7799c90ebb32ce0"),
"timeWindowTypeId" : "1",
"Description" : "MORNING",
"timeWindow" : {
"openTime" : "10:30",
"closeTime" : "11:30"
},
"accountId" : 1.0
}
],
"address1" : "",
"address2" : "",
"phone" : "",
"city" : "",
"county" : "",
"state" : "",
"country" : "",
"zipcode" : "",
"imageUrl" : "",
"contact" : {
"name" : "",
"email" : ""
},
"status" : "ACTIVE",
"createdBy" : "",
"updatedBy" : "",
"updateDate" : "",
"accountId" : 1.0,
"serviceTimeTypeId" : "1",
"orders" : [
{
"_id" : ObjectId("59c3b291f251c77f15790f92"),
"orderId" : "AQ18O1704264",
"serviceLocationId" : "36728",
"orderNo" : "AQ18O1704264",
"orderDate" : "18-Sep-17",
"description" : "AQ18O1704264",
"serviceType" : "Delivery",
"orderSource" : "Import",
"takenBy" : "KARIM",
"plannedDeliveryDate" : ISODate("2017-08-26T00:00:00.000Z"),
"plannedDeliveryTime" : "",
"actualDeliveryDate" : "",
"actualDeliveryTime" : "",
"deliveredBy" : "",
"size1" : 296.0,
"size2" : 3573.355,
"size3" : 240.811,
"jobPriority" : 1.0,
"cancelReason" : "",
"cancelDate" : "",
"cancelBy" : "",
"reasonCode" : "",
"reasonText" : "",
"status" : "",
"lineItems" : [
{
"ItemId" : "BNWB020",
"size1" : 15.0,
"size2" : 78.6,
"size3" : 6.0
},
{
"ItemId" : "BNWB021",
"size1" : 20.0,
"size2" : 252.0,
"size3" : 11.538
},
{
"ItemId" : "BNWB023",
"size1" : 15.0,
"size2" : 285.0,
"size3" : 16.071
},
{
"ItemId" : "CPMW112",
"size1" : 3.0,
"size2" : 25.38,
"size3" : 1.731
},
{
"ItemId" : "MMGW001",
"size1" : 25.0,
"size2" : 464.375,
"size3" : 46.875
},
{
"ItemId" : "MMNB218",
"size1" : 50.0,
"size2" : 920.0,
"size3" : 60.0
},
{
"ItemId" : "MMNB219",
"size1" : 50.0,
"size2" : 630.0,
"size3" : 40.0
},
{
"ItemId" : "MMNB220",
"size1" : 50.0,
"size2" : 416.0,
"size3" : 28.846
},
{
"ItemId" : "MMNB270",
"size1" : 50.0,
"size2" : 262.0,
"size3" : 20.0
},
{
"ItemId" : "MMNB302",
"size1" : 15.0,
"size2" : 195.0,
"size3" : 6.0
},
{
"ItemId" : "MMNB373",
"size1" : 3.0,
"size2" : 45.0,
"size3" : 3.75
}
],
"accountId" : 1.0
},
{
"_id" : ObjectId("59c3b291f251c77f15790f9d"),
"orderId" : "AQ137O1701240",
"serviceLocationId" : "36728",
"orderNo" : "AQ137O1701240",
"orderDate" : "18-Sep-17",
"description" : "AQ137O1701240",
"serviceType" : "Delivery",
"orderSource" : "Import",
"takenBy" : "KARIM",
"plannedDeliveryDate" : ISODate("2017-08-26T00:00:00.000Z"),
"plannedDeliveryTime" : "",
"actualDeliveryDate" : "",
"actualDeliveryTime" : "",
"deliveredBy" : "",
"size1" : 28.0,
"size2" : 520.11,
"size3" : 52.5,
"jobPriority" : 1.0,
"cancelReason" : "",
"cancelDate" : "",
"cancelBy" : "",
"reasonCode" : "",
"reasonText" : "",
"status" : "",
"lineItems" : [
{
"ItemId" : "MMGW001",
"size1" : 25.0,
"size2" : 464.38,
"size3" : 46.875
},
{
"ItemId" : "MMGW001-F1",
"size1" : 3.0,
"size2" : 55.73,
"size3" : 5.625
}
],
"accountId" : 1.0
},
{
"_id" : ObjectId("59c3b291f251c77f15790fd8"),
"orderId" : "AQ110O1705036",
"serviceLocationId" : "36728",
"orderNo" : "AQ110O1705036",
"orderDate" : "18-Sep-17",
"description" : "AQ110O1705036",
"serviceType" : "Delivery",
"orderSource" : "Import",
"takenBy" : "KARIM",
"plannedDeliveryDate" : ISODate("2017-08-26T00:00:00.000Z"),
"plannedDeliveryTime" : "",
"actualDeliveryDate" : "",
"actualDeliveryTime" : "",
"deliveredBy" : "",
"size1" : 60.0,
"size2" : 1046.0,
"size3" : 68.0,
"jobPriority" : 1.0,
"cancelReason" : "",
"cancelDate" : "",
"cancelBy" : "",
"reasonCode" : "",
"reasonText" : "",
"status" : "",
"lineItems" : [
{
"ItemId" : "MMNB218",
"size1" : 50.0,
"size2" : 920.0,
"size3" : 60.0
},
{
"ItemId" : "MMNB219",
"size1" : 10.0,
"size2" : 126.0,
"size3" : 8.0
}
],
"accountId" : 1.0
}
],
"serviceTime" : {
"_id" : ObjectId("59c3b07cb7799c90ebb32cdc"),
"serviceTimeTypeId" : "1",
"serviceTimeType" : "nohelper",
"description" : "",
"fixedTime" : 30.0,
"variableTime" : 0.0,
"accountId" : 1.0
}
}
虽然不能实时执行,但可以多次运行map-reduce,通过使用MongoDB 1.8+ map/reduce中的“reduce”out选项将数据合并在一起(参见http://www.mongodb.org/display/DOCS/MapReduce#MapReduce-Outputoptions)。您需要在两个集合中都有一些可以用作_id的键。
例如,假设您有一个用户集合和一个评论集合,并且您希望有一个新的集合,其中包含每个评论的一些用户统计信息。
让我们说users集合有以下字段:
_id firstName 姓 国家 性别 年龄
然后comments集合有以下字段:
_id 用户标识 评论 创建
你可以这样做:
var mapUsers, mapComments, reduce;
db.users_comments.remove();
// setup sample data - wouldn't actually use this in production
db.users.remove();
db.comments.remove();
db.users.save({firstName:"Rich",lastName:"S",gender:"M",country:"CA",age:"18"});
db.users.save({firstName:"Rob",lastName:"M",gender:"M",country:"US",age:"25"});
db.users.save({firstName:"Sarah",lastName:"T",gender:"F",country:"US",age:"13"});
var users = db.users.find();
db.comments.save({userId: users[0]._id, "comment": "Hey, what's up?", created: new ISODate()});
db.comments.save({userId: users[1]._id, "comment": "Not much", created: new ISODate()});
db.comments.save({userId: users[0]._id, "comment": "Cool", created: new ISODate()});
// end sample data setup
mapUsers = function() {
var values = {
country: this.country,
gender: this.gender,
age: this.age
};
emit(this._id, values);
};
mapComments = function() {
var values = {
commentId: this._id,
comment: this.comment,
created: this.created
};
emit(this.userId, values);
};
reduce = function(k, values) {
var result = {}, commentFields = {
"commentId": '',
"comment": '',
"created": ''
};
values.forEach(function(value) {
var field;
if ("comment" in value) {
if (!("comments" in result)) {
result.comments = [];
}
result.comments.push(value);
} else if ("comments" in value) {
if (!("comments" in result)) {
result.comments = [];
}
result.comments.push.apply(result.comments, value.comments);
}
for (field in value) {
if (value.hasOwnProperty(field) && !(field in commentFields)) {
result[field] = value[field];
}
}
});
return result;
};
db.users.mapReduce(mapUsers, reduce, {"out": {"reduce": "users_comments"}});
db.comments.mapReduce(mapComments, reduce, {"out": {"reduce": "users_comments"}});
db.users_comments.find().pretty(); // see the resulting collection
此时,您将拥有一个名为users_comments的新集合,其中包含合并的数据,您现在可以使用它了。这些简化的集合都有_id,这是你在map函数中发出的键,然后所有的值都是value键内的子对象-这些值不在这些简化文档的顶层。
这是一个比较简单的例子。您可以重复使用更多的集合,只要您想继续构建减少的集合。您还可以在该过程中对数据进行总结和聚合。随着聚合和保存现有字段的逻辑变得更加复杂,您可能会定义多个reduce函数。
您还会注意到,现在每个用户都有一个文档,数组中包含该用户的所有评论。如果我们合并的数据是一对一的关系,而不是一对多的关系,它将是平坦的,你可以简单地使用这样的reduce函数:
reduce = function(k, values) {
var result = {};
values.forEach(function(value) {
var field;
for (field in value) {
if (value.hasOwnProperty(field)) {
result[field] = value[field];
}
}
});
return result;
};
如果你想平铺users_comments集合,所以每个注释只有一个文档,另外运行这个:
var map, reduce;
map = function() {
var debug = function(value) {
var field;
for (field in value) {
print(field + ": " + value[field]);
}
};
debug(this);
var that = this;
if ("comments" in this.value) {
this.value.comments.forEach(function(value) {
emit(value.commentId, {
userId: that._id,
country: that.value.country,
age: that.value.age,
comment: value.comment,
created: value.created,
});
});
}
};
reduce = function(k, values) {
var result = {};
values.forEach(function(value) {
var field;
for (field in value) {
if (value.hasOwnProperty(field)) {
result[field] = value[field];
}
}
});
return result;
};
db.users_comments.mapReduce(map, reduce, {"out": "comments_with_demographics"});
这个技巧绝对不应该在飞行中执行。它适用于定期更新合并数据的cron作业或类似的工作。您可能希望在新集合上运行ensureIndex,以确保对它执行的查询能够快速运行(请记住,您的数据仍然在值键中,因此如果您要在注释创建时间上索引comments_with_demographic,那么它将是db.comments_with_demographic .ensureIndex({"value.created": 1});