MongoDb summing number of distinct elements in string field array - python

I have an ip address collection:
{
"_id" : "uezyuLx4jjfvcqN",
"CVE" : ["CVE2020-123", "CVE2022-789", "CVE2019-456"],
"ip" : "1.2.3.4"
}
{
"_id" : "dCC8GrNdEjym3ryua",
"CVE" : ["CVE2020-123", "CVE2021-469"],
"ip" : "5.6.7.8"
}
{
"_id" : "dCC8GrNdEjym3ryua",
"CVE" : ["CVE2020-123", "CVE2021-469"],
"ip" : "7.6.7.6"
}
I'm trying to calculate the distinct sum of the CVE field, where IPs are in ["5.6.7.8", "1.2.3.4"].
Expected output:
{
ip: ['1.2.3.4', '5.6.7.8'],
sum_distinct_cve:4,
CVES: ["CVE2020-123", "CVE2022-789", "CVE2019-456", "CVE2021-469"]
}
So I'm doing the following:
db = db.getSiblingDB("test");
hosts = db.getCollection("test-collection")
hosts.aggregate([
{$match:
{"ip": {$in: ["1.2.3.4", "5.6.7.8"]}}},
{$group:
{_id: "$CVE",
totals: {$sum: "$CVE"}}}
]);
The sum is returning 0, which I've realised is because of MongoDb's behaviour when trying to sum a string field. This is detailed here: mongodb sum query returning zero
What I would like to know though is how I can sum the number of elements, and also find the distinct sum.`

Simple option:
db.collection.aggregate([
{
$match: {
"ip": {
$in: [
"1.2.3.4",
"5.6.7.8"
]
}
}
},
{
$unwind: "$CVE"
},
{
$group: {
_id: "",
ip: {
$addToSet: "$ip"
},
CVE: {
$addToSet: "$CVE"
}
}
},
{
$project: {
_id: 0,
ip: 1,
CVE: 1,
sum_distinct_cve: {
$size: "$CVE"
}
}
}
])
Explained:
Match the ip's
unwind the CVE arrays
group so you can join ip and CVE distinct values only
Project the necessary fields and use $size to count the distinct CVE's
Playground

I agree with #R2D2. One more option to avoid $unwind (which considered costly in terms of performance) is to use $reduce instead:
db.collection.aggregate([
{$match: {ip: {$in: ["1.2.3.4", "5.6.7.8"]}}},
{$group: {
_id: 0,
ip: {$addToSet: "$ip"},
CVE: {$addToSet: "$CVE"}
}},
{$project: {
_id: 0, ip: 1,
CVE: {
$reduce: {
input: "$CVE",
initialValue: [],
in: {$setUnion: ["$$value", "$$this"]}
}
}
}},
{$set: {sum_distinct_cve: {$size: "$CVE"}}}
])
See how it works on the playground example

Related

PyMongo not returning results on aggregation

I'm a total beginner in PyMongo. I'm trying to find activities that are registered multiple times. This code is returning an empty list. Could you please help me in finding the mistake:
rows = self.db.Activity.aggregate( [
{ '$group':{
"_id":
{
"user_id": "$user_id",
"transportation_mode": "$transportation_mode",
"start_date_time": "$start_date_time",
"end_date_time": "$end_date_time"
},
"count": {'$sum':1}
}
},
{'$match':
{ "count": { '$gt': 1 } }
},
{'$project':
{"_id":0,
"user_id":"_id.user_id",
"transportation_mode":"_id.transportation_mode",
"start_date_time":"_id.start_date_time",
"end_date_time":"_id.end_date_time",
"count": 1
}
}
]
)
5 rows from db:
{ "_id" : 0, "user_id" : "000", "start_date_time" : "2008-10-23 02:53:04", "end_date_time" : "2008-10-23 11:11:12" }
{ "_id" : 1, "user_id" : "000", "start_date_time" : "2008-10-24 02:09:59", "end_date_time" : "2008-10-24 02:47:06" }
{ "_id" : 2, "user_id" : "000", "start_date_time" : "2008-10-26 13:44:07", "end_date_time" : "2008-10-26 15:04:07" }
{ "_id" : 3, "user_id" : "000", "start_date_time" : "2008-10-27 11:54:49", "end_date_time" : "2008-10-27 12:05:54" }
{ "_id" : 4, "user_id" : "000", "start_date_time" : "2008-10-28 00:38:26", "end_date_time" : "2008-10-28 05:03:42" }
Thank you
When you pass _id: 0 in the $project stage, it will not project the sub-objects even if they are projected in the follow up, since the rule is overwritten.
Try the below $project stage.
{
'$project': {
"user_id":"_id.user_id",
"transportation_mode":"_id.transportation_mode",
"start_date_time":"_id.start_date_time",
"end_date_time":"_id.end_date_time",
"count": 1
}
}
rows = self.db.Activity.aggregate( [
{
'$group':{
"_id": {
"user_id": "$user_id",
"transportation_mode": "$transportation_mode",
"start_date_time": "$start_date_time",
"end_date_time": "$end_date_time"
},
"count": {'$sum':1}
}
},
{
'$match':{
"count": { '$gt': 1 }
}
},
{
'$project': {
"user_id":"_id.user_id",
"transportation_mode":"_id.transportation_mode",
"start_date_time":"_id.start_date_time",
"end_date_time":"_id.end_date_time",
"count": 1,
}
}
])
Your group criteria is likely too narrow.
The $group stage will create a separate output document for each distinct value of the _id field. The pipeline in the question will only include two input documents in the same group if they have exactly the same value in all four of those fields.
In order for a count to be greater than 1, there must exist 2 documents with the same user, mode, and exactly the same start and end.
In the same data you show, there are no two documents that would be in the same group, so all of the output documents from the $group stage would have a count of 1, and therefore none of them satisfy the $match, and the return is an empty list.

MongoDB: find document with largest amount of

I have a collection of documents like this:
"RecordId": 1,
"CurrentState" : {
"collection_method" : "Phone",
"collection_method_convert" : 1,
"any_amount_outside_of_min_max_fx_margin" : null,
"amounts_and_rates" : [
{
"_id" : ObjectId("5ef870670000000000000000"),
"amount_from" : 1000.0,
"time_collected_researcher_input" : null,
"date_collected_researcher_input" : null,
"timezone_researcher_input" : null,
"datetime_collected_utc" : ISODate("2020-03-02T21:45:00.000Z"),
"interbank_rate" : 0.58548,
"ib_api_url" : null,
"fx_rate" : 0.56796,
"fx_margin" : 2.9924164787866,
"amount_margin_approved" : true,
"outside_of_min_max_fx_margin" : null,
"amount_duplicated" : false,
"fx_margin_delta_mom" : null,
"fx_margin_reldiff_pct_mom" : null,
"fx_margin_reldiff_gt15pct_mom" : null
},
{
"_id" : ObjectId("5efdadae0000000000000000"),
"amount_from" : 10000.0,
"time_collected_researcher_input" : null,
"date_collected_researcher_input" : null,
"timezone_researcher_input" : null,
"datetime_collected_utc" : ISODate("2020-03-02T21:45:00.000Z"),
"interbank_rate" : 0.58548,
"ib_api_url" : null,
"fx_rate" : 0.57386,
"fx_margin" : 1.9846963175514,
"amount_margin_approved" : true,
"outside_of_min_max_fx_margin" : null,
"amount_duplicated" : false,
"fx_margin_delta_mom" : null,
"fx_margin_reldiff_pct_mom" : null,
"fx_margin_reldiff_gt15pct_mom" : null
}
Array of amounts_and_rates can contain different fields in different documents. Even inside one document.
I need to find the document with largest number of fields.
And also to find all possible fields in the amounts_and_rates. Collection can be rather large and check one by one can take rather long time. Is it possible to find what I need with aggregation functions of mongodb?
I want to have in the end something like:
[{RecordId: 1, number_of_fields: [13, 12, 14]}{RecordId:2, number_of_fields:[9, 12, 14]}]
Or even just max_records_number in [{RecordId:2}, {RecordId: 4}].
Also would like to receive set of fields in amount_and_rates through the collection like:
set = ["_id", "amount_from", "time_collected_researcher_input" ...]
The solutions of your 2 requirements,
The set of unique fields:
set = ["_id", "amount_from", "time_collected_researcher_input" ...]
$unwind amounts_and_rates because its an array and need to use in $project
$project converted object to array using $objectToArray
$unwind again because amounts_and_rates is again an array and need to use in $group
$group by null _id and add unique keys in set amounts_and_rates using $addToSet
$project remove _id
db.collection.aggregate([
{
$unwind: "$CurrentState.amounts_and_rates"
},
{
$project: {
amounts_and_rates: {
$objectToArray: "$CurrentState.amounts_and_rates"
}
}
},
{
$unwind: "$amounts_and_rates"
},
{
$group: {
_id: null,
amounts_and_rates: {
$addToSet: "$amounts_and_rates.k"
}
}
},
{
$project: {
_id: 0
}
}
])
Working Playground: https://mongoplayground.net/p/6dPGM2hZ4vW
Fields count in sub document:
[{RecordId: 1, number_of_fields: [13, 12, 14]}{RecordId:2, number_of_fields:[9, 12, 14]}]
$unwind amounts_and_rates because its an array and need to use in $project
$project converted object to array using $objectToArray and get the count of particular document
$group by RecordId and push all arrayofkeyvalue count in number_of_fields and added total for total count
$project remove _id
db.collection.aggregate([
{
$unwind: "$CurrentState.amounts_and_rates"
},
{
"$project": {
RecordId: 1,
arrayofkeyvalue: {
$size: {
$objectToArray: "$CurrentState.amounts_and_rates"
}
}
}
},
{
$group: {
_id: "$RecordId",
RecordId: {
$first: "$RecordId"
},
number_of_fields: {
$push: {
$sum: "$arrayofkeyvalue"
}
},
total: {
$sum: "$arrayofkeyvalue"
}
}
},
{
$project: {
_id: 0
}
}
])
Working Playground: https://mongoplayground.net/p/TRFsj11BqVR

Query a 3rd level nested

I have a DB in MongoDB that has 3 levels, and I want to get the value form last level. The structure is the following:
{
"_id" : "10000",
"Values" : [
{
"Value1" : "Article 1",
"Value2" : [
{
"Value2_1" : 1,
"Value2_2" : 2,
}
]
}
]
}
I need to get the value form the label "Value2_1".
So far my code is the following:
for row in collection.find({"_id":1, "Values.Value2.Value2_1":1})
print(row)
The output is always "None".
Any ideas about how to make the correct query?
Thanks!
By using dot (.) notation you can get your expected result.
db.collection.find({"Values.Value2.Value2_1" : 100})
The above query will select all documents where the Values array has Values2 array and Values2 has Values2_1 whose value is equal 100
Output:
{
"_id" : ObjectId("5b86bd1172876096c7a9d6cf"),
"Values" : [
{
"Value1" : "Article 1",
"Value2" : [
{
"Value2_1" : 100.0,
"Value2_2" : 200.0
},
{
"Value2_1" : 15.0,
"Value2_2" : 25.0
}
]
}
]
}
And if you try to search with _id then you don't need to use your
second condition because by definition _id is always unique.
This following query will also show the same result as above.
db.collection.find({"_id" : ObjectId("5b86bd1172876096c7a9d6cf")})
If you want to specifically get only those items from inner array which have fulfill your inner conditions, you can aggregate the query
PS - My 2 cents - I do not know if you require this, as i could not get it that clear from your question, i just thought you may be asking this.
db.coll.aggregate([{
$unwind: '$Values'
}, {
$project: {
'Values_F': {
$filter: {
input: "$Values.Value2",
as: "value2",
cond: {
$eq: ["$$value2.Value2_1", 1]
}
}
}
}
}, {
$project: {
'Values_F': 1,
'total': {
$size: '$Values_F'
}
}
}, {
$match: {
total: {
$gte: 1
}
}
}
])

How can I chain two Pymongo cursors into one? [duplicate]

How can I (in MongoDB) combine data from multiple collections into one collection?
Can I use map-reduce and if so then how?
I would greatly appreciate some example as I am a novice.
MongoDB 3.2 now allows one to combine data from multiple collections into one through the $lookup aggregation stage. As a practical example, lets say that you have data about books split into two different collections.
First collection, called books, having the following data:
{
"isbn": "978-3-16-148410-0",
"title": "Some cool book",
"author": "John Doe"
}
{
"isbn": "978-3-16-148999-9",
"title": "Another awesome book",
"author": "Jane Roe"
}
And the second collection, called books_selling_data, having the following data:
{
"_id": ObjectId("56e31bcf76cdf52e541d9d26"),
"isbn": "978-3-16-148410-0",
"copies_sold": 12500
}
{
"_id": ObjectId("56e31ce076cdf52e541d9d28"),
"isbn": "978-3-16-148999-9",
"copies_sold": 720050
}
{
"_id": ObjectId("56e31ce076cdf52e541d9d29"),
"isbn": "978-3-16-148999-9",
"copies_sold": 1000
}
To merge both collections is just a matter of using $lookup in the following way:
db.books.aggregate([{
$lookup: {
from: "books_selling_data",
localField: "isbn",
foreignField: "isbn",
as: "copies_sold"
}
}])
After this aggregation, the books collection will look like the following:
{
"isbn": "978-3-16-148410-0",
"title": "Some cool book",
"author": "John Doe",
"copies_sold": [
{
"_id": ObjectId("56e31bcf76cdf52e541d9d26"),
"isbn": "978-3-16-148410-0",
"copies_sold": 12500
}
]
}
{
"isbn": "978-3-16-148999-9",
"title": "Another awesome book",
"author": "Jane Roe",
"copies_sold": [
{
"_id": ObjectId("56e31ce076cdf52e541d9d28"),
"isbn": "978-3-16-148999-9",
"copies_sold": 720050
},
{
"_id": ObjectId("56e31ce076cdf52e541d9d28"),
"isbn": "978-3-16-148999-9",
"copies_sold": 1000
}
]
}
It is important to note a few things:
The "from" collection, in this case books_selling_data, cannot be sharded.
The "as" field will be an array, as the example above.
Both "localField" and "foreignField" options on the $lookup stage will be treated as null for matching purposes if they don't exist in their respective collections (the $lookup docs has a perfect example about that).
So, as a conclusion, if you want to consolidate both collections, having, in this case, a flat copies_sold field with the total copies sold, you will have to work a little bit more, probably using an intermediary collection that will, then, be $out to the final collection.
Although you can't do this real-time, you can run map-reduce multiple times to merge data together by using the "reduce" out option in MongoDB 1.8+ map/reduce (see http://www.mongodb.org/display/DOCS/MapReduce#MapReduce-Outputoptions). You need to have some key in both collections that you can use as an _id.
For example, let's say you have a users collection and a comments collection and you want to have a new collection that has some user demographic info for each comment.
Let's say the users collection has the following fields:
_id
firstName
lastName
country
gender
age
And then the comments collection has the following fields:
_id
userId
comment
created
You would do this map/reduce:
var mapUsers, mapComments, reduce;
db.users_comments.remove();
// setup sample data - wouldn't actually use this in production
db.users.remove();
db.comments.remove();
db.users.save({firstName:"Rich",lastName:"S",gender:"M",country:"CA",age:"18"});
db.users.save({firstName:"Rob",lastName:"M",gender:"M",country:"US",age:"25"});
db.users.save({firstName:"Sarah",lastName:"T",gender:"F",country:"US",age:"13"});
var users = db.users.find();
db.comments.save({userId: users[0]._id, "comment": "Hey, what's up?", created: new ISODate()});
db.comments.save({userId: users[1]._id, "comment": "Not much", created: new ISODate()});
db.comments.save({userId: users[0]._id, "comment": "Cool", created: new ISODate()});
// end sample data setup
mapUsers = function() {
var values = {
country: this.country,
gender: this.gender,
age: this.age
};
emit(this._id, values);
};
mapComments = function() {
var values = {
commentId: this._id,
comment: this.comment,
created: this.created
};
emit(this.userId, values);
};
reduce = function(k, values) {
var result = {}, commentFields = {
"commentId": '',
"comment": '',
"created": ''
};
values.forEach(function(value) {
var field;
if ("comment" in value) {
if (!("comments" in result)) {
result.comments = [];
}
result.comments.push(value);
} else if ("comments" in value) {
if (!("comments" in result)) {
result.comments = [];
}
result.comments.push.apply(result.comments, value.comments);
}
for (field in value) {
if (value.hasOwnProperty(field) && !(field in commentFields)) {
result[field] = value[field];
}
}
});
return result;
};
db.users.mapReduce(mapUsers, reduce, {"out": {"reduce": "users_comments"}});
db.comments.mapReduce(mapComments, reduce, {"out": {"reduce": "users_comments"}});
db.users_comments.find().pretty(); // see the resulting collection
At this point, you will have a new collection called users_comments that contains the merged data and you can now use that. These reduced collections all have _id which is the key you were emitting in your map functions and then all of the values are a sub-object inside the value key - the values aren't at the top level of these reduced documents.
This is a somewhat simple example. You can repeat this with more collections as much as you want to keep building up the reduced collection. You could also do summaries and aggregations of data in the process. Likely you would define more than one reduce function as the logic for aggregating and preserving existing fields gets more complex.
You'll also note that there is now one document for each user with all of that user's comments in an array. If we were merging data that has a one-to-one relationship rather than one-to-many, it would be flat and you could simply use a reduce function like this:
reduce = function(k, values) {
var result = {};
values.forEach(function(value) {
var field;
for (field in value) {
if (value.hasOwnProperty(field)) {
result[field] = value[field];
}
}
});
return result;
};
If you want to flatten the users_comments collection so it's one document per comment, additionally run this:
var map, reduce;
map = function() {
var debug = function(value) {
var field;
for (field in value) {
print(field + ": " + value[field]);
}
};
debug(this);
var that = this;
if ("comments" in this.value) {
this.value.comments.forEach(function(value) {
emit(value.commentId, {
userId: that._id,
country: that.value.country,
age: that.value.age,
comment: value.comment,
created: value.created,
});
});
}
};
reduce = function(k, values) {
var result = {};
values.forEach(function(value) {
var field;
for (field in value) {
if (value.hasOwnProperty(field)) {
result[field] = value[field];
}
}
});
return result;
};
db.users_comments.mapReduce(map, reduce, {"out": "comments_with_demographics"});
This technique should definitely not be performed on the fly. It's suited for a cron job or something like that which updates the merged data periodically. You'll probably want to run ensureIndex on the new collection to make sure queries you perform against it run quickly (keep in mind that your data is still inside a value key, so if you were to index comments_with_demographics on the comment created time, it would be db.comments_with_demographics.ensureIndex({"value.created": 1});
Doing unions in MongoDB in a 'SQL UNION' fashion is possible using aggregations along with lookups, in a single query. Here is an example I have tested that works with MongoDB 4.0:
// Create employees data for testing the union.
db.getCollection('employees').insert({ name: "John", type: "employee", department: "sales" });
db.getCollection('employees').insert({ name: "Martha", type: "employee", department: "accounting" });
db.getCollection('employees').insert({ name: "Amy", type: "employee", department: "warehouse" });
db.getCollection('employees').insert({ name: "Mike", type: "employee", department: "warehouse" });
// Create freelancers data for testing the union.
db.getCollection('freelancers').insert({ name: "Stephany", type: "freelancer", department: "accounting" });
db.getCollection('freelancers').insert({ name: "Martin", type: "freelancer", department: "sales" });
db.getCollection('freelancers').insert({ name: "Doug", type: "freelancer", department: "warehouse" });
db.getCollection('freelancers').insert({ name: "Brenda", type: "freelancer", department: "sales" });
// Here we do a union of the employees and freelancers using a single aggregation query.
db.getCollection('freelancers').aggregate( // 1. Use any collection containing at least one document.
[
{ $limit: 1 }, // 2. Keep only one document of the collection.
{ $project: { _id: '$$REMOVE' } }, // 3. Remove everything from the document.
// 4. Lookup collections to union together.
{ $lookup: { from: 'employees', pipeline: [{ $match: { department: 'sales' } }], as: 'employees' } },
{ $lookup: { from: 'freelancers', pipeline: [{ $match: { department: 'sales' } }], as: 'freelancers' } },
// 5. Union the collections together with a projection.
{ $project: { union: { $concatArrays: ["$employees", "$freelancers"] } } },
// 6. Unwind and replace root so you end up with a result set.
{ $unwind: '$union' },
{ $replaceRoot: { newRoot: '$union' } }
]);
Here is the explanation of how it works:
Instantiate an aggregate out of any collection of your database that has at least one document in it. If you can't guarantee any collection of your database will not be empty, you can workaround this issue by creating in your database some sort of 'dummy' collection containing a single empty document in it that will be there specifically for doing union queries.
Make the first stage of your pipeline to be { $limit: 1 }. This will strip all the documents of the collection except the first one.
Strip all the fields of the remaining document by using a $project stage:
{ $project: { _id: '$$REMOVE' } }
Your aggregate now contains a single, empty document. It's time to add lookups for each collection you want to union together. You may use the pipeline field to do some specific filtering, or leave localField and foreignField as null to match the whole collection.
{ $lookup: { from: 'collectionToUnion1', pipeline: [...], as: 'Collection1' } },
{ $lookup: { from: 'collectionToUnion2', pipeline: [...], as: 'Collection2' } },
{ $lookup: { from: 'collectionToUnion3', pipeline: [...], as: 'Collection3' } }
You now have an aggregate containing a single document that contains 3 arrays like this:
{
Collection1: [...],
Collection2: [...],
Collection3: [...]
}
You can then merge them together into a single array using a $project stage along with the $concatArrays aggregation operator:
{
"$project" :
{
"Union" : { $concatArrays: ["$Collection1", "$Collection2", "$Collection3"] }
}
}
You now have an aggregate containing a single document, into which is located an array that contains your union of collections. What remains to be done is to add an $unwind and a $replaceRoot stage to split your array into separate documents:
{ $unwind: "$Union" },
{ $replaceRoot: { newRoot: "$Union" } }
VoilĂ . You now have a result set containing the collections you wanted to union together. You can then add more stages to filter it further, sort it, apply skip() and limit(). Pretty much anything you want.
Starting Mongo 4.4, we can achieve this join within an aggregation pipeline by coupling the new $unionWith aggregation stage with $group's new $accumulator operator:
// > db.users.find()
// [{ user: 1, name: "x" }, { user: 2, name: "y" }]
// > db.books.find()
// [{ user: 1, book: "a" }, { user: 1, book: "b" }, { user: 2, book: "c" }]
// > db.movies.find()
// [{ user: 1, movie: "g" }, { user: 2, movie: "h" }, { user: 2, movie: "i" }]
db.users.aggregate([
{ $unionWith: "books" },
{ $unionWith: "movies" },
{ $group: {
_id: "$user",
user: {
$accumulator: {
accumulateArgs: ["$name", "$book", "$movie"],
init: function() { return { books: [], movies: [] } },
accumulate: function(user, name, book, movie) {
if (name) user.name = name;
if (book) user.books.push(book);
if (movie) user.movies.push(movie);
return user;
},
merge: function(userV1, userV2) {
if (userV2.name) userV1.name = userV2.name;
userV1.books.concat(userV2.books);
userV1.movies.concat(userV2.movies);
return userV1;
},
lang: "js"
}
}
}}
])
// { _id: 1, user: { books: ["a", "b"], movies: ["g"], name: "x" } }
// { _id: 2, user: { books: ["c"], movies: ["h", "i"], name: "y" } }
$unionWith combines records from the given collection within documents already in the aggregation pipeline. After the 2 union stages, we thus have all users, books and movies records within the pipeline.
We then $group records by $user and accumulate items using the $accumulator operator allowing custom accumulations of documents as they get grouped:
the fields we're interested in accumulating are defined with accumulateArgs.
init defines the state that will be accumulated as we group elements.
the accumulate function allows performing a custom action with a record being grouped in order to build the accumulated state. For instance, if the item being grouped has the book field defined, then we update the books part of the state.
merge is used to merge two internal states. It's only used for aggregations running on sharded clusters or when the operation exceeds memory limits.
Very basic example with $lookup.
db.getCollection('users').aggregate([
{
$lookup: {
from: "userinfo",
localField: "userId",
foreignField: "userId",
as: "userInfoData"
}
},
{
$lookup: {
from: "userrole",
localField: "userId",
foreignField: "userId",
as: "userRoleData"
}
},
{ $unwind: { path: "$userInfoData", preserveNullAndEmptyArrays: true }},
{ $unwind: { path: "$userRoleData", preserveNullAndEmptyArrays: true }}
])
Here is used
{ $unwind: { path: "$userInfoData", preserveNullAndEmptyArrays: true }},
{ $unwind: { path: "$userRoleData", preserveNullAndEmptyArrays: true }}
Instead of
{ $unwind:"$userRoleData"}
{ $unwind:"$userRoleData"}
Because { $unwind:"$userRoleData"} this will return empty or 0 result if no matching record found with $lookup.
If there is no bulk insert into mongodb, we loop all objects in the small_collection and insert them one by one into the big_collection:
db.small_collection.find().forEach(function(obj){
db.big_collection.insert(obj)
});
use multiple $lookup for multiple collections in aggregation
query:
db.getCollection('servicelocations').aggregate([
{
$match: {
serviceLocationId: {
$in: ["36728"]
}
}
},
{
$lookup: {
from: "orders",
localField: "serviceLocationId",
foreignField: "serviceLocationId",
as: "orders"
}
},
{
$lookup: {
from: "timewindowtypes",
localField: "timeWindow.timeWindowTypeId",
foreignField: "timeWindowTypeId",
as: "timeWindow"
}
},
{
$lookup: {
from: "servicetimetypes",
localField: "serviceTimeTypeId",
foreignField: "serviceTimeTypeId",
as: "serviceTime"
}
},
{
$unwind: "$orders"
},
{
$unwind: "$serviceTime"
},
{
$limit: 14
}
])
result:
{
"_id" : ObjectId("59c3ac4bb7799c90ebb3279b"),
"serviceLocationId" : "36728",
"regionId" : 1.0,
"zoneId" : "DXBZONE1",
"description" : "AL HALLAB REST EMIRATES MALL",
"locationPriority" : 1.0,
"accountTypeId" : 1.0,
"locationType" : "SERVICELOCATION",
"location" : {
"makani" : "",
"lat" : 25.119035,
"lng" : 55.198694
},
"deliveryDays" : "MTWRFSU",
"timeWindow" : [
{
"_id" : ObjectId("59c3b0a3b7799c90ebb32cde"),
"timeWindowTypeId" : "1",
"Description" : "MORNING",
"timeWindow" : {
"openTime" : "06:00",
"closeTime" : "08:00"
},
"accountId" : 1.0
},
{
"_id" : ObjectId("59c3b0a3b7799c90ebb32cdf"),
"timeWindowTypeId" : "1",
"Description" : "MORNING",
"timeWindow" : {
"openTime" : "09:00",
"closeTime" : "10:00"
},
"accountId" : 1.0
},
{
"_id" : ObjectId("59c3b0a3b7799c90ebb32ce0"),
"timeWindowTypeId" : "1",
"Description" : "MORNING",
"timeWindow" : {
"openTime" : "10:30",
"closeTime" : "11:30"
},
"accountId" : 1.0
}
],
"address1" : "",
"address2" : "",
"phone" : "",
"city" : "",
"county" : "",
"state" : "",
"country" : "",
"zipcode" : "",
"imageUrl" : "",
"contact" : {
"name" : "",
"email" : ""
},
"status" : "ACTIVE",
"createdBy" : "",
"updatedBy" : "",
"updateDate" : "",
"accountId" : 1.0,
"serviceTimeTypeId" : "1",
"orders" : [
{
"_id" : ObjectId("59c3b291f251c77f15790f92"),
"orderId" : "AQ18O1704264",
"serviceLocationId" : "36728",
"orderNo" : "AQ18O1704264",
"orderDate" : "18-Sep-17",
"description" : "AQ18O1704264",
"serviceType" : "Delivery",
"orderSource" : "Import",
"takenBy" : "KARIM",
"plannedDeliveryDate" : ISODate("2017-08-26T00:00:00.000Z"),
"plannedDeliveryTime" : "",
"actualDeliveryDate" : "",
"actualDeliveryTime" : "",
"deliveredBy" : "",
"size1" : 296.0,
"size2" : 3573.355,
"size3" : 240.811,
"jobPriority" : 1.0,
"cancelReason" : "",
"cancelDate" : "",
"cancelBy" : "",
"reasonCode" : "",
"reasonText" : "",
"status" : "",
"lineItems" : [
{
"ItemId" : "BNWB020",
"size1" : 15.0,
"size2" : 78.6,
"size3" : 6.0
},
{
"ItemId" : "BNWB021",
"size1" : 20.0,
"size2" : 252.0,
"size3" : 11.538
},
{
"ItemId" : "BNWB023",
"size1" : 15.0,
"size2" : 285.0,
"size3" : 16.071
},
{
"ItemId" : "CPMW112",
"size1" : 3.0,
"size2" : 25.38,
"size3" : 1.731
},
{
"ItemId" : "MMGW001",
"size1" : 25.0,
"size2" : 464.375,
"size3" : 46.875
},
{
"ItemId" : "MMNB218",
"size1" : 50.0,
"size2" : 920.0,
"size3" : 60.0
},
{
"ItemId" : "MMNB219",
"size1" : 50.0,
"size2" : 630.0,
"size3" : 40.0
},
{
"ItemId" : "MMNB220",
"size1" : 50.0,
"size2" : 416.0,
"size3" : 28.846
},
{
"ItemId" : "MMNB270",
"size1" : 50.0,
"size2" : 262.0,
"size3" : 20.0
},
{
"ItemId" : "MMNB302",
"size1" : 15.0,
"size2" : 195.0,
"size3" : 6.0
},
{
"ItemId" : "MMNB373",
"size1" : 3.0,
"size2" : 45.0,
"size3" : 3.75
}
],
"accountId" : 1.0
},
{
"_id" : ObjectId("59c3b291f251c77f15790f9d"),
"orderId" : "AQ137O1701240",
"serviceLocationId" : "36728",
"orderNo" : "AQ137O1701240",
"orderDate" : "18-Sep-17",
"description" : "AQ137O1701240",
"serviceType" : "Delivery",
"orderSource" : "Import",
"takenBy" : "KARIM",
"plannedDeliveryDate" : ISODate("2017-08-26T00:00:00.000Z"),
"plannedDeliveryTime" : "",
"actualDeliveryDate" : "",
"actualDeliveryTime" : "",
"deliveredBy" : "",
"size1" : 28.0,
"size2" : 520.11,
"size3" : 52.5,
"jobPriority" : 1.0,
"cancelReason" : "",
"cancelDate" : "",
"cancelBy" : "",
"reasonCode" : "",
"reasonText" : "",
"status" : "",
"lineItems" : [
{
"ItemId" : "MMGW001",
"size1" : 25.0,
"size2" : 464.38,
"size3" : 46.875
},
{
"ItemId" : "MMGW001-F1",
"size1" : 3.0,
"size2" : 55.73,
"size3" : 5.625
}
],
"accountId" : 1.0
},
{
"_id" : ObjectId("59c3b291f251c77f15790fd8"),
"orderId" : "AQ110O1705036",
"serviceLocationId" : "36728",
"orderNo" : "AQ110O1705036",
"orderDate" : "18-Sep-17",
"description" : "AQ110O1705036",
"serviceType" : "Delivery",
"orderSource" : "Import",
"takenBy" : "KARIM",
"plannedDeliveryDate" : ISODate("2017-08-26T00:00:00.000Z"),
"plannedDeliveryTime" : "",
"actualDeliveryDate" : "",
"actualDeliveryTime" : "",
"deliveredBy" : "",
"size1" : 60.0,
"size2" : 1046.0,
"size3" : 68.0,
"jobPriority" : 1.0,
"cancelReason" : "",
"cancelDate" : "",
"cancelBy" : "",
"reasonCode" : "",
"reasonText" : "",
"status" : "",
"lineItems" : [
{
"ItemId" : "MMNB218",
"size1" : 50.0,
"size2" : 920.0,
"size3" : 60.0
},
{
"ItemId" : "MMNB219",
"size1" : 10.0,
"size2" : 126.0,
"size3" : 8.0
}
],
"accountId" : 1.0
}
],
"serviceTime" : {
"_id" : ObjectId("59c3b07cb7799c90ebb32cdc"),
"serviceTimeTypeId" : "1",
"serviceTimeType" : "nohelper",
"description" : "",
"fixedTime" : 30.0,
"variableTime" : 0.0,
"accountId" : 1.0
}
}
Mongorestore has this feature of appending on top of whatever is already in the database, so this behavior could be used for combining two collections:
mongodump collection1
collection2.rename(collection1)
mongorestore
Didn't try it yet, but it might perform faster than the map/reduce approach.
Yes you can: Take this utility function that I have written today:
function shangMergeCol() {
tcol= db.getCollection(arguments[0]);
for (var i=1; i<arguments.length; i++){
scol= db.getCollection(arguments[i]);
scol.find().forEach(
function (d) {
tcol.insert(d);
}
)
}
}
You can pass to this function any number of collections, the first one is going to be the target one. All the rest collections are sources to be transferred to the target one.
Code snippet. Courtesy-Multiple posts on stack overflow including this one.
db.cust.drop();
db.zip.drop();
db.cust.insert({cust_id:1, zip_id: 101});
db.cust.insert({cust_id:2, zip_id: 101});
db.cust.insert({cust_id:3, zip_id: 101});
db.cust.insert({cust_id:4, zip_id: 102});
db.cust.insert({cust_id:5, zip_id: 102});
db.zip.insert({zip_id:101, zip_cd:'AAA'});
db.zip.insert({zip_id:102, zip_cd:'BBB'});
db.zip.insert({zip_id:103, zip_cd:'CCC'});
mapCust = function() {
var values = {
cust_id: this.cust_id
};
emit(this.zip_id, values);
};
mapZip = function() {
var values = {
zip_cd: this.zip_cd
};
emit(this.zip_id, values);
};
reduceCustZip = function(k, values) {
var result = {};
values.forEach(function(value) {
var field;
if ("cust_id" in value) {
if (!("cust_ids" in result)) {
result.cust_ids = [];
}
result.cust_ids.push(value);
} else {
for (field in value) {
if (value.hasOwnProperty(field) ) {
result[field] = value[field];
}
};
}
});
return result;
};
db.cust_zip.drop();
db.cust.mapReduce(mapCust, reduceCustZip, {"out": {"reduce": "cust_zip"}});
db.zip.mapReduce(mapZip, reduceCustZip, {"out": {"reduce": "cust_zip"}});
db.cust_zip.find();
mapCZ = function() {
var that = this;
if ("cust_ids" in this.value) {
this.value.cust_ids.forEach(function(value) {
emit(value.cust_id, {
zip_id: that._id,
zip_cd: that.value.zip_cd
});
});
}
};
reduceCZ = function(k, values) {
var result = {};
values.forEach(function(value) {
var field;
for (field in value) {
if (value.hasOwnProperty(field)) {
result[field] = value[field];
}
}
});
return result;
};
db.cust_zip_joined.drop();
db.cust_zip.mapReduce(mapCZ, reduceCZ, {"out": "cust_zip_joined"});
db.cust_zip_joined.find().pretty();
var flattenMRCollection=function(dbName,collectionName) {
var collection=db.getSiblingDB(dbName)[collectionName];
var i=0;
var bulk=collection.initializeUnorderedBulkOp();
collection.find({ value: { $exists: true } }).addOption(16).forEach(function(result) {
print((++i));
//collection.update({_id: result._id},result.value);
bulk.find({_id: result._id}).replaceOne(result.value);
if(i%1000==0)
{
print("Executing bulk...");
bulk.execute();
bulk=collection.initializeUnorderedBulkOp();
}
});
bulk.execute();
};
flattenMRCollection("mydb","cust_zip_joined");
db.cust_zip_joined.find().pretty();
You've to do that in your application layer. If you're using an ORM, it could use annotations (or something similar) to pull references that exist in other collections. I only have worked with Morphia, and the #Reference annotation fetches the referenced entity when queried, so I am able to avoid doing it myself in the code.

How to print minimum result in MongoDB

MongoDB noob here...
So, I'm trying to print out the minimum value score inside a collection that looks like this...
> db.students.find({'_id': 1}).pretty()
{
"_id" : 1,
"name" : "Aurelia Menendez",
"scores" : [
{
"type" : "exam",
"score" : 60.06045071030959
},
{
"type" : "quiz",
"score" : 52.79790691903873
},
{
"type" : "homework",
"score" : 71.76133439165544
},
{
"type" : "homework",
"score" : 34.85718117893772
}
]
}
The incantation I'm using is as such...
db.students.aggregate(
// Initial document match (uses index, if a suitable one is available)
{ $match: {
_id : 1
}},
// Expand the scores array into a stream of documents
{ $unwind: '$scores' },
// Filter to 'homework' scores
{ $match: {
'scores.type': 'homework'
}},
// grab the minimum value score
{ $match: {
'scores.min.score': 1
}}
)
the output i'm getting is this...
{ "result" : [ ], "ok" : 1 }
What am I doing wrong?
You've got the right idea, but in the last step of the aggregation what you want to do is group all the scores by student and find the $min value.
Change the last pipeline operation to:
{ $group: {
_id: "$_id",
minScore: {$min: "$scores.score"}
}}
> db.students.aggregate(
{ $unwind: "$scores" },`
{ $match:{"scores.type":"homework"} },
{ $group: {
_id : "$_id",
maxScore : { $max : "$scores.score"},
minScore: { $min:"$scores.score"}
}
});
how to aggregate on each item in collection in mongoDB

Categories

Resources