mongoDB - Get newest date from document in array - python

I want to retrieve the array object with the newest dates for a particular document.
But I sadly can't solve it, I always end up with errors.
Dateformat 2020-06-10T13:25:25.645+00:00 datetime.now()
Sample data
collection.insert_one(
{
"document_name": "My Document",
"status": [
{
"status_time": datetimeobject, # 2020-01-02T13:25:25.645+00:00
"status_title": "Sample Title 1"
},
{
"status_time": datetimeobject, # 2020-06-10T13:25:25.645+00:00
"status_title": "Sample Title"
}
]
})
What I've tried
result = collection.find_one({"document_name": "My Document"}, {"status": 1}).sort({"status.status_time": -1}).limit(1)
result = collection.find_one({"document_name": "My Document"}, {"$max": {"status.status_time": -1})
result = collection_projects.find_one({"document_name": "Document"}, {"status": {"$elemMatch": {"$max": "$status_time"}}})
result = list(collection.find({"document_name": "Document"}, {"_id": 0, "status": 1}).limit(1))
result = collection_projects.find_one(
{"document_name": "My Document"},
{"status.status_time": {"$arrayElemAt": -1}})
Result I'm looking for
{
"status_time": datetimeobject, # 2020-06-10T13:25:25.645+00:00
"status_title": "Sample Title 2"
}

You need to use aggregation to achieve this :
Query 1 :
db.collection.aggregate([
/** Re-create `status` field with what is needed */
{
$addFields: {
status: {
$reduce: {
input: "$status", // Iterate on array
initialValue: { initialDate: ISODate("1970-06-09T17:56:34.350Z"), doc: {} }, // Create initial values
in: { // If condition is met push current value to accumulator or return accumulator as is
initialValue: { $cond: [ { $gt: [ "$$this.status_time", "$$value.initialDate" ] }, "$$this.status_time", "$$value.initialDate" ] },
doc: { $cond: [ { $gt: [ "$$this.status_time", "$$value.initialDate" ] }, "$$this", "$$value" ] }
}
}
}
}
},
/**
* re-create `status` field from `$status.doc`
* Since it will always be having only one object you can make `status` an object rather than an array
* Just in case `status` needs to be an array, you need to do { status: [ "$status.doc" ] }
*/
{
$addFields: { status: "$status.doc" }
}
])
Test : mongoplayground
Ref : $reduce , pymongo
Query 2 :
db.collection.aggregate([
/** unwind on `status` array */
{
$unwind: {
path: "$status",
preserveNullAndEmptyArrays: true // preserves doc where `status` field is `[]` or null or missing (Optional)
}
},
/** sort on descending order */
{
$sort: { "status.status_time": -1 }
},
/** group on `_id` & pick first found doc */
{
$group: { _id: "$_id", doc: { $first: "$$ROOT" } }
},
/** make `doc` field as new root */
{
$replaceRoot: { newRoot: "$doc" }
}
])
Test : mongoplayground
Test both queries, I believe on a huge dataset $unwind & $sort might be a bit slow, similar to iteration on a huge array.

You will have to use aggregate with $reduce, this solution is similar to #whoami's except there is no nested document when using $reduce
db.collection.aggregate([
{
$match: {
document_name: "My Document"
}
},
{
$project: { // use $project if you only want the status, use $addFields if you want other fields as well
status: {
$reduce: {
input: "$status",
initialValue: null,
in: {
$cond: [
{
$gte: [
"$$this.status_time",
"$$value.status_time"
]
},
"$$this",
"$$value"
]
}
}
}
}
}
])
mongoplayground

Related

How do I get a specific element in array - MongoDB

Documents store in mongo db in following form
{
"_id" : ObjectId("54fa059ce4b01b3e086c83e9"),
"field1" : "value1",
"field2" : "value2"
"field3" : [
{
"abc123": ["somevalue", "somevalue"]
},
{
"xyz345": ["somevalue", "somevalue"]
}
]
}
What I want in output is whenever I pass abc123 in pymongo query I need result in following form
{
"abc123": ["somevalue", "somevalue"]
}
or
["somevalue", "somevalue"]
Please suggest a mongo query for it. Thanks
Maybe something like this:
db.collection.aggregate([
{
$project: {
field3: {
"$filter": {
"input": "$field3",
"as": "f",
"cond": {
$ne: [
"$$f.abc123",
undefined
]
}
}
}
}
},
{
$unwind: "$field3"
},
{
"$replaceRoot": {
"newRoot": "$field3"
}
}
])
Explained:
Use the mongo aggregation framework with below 3x stages:
project/filter only the needed array field3 if exist
unwind the field3 array
replace the root document with the content of field3
playground

Aggregation function for Counting of Duplicates in a field based on duplicate items in another field

I am using mongoengine as ORM with flask application. The model class is define like
class MyData(db.Document):
task_id = db.StringField(max_length=50, required=True)
url = db.URLField(max_length=500,required=True,unique=True)
organization = db.StringField(max_length=250,required=True)
val = db.StringField(max_length=50, required=True)
The field organization can be repeating and I want to get the count of duplicates with respect to values in another field. For example if the data in mongodb is like
[{"task_id":"as4d2rds5","url":"https:example1.com","organization":"Avengers","val":"null"},
{"task_id":"rfre43fed","url":"https:example1.com","organization":"Avengers","val":"valid"},
{"task_id":"uyje3dsxs","url":"https:example2.com","organization":"Metro","val":"valid"},
{"task_id":"ghs563vt6","url":"https:example1.com","organization":"Avengers","val":"invalid"},
{"task_id":"erf6egy64","url":"https:example2.com","organization":"Metro","val":"null"}]
Then I am querying all the objects using
data = MyData.objects()
I want a response like
[{"url":"https:example1.com","Avengers":{"valid":1,"null":1,"invalid":1}},{"url":"https:example2.com",Metro":{"valid":1,"null":1,"invalid":0}}]
I tried like
db.collection.aggregate([
{
"$group": {
"_id": "$organization",
"count": [
{
"null": {
"$sum": 1
},
"valid": {
"$sum": 1
},
"invalid": {
"$sum": 1
}
}
]
}
}
])
but I am getting an error
The field 'count' must be an accumulator object
Maybe something like this:
db.collection.aggregate([
{
"$group": {
"_id": {
k: "$organization",
v: "$val"
},
"cnt": {
$sum: 1
}
}
},
{
$project: {
_id: 0,
k: "$_id.k",
o: {
k: "$_id.v",
v: "$cnt"
}
}
},
{
$group: {
_id: "$k",
v: {
$push: "$o"
}
}
},
{
$addFields: {
v: {
"$arrayToObject": "$v"
}
}
},
{
$project: {
_id: 0,
new: [
{
k: "$_id",
v: "$v"
}
]
}
},
{
"$addFields": {
"new": {
"$arrayToObject": "$new"
}
}
},
{
"$replaceRoot": {
"newRoot": "$new"
}
}
])
Explained:
Group to count
Project for arrayToObject
Group to join the values
arrayToObject one more time
project additionally
arrayToObject to form the final object
project one more time
replaceRoot to move the object to root.
P.S.
Please, note this solution is not showing the missing values if they do not exist , if you need the missing values additional mapping / mergeObjects need to be added
playground1
Option with missing values ( if possible values are fixed to null,valid,invalid) :
just replace the second $addFields with:
{
$addFields: {
v: {
"$mergeObjects": [
{
"null": 0,
valid: 0,
invalid: 0
},
{
"$arrayToObject": "$v"
}
]
}
}
}
playground2
++url:
playground3

How do i remove a item in a object if i did not know the key name in MongoDB?

Please refer this image
Please refer the image, Here i want to delete bearing_accelerometer_sensor field but i dont know this key name but i know the value here (i.e ObjectId("618e3fc8fccb88b50f2d9317")), I'm aware we can use this query db.getCollection('algorithm_association_collection').update({},{"$unset":{"sensor.bearing_accelerometer_sensor":""}}) to delete the field but in my case i dont know the key name "bearing_accelerometer_sensor". Pls help thanks in advance
You can use this one:
db.collection.aggregate([
{
$set: {
sensor: {
$filter: {
input: { $objectToArray: "$sensor" },
cond: { $ne: [ "$$this.v", ObjectId("618e3fc8fccb88b50f2d9317") ] }
}
}
}
},
{ $set: { sensor: { $arrayToObject: "$sensor" } } }
])
Mongo Playground
If you like to update existing collection, you can use the pipeline in an update statement:
db.collection.updateMany({}, [
{
$set: {
sensor: {
$filter: {
input: { $objectToArray: "$sensor" },
cond: { $ne: [ "$$this.v", ObjectId("618e3fc8fccb88b50f2d9317") ] }
}
}
}
},
{ $set: { sensor: { $arrayToObject: "$sensor" } } }
])

Python - After i insert geo data into elastic search, how do I search the data?

I'm am using python-elasticsearch to insert geo data into the engine as below, but what function or method can I use to search for my data? Can you give an example please?
mappings = {
"doc": {
"properties": {
"geo": {
"properties": {
"location": {"type": "geo_point"}
}
}
}
}
}
es.indices.create(index='geodata', body=mappings)
# ...
es_entries['geo'] = {'location':str(data['_longitude_'])+","+str(data['_latitude_'])}
# ...
es.index(index="geodata", doc_type="doc", body=es_entries)
You can use the Geo Distance Query:
{
"query": {
"bool" : {
"must" : {
"match_all" : {}
},
"filter" : {
"geo_distance" : {
"distance" : "10km",
"geo.location" : {
"lat" : 10,
"lon" : -10
}
}
}
}
}
}
You can use both es.search and elasticsearch.helpers.scan, for example:
res = es.search(index='geodata', body= { ... }) # put the above dictionary in the `body`

Compute first order derivative with MongoDB aggregation framework

Is it possible to calculate a first order derivative using the aggregate framework?
For example, I have the data :
{time_series : [10,20,40,70,110]}
I'm trying to obtain an output like:
{derivative : [10,20,30,40]}
db.collection.aggregate(
[
{
"$addFields": {
"indexes": {
"$range": [
0,
{
"$size": "$time_series"
}
]
},
"reversedSeries": {
"$reverseArray": "$time_series"
}
}
},
{
"$project": {
"derivatives": {
"$reverseArray": {
"$slice": [
{
"$map": {
"input": {
"$zip": {
"inputs": [
"$reversedSeries",
"$indexes"
]
}
},
"in": {
"$subtract": [
{
"$arrayElemAt": [
"$$this",
0
]
},
{
"$arrayElemAt": [
"$reversedSeries",
{
"$add": [
{
"$arrayElemAt": [
"$$this",
1
]
},
1
]
}
]
}
]
}
}
},
{
"$subtract": [
{
"$size": "$time_series"
},
1
]
}
]
}
},
"time_series": 1
}
}
]
)
We can use the pipeline above in version 3.4+ to do this.
In the pipeline, we use the $addFields pipeline stage to add the array of the "time_series" elements' indexes to the document; we also reversed the time series array and added it to the document using, respectively, the $range and $reverseArray operators
We reversed the array here because the element at position p in the array is always greater than the element at position p+1, which means that [p] - [p+1] < 0 and we do not want to use $multiply here (see the pipeline for version 3.2).
Next we $zipped the time series data with the indexes array and applied a subtract expression to the resulting array using the $map operator.
We then $slice the result to discard the null/None value from the array and re-reversed the result.
In 3.2 we can use the $unwind operator to unwind our array and include the index of each element in the array by specifying a document as operand instead of the traditional "path" prefixed by $.
Next in the pipeline, we need to $group our documents and use the $push accumulator operator to return an array of sub-documents that look like this:
{
"_id" : ObjectId("57c11ddbe860bd0b5df6bc64"),
"time_series" : [
{ "value" : 10, "index" : NumberLong(0) },
{ "value" : 20, "index" : NumberLong(1) },
{ "value" : 40, "index" : NumberLong(2) },
{ "value" : 70, "index" : NumberLong(3) },
{ "value" : 110, "index" : NumberLong(4) }
]
}
Finally comes the $project stage. In this stage, we need to use the $map operator to apply a series of expressions to each element in the newly computed array from the $group stage.
Here is what is going on inside the $map (see $map as a for loop) in expression:
For each subdocument, we assign the value field to a variable using the $let variable operator. We then subtract its value from the value of the "value" field of the next element in the array.
Since the next element in the array is the element at the current index plus one, all we need is the help of the $arrayElemAt operator and a simple $addition of the current element's index and 1.
The $subtract expression return a negative value so we need to multiply the value by -1 using the $multiply operator.
We also need to $filter the resulting array because the last element is None or null. The reason is that when the current element is the last element, $subtract returns None because the index of the next element equals the size of the array.
db.collection.aggregate([
{
"$unwind": {
"path": "$time_series",
"includeArrayIndex": "index"
}
},
{
"$group": {
"_id": "$_id",
"time_series": {
"$push": {
"value": "$time_series",
"index": "$index"
}
}
}
},
{
"$project": {
"time_series": {
"$filter": {
"input": {
"$map": {
"input": "$time_series",
"as": "el",
"in": {
"$multiply": [
{
"$subtract": [
"$$el.value",
{
"$let": {
"vars": {
"nextElement": {
"$arrayElemAt": [
"$time_series",
{
"$add": [
"$$el.index",
1
]
}
]
}
},
"in": "$$nextElement.value"
}
}
]
},
-1
]
}
}
},
"as": "item",
"cond": {
"$gte": [
"$$item",
0
]
}
}
}
}
}
])
Another option which I think is less efficient is perform a map/reduce operation on our collection using the map_reduce method.
>>> import pymongo
>>> from bson.code import Code
>>> client = pymongo.MongoClient()
>>> db = client.test
>>> collection = db.collection
>>> mapper = Code("""
... function() {
... var derivatives = [];
... for (var index=1; index<this.time_series.length; index++) {
... derivatives.push(this.time_series[index] - this.time_series[index-1]);
... }
... emit(this._id, derivatives);
... }
... """)
>>> reducer = Code("""
... function(key, value) {}
... """)
>>> for res in collection.map_reduce(mapper, reducer, out={'inline': 1})['results']:
... print(res) # or do something with the document.
...
{'value': [10.0, 20.0, 30.0, 40.0], '_id': ObjectId('57c11ddbe860bd0b5df6bc64')}
You can also retrieve all the document and use the numpy.diff to return the derivative like this:
import numpy as np
for document in collection.find({}, {'time_series': 1}):
result = np.diff(document['time_series'])
it's a bit dirty, but perhaps something like this?
use test_db
db['data'].remove({})
db['data'].insert({id: 1, time_series: [10,20,40,70,110]})
var mapF = function() {
emit(this.id, this.time_series);
emit(this.id, this.time_series);
};
var reduceF = function(key, values){
var n = values[0].length;
var ret = [];
for(var i = 0; i < n-1; i++){
ret.push( values[0][i+1] - values[0][i] );
}
return {'gradient': ret};
};
var finalizeF = function(key, val){
return val.gradient;
}
db['data'].mapReduce(
mapF,
reduceF,
{ out: 'data_d1', finalize: finalizeF }
)
db['data_d1'].find({})
The "strategy" here is to emit the data to be operated on twice so that it is accessible in the reduce stage, return an object to avoid the message "reduce -> multiple not supported yet" and then filter back the array in the finalizer.
This script then produces:
MongoDB shell version: 3.2.9
connecting to: test
switched to db test_db
WriteResult({ "nRemoved" : 1 })
WriteResult({ "nInserted" : 1 })
{
"result" : "data_d1",
"timeMillis" : 13,
"counts" : {
"input" : 1,
"emit" : 2,
"reduce" : 1,
"output" : 1
},
"ok" : 1
}
{ "_id" : 1, "value" : [ 10, 20, 30, 40 ] }
bye
Alternatively, one could move all the processing into the finalizer (reduceF is not called here since mapF is assumed to emit unique keys):
use test_db
db['data'].remove({})
db['data'].insert({id: 1, time_series: [10,20,40,70,110]})
var mapF = function() {
emit(this.id, this.time_series);
};
var reduceF = function(key, values){
};
var finalizeF = function(key, val){
var x = val;
var n = x.length;
var ret = [];
for(var i = 0; i < n-1; i++){
ret.push( x[i+1] - x[i] );
}
return ret;
}
db['data'].mapReduce(
mapF,
reduceF,
{ out: 'data_d1', finalize: finalizeF }
)
db['data_d1'].find({})

Categories

Resources