I have this data in a mongo database,
{
"_id": {
"$oid": "5654a8f0d487dd1434571a6e"
},
"ValidationDate": {
"$date": "2015-11-24T13:06:19.363Z"
},
"DataRaw": " WL 00100100012015-08-28 02:44:17+0000+ 16.81 8.879 1084.00",
"ReadingsAreValid": true,
"locationID": " WL 001",
"Readings": {
"pH": {
"value": 8.879
},
"SensoreDate": {
"value": {
"$date": "2015-08-28T02:44:17.000Z"
}
},
"temperature": {
"value": 16.81
},
"Conductivity": {
"value": 1084
}
},
"HMAC": "ecb98d73fcb34ce2c5bbcc9c1265c8ca939f639d791a1de0f6275e2d0d71a801"
}
My goal is to count the temperature, pH and conductivity values that satisfy a given range for the last 30 days, but I am getting an error which I have not been able to resolve by searching online. Here is my code:
import datetime
from pymongo import MongoClient
# Prints the number of documents returned by a single find(...).count() call
# on the cloudtest.test_5_27 collection.
def total_data_push():
data = MongoClient().cloudtest.test_5_27
now = datetime.datetime.utcnow()
# Start of the 30-day window, measured back from the current UTC time.
last_30d = now - datetime.timedelta(days=30)
# NOTE(review): last_year is computed but never used below.
last_year = now.replace(year=now.year - 1)
# NOTE(review): three separate dicts are passed to find() here, so they bind
# positionally to filter, projection and skip (see the find signature quoted
# further down). The third dict landing in `skip` is what raises
# "TypeError: skip must be an instance of int".
since_last_month = data.find({"ReadingsAreValid": False}, {"ValidationDate": {"$gte": last_30d}},
{"Readings.temperature.value": {"$gt": 1.0}}).count()
print since_last_month
# Script entry point: just runs the count above.
def main():
total_data_push()
if __name__ == "__main__":
main()
When I run the script without the ValidationDate condition, it returns correct values, but adding this date condition to restrict the results to the last 30 days produces the following error:
traceback (most recent call last):
File "total_data_push.py", line 29, in <module>
main()
File "total_data_push.py", line 26, in main
total_data_push()
File "total_data_push.py", line 17, in total_data_push
{"Readings.temperature.value": {"$gt": 1.0}}).count()
File "/Library/Python/2.7/site-packages/pymongo/collection.py", line 866, in find
return Cursor(self, *args, **kwargs)
File "/Library/Python/2.7/site-packages/pymongo/cursor.py", line 90, in __init__
raise TypeError("skip must be an instance of int")
TypeError: skip must be an instance of int
What am I really missing here? Thanks in advance for your help.
As @BenCr said, if you look at the signature of find():
find(filter=None, projection=None, skip=0, limit=0,
no_cursor_timeout=False, cursor_type=CursorType.NON_TAILABLE,
sort=None, allow_partial_results=False, oplog_replay=False,
modifiers=None, manipulate=True)
The filter is the first parameter, so all of the conditions must go into that single dictionary, as follows:
# `data` is the collection handle from the question
# (MongoClient().cloudtest.test_5_27). All three conditions go into ONE
# filter dict -- find()'s first parameter -- where they are ANDed together.
# Passing them as separate dicts makes find() bind them to `projection`
# and `skip` instead.
since_last_month = data.find({
    "ReadingsAreValid": False,
    "ValidationDate": {"$gte": last_30d},
    # Same comparison as the question: strictly greater than 1.0.
    "Readings.temperature.value": {"$gt": 1.0},
}).count()
The filter should look like this:
# One filter dict holding every condition; `data` is the collection object
# the question already created, so no extra `db.` prefix is needed.
since_last_month = data.find({
    "ReadingsAreValid": False,
    "ValidationDate": {"$gte": last_30d},
    # Keeps the question's strict comparison (> 1.0).
    "Readings.temperature.value": {"$gt": 1.0},
}).count()
Related
Is there any way to use $cond along with ($set, $inc, ...) operators in update? (MongoDB 4.2)
I want to update a field in my document by $inc it with "myDataInt" if a condition comes true, otherwise keeps it as it is:
db.mycoll.update(
{"_id" : "5e9e5da03da783817d231dc4"},
{"$inc" : {
"my_data_sum" : {
"$cond" : [
{
"$ne" : ["snapshot_time", new_snapshot_time)]
},myDataInt, 0]
]
}
},
{upsert=True, multi=False}
)
However, this gives an error in pymongo:
raise WriteError(error.get("errmsg"), error.get("code"), error)
pymongo.errors.WriteError: The dollar ($) prefixed field '$cond' in 'my_data_sum.$cond' is not valid for storage.
Any idea to avoid using find() before update in this case?
Update:
If I use the approach that Joe has mentioned, an exception will be raised in PyMongo (v3.10.1) due to using 'list' as a parameter in update_many() instead of 'dict':
from pymongo import MongoClient
db = MongoClient()['mydb']
db.mycoll.update_many(
{"_id" : "5e9e5da03da783817d231dc4"},
[{"$set" : {
"my_data_sum" : {
"$sum": [
"$my_data_sum",
{"$cond" : [
{"$ne" : ["snapshot_time", new_snapshot_time]},
myDataInt,
0
]}
]
}
}}],
upsert:true
)
That ends up with this error:
File "/usr/local/lib64/python3.6/site-packages/pymongo/collection.py", line 1076, in update_many session=session),
File "/usr/local/lib64/python3.6/site-packages/pymongo/collection.py", line 856, in _update_retryable _update, session)
File "/usr/local/lib64/python3.6/site-packages/pymongo/mongo_client.py", line 1491, in _retryable_write return self._retry_with_session(retryable, func, s, None)
File "/usr/local/lib64/python3.6/site-packages/pymongo/mongo_client.py", line 1384, in _retry_with_session return func(session, sock_info, retryable)
File "/usr/local/lib64/python3.6/site-packages/pymongo/collection.py", line 852, in _update retryable_write=retryable_write)
File "/usr/local/lib64/python3.6/site-packages/pymongo/collection.py", line 823, in _update _check_write_command_response(result)
File "/usr/local/lib64/python3.6/site-packages/pymongo/helpers.py", line 221, in _check_write_command_response _raise_last_write_error(write_errors)
File "/usr/local/lib64/python3.6/site-packages/pymongo/helpers.py", line 203, in _raise_last_write_error raise WriteError(error.get("errmsg"), error.get("code"), error)
pymongo.errors.WriteError: Modifiers operate on fields but we found type array instead. For example: {$mod: {<field>: ...}} not {$set: [ { $set: { my_data_sum: { $sum: [ "$my_data_sum", { $cond: [ { $ne: [ "$snapshot_time", 1586910283 ] }, 1073741824, 0 ] } ] } } } ]}
If you are using MongoDB 4.2, you can use aggregation operators with updates. $inc is not an aggregation operator, but $sum is. To specify a pipeline, pass an array as the second argument to update:
// Pipeline-style update (MongoDB 4.2+): the second argument is an array of
// aggregation stages rather than an update-operator document.
db.mycoll.update(
    {"_id" : "5e9e5da03da783817d231dc4"},
    [{"$set" : {
        "my_data_sum" : {
            // $sum adds the conditional increment to the current value.
            "$sum": [
                "$my_data_sum",
                {"$cond" : [
                    // "$snapshot_time" (with the $ prefix) reads the document's
                    // field. A bare "snapshot_time" is only a string literal,
                    // so the comparison would never look at the document.
                    {"$ne" : ["$snapshot_time", new_snapshot_time]},
                    myDataInt,
                    0
                ]}
            ]
        }
    }}],
    {upsert:true, multi:false}
)
After spending some time and searching online, I figured that the update_many(), update_one(), and update() methods of Collection object in PyMongo do not accept type list as parameters to support the new Aggregation Pipeline feature of the Update operation in MongoDB 4.2+. (At least this option is not available in PyMongo v3.10 yet.)
However, it looks like I can use the command method of the Database object in PyMongo, which issues a raw MongoDB command (the equivalent of runCommand), and it worked just fine for me:
from pymongo import MongoClient
db = MongoClient()['mydb']
# Issue the raw `update` command so that "u" can be an aggregation pipeline
# (a list of stages) instead of an update-operator document.
result = db.command(
    {
        "update": "mycoll",
        "updates": [{
            # "q" is the query selecting the document to update.
            "q": {"_id": "5e9e5da03da783817d231dc4"},
            # "u" is the pipeline that computes the new field values.
            "u": [
                {"$set": {
                    "my_data_sum": {
                        "$sum": [
                            "$my_data_sum",
                            {"$cond": [
                                # "$snapshot_time" is a field path; without
                                # the $ it would be a plain string literal and
                                # the document's value would never be compared.
                                {"$ne": ["$snapshot_time", new_snapshot_time]},
                                myDataInt,
                                0,
                            ]},
                        ],
                    },
                }},
            ],
            "upsert": True,
            # A single document is targeted by _id, matching the
            # multi=False used in the original update call.
            "multi": False,
        }],
        "ordered": False,
    }
)
The command method of the Database object takes a dict describing the command as its first argument, and the aggregation pipeline (a list) can be included inside that dict (q is the update query, and u defines the fields to be updated).
result is a dictionary holding the acknowledgement from MongoDB, which contains 'nModified', 'upserted', and 'writeErrors'.
https://mongoplayground.net/p/1AklFKuhFi6
[
{
"id": 1,
"like": 3
},
{
"id": 2,
"like": 1
}
]
Let value = 1; the pipeline below subtracts value from like.
If you want to increment instead, negate it first:
value = -1 * value
db.collection.aggregate([
{
"$match": {
"id": 1
}
},
{
"$set": {
"count": {
$cond: {
if: {
$gt: [
"$like",
0
]
},
then: {
"$subtract": [
"$like",
value
]
},
else: 0
}
}
}
}
])
How do you parse the JSON response from the musixmatch api?
I'm calling the method
res = artist_api.artist_search_get(format=format, q_artist=artist)
I'm getting a response of
{'message': {'body': {'artist_list': [{'artist': {'artist_alias_list': [],
'artist_comment': '',
'artist_country': '',
'artist_credits': {'artist_list': []},
'artist_edit_url': None,
'artist_id': 26575484.0,
'artist_mbid': None,
'artist_name': 'Illenium',
'artist_name_translation_list': [],
'artist_rating': 55.0, .........
I'm trying to get the artist_id.
I tried getting the artist_id like this:
print(res['message']['body']['artist']['artist_id'])
artist_api = swagger_client.ArtistApi()
format = 'json'
try:
artist_list = ["adele", "lady gaga", "john legend"];
for artist in artist_list:
print(artist)
res = artist_api.artist_search_get(format=format, q_artist=artist)
print(res['message']['body']['artist']['artist_id'])
except ApiException as e:
print "Exception when calling ArtistApi->artist_search_get: %s\n" % e
I'm getting this error message:
Traceback (most recent call last):
File "musixmatch.py", line 39, in <module>
print(res['message']['body']['artist_id'])
TypeError: 'InlineResponse2004' object has no attribute '__getitem__'
Please help. I've searched through a bunch of threads but I still can't find the answer to this. I'm new to Python, so I'm not sure what I'm doing wrong.
You have mixed up the hierarchy of the JSON document slightly; use this code to get the desired values:
[artist['artist']['artist_id'] for artist in res['message']['body']['artist_list']]
Alternatively, the JSONPath expression $..artist_list..artist_id can be used to get the same result.
Example
from jsonpath_rw import parse
import json
res = json.loads("""{
"message": {
"header": {},
"body": {
"artist_list": [
{
"artist": {
"artist_credits": { },
"artist_country": "string",
"artist_id": 110
}
},
{
"artist": {
"artist_credits": {},
"artist_country": "string",
"artist_id": 220
}
}
]
}
}
}""")
# 1-way: Iterate through JSON-document
print([artist['artist']['artist_id'] for artist in res['message']['body']['artist_list']])
# result: [110, 220]
# 2-way: Use JSONPath-expression
jsonpath_expr = parse('$..artist_list..artist_id')
print([match.value for match in jsonpath_expr.find(res)])
# result: [110, 220]
I'm trying to loop through this json string
"layoutOptions": {
"titleText": "Route Number",
"authorText": "West LA Yard",
"copyrightText": "",
"customTextElements": [{
"Date": "9/11/2018, 7:37:35 AM"
}
],
"scaleBarOptions": {
"metricUnit": "esriKilometers",
"metricLabel": "km",
"nonMetricUnit": "esriMiles",
"nonMetricLabel": "mi"
},
"legendOptions": {
"operationalLayers": [{
"id": "ParcelRouteEditingTest_1458"
}, {
"id": "ParcelRouteEditingTest_1259"
}
]
}
}
I keep running into this error: string indices must be integers, not str
layoutOpsDict = layoutData["layoutOptions"]
dateList = [dateEle["customTextElements"]["Date"] for dateEle in layoutOpsDict]
Error:
Traceback (most recent call last):
File "<pyshell#44>", line 1, in <module>
dateList = [dateEle["customTextElements"]["Date"] for dateEle in layoutOpsDict]
TypeError: string indices must be integers, not str
What is the best method to grab the date in customTextElements other than keep setting more variables to keep track of?
You are looping over every key of layoutOpsDict instead of just "customTextElements", and not all of them map to a list of dictionaries with "Date" as a key.
Since you only want the values mapped to "customTextElements", you can loop over just that list:
dateList = [dateEle["Date"] for dateEle in layoutOpsDict["customTextElements"]]
The issue I'm having is that I can't make the aggregate search work. I have been able to run it in a MongoDB instance without any users; however, in the MongoDB instance with users (even when authentication is not turned on) I get this weird error message (see below). What's weirder is that I have no problem with other functions, for example find().count(), which, as you can see below, returns 32 (the number of results in the database).
Code in MongoDB shell to create user.
use admin
db.createUser( { user: "VibrationDataCollector",
pwd: '45.',
roles: [ "readWriteAnyDatabase",
"dbAdminAnyDatabase",
"clusterAdmin" ] } )
Code in python to conduct search
client = MongoClient('mongodb://xx.x.x.xx:xxxxx/')
db = client['VibrationDataDB']
db.authenticate('VibrationDataCollector', '45.', source='admin')
coll = db.VibrationData
hello = coll.find().count()
print 'count=', hello
LastWrite_Time = coll.aggregate([{
"$unwind": "$Records"
}, {
"$redact": {
"$cond": [{
"$anyElementTrue": {
"$map": {
"input": "$Records.Properties",
"as": "result",
"in": {
"$and": [{
"$eq": ["$$result.Property.Name", "LastWrite_User"]
}, {
"$eq": ["$$result.value", "42"]
}]
}
}
}
},
"$$KEEP",
"$$PRUNE"
]
}
}, {
"$unwind": "$Records.Properties"
}, {
"$match": {
"Records.Properties.Property.Name": 'LastWrite_Time'
}
}, {
"$project": {
"_id": 0,
"value": "$Records.Properties.value"
}
}])
list1 = LastWrite_Time['result']
for T in list1:
print T['value']
Result
count= 32
Traceback (most recent call last):
File "C:/Python_scripts/SimulationCEI.py", line 64, in <module>
list1 = LastWrite_Time['result']
TypeError: 'CommandCursor' object has no attribute '__getitem__'
UPDATEEEEE!!!!
using next()
count= 32
Traceback (most recent call last):
File "C:/Python_scripts/SimulationCEI.py", line 64, in <module>
list1 = LastWrite_Time['result']
TypeError: 'CommandCursor' object has no attribute '__getitem__'
>>> next()
Traceback (most recent call last):
File "<pyshell#4>", line 1, in <module>
next()
TypeError: next expected at least 1 arguments, got 0
>>>
You want:
for T in LastWrite_Time:
print T['value']
"aggregate" returns a cursor that must be iterated. I'm certain that authentication is not related to the error you see.
I have a Mongo Collection that I need to update, and I'm trying to use the collection.update command to no avail.
Code below:
import pymongo
from pymongo import MongoClient
client = MongoClient()
db = client.SensorDB
sensors = db.Sensor
for sensor in sensors.find():
lat = sensor['location']['latitude']
lng = sensor['location']['longitude']
sensor['location'] = {
"type" : "Feature",
"geometry" : {
"type" : "Point",
"coordinates" : [lat ,lng]
},
"properties": {
"name": sensor['name']
}
}
sensors.update({'webid': sensor['webid']} , {"$set": sensor}, upsert=True)
However, running this gets me the following:
Traceback (most recent call last):
File "purgeDB.py", line 21, in <module>
cameras.update({'webid': sensor['webid']} , {"$set": sensor}, upsert=True)
File "C:\Anaconda\lib\site-packages\pymongo\collection.py", line 561, in update
check_keys, self.uuid_subtype), safe)
File "C:\Anaconda\lib\site-packages\pymongo\mongo_client.py", line 1118, in _send_message
rv = self.__check_response_to_last_error(response, command)
File "C:\Anaconda\lib\site-packages\pymongo\mongo_client.py", line 1060, in __check_response_to_last_error
raise OperationFailure(details["err"], code, result)
pymongo.errors.OperationFailure: Mod on _id not allowed
Change this line:
for sensor in sensors.find():
to this:
for sensor in sensors.find({}, {'_id': 0}):
This prevents Mongo from returning the _id field, which you aren't using and which causes the problem later in your update() call, since you cannot "update" _id.
An even better solution (Only write the data that is needed)
# Rebuild each sensor's `location` as a GeoJSON Feature and write back only
# that field.
for sensor in sensors.find():
    lat = sensor['location']['latitude']
    lng = sensor['location']['longitude']
    location = {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            # GeoJSON positions are ordered [longitude, latitude].
            "coordinates": [lng, lat],
        },
        "properties": {
            "name": sensor['name'],
        },
    }
    # $set touches only `location`, so the immutable _id is never part of
    # the update document.
    sensors.update({'webid': sensor['webid']}, {"$set": {'location': location}})
Edit:
As mentioned by Loïc Faure-Lacroix, you also do not need the upsert flag in your case - your code in this case is always updating, and never inserting.
Edit2:
Surrounded _id in quotes for first solution.