I am trying to create a loop that lets me access values inside a .json file (a Google location history file). I just started learning Python last week, so bear with me; any help is really appreciated!
First my import list:
import pandas as pd
import requests
import json
Then I open the .json file:
with open('data_may.json', 'r') as fh:
    raw = json.loads(fh.read())

data = raw['timelineObjects']
data
Last, I am trying to create a loop to print out the values of latitudeE7:
for locations in data[0:5]:
    print(locations['placeVisit']['location']['latitudeE7'])
This is the error message that I get:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-99-a8c46eea2476> in <module>
1 for locations in data[0:5]:
----> 2 print(locations['placeVisit']['location']['latitudeE7'])
KeyError: 'placeVisit'
My .json file is constructed like this:
{
"timelineObjects" : [ {
"activitySegment" : {
"startLocation" : {
"latitudeE7" : 520950336,
"longitudeE7" : 51250455,
"sourceInfo" : {
"deviceTag" : -1547871288
}
},
"endLocation" : {
"latitudeE7" : 520969498,
"longitudeE7" : 51162451,
"sourceInfo" : {
"deviceTag" : -1547871288
}
},
"duration" : {
"startTimestampMs" : "1556693508999",
"endTimestampMs" : "1556693710203"
},
"distance" : 652,
"activityType" : "CYCLING",
"confidence" : "HIGH",
"activities" : [ {
"activityType" : "CYCLING",
"probability" : 99.11152749237161
}, {
"activityType" : "WALKING",
"probability" : 0.6369730834565008
}, {
"activityType" : "RUNNING",
"probability" : 0.10388160016829592
} ],
"waypointPath" : {
"waypoints" : [ {
"latE7" : 520950508,
"lngE7" : 51250495
}, {
"latE7" : 520971412,
"lngE7" : 51164069
} ]
}
}
}, {
"placeVisit" : {
"location" : {
"latitudeE7" : 520967891,
"longitudeE7" : 51159244,
"placeId" : "ChIJOYpLXEFvxkcRtSdOEoh1Lns",
"address" : "Bemuurde Weerd Oostzijde 1\n3514 AN Utrecht\nNederland",
"name" : "LSR Landelijk Studenten Rechtsbureau",
"sourceInfo" : {
"deviceTag" : -1547871288
},
"locationConfidence" : 37.736637
},
"duration" : {
"startTimestampMs" : "1556693710203",
"endTimestampMs" : "1556713681618"
},
"placeConfidence" : "MEDIUM_CONFIDENCE",
"centerLatE7" : 520969163,
"centerLngE7" : 51162406,
"visitConfidence" : 93,
"otherCandidateLocations" : [ {
"latitudeE7" : 520967890,
"longitudeE7" : 51159240,
"placeId" : "ChIJOYpLXEFvxkcRV2Hp03ASVuI",
"locationConfidence" : 33.198143
}, {
"latitudeE7" : 520968971,
"longitudeE7" : 51161709,
"placeId" : "ChIJbeiKCEFvxkcRgiZQno9hikQ",
"semanticType" : "TYPE_WORK",
"locationConfidence" : 23.059763
}, {
"latitudeE7" : 520968160,
"longitudeE7" : 51158805,
"placeId" : "ChIJ-SXmBkVvxkcRtpNT_vTdQEE",
"locationConfidence" : 1.7593758
}, {
"latitudeE7" : 520971045,
"longitudeE7" : 51162728,
"placeId" : "ChIJd66UREFvxkcRZJCaM9z3baw",
"semanticType" : "TYPE_SEARCHED_ADDRESS",
"locationConfidence" : 0.6827666
} ],
"editConfirmationStatus" : "NOT_CONFIRMED"
}
}, {
"activitySegment" : {
"startLocation" : {
"latitudeE7" : 520967891,
"longitudeE7" : 51159244,
"sourceInfo" : {
"deviceTag" : -1547871288
}
},
"endLocation" : {
"latitudeE7" : 520942097,
"longitudeE7" : 51258010,
"sourceInfo" : {
"deviceTag" : -1547871288
}
},
"duration" : {
"startTimestampMs" : "1556713681618",
"endTimestampMs" : "1556713939630"
},
"distance" : 816,
"activityType" : "CYCLING",
"confidence" : "HIGH",
"activities" : [ {
"activityType" : "CYCLING",
"probability" : 99.41448992772163
}, {
"activityType" : "WALKING",
"probability" : 0.43749986739765867
}, {
"activityType" : "IN_PASSENGER_VEHICLE",
"probability" : 0.08513907766062832
} ],
"waypointPath" : {
"waypoints" : [ {
"latE7" : 520967674,
"lngE7" : 51158652
}, {
"latE7" : 520942306,
"lngE7" : 51260013
} ]
}
}
}, {
"placeVisit" : {
"location" : {
"latitudeE7" : 520941602,
"longitudeE7" : 51258926,
"placeId" : "ChIJ46vtEE9vxkcRyu8VVfUro2Q",
"address" : "Lucasbolwerk 18\n3512 EH Utrecht\nNederland",
"name" : "Lucasbolwerk 18",
"semanticType" : "TYPE_HOME",
"sourceInfo" : {
"deviceTag" : -1547871288
},
"locationConfidence" : 62.083218
},
"duration" : {
"startTimestampMs" : "1556713939630",
"endTimestampMs" : "1556731681066"
},
"placeConfidence" : "HIGH_CONFIDENCE",
"centerLatE7" : 520942021,
"centerLngE7" : 51257989,
"visitConfidence" : 95,
"otherCandidateLocations" : [ {
"latitudeE7" : 520940870,
"longitudeE7" : 51259290,
"placeId" : "ChIJu8CAEE9vxkcRRXzQvpFYiIk",
"locationConfidence" : 28.85588
}, {
"latitudeE7" : 520940508,
"longitudeE7" : 51258612,
"placeId" : "ChIJFXCOGk9vxkcRvLD0dAfMEwE",
"locationConfidence" : 0.79878336
}, {
"latitudeE7" : 520943848,
"longitudeE7" : 51258475,
"placeId" : "ChIJC9dtF09vxkcR9zOEsPmnHcQ",
"locationConfidence" : 0.35787553
}, {
"latitudeE7" : 520940508,
"longitudeE7" : 51258612,
"placeId" : "ChIJx2ucGk9vxkcRF_deiIhd7_k",
"locationConfidence" : 0.25281402
} ],
"editConfirmationStatus" : "NOT_CONFIRMED"
}
}, {
As you can see, for now I am only trying to access the values under placeVisit.
Thank you in advance!
placeVisit appears only in every second timelineObjects item, so you need to check whether placeVisit exists:
import json

with open('data_may.json', 'r') as fh:
    raw = json.loads(fh.read())

data = raw['timelineObjects']

for locations in data[0:5]:
    if 'placeVisit' in locations:
        print(locations['placeVisit']['location']['latitudeE7'])
    else:
        print("Found activitySegment instead of placeVisit!")
Output:
Found activitySegment instead of placeVisit!
520967891
Found activitySegment instead of placeVisit!
520941602
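If you want the matching values collected in one go instead of printed one by one, a list comprehension with the same membership test works; a small sketch reusing the data list from above:

latitudes = [item['placeVisit']['location']['latitudeE7']
             for item in data if 'placeVisit' in item]
print(latitudes)  # e.g. [520967891, 520941602]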
Let's start by seeing which top-level keys are present in the JSON:
data = <your-json-dictionary>
# print a list of your top-level keys
print(list(data.keys()))
This shows:
>> ['timelineObjects']
Now let's see the keys of the elements of data['timelineObjects']:
for timelineObject in data['timelineObjects']:
    print(list(timelineObject.keys()))
>> ['activitySegment']
['placeVisit']
['activitySegment']
['placeVisit']
...
However, there are some items in which the key 'placeVisit' does not exist; Python raises a KeyError when it encounters these. You should use Python's dict.get(key, default) method, which returns the default value when the key is not found in the dictionary.
for timelineObject in data['timelineObjects']:
    print(timelineObject.get('placeVisit', None))
This prints each placeVisit dictionary, and None for the items that only contain an activitySegment, without raising a KeyError.
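As a side note, the E7 fields hold coordinates in degrees multiplied by 10^7, so dividing by 1e7 recovers ordinary latitude and longitude. A minimal sketch, reusing the data dictionary from above:

for timelineObject in data['timelineObjects']:
    visit = timelineObject.get('placeVisit')
    if visit is not None:
        lat = visit['location']['latitudeE7'] / 1e7   # e.g. 520967891 -> 52.0967891
        lng = visit['location']['longitudeE7'] / 1e7
        print(lat, lng)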
My English is poor.
I have a piece of JSON data with the structure shown below, and I want to use Python to aggregate it.
If the service.name is the same, the entries need to be grouped together, and duplicate "url.path" values need to be removed.
I don't know which structure to use to store the result: a list? a dict?
Can anyone help please? Thanks
```
[
{
"_source" : {
"error" : {
"exception" : [
{
"handled" : false,
"type" : "Abp.UI.UserFriendlyException",
"message" : "未发现该用户的WeiXinUserRelation,粉丝编号447519"
}
]
},
"trace" : {
"id" : "a3e3796ca145b448829d0d0f96661e67"
},
"#timestamp" : "2021-06-21T06:57:52.603Z",
"service" : {
"name" : "Lonsid_AAA_Web_Host"
}, "url" : {
"path" : "/product/getAAA" }
}
},
{
"_source" : {
"error" : {
"exception" : [
{
"handled" : false,
"type" : "Abp.UI.UserFriendlyException",
"message" : "未发现该用户的WeiXinUserRelation,粉丝编号447519"
}
]
},
"trace" : {
"id" : "a3e3796ca145b448829d0d0f96661e67"
},
"#timestamp" : "2021-06-21T06:57:52.603Z",
"service" : {
"name" : "Lonsid_BBB_Web_Host"
}, "url" : {
"path" : "/product/getBBB" }
}
},
{
"_source" : {
"error" : {
"exception" : [
{
"handled" : false,
"type" : "Abp.UI.UserFriendlyException",
"message" : "未发现该用户的WeiXinUserRelation,粉丝编号447519"
}
]
},
"trace" : {
"id" : "a3e3796ca145b448829d0d0f96661e67"
},
"#timestamp" : "2021-06-21T06:57:52.603Z",
"service" : {
"name" : "Lonsid_AAA_Web_Host"
}, "url" : {
"path" : "/product/getAAA" }
}
} ]
```
This should get you started. It builds a cache of every service name seen so far and drops any item whose name has been seen before.
import pprint
import json
json_data = """[
{
"_source" : {
"error" : {
"exception" : [
{
"handled" : false,
"type" : "Abp.UI.UserFriendlyException",
"message" : "未发现该用户的WeiXinUserRelation,粉丝编号447519"
}
]
},
"trace" : {
"id" : "a3e3796ca145b448829d0d0f96661e67"
},
"#timestamp" : "2021-06-21T06:57:52.603Z",
"service" : {
"name" : "Lonsid_AAA_Web_Host"
}, "url" : {
"path" : "/product/getAAA" }
}
},
{
"_source" : {
"error" : {
"exception" : [
{
"handled" : false,
"type" : "Abp.UI.UserFriendlyException",
"message" : "未发现该用户的WeiXinUserRelation,粉丝编号447519"
}
]
},
"trace" : {
"id" : "a3e3796ca145b448829d0d0f96661e67"
},
"#timestamp" : "2021-06-21T06:57:52.603Z",
"service" : {
"name" : "Lonsid_BBB_Web_Host"
}, "url" : {
"path" : "/product/getBBB" }
}
},
{
"_source" : {
"error" : {
"exception" : [
{
"handled" : false,
"type" : "Abp.UI.UserFriendlyException",
"message" : "未发现该用户的WeiXinUserRelation,粉丝编号447519"
}
]
},
"trace" : {
"id" : "a3e3796ca145b448829d0d0f96661e67"
},
"#timestamp" : "2021-06-21T06:57:52.603Z",
"service" : {
"name" : "Lonsid_AAA_Web_Host"
}, "url" : {
"path" : "/product/getAAA" }
}
}
]"""
data = json.loads(json_data)

cache = {}
for item in data:
    if item['_source']['service']['name'] not in cache:
        cache[item['_source']['service']['name']] = item

pprint.pprint(list(cache.values()))
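If you also want the distinct "url.path" values per service, as the question asks, a dict of sets keeps one entry per service name and silently drops duplicate paths. A sketch assuming the same '_source' layout as above:

grouped = {}
for item in data:
    source = item['_source']
    name = source['service']['name']
    # a set ignores a path we have already recorded for this service
    grouped.setdefault(name, set()).add(source['url']['path'])

pprint.pprint(grouped)
# {'Lonsid_AAA_Web_Host': {'/product/getAAA'},
#  'Lonsid_BBB_Web_Host': {'/product/getBBB'}}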
How can I access each element of the JSON object below using BeautifulSoup or regex?
For example, call timestamp and get the output 1561238146781.
{
"timestamp" : 1561238146781,
"context" : {
"external_urls" : {
"spotify" : "https://open.spotify.com/playlist/4rwZyPeww8O7ZPnQqHD2q"
},
"href" : "https://api.spotify.com/v1/playlists/4rwZyPqew8O7ZPnQqHD2q",
"type" : "playlist",
"uri" :
"spotify:user:3sui5bqpr8a0i2t:playlist:4rwZyPqewO7ZPnQqHD2q"
},
"progress_ms" : 115728,
"item" : {
"album" : {
"album_type" : "album",
"artists" : [ {
"external_urls" : {
"spotify" :
"https://open.spotify.com/artist/5eAWCfyUhZqwHtBdNk56l1"
},
"href" :
"https://api.spotify.com/v1/artists/5eAWCfyeZtHHtBdNk56l1",
"id" : "5eAWCfyUhZtHHtBdNk56l1",
"name" : "System Of A Down",
"type" : "artist",
"uri" : "spotify:artist:5eAWewyUhZtHHtBdNk56l1"
} ],
"available_markets" : [ ],
"external_urls" : {
"spotify" : "https://open.spotify.com/album/4DR0Gds7w2GJyQnFVa4jAB"
},
"href" : "https://api.spotify.com/v1/albums/4DR0ewwsdJyQnFVa4jAB",
"id" : "4DR0GWo7w2ewyQnFVa4jAB",
"images" : [ {
"height" : 640,
"url" :
"https://i.scdn.co/image/932e185b217ew6caasd837dbe30d54028de9cfc",
"width" : 615
}, {
"height" : 300,
"url" :
"https://i.scdn.co/image/30de1d4e1ew38sd89573893d8494fd6a66",
"width" : 288
}, {
"height" : 64,
"url" :
"https://i.scdn.co/image/1682cd0e8ew8bf87sdc4cd1e01ce24cd165b",
"width" : 62
} ],
"name" : "Toxicity",
"release_date" : "2001-01-01",
"release_date_precision" : "day",
"total_tracks" : 14,
"type" : "album",
"uri" : "spotify:album:4DR0GewdGJyQnFVa4jAB"
},
"artists" : [ {
"external_urls" : {
"spotify" : "https://open.spotify.com/artist/5eAWCsdUweHtBdNk56l1"
},
"href" : "https://api.spotify.com/v1/artists/5eAWCfewhdsHtBdNk56l1",
"id" : "5eAWCfyUhZtHHtBewk56l1",
"name" : "System Of A Down",
"type" : "artist",
"uri" : "spotify:artist:5eAWCfyUsdtHHtBdNk56l1"
} ],
"available_markets" : [ ],
"disc_number" : 1,
"duration_ms" : 235599,
"explicit" : false,
"external_ids" : {
"isrc" : "USSM10107264"
},
"external_urls" : {
"spotify" : "https://open.spotify.com/track/1twBtsdaZiy7HWPG025QGuP"
},
"href" : "https://api.spotify.com/v1/tracks/1twBt7aZiy7HWPG025QGuP",
"id" : "1twBt7aZiy7HWweG025QGuP",
"is_local" : false,
"name" : "Aerials",
"popularity" : 9,
"preview_url" : null,
"track_number" : 14,
"type" : "track",
"uri" : "spotify:track:1twBt7aZieWPG025QGuP"
},
"currently_playing_type" : "track",
"actions" : {
"disallows" : {
"resuming" : true
}
},
"is_playing" : true
}
Call "is-playing" and get true
I've tried making the hole array a list (each element) and trying to get the element from the list but i've realized that this is hardly useful.
Save your JSON data into a data.json file, then execute this code:
import json

with open('data.json', 'r') as f:
    data = json.load(f)

print(data["is_playing"])
output
True
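Nested values work the same way: chain the keys (and list indexes) down to the element you want. For example, with the same data:

print(data["timestamp"])                   # 1561238146781
print(data["item"]["name"])                # Aerials
print(data["item"]["artists"][0]["name"])  # System Of A Down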
I'm using imply-2.2.3. Here is my tranquility server configuration:
{
"dataSources" : [
{
"spec" : {
"dataSchema" : {
"dataSource" : "tutorial-tranquility-server",
"parser" : {
"type" : "string",
"parseSpec" : {
"timestampSpec" : {
"column" : "timestamp",
"format" : "auto"
},
"dimensionsSpec" : {
"dimensions" : [],
"dimensionExclusions" : [
"timestamp",
"value"
]
},
"format" : "json"
}
},
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "hour",
"queryGranularity" : "none"
},
"metricsSpec" : [
{
"type" : "count",
"name" : "count"
},
{
"name" : "value_sum",
"type" : "doubleSum",
"fieldName" : "value"
},
{
"fieldName" : "value",
"name" : "value_min",
"type" : "doubleMin"
},
{
"type" : "doubleMax",
"name" : "value_max",
"fieldName" : "value"
}
]
},
"ioConfig" : {
"type" : "realtime"
},
"tuningConfig" : {
"type" : "realtime",
"maxRowsInMemory" : "50000",
"windowPeriod" : "PT10M"
}
},
"properties" : {
"task.partitions" : "1",
"task.replicants" : "1"
}
},
{
"spec": {
"dataSchema" : {
"dataSource" : "test",
"parser" : {
"type" : "string",
"parseSpec" : {
"timestampSpec" : {
"column" : "timestamp",
"format" : "auto"
},
"dimensionsSpec" : {
"dimensions" : [
"a"
],
},
"format" : "json"
}
},
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "hour",
"queryGranularity" : "none"
},
"metricsSpec" : [
{
"type" : "count",
"name" : "count"
},
{
"type": "doubleSum",
"name": "b",
"fieldName": "b"
}
]
},
"ioConfig" : {
"type" : "realtime"
},
"tuningConfig" : {
"type" : "realtime",
"maxRowsInMemory" : "50000",
"windowPeriod" : "P1Y"
}
},
"properties": {
"task.partitions" : "1",
"task.replicants" : "1"
}
}
],
"properties" : {
"zookeeper.connect" : "localhost",
"druid.discovery.curator.path" : "/druid/discovery",
"druid.selectors.indexing.serviceName" : "druid/overlord",
"http.port" : "8200",
"http.threads" : "40",
"serialization.format" : "smile",
"druidBeam.taskLocator": "overlord"
}
}
I have trouble sending data to the second datasource, test, specifically. I tried to send the data below to Druid with Python requests:
{'b': 7, 'timestamp': '2017-01-20T03:32:54.586415', 'a': 't'}
The response I receive:
b'{"result":{"received":1,"sent":0}}'
If you read my config file, you will notice that I set the window period to one year, because I would like to send data with a large time span to Druid using the Tranquility server. Is there something wrong with my config or data?
I am new to Elasticsearch. I have created a new index using the following request:
req = {
"settings": {
"analysis": {
"analyzer": {
"hinglish_analyzer": {
"type": "custom",
"tokenizer": "standard",
"char_filter": [
"html_strip"
],
"filter": [
"lowercase",
"asciifolding",
"hinglish-token-filter"
]
}
}
}
},
"mappings" : {
"p_ss__user" : {
"properties" : {
"age" : {
"type": "integer"
},
"first_name" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"gender" : {
"type" : "long"
},
"is_alive" : {
"type" : "boolean"
},
"last_name" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"marital_status" : {
"type" : "long"
},
"user_gotra" : {
"properties" : {
"Gotra" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
},
"kuldevi" : {
"properties" : {
"Kuldevi" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
}
}
}
}
},
"user_village" : {
"properties" : {
"areaOrVillageName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
},
"tehsil" : {
"properties" : {
"city" : {
"properties" : {
"cityName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
},
"state" : {
"properties" : {
"country" : {
"properties" : {
"countryCode" : {
"type" : "text"
},
"countryName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
},
"id" : {
"type" : "long"
}
}
},
"id" : {
"type" : "long"
},
"stateCode" : {
"type" : "text"
},
"stateName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
}
}
}
}
},
"id" : {
"type" : "long"
},
"tehsilName" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
}
}
},
"zipcode" : {
"type" : "text"
}
}
},
"username" : {
"type" : "text",
"analyzer": "hinglish_analyzer"
}
}
}
}
}
Here, 'hinglish-token-filter' is a custom token filter that I have written, and it works perfectly fine.
Now, I have created a document in Elasticsearch with the help of a Python script (I pass my own value for _id in the request). The document looks like this:
{
"username" : "Gopi_Chand",
"first_name" : "Gopi Chand",
"last_name" : "",
"gender" : 2,
"age" : 44,
"user_gotra" : {
"Gotra" : "Thanak",
"kuldevi" : {
"Kuldevi" : "Maa Jagdambaa",
"id" : 1
},
"id" : 1,
"kulrishi" : {
"Rishi" : "Parashar",
"id" : 1
}
},
"user_village" : {
"areaOrVillageName" : "Sanatra",
"tehsil" : {
"city" : {
"state" : {
"country" : {
"countryName" : "India",
"id" : 1,
"countryCode" : "IND"
},
"stateName" : "Rajasthan",
"id" : 1
},
"cityName" : "Barmer (Meru)",
"id" : 1
},
"tehsilName" : "Baitu",
"id" : 1
},
"id" : 1,
"zipcode" : ""
},
"marital_status" : 1,
"is_alive" : true
}
The document is successfully stored in Elasticsearch with the ID that I passed, along with the other values.
But the problem comes when I try to retrieve the document by the ID that I set:
http://localhost:9200/users/p_s_s__user/3222/
It gives me the following response:
{"_index":"users","_type":"p_s_s__user","_id":"3222","found":false}
But when I try the following query:
http://localhost:9200/users/_search?pretty=true
it shows me my document, as below:
{
"took" : 13,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [
{
"_index" : "users",
"_type" : "p_ss__user",
"_id" : "3222",
"_score" : 1.0,
"_source" : {
"username" : "Gopi_Chand",
"first_name" : "Gopi Chand",
"last_name" : "",
"gender" : 2,
"age" : 44,
"user_gotra" : {
"Gotra" : "Thanak",
"kuldevi" : {
"Kuldevi" : "Maa Jagdambaa",
"id" : 1
},
"id" : 1,
"kulrishi" : {
"Rishi" : "Parashar",
"id" : 1
}
},
"user_village" : {
"areaOrVillageName" : "Sanatra",
"tehsil" : {
"city" : {
"state" : {
"country" : {
"countryName" : "India",
"id" : 1,
"countryCode" : "IND"
},
"stateName" : "Rajasthan",
"id" : 1
},
"cityName" : "Barmer (Meru)",
"id" : 1
},
"tehsilName" : "Baitu",
"id" : 1
},
"id" : 1,
"zipcode" : ""
},
"marital_status" : 1,
"is_alive" : true
}
}
]
}
}
Can you help me work out what I have done wrong? Moreover, other queries, such as "match" queries, are also not working.
Thanks in advance.
I have data like this:
{ "_id": "1234gbrghr",
"Device" : "samsung",
"UserId" : "12654",
"Month" : "july"
},
{ "_id": "1278gbrghr",
"Device" : "nokia",
"UserId" : "87654",
"Month" : "july"
},
{ "_id": "1234gbrghr",
"Device" : "samsung",
"UserId" : "12654",
"Month" : "july"
}
I need to get the number of distinct users for a particular device in the month of July. For example, if a user (UserId) used a samsung device twice or more in July, it should be counted as one for samsung.
I used the query below to get the total number of users in July, but I need the number of distinct users:
pipeline1 = [
    {'$match': {'Month': 'july'}},
    {'$group': {'_id': '$Device', 'count': {'$sum': 1}}}
]
data = db.command('aggregate', 'collection', pipeline=pipeline1)
You will need to group on device and user first. You can do that with the following pipeline operator:
{'$group':{'_id' : { d: '$Device', u: '$UserId' } } }
And then, secondly, you need to count the number of users per device (like you already had, but slightly modified):
{ '$group': { '_id' : '$_id.d', 'count': { '$sum' : 1 } } }
With the following dataset:
{ "_id" : "1234gbrghr", "Device" : "samsung", "UserId" : "12654", "Month" : "july" }
{ "_id" : "1278gbrghr", "Device" : "nokia", "UserId" : "87654", "Month" : "july" }
{ "_id" : "1239gbrghr", "Device" : "samsung", "UserId" : "12654", "Month" : "july" }
{ "_id" : "1238gbrghr", "Device" : "samsung", "UserId" : "12653", "Month" : "july" }
And the following aggregate command:
db.so.aggregate( [
{ '$match' : {'Month' : 'july' } },
{ '$group' : {
'_id' : { d: '$Device', u: '$UserId' },
'count' : { '$sum' : 1 }
} },
{ '$group': {
'_id' : '$_id.d',
'count': { '$sum' : 1 }
} }
] );
This outputs:
{
"result" : [
{
"_id" : "nokia",
"count" : 1
},
{
"_id" : "samsung",
"count" : 2
}
],
"ok" : 1
}
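For completeness, here is the same two-stage pipeline run from Python, in the db.command style the question used; a sketch, with the collection name 'collection' taken from the question (older servers return the 'result' list shown above):

pipeline = [
    {'$match': {'Month': 'july'}},
    {'$group': {'_id': {'d': '$Device', 'u': '$UserId'}}},
    {'$group': {'_id': '$_id.d', 'count': {'$sum': 1}}}
]
data = db.command('aggregate', 'collection', pipeline=pipeline)
print(data['result'])  # e.g. [{'_id': 'nokia', 'count': 1}, {'_id': 'samsung', 'count': 2}]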