How to aggregate data with date range?

How to aggregate data with date range? - python

Hello I have the following problem, whenever I aggregate data, the aggregations and to be more exact the date_histogram is always different. It starts with pretty much random date.
I am using elasticpy and my query looks like this before executing. Note that I am using python datetime objects to get a "real" results. I had some problems with other formats.
{
"query": {
"bool": {
"filter": [
{
"range": {
"original_date": {
"gte": datetime.datetime(2020, 2, 13, 0, 0),
"lte": datetime.datetime(2020, 2, 15, 23, 0),
}
}
}
],
"must": [
{
"query_string": {
"query": "whatever string"
}
}
],
}
},
"aggs": {
"docs_histogram": {
"date_histogram": {
"field": "original_date",
"interval": "hour",
"time_zone": "EET",
},
... (other aggs)
},
},
}
The date histogram should be in this range: 2020-02-13 00:00:00 - 2020-02-15 23:00:00 But look at the output's start and end. It starts 1 day later and ends same day 18:00 ??
"buckets": [
{
"key_as_string": "2020-02-14T00:00:00.000+02:00",
"key": 1581631200000,
"doc_count": 1,
"source_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "WhateverKey", "doc_count": 1}],
},
},
...
{
"key_as_string": "2020-02-14T18:00:00.000+02:00",
"key": 1581696000000,
"doc_count": 1,
"source_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "WhateverKey2", "doc_count": 1}],
},
},
]

Related

Returning data that is not in ElasticSearch as 0 in doc_count

I am filtering in ElasticSearch. I want doc_count to return 0 on non-data dates, but it doesn't print those dates at all, only dates with data are returned to me. do you know how i can do it? Here is the Python output:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
33479 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33480 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33481 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33482 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33483 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
And here is my ElasticSearch filter:
"from": 0,
"size": 0,
"query": {
"bool": {
"must":
[
{
"range": {
"#timestamp": {
"gte": "now-1M",
"lt": "now"
}
}
}
]
}
},
"aggs": {
"continent": {
"terms": {
"field": "source.geo.continent_name.keyword"
},
"aggs": {
"_source": {
"date_histogram": {
"field": "#timestamp", "interval": "8m"
}}}}}}

You need to set min_doc_count value to 0 for aggregation where you want result with zero doc_count.
{
"from": 0,
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"#timestamp": {
"gte": "now-1M",
"lt": "now"
}
}
}
]
}
},
"aggs": {
"continent": {
"terms": {
"field": "source.geo.continent_name.keyword",
"min_doc_count": 0
},
"aggs": {
"_source": {
"date_histogram": {
"field": "#timestamp",
"interval": "8m",
"min_doc_count": 0
}
}
}
}
}
}

Get fields from a JSON file with Python

I have this json file loaded in Python with json.loads('myfile.json'):
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
How I can access to both totalQty and totalPrice fields at same time and sum them?
How I can access to both Title fields to print it?

Let's assume that you have the JSON data available as a string then:
jdata = '''
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
'''
totalQty = 0
totalPrice = 0
for d in json.loads(jdata):
c = d['cart']
totalQty += c['totalQty']
totalPrice += c['totalPrice']
for sd in c['items'].values():
print(sd['item']['title'])
print(f'{totalQty:d}', f'{totalPrice:.2f}')
Output:
3 16.95
Note:
I suspect that what you really want to do is multiply those two values

update_many objects by reference to objects in same documents

Let's say I have a collection like the following. For every document that contains animals.horse, I want to set animals.goat equal to animals.horse (so the horses don't get lonely or outnumbered).
[
{
"_id": 1,
"animals": {
"goat": 1
}
},
{
"_id": 2,
"animals": {
"cow": 1,
"horse": 2,
"goat": 1
}
},
{
"_id": 3,
"animals": {
"horse": 5
}
},
{
"_id": 4,
"animals": {
"cow": 1
}
}
]
In Mongo shell, this works as desired:
db.collection.update(
{"animals.horse": { "$gt": 0 }},
[ { "$set": { "animals.goat": "$animals.horse" } } ],
{ "multi": true }
)
which achieves the desired result:
[
{
"_id": 1,
"animals": {
"goat": 1
}
},
{
"_id": 2,
"animals": {
"cow": 1,
"goat": 2,
"horse": 2
}
},
{
"_id": 3,
"animals": {
"goat": 5,
"horse": 5
}
},
{
"_id": 4,
"animals": {
"cow": 1
}
}
]
However, this doesn't work in pymongo -- the collection is unaltered.
db.collection.update_many( filter = {'animals.horse': {'$gt':0} },
update = [ {'$set': {'animals.goat': '$animals.horse' } } ],
upsert = True
)
What am I doing wrong?

How to map the dictionary values to another dictionary

I have dictionary which is below
{
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ADL", "doc_count": 1 },
{ "key": "SDD", "doc_count": 1 },
{ "key": "JJD", "doc_count": 1 }
]
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ABC", "doc_count": 1 },
{ "key": "CDE", "doc_count": 1 },
{ "key": "FGH", "doc_count": 1 }
]
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "XYX", "doc_count": 1 },
{ "key": "NXS", "doc_count": 1 }
]
}
}
}
aggregations.keys will be aggregationfilters.fieldName
aggregations.buckets.key will be aggregationfilters.values.title
aggregationfilters.values.paragraph is null everytime
aggregations.buckets.doc_count will be aggregationfilters.values.count
Basically I need to extract aggregations.keys and aggregations.bucket values and put into different dictionary.
Need to write a general code structure to do that.
I cannot do with .pop(rename) the dictioanry
My expected out
{
"aggregationfilters": [
{
"name": "ABC",
"fieldName": "A",
"values": [
{ "title": "ADL", "paragraph": null, "count": 1 },
{ "title": "SDD", "paragraph": null, "count": 1 },
{ "title": "JJD", "paragraph": null, "count": 1 }
]
}, {
"name": "CDE",
"fieldName": "B",
"values": [
{ "title": "ABC", "paragraph": null, "count": 1 },
{ "title": "CDE", "paragraph": null, "count": 1 },
{ "title": "FGH", "paragraph": null, "count": 1 }
]
}, {
"name": "FGH",
"fieldName": "C",
"values": [
{ "title": "XYX", "paragraph": null, "count": 1 },
{ "title": "NXS", "paragraph": null, "count": 1 }
]
}
]
}

Well, this works, but even with my best effort this still doesn't look that clean.
import json
source = {
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ADL", "doc_count": 1},
{"key": "SDD", "doc_count": 1},
{"key": "JJD", "doc_count": 1},
],
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ABC", "doc_count": 1},
{"key": "CDE", "doc_count": 1},
{"key": "FGH", "doc_count": 1},
],
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "XYX", "doc_count": 1}, {"key": "NXS", "doc_count": 1}],
},
}
}
convert_map = {
"buckets": "values",
"doc_count": "count",
"key": "title",
}
remove_map = {"sum_other_doc_count", "doc_count_error_upper_bound"}
add_map = {"name": "Changed VAL_", "fieldName": "VAL_"}
def converting_generator(
source_: dict, convert_map_: dict, remove_map_: set, add_map_: dict
):
working_dict = {k: v for k, v in source_.items()}
variable_identifier = "VAL_"
for key, inner_dic in working_dict.items():
inner_dic: dict
for rm_key in remove_map_:
try:
inner_dic.pop(rm_key)
except KeyError:
pass
for add_key, add_val in add_map_.items():
inner_dic[add_key] = add_val.replace(variable_identifier, key)
dumped = json.dumps(inner_dic, indent=2)
for original, target in convert_map_.items():
dumped = dumped.replace(original, target)
yield json.loads(dumped)
converted = {
"aggregation_filters": list(
converting_generator(source["aggregations"], convert_map, remove_map, add_map)
)
}
for inner_dict in converted["aggregation_filters"]:
for even_inner_dict in inner_dict["values"]:
even_inner_dict["paragraph"] = None
print(json.dumps(converted, indent=2))
Output:
{
"aggregation_filters": [
{
"values": [
{
"title": "ADL",
"count": 1,
"paragraph": null
},
{
"title": "SDD",
"count": 1,
"paragraph": null
},
{
"title": "JJD",
"count": 1,
"paragraph": null
}
],
"name": "Changed A",
"fieldName": "A"
},
{
"values": [
{
"title": "ABC",
"count": 1,
"paragraph": null
},
{
"title": "CDE",
"count": 1,
"paragraph": null
},
{
"title": "FGH",
"count": 1,
"paragraph": null
}
],
"name": "Changed B",
"fieldName": "B"
},
{
"values": [
{
"title": "XYX",
"count": 1,
"paragraph": null
},
{
"title": "NXS",
"count": 1,
"paragraph": null
}
],
"name": "Changed C",
"fieldName": "C"
}
]
}
Always show your code, would be nice if that's a working one - to show that you've put at least that worth of the effort on your problem.
I don't bother it as this feels like puzzle solving, but others may not.

Get names of keys in objectpath

How would I get the names of the keys, for example [800, 801] (the key names are unknown) with objectpath.
It is easy in jmespath: keys(#).
"groups": {
"800": {
"short_name": "22",
"oname": "11",
"group": 8,
"title": "SS",
"name": "33",
"onames": [""],
"alt_name": False,
"waytype": 1,
"multiple": 1,
"primary": 1
},
"801": {
"short_name": "ss",
"oname": "zz",
"group": 8,
"title": "ss",
"name": "bbb",
"onames": [""],
"alt_name": False,
"waytype": 1,
"multiple": 1,
"primary": 0
},

let your object is assigned to name variable
const name = { "groups": {
"800": {
"short_name": "22",
"oname": "11",
"group": 8,
"title": "SS",
"name": "33",
"onames": [""],
"alt_name": false,
"waytype": 1,
"multiple": 1,
"primary": 1
},
"801": {
"short_name": "ss",
"oname": "zz",
"group": 8,
"title": "ss",
"name": "bbb",
"onames": [""],
"alt_name": false,
"waytype": 1,
"multiple": 1,
"primary": 0
} } }
Use for loop to get the key name as
for(var num in name.groups) {
console.log(num);
}
and to get the values of key
for(var num in name.groups) {
console.log(name.groups[num]);
}

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to aggregate data with date range? - python

Related

Returning data that is not in ElasticSearch as 0 in doc_count

Get fields from a JSON file with Python

update_many objects by reference to objects in same documents

How to map the dictionary values to another dictionary

Get names of keys in objectpath

Categories

Resources