I have a JSON object like this:
{
"query": {
"bool": {
"must": [],
"should": [
{
"match": {
"Name": {
"query": "Nametest",
"fuzziness": 3,
"boost": 5
}
}
},
{
"match": {
"Address": {
"query": "NONE",
"fuzziness": 3,
"boost": 4
}
}
},
{
"match": {
"Site": {
"query": "Adeswfvfv",
"fuzziness": 3,
"boost": 4
}
}
},
{
"match": {
"Phone": {
"query": "5680728.00",
"fuzziness": 2,
"boost": 4
}
}
}
],
"minimum_should_match": 2
}
}
}
So what I want to do is: if, in json['query']['bool']['should'], any "query" value is "NONE", then I want to remove that array element, and the new JSON will be
{
"query": {
"bool": {
"must": [],
"should": [
{
"match": {
"Name": {
"query": "Nametest",
"fuzziness": 3,
"boost": 5
}
}
},
{
"match": {
"Site": {
"query": "Adeswfvfv",
"fuzziness": 3,
"boost": 4
}
}
},
{
"match": {
"Phone": {
"query": "5680728.00",
"fuzziness": 2,
"boost": 4
}
}
}
],
"minimum_should_match": 2
}
}
}
I have tried iterating over the JSON and used del(jsonarray) and pop(jsonarray), but nothing seems to help.
I tried with the Python json library but failed:
for e in q['query']['bool']['should']:
... if "NONE" in str(e['match']):
... del(e)
This should help.
import pprint
d = {'query': {'bool': {'minimum_should_match': 2, 'should': [{'match': {'Name': {'query': 'Nametest', 'boost': 5, 'fuzziness': 3}}}, {'match': {'Address': {'query': 'NONE', 'boost': 4, 'fuzziness': 3}}}, {'match': {'Site': {'query': 'Adeswfvfv', 'boost': 4, 'fuzziness': 3}}}, {'match': {'Phone': {'query': '5680728.00', 'boost': 4, 'fuzziness': 2}}}], 'must': []}}}

# Keep only the "should" clauses whose inner query text is not the
# placeholder value "NONE".  Each clause's "match" object holds exactly
# one field, so its first (only) value carries the query parameters.
kept_clauses = []
for clause in d['query']['bool']['should']:
    field_options = next(iter(clause['match'].values()))
    if field_options['query'] != 'NONE':
        kept_clauses.append(clause)
d['query']['bool']['should'] = kept_clauses
pprint.pprint(d)
Output:
{'query': {'bool': {'minimum_should_match': 2,
'must': [],
'should': [{'match': {'Name': {'boost': 5,
'fuzziness': 3,
'query': 'Nametest'}}},
{'match': {'Site': {'boost': 4,
'fuzziness': 3,
'query': 'Adeswfvfv'}}},
{'match': {'Phone': {'boost': 4,
'fuzziness': 2,
'query': '5680728.00'}}}]}}}
I wrote this, but it seems complex:
# Rebuild the "should" list without the clauses whose query text is "NONE".
# NOTE: the original pop()-inside-enumerate version mutates the list while
# it is being iterated, which silently skips the element that follows each
# removal (e.g. two consecutive "NONE" clauses would leave one behind).
# Building a new list avoids that bug and is simpler.
json['query']['bool']['should'] = [
    clause for clause in json['query']['bool']['should']
    if list(clause["match"].values())[0]["query"] != "NONE"
]
print(json)
Related
I am filtering in Elasticsearch. I want doc_count to return 0 for dates with no data, but it doesn't print those dates at all; only dates with data are returned to me. Do you know how I can do that? Here is the Python output:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
33479 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33480 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33481 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33482 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33483 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
And here is my ElasticSearch filter:
"from": 0,
"size": 0,
"query": {
"bool": {
"must":
[
{
"range": {
"#timestamp": {
"gte": "now-1M",
"lt": "now"
}
}
}
]
}
},
"aggs": {
"continent": {
"terms": {
"field": "source.geo.continent_name.keyword"
},
"aggs": {
"_source": {
"date_histogram": {
"field": "#timestamp", "interval": "8m"
}}}}}}
You need to set min_doc_count value to 0 for aggregation where you want result with zero doc_count.
{
"from": 0,
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"#timestamp": {
"gte": "now-1M",
"lt": "now"
}
}
}
]
}
},
"aggs": {
"continent": {
"terms": {
"field": "source.geo.continent_name.keyword",
"min_doc_count": 0
},
"aggs": {
"_source": {
"date_histogram": {
"field": "#timestamp",
"interval": "8m",
"min_doc_count": 0
}
}
}
}
}
}
I want to merge the list of dictionaries provided below on each unique (channel, zrepcode) pair.
sample input:
[
{
"channel": 1,
"zrepcode": "123456",
"turn": 7833.9
},
{
"channel": 1,
"zrepcode": "123456",
"pipeline": 324
},
{
"channel": 1,
"zrepcode": "123456",
"inv_bal": 941.16
},
{
"channel": 1,
"zrepcode": "123456",
"display": 341
},
{
"channel": 3,
"zrepcode": "123456",
"display": 941.16
},
{
"channel": 3,
"zrepcode": "123456",
"turn": 7935.01
},
{
"channel": 3,
"zrepcode": "123456",
"pipeline": 0
},
{
"channel": 3,
"zrepcode": "123456",
"inv_bal": 341
},
{
"channel": 3,
"zrepcode": "789789",
"display": 941.16
},
{
"channel": 3,
"zrepcode": "789789",
"turn": 7935.01
},
{
"channel": 3,
"zrepcode": "789789",
"pipeline": 0
},
{
"channel": 3,
"zrepcode": "789789",
"inv_bal": 341
}
]
Sample output:
[
{'channel': 1, 'zrepcode': '123456', 'turn': 7833.9, 'pipeline': 324.0,'display': 341,'inv_bal': 941.16},
{'channel': 3, 'zrepcode': '123456', 'turn': 7935.01, 'pipeline': 0.0, 'display': 941.16, 'inv_bal': 341.0},
{'channel': 3, 'zrepcode': '789789', 'turn': 7935.01, 'pipeline': 0.0, 'display': 941.16, 'inv_bal': 341.0}
]
Easily solved with our good friend collections.defaultdict:
import collections

# Bucket the records by their (channel, zrepcode) pair; every dict that
# shares a pair is folded into one result via dict.update, so later
# records contribute their extra keys (turn / pipeline / inv_bal / display).
merged = collections.defaultdict(dict)
for record in data:  # data is the list of dicts from the post
    pair = (record.get("channel"), record.get("zrepcode"))
    merged[pair].update(record)
print(list(merged.values()))
This outputs
[
{'channel': 1, 'zrepcode': '123456', 'turn': 7833.9, 'pipeline': 324, 'inv_bal': 941.16, 'display': 341},
{'channel': 3, 'zrepcode': '123456', 'display': 941.16, 'turn': 7935.01, 'pipeline': 0, 'inv_bal': 341},
{'channel': 3, 'zrepcode': '789789', 'display': 941.16, 'turn': 7935.01, 'pipeline': 0, 'inv_bal': 341},
]
Let's say I have a collection like the following. For every document that contains animals.horse, I want to set animals.goat equal to animals.horse (so the horses don't get lonely or outnumbered).
[
{
"_id": 1,
"animals": {
"goat": 1
}
},
{
"_id": 2,
"animals": {
"cow": 1,
"horse": 2,
"goat": 1
}
},
{
"_id": 3,
"animals": {
"horse": 5
}
},
{
"_id": 4,
"animals": {
"cow": 1
}
}
]
In Mongo shell, this works as desired:
db.collection.update(
  // match only documents that actually contain animals.horse (> 0)
  {"animals.horse": { "$gt": 0 }},
  // pipeline-form update (array syntax, MongoDB 4.2+): field paths like
  // "$animals.horse" refer to the matched document's own values
  [ { "$set": { "animals.goat": "$animals.horse" } } ],
  // apply to every matching document, not just the first
  { "multi": true }
)
which achieves the desired result:
[
{
"_id": 1,
"animals": {
"goat": 1
}
},
{
"_id": 2,
"animals": {
"cow": 1,
"goat": 2,
"horse": 2
}
},
{
"_id": 3,
"animals": {
"goat": 5,
"horse": 5
}
},
{
"_id": 4,
"animals": {
"cow": 1
}
}
]
However, this doesn't work in pymongo -- the collection is unaltered.
db.collection.update_many( filter = {'animals.horse': {'$gt':0} },
update = [ {'$set': {'animals.goat': '$animals.horse' } } ],
upsert = True
)
What am I doing wrong?
I have 2 lists, looking like:
temp_data:
{
"id": 1,
"name": "test (replaced)",
"code": "test",
"last_update": "2020-01-01",
"online": false,
"data": {
"temperature": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
]}
hum_data:
{
"id": 1,
"name": "test (replaced)",
"code": "test",
"last_update": "2020-01-01",
"online": false,
"data": {
"humidity": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
]}
I need to merge the 2 lists into 1 without duplicating data. What is the easiest/most efficient way? After merging, I want something like this:
{
"id": 1,
"name": "test",
"code": "test",
"last_update": "2020-01-01",
"online": false,
"data": {
"temperature": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
],
"humidity": [
{
"date": "2019-12-17",
"value": 23.652905748126333
},
...
Thanks for helping.
If your lists hum_data and temp_data are not sorted then first sort them and then concatenate the dictionaries pair-wise.
# Records in the two lists pair up by their shared 'id', so sort both
# lists by it first.  (PEP 8: use `def` rather than assigning a lambda
# to a name — it gives the function a real name for tracebacks.)
def _by_id(entry):
    """Sort key: the record's 'id' field."""
    return entry['id']

# sort arrays before to make later concatenation easier
temp_data.sort(key=_by_id)
hum_data.sort(key=_by_id)

combined_data = temp_data.copy()
# concatenate the dictionaries using the update function: each humidity
# row's "data" section merges into its temperature counterpart
for hum_row, combined_row in zip(hum_data, combined_data):
    combined_row['data'].update(hum_row['data'])
# combined hum_data and temp_data
combined_data
If the lists are already sorted then you just need to concatenate dictionary by dictionary.
combined_data = temp_data.copy()
# Walk both (already sorted) lists in lockstep and fold each humidity
# record's "data" payload into the matching combined entry.
for humidity_entry, target in zip(hum_data, combined_data):
    target['data'].update(humidity_entry['data'])
# combined hum_data and temp_data
combined_data
With that code I got the following result:
[
{
'id': 1,
'name': 'test (replaced)',
'code': 'test',
'last_update': '2020-01-01',
'online': False,
'data': {
'temperature': [{'date': '2019-12-17', 'value': 1}],
'humidity': [{'date': '2019-12-17', 'value': 1}]}
},
{
'id': 2,
'name': 'test (replaced)',
'code': 'test',
'last_update': '2020-01-01',
'online': False,
'data': {
'temperature': [{'date': '2019-12-17', 'value': 2}],
'humidity': [{'date': '2019-12-17', 'value': 2}]}
}
]
I have a data in MongoDB, the data like that:
{
"good": {
"d1": 2,
"d2": 56,
"d3": 3
},
"school": {
"d1": 4,
"d3": 5,
"d4": 12
}
},
{
"good": {
"d5": 4,
"d6": 5
},
"spark": {
"d5": 6,
"d6": 11,
"d7": 10
},
"school": {
"d5": 8,
"d8": 7
}
}
and I want to use pymongo mapreduce to generate data like this:
{
'word': 'good',
'info': [
{
'tbl_id': 'd1',
'term_freq': 2
},
{
'tbl_id': 'd2',
'term_freq': 56
},
{
'tbl_id': 'd3',
'term_freq': 3
},
{
'tbl_id': 'd5',
'term_freq': 4
},
{
'tbl_id': 'd6',
'term_freq': 5
}
]
}
{
'word': 'school',
'info': [
{
'tbl_id': 'd1',
'term_freq': 4
},
{
'tbl_id': 'd3',
'term_freq': 5
},
{
'tbl_id': 'd4',
'term_freq': 12
},
{
'tbl_id': 'd5',
'term_freq': 8
},
{
'tbl_id': 'd8',
'term_freq': 7
}
]
}
{
'word': 'spark',
'info': [
{
'tbl_id': 'd5',
'term_freq': 6
},
{
'tbl_id': 'd6',
'term_freq': 11
},
{
'tbl_id': 'd7',
'term_freq': 10
}
]
}
what should I do? Or there is other solutions?
You don't need `mapReduce` here. The aggregation framework can do this beautifully.
As of how this works, I suggest you have a look at each operator in the documentation.
_filter = {
"input": {"$objectToArray": "$$ROOT"},
"cond": {"$ne": ["$$this.k", "_id"]}
}
_map = {
"$map": {
"input": {"$filter": _filter},
"in": {
"k": "$$this.k",
"info": {
"$map": {
"input": {"$objectToArray": "$$this.v"},
"in": {"tbl_id": "$$this.k", "freq_term": "$$this.v"}
}
}
}
}
}
# Aggregation pipeline: one output document per word, with all of that
# word's per-document frequencies gathered into a single "info" array.
pipeline = [
    # Reshape every document into an array of {k: word, info: [...]} entries.
    {"$project": {"word": _map}},
    # One document per word entry.
    {"$unwind": "$word"},
    {
        # Group by the word itself; collect each source document's info
        # array (so "info" is an array of arrays at this point).
        "$group": {
            "_id": "$word.k",
            "info": {
                "$push": "$word.info"
            }
        }
    },
    {
        # Rename _id back to "word" and flatten the array-of-arrays with
        # $reduce + $concatArrays.
        "$project": {
            "_id": 0,
            "word": "$_id",
            "info": {
                "$reduce": {
                    "input": "$info",
                    "initialValue": [
                    ],
                    "in": {
                        "$concatArrays": [
                            "$$value",
                            "$$this"
                        ]
                    }
                }
            }
        }
    }
]
Then run with the .aggregate() method.
collection.aggregate(pipeline)