Elastic Search query list with sublist - python

I have an index in Elastic that contains an array of keys and values.
For example - a single document looks like this:
{
  "_index": "my_index",
  "_source": {
    "name": "test",
    "values": [
      { "name": "a", "score": 10 },
      { "name": "b", "score": 4 },
      { "name": "c", "score": 2 },
      { "name": "d", "score": 1 }
    ]
  },
  "fields": {
    "name": ["test"],
    "values.name.keyword": ["a", "b", "c", "d"],
    "name.keyword": ["test"],
    "values.score": [10, 4, 2, 1],
    "values.name": ["a", "b", "c", "d"]
  }
}
I want to create an Elasticsearch query (through the API) that retrieves the sum of all the name scores, filtered by a list of names.
For example, for the input:
names = ['a', 'b']
The result will be: 14
Any idea how to do it?

You can do this by making the values array nested. Example mapping:
{
  "mappings": {
    "properties": {
      "values": { "type": "nested" }
    }
  }
}
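If you are setting this up from Python, here is a minimal sketch assuming the official elasticsearch client (7.x-style body argument) and a hypothetical local cluster address; note that switching an existing field to nested means recreating the index and reindexing the data:

from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # hypothetical cluster address

# Create the index with `values` mapped as nested so each name/score pair stays together.
es.indices.create(
    index="my_index",
    body={"mappings": {"properties": {"values": {"type": "nested"}}}},
)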
The following query will give the result you want:
{
  "size": 0,
  "aggs": {
    "asd": {
      "nested": {
        "path": "values"
      },
      "aggs": {
        "filter_agg": {
          "filter": {
            "terms": {
              "values.name.keyword": ["a", "b"]
            }
          },
          "aggs": {
            "sum": {
              "sum": {
                "field": "values.score"
              }
            }
          }
        }
      }
    }
  }
}
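To run this from Python and read the summed score out of the response, here is a minimal sketch assuming the official elasticsearch client (7.x-style body argument); the aggregation names asd, filter_agg and sum match the query above:

from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # hypothetical cluster address

names = ["a", "b"]
query = {
    "size": 0,
    "aggs": {
        "asd": {
            "nested": {"path": "values"},
            "aggs": {
                "filter_agg": {
                    "filter": {"terms": {"values.name.keyword": names}},
                    "aggs": {"sum": {"sum": {"field": "values.score"}}},
                }
            },
        }
    },
}

resp = es.search(index="my_index", body=query)
# Walk the aggregation tree (nested -> filter -> sum); for ["a", "b"] this prints 14.0.
print(resp["aggregations"]["asd"]["filter_agg"]["sum"]["value"])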

Related

How to update a value inside an object which is inside array of another object in mongodb using python code

My collection looks like this
{"ingr": [
{
"ingrName": [
{
"_id": "57aa56e2a06b57b",
"name": "abc",
"type": "ingr"
}
],
"_id": {
"$oid": "62232cd70ce38c50"
},
"quantity": "1.0",
},
{
"ingr": [
{
"_id": "607e7fcca57aa",
"name": "xyz",
"type": "ingr"
}
],
"_id": {
"$oid": "62232cd70ce38c"
},
"quantity": "1.0"
}
}}
I just want to change the id and type based on the object id. What I tried is:
db1.update_one(
    {'ingr.$._id': ObjectId("62232cd70ce38c50")},
    {'$set': {
        "ingr.ingrName.$.type": "alternate",
        "ingr.ingrName.$._id": "abc123"
    }}
)
But the values are not changing. Help me find the mistake I am making. Thanks.
Expected output:
{"ingr": [
{
"ingrName": [
{
"_id": "abc123",
"name": "abc",
"type": "alternate"
}
],
"_id": {
"$oid": "62232cd70ce38c50"
},
"quantity": "1.0",
}
I need to change the id and type.
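A filtered positional update is one way to approach this; a minimal sketch assuming pymongo 3.6+ (arrayFilters), that db1 is the collection, and the inner _id value from the sample document above:

from bson import ObjectId

db1.update_one(
    {"ingr._id": ObjectId("62232cd70ce38c50")},
    {"$set": {
        "ingr.$[outer].ingrName.$[inner]._id": "abc123",
        "ingr.$[outer].ingrName.$[inner].type": "alternate",
    }},
    # each $[identifier] in the update is matched by the corresponding filter below
    array_filters=[
        {"outer._id": ObjectId("62232cd70ce38c50")},
        {"inner._id": "57aa56e2a06b57b"},
    ],
)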

Convert Pandas Dataframe to nested JSON for table records

I am using Python and Pandas, trying to convert a Pandas DataFrame to nested JSON. The function .to_json() doesn't give me enough flexibility for my aim.
Here are some data points of the data frame (in CSV, comma separated):
Hotel_id,Room_id,Client_id,Loayalty_level,Price
1,100,1,Default,100
1,100,2,Default,98
1,101,1,Default,200
1,101,1,Discounted,196
1,101,2,Default,202
1,101,3,Default,204
There is a lot of repetitive information and I would like to have a JSON like this:
{
  "hotelId": 1,
  "rooms": [
    {
      "roomId": 100,
      "prices": [
        { "clientId": 1, "price": { "default": 100 } },
        { "clientId": 2, "price": { "default": 98 } }
      ]
    },
    {
      "roomId": 101,
      "prices": [
        { "clientId": 1, "price": { "default": 200, "discounted": 196 } },
        { "clientId": 2, "price": { "default": 202 } },
        { "clientId": 3, "price": { "default": 204 } }
      ]
    }
  ]
}
How to achieve this?
Have a look at the convtools library; it provides a lot of primitives for data processing.
Here is the solution for your case:
import json

from convtools import conversion as c
from convtools.contrib.tables import Table

input_data = [
    ("Hotel_id", "Room_id", "Client_id", "Loayalty_level", "Price"),
    ("1", "100", "1", "Default", "100"),
    ("1", "100", "2", "Default", "98"),
    ("1", "101", "1", "Default", "200"),
    ("1", "101", "1", "Discounted", "196"),
    ("1", "101", "2", "Default", "202"),
    ("1", "101", "3", "Default", "204"),
]

# if reading from csv is needed
# rows = Table.from_csv("tmp/input.csv", header=True).into_iter_rows(tuple)

# convert to list of dicts
rows = list(Table.from_rows(input_data, header=True).into_iter_rows(dict))

# generate the converter (store somewhere and reuse, because this is where
# code generation happens)
converter = (
    c.group_by(c.item("Hotel_id"))
    .aggregate(
        {
            "hotelId": c.item("Hotel_id").as_type(int),
            "rooms": c.ReduceFuncs.Array(c.this()).pipe(
                c.group_by(c.item("Room_id")).aggregate(
                    {
                        "roomId": c.item("Room_id").as_type(int),
                        "prices": c.ReduceFuncs.Array(c.this()).pipe(
                            c.group_by(c.item("Client_id")).aggregate(
                                {
                                    "clientId": c.item("Client_id").as_type(int),
                                    "price": c.ReduceFuncs.DictFirst(
                                        c.item("Loayalty_level"),
                                        c.item("Price").as_type(float),
                                    ),
                                }
                            )
                        ),
                    }
                )
            ),
        }
    )
    .gen_converter()
)

print(json.dumps(converter(rows)))
The output is:
[
  {
    "hotelId": 1,
    "rooms": [
      {
        "roomId": 100,
        "prices": [
          { "clientId": 1, "price": { "Default": 100.0 } },
          { "clientId": 2, "price": { "Default": 98.0 } }
        ]
      },
      {
        "roomId": 101,
        "prices": [
          { "clientId": 1, "price": { "Default": 200.0, "Discounted": 196.0 } },
          { "clientId": 2, "price": { "Default": 202.0 } },
          { "clientId": 3, "price": { "Default": 204.0 } }
        ]
      }
    ]
  }
]
P.S. Pay attention to the c.ReduceFuncs.DictFirst part; this is where it takes the first price per loyalty level. You may want to change it to DictLast / DictMax / DictMin / DictArray.
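For comparison, here is a minimal sketch using only pandas groupby (the file name hotels.csv is hypothetical; any DataFrame with the same columns works). Unlike DictFirst above, later duplicate loyalty levels overwrite earlier ones here:

import json
import pandas as pd

df = pd.read_csv("hotels.csv")  # columns: Hotel_id, Room_id, Client_id, Loayalty_level, Price

result = []
for hotel_id, hotel_df in df.groupby("Hotel_id"):
    rooms = []
    for room_id, room_df in hotel_df.groupby("Room_id"):
        prices = []
        for client_id, client_df in room_df.groupby("Client_id"):
            # one key per loyalty level, e.g. {"Default": 200.0, "Discounted": 196.0}
            price = dict(zip(client_df["Loayalty_level"], client_df["Price"].astype(float).tolist()))
            prices.append({"clientId": int(client_id), "price": price})
        rooms.append({"roomId": int(room_id), "prices": prices})
    result.append({"hotelId": int(hotel_id), "rooms": rooms})

print(json.dumps(result, indent=2))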

Transforming nested JSON with pyjq

I'm trying to transform the JSON from this:
{
  "meta": {
    "ver": "3.0"
  },
  "cols": [
    { "name": "val" }
  ],
  "rows": [
    {
      "val": {
        "cols": [
          { "name": "ts" },
          { "name": "v0" },
          { "name": "v1" },
          { "name": "v2" },
          { "name": "v3" }
        ],
        "rows": [
          {
            "ts": {
              "_kind": "dateTime",
              "val": "2021-07-07T00:10:00-07:00",
              "tz": "Los_Angeles"
            },
            "v3": {
              "_kind": "number",
              "val": 6167699.5,
              "unit": "kWh"
            }
          },
          {
            "ts": {
              "_kind": "dateTime",
              "val": "2021-07-07T00:15:00-07:00",
              "tz": "Los_Angeles"
            },
            "v0": {
              "_kind": "number",
              "val": 808926.0625,
              "unit": "m\u00b3"
            },
            "v1": {
              "_kind": "number",
              "val": 112999.3046875,
              "unit": "m\u00b3"
            },
            "v2": {
              "_kind": "number",
              "val": 8823498,
              "unit": "kWh"
            }
          }
        ]
      }
    }
  ]
}
to a more simplified form using the pyjq module:
{
  "data": {
    "v0": [
      ["first timestamp", val],
      ["second timestamp", val]
    ],
    "v1": [
      ["first timestamp", val],
      ["second timestamp", val]
    ]
  }
}
I got started with the pyjq module; however, I'm unsure how to place two values (one str, one float) inside the [], separated by a comma. Here's my code (it returns an error, as expected).
import json
import pyjq

with open('file.json') as f:
    data = json.load(f)

transformed = pyjq.all('{data: { meter_id_1: [[[.rows[].val.rows[].ts.val + "," + .rows[].val.rows[].v0.val]]}}', data)
Thanks in advance.
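One way to build those [timestamp, value] pairs in jq is to bind the inner rows to a variable and use select per series; a minimal sketch assuming the nested .rows[].val.rows layout shown above (extend the object with v2/v3 in the same way):

import json
import pyjq

with open('file.json') as f:
    data = json.load(f)

# Collect every inner row, then emit one [ts, value] pair per row that has the series.
jq_filter = '''
[.rows[].val.rows[]] as $r
| {data: {
    v0: [$r[] | select(.v0) | [.ts.val, .v0.val]],
    v1: [$r[] | select(.v1) | [.ts.val, .v1.val]]
  }}
'''
print(json.dumps(pyjq.first(jq_filter, data), indent=2))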

Changing Key name in mongodb based on its value

I have a list of elements, element_list = ['A', 'C'], and my document in MongoDB looks like this:
"product_id": {
"$oid": "AA"
},
"output": [
{
"product": {
"$oid": "A"
},
"value": 1
},
{
"product": {
"$oid": "B"
},
"value": 1
},
]
}
What I want is that, based on the values in element_list, the key should change like this:
"product_id": {
"$oid": "AA"
},
"products": [
{
"product": {
"$oid": "A"
},
"value": 1
},
{
"Offer": {
"$oid": "B"
},
"value": 1
},
]
}
'B' is not present in element_list, which is why its key is Offer. How can I automatically update multiple similar documents in Python?
Try:
element_list = ['A', 'C']

for product in data:
    new_products = []
    for output in product['output']:
        # keep the key 'product' for oids that are in element_list, otherwise rename it to 'Offer'
        key = 'product' if output['product']['$oid'] in element_list else 'Offer'
        new_products.append({key: {'$oid': output['product']['$oid']}, 'value': output['value']})
    product['products'] = new_products
    del product['output']

print(data)
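The loop above only rewrites the documents in memory; to persist the change, a sketch assuming the documents came from a pymongo collection named coll (so each one still carries its _id):

for product in data:
    coll.update_one(
        {"_id": product["_id"]},
        # store the renamed array and drop the old one
        {"$set": {"products": product["products"]}, "$unset": {"output": ""}},
    )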

Manipulating data from json to reflect a single value from each entry

Setup:
This data set has 50 "issues"; within these "issues" I have captured the data that I then need to put into my PostgreSQL database. But "components" is where I run into trouble. I can get a list of all the "names" of the "components", but I only want one instance of "name" per "issue", and some of them have 2. Some are empty, and I would like to return null for those.
Here is some sample data that should suffice:
{
  "issues": [
    {
      "key": "1",
      "fields": {
        "components": [],
        "customfield_1": null,
        "customfield_2": null
      }
    },
    {
      "key": "2",
      "fields": {
        "components": [
          { "name": "Testing" }
        ],
        "customfield_1": null,
        "customfield_2": null
      }
    },
    {
      "key": "3",
      "fields": {
        "components": [
          { "name": "Documentation" },
          { "name": "Manufacturing" }
        ],
        "customfield_1": null,
        "customfield_2": 5
      }
    }
  ]
}
I am looking to return (just for the component name piece):
['null', 'Testing', 'Documentation']
I set up the other data for entry into the db like so:
values = list((item['key'],
               # components list,
               item['fields']['customfield_1'],
               item['fields']['customfield_2']) for item in data_story['issues'])
I am wondering if there is a way to plug the components list in where I have put the "components list" comment above.
To recap: I want only one component name for each issue (null or not) and to be able to put it into the values variable with the rest of the data. Also, the first name in components will work for each "issue".
Here's what I would do, assuming that we are working with a data variable:
values = [(x['fields']['components'][0]['name'] if len(x['fields']['components']) != 0 else 'null') for x in data['issues']]
Let me know if you have any queries.
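If the goal is to slot the component name straight into the values expression from the question (data_story as defined there), the same conditional fits into the tuple; None is used instead of the string 'null' so it maps to SQL NULL:

values = [
    (
        item['key'],
        item['fields']['components'][0]['name'] if item['fields']['components'] else None,
        item['fields']['customfield_1'],
        item['fields']['customfield_2'],
    )
    for item in data_story['issues']
]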
Use if/else inside the list comprehension. Example code:
results = [ (x['fields']['components'][0]['name'] if 'components' in x['fields'] and len(x['fields']['components']) > 0 else 'null') for x in data['issues'] ]
Full sample code:
import json

data = json.loads('''{
  "issues": [
    {
      "key": "1",
      "fields": {
        "components": [],
        "customfield_1": null,
        "customfield_2": null
      }
    },
    {
      "key": "2",
      "fields": {
        "components": [
          { "name": "Testing" }
        ],
        "customfield_1": null,
        "customfield_2": null
      }
    },
    {
      "key": "3",
      "fields": {
        "components": [
          { "name": "Documentation" },
          { "name": "Manufacturing" }
        ],
        "customfield_1": null,
        "customfield_2": 5
      }
    }
  ]
}''')

results = [(x['fields']['components'][0]['name'] if 'components' in x['fields'] and len(x['fields']['components']) > 0 else 'null') for x in data['issues']]
print(results)
The output is: ['null', u'Testing', u'Documentation']
If you just want to delete all but one of the names from the list, then you can do that this way:
issues = {
    "issues": [
        {
            "key": "1",
            "fields": {
                "components": [],
                "customfield_1": "null",
                "customfield_2": "null"
            }
        },
        {
            "key": "2",
            "fields": {
                "components": [
                    { "name": "Testing" }
                ],
                "customfield_1": "null",
                "customfield_2": "null"
            }
        },
        {
            "key": "3",
            "fields": {
                "components": [
                    { "name": "Documentation" },
                    { "name": "Manufacturing" }
                ],
                "customfield_1": "null",
                "customfield_2": 5
            }
        }
    ]
}
That's the data. Then:
componentlist = []
for i in range(len(issues["issues"])):
    x = issues["issues"][i]["fields"]["components"]
    if len(x) == 0:
        x = "null"
        componentlist.append(x)
    else:
        x = issues["issues"][i]["fields"]["components"][0]
        componentlist.append(x)
print(componentlist)
>>>['null', {'name': 'Testing'}, {'name': 'Documentation'}]
Or, if you just want the values, and not the dictionary keys:
else:
    x = issues["issues"][i]["fields"]["components"][0]["name"]
    componentlist.append(x)
['null', 'Testing', 'Documentation']
