Generate Hash Value for Nested JSON Object - python

I have two nested JSON objects whose elements are in a different order, and I would like to generate a hash value for each of them. When I compare the two hash values, they need to be the same. How can I achieve this?
First JSON Object
{
"X":{
"Y":[
{
"A":"1",
"B":{
"b1":"2",
"b2":"2"
}
},
{
"C":"4",
"D":{
"d1":"5",
"d2":"6"
}
}
],
"Z":[
{
"E":{
"e1":"7",
"e2":"8"
},
"F":"9"
}
]
}
}
Second JSON Object
{
"X":{
"Y":[
{
"C":"4",
"D":{
"d1":"5",
"d2":"6"
}
},
{
"A":"1",
"B":{
"b1":"2",
"b2":"2"
}
}
],
"Z":[
{
"E":{
"e1":"7",
"e2":"8"
},
"F":"9"
}
]
}
}
So the goal is to generate the same hash value for both JSON objects. How can I achieve this in Python or Golang?

A JSON object is unordered, as is the map[string]interface{} it decodes into, so to compare the decoded values you would have to sort them first. However, sorting can be complicated and time-consuming.
Instead of working with the decoded objects, I would treat the JSON as a string: the JSON can still be unmarshaled when needed, and the characters of the string can be sorted to create the SHA value.
var json1 = `
{
"X": {
"Y": [
{
"A": "1",
"B": {
"b1": "2",
"b2": "2"
}
},
{
"C": "4",
"D": {
"d1": "5",
"d2": "6"
}
}
],
"Z": [
{
"E": {
"e1": "7",
"e2": "8"
},
"F": "9"
}
]
}
}
`
var json2 = `
{
"X": {
"Y": [
{
"C": "4",
"D": {
"d1": "5",
"d2": "6"
}
},
{
"A": "1",
"B": {
"b1": "2",
"b2": "2"
}
}
],
"Z": [
{
"E": {
"e1": "7",
"e2": "8"
},
"F": "9"
}
]
}
}
`
// JSON is the concrete type that main decodes the sample documents json1 and
// json2 into, as an alternative to decoding them into interface{}.
type JSON struct {
X struct {
// Every element of Y has all four fields A, B, C and D. The sample
// elements set only A/B or only C/D, so the other pair keeps its zero value.
Y []struct {
A string `json:"A"`
B struct {
B1 string `json:"b1"`
B2 string `json:"b2"`
} `json:"B"`
C string `json:"C"`
D struct {
D1 string `json:"d1"`
D2 string `json:"d2"`
} `json:"D"`
} `json:"Y"`
// Elements of Z hold the nested object E and the string F.
Z []struct {
E struct {
E1 string `json:"e1"`
E2 string `json:"e2"`
} `json:"E"`
F string `json:"F"`
} `json:"Z"`
} `json:"X"`
}
// SortString returns the characters of w rearranged into ascending order.
// The input is split into single-character strings, those are sorted
// lexically, and the pieces are concatenated back into one string.
func SortString(w string) string {
	chars := strings.Split(w, "")
	sort.Sort(sort.StringSlice(chars))

	var sorted strings.Builder
	for _, ch := range chars {
		sorted.WriteString(ch)
	}
	return sorted.String()
}
// main compares the two sample documents in three ways and prints the result
// of each comparison: decoded into generic values, decoded into the JSON
// struct, and as strings whose characters have been sorted.
//
// Decoding errors abort the program instead of being ignored; otherwise a
// malformed document would be compared as a zero value and could produce a
// misleading "true".
func main() {
	// Generic decoding: JSON arrays become slices, and reflect.DeepEqual
	// compares slices element by element, so array order matters here.
	var v1, v2 interface{}
	if err := json.Unmarshal([]byte(json1), &v1); err != nil {
		panic(err)
	}
	if err := json.Unmarshal([]byte(json2), &v2); err != nil {
		panic(err)
	}
	fmt.Println(reflect.DeepEqual(v1, v2))

	// Typed decoding: the Y and Z slices are compared in order as well.
	var m1, m2 JSON
	if err := json.Unmarshal([]byte(json1), &m1); err != nil {
		panic(err)
	}
	if err := json.Unmarshal([]byte(json2), &m2); err != nil {
		panic(err)
	}
	fmt.Println(reflect.DeepEqual(m1, m2))

	// Character-level comparison: both strings are replaced by their sorted
	// form, so the position of every character no longer matters.
	json1 = SortString(json1)
	json2 = SortString(json2)
	fmt.Println(json1 == json2)
}
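Keep in mind that the objects are unordered, so you should evaluate whether writing a JSONSort function is worthwhile (considering that the JSON could be different each time). Also note that sorting the characters only verifies that both strings contain the same characters: two different documents built from the same characters, such as {"a":"b"} and {"b":"a"}, would compare as equal too.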
go run json.go
false
false
true

Related

Elastic Search query list with sublist

I have an index in Elastic that contains an array of keys and values.
For example - a single document looks like this:
{
"_index": "my_index",
"_source": {
"name": "test",
"values": [
{
"name": "a",
"score": 10
},
{
"name": "b",
"score": 4
},
{
"name": "c",
"score": 2
},
{
"name": "d",
"score": 1
}
]
},
"fields": {
"name": [
"test"
],
"values.name.keyword": [
"a",
"b",
"c",
"d"
],
"name.keyword": [
"test"
],
"values.score": [
10,
4,
2,
1
],
"values.name": [
"a",
"b",
"c",
"d"
]
}
}
I want to create an Elastic query (through the API) that retrieves the sum of the scores, filtered by a list of names.
For example, for the input:
names = ['a', 'b']
The result will be: 14
Any idea how to do it?
You can do this by making the values array nested. Example mapping:
{
"mappings": {
"properties": {
"values": { "type": "nested" }
}
}
}
The following query will give the result you want:
{
"size":0,
"aggs": {
"asd": {
"nested": {
"path": "values"
},
"aggs": {
"filter_agg": {
"filter": {
"terms": {
"values.name.keyword": [
"a",
"b"
]
}
},
"aggs": {
"sum": {
"sum": {
"field": "values.score"
}
}
}
}
}
}
}
}

Flatten/merge dictionary with nested dictionaries

I've got the following dictionary example:
z1 = {
"ZH": {
"hosts": {
"zhsap001.domain.com": {
"active": "y",
"ip": "11.111.11.10",
"zone": "North"
},
"zhsap002.domain.com": {
"active": "y",
"ip": "11.111.11.11",
"zone": "North"
}
}
},
"BE": {
"hosts": {
"besap001.domain.com": {
"active": "y",
"ip": "22.222.2.20",
"zone": "Center"
},
"besap002.domain.com": {
"active": "y",
"ip": "10.214.4.58",
"zone": "Center"
}
}
}
}
And I'd like to "flatten" it to:
z2 = {
"zhsap001.domain.com": {
"active": "y",
"ip": "11.111.11.10",
"zone": "North"
},
"zhsap002.domain.com": {
"active": "y",
"ip": "11.111.11.11",
"zone": "North"
},
"besap001.domain.com": {
"active": "y",
"ip": "22.222.2.20",
"zone": "Center"
},
"besap002.domain.com": {
"active": "y",
"ip": "10.214.4.58",
"zone": "Center"
}
}
I can create z2 from z1 by running:
# Walk the three levels of z1 (outer values, then their values, then the
# key/value pairs of the innermost dictionaries) and copy every pair into z2.
z2 = {}
for a in z1.values():
for b in a.values():
for (c,d) in b.items():
z2.update({c:d})
But I would like to achieve the same in a more Pythonic manner, using a
comprehension expression or a lambda function.
You could one-line it, but it won't necessarily be more readable that way.
# Three-level traversal of z1 in a single dict comprehension: outer values,
# then their values, then the (key, value) pairs that end up in z2.
z2 = {c_key: c for a in z1.values() for b in a.values() for c_key, c in b.items()}
I'd recommend naming the variables more clearly (I'm guessing what they mean here, you might want to change it)
# Flatten z1: for every outer entry, go through its inner dictionaries and
# collect each (hostname, details) pair into one flat dictionary.
z2 = {
    hostname: details
    for region in z1.values()
    for hosts in region.values()
    for hostname, details in hosts.items()
}
Also, you could improve your original code
# Start from an empty dict and merge each innermost dictionary into it whole,
# instead of copying its items one by one.
z2 = {}
for a in z1.values():
for b in a.values():
z2 |= b
# The above requires python >= 3.9, alternatively use the below
# z2.update(b)

Pull values from the JSON in order - first all the first character, then all the seconds and etc

This is JSON:
"y": {
"titleOne": {
"a": [{
"ss": "one"
}, {
"ss": "two"
}
]
},
"titleTwo": {
"a": [{
"ss": "one"
}, {
"ss": "two"
}, {
"ss": "thee"
}
]
},
..........
This is my current code:
# Visit the sections of y one after another and print every "ss" value of a
# section before moving on to the next section.
for i in y:
for c in y[i]["a"]:
print(c["ss"])
This code will simply print all values in order:
one
two
one
two
three
but I need the loop to take the first value from each section, then the second value from each section, and so on, so that it returns
one
one
two
two
three
Maybe something like this:
y = {"titleOne": {"a": [{"ss": "one"}, {"ss": "two"}]},
     "titleTwo": {"a": [{"ss": "one"}, {"ss": "two"}, {"ss": "three"}]}}
# The longest "a" list determines how many rounds are needed.
depth = max(len(section["a"]) for section in y.values())
# Round i prints the i-th entry of every section that still has one.
for i in range(depth):
    for section in y.values():
        entries = section["a"]
        if i < len(entries):
            print(entries[i]["ss"])
one
one
two
two
three
Just for experimentation, you can give zip_longest a try. This code is not written with efficiency in mind; ideally, generators would be used.
from itertools import zip_longest

d = {"y": {
    "titleOne": {
        "a": [{
            "ss": "one"
        }, {
            "ss": "two"
        }
        ]
    },
    "titleTwo": {
        "a": [{
            "ss": "one"
        }, {
            "ss": "two"
        }, {
            "ss": "thee"
        }
        ]
    }
}}
# One list of "ss" values per title, in the order the titles appear in d["y"].
ll = [[k['ss'] for k in j] for j in [i['a'] for i in d['y'].values()]]
# Iterate over ll (the original referred to an undefined name here).
# zip_longest pads the shorter lists with None; filter(bool, ...) drops that
# padding (and any other falsy value) before each round is printed on one line.
for i in zip_longest(*ll):
    print(*filter(bool, i))

How to convert a string into int with python library pyjq?

How can I use pyjq to convert incoming String into integer?
As of now, pyjq returns a string for the x attribute, and I understand why it's doing that, but isn't there a function that I can add to the schema in order to convert it into an integer?
Code:
# apply incoming data to template
# Passes whatever import_template(template_name) returns, together with
# `data`, to pyjq.one and stores the outcome in `result`.
# NOTE(review): the snippet ends without returning or using `result` —
# presumably the rest of the function was left out of the question; confirm.
def apply(template_name: str, data):
result = pyjq.one(
import_template(template_name),
data
)
Incoming event data:
"dynamodb": {
"NewImage": {
"x": {
"N": "3"
},
Schema template:
"event": {
"data": {
"x": .dynamodb.NewImage.x.N,
}
}
Output:
"event": {
"data": {
"x": "3",
}
}
Looking for a solution to get output like below:
Output:
"event": {
"data": {
"x": 3,
}
}
jq can convert a string to a number using the tonumber filter.
Change the template to:
"event": {
"data": {
"x": .dynamodb.NewImage.x.N | tonumber
}
}

I want to create a multi nested json from my pandas dataframe

I have a pandas data frame in the following format :-
EMPCODE|Indicator|CustNAME
1 CASA Raja
1 CASA lala
1 CASA dada
1 TL Nan
1 l Nan
1 p Nan
1 q Nan
2 CASA Crick
2 CASA Flick
2 TL Nan
2 l Nan
2 p Nan
2 q Nan
I want to convert this into a nested json .
I've tried various methods, including groupby() and apply(), but I can't get the output in the required JSON format. With the code below, I am getting the same CustNAME values for both employees.
# For each index in range(a), build a dictionary d from the variable-th
# EMPCODE_y / EMPNAME group: its indicator names (de-duplicated while keeping
# their first-seen order) and, per indicator, the non-null CUSTNAME values.
# NOTE(review): the 'Performance' lookup filters the whole of merge_hr_anr by
# IndicatorName only, not by the current group — presumably why every
# employee ends up with the same customer names; confirm.
group = merge_hr_anr.groupby('EMPCODE_y').groups
group1 = merge_hr_anr.groupby("EMPNAME").groups
for variable in range(a):
d = {'EMPCODE_y': list(group.keys())[variable],'EMPNAME':
list(group1.keys())[variable] ,'Indicators': [{'IndicatorName':
merge_hr_anr.loc[i, 'IndicatorName']} for i in list(group.values())
[variable].unique()]}
d['Indicators'] = list(map(dict,sorted(set(map(lambda x:
tuple(x.items()),d['Indicators'])), key=list(map(lambda x:
tuple(x.items()),d['Indicators'])).index)))
d['Performance'] = [{i['IndicatorName']:
(merge_hr_anr.loc[merge_hr_anr['IndicatorName'].eq(i['IndicatorName']),"CUSTNAME"]).dropna().tolist()} for i in d['Indicators']]
My output is
{
"EMPCODE": "1",
"Indicators": [
{
"IndicatorName": "CASA"
},
{
"IndicatorName": "TL"
},
{
"IndicatorName": "l"
},
{
"IndicatorName": "p"
},
{
"IndicatorName": "q"
}
]
"Performance":[
{
"CASA":[{"Custname":"Raja"},{"Custname":"lala"},{"Custname":"dada"}]
},
{
"TL":[]
}
{
"l":[]
}
{
"p":[]
}
{
"q":[]
}
]
}
{
"EMPCODE": "2",
"Indicators": [
{
"IndicatorName": "CASA"
},
{
"IndicatorName": "TL"
},
{
"IndicatorName": "l"
},
{
"IndicatorName": "p"
},
{
"IndicatorName": "q"
}
]
"Performance":[
{
"CASA":[{"Custname":"Raja"},{"Custname":"lala"},{"CustName":"dada"}]
},
{
"TL":[]
}
{
"l":[]
}
{
"p":[]
}
{
"q":[]
}
]
}
i want the output to be
{
"EMPCODE": "1",
"Indicators": [
{
"IndicatorName": "CASA"
},
{
"IndicatorName": "TL"
},
{
"IndicatorName": "l"
},
{
"IndicatorName": "p"
},
{
"IndicatorName": "q"
}
]
"Performance":[
{
"CASA":[{"Custname":"Raja"},{"Custname":"lala"},{"Custname":"dada"}]
},
{
"TL":[]
}
{
"l":[]
}
{
"p":[]
}
{
"q":[]
}
]
}
{
"EMPCODE": "2",
"Indicators": [
{
"IndicatorName": "CASA"
},
{
"IndicatorName": "TL"
},
{
"IndicatorName": "l"
},
{
"IndicatorName": "p"
},
{
"IndicatorName": "q"
}
]
"Performance":[
{
"CASA":[{"Custname":"Crick"},{"Custname":"Flick"}]
},
{
"TL":[]
}
{
"l":[]
}
{
"p":[]
}
{
"q":[]
}
]
}
Try the below code with constructing a dictionary:
# Build one record per employee. Iterating over the groupby object yields the
# rows that belong to each EMPCODE, so the customer names are looked up only
# within that employee's rows rather than in the whole frame (which would give
# every employee the same customers).
records = []
for empcode, rows in merge_hr_anr.groupby('EMPCODE'):
    # unique() keeps the indicators in order of first appearance.
    indicator_names = rows['Indicator'].unique().tolist()
    records.append({
        'EMPCODE': empcode,
        'Indicators': [{'IndicatorName': name} for name in indicator_names],
        'Performance': [
            # dropna() removes missing customer names, so indicators without
            # customers map to an empty list.
            {name: rows.loc[rows['Indicator'].eq(name), 'CustNAME'].dropna().tolist()}
            for name in indicator_names
        ],
    })
# d keeps its original meaning: the record of the first employee.
# The records of all employees are available in `records`.
d = records[0]
print(d)
Output:
{'EMPCODE': 1, 'Indicators': [{'IndicatorName': 'CASA'}, {'IndicatorName': 'TL'}, {'IndicatorName': 'l'}, {'IndicatorName': 'p'}, {'IndicatorName': 'q'}], 'Performance': [{'CASA': ['Raja', 'lala', 'dada']}, {'TL': []}, {'l': []}, {'p': []}, {'q': []}]}
To write a .json file:
import json

# json.dump writes real JSON (double-quoted keys and strings); str(d) would
# write Python's repr of the dict, which a JSON parser rejects.
with open('outvalue7.json', 'w') as f:
    # default= is called for values json cannot serialise natively. Here that
    # is presumably the numpy integer EMPCODE coming from groupby (TODO
    # confirm); .item() converts such a scalar to a plain Python number.
    json.dump(d, f, indent=2, default=lambda value: value.item())

Categories

Resources