Read complex json file in python

Read complex json file in python - python

Actual Json is:
{
"title1": {
"titleID": "1234",
"titlename": "a-b-c"
},
"title2": [
{
"block": "0.0.0.0/26",
"abc_id": "abc-0123",
"tags": [{ "key": "Name", "value": "abc-name"},
{ "key": "env", "value": "dev"}]
},
{
"block": "1.2.0.0/26",
"abc_id": "abc-4567"
},
{
"block": "0.0.0.0/26",
"abc_id": "abc-8999",
"tags": [{ "key": "Name", "value": "xyz-name"}]
},
{
"block": "0.0.0.0/26",
"abc_id": "abc-7766",
"tags": [{ "app": "Name", "value": "web-app"}]
}
]
}
My Code is
# Load the JSON document, then walk it key by key to collect the title id,
# each entry's abc_id and (when present) the value stored under "Name".
with open('/tmp/temp.json') as access_json:
    read_content = json.load(access_json)

for top_key, top_value in read_content.items():
    if top_key == "title1":
        title_id = top_value['titleID']
    elif top_key == "title2":
        for entry in read_content['title2']:
            for field_name, field_value in entry.items():
                if field_name == "abc_id":
                    abc_id = field_value
                elif field_name == "tags":
                    # NOTE: 'tags' is looked up on the top-level document here,
                    # not on the current title2 entry; this is the lookup that
                    # raises KeyError: 'tags'.
                    for tag in read_content['tags']:
                        for tag_key, tag_value in tag.items():
                            if tag_key == "Name":
                                abc_name = tag_value
and the error is:
Traceback (most recent call last):
File "/tmp/runscript.py", line 123, in <module>
runpy.run_path(temp_file_path, run_name='__main__')
File "/usr/local/lib/python3.6/runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "/usr/local/lib/python3.6/runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "/usr/local/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/tmp/glue-python-scripts-lw031e0z/tsf_dev.py", line 160, in <module>
KeyError: 'tags'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/tmp/runscript.py", line 142, in <module>
raise e_type(e_value).with_traceback(new_stack)
File "/tmp/glue-python-scripts-lw031e0z/tsf_dev.py", line 160, in <module>
KeyError: KeyError('tags',)
Reason: Not every item in the title2 list contains "tags". So, if an item has no 'tags', or none of its tags has the key "Name", then abc_name should be ''.
i need the list of lists
(titleID, abc_id, abc_name).
Expected output :
['1234','abc-0123','abc-name']
['1234','abc-4567','']
['1234','abc-8999','xyz-name']
['1234','abc-7766','']
There is a dictionary of "title2",
and it contains abc_id and few items contains "tags" as well.
If there is no tags, then the abc-name should be ''.
If there is no Key: "name", then the abc-name should be ''.
If an item has tags and one of them has "key": "Name", then the abc-name should be the "value" of that tag, i.e. the "value" of the entry in title2[...]["tags"] whose "key" is "Name".

You have too many if statements and for-loops to properly handle your code. Use the default option for the dictionary get method to handle the cases where the data doesn't exist like shown below.
def find_tag_value(tags, key='Name', default=''):
    """Return the ``value`` of the first tag whose ``key`` field equals *key*.

    Args:
        tags: Iterable of tag dicts shaped like ``{"key": ..., "value": ...}``.
        key: Tag key to search for.
        default: Returned when no tag matches, or the matching tag has no
            ``value`` entry.
    """
    for tag in tags:
        if tag.get('key') == key:
            # Stop at the first match so a later, unrelated tag cannot
            # overwrite the name that was already found.
            return tag.get('value', default)
    return default


def build_rows(read_content):
    """Return one ``[titleID, abc_id, abc_name]`` list per ``title2`` entry.

    Missing pieces (no ``title1``, no ``abc_id``, no ``tags``, or no tag whose
    key is ``"Name"``) are reported as an empty string instead of raising.
    """
    title_id = read_content.get('title1', {}).get('titleID', '')
    return [
        [title_id, block.get('abc_id', ''), find_tag_value(block.get('tags', []))]
        for block in read_content.get('title2', [])
    ]


if __name__ == '__main__':
    import json

    with open('/tmp/temp.json') as access_json:
        read_content = json.load(access_json)
    for vals in build_rows(read_content):
        print(vals)

# Output for the JSON in the question (exactly one row per title2 entry):
# ['1234', 'abc-0123', 'abc-name']
# ['1234', 'abc-4567', '']
# ['1234', 'abc-8999', 'xyz-name']
# ['1234', 'abc-7766', '']

Related

[instance segmentation]pixellib fails to remove _background_ label in labelme on custom dataset

I am learning to use pixellib for instance segmentation.
I used labelme to mark 4 categories of objects.According to the document of labelme, I used the label _background_ to mark the parts that do not belong to the object.
The 4 categories of objects are plastic bag,drink bottle,banana,apple.
When I use this code to train my own dataset:
from pixellib.custom_train import instance_custom_training

# Class list exactly as passed in the question; "_background_" is the labelme
# label used for regions that belong to no object.
class_names = ["_background_", "apple", "banana", "drinkBottle", "plasticBag"]

train_maskrcnn = instance_custom_training()
train_maskrcnn.modelConfig(
    network_backbone="resnet101",
    num_classes=4,
    batch_size=4,
    class_names=class_names,
)
train_maskrcnn.load_pretrained_model("mask_rcnn_coco.h5")
train_maskrcnn.load_dataset(r'myData')
train_maskrcnn.train_model(
    num_epochs=300,
    augmentation=True,
    path_trained_models=r"D:/pythonProjects/model",
)
I find that the categories in test.json and train.json are:
"categories": [
{
"supercategory": "apple",
"id": 1,
"name": "apple"
},
{
"supercategory": "_background_",
"id": 2,
"name": "_background_"
},
{
"supercategory": "banana",
"id": 3,
"name": "banana"
},
{
"supercategory": "plasticBag",
"id": 4,
"name": "plasticBag"
},
{
"supercategory": "drinkBottle",
"id": 5,
"name": "drinkBottle"
}
]
And the error message is:
Traceback (most recent call last):
File "D:/pythonProjects/test_mask.py", line 6, in <module>
train_maskrcnn.train_model(num_epochs = 300, augmentation=True, path_trained_models = r"D:/pythonProjects/model")
File "D:\Anaconda38\lib\site-packages\pixellib\custom_train\__init__.py", line 124, in train_model
self.model.train(self.dataset_train, self.dataset_test,models = path_trained_models, augmentation = augmentation,
File "D:\Anaconda38\lib\site-packages\pixellib\instance\mask_rcnn.py", line 2307, in train
self.keras_model.fit(
File "D:\Anaconda38\lib\site-packages\keras\engine\training_v1.py", line 776, in fit
return func.fit(
File "D:\Anaconda38\lib\site-packages\keras\engine\training_generator_v1.py", line 570, in fit
return fit_generator(
File "D:\Anaconda38\lib\site-packages\keras\engine\training_generator_v1.py", line 252, in model_iteration
batch_outs = batch_function(*batch_data)
File "D:\Anaconda38\lib\site-packages\keras\engine\training_v1.py", line 1048, in train_on_batch
x, y, sample_weights = self._standardize_user_data(
File "D:\Anaconda38\lib\site-packages\keras\engine\training_v1.py", line 2323, in _standardize_user_data
return self._standardize_tensors(
File "D:\Anaconda38\lib\site-packages\keras\engine\training_v1.py", line 2351, in _standardize_tensors
x = training_utils_v1.standardize_input_data(
File "D:\Anaconda38\lib\site-packages\keras\engine\training_utils_v1.py", line 642, in standardize_input_data
raise ValueError('Error when checking ' + exception_prefix +
ValueError: Error when checking input: expected input_image_meta to have shape (17,) but got array with shape (18,)
I know that _background_ is a special category and should not be trained as a category, and _background_ should not appear under the node categories in test.json and train.json, so how should I modify my code?

ForwardReference NameError when loading a recursive dict in dataclass

I'm using marshmallow-dataclass to load a json which represents a sequence of rules where each rule is represented by a LogicalGroup and applies a logical operator on its child expressions, knowing that an expression can itself be a LogicalGroup.
The input dict follows this structure:
import marshmallow_dataclass
from dataclasses import field
from api_handler import BaseSchema
from typing import Sequence, Union, Literal, Type, List, ForwardRef, TypeVar, Generic
# Sample payload: a single top-level "and" group whose last expression is
# itself a nested "or" group.
filter_input = {
    "rules": [
        {
            "groupOperator": "and",
            "expressions": [
                {"field": "xxxxx", "operator": "eq", "value": 'level1'},
                {"field": "xxxxx", "operator": "eq", "value": 'm'},
                {"field": "xxxxx", "operator": "eq", "value": "test"},
                {
                    "groupOperator": "or",
                    "expressions": [
                        {"field": "xxxx", "operator": "eq", "value": 'level2'},
                        {"field": "xxxx", "operator": "eq", "value": 'm'},
                        {"field": "xxxx", "operator": "eq", "value": "test"},
                    ],
                },
            ],
        },
    ],
}
The dataclasses i'm using for this purpose are the following :
# The decorator must be applied with "@": written as a "#" comment it is
# ignored and the class is left as a plain class with no generated schema.
@marshmallow_dataclass.dataclass(base_schema=BaseSchema)
class Expression:
    """Leaf rule made of a field name, an operator and a value (all strings)."""

    field: str
    operator: str
    value: str
# The decorator must be applied with "@": written as a "#" comment it is
# ignored and the class is left as a plain class with no generated schema.
@marshmallow_dataclass.dataclass(base_schema=BaseSchema)
class LogicalGroup:
    """Logical operator applied to a list of child expressions.

    Each child is either a leaf ``Expression`` or another ``LogicalGroup``,
    hence the string forward reference in the annotation.
    """

    group_operator: str
    expressions: List[Union['LogicalGroup', Expression]] = field(default_factory=list)
# The decorator must be applied with "@": it is what provides the ``Schema``
# attribute used by ``Filter.Schema().load(...)``.
@marshmallow_dataclass.dataclass(base_schema=BaseSchema)
class Filter:
    """Top-level container holding the list of rule groups."""

    rules: List[LogicalGroup] = field(default_factory=list)
The problem is when i try to load the dict using the Filter dataclass i get the following error
filt = Filter.Schema().load(filter_input)
Traceback (most recent call last):
File "/home/adam/billing/billing/filter/filter.py", line 96, in <module>
filt = Filter.Schema().load(filter_input)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow_dataclass/__init__.py", line 628, in load
all_loaded = super().load(data, many=many, **kwargs)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 725, in load
return self._do_load(
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 859, in _do_load
result = self._deserialize(
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 667, in _deserialize
value = self._call_and_store(
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 496, in _call_and_store
value = getter_func(data)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 664, in <lambda>
getter = lambda val: field_obj.deserialize(
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/fields.py", line 354, in deserialize
output = self._deserialize(value, attr, data, **kwargs)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/fields.py", line 726, in _deserialize
result.append(self.inner.deserialize(each, **kwargs))
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/fields.py", line 354, in deserialize
output = self._deserialize(value, attr, data, **kwargs)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/fields.py", line 609, in _deserialize
return self._load(value, data, partial=partial)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/fields.py", line 592, in _load
valid_data = self.schema.load(value, unknown=self.unknown, partial=partial)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow_dataclass/__init__.py", line 628, in load
all_loaded = super().load(data, many=many, **kwargs)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 725, in load
return self._do_load(
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 859, in _do_load
result = self._deserialize(
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 667, in _deserialize
value = self._call_and_store(
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 496, in _call_and_store
value = getter_func(data)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/schema.py", line 664, in <lambda>
getter = lambda val: field_obj.deserialize(
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/fields.py", line 354, in deserialize
output = self._deserialize(value, attr, data, **kwargs)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/fields.py", line 726, in _deserialize
result.append(self.inner.deserialize(each, **kwargs))
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow/fields.py", line 354, in deserialize
output = self._deserialize(value, attr, data, **kwargs)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/marshmallow_dataclass/union_field.py", line 56, in _deserialize
typeguard.check_type(attr or "anonymous", result, typ)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/typeguard/__init__.py", line 655, in check_type
expected_type = resolve_forwardref(expected_type, memo)
File "/home/adam/thanos-envv/lib/python3.9/site-packages/typeguard/__init__.py", line 198, in resolve_forwardref
return evaluate_forwardref(maybe_ref, memo.globals, memo.locals, frozenset())
File "/usr/lib/python3.9/typing.py", line 533, in _evaluate
eval(self.__forward_code__, globalns, localns),
File "<string>", line 1, in <module>
NameError: name 'LogicalGroup' is not defined
I'm guessing the problem comes from declaring LogicalGroup as a ForwardRef inside the Union type hint, because when I use only
Union['LogicalGroup'] and modify my dict to be a nested dict of LogicalGroups without the Expressions, it works fine.
Does anyone have any idea what the source of the bug is? Or maybe a suggestion for addressing this problem in another way?
Thanks in advance !

ValueError: Length of values does not match length of index - json_normalize

I'm using the below code to parse a json response to a pandas dataframe and then convert the nested json 'terms'
Sample json:
{
"conditions": [
{
"id": 3855792,
"type": "apm_app_metric",
"name": "Error percentage (2.5%)",
"enabled": true,
"entities": [
"3418936",
"39720385",
"70369692",
"3242026",
"3744556",
"5673838"
],
"metric": "error_percentage",
"condition_scope": "application",
"terms": [
{
"duration": "5",
"operator": "above",
"priority": "critical",
"threshold": "2.5",
"time_function": "any"
}
]
}
]
}
Code:
import requests
import json
import pandas as pd,os
from pandas.io.json.normalize import json_normalize
# Decode the response body, then flatten each condition's 'terms' list into
# rows, carrying the listed condition-level fields along as meta columns.
json_data = json.loads(response.text)
meta_fields = ['condition_scope', 'enabled', 'id', 'metric', 'name', 'type', 'entities']
terms_data = json_normalize(
    json_data['conditions'],
    record_path='terms',
    meta=meta_fields,
)
However, the script is failing while trying to parse 'entities' into the dataframe with the below error
meta = ['condition_scope', 'enabled', 'id', 'metric', 'name', 'type', 'entities'])
File "C:\Users\M1049639\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\io\json\normalize.py", line 279, in json_normalize
result[k] = np.array(v).repeat(lengths)
File "C:\Users\M1049639\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\frame.py", line 3119, in __setitem__
self._set_item(key, value)
File "C:\Users\M1049639\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\frame.py", line 3194, in _set_item
value = self._sanitize_column(key, value)
File "C:\Users\M1049639\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\frame.py", line 3391, in _sanitize_column
value = _sanitize_index(value, self.index, copy=False)
File "C:\Users\M1049639\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pandas\core\series.py", line 4001, in _sanitize_index
raise ValueError('Length of values does not match length of ' 'index')
ValueError: Length of values does not match length of index
Desired output:
output
Final output:
output

(InvalidChangeBatch) when calling the ChangeResourceRecordSets operation - using boto3 to update resource record

I am trying to update a number of CNAME records to A hosted on Route53 using boto3, here is my function:
def change_resource_record(domain, zone_id, hosted_zone_id, balancer_id):
    """Send an UPSERT for an alias A record named *domain*.

    Args:
        domain: Record name to upsert.
        zone_id: Hosted zone id passed as ``HostedZoneId`` of the request.
        hosted_zone_id: Hosted zone id placed in the alias target.
        balancer_id: Value placed in the alias target's ``DNSName``.
    """
    print(domain, zone_id, hosted_zone_id, balancer_id)

    alias_target = {
        "HostedZoneId": hosted_zone_id,
        "DNSName": balancer_id,
        "EvaluateTargetHealth": False,
    }
    record_set = {
        "Name": domain,
        "Type": "A",
        "AliasTarget": alias_target,
    }
    change_batch = {
        "Comment": "Automatic DNS update",
        "Changes": [
            {"Action": "UPSERT", "ResourceRecordSet": record_set},
        ],
    }
    # `client` is the module-level client object defined outside this snippet.
    client.change_resource_record_sets(
        HostedZoneId=zone_id,
        ChangeBatch=change_batch,
    )
I get this error:
Traceback (most recent call last):
File "load_balancer.py", line 138, in <module>
get_balancer(domain)
File "load_balancer.py", line 135, in get_balancer
change_resource_record(domain, zone_id, hosted_zone_id, balancer_id)
File "load_balancer.py", line 116, in change_resource_record
"EvaluateTargetHealth": False
File "C:\Python36\lib\site-packages\botocore\client.py", line 312, in _api_call
return self._make_api_call(operation_name, kwargs)
File "C:\Python36\lib\site-packages\botocore\client.py", line 601, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.errorfactory.InvalidChangeBatch: An error occurred (InvalidChangeBatch) when calling the ChangeResourceRecordSets operation: RRSet of type A with DNS name suffix.domain.tld. is not permitted because a conflicting RRSet of type CNAME with the same DNS name already exists in zone domain.tld.
What is the correct way to update the record, should I delete the entry and then re-create it?
Any advice is much appreciated.

I had a missing closing period in the domain, so changing to this solved my issue:
"ResourceRecordSet": {
"Name": domain + '.',
"Type": "A",
"AliasTarget": {
"HostedZoneId": hosted_zone_id,
"DNSName": balancer_id,
"EvaluateTargetHealth": False
}
}

Reading a Json response recursively with python

I'm trying to print all the "keys, values" from a JSON response without knowing the key names (without using the syntax json['example'], for example). I'm doing this with a recursive function that uses iteritems(), but I'm having some problems:
This is the Json response that I'm trying to read:
{"servers": [{"id": "a059eccb-d929-43b2-8db3-b32b6201d60f", "links": [{"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}, {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}], "name": "birk"}]}
This is the function that I'm using:
def format_main_response(self, json_string):
    """Print the keys and values of *json_string*, recursing into list values.

    Python 2 code (relies on ``dict.iteritems``).
    """
    print("json: " + json_string)
    content = json.loads(str(json_string))
    for key, value in content.iteritems():
        print(key)
        if not isinstance(value, list):
            print(value)
            continue
        # Re-serialise the list and drop the surrounding brackets before
        # recursing. For a list with several elements this leaves
        # "{...}, {...}", which json.loads rejects with "Extra data".
        strg = str(json.dumps(value)).strip('[]')
        self.format_main_response(strg)
I'm using the strip function to take out all the '[ ]' from my json string. If I didn't do that I got an error when trying to load it using 'json.loads()' function.
Traceback (most recent call last):
File "main.py", line 135, in <module>
formatter.format_main_response(nova_API.list_servers())
File "/home/python/jsonformatter.py", line 51, in format_main_response
self.format_main_response(strg, mod)
File "/home/python/jsonformatter.py", line 51, in format_main_response
self.format_main_response(strg, mod)
File "/home/python/jsonformatter.py", line 31, in format_main_response
for key, value in content.iteritems():
AttributeError: 'list' object has no attribute 'iteritems'
My problem is that in some point the json that should be printed looks like this, without the '[ ]':
{"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}, {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}
When the function tries to find the 'key,value' from this json, I got this error:
Traceback (most recent call last): File "main.py", line 135, in <module>
formatter.format_main_response(nova_API.list_servers())
File "/home/python/jsonformatter.py", line 34, in format_main_response
self.format_main_response(strg)
File "/home/python/jsonformatter.py", line 34, in format_main_response
self.format_main_response(strg)
File "/home/python/jsonformatter.py", line 28, in format_main_response
content = json.loads(str(json_string))
File "/usr/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 369, in decode
raise ValueError(errmsg("Extra data", s, end, len(s)))
ValueError: Extra data: line 1 column 135 - line 1 column 273 (char 135 - 273)
What should I do in this case? Or any other way to get the same result?

Use that:
def format_main_response(json_string):
    """Print every key and scalar value of a JSON object, recursively.

    Each nested object (a dict value, or a dict inside a list) is serialised
    again and handled by a recursive call, so a ``json: ...`` line is printed
    for every object visited. Works on Python 2 and Python 3.

    Args:
        json_string: JSON text whose top-level value is an object.
    """
    print("json: " + json_string)
    content = json.loads(str(json_string))
    for key, value in content.items():
        print(key)
        if isinstance(value, list):
            for sub_value in value:
                if isinstance(sub_value, dict):
                    format_main_response(json.dumps(sub_value))
                else:
                    # Scalars (and nested lists) have no keys to walk;
                    # recursing on them would fail, so print them as-is.
                    print(sub_value)
        elif isinstance(value, dict):
            format_main_response(json.dumps(value))
        else:
            print(value)
That's the result:
~$ python test_pdb.py
json: {"servers": [{"id": "a059eccb-d929-43b2-8db3-b32b6201d60f", "links": [{"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}, {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}], "name": "birk"}]}
servers
json: {"id": "a059eccb-d929-43b2-8db3-b32b6201d60f", "links": [{"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}, {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}], "name": "birk"}
id
a059eccb-d929-43b2-8db3-b32b6201d60f
links
json: {"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"}
href
http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f
rel
self
json: {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"}
href
http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f
rel
bookmark
name
birk

How about:
# Despite its name, jsonStr holds two already-decoded objects in a tuple;
# json.dumps serialises the tuple as a JSON array.
jsonStr = (
    {"href": "http://192.168.100.142:8774/v2/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "self"},
    {"href": "http://192.168.100.142:8774/2ad1fc162c254e59bea043560b7f73cb/servers/a059eccb-d929-43b2-8db3-b32b6201d60f", "rel": "bookmark"},
)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(json.dumps(jsonStr, sort_keys=True, indent=2, separators=(',', ': ')))
This should give you the format you want

Code below recursively traverses the json response and prints the key,value pairs:
Trick is to load json response only once in the main and then recursively traverse the response:
def parse_json_response(content):
    """Recursively print every key and scalar value of a decoded JSON document.

    Dicts print each key and then descend into its value; lists descend into
    every element; anything else is printed as a value. Documents with a
    single top-level key and values nested inside lists are handled too.
    Works on Python 2 and Python 3.

    Args:
        content: Result of ``json.loads``: a dict, a list or a scalar.
    """
    if isinstance(content, dict):
        for key, value in content.items():
            print("key :  %s" % (key,))
            parse_json_response(value)
    elif isinstance(content, list):
        for item in content:
            parse_json_response(item)
    else:
        print("Value %s" % (content,))


if __name__ == '__main__':
    # `response` is the raw JSON text obtained elsewhere; decode it once and
    # traverse the decoded structure.
    content = json.loads(str(response))
    parse_json_response(content)
Hope it helps.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Read complex json file in python - python

Related

[instance segmentation]pixellib fails to remove _background_ label in labelme on custom dataset

ForwardReference NameError when loading a recursive dict in dataclass

ValueError: Length of values does not match length of index - json_normalize

(InvalidChangeBatch) when calling the ChangeResourceRecordSets operation - using boto3 to update resource record

Reading a Json response recursively with python

Categories

Resources