I have an input variable (stud_id), a list (sub_code) and an array (data) with the values below.
stud_id: 10
sub_code: ['002', '003', '007']
data: [array([['867192', '5545']], dtype=object), array([['964433', '0430']], dtype=object), array([['965686', '2099']], dtype=object)]
How to convert the above input into json format like this?
stud_id is the main key
output = '{ "10" : { "002" : [ 867192, 5545 ], '\
' "003" : [ 964433, 0430 ], '\
' "007" : [ 965686, 2099 ] } }'
I had to adjust your array type for testing.
Try this code:
stud_id = 10
sub_code = ['002', '003', '007']

# The question supplied `data` as a list of numpy object arrays, e.g.
#   array([['867192', '5545']], dtype=object)
# Plain nested lists are used here instead so the snippet runs without numpy.
data = [
    ['867192', '5545'],
    ['964433', '0430'],
    ['965686', '2099'],
]

# Target layout quoted from the question; kept for reference only.
output = (
    '{ "10" : { "002" : [ 867192, 5545 ], '
    ' "003" : [ 964433, 0430 ], '
    ' "007" : [ 965686, 2099 ] } }'
)

# Pair each subject code with its row, then nest that mapping under the
# stringified student id.
dd = {str(stud_id): dict(zip(sub_code, data))}
print(dd)
Output
{'10': {'002': ['867192', '5545'], '003': ['964433', '0430'], '007': ['965686', '2099']}}
>>> import json
>>> from numpy import array
>>> stud_id = 10
>>> sub_code = ['002', '003', '007']
>>> data = [array([['867192', '5545']], dtype=object),
... array([['964433', '0430']], dtype=object),
... array([['965686', '2099']], dtype=object)]
>>> json.dumps({stud_id: dict(zip(sub_code, map(lambda arr: arr[0].tolist(), data)))})
'{"10": {"002": ["867192", "5545"], "003": ["964433", "0430"], "007": ["965686", "2099"]}}'
Zip sub_code and data, turn them into a dict with a dict comprehension, then put them in another dictionary with stud_id as a key, then dump as json:
import json
# For every (subject code, array) pair, .tolist() converts the array to nested
# lists and [0] keeps its first row (the only row in the question's sample data).
# json.dumps writes the integer key stud_id as the string "10", as shown below.
json.dumps({stud_id: {k: v.tolist()[0] for (k, v) in zip(sub_code, data)}})
# '{"10": {"002": ["867192", "5545"], "003": ["964433", "0430"], "007": ["965686", "2099"]}}'
Related
I have a text file which I want to convert to a nested json structure. The text file is :
Report_for Reconciliation
Execution_of application_1673496470638_0001
Spark_version 2.4.7-amzn-0
Java_version 1.8.0_352 (Amazon.com Inc.)
Start_time 2023-01-12 09:45:13.360000
Spark Properties:
Job_ID 0
Submission_time 2023-01-12 09:47:20.148000
Run_time 73957ms
Result JobSucceeded
Number_of_stages 1
Stage_ID 0
Number_of_tasks 16907
Number_of_executed_tasks 16907
Completion_time 73207ms
Stage_executed parquet at RawDataPublisher.scala:53
Job_ID 1
Submission_time 2023-01-12 09:48:34.177000
Run_time 11525ms
Result JobSucceeded
Number_of_stages 2
Stage_ID 1
Number_of_tasks 16907
Number_of_executed_tasks 0
Completion_time 0ms
Stage_executed parquet at RawDataPublisher.scala:53
Stage_ID 2
Number_of_tasks 300
Number_of_executed_tasks 300
Completion_time 11520ms
Stage_executed parquet at RawDataPublisher.scala:53
Job_ID 2
Submission_time 2023-01-12 09:48:46.908000
Run_time 218358ms
Result JobSucceeded
Number_of_stages 1
Stage_ID 3
Number_of_tasks 1135
Number_of_executed_tasks 1135
Completion_time 218299ms
Stage_executed parquet at RawDataPublisher.scala:53
I want the output to be :
{
"Report_for": "Reconciliation",
"Execution_of": "application_1673496470638_0001",
"Spark_version": "2.4.7-amzn-0",
"Java_version": "1.8.0_352 (Amazon.com Inc.)",
"Start_time": "2023-01-12 09:45:13.360000",
"Job_ID 0": {
"Submission_time": "2023-01-12 09:47:20.148000",
"Run_time": "73957ms",
"Result": "JobSucceeded",
"Number_of_stages": "1",
"Stage_ID 0": {
"Number_of_tasks": "16907",
"Number_of_executed_tasks": "16907",
"Completion_time": "73207ms",
"Stage_executed": "parquet at RawDataPublisher.scala:53",
"Stage": "parquet at RawDataPublisher.scala:53",
},
},
}
I tried the defaultdict approach, but it generated JSON in which every value is a list, which is not suitable for building a table on top of it. Here's what I did:
import json
from collections import defaultdict
INPUT = 'demofile.txt'
dict1 = defaultdict(list)


def convert():
    """Group the lines of INPUT by their first word and dump them as JSON.

    Each line is split on its first run of whitespace into a key and the
    remaining text; the text is appended to ``dict1[key]``, so a key that
    occurs several times (e.g. ``Job_ID``) collects a list of values.
    The mapping is then written to ``demo1file.json``.

    Raises:
        ValueError: if a line does not contain at least two
            whitespace-separated fields.
    """
    with open(INPUT) as f:
        for line in f:
            command, description = line.strip().split(None, 1)
            dict1[command].append(description.strip())
    # Use a context manager so the output file is flushed and closed even
    # if serialisation fails (the handle was previously left open).
    with open("demo1file.json", "w") as OUTPUT:
        json.dump(dict1, OUTPUT, indent=4, sort_keys=False)
and was getting this:
"Report_for": [ "Reconciliation" ],
"Execution_of": [ "application_1673496470638_0001" ],
"Spark_version": [ "2.4.7-amzn-0" ],
"Java_version": [ "1.8.0_352 (Amazon.com Inc.)" ],
"Start_time": [ "2023-01-12 09:45:13.360000" ],
"Job_ID": [
"0",
"1",
"2", ....
]]]
I just want to convert my text to the above json format so that I can build a table on top of it.
There's no way Python or one of its libraries can figure out your nesting requirements if flat text is given as input. How should it know that Stages are inside Jobs, for example?
You will have to programmatically tell your application how it works.
I hacked an example which should work, you can go from there (assuming input_str is what you posted as your file content):
# Nesting definition: a line whose first word is a key of this (nested)
# mapping opens a sub-dictionary; the inner mapping lists the sections that
# may in turn be opened inside it.
nesting = {'Job_ID': {'Stage_ID': {}}}

# Path of the sections that are currently open, outermost first:
# the section names and, in parallel, the second word of their opening line.
upper_nestings = []
upper_nesting_keys = []

# The dictionary being built, and cursors into the definition / the result.
result_dict = {}
current_nesting = nesting
working_dict = result_dict

for line_str in input_str.split('\n'):
    # First word is the key; the remaining words form the value.
    line = line_str.split(' ')
    first_word = line[0]

    if first_word in current_nesting:
        # A deeper section starts: descend one level in both the nesting
        # definition and the result, keyed by the complete line.
        current_nesting = current_nesting[first_word]
        upper_nestings.append(first_word)
        upper_nesting_keys.append(line[1])
        working_dict[line_str] = {}
        working_dict = working_dict[line_str]
        continue

    if first_word not in upper_nestings:
        # Ordinary "key value..." line: store it in the open section.
        working_dict[first_word] = ' '.join(line[1:])
        continue

    # A section at the same or a higher level starts again: walk back from
    # the root to the parent of that level before opening the new section.
    level = upper_nestings.index(first_word)
    nests = upper_nestings[:level]
    keys = upper_nesting_keys[:level]
    working_dict = result_dict
    for nest in nests:
        working_dict = working_dict[' '.join([nest, keys[nests.index(nest)]])]

    # Truncate the open-section path to that level and record the new id.
    upper_nestings = upper_nestings[:level + 1]
    upper_nesting_keys = upper_nesting_keys[:upper_nestings.index(first_word)]
    upper_nesting_keys.append(line[1])

    # Re-derive the definition cursor for the truncated path.
    current_nesting = nesting
    for nest in upper_nestings:
        current_nesting = current_nesting[nest]

    working_dict[line_str] = {}
    working_dict = working_dict[line_str]

print(result_dict)
Results in:
{
'Report_for': 'Reconciliation',
'Execution_of': 'application_1673496470638_0001',
'Spark_version': '2.4.7-amzn-0',
'Java_version': '1.8.0_352 (Amazon.com Inc.)',
'Start_time': '2023-01-12 09:45:13.360000',
'Spark': 'Properties: ',
'Job_ID 0': {
'Submission_time': '2023-01-12 09:47:20.148000',
'Run_time': '73957ms',
'Result': 'JobSucceeded',
'Number_of_stages': '1',
'Stage_ID 0': {
'Number_of_tasks': '16907',
'Number_of_executed_tasks': '16907',
'Completion_time': '73207ms',
'Stage_executed': 'parquet at RawDataPublisher.scala:53'
}
},
'Job_ID 1': {
'Submission_time': '2023-01-12 09:48:34.177000',
'Run_time': '11525ms',
'Result': 'JobSucceeded',
'Number_of_stages': '2',
'Stage_ID 1': {
'Number_of_tasks': '16907',
'Number_of_executed_tasks': '0',
'Completion_time': '0ms',
'Stage_executed': 'parquet at RawDataPublisher.scala:53'
},
'Stage_ID 2': {
'Number_of_tasks': '300',
'Number_of_executed_tasks': '300',
'Completion_time': '11520ms',
'Stage_executed': 'parquet at RawDataPublisher.scala:53'
}
},
'Job_ID 2': {
'Submission_time':
'2023-01-12 09:48:46.908000',
'Run_time': '218358ms',
'Result': 'JobSucceeded',
'Number_of_stages': '1',
'Stage_ID 3': {
'Number_of_tasks': '1135',
'Number_of_executed_tasks': '1135',
'Completion_time': '218299ms',
'Stage_executed': 'parquet at RawDataPublisher.scala:53'
}
}
}
and should pretty much be generically usable for all kinds of nesting definitions from a flat input. Let me know if it works for you!
How can I double loop over a list and dictionary
provided with this dict:
tabs = {
'RESULT' : 'result_out',
'INFO' : 'info_out',
'LOGGING' : 'loggging_out',
}
And this list:
li = [11,22,33]
I would like to map the dictionary to the list to get this result:
tabs = {
'RESULT' : [11,'result_out'],
'INFO' : [22,'info_out'],
'LOGGING' : [33,'loggging_out'],
}
since:
list(zip(list(tabs.keys()),list(tabs.values()),icons))
is equal to:
[('d', 'a', 1), ('e', 'b', 2), ('f', 'c', 3)]
I thought this would make it:
{key:[icons[i],value] for key,value,i in zip(list(tabs.keys()),list(tabs.values()),icons)}
but this gives:
IndexError: list index out of range
Do you know how can I do this?
Thanks
You can try:
>>> tabs = {
... 'RESULT' : 'result_out',
... 'INFO' : 'info_out',
... 'LOGGING' : 'loggging_out',
... }
>>> li = [11,22,33]
>>>
>>> {a : [c, b] for (a, b), c in zip(tabs.items(), li)}
{'RESULT': [11, 'result_out'], 'INFO': [22, 'info_out'], 'LOGGING': [33, 'loggging_out']}
You could try something like this?
# Walk the dictionary in order and pair each value with the list element at
# the same position, replacing the value in place.
count = 0
for name, label in tabs.items():
    tabs[name] = [li[count], label]
    count += 1
Output:
{'RESULT': [11, 'result_out'], 'INFO': [22, 'info_out'], 'LOGGING': [33, 'loggging_out']}
I have a list in ['key', 'value'] format which also contains sub-lists. How can I convert this nested list to JSON format in Python?
[[' key ', ' 1542633482511430199'
],
['value=>>>BasicData',
[['isConfirmAndOrder', '0'],['brmRequestId', 'BR-2018-0000124'],
['requestType','batch'],['projectId', 'PRJ-2018-0000477'],
['createdOn', 'Mon Nov 19 18:48:02 IST 2018']]
],
['createdBy=>>>BasicData',
[['userId', '999996279'], ['email', 'ITEST275#ITS.JNJ.com'],
['firstName', 'Iris'], ['lastName', 'TEST275'],
['ntId', 'itest275'], ['region', 'NA'],
[' LastAccessTime ', ' 1542639905785 ']]
]
]
Expected format is
{
"key": "1542633482511430199",
"value=>>>BasicData": {
"isConfirmAndOrder": "0",
"brmRequestId": "BR-2018-0000124"
.
},
"createdBy=>>>BasicData": {
"userId": "999996279",
"email": "ITEST275#ITS.JNJ.com"
.
}
.
}
Actually format of large data is:
[
[
['key11','value11']
['key12',['key13','value13']]
['key14',['key15','value15']]
]
[
['key21','value21']
['key22',['key23','value23']]
['key24',['key25','value25']]
]
]
You can write a simple recursive function for this:
def to_dict_recursive(x):
    """Turn a nested list of ``[key, value]`` pairs into nested dicts.

    Keys are stripped of surrounding whitespace. A value that is itself a
    list is converted recursively; any other value is stripped as a string.
    """
    result = {}
    for raw_key, raw_value in x:
        is_nested = isinstance(raw_value, list)
        result[raw_key.strip()] = (
            to_dict_recursive(raw_value) if is_nested else raw_value.strip()
        )
    return result
to_dict_recursive(x)
# {'createdBy=>>>BasicData': {'displayName': 'Iris TEST275',
# 'email': 'ITEST275#ITS.JNJ.com',
# 'firstName': 'Iris',
# 'lastName': 'TEST275',
# 'ntId': 'itest275',
# 'region': 'NA',
# 'roles': '[0]CG510_DHF_AP_Role',
# 'userId': '999996279'},
# 'formulaDetails=>>>BasicData': {'CreationTime': '1542633482512',
# 'LastAccessTime': '1542639905785',
# 'batchSizeUnits': 'kg<<<<<<',
# 'hitCount': '1',
# 'version': '1'},
# 'key': '1542633482511430199',
# 'value=>>>BasicData': {'brmRequestId': 'BR-2018-0000124',
# 'createdMonth': 'Nov',
# 'createdOn': 'Mon Nov 19 18:48:02 IST 2018',
# 'department': 'Global Packaging',
# 'gxp': '1',
# 'id': '1542633482511430199',
# 'isConfirmAndOrder': '0',
# 'isFilling': 'false',
# 'projectId': 'PRJ-2018-0000477',
# 'projectName': 'Automation_Product_By_Admin',
# 'requestType': 'batch',
# 'status': 'New',
# 'statusDescription': 'Batch request created',
# 'updatedOn': 'Mon Nov 19 18:48:02 IST 2018'}}
(I ran this in Python 3.6 so the order of the keys in the dictionary representation is different than insertion order. In Python 3.7+ this would be different.)
You can even make this into a dict comprehension:
def to_dict_recursive(x):
    """Turn a nested list of ``[key, value]`` pairs into nested dicts.

    Dict-comprehension form: keys are stripped of surrounding whitespace,
    list values are converted recursively, and every other value is
    stripped as a string.
    """
    return {
        # value.strip() must be *called*; without the parentheses the bound
        # method object itself would be stored as the value.
        key.strip(): to_dict_recursive(value) if isinstance(value, list)
        else value.strip()
        for key, value in x
    }
Since apparently some elements in your object are not a two-element list of key and value, you can add a simple guard against that:
def to_dict_recursive(x):
    """Turn a nested list of ``[key, value]`` pairs into nested dicts.

    Keys and non-list values are stripped of surrounding whitespace; list
    values are converted recursively. If unpacking an element into a
    key/value pair raises ``ValueError`` (the element does not have exactly
    two items), *x* itself is returned unchanged instead of a dict.
    """
    converted = {}
    try:
        for pair in x:
            name, item = pair
            if isinstance(item, list):
                item = to_dict_recursive(item)
            else:
                item = item.strip()
            converted[name.strip()] = item
    except ValueError:
        # Not a list of pairs: hand the original object back untouched.
        return x
    else:
        return converted
x = [[' key ', ' 1542633482511430199'],
["test", ["a", "b", "c"]]
]
to_dict_recursive(x)
# {'key': '1542633482511430199', 'test': ['a', 'b', 'c']}
Note that if mylist is a key-value pair list, then dict(mylist) simply returns a dictionary version of it. The tricky part is traversing deep into those nested lists to replace them with dictionaries. Here's a recursive function that does that:
# Where <kv> is your giant list-of-lists.
def kv_to_dict(kv):
    """Recursively convert a list of key-value pairs into a dictionary.

    A list is passed to ``dict()`` and every value of the result that is
    itself a list is converted the same way. Anything that is not a list is
    returned unchanged.
    """
    if not isinstance(kv, list):
        return kv
    mapping = dict(kv)
    for name, item in mapping.items():
        if isinstance(item, list):
            # Replacing the value of an existing key is safe while iterating.
            mapping[name] = kv_to_dict(item)
    return mapping
newdict = kv_to_dict(kvpairs)
Once you have things converted to a dictionary, you can just use json.dumps() to format it as JSON:
import json
as_json = json.dumps(newdict, indent=4)
print(as_json)
I see though that you've tried something similar and got an error. Are you sure that all of the lists in your data are really key-value pairs, and not for example a list of 3 strings?
So I've a list of students which looks something like this :
students = [ {'name': 'Jack' , 'status' : 'Average' , 'subjects' : { 'subject1' : 'English' , 'subject2' : 'Math' } , 'height' : '20cm' },
{'name': 'Tom' , 'status' : 'Good' , 'subjects' : { 'subject1' : 'English' , 'subject2' : 'Science' } , 'height' : '30cm' }
]
So the above list is of size 2. Assume that the size is pretty big, lets say 50 or 60 or more.
I want to return a list students_output & for each student I want to return a dictionary which contains the following values for each student which are fetched from the above list but have slightly modified 'keys'. The end output should be something like this :
students_output = [ {'student_name': 'Jack' , 'student_status' : 'Average' , 'student_subjects' : { 'student_subject1' : 'English' , 'student_subject2' : 'Math' } , 'child_height' : '20cm' },
{'student_name': 'Tom' , 'student_status' : 'Good' , 'student_subjects' : { 'student_subject1' : 'English' , 'student_subject2' : 'Science' } , 'child_height' : '30cm' }
]
I am not able to understand how I can create an effective loop so that the keys in my resultant data structure are maintained as provided in the output and i can fetch the data from the first list.
for example, in students_output, I know
students_output[0]['student_name']=students[0]['name']
But can anyone help me do it iteratively ?
To achieve this, you have to prepend "student_" to each key, with some exceptions such as the "height" key. You can do this with a combination of a list comprehension and a dict comprehension:
students = [
    {'name': 'Jack', 'status': 'Average', 'subjects': {'subject1': 'English', 'subject2': 'Math'}, 'height': '20cm'},
    {'name': 'Tom', 'status': 'Good', 'subjects': {'subject1': 'English', 'subject2': 'Science'}, 'height': '30cm'},
]


def get_key(key):
    """Return the output name for *key*.

    Keys listed in the exceptions mapping get their explicit replacement;
    every other key is prefixed with ``"student_"``.
    """
    return {
        'height': 'child_height',  # All exceptions you need in `key`
                                   # apart from concatenating `"student_"`
    }.get(key, 'student_' + key)


# Rename the keys of every student; a dict value (e.g. 'subjects') has its
# own keys renamed as well. The inner comprehension must yield the inner
# value `vv` -- yielding `v` would map every nested key to the whole sub-dict.
new_list = [
    {
        get_key(k): (
            {get_key(kk): vv for kk, vv in v.items()}
            if isinstance(v, dict) else v
        )
        for k, v in s.items()
    }
    for s in students
]
# Value held by new_list will be:
# [{'student_name': 'Jack', 'student_status': 'Average', 'student_subjects': {'student_subject1': 'English', 'student_subject2': 'Math'}, 'child_height': '20cm'},
#  {'student_name': 'Tom', 'student_status': 'Good', 'student_subjects': {'student_subject1': 'English', 'student_subject2': 'Science'}, 'child_height': '30cm'}]
Here's a quick-and-dirty function that will do what you need:
In [10]: def rename_keys(students):
...: d = {}
...: for k,v in students.items():
...: if isinstance(v,dict):
...: k = "student_" + k
...: v = rename_keys(v)
...: d[k] = v
...: elif k == 'height':
...: k = "child_height"
...: d[k] = v
...: else:
...: k = "student_" + k
...: d[k] = v
...: return d
...:
...:
In [11]: [rename_keys(d) for d in students]
Out[11]:
[{'child_height': '20cm',
'student_name': 'Jack',
'student_status': 'Average',
'student_subjects': {'student_subject1': 'English',
'student_subject2': 'Math'}},
{'child_height': '30cm',
'student_name': 'Tom',
'student_status': 'Good',
'student_subjects': {'student_subject1': 'English',
'student_subject2': 'Science'}}]
And really, this doesn't have to be recursive, you could substitute the recursive call with a dictionary comprehension:
v = {'student_'+key:value for key,value in v.items()}
You can use the following function inside a list comprehension like this:
def new_dict(d):
    """Return a copy of *d* with every key prefixed, recursing into dicts.

    The key ``'height'`` becomes ``'child_height'``; every other key ``k``
    becomes ``'student_k'``. Values that are dictionaries are converted the
    same way; all other values are copied as they are.
    """
    res = {}
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for key, value in d.items():
        student_or_child = 'student' if key != 'height' else 'child'
        new_key = '{}_{}'.format(student_or_child, key)
        # isinstance also covers dict subclasses such as OrderedDict.
        res[new_key] = new_dict(value) if isinstance(value, dict) else value
    return res
The above function takes a dict as argument, for each key, value in the passed dict, if value is of type dict then the same function is called on value, and the result is added to res dict, else the same value is added.
Now, with a list comprehension, we can do:
[new_dict(d) for d in students]
Output:
>>> [new_dict(d) for d in students]
[{'child_height': '20cm', 'student_name': 'Jack', 'student_status': 'Average', 'student_subjects': {'student_subject1': 'English', 'student_subject2': 'Math'}}, {'child_height': '30cm', 'student_name': 'Tom', 'student_status': 'Good', 'student_subjects': {'student_subject1': 'English', 'student_subject2': 'Science'}}]
I have a custom data file formatted like this:
{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}
I want to collect the blocks of data, meaning the string between each set of {}, while maintaining the hierarchy. This data is not in a typical JSON format, so a JSON parser is not a possible solution.
My idea was to create a class object like so
class Block:
    """One node of the hierarchy: a header plus a collection of child blocks."""

    def __init__(self, header, children):
        # Store both constructor arguments unchanged on the instance.
        self.header, self.children = header, children
Where i would then loop through the data line by line 'somehow' collecting the necessary data so my resulting output would like something like this...
Block("data = {}", [
Block("friends = {max = 0 0,\n min = 0 0,}", []),
Block("family = {version = 1}", [...])
])
In short I'm looking for help on ways I can serialize this into useful data I can then easily manipulate. So my approach is to break into objects by using the {} as dividers.
If anyone has suggestions on ways to better approach this I'm all up for ideas. Thank you again.
So far I've just implemented the basic snippets of code
class Block:
    """One node of the hierarchy: its content plus a collection of child blocks."""

    def __init__(self, content, children):
        # Store both constructor arguments unchanged on the instance.
        self.content, self.children = content, children
def GetBlock(strArr=[]):
print len(strArr)
# blocks = []
blockStart = "{"
blockEnd = "}"
with open(filepath, 'r') as file:
data = file.readlines()
blocks = GetBlock(strArr=data)
You can create a to_block function that takes the lines from your file as an iterator and recursively creates a nested dictionary from those. (Of course you could also use a custom Block class, but I don't really see the benefit in doing so.)
def to_block(lines):
    """Build a nested dict from ``key = value`` lines delimited by braces.

    Args:
        lines: an iterable of text lines positioned *after* the opening
            ``{`` of the block to read. It is consumed up to and including
            the line that closes this block.

    Returns:
        A dict mapping each key to its raw value text (trailing commas and
        quotes are kept), or to a nested dict when the value opens a new
        ``{`` block.
    """
    # One shared iterator lets the recursive calls continue where the caller
    # stopped; iter() of an iterator returns that same iterator, and it also
    # lets callers pass a plain list.
    lines = iter(lines)
    block = {}
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue  # blank lines carry no data
        if stripped.endswith(("}", "},")):
            break  # end of the current block
        # Split on the first " = " only, so values may contain " = ".
        key, value = map(str.strip, line.split(" = ", 1))
        if value.endswith("{"):
            value = to_block(lines)
        block[key] = value
    return block
When calling it, you have to strip the first line, though. Also, evaluating the "leaves" to e.g. numbers or strings is left as an exercise for the reader.
>>> to_block(iter(data.splitlines()[1:]))
{'data': {'family': {'version': '1,',
'cars': {'bike': '"trek",', 'car': '"ford",', 'van': '"honda",'},
'presets': {'travelers': 'False,', 'size': '10,', 'location': '"italy",'}},
'friends': {'max': '0 0,', 'min': '0 0,'}}}
Or when reading from a file:
with open("data.txt") as f:
next(f) # skip first line
res = to_block(f)
Alternatively, you can do some preprocessing to transform that string into a JSON(-ish) string and then use json.loads. However, I would not go all the way here but instead just wrap the values into "" (and replace the original " with ' before that), otherwise there is too much risk to accidentally turning a string with spaces into a list or similar. You can sort those out once you've created the JSON data.
>>> data = data.replace('"', "'")
>>> data = re.sub(r'= (.+),$', r'= "\1",', data, flags=re.M)
>>> data = re.sub(r'^\s*(\w+) = ', r'"\1": ', data, flags=re.M)
>>> data = re.sub(r',$\s*}', r'}', data, flags=re.M)
>>> json.loads(data)
{'data': {'family': {'version': '1',
'presets': {'size': '10', 'travelers': 'False', 'location': "'italy'"},
'cars': {'bike': "'trek'", 'van': "'honda'", 'car': "'ford'"}},
'friends': {'max': '0 0', 'min': '0 0'}}}
You can also do this with ast or json with the help of regex substitutions.
import ast
import json
import re

a = """{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}"""

# Raw string literals keep the regex escapes (\w, \s, \d, \}) intact without
# relying on Python passing unknown string escapes through, and print() is
# called as a function so the script runs on Python 3 as well as Python 2.

# with ast
# Turn every `name = ` into `"name":` so the text becomes a dict literal.
a = re.sub(r"(\w+)\s*=\s*", r'"\1":', a)
# Space-separated runs of integers (e.g. `0 0`) become lists (`[0,0]`).
a = re.sub(r":\s*((?:\d+)(?: \d+)+)", lambda x: ':[' + x.group(1).replace(" ", ",") + "]", a)
print(ast.literal_eval(a))
#{'data': {'friends': {'max': [0, 0], 'min': [0, 0]}, 'family': {'cars': {'car': 'ford', 'bike': 'trek', 'van': 'honda'}, 'presets': {'travelers': False, 'location': 'italy', 'size': 10}, 'version': 1}}}

# with json
# JSON forbids trailing commas and uses lowercase literals.
a = re.sub(r",(\s*\})", r"\1", a)
a = a.replace(":True", ":true").replace(":False", ":false").replace(":None", ":null")
print(json.loads(a))
# (Output below is from Python 2; Python 3 prints the same mapping without the u prefixes.)
#{u'data': {u'friends': {u'max': [0, 0], u'min': [0, 0]}, u'family': {u'cars': {u'car': u'ford', u'bike': u'trek', u'van': u'honda'}, u'presets': {u'travelers': False, u'location': u'italy', u'size': 10}, u'version': 1}}}