Turn text list into json formatted list - python

I have a text file that is formatted like the following, with each hyphen representing a hierarchy for the list item.
category1 : 0120391123123
- subcategory : 0120391123123
-- subsubcategory : 019301948109
--- subsubsubcategory : 013904123908
---- subsubsubsubcategory : 019341823908
- subcategory2 : 0934810923801
-- subsubcategory2 : 09341829308123
category2: 1309183912309
- subcategory : 10293182094
...
How can I programmatically get a list like this into a json format like the following?
[
{
"category1":"0120391123123"
},
[
{
"subcategory":"0120391123123"
},
[
{
"subsubcategory":"019301948109"
},
[
{
"subsubsubcategory":"013904123908"
},
[
{
"subsubsubsubcategory":"019341823908"
}
]
]
]
],
[
{
"subcategory2":"0934810923801"
},
[
{
"subsubcategory2":"09341829308123"
}
]
],
[
{
"category2":"1309183912309"
},
[
{
"subcategory":"10293182094"
}
]
]
]

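Use a recursive function that splits the content of the file into chunks (divide and conquer): each top-level line becomes a dict, and the hyphen-prefixed lines that follow it are parsed recursively into a nested list.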
from pprint import pprint
req=[]
startingindex=-1
with open('temp.txt' ,'r') as f:
content=f.read().split('\n')
def foo(splitcontent):
    """Convert hyphen-indented ``key : value`` lines into a nested list.

    A line that does not start with ``-`` becomes a one-item dict
    ``{key: value}`` (both stripped).  The run of ``-``-prefixed lines that
    follows it has one leading hyphen removed and is parsed recursively; a
    non-empty result is appended as a nested list right after the dict.

    Args:
        splitcontent: list of text lines, e.g. ``f.read().split('\\n')``.

    Returns:
        A list alternating dicts and nested lists of the same shape.

    Raises:
        ValueError: if a non-blank entry line contains no ``:`` separator.
    """
    # Blank lines (typically the trailing '' produced by splitting a file
    # that ends with a newline) carry no entry and would break prefix checks.
    lines = [line for line in splitcontent if line.strip()]
    total = len(lines)
    reqlist = []
    index = 0
    while index < total:
        if not lines[index].startswith('-'):
            # Split on the first colon only so values may contain ':'.
            key, value = lines[index].split(':', 1)
            reqlist.append({key.strip(): value.strip()})
            index += 1
        # Gather the children of the entry just read, dropping one level of
        # hyphen so the recursive call sees them as top-level lines.
        children = []
        while index < total and lines[index].startswith('-'):
            children.append(lines[index][1:])
            index += 1
        nested = foo(children)
        if nested:
            reqlist.append(nested)
    return reqlist
pprint(foo(content))
OUTPUT
[{'category1': '0120391123123'},
[{'subcategory': '0120391123123'},
[{'subsubcategory': '019301948109'},
[{'subsubsubcategory': '013904123908'},
[{'subsubsubsubcategory': '019341823908'}]]],
{'subcategory2': '0934810923801'},
[{'subsubcategory2': '09341829308123'}]],
{'category2': '1309183912309'},
[{'subcategory': '10293182094'}]]

You can use recursion with itertools.groupby:
s = """
category1 : 0120391123123
- subcategory : 0120391123123
-- subsubcategory : 019301948109
--- subsubsubcategory : 013904123908
---- subsubsubsubcategory : 019341823908
- subcategory2 : 0934810923801
-- subsubcategory2 : 09341829308123
category2: 1309183912309
- subcategory : 10293182094
"""
import itertools
import json
import re
data = list(filter(None, s.split('\n')))
def group_data(d):
    """Convert hyphen-indented ``key : value`` lines into a nested list.

    Consecutive lines are grouped by whether they start with ``-``.  A run
    of lines without a leading hyphen becomes one dict (one key per line);
    a run of hyphen-prefixed lines has one hyphen removed from each line and
    is converted recursively into a nested list.

    Keys keep whatever whitespace remains after the hyphens are removed
    (e.g. ``" subcategory"``); only the whitespace around the colon is
    dropped.

    Args:
        d: list of non-empty text lines.

    Returns:
        A list of dicts and nested lists in input order.  A header run that
        has no child lines after it contributes only its dict.
    """
    result = []
    # Walk the runs directly instead of pairing them up by index: a header
    # run at the end of the input has no child run to pair with.
    for is_header, run in itertools.groupby(d, key=lambda x: not x.startswith('-')):
        if is_header:
            # maxsplit=1 keeps any further colons inside the value.
            result.append(dict(re.split(r'\s*:\s*', line, maxsplit=1) for line in run))
        else:
            result.append(group_data([line[1:] for line in run]))
    return result
print(json.dumps(group_data(data), indent=4))
Output:
[
{
"category1": "0120391123123"
},
[
{
" subcategory": "0120391123123"
},
[
{
" subsubcategory": "019301948109"
},
[
{
" subsubsubcategory": "013904123908"
},
[
{
" subsubsubsubcategory": "019341823908"
}
]
]
],
{
" subcategory2": "0934810923801"
},
[
{
" subsubcategory2": "09341829308123"
}
]
],
{
"category2": "1309183912309"
},
[
{
" subcategory": "10293182094"
}
]
]
Note: this answer assumes that your final output should have "category2" be at the same level as "category1", since both do not contain a "-" in the front.

Related

How to transform a JSON file that has 1 'header' row followed by 'data' rows? - (use jq?)

I have a JSON file, like this:
{
"data" : [
{ "values" : [ "ColumnHeader1", "ColumnHeader2", "ColumnHeader3" ]},
{ "values" : [ "Row1Column1", "Row1Column2", "Row1Column3" ]},
{ "values" : [ "Row2Column1", "Row2Column2", "Row2Column3" ]}
]
}
I want to transform it, to be like this:
{
"data": [
{ "ColumnHeader1" : "Row1Column1", "ColumnHeader2": "Row1Column2", "ColumnHeader3" : "Row1Column3" },
{ "ColumnHeader1" : "Row2Column1", "ColumnHeader2": "Row2Column2", "ColumnHeader3" : "Row2Column3" }
]
}
I did write a Python script for that - but I wonder could something clever be done via jq or pandas ? (or some other Unix tool or Python library...)
A jq-only solution:
# objectify($header): build an object from the input, using $header[$i] as the
# key and the input's element at the same index $i as the value.
def objectify($header):
# Keep a handle on the input, because `.` is the accumulator inside reduce.
. as $in
# Start from {} and add one key for every index of $header.
| reduce range(0; $header|length) as $i ({}; .[$header[$i]] = $in[$i] );
.data[0].values as $header
| .data |= (.[1:] | map(.values | objectify($header)) )
If you like nifty:
# objectify($header): replace each entry's key with $header[key].
# NOTE(review): presumably the input is an array, so each key is a numeric
# index into $header — confirm with_entries accepts arrays on the jq version in use.
def objectify($header): with_entries(.key |= $header[.]) ;
So, if you want a two-liner:
.data[0].values as $header
| .data |= (.[1:] | map(.values | with_entries(.key |= $header[.])))
Here is a different jq solution without reduce:
.data |= (
map(.values)
| first as $headers | del(first)
| map(
[ $headers, .]
| transpose
| map({(first): last})
| add
)
)
Output:
{
"data": [
{
"ColumnHeader1": "Row1Column1",
"ColumnHeader2": "Row1Column2",
"ColumnHeader3": "Row1Column3"
},
{
"ColumnHeader1": "Row2Column1",
"ColumnHeader2": "Row2Column2",
"ColumnHeader3": "Row2Column3"
}
]
}
Or to rebuild the result object from scratch:
{
data: (
.data | map(.values)
| first as $headers | del(first)
| map(
[ $headers, .]
| transpose
| map({(first): last})
| add
)
)
}
first as $headers could be rewritten as . as [$headers] or .[0] as $headers. del(first) could be replaced with .[1:].
(not very elegant) Answer
"""
Read a JSON file where the 1st item in array is a set of headers, and the other items are values.
Outputs a JSON file where the other items in array are transposed to use those headers.
"""
import json
from sys import argv
# Command line: <input file> <output file>.
input_json, output_json = argv[1], argv[2]

# Parse the whole input document up front.
with open(input_json, "r") as infile:
    data = json.load(infile)

# The first entry carries the column names; every later entry is one record.
headings = data["data"][0]['values']
new_rows = []
for entry in data["data"][1:]:
    values = entry['values']
    # Pair each heading with the value at the same position in this record.
    new_rows.append({name: values[col] for col, name in enumerate(headings)})

new_data = {"data": new_rows}

# Write the transformed document, pretty-printed with a 2-space indent.
with open(output_json, "w") as outfile:
    outfile.write(json.dumps(new_data, indent=2))
Hoping there is a better way with less code :)
A one liner:
# Sample document: the first entry holds the column headers, the rest hold rows.
d = {
    "data": [
        {"values": ["ColumnHeader1", "ColumnHeader2", "ColumnHeader3"]},
        {"values": ["Row1Column1", "Row1Column2", "Row1Column3"]},
        {"values": ["Row2Column1", "Row2Column2", "Row2Column3"]},
    ]
}
# Rebuild the document: pair the header names with each remaining row's values.
d = {
    "data": [
        dict(zip(d["data"][0]["values"], row["values"]))
        for row in d["data"][1:]
    ]
}
Outputs:
{'data': [{'ColumnHeader1': 'Row1Column1', 'ColumnHeader2': 'Row1Column2', 'ColumnHeader3': 'Row1Column3'}, {'ColumnHeader1': 'Row2Column1', 'ColumnHeader2': 'Row2Column2', 'ColumnHeader3': 'Row2Column3'}]}
You don't need to iterate all headings. I hope it helps.
# Sample document: the first entry holds the column headers, the rest hold rows.
data = {
    "data": [
        {"values": ["ColumnHeader1", "ColumnHeader2", "ColumnHeader3"]},
        {"values": ["Row1Column1", "Row1Column2", "Row1Column3"]},
        {"values": ["Row2Column1", "Row2Column2", "Row2Column3"]},
    ]
}
# From here on work with the list of entries directly.
data = data["data"]
headings, rows = data[0]["values"], data[1:]
# One dict per row, keyed by the header at the same position.
new_data = {
    "data": [
        {name: value for name, value in zip(headings, row["values"])}
        for row in rows
    ]
}
Here's a solution for jq using transpose and map:
.data |= (map(.values) | transpose
| map([{(.[0]): .[1:][]}]) | transpose
| map(add)
)
{
"data": [
{
"ColumnHeader1": "Row1Column1",
"ColumnHeader2": "Row1Column2",
"ColumnHeader3": "Row1Column3"
},
{
"ColumnHeader1": "Row2Column1",
"ColumnHeader2": "Row2Column2",
"ColumnHeader3": "Row2Column3"
}
]
}
Demo

How to access the following list and dictionary values in Python3?

{
"list":[
{
"a":1,
"b":2,
"c":3,
"d":{
"d1":10,
"d2":20,
"d3":30
},
"e":4,
"f":5,
"g":[
{
"g1":100,
"g2":200,
"g3":300
}
],
"h":6
}
]
}
I want to access d1, e, g1, h. How can I do it?
I have corrected your JSON string to this equivalent valid JSON string:
{
"list":[
{
"a":1,
"b":2,
"c":3,
"d":{
"d1":10,
"d2":20,
"d3":30
},
"e":4,
"f":5,
"g":[
{
"g1":100,
"g2":200,
"g3":300
}
],
"h":6
}
]
}
import json
# The document as a raw JSON string.
x = '{"list":[{"a":1,"b":2,"c":3,"d":{"d1":10,"d2":20,"d3":30},"e":4,"f":5,"g":[{"g1":100,"g2":200,"g3":300}],"h":6}]}'
# Decode it into nested Python dicts and lists.
d = json.loads(x)
# "list" and "g" are lists, so they need an index; everything else is reached
# by key.  Prints d1, e, g1 and h on separate lines.
print(
    d["list"][0]["d"]["d1"],
    d["list"][0]["e"],
    d["list"][0]["g"][0]["g1"],
    d["list"][0]["h"],
    sep="\n",
)
Your question is lacking a lot of information.
Let's suppose your overall dict is called dictionnary. To access :
d1 : dictionnary['list'][0]['d']['d1']
e : dictionnary['list'][0]['e']
g1 : dictionnary['list'][0]['g'][0]['g1']
h : dictionnary['list'][0]['h']

Print only shows last register from a json

I'm trying to print all values from a JSON file but it only prints the last one.
example Json file:
[
{
"folderTeste9": [
{
"_aliasinput": "folderTeste9",
"_idinput": "folderteste132131",
"_timesinput": [
"10:20"
],
"_statusinput": "true"
}
]
},
{
"testeFolder1991": [
{
"_aliasinput": "testeFolder1991",
"_idinput": "testefolder1991",
"_timesinput": [],
"_statusinput": "true"
}
]
},
{
"Flo.": [
{
"_aliasinput": "Flo.",
"_idinput": "12321354564613",
"_timesinput": [],
"_statusinput": "true"
}
]
}
]
My Code
import json
# Load the configuration file; the example above suggests a JSON array whose
# items each map one alias to a list of profile dicts.
with open('config/'+'config.json', 'r') as file:
data: list = json.load(file)
lista = data
# Walk every top-level item, every alias key inside it, and every profile
# stored under that alias.
for element in lista:
print("")
for alias_element in element:
#print("Alias: " +alias_element)
for result in element[alias_element]:
profile_data = result
# Pull out the four fields that make up one register.
aliasInput = profile_data['_aliasinput']
timesInput = profile_data['_timesinput']
idInput = profile_data['_idinput']
statusInput = profile_data['_statusinput']
# NOTE(review): the indentation of this snippet was lost, so the nesting cannot
# be confirmed here. To get one line per register, this print has to sit inside
# the innermost `for result` loop; placed after the loops it only sees the
# values left over from the final iteration.
print(f" Values from register are {aliasInput}{timesInput}{idInput}{statusInput}")
Actual Result
Values from register are Flo. [] 12321354564613 true
Expected Result
I'd like to print all values from the different register.
Example:
Values from register are folderTeste9 [10:20] folderteste132131 true
[...]
Values from register are Flo. [] 12321354564613 true

How to trim JSON array to get specific value

I am using the Echo Arena API, which returns (among other things) the following JSON. I am trying to get the names of all users currently in the match; as seen here, there is a player named rnedds and, further down, one named DarkCobra866. How can I get just the names and none of the other information? I am using Python 3.
{
"teams":[
{
"players":[
{
"name":"rnedds",
"rhand":[
-3.3230002,
-1.2370001,
-18.701
],
"playerid":0,
"position":[
-2.7520001,
-0.96800005,
-18.622002
],
"lhand":[
-2.414,
-1.5630001,
-18.487001
],
"userid":1663152230440088,
"stats":{ }
},
{
"name":"DarkCobra866",
"rhand":[
-5.3710003,
-1.978,
-7.5110002
],
"playerid":4,
"position":[
-5.5280004,
-1.3520001,
-7.4040003
],
"lhand":[
-5.6520004,
-1.7540001,
-7.4020004
],
"userid":2649496045086049,
"stats":{ }
}
]
}
]
}
Currently, my code looks like this for other information in the API
# Pick the session-level fields out of the API response.
matchdetails = dict(
    echosessionid=data['sessionid'],
    echoclientname=data['client_name'],
    echogameclockdisplay=data['game_clock_display'],
    echogamestatus=data['game_status'],
)
# Serialize the summary to a JSON string.
currentMatchDetails = json.dumps(matchdetails)
Load your JSON string into a dictionary like this:
import json
json_text = '''
{
"teams":[
{
"players":[
{
"name":"rnedds",
"rhand":[
-3.3230002,
-1.2370001,
-18.701
],
"playerid":0,
"position":[
-2.7520001,
-0.96800005,
-18.622002
],
"lhand":[
-2.414,
-1.5630001,
-18.487001
],
"userid":1663152230440088,
"stats":{ }
},
{
"name":"DarkCobra866",
"rhand":[
-5.3710003,
-1.978,
-7.5110002
],
"playerid":4,
"position":[
-5.5280004,
-1.3520001,
-7.4040003
],
"lhand":[
-5.6520004,
-1.7540001,
-7.4020004
],
"userid":2649496045086049,
"stats":{ }
}
]
}
]
}
'''
# Decode the response text into nested dicts and lists.
data = json.loads(json_text)
# Collect the "name" of every player on every team, in document order.
players = []
for team in data['teams']:
    players.extend(player['name'] for player in team['players'])
print(players)
The above code will result in:
['rnedds', 'DarkCobra866']

Recursively generate subset of list in python

I have a json file that looks something like the following:
[
{
"category1":"0120391123123"
},
[
{
"subcategory":"0120391123123"
},
[
{
"subsubcategory":"019301948109"
},
[
{
"subsubsubcategory":"013904123908"
},
[
{
"subsubsubsubcategory":"019341823908"
}
]
]
]
],
[
{
"subcategory2":"0934810923801"
},
[
{
"subsubcategory2":"09341829308123"
}
]
],
[
{
"category2":"1309183912309"
},
[
{
"subcategory":"10293182094"
}
]
]
]
I also have a list of categories that I would like to find in the original list. If the category exists in categoriesToFind, I would also like to find all subcategories and return those as well.
categoriesToFind = ['019301948109', '1309183912309']
finalCategories = []
def findCategories(currentList, isFirstIteration):
    """Collect wanted category values and all of their descendants.

    Walks ``currentList``, whose items are single-entry dicts and nested
    lists.  A dict's value is appended to the module-level
    ``finalCategories`` when it appears in the module-level
    ``categoriesToFind`` or when an ancestor was already collected.  Nested
    lists that follow a collected dict are treated as its subcategories and
    collected in full.

    Args:
        currentList: list of dicts and nested lists in the same layout.
        isFirstIteration: True while no ancestor has been collected yet;
            pass True for the top-level call.

    Returns:
        None.  Results are appended to ``finalCategories``.
    """
    # Whether the most recent dict at this level was collected; the nested
    # lists that come after it inherit that decision.
    parentCollected = False
    for x in currentList:
        if isinstance(x, dict):
            if not x:
                # An empty dict carries no category value to test.
                parentCollected = False
                continue
            value = next(iter(x.values()))
            parentCollected = value in categoriesToFind or not isFirstIteration
            if parentCollected:
                finalCategories.append(value)
        elif isinstance(x, list):
            # Once a parent is collected, everything below it is taken too.
            findCategories(x, isFirstIteration and not parentCollected)
findCategories(data, True)
I would want finalCategories to contain the following:
['019301948109', '013904123908', '019341823908', '1309183912309', '10293182094']
You can use recursion with a generator:
categoriesToFind = ['019301948109', '1309183912309']
d = [{'category1': '0120391123123'}, [{'subcategory': '0120391123123'}, [{'subsubcategory': '019301948109'}, [{'subsubsubcategory': '013904123908'}, [{'subsubsubsubcategory': '019341823908'}]]]], [{'subcategory2': '0934810923801'}, [{'subsubcategory2': '09341829308123'}]], [{'category2': '1309183912309'}, [{'subcategory': '10293182094'}]]]
def get_subcategories(_d, _flag):
    """Yield wanted category values together with all of their descendants.

    Walks ``_d``, whose items are single-entry dicts and nested lists.  A
    dict's value is yielded when it is in the module-level
    ``categoriesToFind`` or when ``_flag`` is truthy.  Nested lists are
    walked recursively and inherit the decision made for the dict that
    precedes them at the same level.

    Args:
        _d: list of dicts and nested lists in the same layout.
        _flag: truthy when an ancestor was already selected; pass False for
            the top-level call.

    Yields:
        The selected category values in document order.
    """
    flag = None
    for i in _d:
        if isinstance(i, dict):
            _val = list(i.values())[0]
            # Recompute for every dict so that a non-matching sibling does not
            # inherit the match of an earlier dict at the same level.
            flag = _val in categoriesToFind or _flag
            if flag:
                yield _val
        else:
            yield from get_subcategories(i, _flag or flag)
print(list(get_subcategories(d, False)))
Output:
['019301948109', '013904123908', '019341823908', '1309183912309', '10293182094']

Categories

Resources