How to group a list in python? - python

Here I have a list of strings:
['2-3-1-*-*','2-3-*-*-*','2-1-*-*-*','1-4-3-*-*','2-3-2-*-*','2-1-3-*-*','1-1-*-*-*','2-3-1-1-*'];
I am trying to group these strings into a structure like this:
--'2-3-*-*-*'
----'2-3-1-*-*'
------'2-3-1-1-*'
----'2-3-2-*-*'
--'2-1-*-*-*'
----'2-1-3-*-*'
--'1-4-3-*-*'
--'1-1-*-*-*'
This is like a tree structure. I am a beginner in programming, so can someone give me a hint on how I can construct this tree-like structure, and suggest a suitable data structure to use?

s = ['2-3-1-*-*','2-3-*-*-*','2-1-*-*-*','1-4-3-*-*','2-3-2-*-*','2-1-3-*-*','1-1-*-*-*','2-3-1-1-*']
def isSubElement(subelement, element):
    """Return True if ``subelement`` is a strict refinement of ``element``.

    Both arguments are '-'-separated patterns such as ``'2-3-*-*-*'``.
    ``subelement`` qualifies when it differs from ``element`` and, position
    by position, either the two parts are equal or ``element`` has the
    ``'*'`` wildcard where ``subelement`` has a concrete value.  Parts are
    paired with ``zip``, so extra trailing parts of the longer pattern are
    ignored.
    """
    # An element is never its own sub-element.
    if subelement == element:
        return False
    return all(
        parent_part == child_part or (parent_part == "*" and child_part != "*")
        for parent_part, child_part in zip(element.split('-'), subelement.split('-'))
    )
def parseTree(elementList):
    """Arrange pattern strings into a nested dict that mirrors their hierarchy.

    Each key is an element that is not a sub-element of any other element in
    ``elementList`` (a root at this level); its value is the tree built, by
    the same rule, from every element that is a sub-element of that key.
    An empty input yields an empty dict.
    """
    if not elementList:
        return {}
    elements = list(elementList)
    # Roots are the elements that no other element in this list generalises.
    roots = [
        candidate
        for candidate in elements
        if not any(isSubElement(candidate, other) for other in elements)
    ]
    # Every descendant of a root (not only direct children) is handed to the
    # recursive call, which then picks the next level's roots among them.
    return {
        root: parseTree([e for e in elements if isSubElement(e, root)])
        for root in roots
    }
# Call form of print: valid on Python 3 and still accepted by Python 2.
print(parseTree(s))
OUTPUT:
{'2-1-*-*-*':
{'2-1-3-*-*':
{}},
'1-4-3-*-*':
{},
'1-1-*-*-*':
{},
'2-3-*-*-*':
{'2-3-1-*-*':
{'2-3-1-1-*':
{}},
'2-3-2-*-*':
{}}}

If you're using a JS library to render it, save it as a nested dict so that it can be exported to JSON without hassle.
def parser(items):
    """Build a nested dict (a trie) from '-'-separated strings.

    Every part except the last becomes one level of nested dictionaries; the
    last part maps to the number of times that exact path occurred in
    ``items``.  The result holds only dicts, strings and ints.
    """
    nested_dicts = {}
    for item in items:
        nodes = item.split('-')
        current_dict = nested_dicts
        # Walk (creating as needed) one dict level per leading part.
        for n in nodes[:-1]:
            current_dict = current_dict.setdefault(n, {})
        # The final part is a leaf counter rather than another level.
        last = nodes[-1]
        current_dict[last] = current_dict.get(last, 0) + 1
    return nested_dicts
sample output with provided data:
{
"1": {
"1": {
"*": {
"*": {
"*": 1
}
}
},
"4": {
"3": {
"*": {
"*": 1
}
}
}
},
"2": {
"1": {
"3": {
"*": {
"*": 1
}
},
"*": {
"*": {
"*": 1
}
}
},
"3": {
"1": {
"1": {
"*": 1
},
"*": {
"*": 1
}
},
"2": {
"*": {
"*": 1
}
},
"*": {
"*": {
"*": 1
}
}
}
}
}

Related

How to convert nested json to csv with multiple different names?

I've been trying to convert a nested json file to csv. Here is a small example of the json file.
json_data =
{"labels":
{
"longfilename01:png": {
"events": {
"-N8V6uUR__vvB0qv1lPb": {
"t": "2022-08-02T19:54:23.608Z",
"user": "bmEhwNCZT9Wiftgvsopb7vBjO9o1"
}
},
"questions": {
"would-you": {
"-N8V6uUR__vvB0qv1lPb": {
"answer": "no",
"format": 1
}
}
}
},
"longfilename02:png": {
"events": {
"-N8ILnaH-1ylwp2LGvtP": {
"t": "2022-07-31T08:24:23.698Z",
"user": "Qf7C5cXQkXfQanxKPR0rsKW4QzE2"
}
},
"questions": {
"would-you": {
"-N8ILnaH-1ylwp2LGvtP": {
"answer": "yes",
"format": 1
}
}
}
}
I've tried multiple ways to get this output:
| Labels | Event | User | Time | Answer |
| --- | --- | --- | --- | --- |
| Long filename 01 | -N8V6uUR__vvB0qv1lPb | bmEhwNCZT9Wiftgvsopb7vBjO9o1 | 2022-08-02T19:54:23.608Z | no |
| Long filename 02 | -N8ILnaH-1ylwp2LGvtP | bmEhwNCZT9Wiftgvsopb7vBjO9o1 | 2022-07-31T08:24:23.698Z | yes |
If I normalise with:
# Read the labels file; the context manager closes the handle once the
# JSON has been parsed, even if parsing raises.
with open('after_labels.json') as f:
    data = json.load(f)
df = pd.json_normalize(data)
Or try to flatten the file with multiple functions such as:
def flatten_json(json):
    """Flatten nested dicts/lists into a single-level dict.

    Each leaf value is stored under the path that leads to it, with the path
    components joined by ``'__'``; list positions appear as their index
    converted to a string.  Dict keys must be strings, because the path is
    built with ``str.join``.  Empty dicts and empty lists contain no leaves
    and therefore leave no entry in the result.
    """
    def process_value(keys, value, flattened):
        # Recurse through containers, extending the key path at each level.
        if isinstance(value, dict):
            for key in value.keys():
                process_value(keys + [key], value[key], flattened)
        elif isinstance(value, list):
            for idx, v in enumerate(value):
                process_value(keys + [str(idx)], v, flattened)
        else:
            flattened['__'.join(keys)] = value

    flattened = {}
    for key in json.keys():
        process_value([key], json[key], flattened)
    return flattened
df = flatten_json(data)
or
from copy import deepcopy
import pandas
def cross_join(left, right):
    """Combine every row of ``left`` with every row of ``right``.

    Rows are dicts.  Each output row is a copy of a left row updated with
    the keys of a right row (right wins on a key clash); copies are deep, so
    the output does not share nested objects with the inputs.  When
    ``right`` is empty there is nothing to combine and ``left`` itself is
    returned unchanged.
    """
    if not right:
        return left
    new_rows = []
    for left_row in left:
        for right_row in right:
            merged = deepcopy(left_row)
            merged.update(deepcopy(right_row))
            new_rows.append(merged)
    return new_rows
def flatten_list(data):
    """Yield the non-list items of ``data``, descending into nested lists.

    Items are produced lazily and in their original left-to-right order.
    Only ``list`` instances are descended into; any other value (including
    tuples and dicts) is yielded as-is.
    """
    for elem in data:
        if isinstance(elem, list):
            yield from flatten_list(elem)
        else:
            yield elem
def json_to_dataframe(data_in):
    """Flatten nested JSON-like data into rows and wrap them in a DataFrame.

    Dict keys are joined with ``'.'`` to form column names; sibling keys of
    a dict are cross-joined into the same rows, while each item of a list
    contributes its own rows.
    """
    def flatten_json(data, prev_heading=''):
        if isinstance(data, dict):
            # Start from one empty row and widen it with every key's rows.
            rows = [{}]
            for key, value in data.items():
                rows = cross_join(rows, flatten_json(value, prev_heading + '.' + key))
        elif isinstance(data, list):
            rows = []
            for item in data:
                rows.extend(flatten_list(flatten_json(item, prev_heading)))
        else:
            # Headings are accumulated as '.a.b', so drop the leading dot.
            rows = [{prev_heading[1:]: data}]
        return rows

    return pandas.DataFrame(flatten_json(data_in))
df = json_to_dataframe(data)
print(df)
It gives me 292 columns and I suspect this is because of the long unique filenames.
I can't change the json file before processing. Otherwise the simple solution would be to store the name as a value, i.e. "filename": "longfilename01:png", so that all the keys would be consistent and I wouldn't have this problem.
I would be grateful for any other clever ideas on how to solve this.
Try:
json_data = {
"labels": {
"longfilename01:png": {
"events": {
"-N8V6uUR__vvB0qv1lPb": {
"t": "2022-08-02T19:54:23.608Z",
"user": "bmEhwNCZT9Wiftgvsopb7vBjO9o1",
}
},
"questions": {
"would-you": {
"-N8V6uUR__vvB0qv1lPb": {"answer": "no", "format": 1}
}
},
},
"longfilename02:png": {
"events": {
"-N8ILnaH-1ylwp2LGvtP": {
"t": "2022-07-31T08:24:23.698Z",
"user": "Qf7C5cXQkXfQanxKPR0rsKW4QzE2",
}
},
"questions": {
"would-you": {
"-N8ILnaH-1ylwp2LGvtP": {"answer": "yes", "format": 1}
}
},
},
}
}
# One output row per label.  Only the first event and the first answer of
# the first question are read from each label.
records = []
for label, payload in json_data["labels"].items():
    event_id = list(payload["events"])[0]
    event = list(payload["events"].values())[0]
    first_question = list(payload["questions"].values())[0]
    first_reply = list(first_question.values())[0]
    records.append(
        {
            "Labels": label,
            "Event": event_id,
            "User": event["user"],
            "Time": event["t"],
            "Answer": first_reply["answer"],
        }
    )
df = pd.DataFrame(records)
print(df)
Prints:
Labels Event User Time Answer
0 longfilename01:png -N8V6uUR__vvB0qv1lPb bmEhwNCZT9Wiftgvsopb7vBjO9o1 2022-08-02T19:54:23.608Z no
1 longfilename02:png -N8ILnaH-1ylwp2LGvtP Qf7C5cXQkXfQanxKPR0rsKW4QzE2 2022-07-31T08:24:23.698Z yes

Replace all the keys in nested dictionary and merge duplicate keys in python

I have a nested dictionary that represents parent-child relationships. For example:
{
"45273425f5abc05b->s":
{
"12864f455e7c86bb->s": {
"12864f455e7c86bbexternal_call->c": {}
}
},
"c69aead72fcd6ec1->d":
{
"8ade76728bdddf27->d": {
"8ade76728bdddf27external_call->i": {}
},
"b29f07de47c5841f->d": {
"107bec1baede1bff->l": {
"e14ebabea4785c3f->l": {
"e14ebabea4785c3fexternal_call->r": {}
},
"e36b35daa794bd50->l": {
"e36b35daa794bd50external_call->a": {}
}
},
"b29f07de47c5841fexternal_call->l": {}
},
"1906ef2c2897ac01->d": {
"1906ef2c2897ac01external_call->e": {}
}
}
}
I want to do two things with this dictionary. Firstly, I want to remove everything before and including "->", i.e. I want to update the keys. Secondly, after renaming there will be duplicate keys in the nested dictionary (see, for example, the second element in the dictionary). If there are two keys with the same name I want to merge them into one. So, the result will look like the following:
{
"s":
{
"s": {
"c"
}
},
"d":
{
"d": {
"i",
"l": {
"l": {
"r",
"a"
}
},
"e"
}
}
}
How can I achieve this? I have written this code so far.
def alter_dict(nested_dict):
    """Return a copy of ``nested_dict`` with every key cut down to the text after '->'.

    The renaming is applied recursively to nested dict values.  Keys that
    become identical after renaming are NOT merged: the later one simply
    overwrites the earlier one, so sibling subtrees can be lost.  A key
    without '->' raises ``IndexError``.
    """
    new_dict = {}
    for k, v in nested_dict.items():
        if isinstance(v, dict):
            v = alter_dict(v)
        new_key = k.split("->")[1]
        # Plain assignment: a duplicate new_key replaces the previous value.
        new_dict[new_key] = v
    return new_dict
It works for a simple one like the first element but doesn't work for the second one. It loses some information. The purpose of this is to create a graph with the dictionary.
You can use recursion:
import json
from collections import defaultdict
def merge(d):
    """Merge a list of nested dicts, renaming keys to the text after '->'.

    ``d`` is a list of dicts.  Every key is reduced to whatever follows its
    last '->' (a key without '->' is kept whole); the values of keys that
    end up with the same name are collected together and merged recursively,
    so no subtree is dropped.  An empty list yields an empty dict.
    """
    children_by_name = defaultdict(list)
    for mapping in d:
        for key, subtree in mapping.items():
            children_by_name[key.split('->')[-1]].append(subtree)
    # Each bucket is itself a list of dicts, i.e. valid input for merge().
    return {name: merge(subtrees) for name, subtrees in children_by_name.items()}
data = {'45273425f5abc05b->s': {'12864f455e7c86bb->s': {'12864f455e7c86bbexternal_call->c': {}}}, 'c69aead72fcd6ec1->d': {'8ade76728bdddf27->d': {'8ade76728bdddf27external_call->i': {}}, 'b29f07de47c5841f->d': {'107bec1baede1bff->l': {'e14ebabea4785c3f->l': {'e14ebabea4785c3fexternal_call->r': {}}, 'e36b35daa794bd50->l': {'e36b35daa794bd50external_call->a': {}}}, 'b29f07de47c5841fexternal_call->l': {}}, '1906ef2c2897ac01->d': {'1906ef2c2897ac01external_call->e': {}}}}
print(json.dumps(merge([data]), indent=4))
Output:
{
"s": {
"s": {
"c": {}
}
},
"d": {
"d": {
"i": {},
"l": {
"l": {
"r": {},
"a": {}
}
},
"e": {}
}
}
}

How to parse ['A:B:C', 'A:B:D', 'A:C'] into {'A': ['C', {'B': ['D', 'C']} ] }

Essentially I need to write a parser of some product of a markup. It's a list of strings formatted like such:
x = [
'A:B:C:D:E',
'A:B:D',
'A:C:E:F',
'B:D:E',
'B:C',
'A:C:F',
]
I need to turn it into a python object like so:
{
"B": [
"C",
{
"D": "E"
}
],
"A": [
{
"B": [
"D",
{
"C": {
"D": "E"
}
}
]
},
{
"C": [
"F",
{
"E": "F"
}
]
}
]
}
You can copy above and paste into this inspector to look at the object hierarchy, and understand what I'm going after. In any regards, it's a nested dictionary combining common keys, and putting items in lists sometimes.
TL;DR -
I have written a function below
splits = [l.split(':') for l in x]
def DictDrill(o):
    """Turn a list of paths (lists) into nested dicts keyed by path element.

    Given a list of lists, the paths are bucketed by their first element and
    the remaining tails are drilled into recursively; given a dict, each
    value is drilled into.  The end of a path shows up as a key mapped to an
    empty dict.  Any other input (for example a list that contains
    non-list items) yields ``None``.
    """
    # list of lists: bucket every tail under the path's first element
    if isinstance(o, list) and all(isinstance(path, list) for path in o):
        d = {}
        for group in o:
            if not group:
                continue  # an empty path contributes nothing
            tails = d.setdefault(group[0], [])
            if len(group) > 1:
                tails.append(group[1:])
        return DictDrill(d)
    # a dictionary: drill into each bucket of tails
    if isinstance(o, dict):
        return {k: DictDrill(groups) for k, groups in o.items()}
    return None
But you'll see that this script is only returning dictionaries and the last item is placed on as a key again with an empty dict() as value. If you run my script like DictDrill(splits) on the example you will see this:
{
"B": {
"C": {},
"D": {
"E": {}
}
},
"A": {
"C": {
"E": {
"F": {}
},
"F": {}
},
"B": {
"C": {
"D": {
"E": {}
}
},
"D": {}
}
}
}
Notice the useless {} as values
Preferably I need to solve this in python. I know a little C# but it seems very cumbersome to move data around between lists and dictionaries...
You can use itertools.groupby with recursion:
from itertools import groupby as gb
data = ['A:B:C:D:E', 'A:B:D', 'A:C:E:F', 'B:D:E', 'B:C', 'A:C:F']
def to_dict(d):
    """Fold a flat sequence into a chain of single-key dicts.

    ``['C', 'D', 'E']`` becomes ``{'C': {'D': 'E'}}`` and a one-item
    sequence collapses to that item.  Input that is already a dict, is
    empty, or contains any dict/list item is returned unchanged.
    """
    if isinstance(d, dict) or not d or any(isinstance(i, (dict, list)) for i in d):
        return d
    if len(d) == 1:
        return d[0]
    return {d[0]: to_dict(d[1:])}
def group(d):
    """Group paths by their first element, recursing into the tails.

    ``d`` is a list of paths, each a sequence whose first element is the
    grouping key.  Every path must be non-empty, since ``x[0]`` is read for
    sorting.  Returns a list whose items are either single-key dicts
    ``{head: grouped_tails}`` or bare heads for paths that ended there.
    """
    # groupby only merges adjacent items, so sort by the same key first.
    ordered = sorted(d, key=lambda x: x[0])
    # (head, [tail, ...]) for each distinct first element.
    _d = [(a, [c for _, *c in b]) for a, b in gb(ordered, key=lambda x: x[0])]
    # A head with a single tail is folded by to_dict directly; several tails
    # are grouped recursively first.
    new_d = [{a: to_dict(b[0] if len(b) == 1 else group(b))} for a, b in _d]
    # A dict whose value is falsy (an empty tail) is iterated instead of
    # kept, which yields just its key: the path ended at that element.
    return [i for b in new_d for i in (b if not all(b.values()) else [b])]
import json
print(json.dumps(group([i.split(':') for i in data]), indent=4))
Output:
[
{
"A": [
{
"B": [
{
"C": {
"D": "E"
}
},
"D"
]
},
{
"C": [
{
"E": "F"
},
"F"
]
}
]
},
{
"B": [
"C",
{
"D": "E"
}
]
}
]

Sorting nested dictionaries using its second key

I am trying to sort a nested dictionary using its second key where my dictionary looks like:
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
And my expected output will be:
my_dictionary = {
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"char": {
"3": {
"genman": [
"motion"
]
}
}
}
Tried using the following code:
# Asker's attempt, kept verbatim.
# NOTE(review): this relies on the Python 2 dict API -- `itervalues()` and
# indexing the result of `keys()` are not available on Python 3 dicts.
# NOTE(review): the indentation was lost in this copy, so the intended
# nesting of the loops and the `if` cannot be confirmed from here.
new_dict = {}
for k, v in my_dictionary.items():
for s in sorted(my_dictionary.itervalues()):
if not s.keys()[0]:
new_val = my_dictionary[k].get(s.keys()[0])
my_dictionary[s.keys()[0]] = new_val
# new_dict is never written to above, so this update adds nothing.
my_dictionary.update(new_dict)
It fails badly, and I am getting the same result as my initial dictionary.
This works:
sorted(my_dictionary.items(), key=lambda x: list(x[1].keys())[0])
Returns:
[('EMPT', {'0': {}}),
('veh', {'1': {'tankers': ['varA', 'varB']}}),
('fast', {'2': {'empty': []}}),
('char', {'3': {'genman': ['motion']}})]
Sorted receives a list of key-value pairs, we sort using the result of lambda x: list(x[1].keys())[0] which takes a list of the keys in the inner dict, then grabs the first key (need to do this because dict_keys directly is not indexable).
Edit: the result is a list of key, value pairs but it can be fed into an OrderedDict to use it as a dict.
Actually, before Python 3.7 a plain dict has no guaranteed order; however, you can use OrderedDict instead.
from collections import OrderedDict
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
# Sort (first inner key, outer key) pairs so the outer keys come out
# ordered by the first key of their inner dict.
s = sorted([(list(inner)[0], outer) for outer, inner in my_dictionary.items()])
# Re-insert the entries in that order; OrderedDict keeps insertion order.
new_dic = OrderedDict((outer, my_dictionary[outer]) for _, outer in s)

Nested list into a hierarchical dict

I have a nested list that I need to convert into a hierarchical dictionary. However I am a bit confused how to achieve it in a clean pythonic way. Here's a somewhat ugly sample code that I have come up with. How to improve it?
from itertools import tee,izip
import json
L=[(1,2,3,4,5),(1,2,7),(2,3,5),(3,4,5,6)]
def pairs(iterable):
    """Return an iterator over consecutive pairs: (x0, x1), (x1, x2), ...

    An input with fewer than two items produces no pairs.
    """
    try:
        from itertools import izip as lazy_zip  # Python 2
    except ImportError:
        lazy_zip = zip  # Python 3: the builtin zip is already lazy
    a, b = tee(iterable)
    # Advance the second copy by one; the default keeps an empty input from
    # raising StopIteration here.
    next(b, None)
    return lazy_zip(a, b)
def innerfunc(pairs, d):
    """Consume ``pairs`` and nest the first item of each pair inside ``d``.

    ``pairs`` is an iterator of ``(item, nextitem)`` tuples.  Each item
    becomes a key one level below the previous one, with missing levels
    created as empty dicts.  ``d`` is modified in place; nothing is
    returned.
    """
    try:
        item, _nextitem = next(pairs)
    except StopIteration:
        return
    # Reuse the existing child dict, or create it, then continue one level down.
    innerfunc(pairs, d.setdefault(item, {}))
def outerfunc(matrix):
    """Build a nested dict in which each row of ``matrix`` is a path of keys.

    Rows that share a prefix share the corresponding levels of the result.
    """
    result_dict = {}
    for row in matrix:
        # pairs() only exposes an item as the first half of a pair, so a
        # trailing sentinel is appended to give the last real item a partner.
        iter_pairs = pairs(tuple(row) + (0,))
        innerfunc(iter_pairs, result_dict)
    return result_dict
# Call form of print: valid on Python 3 and still accepted by Python 2.
print(json.dumps(outerfunc(L), sort_keys=True, indent=4))
Output:
{
"1": {
"2": {
"3": {
"4": {
"5": {}
}
},
"7": {}
}
},
"2": {
"3": {
"5": {}
}
},
"3": {
"4": {
"5": {
"6": {}
}
}
}
}
You can do that quite succinctly using recursion:
def append_path(root, paths):
    """Insert the key sequence ``paths`` into the nested dict ``root``.

    Each element of ``paths`` becomes a key one level below the previous
    one; levels that already exist are reused, missing ones are created as
    empty dicts.  ``root`` is modified in place and nothing is returned.
    An empty ``paths`` leaves ``root`` untouched.
    """
    if paths:
        child = root.setdefault(paths[0], {})
        append_path(child, paths[1:])
# Example usage
root = {}
for p in [(1,2,3,4,5),(1,2,7),(2,3,5),(3,4,5,6)]:
    append_path(root, p)
# Print results
import json
# Call form of print: valid on Python 3 and still accepted by Python 2.
print(json.dumps(root, indent=4))
Output:
{
"1": {
"2": {
"3": {
"4": {
"5": {}
}
},
"7": {}
}
},
"2": {
"3": {
"5": {}
}
},
"3": {
"4": {
"5": {
"6": {}
}
}
}
}

Categories

Resources