Getting pandas dataframe from nested dictionaries? - python

I am new to Python and I have not been able to find a good answer for my problem after looking for a while.
I am trying to create a Pandas dataframe from a list of dictionaries.
My list of nested dictionaries is the following:
{'category_1': [{'a': '151',
'b': '116',
'c': '86'}],
'category_2': [{'d': '201',
'e': '211',
'f': '252'},
{'d': '-1',
'e': '-9',
'f': '-7'}],
'category_3': {'g': 'Valid',
'h': None,
'i': False,
'j': False},
'category_4': {'k': None,
'l': None,
'm': None,
'n': None}}
And my output should be
a b c d e f g h i j k l m n
0 151 116 86 201,-1 211,-9 252,-7 valid None False False None None None None
What I tried:
I'm able to handle categories 1, 3 and 4, but I couldn't figure out the 2nd category.
I tried pd.concat and a nested for loop to build it:
ex=pd.concat([pd.Series(d) for d in (eg1)], axis=1).T
Then merging the results.
As I said, I couldn't get the whole thing to work!

I wrote a short recursive function that returns a series, or a concatenation of several series if one of the keys in your dict (e.g category_2) contains a list of multiple dicts.
def expand(x):
    """Convert a dict, or a (possibly nested) list of dicts, into one Series.

    A dict becomes ``pd.Series(x)`` (keys form the index). A list is expanded
    element by element and the resulting Series are concatenated, so keys
    that occur in several dicts show up as duplicate index labels.

    Returns ``None`` for any other input, and for a list that contributes
    no Series at all (for example an empty list).
    """
    if isinstance(x, dict):
        return pd.Series(x)
    if isinstance(x, list):
        # Drop elements that produced nothing so pd.concat is never handed
        # an empty or all-None sequence, which it rejects with ValueError.
        parts = [part for part in (expand(item) for item in x) if part is not None]
        return pd.concat(parts) if parts else None
    return None
If I start with the dictionary that you pasted in in your example above:
d = {'category_1': [{'a': '151',
'b': '116',
'c': '86'}],
'category_2': [{'d': '201',
'e': '211',
'f': '252'},
{'d': '-1',
'e': '-9',
'f': '-7'}],
'category_3': {'g': 'Valid',
'h': None,
'i': False,
'j': False},
'category_4': {'k': None,
'l': None,
'm': None,
'n': None}}
Then it's just a matter of concatenating all the series created by the recursive method I wrote:
output = pd.concat([expand(value) for key, value in d.items()])
And merging any duplicate indices so that their items appear in one row and are separated by commas. I also reshape the series into a df with one row and several columns:
output = pd.DataFrame(output.groupby(output.index).apply(lambda x: ','.join(x.astype(str)))).T
This results in a dataframe that matches your desired output:
output
a b c d e f g h i j k l m n
0 151 116 86 201,-1 211,-9 252,-7 Valid None False False None None None None

The code below recursively flattens the input structure, which may contain lists or other dicts. When it reaches a leaf, it adds the content to a flattened dict, which can then be converted to a dataframe.
# Accumulator filled by flatten(): maps each leaf key to a one-element list.
flattened_dict = {}


def flatten(obj, name=''):
    """Recursively copy the leaves of *obj* into ``flattened_dict``.

    Dicts are descended key by key and lists element by element. Every
    other value is treated as a leaf and stored as ``flattened_dict[name]
    = [value]``, where *name* is the nearest enclosing dict key. The string
    ``'null'`` is stored as ``None``.

    If the same key is reached more than once (e.g. several dicts in a list
    sharing keys), the value seen last overwrites the earlier ones.
    """
    if isinstance(obj, dict):
        for key, value in obj.items():
            flatten(value, key)
    elif isinstance(obj, list):
        for element in obj:
            # Pass the enclosing key along so a scalar inside a list is
            # stored under that key rather than under the empty default ''.
            flatten(element, name)
    else:
        if obj == 'null':
            obj = None
        flattened_dict[name] = [obj]
flatten(eg1)
The result is:
Please note that you have to define the null as a string. The definition for the original dict is:
eg1 = {
"my_list": {
"category_1": [
{
"a": "151",
"b": "116",
"c": "86"
}
],
"category_2": [
{
"d": "201",
"e": "211",
"f": "252"
},
{
"d": "-1 ",
"e": "-9",
"f": "-7"
}
],
"category_3": {
"g": "Valid",
"h": "null",
"i": "Invalid",
"j": "Invalid"
},
"category_4": {
"k": "null",
"l": "null",
"m": "null",
"n": "null"
}
}
}

Related

how to find index by value in json nested python [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 1 year ago.
Improve this question
body = {
'a': 1,
'b': 'apple',
'child': [
{
'a': 12,
'b': 'banana',
'child': [
{
'a': 121,
'b': 'mango',
'child': [
{
'a': 1211,
'b': 'coconut',
'child': [dynamic nested]
}
]
},
{
'a': 122,
'b': 'papaya',
'child': [
{
'a': 1221,
'b': 'lemon',
'child': [dynamic nested]
}
]
}
]
},
{
'a': 13,
'b': 'orenge',
'child': [
dynamic nested
]
}
]
}
I want to know the index path of 'coconut' or 'lemon'. The JSON body is dynamic: children can be nested to an unknown depth, but I know the 'a' or 'b' value of the node I am looking for.
How can I get that index path with Python?
ex1: index of 'coconut' = [0,0,0]
ex2: index of 'lemon' = [0,1,0]
def find(child, value, index=False):
    """Return the index path from *child* to the node whose "b" equals *value*.

    The path is a list of positions in successive "child" lists, e.g.
    ``[0, 1, 0]``. An empty list means *child* itself is the match.
    Returns ``False`` when no node matches.

    *index* is accepted for backward compatibility and only forwarded to
    the recursive calls; it does not influence the result.
    """
    if child["b"] == value:
        return []
    # Leaf nodes may omit the "child" key entirely; treat that as no children.
    for i, sub in enumerate(child.get("child", [])):
        if sub["b"] == value:
            return [i]
        found = find(sub, value, index)
        # An empty list is a valid path, so test identity with False
        # instead of truthiness.
        if found is not False:
            return [i] + found
    return False
print(find(body, "coconut"))
Use a recursive function
For educational purposes, the same Akane's algorithm but in a little bit more Pythonic style:
def find_fruit(child, value, index=False):
    """Return the index path from *child* to the node whose "b" equals *value*.

    The path lists the position taken in each successive "child" list; an
    empty list means *child* itself matches. Returns ``False`` when the
    value is not found, matching the sibling ``find`` function.

    *index* is kept for signature compatibility and is only forwarded to
    the recursive calls.
    """
    if child["b"] == value:
        return []
    # Nodes without a "child" key are treated as having no children.
    for i, ch in enumerate(child.get("child", [])):
        if ch["b"] == value:
            return [i]
        found = find_fruit(ch, value, index)
        if found:
            return [i] + found
    # Explicit miss value instead of falling off the end with None.
    return False
print(find(body, "coconut"))

Extract dictionary data which match with other dictionary

I've two dictionaries one original & the other reference. And I want to match reference dict keys with original dict keys and extract all those keys, values from original which are present in reference.
For example
original_dict = {
'a': {
'1': [1, 2, 3, 4, 5]
},
'b': {
'1': {
'A': [1, 2, 3, 4]
}
},
'c': {
'3': [1]
}
}
And a reference dictionary
reference_dict = {
'a': {
'2': [1, 2, 3]
},
'b': {
'1': {
'A': []
}
},
'c': {
'3': []
}
}
And this is the extracted dictionary.
extracted_dict = {
'b': {
'1': {
'A': [1, 2, 3, 4]
}
},
'c': {
'3': [1]
}
}
Here you might have noticed that I don't care about values of reference dict. All I want is the values of original dict.
You can use recursion to accomplish this:
def merge(value_dict, key_dict):
    """Extract from *value_dict* the entries whose key paths exist in *key_dict*.

    Only the keys of *key_dict* matter; its leaf values are ignored. Where
    both sides hold a dict the function recurses, and a sub-dict that ends
    up with no matching keys is omitted from the result. Otherwise the
    value from *value_dict* is taken as-is (the same object, not a copy).

    Returns a new dict; neither argument is modified.
    """
    ret = {}
    for key, subdict in key_dict.items():
        if key not in value_dict:
            continue
        value = value_dict[key]
        if not (isinstance(value, dict) and isinstance(subdict, dict)):
            # Either we reached a plain value, or the reference stops
            # describing structure here (non-dict leaf): keep the original
            # value whole instead of calling .items() on a non-dict.
            ret[key] = value
            continue
        merged = merge(value, subdict)
        if merged:
            ret[key] = merged
    return ret
merge(original_dict, reference_dict)
I'm not sure if I understand your question 100%, but this might be a solution.
We collect the relevant values in a list and then apply them to the dict with .update():
# Gather every top-level value of original_dict (each one is itself a dict).
new_lst = [original_dict[i] for i in original_dict]
# Empty reference_dict in place, then overlay each collected sub-dict on it;
# later sub-dicts overwrite earlier ones on key clashes.
reference_dict.clear()
for i in new_lst:
    reference_dict.update(i)
print(reference_dict)

How to fetch the value in dictionary by checking the key mapped in the dict

Dictionary is below
my = {
"1": {
"exclude": 'A,B',
"column": "value",
"output": "Out1",
"Out1": "Cost",
"Out2": "Rev"
},
"2": {
"exclude": 'None',
"column": "value",
"output": "Out2",
"Out1": "Cost",
"Out2": "Rev"
}
}
I need to check column key value (if lower(my[column])) == lower(value)
I have exclude_list = ['A','B','C']
I need to process the exclude key: if exclude is 'A', remove it from exclude_list = ['A','B','C'] and keep the values B,C. If the exclude key is 'A,B', keep the value C, as shown in the example.
I need to check the output key: if output is Out1, then fetch the value of Out1 and map it to EndOutput.
Expected Out
{'1': {'exclude': ['C'], 'EndOutput': 'Cost'},
'2': {'exclude': ['A', 'B', 'C'], 'EndOutput': 'Rev'}}
exclude_list = ['A','B','C']
for k,v in my.items():
if v['column'].lower() == 'value'.lower:
You can iterate over keys 1,2 .. N with for e in my: and then calculate exclude with help of set() difference as
and extract things like this
exc = set(['A','B','C']) - set(my[e]["exclude"].split(",") if len(my[e]["exclude"].split(",")) else [] )
next you can take value of out1 or out2 depending on the value of output as
my[e][my[e]["output"]]
Insert in new dictionary as
newdict[e] = {'exclude': list(exc) ,'EndOutput':my[e][my[e]["output"]]}
Overall, this is how you can iterate
# Build, for every entry of `my` whose "column" is "value" (case-insensitive):
#   'exclude'   - the candidates A/B/C that are NOT named in the entry's
#                 comma-separated "exclude" string
#   'EndOutput' - the value stored under the key that "output" points to
newdict = {}
exclude_list = ['A', 'B', 'C']
for e in my:
    if my[e]['column'].lower() == 'value':
        # str.split always returns at least one element, so no emptiness
        # check is needed; an entry such as 'None' simply matches nothing.
        named = {name.strip() for name in my[e]["exclude"].split(",")}
        # Filter the ordered candidate list rather than converting a set,
        # whose iteration order is arbitrary, so the result is stable.
        exc = [name for name in exclude_list if name not in named]
        newdict[e] = {'exclude': exc, 'EndOutput': my[e][my[e]["output"]]}
printing newdict will produce this output
{'1': {'EndOutput': 'Cost', 'exclude': ['C'] },
'2': {'EndOutput': 'Rev', 'exclude': ['A', 'B', 'C'] }}

Inverting a nested dictionary tree

I have a dictionary that looks like:
normalDictionary = {'a' : {'b': {}},
'a1': {'b1': {'c1' : {},
'd1' : {}}}}
and I want to invert it so that it looks like:
invertedDictionary = {'d1': {'b1': {'a1': {}}},
'c1': {'b1': {'a1': {}}},
'b': {'a': {}}}
What would that python function look like?
I cannot seem to get much past:
def invert_dictionary( node, leafName, indent ):
keys = list( node.keys() )
keys.sort()
constructed = { leafName: {} }
for key in keys:
inverted = invert_dictionary( node[ key ], key, indent + 4 )
return constructed
invertedDictionary = {}
for key in normalDictionary
inverted = invert_dictionary( normalDictionary[ key ], key, indent = 0 )
This may not be the optimal algorithm but you can start with this. The idea is
we convert the dictionary into all "walk paths" from the "root" to all "leaves"
then we build another dictionary from such paths, in reversed order
Here is the code:
def getpaths(dictionary, pathhead):
    """List every root-to-leaf key path below *dictionary*.

    *pathhead* is the list of keys already walked to reach *dictionary*.
    An empty dictionary is a leaf and yields ``[pathhead]``; otherwise the
    paths of all children are collected in key iteration order.
    """
    if not dictionary:
        return [pathhead]
    return [
        path
        for key, child in dictionary.items()
        for path in getpaths(child, pathhead + [key])
    ]
def invert(dictionary):
    """Build the inverted tree: each leaf becomes a root, each root a leaf.

    Every root-to-leaf path returned by ``getpaths`` is walked backwards
    and inserted into the result, reusing nodes that already exist so
    that paths sharing a reversed prefix are merged.
    """
    inverted = {}
    for path in getpaths(dictionary, []):
        cursor = inverted
        for node in reversed(path):
            # Create the node on first visit, then step into it.
            cursor = cursor.setdefault(node, {})
    return inverted
and this is how it works:
>>> normalDictionary
{'a': {'b': {}}, 'a1': {'b1': {'c1': {}, 'd1': {}}}}
>>> invert(normalDictionary)
{'b': {'a': {}}, 'c1': {'b1': {'a1': {}}}, 'd1': {'b1': {'a1': {}}}}
A recursive implementation:
def asdict(xs: list) -> dict:
    """Turn ``[a, b, c]`` into the nested chain ``{a: {b: {c: {}}}}``."""
    chain: dict = {}
    # Build from the innermost key outwards to avoid slicing and recursion.
    for key in reversed(xs):
        chain = {key: chain}
    return chain


def inverted_dict_as_tuples(d: dict, stack: list):
    """Yield one tuple per leaf of *d*: the leaf key, then its ancestors.

    *stack* holds the keys walked so far (outermost first); the yielded
    tuple lists them innermost first, i.e. the leaf-to-root path.
    """
    for k, v in d.items():
        if not v:
            yield (k, *reversed(stack))
        else:
            yield from inverted_dict_as_tuples(v, [*stack, k])


def inverted_dict(d: dict) -> dict:
    """Return the inverted tree of *d* (leaves become roots).

    Leaf-to-root paths are merged into one tree, so a leaf name that occurs
    under several parents keeps all of its ancestor chains instead of only
    the last one encountered.
    """
    result: dict = {}
    for path in inverted_dict_as_tuples(d, []):
        node = result
        for key in path:
            node = node.setdefault(key, {})
    return result
Usage:
>>> import json
>>> d = {"a": {"b": {}}, "a1": {"b1": {"c1": {}, "d1": {}}}}
>>> print(json.dumps(d, indent=2))
{
"a": {
"b": {}
},
"a1": {
"b1": {
"c1": {},
"d1": {}
}
}
}
>>> d_inv = inverted_dict(d)
>>> print(json.dumps(d_inv, indent=2))
{
"b": {
"a": {}
},
"c1": {
"b1": {
"a1": {}
}
},
"d1": {
"b1": {
"a1": {}
}
}
}
This is a working solution:
def add_path(d, path):
    """Insert *path* into the nested dict *d*, walking the path backwards.

    The last element of *path* becomes a key of *d*, the one before it a
    key one level below, and so on. Existing nodes are reused. *d* is
    modified in place; *path* is left untouched.
    """
    # Iterate in reverse rather than popping, so the caller's list survives.
    for k in reversed(path):
        d = d.setdefault(k, {})
def invert_dict(d, target=None, path=None):
    """Invert the nested dict *d* so that leaves become roots.

    *target* accumulates the inverted tree and *path* the keys walked so
    far; both default to fresh empty containers on the outermost call.
    Each time an empty dict (a leaf) is reached, the walked path is handed
    to ``add_path`` for insertion into *target*. Returns *target*.
    """
    target = {} if target is None else target
    path = [] if path is None else path
    if d:
        for key, child in d.items():
            # A new list per branch keeps sibling paths independent.
            invert_dict(child, target, path + [key])
    else:
        add_path(target, path)
    return target
print(invert_dict(normalDictionary))
This assumes your dictionary only contains dictionaries like your example, though. Not sure what the actual use case is, where you may have more mixed data types.
Result:
{'b': {'a': {}}, 'c1': {'b1': {'a1': {}}}, 'd1': {'b1': {'a1': {}}}}
In technical terms, you have an n-ary tree and it sounds like you want to build root-to-leaf paths (using depth-first search), then create another n-ary tree by expanding each of those paths in reverse. Here's one way (all_paths yields root-to-leaf paths, reverse each path, then iterate over each path plugging values into a dict in paths_to_dict):
import json
def all_paths(d, path=None):
    """Yield every root-to-leaf key path of the nested dict *d* as a list.

    *path* is the working list of keys walked so far; it is extended and
    restored in place during the traversal, and each yielded path is a
    copy. When omitted, a fresh list is used for every call.
    """
    # A shared default list would stay polluted if a consumer stopped
    # iterating early, because the pops below would never run.
    if path is None:
        path = []
    if not d:
        yield path[:]
        return
    for k, v in d.items():
        path.append(k)
        yield from all_paths(v, path)
        path.pop()
def paths_to_dict(paths):
    """Build a nested dict from an iterable of key paths.

    Each path is a sequence of keys from the root downwards. Paths that
    share a prefix are merged into the same branch instead of replacing
    what an earlier path inserted.
    """
    d = {}
    for path in paths:
        curr = d
        for node in path:
            # Reuse an existing node; only create one on first visit.
            curr = curr.setdefault(node, {})
    return d
if __name__ == "__main__":
d = {
'a': {
'b': {}
},
'a1': {
'b1': {
'c1': {},
'd1': {}
}
}
}
inverted_d = paths_to_dict([list(reversed(x)) for x in all_paths(d)])
print(json.dumps(inverted_d, indent=2))
Output:
{
"b": {
"a": {}
},
"c1": {
"b1": {
"a1": {}
}
},
"d1": {
"b1": {
"a1": {}
}
}
}
I think you want a single recursive function like this.
def reverse_dict(final_result, middle_result, normal_dictionary):
    """Write the inverted form of *normal_dictionary* into *final_result*.

    In the inverted tree every leaf of the input becomes a top-level key
    and its ancestors are nested beneath it, nearest parent first, e.g.
    ``{'a1': {'b1': {'c1': {}}}}`` becomes ``{'c1': {'b1': {'a1': {}}}}``.
    (The earlier version attached all ancestors as flat siblings of each
    other and reused the input's own leaf dicts, modifying the input.)

    *final_result* is updated in place. *middle_result* receives the
    current innermost node of every chain built by this call, which lets
    the caller one level up append its own key; pass ``[]`` at top level.
    *normal_dictionary* is not modified.
    """
    for key, value in normal_dictionary.items():
        if not value:
            # Leaf: it becomes (or joins) a root of the inverted tree.
            middle_result.append(final_result.setdefault(key, {}))
        else:
            # Collect the chain ends produced below this key, then hang
            # this key under each of them and hand the new ends upwards.
            tips = []
            reverse_dict(final_result, tips, value)
            for tip in tips:
                middle_result.append(tip.setdefault(key, {}))
Example:
test_normal_dictionary = {
'a': {
'b': {}
},
'a1': {
'b1': {
'c1': {},
'd1': {}
}
}
}
result_dictionary = {}
print(f"Origin dict: {test_normal_dictionary}")
reverse_dict(result_dictionary, [], test_normal_dictionary)
print(f"Reversed dict: {result_dictionary}")
Output:
Origin dict: {'a': {'b': {}}, 'a1': {'b1': {'c1': {}, 'd1': {}}}}
Reversed dict: {'b': {'a': {}}, 'c1': {'b1': {}, 'a1': {}}, 'd1': {'b1': {}, 'a1': {}}}

Subtract dict A from dict B (deep del)?

If I have a deeply nested dict, is there a built-in way to subtract/remove a list of "paths" (e.g. keyA.keyB.key1, keyA.keyC.key2, etc.) or the keys of a second dict from the original dict? Or maybe there is a common module which has functionality like this?
Here's a suggestion:
D = { "keyA": {
"keyB" : {
"keyC" : 42,
"keyD": 13
},
"keyE" : 55
}
}
def remove_path(dictionary, path):
    """Delete the entry addressed by *path* from the nested *dictionary*.

    *path* is a sequence of keys; all but the last are followed downwards
    and the last one is deleted from the dict reached that way. The
    dictionary is modified in place.
    """
    if len(path) > 1:
        # Step into the first key and handle the rest of the path there.
        remove_path(dictionary[path[0]], path[1:])
    else:
        del dictionary[path[-1]]
remove_path(D, ["keyA", "keyB", "keyD"])
print D # prints {'keyA': {'keyB': {'keyC': 42}, 'keyE': 55}}
You'll probably want to introduce some error checking, too.
Just in case the other answers aren't what you're looking for, here's one that subtracts one dictionary from another.
def subtract(a, b):
    """Remove the keys in b from a.

    Works in place on *a*. For a key present in both: if *b*'s value is a
    dict, the removal continues one level down (provided *a*'s value is a
    dict too); otherwise the key is deleted from *a*. Keys of *b* that are
    missing from *a* are ignored.
    """
    for k, b_value in b.items():
        if k not in a:
            continue
        if not isinstance(b_value, dict):
            del a[k]
        elif isinstance(a[k], dict):
            subtract(a[k], b_value)
        # else: b describes sub-keys but a holds a plain value here, so
        # there is nothing below a[k] to remove; leave it untouched rather
        # than failing on a membership test against a non-dict.
Another solution:
d = {
'A' : {
'C' : {
'D' : {
'E' : 4,
},
'F' : 5,
},
},
'B' : 2,
}
def DeepDictDel(path, dict):
    """Delete the entry addressed by the dotted *path* from the nested *dict*.

    *path* is a string of keys joined by '.', e.g. ``'A.C.D'``. The keys
    before the last dot are followed downwards and the final key is removed
    from the mapping reached that way. *dict* is modified in place.
    """
    *parents, last = path.split('.')
    owner = dict
    for name in parents:
        owner = owner[name]
    del owner[last]
print d # prints {'A': {'C': {'D': {'E': 4}, 'F': 5}}, 'B': 2}
DeepDictDel('A.C.D', d)
print d # prints {'A': {'C': {'F': 5}}, 'B': 2}

Categories

Resources