Create nested dictionary from keys separated by dot(.) in python - python

I have a requirement where I have keys in string format, with segments joined by a dot (.), plus a value associated with each key string, and I want to build a nested dictionary from them.
key1 = "A.B.C.D"
text_to_be_inserted1_for_key1 = "Test1"
key2 = "A.B.C.E"
text_to_be_inserted_for_key2 = "Test2"
Expected result
dict = {
"A": {
"B" : {
"C" : {
"D" : text_to_be_inserted1_for_key1,
"E" : text_to_be_inserted_for_key2
}
}
}
}

import json
from collections import defaultdict


def deep_dict():
    """Return a defaultdict whose missing keys become further deep_dicts."""
    return defaultdict(deep_dict)


# Module-level tree that deep_insert() fills in.
result = deep_dict()


def deep_insert(key, value):
    """Store *value* in the module-level ``result`` under a dotted path.

    Args:
        key: Dot-separated path such as ``"A.B.C.D"``.
        value: Object stored at the final path segment.
    """
    *parents, leaf = key.split(".")
    node = result
    for subkey in parents:
        # Indexing a deep_dict creates the intermediate level on demand.
        node = node[subkey]
    node[leaf] = value


deep_insert("A.B.C.D", "Test1")
deep_insert("A.B.C.E", "Test2")

print(json.dumps(result, indent=4))

You can walk the segments of each key:
for every segment except the last one, create (or reuse) a nested dict stored under that segment;
for the last segment, store the value under it.
def insert(keys, values):
    """Build a nested dict from dot-separated keys and their values.

    Args:
        keys: Iterable of dotted paths such as ``"A.B.C.D"``.
        values: Iterable of values, paired with *keys* by position.

    Returns:
        A new dict in which every path segment except the last is a
        nested dict and the last segment maps to the paired value.
    """
    res = {}
    for dotted_key, value in zip(keys, values):
        *parents, leaf = dotted_key.split(".")
        node = res
        for level in parents:
            # Reuse the existing sub-dict so sibling keys share a branch.
            node = node.setdefault(level, {})
        node[leaf] = value
    return res
Use
# Example: two keys sharing the A.B.C prefix end up in the same branch.
key1, value_key1 = "A.B.C.D", "Test1"
key2, value_key2 = "A.B.C.E", "Test2"
result = insert(
    [key1, key2],
    [value_key1, value_key2],
)
print(result)  # {'A': {'B': {'C': {'D': 'Test1', 'E': 'Test2'}}}}

You can solve for each case and then merge
from copy import deepcopy
def dict_of_dicts_merge(x, y):
    """Return a new dict that recursively merges *x* and *y*.

    Keys found in only one input are deep-copied into the result. For a
    key found in both, two dict values are merged recursively; any other
    pair resolves to a deep copy of the value from *y*.

    Args:
        x: First mapping (not modified).
        y: Second mapping (not modified); wins on conflicting leaves.

    Returns:
        A new dict sharing no mutable state with the inputs.
    """
    z = {}
    overlapping_keys = x.keys() & y.keys()
    for key in overlapping_keys:
        left, right = x[key], y[key]
        if isinstance(left, dict) and isinstance(right, dict):
            z[key] = dict_of_dicts_merge(left, right)
        else:
            # Recursing into a non-dict leaf would fail on ``.keys()``.
            z[key] = deepcopy(right)
    for key in x.keys() - overlapping_keys:
        z[key] = deepcopy(x[key])
    for key in y.keys() - overlapping_keys:
        z[key] = deepcopy(y[key])
    return z
def _nest(dotted_key, value):
    """Wrap *value* in one dict per dot-separated segment of *dotted_key*."""
    nested = value
    for segment in reversed(dotted_key.split(".")):
        nested = {segment: nested}
    return nested


key1 = "A.B.C.D"
text_to_be_inserted_for_key1 = "Test1"
key2 = "A.B.C.E"
text_to_be_inserted_for_key2 = "Test2"

# Build one single-branch tree per key, then merge the two trees.
# Building from the innermost segment outwards also handles a key with a
# single segment, which the hand-written loop turned into an empty dict.
save1 = _nest(key1, text_to_be_inserted_for_key1)
save2 = _nest(key2, text_to_be_inserted_for_key2)
dict1 = dict_of_dicts_merge(save1, save2)
print(dict1)

Related

How to deal with columns in pandas dataframe?

I want to do something with column data which is a list. like:
inputs:
col-A
[{'name':'1','age':'12'}, {'name':'2','age':'12'}]
[{'name':'3','age':'18'}, {'name':'7','age':'15'}]
....
outputs:
col-A
[{'1-age':'12'}, {'2-age':'12'}]
[{'3-age':'18'}, {'7-age':'15'}]
....
My code is:
def deal(dict_col, prefix_key):
    """Prefix every remaining key of *dict_col* with the prefix value, in place.

    The value stored under *prefix_key* is removed and used, followed by
    ``'-'``, as the prefix for all other keys.

    Args:
        dict_col: Dict to rewrite. It is mutated, so a second call on the
            same dict raises KeyError because *prefix_key* is gone.
        prefix_key: Key whose value supplies the prefix.

    Returns:
        The same (mutated) ``dict_col`` object.

    Raises:
        KeyError: If *prefix_key* is not in *dict_col*.
    """
    key_value = dict_col[prefix_key] + '-'
    del dict_col[prefix_key]
    # Iterate over a snapshot of the keys: the dict is modified in the loop.
    for key in list(dict_col):
        dict_col[key_value + key] = dict_col.pop(key)
    return dict_col
prefix = "name"
# The column label must be a quoted string: ``df[col-A]`` is parsed as the
# subtraction ``col - A``. deal() rewrites each dict in place.
[[deal(sub_item, prefix) for sub_item in item] for item in df['col-A']]
Some items end up being processed multiple times.
Is that because deal modifies each dict in place, so the column already holds the converted value the next time the code runs?
For example:
For deal method we
input:
{'name':'1','age':'12'}
output:
{'1-age':'12'}
Then the next input may be {'1-age':'12'} , and now we have no name or age to deal with.
How to solve this problem?
You can use the pandas apply method for this; here is some code:
import pandas as pd

d = {'col-A' : [[{'name' : '1', 'age': '12'}, {'name' : '2', 'age': '12'}],[{'name' : '3', 'age': '18'},{'name' : '7', 'age': '15'}]]}
df = pd.DataFrame(d)


def deal(row, prefix):
    """Return new dicts whose keys are prefixed with each dict's prefix value.

    Args:
        row: Iterable of dicts (one cell of the column).
        prefix: Key whose value, followed by ``'-'``, prefixes the others.

    Returns:
        A new list of new dicts; the input dicts are left untouched, so
        re-running the conversion cannot see half-converted data. The
        *prefix* key itself is left out of the output.
    """
    out_list = []
    for sub_dict in row:
        out_str = sub_dict.get(prefix) + '-'
        out_list.append(
            {out_str + k: v for k, v in sub_dict.items() if k != prefix}
        )
    return out_list


prefix = 'name'
df['col-A'] = df['col-A'].apply(lambda x: deal(x, prefix))
print(df)
You could push some of the code in a one-liner if you like that more:
def deal(row, prefix):
    """Return copies of the dicts in *row* with prefixed keys.

    Each output key is ``<value of prefix key>-<original key>``; the
    *prefix* key itself is dropped. Input dicts are not modified.

    Args:
        row: Iterable of dicts.
        prefix: Key whose value supplies the prefix.

    Returns:
        A new list with one new dict per input dict.
    """
    out_list = []
    for sub_dict in row:
        out_dict = {
            sub_dict[prefix] + '-' + k: v
            for k, v in sub_dict.items()
            if k != prefix
        }
        out_list.append(out_dict)
    return out_list
prefix = 'name'
# Convert every cell of the column; apply() returns a new Series.
df['col-A'] = df['col-A'].apply(lambda x: deal(x, prefix))
Just for the fun of it, you could even bring it down to one single line (not recommended due to poor readability):
prefix = "name"
# Same conversion written inline: one new dict per sub-dict, prefix key dropped.
df['col-A'] = df['col-A'].apply(
    lambda row: [
        {sub_dict[prefix] + '-' + k: sub_dict[k] for k in sub_dict if k != prefix}
        for sub_dict in row
    ]
)
I believe you need .get function for select with default value if not exist key in dict:
def deal(dict_col, prefix_key):
    """Prefix every remaining key of *dict_col* in place, tolerating a missing prefix.

    Args:
        dict_col: Dict to rewrite; it is mutated and returned.
        prefix_key: Key whose value supplies the prefix. When it is
            absent, the literal ``'not_exist'`` is used instead.

    Returns:
        The same (mutated) ``dict_col`` object.
    """
    key_value = dict_col.get(prefix_key, 'not_exist') + '-'
    dict_col.pop(prefix_key, None)
    # Iterate over a snapshot of the keys: the dict is modified in the loop.
    for key in list(dict_col):
        dict_col[key_value + key] = dict_col.pop(key)
    return dict_col

How to get the last value from the list using python?

data = ['reply': '{"osc":{"version":"1.0"}}']
data1 = ['reply':'{"device":{"network":{"ipv4_dante":{"auto":"1.0"}}}}']
I need to get only the "1.0" value from data and data1 using python 3.6.
How can I achieve this?
After fixing the data:
data1 = {'reply':{"device":{"network":{"ipv4_dante":{"auto":"1.0"}}}}}
keep taking the value from the dictionary as long as it remains a dictionary:
d = data1
# Keep following the first value while the current object is a non-empty
# dict; an empty dict has no value to follow, so the walk stops there.
while isinstance(d, dict) and d:
    d = next(iter(d.values()))
print(d)
# 1.0
Your data and data1 are not valid Python literals, so I converted them into valid dictionaries.
from functools import reduce  # reduce is not a builtin on Python 3
from operator import getitem

data = {'reply': {"osc":{"version":"1.0"}}}
data1 = {'reply':{"device":{"network":{"ipv4_dante":{"auto":"1.0"}}}}}


def get_item(keys, dict_):
    """Return the value reached by indexing *dict_* with each key in turn.

    Args:
        keys: Iterable of keys forming the path, outermost first.
        dict_: Nested mapping to walk.

    Returns:
        The object at the end of the path; *dict_* itself for an empty path.

    Raises:
        KeyError: If a key on the path is missing.
    """
    return reduce(getitem, keys, dict_)


print(get_item(['reply','osc','version'], data))
print(get_item(['reply','device','network', 'ipv4_dante',"auto"],data1))
>>>1.0
1.0
Another approach that treats data and data1 as strings:
import json
from functools import reduce
from operator import getitem


class GetValue:
    """Extract the innermost value from a bracketed, single-quoted JSON string.

    Handles the two input shapes used below: ``['{...}']`` and
    ``['reply':'{...}']``.
    """

    def __init__(self, string):
        self.string = string
        # Maps an input string to the key path collected for it.
        self.new_keys = {}

    def clean_data(self, data):
        """Cut the JSON payload out of *data* and parse it.

        The payload starts right after the first colon. For the labelled
        shape that colon follows ``'reply'``. For the bare shape (third
        character is ``{``) it follows the outermost JSON key, so that
        outer level is skipped and one extra closing brace is dropped at
        the end to keep the remainder balanced.
        """
        if data[2] == '{':
            get_json_data = len(data) - 3
        else:
            get_json_data = len(data) - 1
        payload = data[data.find(':') + 1:get_json_data]
        # Strip the single quotes that wrap the JSON text. (The original
        # compared characters with ``is not``, which tests identity.)
        return json.loads(payload.replace("'", ""))

    def get_recurvise_key(self, data, dict_):
        """Append every key met while descending *dict_* to the path for *data*.

        Returns the collected list, or None when *dict_* is empty and
        nothing was collected before.
        """
        for key, val in dict_.items():
            self.new_keys.setdefault(data, []).append(key)
            if isinstance(val, dict):
                self.get_recurvise_key(data, val)
        return self.new_keys.get(data)

    def get_value(self):
        """Return the value at the end of the single key chain in the payload."""
        get_data = self.clean_data(self.string)
        # Start from a fresh path so repeated calls do not pile up keys.
        self.new_keys.pop(self.string, None)
        get_keys = self.get_recurvise_key(self.string, get_data)
        value = reduce(getitem, get_keys, get_data)
        return value


data = """['{"device":{"network":{"ipv4_dante":{"auto":"1.0"}}}}']"""
data1 = """['reply':'{"device":{"network":{"ipv4_dante":{"auto":"1.0"}}}}']"""
obj_data = GetValue(data)
obj_data1 = GetValue(data1)
print(obj_data.get_value(), obj_data1.get_value())
>>> 1.0 1.0
You can get that result recursively like:
Code:
def bottom_value(in_data):
    """Return the innermost value of the JSON text stored under ``'reply'``.

    Args:
        in_data: Mapping whose ``'reply'`` entry is a JSON string.

    Returns:
        The object reached by following the first key of each nested
        dict until a non-dict (or an empty dict) is found.
    """
    import json  # local import keeps the function usable on its own

    node = json.loads(in_data['reply'])
    while isinstance(node, dict) and node:
        node = node[next(iter(node))]
    return node
Test Code:
import json
# Two sample payloads; each 'reply' holds JSON text, not a dict.
data = {'reply': '{"osc":{"version":"1.0"}}'}
data1 = {'reply':'{"device":{"network":{"ipv4_dante":{"auto":"1.0"}}}}'}
for payload in (data, data1):
    print(bottom_value(payload))
Result:
1.0
1.0
You can try regex :
import re

# One or more digits/dots that directly follow a double quote.
pattern=r'(?<=")[0-9.]+'
data1="""['reply': '{"osc":{"version":"1.0"}}']"""
data2="""['reply':'{"device":{"network":{"ipv4_dante":{"auto":"1.0"}}}}']"""


def find_value(data):
    """Return the first quoted run of digits/dots found in *data*.

    Raises:
        IndexError: If *data* contains no such run.
    """
    return re.findall(pattern, data)[0]
output:
print(find_value(data1))
output:
1.0
second:
print(find_value(data2))
output
1.0

Parsing a file with special format using python to a list of dictionaries

I have a file with the following format:
X ={
a= "someText";
b = 0;
c = 1;
d ={
t = "someText3";
};
f ="someText2";
};
X ={
a= "someText4";
b = 20;
c = 40;
f ="someText6";
d ={
t = "someText5";
};
};
I am looking for a smart and robust way to parse it to a list of dict like the following:
X[0] = {'a':"someText",'b':0, 'c':0, 'd':{ 't':'SomeText3' }, 'f':"someText2"}
X[1] = {'a':"someText4",'b':20, 'c':40, 'd':{ 't':'SomeText5' }, 'f':"someText6"}
Note that there might be nested dictionaries and the variables can have different order of occurrence.
My method is to keep track of the level by searching '={' and '};' and construct the list. I wonder if there is an elegant method to parse it.
The simple parser below implements a recursive descent algorithm on simple dictionary schemes:
import re
from collections import namedtuple
s = """
X ={
a= "someText";
b = 0;
c = 1;
d ={
t = "someText3";
};
f ="someText2";
};
"""
s1 = """
X ={
a= "someText4";
b = 20;
c = 40;
f ="someText6";
d ={
t = "someText5";
};
};
"""
token = namedtuple('token', ['type', 'value'])


class Parser:
    """Parse one ``NAME ={ key = value; ... };`` block into a nested dict.

    After ``parse()`` the result is in ``self.d``. Values are kept as
    strings: quoted text loses its quotes, integers stay as digit strings.
    """

    # Alternatives recognised by the tokenizer; '=' and lone ';' are skipped.
    lang = r'"[a-zA-Z0-9]+"|[a-zA-Z]+|\d+|\{|\};'
    token_types = {'int': r'\d+', 'key': r'[a-zA-Z]+', 'start': r'\{', 'end': r'\};'}

    def __init__(self, s):
        # Drop the leading block name and the trailing '};' of the outer block.
        self.starting_with = Parser.tokenize(s)[1:-1]
        self.tokens = iter(self.starting_with)
        self.starts = []
        self.ends = []
        self.k_list = []
        self.k = None
        self.d = {}
        self.current_d = {}

    def parse(self):
        """Consume the token stream and fill ``self.d``.

        Uses a loop with an explicit stack of open dicts, so nesting of
        any depth is supported and long inputs do not hit the recursion
        limit.

        Raises:
            ValueError: If a ``};`` appears with no nested dict open.
        """
        open_dicts = [self.d]
        for current in self.tokens:
            if current.type == 'start':
                self.starts.append(current.value)
                if len(self.starts) > 1:
                    # A nested '{' opens a child dict under the latest key.
                    child = {}
                    open_dicts[-1][self.k] = child
                    open_dicts.append(child)
            elif current.type == 'key':
                self.k = current.value
                self.k_list.append(self.k)
            elif current.type == 'end':
                if len(open_dicts) == 1:
                    raise ValueError("unbalanced '};' in input")
                self.starts.pop()
                open_dicts.pop()
            else:
                value = current.value
                if value.startswith('"') and value.endswith('"'):
                    value = value[1:-1]
                open_dicts[-1][self.k] = value

    @classmethod
    def tokenize(cls, s):
        """Split *s* into ``token(type, value)`` tuples.

        Quoted text gets type ``'string'``; everything else gets the
        name of the ``token_types`` pattern that matches it entirely.
        """
        tokens = []
        for text in re.findall(cls.lang, s):
            if text.startswith('"') and text.endswith('"'):
                kind = 'string'
            else:
                kind = next(
                    name for name, pat in cls.token_types.items()
                    if re.fullmatch(pat, text)
                )
            tokens.append(token(kind, text))
        return tokens
dictionaries = [s, s1]
X = []
# Parse each sample block separately and collect the resulting dicts.
for d in dictionaries:
    p = Parser(d)
    p.parse()
    X.append(p.d)
print(X[0])
print(X[1])
Output:
{'a': 'someText', 'c': '1', 'b': '0', 'd': {'t': 'someText3'}, 'f': 'someText2'}
{'a': 'someText4', 'c': '40', 'b': '20', 'd': {'t': 'someText5'}, 'f': 'someText6'}
Here is an implementation using parsy (which works similarly to pyparsing but is more modern and has much nicer documentation, and generally results in much neater code, but does require Python 3.3 or greater):
from collections import defaultdict
from parsy import generate, regex, seq, string, whitespace
def lexeme(parser):
    """Wrap *parser* so optional whitespace on either side is consumed."""
    return whitespace.optional() >> parser << whitespace.optional()


variable = lexeme(regex(r"[A-Za-z]+"))
string_literal = lexeme(string('"') >> regex(r'[^"]*') << string('"'))
int_literal = lexeme(regex(r'[0-9]+').map(int))


# Declared through @generate so it can refer to dict_literal, which is
# only defined further down (the grammar is recursive).
@generate
def value():
    return (yield dict_literal | string_literal | int_literal)


# One ``name = value;`` statement, parsed into [name, value].
statement = seq(variable << lexeme(string("=")),
                value << lexeme(string(";")))
dict_literal = lexeme(string("{")) >> statement.many().map(dict) << lexeme(string("}"))
file_format = statement.many()


def parse(text_input):
    """Parse *text_input* and group the values by top-level variable name.

    Returns:
        A dict mapping each top-level name to the list of values
        assigned to it, in order of appearance.
    """
    output = defaultdict(list)
    for key, val in file_format.parse(text_input):
        output[key].append(val)
    return dict(output)
Output for your example:
{'X': [{'a': 'someText',
'b': 0,
'c': 1,
'd': {'t': 'someText3'},
'f': 'someText2'},
{'a': 'someText4',
'b': 20,
'c': 40,
'd': {'t': 'someText5'},
'f': 'someText6'}]}
The parsing is done by file_format.parse, the parse function I've added then combines that basic parse into a dictionary with multiple entries for each top level variable, and returns that value. It doesn't print it exactly as per your example because that probably isn't what you need if you want to use the values from Python.
You might want to adjust this according to your needs. Also, you may need to adjust all of the sub-parsers according to your actual rules (e.g. can variable names contain numbers? Are there escapes for string literals?).
You can do this without having an IQ of 170, by using pyparsing. Mind you, I've found that it takes some time to learn it.
I have defined the grammar of your input in seven lines. result is used to house the labelled pieces that pyparsing finds. Then the final lines of the code construct what you want from the parsed items. The bits of code that involve previous are a hideous kluge that I needed because my grammar finds the var elements twice. Perhaps you can find the flaw?
input = '''\
X ={
a= "someText";
b = 0;
c = 1;
d ={
t = "someText3";
};
f ="someText2";
};
X ={
a= "someText4";
b = 20;
c = 40;
f ="someText6";
d ={
t = "someText5";
};
};'''
import pyparsing as pp
# Flat log of (tag, text) tuples, appended to by the parse actions below.
result = []
var = pp.Word(pp.alphas).setParseAction(lambda s: result.append(('var', s[0])))
equals = pp.Literal('=')
semicolon = pp.Literal(';')
a_string = pp.QuotedString('"').setParseAction(lambda s: result.append(('string', s[0])))
number = pp.Word(pp.nums).setParseAction(lambda s: result.append(('number', s[0])))
open_curly = pp.Literal('{').setParseAction(lambda s: result.append(('dict_open', None)))
close_curly = pp.Literal('}').setParseAction(lambda s: result.append(('dict_close', None)))
# Forward declaration: one_dict refers to itself through `declaration`.
one_dict = pp.Forward()
simple = var + equals + pp.Or([a_string, number]) + semicolon
# NOTE(review): one_dict is tried before simple and both begin with `var`,
# so var's parse action presumably runs once for the failed one_dict attempt
# and again for simple -- likely the cause of the duplicate 'var' entries
# mentioned in the text above. Confirm against pyparsing's backtracking rules.
declaration = one_dict | simple
one_dict << var + equals + open_curly + pp.OneOrMore(declaration) + close_curly + semicolon
dict_list = pp.OneOrMore(one_dict)
# Parsing is run for its side effect of filling `result`.
dict_list.parseString(input)
count = 0
previous = None
# Replay the recorded parse events as text, one top-level X per line.
for item in result:
    if item[0] == 'var':
        if item[1] == 'X':
            print('\nX[{:d}] = '.format(count), end='')
            count += 1
        else:
            # Skip a 'var' event identical to the last one printed; the
            # grammar records such names twice.
            if item == previous:
                continue
            print('{}: '.format(item[1]), end='')
            previous = item
    elif item[0] == 'dict_open':
        print('{ ', end='')
    elif item[0] == 'dict_close':
        print('}', end='')
    elif item[0] == 'number':
        print('{}, '.format(item[1]), end='')
    elif item[0] == 'string':
        print('"{}", '.format(item[1]), end='')
    else:
        pass
print()
Result:
X[0] = { a: "someText", b: 0, c: 1, d: { t: "someText3", }f: "someText2", }
X[1] = { a: "someText4", b: 20, c: 40, f: "someText6", d: { t: "someText5", }}
Edit: If it's possible for dictionaries to be empty then substitute the following line in the code above.
one_dict << var + equals + open_curly + pp.ZeroOrMore(declaration) + close_curly + semicolon
I find plex even easier to apply here. Just eight expressions to scan for.
from io import StringIO
input = StringIO(
'''X ={
a= "someText";
b = 0;
c = 1;
d ={
t = "someText3";
};
f ="someText2";
};
X ={
a= "someText4";
b = 20;
c = 40;
f ="someText6";
d ={
t = "someText5";
};
};''')
from plex import *
from io import StringIO
# Single whitespace character: space, tab or newline.
space = Any(' \t\n')
# Token table: each entry pairs a pattern with the name the scanner reports.
# NOTE(review): entries tagged IGNORE are presumably consumed without being
# returned by scanner.read() -- confirm against the plex documentation.
lexicon = Lexicon([
(Rep1(Range('AZaz')), 'var'),
(Str('"') + Rep(AnyBut('"')) + Str('"'), 'quoted'),
(Rep1(Range('09')), 'number'),
(space, IGNORE),
(Str('='), IGNORE),
(Str(';'), IGNORE),
(Str('{'), 'open_curly'),
(Str('}'), 'close_curly'),
])
scanner = Scanner(lexicon, input)
count = 0
# Read tokens until the scanner reports None as the token name.
while True:
    token = scanner.read()
    if token[0] is None:
        break
    elif token[0] in ['var', 'number']:
        if token[1] == 'X':
            print('\nX[{:d}] = '.format(count), end='')
            count += 1
        else:
            # NOTE(review): numbers share this branch with names, so they
            # are printed with a trailing ': ' rather than ', '.
            print('{}: '.format(token[1]), end='')
    elif token[0] == 'quoted':
        print('{}, '.format(token[1]), end='')
    elif token[0] == 'open_curly':
        print('{} '.format(token[1]), end='')
    elif token[0] == 'close_curly':
        print('{}, '.format(token[1]), end='')
    else:
        pass
print()
Result:
X[0] = { a: "someText", b: 0: c: 1: d: { t: "someText3", }, f: "someText2", },
X[1] = { a: "someText4", b: 20: c: 40: f: "someText6", d: { t: "someText5", }, },
The heavy downside is that it's distributed for Python 2 only, as far as I know. However, I was able to make it work on Python 3 in about two hours.

make dictionary from list in python

I have a list of path like ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv'].
How can it be converted to a dictionary like {'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}, and so on, where paths that share the same first folder are grouped under that folder?
I tried this code, but got confused what to do next.
list_split = [i.split('/') for i in list]
dic = {}
list_temp = []
for item in list_split:
list_temp.append(item)
if len(list_temp) < 2:
pass
else:
for itemm in list_temp:
pass
dic = {}
lst = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv']
for item in lst:
    # Split at the first '/': folder on the left, the rest on the right.
    # An entry without '/' is filed under its whole text with an empty name.
    key, _, val = item.partition('/')
    # ``dict.has_key`` no longer exists on Python 3; setdefault creates
    # the list the first time a folder is seen.
    dic.setdefault(key, []).append(val)
>>> dic
{'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}
original_list = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv', 'x/1.csv', 'y/2.csv'] # i added a couple more items to (hopefully) prove it works
dic = {}
for item in original_list:
    # First component is the folder; whatever follows is re-joined so
    # deeper paths keep their remaining structure.
    folder, *rest = item.split('/')
    dic.setdefault(folder, []).append('/'.join(rest))
You can try this:
>>> L = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv']
>>> list_split = [tuple(i.split('/')) for i in L]
>>> newdict = {}
>>> for (key,item) in list_split:
if key not in newdict:
newdict[key]=[]
newdict[key].append(item)
Output:
{'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}
You can also use a defaultdict for this:
from collections import defaultdict

paths = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv', 'bbb/ccc.csv', 'aaa/bbb/ccc.csv']
# Named files_by_dir rather than ``dict`` so the builtin is not shadowed.
files_by_dir = defaultdict(list)
for path in paths:
    # Everything before the last component is the (possibly nested) folder.
    *dir_parts, filename = path.split('/')
    files_by_dir['/'.join(dir_parts)].append(filename)
print(files_by_dir.items())
print(files_by_dir['aaa'])
This will also work for nested directories by putting the full path as the key.
path_dict = {}
for path in path_list:
    folder, slash, rest = path.partition('/')
    if slash:
        # Accumulate instead of assigning, so earlier files in the same
        # folder are not overwritten by later ones.
        path_dict.setdefault(folder, []).append(rest)

Adding to a dictionary in python instead of appending

My raw data is:
abc 123
abc 456
def 789
def 101112
I want to put this into a dictionary where the first column is the key and the second column is the value. In the dictionary I currently have:
{'abc': ['123', '456'], 'def': ['789', '101112']}
instead of appending the values I want to add them to the original value so that it looks like:
{'abc': ['579'], 'def': ['101901']}
My current code is:
d = defaultdict(list)
# The with-block closes the file even if reading fails.
with open('test.csv', 'r') as infile:
    lines = infile.readlines()[2:-1]
for item in lines:
    key, value = [a.strip() for a in item.split(' ')]
    d[key].append(value)
d = defaultdict(list)
with open('test.csv', 'r') as infile:
    lines = infile.readlines()[2:-1]
for item in lines:
    key, value = [a.strip() for a in item.split(' ')]
    if key in d:
        # Both operands must be ints; ``value`` is read from the file as text.
        d[key][0] = str(int(d[key][0]) + int(value))
    else:
        d[key].append(value)
# Accumulate as ints (a missing key starts at 0), then convert to the
# requested shape: one single-item list of text per key.
totals = defaultdict(int)
with open('test.csv', 'r') as infile:
    for line in infile.readlines()[2:-1]:
        key, value = [a.strip() for a in line.split(' ')]
        totals[key] += int(value)
d = {key: [str(total)] for key, total in totals.items()}
Here is version using default python dict:
d = dict()
with open('test.csv', 'r') as infile:
    lines = infile.readlines()[2:-1]
for line in lines:
    k, v = [i.strip() for i in line.split(' ')]
    if k in d:
        # Replace the single stored total with the new sum, kept as text.
        d[k] = [str(int(d[k][0]) + int(v))]
    else:
        d[k] = [v]
print(d)

Categories

Resources