make dictionary from list in python - python

I have a list of paths like ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv'].
How can it be converted to a dictionary like {'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}, and so on, where each key is the first folder of a path and its value lists the files that share that folder?
I tried this code, but got confused what to do next.
list_split = [i.split('/') for i in list]
dic = {}
list_temp = []
for item in list_split:
list_temp.append(item)
if len(list_temp) < 2:
pass
else:
for itemm in list_temp:
pass

# Group file names by their leading folder, e.g.
# 'aaa/bbb.csv' ends up as {'aaa': ['bbb.csv']}.
dic = {}
lst = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv']
for item in lst:
    # partition() splits on the first '/' only. Unlike find(), it cannot
    # hand back -1, which would silently chop the last character off the key.
    folder, sep, rest = item.partition('/')
    if sep:
        key, val = folder, rest
    else:
        # No folder component at all: file the bare name under the empty key.
        key, val = '', item
    # setdefault() replaces the dict.has_key() test, which no longer exists
    # in Python 3, and creates the list on first sight of a folder.
    dic.setdefault(key, []).append(val)
>>> dic
{'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}

# A few extra folders are included to show that the grouping generalises.
original_list = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv', 'x/1.csv', 'y/2.csv']
dic = {}
for entry in original_list:
    parts = entry.split('/')
    folder, remainder = parts[0], parts[1:]
    # Everything after the first folder is re-joined, so nested paths keep
    # their sub-folders in the stored value.
    dic.setdefault(folder, []).append('/'.join(remainder))

You can try this:
# Group the part after the first '/' of every path under its first folder.
L = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv']
# maxsplit=1 guarantees exactly two parts per path, so nested paths such as
# 'a/b/c.csv' no longer break the two-name unpacking below. Paths without
# any '/' have no folder to group under and are skipped.
list_split = [tuple(i.split('/', 1)) for i in L if '/' in i]
newdict = {}
for key, item in list_split:
    # Create the folder's list on first use, then append to it.
    newdict.setdefault(key, []).append(item)
Output:
{'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}

You can also use a defaultdict for this:
from collections import defaultdict

paths = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv', 'bbb/ccc.csv', 'aaa/bbb/ccc.csv']
# Named "grouped" rather than "dict" so the built-in dict type stays usable
# in the rest of the module.
grouped = defaultdict(list)
for path in paths:
    # rpartition() splits on the LAST '/', so the key is the full directory
    # part and the value is the bare file name; a path without '/' gets ''.
    directory, _, filename = path.rpartition('/')
    grouped[directory].append(filename)
print(grouped.items())
print(grouped['aaa'])
This will also work for nested directories by putting the full path as the key.

# Group everything after the first '/' of each path under its leading folder.
# NOTE(review): path_list is not defined in this snippet; it is assumed to be
# a list of path strings such as 'aaa/bbb.csv'.
path_dict = {}
for path in path_list:
    if '/' in path:
        folder, rest = path.split('/', 1)
        # Append instead of assigning: a plain assignment replaced the
        # folder's entry on every iteration, so only the last file survived.
        path_dict.setdefault(folder, []).append(rest)

Related

last matched string is being skipped in python

Team,
The last key in my log file, ml31, is being skipped during evaluation, so the resulting dictionary is missing its entry. Any hints?
ml1
/core
/home
ml2
/var
/home
/lib
cpuml3
/home
/root
/raid
ml31
/home
/root
/raid
import os
homedir=os.environ.get('HOME')
print(homedir)
key_str = "ml"
val_list = []
key = ''
my_dict = {}
with open(homedir + '/backup/file2dict.result') as file2dict:
for line in file2dict:
words = line.split()
for aWord in words:
if key_str in aWord:
if key:
my_dict[key] = val_list
print(my_dict)
val_list = []
key = aWord
else:
key = aWord
else:
val_list.append(aWord)
print(my_dict)
output
{'ml1': ['/core', '/home'], 'ml2': ['/var', '/home', '/lib'], 'cpuml3': ['/home', '/root', '/raid']}
expected
{'ml1': ['/core', '/home'], 'ml2': ['/var', '/home', '/lib'], 'cpuml3': ['/home', '/root', '/raid'], 'ml31': ['/home', '/root', '/raid'] }
You only assign the list to its key (my_dict[key] = val_list) when you reach the next key, so that assignment never happens for the last key. You need to do it once more after the loop ends:
# Build my_dict from the log file: a word containing key_str starts a new
# entry, and every other word is collected as a value of the current entry.
# Relies on homedir, key_str, key, val_list and my_dict being initialised
# before this snippet runs.
with open(homedir + '/backup/file2dict.result') as file2dict:
    for line in file2dict:
        for aWord in line.split():
            if key_str not in aWord:
                val_list.append(aWord)
                continue
            if key:
                # Reaching a new key closes the previous entry.
                my_dict[key] = val_list
                val_list = []
            key = aWord
# The last entry is never followed by another key, so store it explicitly.
# The guard avoids a spurious '' entry when the file contained no key at all.
if key:
    my_dict[key] = val_list
You can improve it with collections.defaultdict
from collections import defaultdict
from pathlib import Path

# Same parsing as the explicit loop, but defaultdict creates each value list
# on demand, so no "flush the last entry" step is needed.
# homedir must already be defined (the directory containing backup/).
key = ''
key_str = "ml"
my_dict = defaultdict(list)
content = Path(homedir + '/backup/file2dict.result').read_text().splitlines()
for line in content:
    word = line.strip()  # tolerate indented or padded lines
    if key_str in word:
        key = word
        # Register the key immediately so that a key with no values still
        # appears in the result (with an empty list).
        my_dict.setdefault(key, [])
    elif word:  # skip empty lines
        my_dict[key].append(word)

Create nested dictionary from keys separated by dot(.) in python

I have a requirement where keys are given as strings whose parts are joined by dots (.), each with an associated value, and I want to build a nested dictionary from them.
key1 = "A.B.C.D"
text_to_be_inserted1_for_key1 = "Test1"
key2 = "A.B.C.E"
text_to_be_inserted_for_key2 = "Test2"
Expected result
dict = {
"A": {
"B" : {
"C" : {
"D" : text_to_be_inserted1_for_key1,
"E" : text_to_be_inserted_for_key2
}
}
}
}
from collections import defaultdict


def deep_dict():
    """Return a defaultdict whose missing keys are filled with further deep_dicts."""
    return defaultdict(deep_dict)


result = deep_dict()


def deep_insert(key, value):
    """Store value in the module-level result under the dot-separated path key."""
    *parents, leaf = key.split(".")
    node = result
    # Indexing a deep_dict creates each missing intermediate level on the fly.
    for name in parents:
        node = node[name]
    node[leaf] = value


deep_insert("A.B.C.D", "Test1")
deep_insert("A.B.C.E", "Test2")

import json
print(json.dumps(result, indent=4))
You may proceed as follows:
- for each part of the key except the last one, map that part to a nested dict;
- for the last part, map it to the value.
def insert(keys, values):
    """Build a nested dict from dot-separated keys paired with values.

    keys and values are walked in lockstep with zip(); each key such as
    "A.B.C" places its value at result["A"]["B"]["C"].
    """
    res = {}
    for dotted, v in zip(keys, values):
        *parents, leaf = dotted.split(".")
        node = res
        # Descend through the tree, creating missing levels as empty dicts.
        for name in parents:
            node = node.setdefault(name, {})
        node[leaf] = v
    return res
Use
# Two dotted keys that share the A.B.C prefix, each with its own value.
key1, value_key1 = "A.B.C.D", "Test1"
key2, value_key2 = "A.B.C.E", "Test2"
result = insert(keys=[key1, key2], values=[value_key1, value_key2])
print(result)  # {'A': {'B': {'C': {'D': 'Test1', 'E': 'Test2'}}}}
You can solve for each case and then merge
from copy import deepcopy


def dict_of_dicts_merge(x, y):
    """Recursively merge two nested dicts into a new dict.

    Keys present in only one input are deep-copied into the result. Keys
    present in both are merged recursively when both values are dicts;
    otherwise the value from y wins.

    Neither x nor y is modified, and the result shares no mutable state
    with them.
    """
    z = {}
    overlapping_keys = x.keys() & y.keys()
    for key in overlapping_keys:
        left, right = x[key], y[key]
        if isinstance(left, dict) and isinstance(right, dict):
            z[key] = dict_of_dicts_merge(left, right)
        else:
            # Recursing unconditionally would call .keys() on a leaf value
            # (e.g. a string) and raise AttributeError.
            z[key] = deepcopy(right)
    for key in x.keys() - overlapping_keys:
        z[key] = deepcopy(x[key])
    for key in y.keys() - overlapping_keys:
        z[key] = deepcopy(y[key])
    return z
def _nest_dotted_key(dotted_key, value):
    """Wrap value in one dict per component of dotted_key, innermost last."""
    nested = value
    # Work from the last component outwards so the first one ends up on top.
    # Starting from the value itself also covers a key with a single
    # component, which would otherwise produce an empty dict.
    for part in reversed(dotted_key.split(".")):
        nested = {part: nested}
    return nested


key1 = "A.B.C.D"
text_to_be_inserted_for_key1 = "Test1"
key2 = "A.B.C.E"
text_to_be_inserted_for_key2 = "Test2"

# Build one single-branch tree per key, then merge the two trees.
save1 = _nest_dotted_key(key1, text_to_be_inserted_for_key1)
save2 = _nest_dotted_key(key2, text_to_be_inserted_for_key2)
dict1 = dict_of_dicts_merge(save1, save2)
print(dict1)

How to deal with columns in pandas dataframe?

I want to do something with column data which is a list. like:
inputs:
col-A
[{'name':'1','age':'12'}, {'name':'2','age':'12'}]
[{'name':'3','age':'18'}, {'name':'7','age':'15'}]
....
outputs:
col-A
[{'1-age':'12'}, {'2-age':'12'}]
[{'3-age':'18'}, {'7-age':'15'}]
....
My code is:
def deal(dict_col, prefix_key):
key_value = dict_col[prefix_key]+'-'
dict_col.pop(prefix_key, None)
items = copy.deepcopy(dict_col)
for key, value in items.items():
dict_col[key_value+key] = dict_col.pop(key)
return dict_col
prefix = "name"
[[deal(sub_item, prefix) for sub_item in item] for item in df[col-A]]
Some items will be processed multiple times.
Because the return value of deal method will be swapped to item in real time?
For example:
For deal method we
input:
{'name':'1','age':'12'}
output:
{'1-age':'12'}
Then the next input may be {'1-age':'12'} , and now we have no name or age to deal with.
How to solve this problem?
You can use the pandas apply method for this. Here is some code:
import pandas as pd

d = {'col-A' : [[{'name' : '1', 'age': '12'}, {'name' : '2', 'age': '12'}],[{'name' : '3', 'age': '18'},{'name' : '7', 'age': '15'}]]}
df = pd.DataFrame(d)


def deal(row, prefix):
    """Return a new list of dicts with keys renamed to '<prefix value>-<key>'.

    row is a list of dicts. For each dict, the value stored under prefix is
    removed and prepended, followed by '-', to every remaining key:
    {'name': '1', 'age': '12'} becomes {'1-age': '12'} for prefix 'name'.

    The input dicts are not modified. Raises TypeError if a dict lacks the
    prefix key (None cannot be concatenated with '-').
    """
    out_list = []
    for sub_dict in row:
        out_str = sub_dict.get(prefix) + '-'
        # Skip the prefix key itself; otherwise the output would also contain
        # an unwanted '1-name' entry.
        out_list.append(
            {out_str + k: v for k, v in sub_dict.items() if k != prefix}
        )
    return out_list


prefix = 'name'
df['col-A'] = df['col-A'].apply(lambda x: deal(x, prefix))
print(df)
You could push some of the code in a one-liner if you like that more:
def deal(row, prefix):
    """Return a new list of dicts with keys renamed to '<prefix value>-<key>'.

    For every dict in row, the entry stored under prefix is dropped and its
    value, followed by '-', is prepended to each remaining key. The input
    dicts are left untouched. Raises KeyError if a non-empty dict has no
    prefix key.
    """
    out_list = []
    for sub_dict in row:
        # A dict comprehension is the direct form of dict(<generator of pairs>).
        out_list.append(
            {sub_dict[prefix] + '-' + k: v for k, v in sub_dict.items() if k != prefix}
        )
    return out_list
prefix = 'name'
# Replace every row's list of dicts with its re-keyed version.
# (The closing parenthesis of apply() was missing.)
df['col-A'] = df['col-A'].apply(lambda x: deal(x, prefix))
Just for the fun of it, you could even bring it down to a single line (not recommended due to poor readability):
prefix = "name"
# Same transformation as deal(), written inline: for each dict in the row,
# drop the prefix key and prepend '<prefix value>-' to the remaining keys.
df['col-A'] = df['col-A'].apply(
    lambda row: [
        {sub_dict[prefix] + '-' + k: v for k, v in sub_dict.items() if k != prefix}
        for sub_dict in row
    ]
)
I believe you need the .get method, which returns a default value if the key does not exist in the dict:
def deal(dict_col, prefix_key):
    """Re-key dict_col in place as '<prefix value>-<key>' and return it.

    The entry stored under prefix_key is removed and its value, followed by
    '-', is prepended to every remaining key. When prefix_key is absent
    (for example because the dict was already processed), the literal
    'not_exist' is used as the prefix instead of raising KeyError.
    """
    key_value = dict_col.get(prefix_key, 'not_exist') + '-'
    dict_col.pop(prefix_key, None)
    # Snapshot the keys first: the dict is modified while being walked.
    # A list of keys is enough; a deep copy of the whole dict is not needed.
    for key in list(dict_col):
        dict_col[key_value + key] = dict_col.pop(key)
    return dict_col

Adding comma separated items on separate lines to a dict in Python

I am using python 2.7, and using python dicts.
I have my output like this:
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
My code:
d = {}
for line in output.split("\n"):
if ":" not in line:
continue
key, value = line.strip().split(":", 1)
d[key] = value
Expected key and its value:
trunk_names: trunk01, trunk02, trunk03,trunk04,trunk05,trunk06, trunk07,trunk08,trunk09,trunk10, trunk11,trunk12
Actual key and values being output:
trunk_names: trunk01, trunk02, trunk03,trunk04,
from collections import defaultdict

output = '''
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
'''

# Map each "key:" to the list of its comma-separated values; lines without
# a colon continue the value list of the most recent key.
d = defaultdict(list)
current_key = None
for line in output.splitlines():
    # partition() splits on the first ':' only, so a value that itself
    # contains a colon is kept whole.
    head, sep, tail = line.partition(':')
    if sep:
        current_key = head.strip()
        values = tail
    else:
        values = line
    if current_key is None:
        # Nothing to attach to yet (e.g. the blank line that opens the text);
        # this replaces the assumption that exactly one line must be skipped.
        continue
    stripped = (value.strip() for value in values.split(','))
    d[current_key].extend(value for value in stripped if value)
print(d)
gives:
defaultdict(<type 'list'>, {'trunk_names': ['trunk01', 'trunk02', 'trunk03', 'trunk04', 'trunk05', 'trunk06', 'trunk07', 'trunk08', 'trunk09', 'trunk10', 'trunk11', 'trunk12'], 'goods': ['apples', 'oranges']})
How stable is your structure? If it is very stable and the data quality is high, then you can simplify by testing whether the line ends with a comma, using line.endswith(','):
In []:
import pprint

# Join continuation lines: a value line ending in ',' is continued on the
# next line. "output" is the multi-line text to parse.
d = {}
f = iter(output.split('\n'))
for line in f:
    if ':' not in line:
        # Blank or stray lines carry no key; unpacking them would raise
        # ValueError.
        continue
    key, line = map(str.strip, line.split(':', 1))
    while line.endswith(','):
        continuation = next(f, None)
        if continuation is None:
            # Text ended on a trailing comma; stop instead of raising
            # StopIteration.
            break
        line += continuation.strip()
    d[key] = [i.strip() for i in line.split(',') if i.strip()]
pprint.pprint(d)
Out[]:
{'goods': ['apples', 'oranges'],
'trunk_names': ['trunk01',
'trunk02',
'trunk03',
'trunk04',
'trunk05',
'trunk06',
'trunk07',
'trunk08',
'trunk09',
'trunk10',
'trunk11',
'trunk12']}

Adding to a dictionary in python instead of appending

My raw data is:
abc 123
abc 456
def 789
def 101112
I want to put this into a dictionary where the first column is the key and the second column is the value. In the dictionary I currently have:
{'abc': ['123', '456'], 'def': ['789', '101112']}
instead of appending the values I want to add them to the original value so that it looks like:
{'abc': ['579'], 'def': ['101901']}
My current code is:
d = defaultdict(list)
infile = open('test.csv','r')
lines = infile.readlines()[2:-1]
for item in lines:
key, value = [a.strip() for a in item.split(' ')]
d[key].append(value)
# Sum the second column per key, keeping the running total as a
# one-element list of str, e.g. {'abc': ['579']}.
d = defaultdict(list)
# The with-block closes the file even if parsing fails later.
with open('test.csv', 'r') as infile:
    lines = infile.readlines()[2:-1]
for item in lines:
    # split() with no argument tolerates repeated spaces and strips the newline.
    key, value = item.split()
    if key in d:
        # Both operands must be ints; value arrives as text from the file.
        d[key][0] = str(int(d[key][0]) + int(value))
    else:
        d[key].append(value)
# Accumulate integer totals per key first, then convert them to the
# requested shape: a one-element list holding the total as a string.
totals = defaultdict(int)
with open('test.csv', 'r') as infile:
    for line in infile.readlines()[2:-1]:
        # Use the loop variable itself ("line"), and convert the text to int
        # before adding.
        key, value = line.split()
        totals[key] += int(value)
d = defaultdict(list)
for key, total in totals.items():
    d[key] = [str(total)]
Here is version using default python dict:
# Plain-dict version: each key maps to a one-element list holding the
# running total as a string.
d = dict()
# The with-block guarantees the file is closed.
with open('test.csv', 'r') as infile:
    lines = infile.readlines()[2:-1]
for line in lines:
    k, v = [i.strip() for i in line.split(' ')]
    if k in d:
        d[k] = [str(int(d[k][0]) + int(v))]
    else:
        d[k] = [v]
# The function-call form of print works on both Python 2 and Python 3.
print(d)

Categories

Resources