I have a directory with many pickled files, each containing a dictionary. The filename indicates the setting of the dictionary. E.g.: 20NewsGroup___10___Norm-False___Probs-True___euclidean.pickle.
I want to combine these different dicts all in one large dict. To do this, I have written the following code:
PATH = '<SOME PATH>'
all_dicts = os.listdir(PATH)
one_dict = dict()
for i, filename in enumerate(all_dicts):
infile = open(PATH+filename, 'rb')
new_dict = pickle.load(infile)
infile.close()
splitted = filename.split('___')
splitted[4] = splitted[4].split('.')[0]
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
However, when I run this, I get a KeyError, as the key for splitted[0] does not yet exist. What is the best way to populate a dictionary similar to what I envision?
you need to create in these fields
Example:
from typing import List
PATH = '<SOME PATH>'
all_dicts = os.listdir(PATH)
def check_exist_path(target_dict, paths: List[str]):
for key in paths:
if key not in target_dict:
target_dict[key] = {}
target_dict = target_dict[key]
one_dict = dict()
for i, filename in enumerate(all_dicts):
infile = open(PATH+filename, 'rb')
new_dict = pickle.load(infile)
infile.close()
splitted = filename.split('___')
splitted[4] = splitted[4].split('.')[0]
for i in range(5):
check_exist_path(one_dict, [splitted[j] for j in range(i)])
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
You could use try-except blocks:
for i, filename in enumerate(all_dicts):
infile = open(PATH+filename, 'rb')
new_dict = pickle.load(infile)
infile.close()
splitted = filename.split('___')
splitted[4] = splitted[4].split('.')[0]
try:
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
except KeyError:
one_dict[splitted[0]] = {}
one_dict[splitted[0]][splitted[1]] = {}
one_dict[splitted[0]][splitted[1]][splitted[2]] = {}
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]] = {}
one_dict[splitted[0]][splitted[1]][splitted[2]][splitted[3]][splitted[4]] = new_dict
This code tries to access nested dictionary keys, if they don't exist, then the code will create them with an empty dictionary.
Related
I have one nested list, and one list for "numbers"
test_keys = [["tobbe", "kalle"],["karl", "Clara"],["tobbe"],["tank"]]
test_values = ['123', '234','345','456']
res = {}
for key in test_keys:
for value in test_values:
res[value] = key
test_values.remove(value)
break
with open("myfile.txt", 'w') as f:
for key, value in res.items():
f.write('%s;%s;\n' % (key, value))
This provides the file
123;['tobbe', 'kalle'];
234;['karl', 'Clara'];
345;['finis'];
456;['tank'];
now I want to load the data back into the a dictionary without the ";" and later on back into the corresponding lists.
Try this:
res = {}
with open("myfile.txt") as file:
for line in file:
chunks = line.split(';')
names = chunks[1][1:-1].split(', ')
res[chunks[0]] = [name[1:-1] for name in names]
print(res)
test_keys = []
test_values = []
for key in res:
test_keys.append(key)
test_values.append(res[key])
print(test_keys)
print(test_values)
I got a complex nested JSON file and I need to convert it completely, but my code can only convert part of the information,
How can I modify my code, or do you guys have a better code?
my json file
import csv
import json
import sys
import codecs
def trans(path):
jsonData = codecs.open('H://14.json', 'r', 'utf-8')
# csvfile = open(path+'.csv', 'w')
# csvfile = open(path+'.csv', 'wb')
csvfile = open('H://11.csv', 'w', encoding='utf-8', newline='')
writer = csv.writer(csvfile, delimiter=',')
flag = True
for line in jsonData:
dic = json.loads(line)
if flag:
keys = list(dic.keys())
print(keys)
writer.writerow(keys)
flag = False
writer.writerow(list(dic.values()))
jsonData.close()
csvfile.close()
if __name__ == '__main__':
path=str(sys.argv[0])
print(path)
trans(path)
my json file
{"id":"aa","sex":"male","name":[{"Fn":"jeri","Ln":"teri"}],"age":45,"info":[{"address":{"State":"NY","City":"new york"},"start_date":"2001-09","title":{"name":"Doctor","Exp":"head"},"year":"2001","month":"05"}],"other":null,"Hobby":[{"smoking":null,"gamble":null}],"connect":[{"phone":"123456789","email":"info#gmail.com"}],"Education":"MBA","School":{"State":"NY","City":"new york"}}
{"id":"aa","sex":"female","name":[{"Fn":"lo","Ln":"li"}],"age":34,"info":[{"address":{"State":"NY","City":"new york"},"start_date":"2008-11","title":{"name":"Doctor","Exp":"hand"},"year":"2008","month":"02"}],"other":null,"Hobby":[{"smoking":null,"gamble":null}],"connect":[{"phone":"123456789","email":"info#gmail.com"}],"Education":"MBA","School":{"State":"NY","City":"new york"}}
It only converts part of the information, 'name''info''Hobby''connect''School' these information are not converted,I need to convert all information completely,
You could use the below function to treat each dic. It will flatten the dict by recursive calls until there is no dict or list in the values. In order to avoid issues with 2 keys having the same name, I concatenate with the previous level.
WARNING: it is based on your format so if you have lists with more than one element in the middle, it will only take care of the first element.
def flatten_dict(input_dict, result = None):
result = result or {}
for key, value in input_dict.items():
if isinstance(value, list):
current_dict = {key+"_"+k: v for k, v in value[0].items()}
flatten_dict(current_dict, result)
elif isinstance(value, dict):
current_dict = {key+"_"+k: v for k, v in value.items()}
flatten_dict(current_dict, result)
else:
result[key] = value
return result
Then apply this function on each dic, transform to Dataframe and save as CSV.
res = []
for line in jsonData:
dic = json.loads(line)
res.append(flatten_dict(dic))
res_df = pd.DataFrame(res)
res_df.to_csv("result.csv")
Result:
here is my txt file that has contained all of the lines. What I want to do is create a dictionary, and access a key, and get a list of values
Finance:JPMC
Software:Microsoft
Conglomerate:L&T
Conglomerate:Amazon
Software:Palantir
Defense:BAE
Defense:Lockheed
Software:TCS
Retail:TjMax
Retail:Target
Oil:Exxon
Oil:Chevron
Oil:BP
Oil:Gulf
Finance:Square
FMCG:PnG
FMCG:JohnsonNJohnson
FMCG:Nestle
Retail:Sears
Retail:FiveBelow
Defense:Boeing
Finance:Citadel
Finance:BridgeWater
Conglomerate:GE
Conglomerate:HoneyWell
Oil:ONGC
FMCG:Unilever
Semiconductor:Intel
Semiconductor:Nvidia
Semiconductor:Qualcomm
Semiconductor:Microchip
Conglomerate:Samsung
Conglomerate:LG
Finance:BoA
Finance:Discover
Software:TCS
Defense:Raytheon
Semiconductor:Microsemi
Defense:BAE
Software:Meta
Oil:SinoPec
Defense:Saab
Defense:Dassault
Defense:Airbus
Software:Adobe
Semiconductor:TSMC
FMCG:CocoCola
FMCG:Pesico
Retail:Kohls
Here is my attempted code
f = open("companyList.txt", "r")
sector, company = [], []
for line in f:
first, second = line.split(":")
sector.append(first)
company.append(second)
dictionary = {}
for key in sector:
for element in company:
dictionary[sector].append(element)
print(dictionary)
Since there are multiple duplicate keys, I wanted to append a list to that particular key as python doesn't allow duplicate keys.
If i understand your question right you can do this:
from collections import defaultdict
dictionary = defaultdict(list)
for line in f:
first, second = line.split(":")
dictionary[first].append(second)
I think this is what you want:
pairs = {}
with open("tst.txt", "r") as f:
while True:
line = f.readline().strip()
if not line:
break
sector, value = line.split(":", 1)
if sector not in pairs:
pairs[sector] = []
pairs[sector].append(value)
f.close()
print(pairs)
you should do:
f = open("companyList.txt", "r")
sector, company = [], []
for line in f:
first, second = line.split(":")
sector.append(first)
company.append(second)
dictionary = {}
for sectory,companyy in zip(sector,company):
dictionary[sectory] = companyy
for key in sector:
dictionary[sector] = key
So far I have this code which is creating a dictionary from an input file:
def read_file(filename):
with open("menu1.csv") as file:
file.readline()
for line in file:
line_strip = [line.rstrip('\n')]
lines= [line.split(',')]
result = {key: (float(fl), int(intg),
text.strip()) for key,
fl, intg,text in lines}
print(result)
read_file("menu1.csv")
I have to keep that code in that def format. However, this outputs 27 different dictionaries. How do I make it so it is all in ONE dictionary?
ALso:
I want to alphabetize the keys and put them into a list. I tried something like this but it won't work:
def alphabetical_menu(dict):
names = []
for name in d:
names.append(name)
names.sort()
print(names)
What am I doing wrong? or do you have a way to do it?
Is this what you wanted?
def read_file(filename):
result = {}
with open(filename) as file:
file.readline()
for line in file:
line_strip = line.rstrip()
line_split= line.split(',')
key, fl, intg, text = tuple(line_split)
result[key] = (float(fl), int(intg), text.strip())
return result
def alphabetical_menu(d):
return sorted(d.keys())
menu_dict = read_file("menu1.csv")
menu_sorted_keys = alphabetical_menu(menu_dict)
# To check the result
print(menu_dict)
print(menu_sorted_keys)
I would like to parse a config file, containing list of filenames, divided in sections:
[section1]
path11/file11
path12/file12
...
[section2]
path21/file21
..
I tried ConfigParser, but it requires pairs of name-value. How can I parse such a file?
Likely you have to implement the parser on your own.
Blueprint:
key = None
current = list()
for line in file(...):
if line.startswith('['):
if key:
print key, current
key = line[1:-1]
current = list()
else:
current.append(line)
Here is an iterator/generator solution:
data = """\
[section1]
path11/file11
path12/file12
...
[section2]
path21/file21
...""".splitlines()
def sections(it):
nextkey = next(it)
fin = False
while not fin:
key = nextkey
body = ['']
try:
while not body[-1].startswith('['):
body.append(next(it))
except StopIteration:
fin = True
else:
nextkey = body.pop(-1)
yield key, body[1:]
print dict(sections(iter(data)))
# if reading from a file, do: dict(sections(file('filename.dat')))