I want to make a txt into a dict in python - python

so i have the following data:
Apples = 1
Bananas = 1
Box_Cashew =
{
Cashew = 1
}
Dragonsfruit = 2
Crate_box_epox=
{
box_epox =
{
epox = 2
}
}
and want to make a Dictionary from this txt, as it follows:
{'Apples': '1', 'Bananas': '1', 'Box_Cashew': {'Cashew': '1'}, 'Dragonsfruit': '2', 'Crate_box_epox': {'box_epox': {'epox': '2'}}}
I tried reading the file line by line with the code below, but I don't know what to do when I encounter a dict within a dict.
edit:
#PrestonM and #juanpa.arrivillaga
The text file:
unit=9023
state=1411
flags=
{
1NobelChemistry=yes
1NobelLiterature=yes
1NobelMedicine=yes
}
worldmarket=
{
worldmarket_pool=
{
ammunition=204.50766
}
}
The code:
text_file = open("teste.v2", "r")
lines = text_file.readlines()
d={}
for line in lines:
try:
(key1, val) = line.replace('\t','').replace('\n','').split('=')
d[str(key1)] = val
except:
pass
result:
>>>d
{'unit':'9023' , 'state':'1411' , 'flags':{},'1NobelChemistry':'yes' , '1NobelLiterature':'yes' , '1NobelMedicine':'yes','worldmarket':{},'worldmarket_pool':{},'ammunition':'204.50766'}
desired result:
>>>d
{'unit':'9023' , 'state':'1411' , 'flags':{ '1NobelChemistry':'yes' , '1NobelLiterature':'yes' , '1NobelMedicine':'yes'},'worldmarket':{'worldmarket_pool':{'ammunition':'204.50766'}}}

The following seems to work in my tests. I hope the comments and text in the exceptions makes it clear what's being done.
In your code, you're simply adding everything to the same dictionary, which cannot produce the result you're after. As soon as { is encountered, you want to start adding key/value pairs to a new dictionary, that's actually stored in the old dictionary. To accomplish this, the code below keeps track of these dictionaries in a list, adding one if necessary, and removing one from the list to get back to the previous dictionary.
dictStack = [ { } ]
currentKey = None
for l in lines:
l = l.strip() # Remove whitespace at start/end
if not l: # skip empty line
continue
if l == "{":
if currentKey is None:
raise Exception("Current key not set!")
newDict = { }
dictStack[0][currentKey] = newDict
dictStack.insert(0, newDict)
currentKey = None
elif l == "}":
if currentKey is not None:
raise Exception("Current key is set, expecting {")
if len(dictStack) == 1:
raise Exception("Can't remove the final dict, there seems to be an extra '}'")
dictStack.pop(0)
else:
if currentKey is not None:
raise Exception("Current key is set, expecting {")
if not "=" in l:
raise Exception("Expecting '=' in '{}'".format(l))
key, value = l.split("=")
key, value = key.strip(), value.strip() # remove whitespace
if not value:
currentKey = key
else:
dictStack[0][key] = value
if len(dictStack) != 1:
raise Exception("Still more than one dict in the stack")
result = dictStack[0]

Here is my solution which uses recursion:
import re
def text2dict(text):
    """Parse ``key=value`` text with brace-delimited sub-blocks into a nested dict.

    Every non-blank line must be one of ``key=value``, ``key=`` (announcing a
    nested block), ``{`` or ``}``.  A ``key=`` line has to be followed by a
    ``{`` line; everything up to the matching ``}`` becomes the dict stored
    under that key.  Keys and values are kept as stripped strings.

    Args:
        text: The complete contents of the file as a single string.

    Returns:
        The parsed dictionary; nested blocks appear as nested dicts.

    Raises:
        ValueError: If a line has no ``=``, a ``key=`` line is not followed
            by ``{``, or the braces are unbalanced.
    """
    # Blank lines (including the empty tail left by a trailing newline) carry
    # no information, so drop them before parsing.
    lines = [ln for ln in (raw.strip() for raw in text.splitlines()) if ln]

    def parse_block(pos, nested):
        """Parse entries starting at lines[pos]; return (dict, next index)."""
        block = {}
        while pos < len(lines):
            line = lines[pos]
            if line == "}":
                if not nested:
                    raise ValueError("unexpected '}' without a matching '{'")
                return block, pos + 1
            # Split on the last '=' so that a key may itself contain '='.
            key, sep, value = line.rpartition("=")
            if not sep:
                raise ValueError("expected 'key=value', got {!r}".format(line))
            key, value = key.strip(), value.strip()
            pos += 1
            if value:
                block[key] = value
                continue
            # An empty value announces a nested block, which must open here.
            if pos >= len(lines) or lines[pos] != "{":
                raise ValueError("expected '{{' after {!r}".format(line))
            block[key], pos = parse_block(pos + 1, True)
        if nested:
            raise ValueError("missing closing '}'")
        return block, pos

    return parse_block(0, False)[0]
with open("file.txt") as f:
text = f.read()
print(text2dict(text))

def make_dict(text):
    """Render ``key=value`` / brace-block text as a dict-literal string.

    Each non-blank line of *text* is ``key=value``, ``key=`` (a nested block
    follows), ``{`` or ``}``.  Surrounding whitespace and all spaces are
    removed from every line before it is interpreted.  The resulting literal
    is printed and also returned.

    Args:
        text: The raw text to convert.

    Returns:
        The dict literal as a string, e.g. ``"{'a':'1','b':{'c':'2'}}"``.

    Raises:
        ValueError: If a line is neither a brace nor contains ``=``.
    """
    # Blank lines are dropped first so that "the following token" below is
    # always the next meaningful line.
    tokens = [row.strip().replace(" ", "") for row in text.splitlines()]
    tokens = [tok for tok in tokens if tok]

    pieces = ["{"]
    for pos, tok in enumerate(tokens):
        upcoming = tokens[pos + 1] if pos + 1 < len(tokens) else "}"
        # A separator is needed after any completed entry unless the
        # enclosing dict closes next.
        comma = "" if upcoming == "}" else ","
        if tok == "{":
            pieces.append(tok)
        elif tok == "}":
            # A closed nested dict is a completed entry too, so it needs the
            # same separator as a plain value.
            pieces.append(tok + comma)
        else:
            key, sep, value = tok.partition("=")
            if not sep:
                raise ValueError("expected 'key=value', got {!r}".format(tok))
            if value:
                pieces.append("'{}':'{}'{}".format(key, value, comma))
            else:
                # The value is the brace block on the following lines.
                pieces.append("'{}':".format(key))
    pieces.append("}")

    literal = "".join(pieces)
    print(literal)
    return literal
make_dict(text)
Result:
{'unit':'9023','state':'1411','flags':{'1NobelChemistry':'yes','1NobelLiterature':'yes','1NobelMedicine':'yes'}'worldmarket':{'worldmarket_pool':{'ammunition':'204.50766'}}}

Related

How do I merge ranged elements in a list?

testlist = ["13A", "13B", "13C", "23D", "5D", "9B", "9C", "9D"]
What I want the list to be:
["13A-C", "23D", "5D", "9B-D"]
Bonus points if you can sort it (5,9,13,23).
For those interested, this is my current WIP script:
testlist = ["13A", "13B", "13C", "23D", "5D", "9B", "9C", "9D"]
newlist = []
lenlist = len(testlist)
for i in range(lenlist):
#check values of first
indexnum = testlist[i][:-1]
indexchar = testlist[i][-1]
if i == 0:
newlist.append(testlist[i])
if indexnum == (testlist[i-1][:-1]):
newlistvalue = (indexnum + (testlist[i-1][-1]) + "-" + (testlist[i][-1]))
if ((indexchar == "B") and ((testlist[i-1][-1]) == "A")) or ((indexchar == "D") and ((testlist[i-1][-1]) == "C")):
newlist.append(newlistvalue)
lastval = newlist[len(newlist)-1][-1]
lastval2 = newlist[(len(newlist)-2)]
#print(lastval2)
if (indexchar == "C") and (lastval == "B"):
newlistvalue = lastval2[:-1] + indexchar
#print(newlistvalue)
newlist.pop()
newlist.pop()
#print(newlistvalue)
newlist.append(newlistvalue)
else:
newlist.append(testlist[i])
print (newlist)
#print (newlistvalue)
First you'd need to create a dict of the numbers and letters, I assume there will only be one letter in each string. Then you need to sort it and format it. You can use the following:
from collections import defaultdict

# Group the trailing letter of every item under its numeric prefix,
# e.g. "13A" and "13B" both land in pairs["13"].
pairs = defaultdict(list)
for s in testlist:
    pairs[s[:-1]].append(s[-1])
# dict.fromkeys() de-duplicates while keeping order, so a group with a single
# letter renders as "5D" rather than "5D-D".  Groups are ordered by the
# numeric value of their prefix.
# NOTE(review): uses the first and last letter as seen, so this assumes the
# letters of each number already arrive in order in testlist.
result = [f'{k}{"-".join(dict.fromkeys([v[0], v[-1]]))}'
          for k, v in sorted(pairs.items(), key=lambda x: int(x[0]))]
['5D', '9B-D', '13A-C', '23D']
On the assumption that each string in the list ends with exactly one letter, you could do this:
import re
testlist = ["13A", "13C", "13B", "23D", "5D", "9B", "9C", "9D"]
def seq(lst):
    """Return the only item of *lst*, or ``"first-last"`` when it has several."""
    return lst[0] if len(lst) == 1 else f'{lst[0]}-{lst[-1]}'


def key(e):
    """Sort key: the first run of digits in *e* as an int (``"13A-C"`` -> 13)."""
    # Raw string literal: '\d' inside a plain string is an invalid escape
    # sequence and triggers a warning on current Python versions.
    return int(re.search(r'\d+', e)[0])
d = {}
for e in testlist:
d.setdefault(int(e[:-1]), []).append(e[-1])
print(sorted([f'{k}{seq(sorted(v))}' for k, v in d.items()], key=key))
Output:
['5D', '9B-D', '13A-C', '23D']
Note:
Subtle change to OP's data to show that this code can handle out-of-sequence values

Python - Write a new row for each list data under same header into csv

I have a text file, 'student.txt'. Some keys have multiple values. I only want data that is tied to the name, and the sibling & hobby values below that name.
'student.txt'
ignore me
name-> Alice
name-> Sam
sibling-> Kate,
unwanted
sibling-> Luke,
hobby_1-> football
hobby_2-> games
name-> Ramsay
hobby_1-> dance
unwanted data
hobby_2-> swimming
hobby_3-> jogging
ignore data
Code I've done:
file = open("student.txt", "r")
with open("student.csv", "w") as writer:
main_dict = {}
student_dict = {"Siblings": "N/A", "Hobbies": "N/A"}
sibling_list = []
hobby_list = []
flag = True
writer.write ('name,siblings,hobbies\n')
header = 'Name,Siblings,Hobbies'.split(',')
sib_str = ''
hob_str =''
for eachline in file:
try:
key, value = eachline.split("-> ")
value = value.strip(",\n")
if flag:
if key == "name":
print (key,value)
if len(sibling_list) > 0:
main_dict[name]["Siblings"] = sib_str
#print (main_dict)
if len(hobby_list) > 0:
main_dict[name]["Hobbies"] = hob_str
sibling_list = []
hobby_list = []
name = value
main_dict[name] = student_dict.copy()
main_dict[name]["Name"] = name
elif key == "sibling":
sibling_list.append(value)
sib_str= ' '.join(sibling_list).replace(' ', '\n')
elif key.startswith("hobby"):
hobby_list.append(value)
hob_str = ' '.join(hobby_list)
if len(sibling_list) > 0:
main_dict[name]["Siblings"] = sib_str
if len(hobby_list) > 0:
main_dict[name]["Hobbies"] = hob_str
if 'name' in eachline:
if 'name' in eachline:
flag = True
else:
flag = False
except:
pass
for eachname in main_dict.keys():
for eachkey in header:
writer.write(str(main_dict[eachname][eachkey]))
writer.write (',')
if 'Hobbies' in eachkey:
writer.write ('\n')
CSV Output from Code above:
Expected CSV Output:
P.S.: I can't figure out how to avoid the try/except-pass. Some lines (those without '->') are unwanted, and eachline.split("-> ") fails on them. I would appreciate help with this too.
Thanks so much!
The code below gives the csv file which you can import in your Excel and it will be in exact format you are expecting.
You can use something like
if "->" not in line:
continue
To skip lines that don't contain "->" values, see in the code below:
import csv
def _cell(values, row):
    """Return the CSV cell for *values* on output row *row* of one student.

    Real values come first; the row right after the last value (row 0 for an
    empty list) shows "N/A", and any further rows stay blank.
    """
    if row < len(values):
        return values[row]
    return "N/A" if row == len(values) else ""


students = {}
name = None
# Context manager so the input file is closed even if parsing fails.
with open("student.txt", "r") as file:
    for line in file:
        # Lines without "->" are noise in the input and are skipped outright.
        if "->" not in line:
            continue
        # Split once only, so a value that itself contains "->" stays intact.
        key, value = line.split("->", 1)
        key = key.strip()
        # Drop surrounding blanks and the trailing comma some values carry,
        # while keeping any spaces inside the value itself.
        value = value.strip().strip(",").strip()
        if key == "name":
            name = value
            students[name] = {"siblings": [], "hobbies": []}
        elif name is None:
            # A sibling/hobby line before the first name has no owner.
            continue
        elif "sibling" in key:
            students[name]["siblings"].append(value)
        elif "hobby" in key:
            students[name]["hobbies"].append(value)

csvlines = []
for name, details in students.items():
    siblings = details["siblings"]
    hobbies = details["hobbies"]
    maxlength = max(len(hobbies), len(siblings))
    if maxlength == 0:
        csvlines.append([name, "N/A", "N/A"])
        continue
    for i in range(maxlength):
        # Only the first row of a student carries the name.  Both columns use
        # the same padding rule, each measured against its own list length.
        csvlines.append([name if i == 0 else "", _cell(siblings, i), _cell(hobbies, i)])
print(csvlines)

fields = ["name", "siblings", "hobbies"]
# newline="" is what the csv module expects for files it writes; without it
# extra blank rows can appear on platforms that translate line endings.
with open("students.csv", "w", newline="") as csvfile:
    csvwriter = csv.writer(csvfile)
    # Header row first, then one row per collected line.
    csvwriter.writerow(fields)
    csvwriter.writerows(csvlines)

Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages

10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith("From "):
continue
else:
line=line.split()
line=line[5] # accesing the list using index and splitting it
line=line[0:2]
for bline in line:
dic[bline]=dic.get(bline,0)+1 # Using this line we created a dictionary having keys and values
#Now it's time to access the dictionary and sort in some way.
lst=[]
for k1,v1 in dic.items(): # dictionary er key value pair access korar jonno items method use kora hoyechhe
lst.append((k1,v1)) # dictionary er keys and corresponding values ke lst te append korlam
lst.sort() #lst take sort korlam. sorting is done through key
#print(lst)
for k1,v1 in lst: # we are able to access this list using key value pair as it was basically a dictionary before, It is just appended
print(k1,v1)
#print(dic)
#print(dic)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.
Working code. I have broken the code down into the simplest form I can, so that it is easy for you to understand.
d = dict()
fname = input('enter the file name : ')
try:
    fopen = open(fname, 'r')
except OSError:
    # Without a file there is nothing to count; stop here instead of falling
    # through to a NameError on the handle that was never created.
    print('wrong file name !!!')
    raise SystemExit(1)

with fopen:
    for line in fopen:
        # Only the "From " lines (note the trailing space) carry the
        # timestamp; "From:" header lines must not be counted.
        if not line.startswith('From '):
            continue
        spline = line.split()
        if len(spline) < 6:
            # Too short to contain a time field.
            continue
        # Field 5 is the time (HH:MM:SS); the hour is the part before the
        # first colon.
        hour = spline[5].split(':')[0]
        d[hour] = d.get(hour, 0) + 1

# (hour, count) pairs sorted by hour.
lst = sorted(d.items())
for k, v in lst:
    print(k, v)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
emailcount = dict()
for line in handle:
if not line.startswith("From "): continue
line = line.split()
line = line[1]
emailcount[line] = emailcount.get(line, 0) +1
bigcount = None
bigword = None
for word,count in emailcount.items():
if bigcount == None or count > bigcount:
bigcount = count
bigword = word
print(bigword, bigcount)
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

counts = {}
# Context manager so the file is closed once counting is done.
with open(name) as handle:
    for line in handle:
        word = line.split()
        # The time is read from field 5, so require at least six fields.
        if len(word) < 6 or word[0] != "From":
            continue
        # Take the hour directly from the HH:MM:SS field rather than slicing
        # the printed form of a list.
        hour = word[5].split(":")[0]
        counts[hour] = counts.get(hour, 0) + 1

# (hour, count) pairs sorted by hour.
lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
counts=dict()
fill=open("mbox-short.txt")
for line in fill :
if line.startswith("From "):
x=line.split()
b=x[5]
y=b.split(":")
f=y[0]
counts[f]=counts.get(f,0)+1
l=list()
for k,v in counts.items():
l.append((k,v))
l.sort()
for k,v in l:
print(k,v)
I listened carefully to the online lessons; this is my code, based on what I learned in class. I think it will be easy for you to understand.
fn = input('Please enter file: ')
if len(fn) < 1: fn = 'mbox-short.txt'
hand = open(fn)
di = dict()
for line in hand:
ls = line.strip()
wds = line.split()
if 'From' in wds and len(wds) > 2:
hours = ls.split()
hour = hours[-2].split(':')
ch = hour[0]
di[ch] = di.get(ch, 0) + 1
tmp = list()
for h,t in di.items():
newt = (h,t)
tmp.append(newt)
tmp = sorted(tmp)
for h,t in tmp:
print(h,t)
name = input("Enter file:")
if len(name) < 1:
name = "mbox-short.txt"
handle = open(name)
counts = dict()
for line in handle:
line = line.strip()
if not line.startswith("From ") : continue
line = line.split()
hr = line[5].split(":")
hr = hr[0:1]
for piece in hr:
counts[piece] = counts.get(piece,0) + 1
lst = list()
for k,v in counts.items():
lst.append((k,v))
lst = sorted(lst)
for k,v in lst:
print(k,v)
fname = input("Enter file:")
dic = {}
# Context manager so the file is closed once counting is done.
with open(fname) as fhandle:
    for line in fhandle:
        if not line.startswith('From '):
            continue
        # Field 5 of a "From " line is the time (HH:MM:SS); keep only the
        # hour, i.e. the part before the first colon.
        bline = line.split()[5].split(':')[0]
        # Tally this hour; get() supplies 0 the first time an hour is seen.
        dic[bline] = dic.get(bline, 0) + 1

# Now turn the dictionary into a list of (hour, count) pairs and sort it.
lst = []
for k1, v1 in dic.items():  # items() yields each key/value pair
    lst.append((k1, v1))  # store the key with its corresponding value
lst.sort()  # tuples compare by their first element, so this sorts by hour
for k1, v1 in lst:
    print(k1, v1)

How to turn a linear string into a trie?

I am using the make me a hanzi open-source chinese character dataset. As part of this dataset there are strings which provide the decomposition of chinese characters into their individual units (called radicals). I want to turn the strings describing the decomposition of characters into tries (so that I can use networkx to render the decompositions).
For example for this database entry:
{"character":"⺳","definition":"net, network","pinyin":[],"decomposition":"⿱冖八","radical":"⺳","matches":[[0],[0],[1],[1]]}
The decomposition for this character would be.
- Node(1, char='⿱')
- Node(2, char='冖') # an edge connects '⿱' to '冖'
- Node(3, char='八') # an edge connects '⿱' to '八'
So far, I have come up with a script to turn the string decompositions into dictionaries (but not graphs).
decomposition_types = {
'top-bottom': '⿱',
'left-right': '⿰',
'diagonal-corners': '⿻',
'over-under': '⿺',
'under-over': '⿹',
'over-under-reversed': '⿸',
'top-bottom-middle': '⿳',
'left-right-middle': '⿲',
'inside-outside': '⿴',
'outside-inside': '⿵',
'outside-inside2': '⿷',
'inside-outside2': '⿶'
# 'unknown': '?'
}
decomposition_types_reversed = dict(((value, key) for key, value in decomposition_types.items()))
file = []
if not os.path.isfile('data/dictionary.json'):
with open('data/dictionary.txt') as d:
for line in d:
file.append(json.loads(line))
for i, item in enumerate(file):
item['id'] = i + 1
json.dump(file, open('data/dictionary.json', 'w+'))
else:
file = json.load(open('data/dictionary.json'))
def is_parsed(blocks):
    """Return True when every block in *blocks* has a truthy ``'is_unit'`` entry.

    An empty sequence counts as parsed.  Checking stops at the first block
    whose ``'is_unit'`` is falsy.

    Raises:
        KeyError: If a block that gets examined has no ``'is_unit'`` key.
    """
    return all(block['is_unit'] for block in blocks)
def search(character, dictionary=file):
    """Return the first entry of *dictionary* whose ``'character'`` is *character*.

    Args:
        character: The value to look for under each entry's ``'character'`` key.
        dictionary: Iterable of dict entries; defaults to the module-level
            ``file`` list.

    Returns:
        The matching entry, or ``False`` (not ``None``) when nothing matches.
    """
    candidates = (entry for entry in dictionary if entry['character'] == character)
    return next(candidates, False)
# Collapse a decomposition string into nested {"spacing", "chars", "is_unit"} blocks.
# NOTE(review): indentation was lost in this listing; the comments below describe
# each statement on its own and do not assert how deeply it is nested.
def parse(decomp):
# A one-character decomposition is returned immediately with spacing '?'
# (presumably "unknown" -- confirm against decomposition_types).
if len(decomp) == 1:
return {"spacing": '?'}
blocks = []
n_loops = 0
# One working block per character; a character is a spacing operator when it
# is a key of decomposition_types_reversed.
for item in decomp:
blocks.append({"char": item, "is_spacing": item in decomposition_types_reversed, "is_unit": False})
# Keep going until is_parsed() reports that every remaining block is a unit.
while not is_parsed(blocks):
for i, item in enumerate(blocks):
# Already-collapsed blocks carry no "is_spacing" key and are skipped here.
if "is_spacing" in item:
if item['is_spacing']:
# Operand count = number of '-' in the operator's name, plus one
# (e.g. 'top-bottom' -> 2, 'top-bottom-middle' -> 3).
next_items = decomposition_types_reversed[item['char']].count('-') + 1
can_match = True
# The operator cannot be collapsed while one of its operands is itself a
# spacing character; collapsed blocks have no 'char' key, hence the
# KeyError guard.
for x in blocks[i + 1:i + 1 + next_items]:
try:
if x['char'] in decomposition_types_reversed:
can_match = False
except KeyError:
pass
if can_match:
# Replace the operator block with a collapsed unit whose "chars" holds the
# operands: the raw character for plain blocks, the whole block for ones
# that were collapsed earlier.
blocks[i] = {"spacing": item['char'],
"chars": [l['char'] if 'char' in l else l for l in
blocks[i + 1:i + 1 + next_items]],
"is_unit": True}
# Remove the operand blocks that were just absorbed.
# NOTE(review): this shrinks `blocks`; if it runs inside the enumerate() loop
# above, the list is being mutated during iteration -- confirm intent.
del blocks[i + 1:i + 1 + next_items]
# Safety valve: once the counter exceeds 10 the input is printed and the
# program exits.
# NOTE(review): whether the counter advances per while-pass or per block
# cannot be determined from this listing.
n_loops += 1
if n_loops > 10:
print(decomp)
sys.exit()
return blocks

TypeError: can only concatenate list (not "int") to list [Using a Dictionary]

Essentially, I am creating a count using a dictionary: every time it sees a "1" in the text file, it adds one to the corresponding entry. However, I keep getting an error.
Letters = ["A","B,"C","D","E","F"]
d= {}
d["A"] = [0]
d["B"] = [0]
d["C"] = [0]
d["D"] = [0]
d["E"] = [0]
file = open('test1.txt','r')
for line in file:
line_array = line.strip("\n").split(",")
for x in range(5):
if line_array[x] == "1":
for y in Letters:
d[y][0] = d[y][0] + 1
BTW, the text file is formatted like this;
1,0,3,0,2
0,2,1,0,3
ETC
EDIT sorry, misworded
You never actually use your dictionary.
Letters = ["A", "B", "C", "D", "E"]
# One integer counter per letter; column x of the file belongs to Letters[x].
d = {key: 0 for key in Letters}
print(Letters)
# Context manager so the file is closed when counting is done.
with open('test1.txt', 'r') as file:
    for line in file:
        line_array = line.strip("\n").split(",")
        # zip() pairs each letter with its column and stops at the shorter
        # sequence, so rows with fewer than five fields do not raise
        # IndexError and extra fields are ignored.
        for letter, field in zip(Letters, line_array):
            if field == "1":
                d[letter] += 1

Categories

Resources