So I have a problem set for class where I am given a csv file of text abbreviations for words and I'm asked to prompt the user for multiple abbreviations and its supposed to return the full text if the abbreviation is found, if not then it will return the original abbreviation.
I'm currently stuck on getting the user input list into the dictionary properly.
import csv
def CreateDictionary(i):
abbrv = i.lower()
abbrv = i.split(' ')
dictionary = {}
fo = open('filename.csv')
data = csv.reader(fo)
for row in data:
dictionary[row[0]] = row[1]
fo.close()
def main():
user = input("abbreviations")
print(CreateDictionary(user))
main()
import csv
def CreateDictionary(i):
abbrv = i.lower()
abbrv = i.split(' ')
dictionary = {}
fo = open('filename.csv')
data = csv.reader(fo)
for row in data:
if row[0] in abbrv:
yield (row[0],row[1])
fo.close()
user = input("abbreviations")
print(list(CreateDictionary(user)))
might do what you are asking for
The csv library has a DictReader object you can use to automate the process.
https://docs.python.org/3/library/csv.html#csv.DictReader
Try adding the values of the keys in dictionary to a list if they are present
import csv
def CreateDictionary(i):
abbrv = i.lower()
abbrv = i.split(' ')
dictionary = {}
fo = open('filename.csv')
data = csv.reader(fo)
for row in data:
dictionary[row[0]] = row[1]
fo.close()
ret = []
for i in abbrv:
try:
ret.append(dictionary[i])
except KeyError:
ret.append(i)
return ret
def main():
user = input("abbreviations")
print(CreateDictionary(user))
main()
Related
I am working on a text file right now that is called "dracula.txt", and I have to do the following in python:
Save words that occur no more than 3 times in descending order in a file called less_common_words.txt. Each word with its count should be saved on a separate line.
I would appreciate any help! I've been working on this for too long.
I have already tokenized my file and counted the words. This is my code so far:
file = open("C:/Users/17733/Downloads/dracula.txt", 'r', encoding = 'utf-8-sig')
data = file.read()
data
data_list = data.split('\n')
data_list
new_list = []
for i in data_list:
if i !='':
ans_here = i.split(' ')
new_list.extend(ans_here)
new_list
import string
import re
puncs = list(string.punctuation)
puncs.append('"')
puncs.append('[')
puncs.append('.')
puncs.append('-')
puncs.append('_')
#append each seperately
new_2 = []
for i in new_list:
for p in puncs:
if p in i:
i_new = i.replace(p, ' ')
new_2.append(i_new)
new_2
new_2 = [i.replace(' ', ' ').strip().lower() for i in new_2]
new_2
from pathlib import Path
from collections import Counter
import string
filepath = Path('test.txt')
output_filepath = Path('outfile.txt')
# print(filepath.exists())
with open(filepath) as f:
content = f.readlines()
word_list = sum((
(s.lower().strip('\n').translate(str.maketrans('', '', string.punctuation))).split(' ')
for s in content
), [])
less_common_words = sorted([
key for key, value in Counter(word_list).items() if value <= 3
],reverse=True)
with open(output_filepath, mode='wt', encoding='utf-8') as myfile:
myfile.write('\n'.join(less_common_words))
This should exactly be what you need- I fixed my previous error by flattening the entire txt into a 2d list:
book_open = open('frankenstein.txt', 'r').readlines()
beauty_book = [i.split() for i in book_open]
flatten = []
for sublist in beauty_book:
for val in sublist:
flatten.append(val)
foo = 0
for i in flatten:
list_open = open('less_common_words.txt', 'r').readlines()
beauty_list = [i.replace('\n', '') for i in list_open]
count = flatten.count(flatten[foo])
compile = str((flatten[foo], count))
if count <= 3:
if compile not in beauty_list:
file = open('less_common_words.txt', 'a+')
file.write('\n'+compile)
file.close()
foo += 1
So far I have this code which is creating a dictionary from an input file:
def read_file(filename):
with open("menu1.csv") as file:
file.readline()
for line in file:
line_strip = [line.rstrip('\n')]
lines= [line.split(',')]
result = {key: (float(fl), int(intg),
text.strip()) for key,
fl, intg,text in lines}
print(result)
read_file("menu1.csv")
I have to keep that code in that def format. However, this outputs 27 different dictionaries. How do I make it so it is all in ONE dictionary?
ALso:
I want to alphabetize the keys and put them into a list. I tried something like this but it won't work:
def alphabetical_menu(dict):
names = []
for name in d:
names.append(name)
names.sort()
print(names)
What am I doing wrong? or do you have a way to do it?
Is this what you wanted?
def read_file(filename):
result = {}
with open(filename) as file:
file.readline()
for line in file:
line_strip = line.rstrip()
line_split= line.split(',')
key, fl, intg, text = tuple(line_split)
result[key] = (float(fl), int(intg), text.strip())
return result
def alphabetical_menu(d):
return sorted(d.keys())
menu_dict = read_file("menu1.csv")
menu_sorted_keys = alphabetical_menu(menu_dict)
# To check the result
print(menu_dict)
print(menu_sorted_keys)
inverting Dictionary with Tuple[Keys] and string[values], writing dictionary to a text file, then reading the text file, and inverting the keys-values back to original order.
I'm just <json - .dumps/.loads> to convert the data to a string to save, and then back into a dictionary. The problem is that I'm getting an error code trying to invert the values and keys back... my error is
TypeError: string indices must be integers
Here is my code:
import json
def invert_roster(d):
inverse = dict()
for key in d:
val = d[key]
if val not in inverse:
inverse[val] = [key]
else:
inverse[val].append(key)
return inverse
a = 0
def choice(c):
a = ' '
choice = input('Enter <add> to add to file, or <view> to view file.')
if choice == 'add':
add(a)
elif choice == 'view':
view_file(a)
def add(a):
rf = 0
choice2 = input('<new> File or <old> File?')
if choice2 == 'new':
new_file()
elif choice2 =='old':
read_file(rf)
roster = dict()
name = ' '
print('Enter <stop> to exit.')
while name != 'stop':
name = input('Enter first and last name of patient: ')
if name == 'stop':
write_file(roster)
diagnosis = input('Enter Diagnosis of patient: ')
first,last = name.split(' ')
roster[last, first] = (diagnosis)
def write_file(roster):
print(roster)
file = open('journal8.txt', 'w')
ir = invert_roster(roster)
print(ir)
wf = json.dumps(ir)
file.write(wf)
file.close()
choice()
def view_file(a):
file = open('journal8.txt', 'r+')
vf = (read_file(file))
print(vf)
keys = vf.keys()
print(keys)
print(type(vf))
for c in vf:
print(c, vf[c])
for c in vf:
vf[c] = tuple(vf[c])
print(vf)
ta = json.dumps(vf)
invert = invert_roster(ta)
file.close()
return()
def read_file(rf):
line = rf.readline()
fin = line.strip()
rf = json.loads(fin)
return(rf)
def new_file():
file = open('journal8.txt', 'w')
return()
choice(a)
The issue seems to be in f(x)=view_file and f(x)=invert_roster. I'm able to convert to string and write to file, read the file and manipulate data from the file, but I can't invert the darn thing.
I have written a script to convert a text file into dictionary..
script.py
l=[]
d={}
count=0
f=open('/home/asha/Desktop/test.txt','r')
for row in f:
rowcount+=1
if row[0] == ' ' in row:
l.append(row)
else:
if count == 0:
temp = row
count+=1
else:
d[temp]=l
l=[]
count=0
print d
textfile.txt
Time
NtGetTickCount
NtQueryPerformanceCounter
NtQuerySystemTime
NtQueryTimerResolution
NtSetSystemTime
NtSetTimerResolution
RtlTimeFieldsToTime
RtlTimeToTime
System informations
NtQuerySystemInformation
NtSetSystemInformation
Enumerations
Structures
The output i have got is
{'Time\n': [' NtGetTickCount\n', ' NtQueryPerformanceCounter\n', ' NtQuerySystemTime\n', ' NtQueryTimerResolution\n', ' NtSetSystemTime\n', ' NtSetTimerResolution\n', ' RtlTimeFieldsToTime\n', ' RtlTimeToTime\n']}
Able to convert upto 9th line in the text file. Suggest me where I am going wrong..
You never commit (i.e. run d[row] = []) the final list to the dictionary.
You can simply commit when you create the row:
d = {}
cur = []
for row in f:
if row[0] == ' ': # line in section
cur.append(row)
else: # new row
d[row] = cur = []
print (d)
Using dict.setdefault to create dictionary with lists as values will make your job easier.
d = {}
with open('input.txt') as f:
key = ''
for row in f:
if row.startswith(' '):
d.setdefault(key, []).append(row.strip())
else:
key = row
print(d)
Output:
{'Time\n': ['NtGetTickCount', 'NtQueryPerformanceCounter', 'NtQuerySystemTime', 'NtQueryTimerResolution', 'NtSetSystemTime', 'NtSetTimerResolution', 'RtlTimeFieldsToTime', 'RtlTimeToTime'], 'System informations\n': ['NtQuerySystemInformation', 'NtSetSystemInformation', 'Enumerations', 'Structures']}
A few things to note here:
Always use with open(...) for file operations.
If you want to check the first index, or the first few indices, use str.startswith()
The same can be done using collections.defaultdict:
from collections import defaultdict
d = defaultdict(list)
with open('input.txt') as f:
key = ''
for row in f:
if row.startswith(' '):
d[key].append(row)
else:
key = row
So you need to know two things at any given time while looping over the file:
1) Are we on a title level or content level (by indentation) and
2) What is the current title
In the following code, we first check if the current line we are at, is a title (so it does not start with a space) and set the currentTitle to that as well as insert that into our dictionary as a key and an empty list as a value.
If it is not a title, we just append to corresponding title's list.
with open('49359186.txt', 'r') as input:
topics = {}
currentTitle = ''
for line in input:
line = line.rstrip()
if line[0] != ' ':
currentTitle = line
topics[currentTitle] = []
else:
topics[currentTitle].append(line)
print topics
Try this:
d = {}
key = None
with open('/home/asha/Desktop/test.txt','r') as file:
for line in file:
if line.startswith(' '):
d[key].append(line.strip())
else:
key = line.strip(); d[key] = []
print(d)
Just for the sake of adding in my 2 cents.
This problem is easier to tackle backwards. Consider iterating through your file backwards and then storing the values into a dictionary whenever a header is reached.
f=open('test.txt','r')
d = {}
l = []
for row in reversed(f.read().split('\n')):
if row[0] == ' ':
l.append(row)
else:
d.update({row: l})
l = []
Just keep track the line which start with ' ' and you are done with one loop only :
final=[]
keys=[]
flag=True
with open('new_text.txt','r') as f:
data = []
for line in f:
if not line.startswith(' '):
if line.strip():
keys.append(line.strip())
flag=False
if data:
final.append(data)
data=[]
flag=True
else:
if flag==True:
data.append(line.strip())
final.append(data)
print(dict(zip(keys,final)))
output:
{'Example': ['data1', 'data2'], 'Time': ['NtGetTickCount', 'NtQueryPerformanceCounter', 'NtQuerySystemTime', 'NtQueryTimerResolution', 'NtSetSystemTime', 'NtSetTimerResolution', 'RtlTimeFieldsToTime', 'RtlTimeToTime'], 'System informations': ['NtQuerySystemInformation', 'NtSetSystemInformation', 'Enumerations', 'Structures']}
I have a plain text file with the following data:
id=1
name=Scott
occupation=Truck driver
age=23
id=2
name=Dave
occupation=Waiter
age=16
id=3
name=Susan
occupation=Computer programmer
age=29
I'm trying to work out the best way to get to any point in the file given an id string, then grab the rows underneath to extract the data for use in my program. I can do something like:
def get_person_by_id(id):
file = open('rooms', 'r')
for line in file:
if ("id=" + id) in line:
print(id + " found")
But I'm not sure how I can now go through the next bunch of lines and do line.split("=") or similar to extract the info (put into a list or dict or whatever) that I can use my program. Any pointers?
One option would be to load the entire thing into memory, which would save you from reading the file every time:
with open('rooms') as f:
chunks = f.read().split('\n\n')
people_by_id = {}
for chunk in chunks:
data = dict(row.split('=', 1) for row in chunk.split('\n'))
people_by_id[data['id']] = data
del data['id']
def get_person_by_id(id):
return people_by_id.get(id)
How about exiting from a for loop after finding the correct line:
def get_person_by_id(id):
file = open('rooms', 'r')
for line in file:
if ("id=" + id) in line:
print(id + " found")
break
#now you can continue processing your file:
next_line = file.readline()
Maybe:
d = dict()
with open(filename) as f:
for line in f:
k,v = line.split('=')
if 'id=' in line:
d[v] = {}
d[d.keys()[-1]][k] = v
And here is an iterative solution.
objects = []
current_object = None
with open("info.txt", "rb") as f:
for line in f:
line = line.strip("\r\n")
if not line:
current_object = None
continue
if current_object is None:
current_object = {}
objects.append(current_object)
key,_,value = line.partition('=')
current_object[key] = value
print objects
Another example of an iterative parser:
from itertools import takewhile
def entries(f):
e = {}
def read_one():
one = {}
for line in takewhile(lambda x: '=' in x, f):
key, val = line.strip().split('=')
one[key] = val
return one
while True:
one = read_one()
if not one:
break
else:
e[one.pop('id')] = one
return e
Example:
>>> with open('data.txt') as f:
..: print entries(f)['2']
{'age': '16', 'occupation': 'Waiter', 'name': 'Dave'}
Get all the person's attributes and values (i.e. id, name, occupation, age, etc..), till you find
an empy line.
def get_person_by_id(id):
person = {}
file = open('rooms', 'r')
for line in file:
if found == True:
if line.strip():
attr, value = line.split("="):
else:
return person
elif ("id=" + id) in line:
print(id + " found")
found = True
attr, value = line.split("=")
person[attr] = value
return person
This solution is a bit more forgiving of empty lines within records.
def read_persons(it):
person = dict()
for l in it:
try:
k, v = l.strip('\n').split('=', 1)
except ValueError:
pass
else:
if k == 'id': # New record
if person:
yield person
person = dict()
person[k] = v
if person:
yield person