This produces an empty dictionary, and I don't know why - python

This is the start of something I'm trying to do as a weekly TAFE worksheet. All it requires is for the emails sent in the text file to be tallied up to see who sent the most emails. I'm still quite new to all of this, so sorry if this is a noob question. It seems to me that this should grab the email address on each relevant line in the file and add it as a key to the dictionary.
if len(fname) < 1:
fname = "mbox-short.txt"
fhand = open(fname)
frequency = dict()
for line in fhand:
if line.startswith("From "):
line = line.split()
frequency.get(line[1], 0) + 1
print(frequency)

You need to store something in the dictionary, but your code never does: `frequency.get(line[1], 0) + 1` computes the new count and then throws it away. Assign the result back to the key instead:
# Tally how many "From " lines each sender address has in the mailbox file.
# `fname` is expected to be set before this snippet runs (e.g. from input()).
if len(fname) < 1:
    fname = "mbox-short.txt"
fhand = open(fname)
frequency = dict()
for line in fhand:
    if line.startswith("From "):
        line = line.split()
        # The fix: write the incremented count back into the dictionary.
        # Calling .get() on its own only reads a value and discards the sum.
        frequency[line[1]] = frequency.get(line[1], 0) + 1
print(frequency)

Related

Incorrectly reading lines of a text File in python

So basically i want to iterate the lines of a text file that has this format:
-----------------------------------------
Code: 0123456789
EGGS: 3 7.00 21.00
BACON: 1 3.50 3.50
COFFEE: 2 14.20 28.40
TOTAL: 52.90
-----------------------------------------
and i have the following code to read the lines one by one:
with open(filename, "rt", encoding="utf-8") as f:
for line in f:
prevline = line
line.split()
if '-' in line:
temp = f.readline().split(':') #Get Code
print(temp)
AFM = temp[1]
print(AFM)
else:
tempProducts = line.split(':') #Get Product in a list
productName = tempProducts[0] #Store Product Name in a variable
productStats = tempProducts[1] #Store Product Stats in a list
productStats = productStats.split(" ")
for value in productStats:
valueArray.append(float(value))
products.update({productName:valueArray})
if '-' in f.readline():
rec = Receipt(AFM,products)
products={}
valueArray=[]
receipts.append(rec)
else:
line=prevline
Mind that I want to skip the line with the '------------' characters. The code works, but it keeps reading only every other line: the second, then the fourth, then the sixth (Code, BACON, TOTAL). How can I fix this? Edit: there are multiple receipts in the file, so I need to skip the '----------' line each time.
with open(filename, "rt", encoding="utf-8") as f:
old_list = [] # Saving all the lines including '----'
for line in f:
old_list.append(line)
new_list = old_list[1:-1] # new list which removes the '----' lines
You can iterate just through new_list with your .split logic.
See if this does the job
# Parse every receipt in the file. `filename`, `Receipt` and `receipts`
# are expected to be defined by the surrounding program.
with open(filename, "rt", encoding="utf-8") as f:
    for line in f:
        # Separator lines ('-----') and blank lines carry no data.
        if '-' in line or not line.strip():
            continue
        if 'Code' in line:
            # A "Code:" line opens a new receipt, so start a fresh product map.
            AFM = line.split(':')[1].strip()
            print(AFM)
            products = {}
        else:
            productName, productStats = line.split(':', 1)
            # split() with no argument drops the leading space and trailing
            # newline; each product gets its own list instead of sharing one.
            products[productName] = [float(value) for value in productStats.split()]
            if 'TOTAL' in line:
                # TOTAL is the last data line of a receipt: emit it.
                rec = Receipt(AFM, products)
                receipts.append(rec)
To anyone seeing this post now: please consider it closed. I did not provide enough information and the code was messed up. Sorry for wasting your time.

Trouble with matching variables to line in txt, and removing line

I am having trouble with matching variables to lines in txt, and removing the lines.
I am currently doing a hotel room booking program in which I am having trouble removing a booking from my text file.
This is how my lines in my text file are formatted:
first_name1, phonenumber1 and email 1 are linked to entry boxes
jeff;jeff#gmail.com;123123123;2019-06-09;2019-06-10;Single Room
def edit_details(self,controller):
f = open("Bookings.txt")
lines = f.readlines()
f.close()
x = -1
for i in lines:
x += 1
data = lines[x]
first_name1 = str(controller.editName.get())
phonenumber1 = str(controller.editPhone.get())
email1 = str(controller.editEmail.get())
checkfirst_name, checkemail, checkphone_num, checkclock_in_date, checkclock_out_date, checkroom = map(str, data.split(";"))
if checkfirst_name.upper() == first_name1.upper() and checkemail.upper() == email1.upper() and checkphone_num == phonenumber1:
controller.roomName.set(checkfirst_name)
controller.roomEmail.set(checkemail)
controller.roomPhone.set(checkphone_num)
controller.roomCheckin.set(checkclock_in_date)
controller.roomCheckout.set(checkclock_out_date)
controller.roomSelect.set(checkroom)
print(controller.roomName.get())
print(controller.roomSelect.get())
controller.show_frame("cancelBooking")
break
elif x > len(lines) - int(2):
messagebox.showerror("Error", "Please Enter Valid Details")
break
I have the user enter their details to give me the variables, but I don't know how to match these variables to the line in the text file in order to remove the booking.
Do I have to format these variables to match the line?
This is what I have tried, but it deletes the last line in my file:
line_to_match = ';'.join([controller.roomName.get(),controller.roomEmail.get(),controller.roomPhone.get()])
print(line_to_match)
with open("Bookings.txt", "r+") as f:
line = f.readlines()
f.seek(0)
for i in line:
if i.startswith(line_to_match):
f.write(i)
f.truncate()
I have added some pseudocode here. You can join the variables using ; and check whether the line starts with those details, like below.
# Unpack in the same order the file stores the fields (name;email;phone),
# otherwise the joined key can never match a booking line.
first_name1, email1, phonenumber1 = 'jeff', 'jeff#gmail.com', '123123123'
line_to_match = ';'.join([first_name1, email1, phonenumber1])
for i in line:
    ...
    if i.startswith(line_to_match):
        # Add your removal code here
        ...

Memory Error while running python script on 4GB file

I am trying to count the number of words that have a length between 1 and 5. The file size is around 4GB and I am getting a memory error.
import os
files = os.listdir('C:/Users/rram/Desktop/')
for file_name in files:
file_path = "C:/Users/rram/Desktop/"+file_name
f = open (file_path, 'r')
text = f.readlines()
update_text = ''
wordcount = {}
for line in text:
arr = line.split("|")
word = arr[13]
if 1<=len(word)<6:
if word not in wordcount:
wordcount[word] = 1
else:
wordcount[word] += 1
update_text+= '|'.join(arr)
print (wordcount) #print update_text
print 'closing', file_path, '\t', 'total files' , '\n\n'
f.close()
At the end i get a MemoryError on this line text = f.readlines()
Can you please help me optimize it?
As suggested in the comments you should read the file line by line and not the entire file.
For example :
count = 0
with open('words.txt','r') as f:
for line in f:
for word in line.split():
if(1 <= len(word) <=5):
count=count+1
print(count)
EDIT :
If you only want to count the words in 14-th column and split by | instead then :
# Count the lines whose 14th '|'-separated field is 1 to 5 characters long,
# streaming the file line by line so it never has to fit in memory.
count = 0
with open('words.txt', 'r') as f:
    for line in f:
        fields = line.split("|")
        # Guard the index: a short line simply has no 14th column to count.
        if len(fields) > 13 and 1 <= len(fields[13]) <= 5:
            count += 1
print(count)
Note that you should avoid writing this:
arr = line.split("|")
word = arr[13]
since the line may contain fewer than 14 fields, in which case arr[13] raises an IndexError.

Python 3.X combining similar lines in .txt files together

A question regarding combining values from a text file into a single variable and printing it.
An example I can give is a .txt file such as this:
School, 234
School, 543
I want to know the necessary steps to combine both "School" entries into a single variable "school" with a value of 777.
I know that we will need to open the .txt file for reading and then splitting it apart with the .split(",") method.
Code Example:
schoolPopulation = open("SchoolPopulation.txt", "r")
for line in schoolPopulation:
line = line.split(",")
Could anyone please advise me on how to tackle this problem?
Python has a rich standard library, where you can find classes for many typical tasks. Counter is what you need in the current situation:
from collections import Counter
c = Counter()
with open('SchoolPopulation.txt', 'r') as fh:
for line in fh:
name, val = line.split(',')
c[name] += int(val)
print(c)
Something like this?
schoolPopulation = open("SchoolPopulation.txt", "r")
results = {}
for line in schoolPopulation:
parts = line.split(",")
name = parts[0].lower()
val = int(parts[1])
if name in results:
results[name] += val
else:
results[name] = val
print(results)
schoolPopulation.close()
You could also use defaultdict and the with keyword.
from collections import defaultdict
with open("SchoolPopulation.txt", "r") as schoolPopulation:
results = defaultdict(int)
for line in schoolPopulation:
parts = line.split(",")
name = parts[0].lower()
val = int(parts[1])
results[name] += val
print(results)
If you'd like to display your results nicely you can do something like
for key in results:
print("%s: %d" % (key, results[key]))
# Sum the population of consecutive lines that belong to the same school.
school = population = prev = ''
pop_count = 0
with open('SchoolPopulation.txt', 'r') as infile:
    for line in infile:
        line = line.split(',')
        school = line[0]
        population = int(line[1])
        if school == prev or prev == '':
            # Add the parsed integer; line[1] is still text and cannot be
            # added to an int.
            pop_count += population
        else:
            pass  # do something else here
        prev = school

Why aren't the lists populating in this code?

I wrote this code for class and cannot figure out why my lists are not populating with any values. I've tried using a debugger and still can't figure out why it won't work. Any ideas? Also... I know for loops would have made more sense, but I needed to use while loops for the assignment.
__author__ = 'Ethan'
#This program reads in a file from the user which contains lines of
def mileage():
filename = input("Please enter the file name: ")
file = open(filename,"r")
line_list = []
num_lines = sum(1 for line in file)
line_counter = 0
while line_counter <= num_lines:
line = file.readline()
line_items = line.split()
line_list.append(line_items)
line_counter += 1
current_index_pos = 0
while current_index_pos <= num_lines:
current_item = line_list[current_index_pos]
print("Leg",current_index_pos + 1,"---", current_item[0]/current_item[1],"miles/gallon")
current_index_pos += 1
mileage()
This reads to the end of the file
num_lines = sum(1 for line in file)
so there are no lines left to read when you get here
line = file.readline()
Better to structure the code like this
with open(filename, "r") as fin:
for line_counter, line in enumerate(fin):
line_items = line.split()
line_list.append(line_items)
# after the loop line_counter has counted the lines
or even (if you don't need line_counter)
with open(filename, "r") as fin:
line_list = [line.split() for line in fin]
More advanced would be to use a generator expression or do everything in a single loop to avoid needing to read the whole file into memory at once
def mileage():
    """Print the first-value / second-value ratio for each line of a file.

    Prompts for a file name, then reads the file line by line. Each line is
    split on whitespace and its first two items are converted to float and
    divided; the result is printed labelled as miles/gallon (presumably the
    columns are miles and gallons -- confirm against the data file).
    Blank lines are skipped.
    """
    filename = input("Please enter the file name: ")
    with open(filename, "r") as fin:
        for line_counter, line in enumerate(fin):
            current_item = line.split()
            if not current_item:
                # A blank line (e.g. a trailing newline) has nothing to divide.
                continue
            print("Leg", line_counter + 1, "---",
                  float(current_item[0]) / float(current_item[1]), "miles/gallon")

Categories

Resources