Incorrectly reading lines of a text File in python

Incorrectly reading lines of a text File in python - python

So basically i want to iterate the lines of a text file that has this format:
-----------------------------------------
Code: 0123456789
EGGS: 3 7.00 21.00
BACON: 1 3.50 3.50
COFFEE: 2 14.20 28.40
TOTAL: 52.90
-----------------------------------------
and i have the following code to read the lines one by one:
with open(filename, "rt", encoding="utf-8") as f:
for line in f:
prevline = line
line.split()
if '-' in line:
temp = f.readline().split(':') #Get Code
print(temp)
AFM = temp[1]
print(AFM)
else:
tempProducts = line.split(':') #Get Product in a list
productName = tempProducts[0] #Store Product Name in a variable
productStats = tempProducts[1] #Store Product Stats in a list
productStats = productStats.split(" ")
for value in productStats:
valueArray.append(float(value))
products.update({productName:valueArray})
if '-' in f.readline():
rec = Receipt(AFM,products)
products={}
valueArray=[]
receipts.append(rec)
else:
line=prevline
mind that i want to skip the line with the '------------' characters the code works but it keeps reading second line then fourth then sixth(code,bacon,total). The question is how can i fix this.Edit: there are multiple receipts in the file so i need each time to skip the line with the'----------'.

with open(filename, "rt", encoding="utf-8") as f:
old_list = [] # Saving all the lines including '----'
for line in f:
old_list.append(line)
new_list = old_list[1:-1] # new list which removes the '----' lines
You can iterate just through new_list with your .split logic.

See if this does the job
with open(filename, "rt", encoding="utf-8") as f:
valueArray = []
for line in f:
if not '-' in line:
if 'Code' in line:
AFM = line.split(':')[1]
print(AFM)
valueArray = []
products = {}
else:
tempProducts = line.split(':') # Get Product in a list
productName = tempProducts[0] # Store Product Name in a variable
productStats = tempProducts[1] # Store Product Stats in a list
productStats_list = productStats.split(" ")
for value in productStats:
valueArray.append(float(value))
products.update({productName: valueArray})
if 'TOTAL' in line:
rec = Receipt(AFM, products)
receipts.append(rec)

To anyone seeing this post now consider it closed i do not provide enough information and the code was messed up. Sorry for wasting your time

Related

How to search for a string in part of text?

I am trying to search multiple text files for the text "1-2","2-3","3-H" which occur in the last field of the lines of text that start with "play".
An example of the text file is show below
id,ARI201803290
version,2
info,visteam,COL
info,hometeam,ARI
info,site,PHO01
play,1,0,lemad001,22,CFBBX,HR/78/F
play,1,0,arenn001,20,BBX,S7/L+
play,1,0,stort001,12,SBCFC,K
play,1,0,gonzc001,02,SS>S,K
play,1,1,perad001,32,BTBBCX,S9/G
play,1,1,polla001,02,CSX,S7/L+.1-2
play,1,1,goldp001,32,SBFBBB,W.2-3;1-2
play,1,1,lambj001,00,X,D9/F+.3-H;2-H;1-3
play,1,1,avila001,31,BC*BBX,31/G.3-H;2-3
play,2,0,grayj003,12,CC*BS,K
play,2,1,dysoj001,31,BBCBX,43/G
play,2,1,corbp001,31,CBBBX,43/G
play,4,1,avila001,02,SC1>X,S8/L.1-2
For the text file above, I would like the output to be '4' since there are 4 occurrences of "1-2","2-3" and "3-H" in total.
The code I have got so far is below, however I'm not sure where to start with writing a line of code to do this function.
import os
input_folder = 'files' # path of folder containing the multiple text files
# create a list with file names
data_files = [os.path.join(input_folder, file) for file in
os.listdir(input_folder)]
# open csv file for writing
csv = open('myoutput.csv', 'w')
def write_to_csv(line):
print(line)
csv.write(line)
j=0 # initialise as 0
count_of_plate_appearances=0 # initialise as 0
for file in data_files:
with open(file, 'r') as f: # use context manager to open files
for line in f:
lines = f.readlines()
i=0
while i < len(lines):
temp_array = lines[i].rstrip().split(",")
if temp_array[0] == "id":
j=0
count_of_plate_appearances=0
game_id = temp_array[1]
awayteam = lines[i+2].rstrip().split(",")[2]
hometeam = lines[i+3].rstrip().split(",")[2]
date = lines[i+5].rstrip().split(",")[2]
for j in range(i+46,i+120,1): #only check for plate appearances this when temp_array[0] == "id"
temp_array2 = lines[j].rstrip().split(",") #create new array to check for plate apperances
if temp_array2[0] == "play" and temp_array2[2] == "1": # plate apperance occurs when these are true
count_of_plate_appearances=count_of_plate_appearances+1
#print(count_of_plate_appearances)
output_for_csv2=(game_id,date,hometeam, awayteam,str(count_of_plate_appearances))
print(output_for_csv2)
csv.write(','.join(output_for_csv2) + '\n')
i=i+1
else:
i=i+1
j=0
count_of_plate_appearances=0
#quit()
csv.close()
Any suggestions on how I can do this? Thanks in advance!

You can use regex, I put your text in a file called file.txt.
import re
a = ['1-2', '2-3', '3-H'] # What you want to count
find_this = re.compile('|'.join(a)) # Make search string
count = 0
with open('file.txt', 'r') as f:
for line in f.readlines():
count += len(find_this.findall(line)) # Each findall returns the list of things found
print(count) # 7
or a shorter solution: (Credit to wjandrea for hinting the use of a generator)
import re
a = ['1-2', '2-3', '3-H'] # What you want to count
find_this = re.compile('|'.join(a)) # Make search string
with open('file.txt', 'r') as f:
count = sum(len(find_this.findall(line)) for line in f)
print(count) # 7

Huge list in Python returning None elements

I've a CSV file like:
134;football
1457;soccer
12;volley
...
It has 4.992.909 lines exactly. The code is:
with open('/Users/someone/Desktop/python/sports.csv', 'r') as file1:
f = set(file1)
sports_label_list = [None] * 9985818
i = 0
for line in f:
sports = line.split(';')[0]
sports_label_list[i] = sports
i = i + 1
if 'football' in line:
sports_label_list[i] = 'football'
if 'volley' in line:
sports_label_list[i] = 'volley'
if 'basketball' in line:
sports_label_list[i] = 'basketball'
if 'soccer' in line:
sports_label_list[i] = 'soccer'
i = i + 1
When I do print to check sports_label_list, it goes OK (showing numeric values at even numbers and sports names at odd numbers) until somewhere near the 30000 element of the list. After that, it starts to print 'None'. The CSV file is ok. Any suggestions? Thank you!

The file probably has duplicate lines, which set eliminates. Why are you putting it in a set? Needless use of memory. A better, and faster choice would be to build up the list incrementally:
sports_label_list = []
with open('/Users/someone/Desktop/python/sports.csv', 'r') as file1:
for line in file1:
number, sport = line.strip().split(";")
sport_label_list.append(number)
if sport in ("football", "soccer", "baseball", "basketball"):
sport_label_list.append(sport)
else:
sport_label_list.append(None)

Value Error: Item not in List when it clearly is

I have a list, abbreviations, filled with string objects. I am trying to call the .index of a string in my list. When I call the .index method with a string I get a ValueError: 'LING' is not in list, when it clearly is in the list.
My code:
for item in abbreviations:
print item
print abbreviations.index("LING")
Why does 'LING' not exist when it clearing does? I have added my following lines of code, which searches 'abbreviations' for the index of a string. I am baffled -- "LING" is clearly in my abbreviations list.
EDIT (Additional Code):
import csv
myfile = open("/Users/it/Desktop/Classbook/classAbrevs.csv", "rU")
lines = [tuple(row) for row in csv.reader(myfile)]
longSubjectNames = []
abbreviations = []
masterAbrevs = []
for item in lines:
longSubjectNames.append(item[0])
abbreviations.append(item[1])
with open ("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as myfile:
masterSchedule = tuple(open("/Users/it/Desktop/Classbook/masterClassList.txt", 'r'))
for masterline in masterSchedule:
masterline.strip()
masterSplitLine = masterline.split("|")
subjectAbrev = ""
if masterSplitLine[0] != "STATUS":
subjectAbrev = ''.join([i for i in masterSplitLine[2] if not i.isdigit()])
masterAbrevs.append(subjectAbrev)
finalAbrevs = []
for subject in masterAbrevs:
if (subject[-1] == 'W') and (subject[-2:] != 'UW'):
subject = subject[:-1]
finalAbrevs.append(subject)
x = 0
for item in abbreviations:
print item
print abbreviations.index("LING")
for item in finalAbrevs:
if masterSplitLine[0] != "STATUS":
concat = abbreviations.index(str(finalAbrevs[x]).strip())
print "The abbreviation for " + str(item) + " is: " + longSubjectNames[concat]
x = x + 1
The output of:
masterAbrevs = []
for item in lines:
longSubjectNames.append(item[0])
abbreviations.append(item[1])
print '-'.join(abbreviations)
is:
ACA-ACCY-AFST-AMST-ANAT-ANTH-APSC-ARAB-AH-FA-ASTR-BIOC-BISC-BME-BMSC-BIOS-BADM-CHEM-CHIN-CE-CLAS-CCAS-COMM-CSCI-CFA-CNSL-CPED-DNSC-EALL-ECON-EDUC-ECE-EHS-ENGL-EAP-EMSE-ENRP-EPID-EXSC-FILM-FINA-FORS-FREN-GEOG-GEOL-GER-GREK-HCS-HSCI-HLWL-HSML-HEBR-HIST-HOMP-HONR-HDEV-HOL-HSSJ-ISTM-IDIS-IAD-INTD-IAFF-IBUS-ITAL-JAPN-JSTD-KOR-LATN-LAW-LSPA-LING -MGT-MKTG-MBAD-MATH-MAE-MED-MICR-MMED-MSTD-MUS-NSC-ORSC-PSTD-PERS-PHAR-PHIL-PT-PA-PHYS-PMGT-PPSY-PSC-PORT-PSMB-PSYD-PSYC-PUBH-PPPA-REL-SEAS-SMPA-SLAV-SOC-SPAN-SPED-SPHR-STAT-SMPP-SUST-TRDA-TSTD-TURK-UW-WLP-WSTU
Traceback (most recent call last):
File "/Users/it/Desktop/Classbook/sortClasses.py", line 25, in <module>
with open ("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as anything:
IOError: [Errno 2] No such file or directory: '/Users/it/Desktop/Classbook/masterClassList.txt'

myfile = open("/Users/it/Desktop/Classbook/classAbrevs.csv", "rU")
lines = [tuple(row) for row in csv.reader(myfile)]
longSubjectNames = []
abbreviations = []
masterAbrevs = []
for item in lines:
longSubjectNames.append(item[0])
abbreviations.append(item[1])
with open ("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as myfile:
The problem is here;
with open ("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as myfile:
You defined myfile before here,
myfile = open("/Users/it/Desktop/Classbook/classAbrevs.csv", "rU")
So actually abbreviations = [] is not taking data from classAbrevs.csv.Because it's taking data from masterClassList.txt as you defined myfile with this line;
with open ("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as myfile
That's why your string not in that list.Also this line;
for item in lines:
longSubjectNames.append(item[0])
abbreviations.append(item[1])
Are you sure is item[1] has all of the strings that you want?
And I tried these codes I just copy-pasted it from your's and here is the result;

The problem is, from the result you ran:
"LING\t" is shown in your list, not "LING"
with running this I get the desired index:
abbreviations.index("LING\t")
71
To correct this, there are many methods to strip the \t, I'm showing one of those:
abbreviations.append(item[1].strip())
By correcting this line, your item[1] will strip the \t before appending to your abbreviations list.

Why aren't the lists populating in this code?

I wrote this code for class and cannot figure out why my lists are not populating with any values. I've tried using a debugger and still can't figure out why it won't work. Any ideas? Also... I know for loops would have made more sense, but I needed to use while loops for the assignment.
__author__ = 'Ethan'
#This program reads in a file from the user which contains lines of
def mileage():
filename = input("Please enter the file name: ")
file = open(filename,"r")
line_list = []
num_lines = sum(1 for line in file)
line_counter = 0
while line_counter <= num_lines:
line = file.readline()
line_items = line.split()
line_list.append(line_items)
line_counter += 1
current_index_pos = 0
while current_index_pos <= num_lines:
current_item = line_list[current_index_pos]
print("Leg",current_index_pos + 1,"---", current_item[0]/current_item[1],"miles/gallon")
current_index_pos += 1
mileage()

This reads to the end of the file
num_lines = sum(1 for line in file)
so there are no lines left to read when you get here
line = file.readline()
Better to structure the code like this
with open(filename, "r") as fin:
for line_counter, line in enumerate(fin):
line_items = line.split()
line_list.append(line_items)
# after the loop line_counter has counted the lines
or even (if you don't need line_counter)
with open(filename, "r") as fin:
line_list = [line.split() for line in fin]
More advanced would be to use a generator expression or do everything in a single loop to avoid needing to read the whole file into memory at once
def mileage():
filename = input("Please enter the file name: ")
with open(filename, "r") as fin:
for line_counter, line in enumerate(fin):
current_item = line.split()
print("Leg",line_counter + 1,"---", float(current_item[0])/float(current_item[1]),"miles/gallon")

Read and write to a list of names and scores - python

I am trying to create a program that gives the user a short quiz and create a score, which I have done, then I would like to add them to a list in a .txt file. In the program I will ask them their name, so say I have a list such as this;
Bob,7
Bill,5
Jane,6
and someone takes the quiz and inputs the name Bob and gets a score 4 the list will update to;
Bob,4
Bill,5
Jane,6
or someone new takes a quiz, Sarah it will change to;
Bob,4
Bill,5
Jane,6
Sarah,7
So far I have;
import random
file = open("scores.txt", "r")
UserScore=random.randint(0,10)
lines = file.readlines()
file.close()
student=input('What is your name? ')
file = open("scores.txt", "w")
for line in lines:
line = line.strip()
name, score = line.strip().split(",")
if name!=student:
file.write(line)
else:
file.write(name +',' +str(UserScore))
I've randomised the score for now to make it easier to read, however that will be from what the user answered correctly, and I thought this code would read the file then check each name from each line and if the name they entered is the same to the name in the list the line will be replaced with the name and score. However, the file just ends up blank, what am I doing wrong?

Here is what I think is a better idea using the Python pickle module:
In [1]: import pickle
In [2]: scores={'Bob':75, 'Angie':60, 'Anita':80} #create a dict called scores
In [3]: pickle.dump(scores,open('scores.dat','wb')) #dump the pickled object into the file
In [4]: !ls scores.dat #verify that the file has been created
scores.dat
In [5]: !cat scores.dat #list out the file
(dp0
S'Bob'
p1
I75
sS'Angie'
p2
I60
sS'Anita'
p3
I80
s.
In [9]: tscores = pickle.load(open('scores.dat','rb')) #Verification: load the pickled object from the file into a new dict
In [10]: tscores #Verification: list out the new dict
Out[10]: {'Angie': 60, 'Anita': 80, 'Bob': 75}
In [11]: scores == tscores #Verify that the dict object is equivalent to the newly created dict object
Out[11]: True

I tried your code and the first time you run it, then you rewrite the file in one single line. So the next time you run the script on this single line file, you get an unpack exception in the split function and hence you write nothing to the file, resulting in an empty file.
A solution could be to add the newline char again when writing the lines to the file.
import random
file = open("scores.txt", "r")
UserScore=random.randint(0,10)
lines = file.readlines()
file.close()
student=input('What is your name? ')
file = open("scores.txt", "w")
for line in lines:
line = line.strip()
name, score = line.strip().split(",")
if name!=student:
file.write(line + '\n')
else:
file.write(name +',' +str(UserScore) + '\n')

This should do what you want
import random
file = open("scores.txt", "r")
UserScore=random.randint(0,10)
lines = file.readlines()
file.close()
student=input('What is your name? ')
flag = True
file = open("scores.txt", "w")
for line in lines:
line = line.strip()
name, score = line.strip().split(",")
if name!=student:
file.write(line + '\n')
else:
file.write(name +',' +str(UserScore) + '\n')
flag = False
if flag:
file.write(student +',' +str(UserScore) + '\n')

I adjusted a bit of your code and took the liberty to remove the random part and name, score part. But I got some working code. I assume you can make it work for your situation.
file = open("scores.txt", "r+")
lines = file.readlines()
file.close()
us = 15
student = input('What is your name? ')
ls = []
file = open("scores.txt", "r+")
found_student = False
for line in lines:
line = line.strip()
ls = line.split(",")
print("Parsing: " + str(ls))
if not line:
print("Empty line")
pass
elif ls[0] != student:
file.write(line + "\n")
else:
found_student = True
file.write(ls[0] + ',' + str(us) + "\n")
if not found_student:
file.write(student + ',' + str(us) + "\n" )
file.close()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Incorrectly reading lines of a text File in python - python

with open(filename, "rt", encoding="utf-8") as f: old_list = [] # Saving all the lines including '----' for line in f: old_list.append(line) new_list = old_list[1:-1] # new list which removes the '----' lines You can iterate just through new_list with your .split logic.

To anyone seeing this post now consider it closed i do not provide enough information and the code was messed up. Sorry for wasting your time

Related

How to search for a string in part of text?

Huge list in Python returning None elements

Value Error: Item not in List when it clearly is

Why aren't the lists populating in this code?

Read and write to a list of names and scores - python

Categories

Resources