#
# Obtain user input for file name, and open it
#
# NOTE(review): the original indentation of this script has been lost, so the
# nesting of the statements below has to be inferred from the keywords.
# This open() call sits before (outside) the try statement further down.
inFile = open(input("Enter file name: "), "r")
#
# Process data and address possible errors
#
# Running count and summed value for each of the three service categories.
countDinner = 0
countLodging = 0
countConference = 0
valueDinner = 0
valueLodging = 0
valueConference = 0
done = False
# Read one line per pass until `done` is set.
while not done :
line = inFile.readline()
try :
s = line
# Drop the first and last ';'-separated fields and re-join what is left.
serviceAmount = ';'.join(s.split(';')[1:-1]) #Removes date and name regardless of format
# NOTE: the list returned by split() is not stored, so this line has no effect.
serviceAmount.split(";")
# NOTE: str.lower() returns a new string and the result is discarded here, so
# `s` keeps its original capitalisation for the lowercase checks below.
s.lower()
if "dinner" in s :
countDinner = countDinner + 1
# NOTE(review): under Python 3 filter() returns an iterator, which int()
# cannot convert; the same expression is repeated in the two branches below.
valueDinner = valueDinner + int(filter(str.isdigit, s))
print("Dinners: ", countDinner, "Value of Dinner sales: ", valueDinner)
elif "lodging" in s :
countLodging = countLodging + 1
valueLodging = valueLodging + int(filter(str.isdigit, s))
print("Lodging: ", countLodging, "Value of Lodging sales: ", valueLodging)
elif "conference" in s :
countConference = countConference + 1
valueConference = valueConference + int(filter(str.isdigit, s))
print("Conferences: ", countConference, "Value of Conference sales: ", valueConference)
# readline() returns "" only at end of file.
elif line == "" :
done = True
else :
print("Invalid file format.")
# NOTE: the open() call is made before this try statement, so a missing file
# is not caught by this handler.
except FileNotFoundError :
print("Unable to find file.")
# NOTE(review): a finally clause runs on every exit from its try, so if this
# try is inside the loop, `done = True` ends the loop after the first pass.
finally :
done = True
inFile.close()
The program prints "Invalid file format." even when the input file is laid out exactly as this code expects. I'm not getting a syntax error, so I'm not sure what's wrong.
The document contains the text:
John;Lodging;123;050617
Tyler;Conference;123;081497
Taylor;Dinner;453;041798
There are a lot of things you aren't doing quite right here. I tried to not only fix the issue you posted about, but also write some code that should be more clear and easier to use. I left comments to explain things.
# Category name -> label printed in front of its running count.
# Dicts keep the per-category totals together instead of spreading them over
# six separate variables.
_LABELS = {"Dinner": "Dinners",
           "Lodging": "Lodging",
           "Conference": "Conference"}


def tally_sales(lines):
    """Count the sales and sum their values for each known category.

    Each record is expected to look like ``name;category;amount;date``.
    The category is matched case-insensitively. A running total is printed
    after every valid record, and a message naming the 1-based line number
    is printed for every record that cannot be parsed.

    Args:
        lines: An iterable of text lines, for example an open file.

    Returns:
        A ``(counts, values)`` pair of dicts keyed by category name.
    """
    counts = dict.fromkeys(_LABELS, 0)
    values = dict.fromkeys(_LABELS, 0)
    # Number the lines from 1 so that error messages match what an editor shows.
    for linenum, line in enumerate(lines, start=1):
        # Split and drop the first (name) and last (date) fields.
        fields = [field.strip() for field in line.split(';')[1:-1]]
        category = fields[0].capitalize() if fields else ""
        try:
            amount = int(fields[1])
        except (IndexError, ValueError):
            # Missing or non-numeric amount: report the line instead of crashing.
            amount = None
        if category not in counts or amount is None:
            print("Invalid file format on line {}".format(linenum))
            continue
        counts[category] += 1  # x += 1 is the same as x = x + 1, but cleaner
        values[category] += amount
        print("{}: {} Value of {} sales: {}".format(
            _LABELS[category], counts[category], category, values[category]))
    return counts, values


if __name__ == "__main__":
    # Don't open the file here, just get the file name. We will open it later.
    fname = input("Enter file name: ")
    # Let's try to open the file.
    try:
        # Use "with": the file is closed automatically when we are done reading it.
        with open(fname, 'r') as inFile:
            counts, values = tally_sales(inFile)
    except FileNotFoundError:
        print("Unable to find file.")
Here is your problem:
serviceAmount = ';'.join(s.split(';')[1:-1]) #Removes date and name regardless of format
serviceAmount.split(";")
You should do:
serviceAmount = ';'.join(s.lower().split(';')[1:-1])
You are checking against lower case strings, but not actually lower casing your input.
It is also important to note that s.lower() doesn't actually change s; it returns a new string in which all the letters of s have been switched to lower case. The same is true of split: it does not change the string it's called on either (although it returns a list, not a string).
Another problem you are going to run into is getting the numbers from your strings.
int(filter(str.isdigit, s))
Won't work. You can use split again like you did earlier (or just not re-join since you only care about the first element in the comparisons).
int(serviceAmount.split(';')[1])
The last thing is the
finally:
done = True
inFile.close()
finally always runs when exiting a try, meaning that you are always done after each loop (and close the file after you read the first line).
If you remove the finally and add inFile.close() inside the elif line == "" it will close, and set done only when you've reached the end of the file.
It could be done as simple as
def summarize(lines):
    """Group ``name;category;value;date`` records by category.

    Args:
        lines: An iterable of text lines, for example an open file.

    Returns:
        A dict mapping each category found to ``{"count": n, "value": total}``.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields or
            its value is not an integer.
    """
    categories = {}
    # Go through every line, not just the first one.
    for line in lines:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines such as a trailing empty line
        _name, category, value, _date = line.split(";")
        if category not in categories:
            categories[category] = {"count": 0, "value": 0}
        categories[category]["count"] += 1
        categories[category]["value"] += int(value)
    return categories


if __name__ == "__main__":
    filename = input("Enter file name: ")
    with open(filename, "r") as file:
        categories = summarize(file)
At the end, you'll have a dict with categories, their count, and value, and also their names are not hard-coded.
Related
So I'm learning python3 at the moment through university - entirely new to it (not really a strong point of mine haha), and i'm not quite sure what i'm missing - even after going through my course content
So the program in question is a text based Stock Management program
and part of the brief is that i be able to search for a line in the text file and print the line on the program
def lookupstock():
    """Prompt for an item and report whether any line of file.txt contains it."""
    stock_file = open('file.txt', 'r')
    wanted = input("Please enter in the Item: ")
    # any() stops reading at the first line that contains the search text.
    found = any(wanted in row for row in stock_file)
    if found:
        # Only the search text is echoed here, not the line that matched.
        print(wanted)
    else:
        print(wanted, "Not Found")
    stock_file.close()
However, when the item exists, the output is only what I typed in rather than the whole line. For example, say the line I want to print is 'Kit-Kat, 2003, 24.95' and I search for Kit-Kat.
Since the line exists - the output is only
Kit-Kat
Rather than the whole line
Where have I gone wrong? Was I far off?
Greatly appreciated, thank you!
Something like this
if flag == 0:
print(search, "Not Found")
else:
print(search, 'find in line N° ', index , ' line:',line )
StockFile.close()
Alternatively you could open your file using a context manager. This will automatically handle closing the file, here's an example:
def lookupstock():
    """Print every line of file.txt that contains the entered text.

    Each matching line is printed together with its 0-based position in the
    file; a "Not Found" message is printed when no line matches.
    """
    with open('file.txt', 'r') as stock_file:
        wanted = input("Please enter in the Item: ")
        # Collect (position, line) pairs for every line containing the text.
        hits = [(position, row)
                for position, row in enumerate(stock_file)
                if wanted in row]
    for position, row in hits:
        print(row, f"Found at line {position}")
    if not hits:
        print(wanted, "Not Found")
lookupstock()
Results:
Please enter in the Item: test
test Not Found
Please enter in the Item: hello
hello Found at line 0
Setting flags, breaking the loop then testing the flag is not good practice - it's unnecessarily complex. Try this instead:
def LookupStock():
    """Print the first line of file.txt containing the entered text, if any."""
    wanted = input('Enter search item: ')
    with open('file.txt') as stock_file:
        # next() stops reading at the first match; None means nothing matched.
        first_hit = next((row for row in stock_file if wanted in row), None)
    if first_hit is None:
        print(wanted, ' not found')
    else:
        print(first_hit)
Now I have this code, and I need to make better use of try and except and improve the code in general, for example by working out which parts should be moved elsewhere.
this is the beginning of my code:
contador = 0
name = input("Put the name of the file:")
while name != "close":
validation=0
try:
file = open(name,"r",1,"utf-8")
validation = validation + 1
except FileNotFoundError:
validation = validation
if validation >= 1:
Games=[]
countrylist = []
lines = 0
File = open(name,"r")
line = File.readline().strip()
while line != "":
parts= line.split(";")
country=parts[0]
game= parts[1]
sales= int(parts[2])
price= float(parts[3])
format= parts[4]
Games.append(parts)
countrylist.append(country)
line = File.readline().strip()
lines = lines + 1
contador = contador + 1
I don't know exactly the objective of the code, however.
I had to work out from the code how the file is structured. Correct me if I'm wrong, but I believe the file holds one entry per line, with each entry being a list of parameters separated by ";".
You do nothing with the data, in any case just breaking the file into a list of parameters and sending said list of lists back would be enough for a function and then you could do the separation later
So that I could see that the code was doing what I wanted I added a print at the end to get the result
This is the code I ended up with. I tried to explain most of the issues in comments (probably a bad idea, and I shall be berated for it till the end of the ages).
def parse_games(text):
    """Parse ``country;game;sales;price;format`` records, one per line.

    The price may use either "." or "," as the decimal separator. The raw
    fields of every record are printed as they are parsed.

    Args:
        text: The whole content of the data file.

    Returns:
        A list of ``(country, game, sales, price, style)`` tuples, where
        sales is an int and price is a float.

    Raises:
        ValueError: If a line has fewer than five fields, or its sales or
            price field is not numeric.
    """
    records = []
    # splitlines() breaks the whole text into lines; an empty file gives no lines.
    for line in text.strip().splitlines():
        # Turn each line into a list that can be addressed by position.
        parts = line.strip().split(";")
        if len(parts) < 5:
            raise ValueError(
                "expected 5 fields, got {}: {!r}".format(len(parts), line))
        sales = int(parts[2])
        price = float(parts[3].replace(",", "."))
        # "format" is already the name of a built-in function, so the last
        # field is called "style" instead.
        style = parts[4]
        records.append((parts[0], parts[1], sales, price, style))
        print(parts)
    return records


if __name__ == "__main__":
    # No global line counter is needed: nothing in the program depends on
    # the number of lines, and len() can supply it when required.
    while True:
        # Ask for the name inside the loop; otherwise the name never changes
        # and a failing file name would loop forever.
        name = input("Put the name of the file:")
        # An empty name (instead of an arbitrary word) ends the program,
        # without trying to open a file called "".
        if name == "":
            break
        try:
            # "with" closes the file as soon as its content has been read.
            with open(file=name, encoding="utf-8") as File:
                records = parse_games(File.read())
        except FileNotFoundError as e0:
            print("File not found: " + str(e0))
        except ValueError as e1:
            print("Value Error: " + str(e1))
        else:
            countrylist = [record[0] for record in records]
            Games = [record[1] for record in records]
For a text file with the format:
Portugal;Soccer;1000;12.5;dd/mm/yyyy
England;Cricket;2000;13,5;mm/dd/yyyy
Spain;Ruggby;1500;11;yyyy/dd/mm
I got an output in the form of:
['Portugal', 'Soccer', '1000', '12.5', 'dd/mm/yyyy']
['England', 'Cricket', '2000', '13,5', 'mm/dd/yyyy']
['Spain', 'Ruggby', '1500', '11', 'yyyy/dd/mm']
ask questions for clarification
I think it would be best if I just posted my code for better understanding. So first I just simply ask the user to input a file name, just to see if it exists. And if it doesn't it exits the program.
I have a separate file containing a list of keywords, which I put into a dict
then I check if another file exists... similar to before. and with this file, Im checking to see if the keywords in the first file are contained in the second one. and calculating the sentiment value. But this is not what I need help with I just wanted to explain a little before hand
So my question is, in the section below:
for line in open('tweets.txt'):
line = line.split(" ")
lat = float(line[0][1:-1]) #Stripping the [ and the ,
long = float(line[1][:-1]) #Stripping the ]
if eastern.contains(lat, long):
eastScore += score(line)
elif central.contains(lat, long):
centralScore += score(line)
elif mountain.contains(lat, long):
mountainScore += score(line)
elif pacific.contains(lat, long):
pacificScore += score(line)
else:
continue
how would I be able to focus on only lines in the file that contain keywords and not the entire file?
like you see in this part where I ignore lines without values?
with open('tweets.txt') as f:
for line in f:
values = Counter(word for word in line.split() if word in sentiments)
if not values:
continue
I tried methods such as creating a new file and writing the lines that contain keywords into the new file, but that raised
TypeError: 'in <string>' requires string as left operand, not dict
but it wasnt really what I wanted to do anyways. So my first question is, how would I be able to focus on only lines that contain keywords for that section above?
[41.923916200000001, -88.777469199999999] 6 2011-08-28 19:24:18 My life is a moviee.
from collections import Counter
# Ask for the keywords file and try to open it; exit if that fails.
# NOTE(review): keyFile is not referenced again in the visible code.
try:
keyW_Path = input("Enter file named keywords: ")
keyFile = open(keyW_Path, "r")
except IOError:
print("Error: file not found.")
exit()
# Read the keywords into a dict that maps each line of the file to 0.
# NOTE(review): this reads the hard-coded 'keywords.txt', not keyW_Path.
keywords = {}
wordFile = open('keywords.txt', 'r')
for line in wordFile.readlines():
word = line.replace('\n', '')
if not(word in keywords.keys()): #Checks that the word doesn't already exist.
keywords[word] = 0 # Adds the word to the DB.
wordFile.close()
# Read the file name from the user and open the file.
# NOTE(review): tweetFile is not referenced again in the visible code.
try:
tweet_path = input("Enter file named tweets: ")
tweetFile = open(tweet_path, "r")
except IOError:
print("Error: file not found.")
exit()
#Calculating Sentiment Values
# Each line of keywords.txt is split on "," into a word and an integer value.
with open('keywords.txt') as f:
sentiments = {word: int(value) for word, value in (line.split(",") for line in f)}
with open('tweets.txt') as f:
for line in f:
# Count how often each sentiment word occurs among the whitespace-separated
# tokens of this line.
values = Counter(word for word in line.split() if word in sentiments)
# Lines without any sentiment word are skipped.
if not values:
continue
# Occurrence-weighted sum of the sentiment values, floor-divided by the
# number of distinct sentiment words found in the line.
happyScore_Tweet = (sum(values[word]*sentiments[word] for word in values)) // (len(values))
print(happyScore_Tweet)
def score(tweet):
    """Return how many items of *tweet* are keys of the ``sentiments`` dict."""
    # Every matching item adds 1, so repeated words are counted each time.
    return sum(1 for word in tweet if word in sentiments)
#Classifying the regions
class Region:
    """Rectangular area described by a latitude range and a longitude range."""

    def __init__(self, lat_range, long_range):
        # Each range is a (lower, upper) pair: contains() treats the lower
        # bound as inclusive and the upper bound as exclusive.
        self.lat_range = lat_range
        self.long_range = long_range

    def contains(self, lat, long):
        """Return True when both coordinates lie inside their ranges."""
        return (self.lat_range[0] <= lat < self.lat_range[1]
                and self.long_range[0] <= long < self.long_range[1])
# Four regions sharing one latitude range; their longitude ranges are
# adjacent, each one starting where the next one to the west ends.
eastern = Region((24.660845, 49.189787), (-87.518395, -67.444574))
central = Region((24.660845, 49.189787), (-101.998892, -87.518395))
mountain = Region((24.660845, 49.189787), (-115.236428, -101.998892))
pacific = Region((24.660845, 49.189787), (-125.242264, -115.236428))
# One running total per region.
eastScore = 0
centralScore = 0
pacificScore = 0
mountainScore = 0
# NOTE(review): happyScoreE is not used in the visible code.
happyScoreE = 0
# NOTE: the file object created here is never explicitly closed.
for line in open('tweets.txt'):
# From here on `line` is the list of space-separated tokens of the tweet line.
line = line.split(" ")
lat = float(line[0][1:-1]) #Stripping the [ and the ,
long = float(line[1][:-1]) #Stripping the ]
# Add this line's score to the first region that contains the coordinates.
if eastern.contains(lat, long):
eastScore += score(line)
elif central.contains(lat, long):
centralScore += score(line)
elif mountain.contains(lat, long):
mountainScore += score(line)
elif pacific.contains(lat, long):
pacificScore += score(line)
# Coordinates outside all four regions are ignored.
else:
continue
Use regex to extract the lat and long.
import re

# A raw string keeps "\d", "\." and "\s" from being read as (invalid) string
# escapes. Each match is a (latitude, longitude) pair of strings.
# "with" closes the file once its content has been read.
with open(filename, 'r') as text:
    matches = re.findall(r"(-?\d+\.\d+),\s*(-?\d+\.\d+)", text.read())
`matches` will be a list of (lat, long) tuples of strings, one tuple for each coordinate pair found in the text.
Also, there are some very good tools in python you can use for spatial queries, you should look them up.
I Have a program that calculates a score 'LevelScore' and i want to open the UserFile 'UserScoreFile' and check against the current user score saved in the file, and if LevelScore > CurrentScore overwrite the previous characters representing that levels score in the text file to the LevelScore.
Each line in the Text file represents a level from 0-7 with each line format being, "T 000", T is representing if level is unlocked and 000 represents the current score (score can be 0-100) "lev" is a variable from 0-7 indicating which level the user is on.
UserFileR = open("UserScoreFile.txt","r")
UserFileLines = UserFileR.readlines()
UserLevelLine = UserFileLines[lev]
UserLevelScore = UserLevelLine[2:5]
if LevelScore > UserLevelScore:
UserFileWR = open("UserScoreFile.txt","r+")
#This is where i dont know what to do...
This should get you started.
# Open for reading and writing; "with" closes the file even if an error occurs.
with open("UserScoreFile.txt", "r+") as UserFileRW:
    UserFileLines = UserFileRW.readlines()
    UserLevelLine = UserFileLines[lev]
    UserLevelScore = int(UserLevelLine[2:5])
    if int(LevelScore) > UserLevelScore:
        # Keep the two characters in front of the score, zero-pad the new
        # score to three digits, and keep whatever followed the old score
        # (normally just the newline).
        UserFileLines[lev] = "{}{:03d}{}".format(
            UserLevelLine[:2], int(LevelScore), UserLevelLine[5:])
        # readlines() left the position at the end of the file: go back to
        # the start and cut the old content before writing everything back.
        UserFileRW.seek(0)
        UserFileRW.truncate()
        UserFileRW.write(''.join(UserFileLines))
Unfortunately, it is not possible to change something in the middle of a file in place. You have to read all of it, make the modifications, and then write it all back.
Here's the solution me and Luke have come up with collaboratively:
# Read every line first; "with" closes the file when the block ends.
with open("UserScoreFile.txt", "r") as UserFileRW:
    UserFileLines = UserFileRW.readlines()
UserLevelLine = UserFileLines[lev]
UserLevelScore = int(UserLevelLine[2:])
if LevelScore > UserLevelScore:
    # Reopening in write mode empties the file; every line is then written
    # back through this new handle, with the line for this level replaced.
    with open("UserScoreFile.txt", "w") as UserFileWR:
        UserFileLines[lev] = "T " + str(LevelScore) + "\n"
        UserFileWR.writelines(UserFileLines)
Thank you to Jotto and Tim Pietzcker for your contributions.
Using the fileinput module, you could do this:
import fileinput

# With inplace=True, everything printed inside the loop is written to the file
# being read, so every line must be printed back, changed or not.
with fileinput.input(files=["test.txt"], inplace=True) as f:
    for line in f:
        if fileinput.lineno() == lev+1: # line numbers start at 1, not 0
            UserLevelScore = int(line[2:5]) # assuming LevelScore is an int
            if LevelScore > UserLevelScore:
                # Right-justify LevelScore with leading zeroes. line[5:] keeps
                # whatever followed the old score (normally the newline), so
                # the next line does not get glued onto this one.
                line = "{}{:0>3}{}".format(line[:2], LevelScore, line[5:])
        print(line, end="") # line already carries its own line ending
I am working on a project that requires me to be able to search for multiple keywords in a file. For example, if I had a file with 100 occurrences of the word "Tomato", 500 for the word "Bread", and 20 for "Pickle", I would want to be able to search the file for "Tomato" and "Bread" and get the number of times it occurs in the file. I was able to find people with the same issue/question, but for other languages on this site.
I have a working program that allows me to search for the column name and tally how many times something shows up in that column, but I want to make something a bit more precise. Here is my code:
def start():
location = raw_input("What is the folder containing the data you like processed located? ")
#location = "C:/Code/Samples/Dates/2015-06-07/Large-Scale Data Parsing/Data Files"
if os.path.exists(location) == True: #Tests to see if user entered a valid path
file_extension = raw_input("What is the file type (.txt for example)? ")
search_for(location,file_extension)
else:
print "I'm sorry, but the file location you have entered does not exist. Please try again."
start()
def search_for(location,file_extension):
querylist = []
n = 5
while n == 5:
search_query = raw_input("What would you like to search for in each file? Use'Done' to indicate that you have finished your request. ")
#list = ["CD90-N5722-15C", "CD90-NB810-4C", "CP90-N2475-8", "CD90-VN530-22B"]
if search_query == "Done":
print "Your queries are:",querylist
print ""
content = os.listdir(location)
run(content,file_extension,location,querylist)
n = 0
else:
querylist.append(search_query)
continue
# Call search() for the entries of `content` whose name ends with
# file_extension, and call quit().
# NOTE(review): the indentation was lost, so it cannot be told from here
# whether quit() runs once after the loop or right after the first match.
def run(content,file_extension,location,querylist):
for item in content:
if item.endswith(file_extension):
search(location,item,querylist)
quit()
def search(location,item,querylist):
with open(os.path.join(location,item), 'r') as f:
countlist = []
for search in querylist: #any search value after the first one is incorrectly reporting "0"
countsearch = 0
for line in f:
if search in line:
countsearch = countsearch + 1
countlist.append(search)
countlist.append(countsearch) #mechanism to update countsearch is not working for any value after the first
print item, countlist
start()
If I use that code, the last part (def search) is not working correctly. Any time I put a search in, any search after the first one I enter in returns "0", despite there being up to 500,000 occurrences of the search word in a file.
I was also wondering, since I have to index 5 files with 1,000,000 lines each, if there was a way I could write either an additional function or something to count how many times "Lettuce" occurs over all the files.
I cannot post the files here due to their size and content. Any help would be greatly appreciated.
Edit
I also have this piece of code here. If I use this, I get the correct count of each, but it would be much better to have a user be able to enter as many searches as they want:
def check_start():
#location = raw_input("What is the folder containing the data you like processed located? ")
location = "C:/Code/Samples/Dates/2015-06-07/Large-Scale Data Parsing/Data Files"
content = os.listdir(location)
for item in content:
if item.endswith("processed"):
countcol1 = 0
countcol2 = 0
countcol3 = 0
countcol4 = 0
#print os.path.join(currentdir,item)
with open(os.path.join(location,item), 'r') as f:
for line in f:
if "CD90-N5722-15C" in line:
countcol1 = countcol1 + 1
if "CD90-NB810-4C" in line:
countcol2 = countcol2 + 1
if "CP90-N2475-8" in line:
countcol3 = countcol3 + 1
if "CD90-VN530-22B" in line:
countcol4 = countcol4 + 1
print item, "CD90-N5722-15C", countcol1, "CD90-NB810-4C", countcol2, "CP90-N2475-8", countcol3, "CD90-VN530-22B", countcol4
You are trying to iterate over your file more than once. After the first time, the file pointer is at the end so subsequent searches will fail because there's nothing left to read.
If you add the line f.seek(0) at the end of each search, the pointer is reset to the start of the file before the next read:
def search(location,item,querylist):
with open(os.path.join(location,item), 'r') as f:
countlist = []
for search in querylist: #any search value after the first one is incorrectly reporting "0"
countsearch = 0
for line in f:
if search in line:
countsearch = countsearch + 1
countlist.append(search)
countlist.append(countsearch) #mechanism to update countsearch is not working for any value after the first
f.seek(0)
print item, countlist
PS. I've guessed at the indentation... You really shouldn't use tabs.
I'm not sure I get your question completely, but how about something like this?
def check_start():
raw_search_terms = raw_input('Enter search terms seperated by a comma:')
search_term_list = raw_search_terms.split(',')
#location = raw_input("What is the folder containing the data you like processed located? ")
location = "C:/Code/Samples/Dates/2015-06-07/Large-Scale Data Parsing/Data Files"
content = os.listdir(location)
for item in content:
if item.endswith("processed"):
# create a dictionary of search terms with their counts (initialized to 0)
search_term_count_dict = dict(zip(search_term_list, [0 for s in search_term_list]))
for line in f:
for s in search_term_list:
if s in line:
search_term_count_dict[s] += 1
print item
for key, value in search_term_count_dict.iteritems() :
print key, value