Find line with specific string in a file with Python - python

I'm trying to print the category and the number of pages from a text file with python.
This is the code I wrote so far:
search = input("Which book are you looking for: ")
file = open("books.txt","r")
line_num = 0
reading = file.readlines()
# for line in file:
# if search in file:
# NOT SURE HOW TO WRITE TO GET THE LINE NUMBER OF WHAT THE USER IS LOOKING FOR
book_category = line_num + 1
book_pages = line_num + 2
print(reading[book_category] , " " , reading[book_pages])
file.close()
And here is the file (books.txt)
Book one
Horror
300
Book two
Fantasy
150
Book three
Mystery
400
Book four
Romance
100

Try this:
with open('books.txt') as fin :
lines = fin.readlines()
line_num = -1
for k,line in enumerate(lines) :
if line.find( search ) != -1 :
line_num = k
The code above will search for the user input in the book titles, for example, if user inputs "Gob", all following books will match: "The Goblin", "The Life of Richard Gobbler" etc.
If you need an exact match, use:
if line.lower().strip() == search.lower().strip() :

One way to go about this problem is to break the string (the book list) into two parts: everything before your target and everything after it. Then split everything before your target on newlines and count how many lines there are.
def line_no(inp, target):
    """Return the 1-based line number on which *target* first occurs in *inp*.

    Lines are delimited by newline characters. If *target* spans several
    lines, the line on which it starts is reported.

    Args:
        inp: The full text to search.
        target: The substring to look for.

    Returns:
        The 1-based line number of the first occurrence, or -1 when *target*
        does not occur at all. Subtract 1 from a hit to index a list of lines.

    Raises:
        ValueError: If *target* is empty.
    """
    if not target:
        raise ValueError("empty target")
    position = inp.find(target)
    if position == -1:
        # Without this check a miss would be reported as the last line.
        return -1
    # Each newline before the match pushes it one line further down;
    # the +1 makes the result 1-based.
    return inp.count('\n', 0, position) + 1
statement = '''Hello
World
I
Am
A
Statement'''
line_no(statement, 'Am')

Related

Creating a search function in a list from a text file

everyone. I have a Python assignment that requires me to do the following:
Download this CSV file of female Oscar winners (https://docs.google.com/document/d/1Bq2T4m7FhWVXEJlD_UGti0zrIaoRCxDfRBVPOZq89bI/edit?usp=sharing) and open it in a text editor on your computer
Add a text file to your sandbox project named OscarWinnersFemales.txt
Copy and paste several lines from the original file into your sandbox file. Make sure that you include the header.
Write a Python program that does the following:
Open the file and store the file object in a variable
Read the entire contents line by line into a list and strip away the newline character at the end of each line
Using list slicing, print lines 4 through 7 of your file
Write code that will ask the user for an actress name and then search the list to see if it is in there. If it is it will display the record and if it is not it will display Sorry not found.
Close the file
Below is the code I currently have. I've already completed the first three bullet points but I can't figure out how to implement a search function into the list. Could anyone help clarify it for me? Thanks.
f = open('OscarsWinnersFemales.txt')
f = ([x.strip("\n") for x in f.readlines()])
print(f[3:7])
Here's what I tried already but it just keeps returning failure:
def search_func():
actress = input("Enter an actress name: ")
for x in f:
if actress in f:
print("success")
else:
print("failure")
search_func()
I hate it when people use complicated commands like ([x.strip("\n") for x in f.readlines()]), so I'll just use multiple lines, but you can do what you like.
# Read all lines up front; the with-block closes the file for us.
# (Calling .close() on the list returned by readlines() would fail.)
with open("OscarWinnersFemales.txt") as oscar_file:
    f = oscar_file.readlines()

data = {}  # maps each actress name to her formatted record
for i, d in enumerate(f):  # iterate the lines, not the (still empty) dict
    d = d.strip("\n")
    f[i] = d
    fields = d.split(",")
    if len(fields) < 5:
        # Blank or incomplete line: there is nothing to index.
        continue
    # Expected columns: index, year, age, name, movie. A record with extra
    # columns lists more than one movie; join them with " and ".
    index, year, age, name, *movies = fields
    movie = " and ".join(movies)
    data[name] = f"{index}-> {year}-{age} | {movie}"

print(f[3:7])


def search_actr(name):
    """Print the stored record for *name*, or a hint if it is not in ``data``.

    The lookup is an exact, case-sensitive match on the name column.
    """
    if name in data:
        print(data[name])
    else:
        print("Actress does not exist in database. Remember to use captols and their full name")
I apologize if there are any errors, I decided not to download the file but everything I wrote is based off my knowledge and testing.
I have figured it out
file = open("OscarWinnersFemales.txt","r")
OscarWinnersFemales_List = []
for line in file:
stripped_line = line.strip()
OscarWinnersFemales_List.append(stripped_line)
file.close()
print(OscarWinnersFemales_List[3:7])
print()
actress_line = 0
name = input("Enter An Actress's Name: ")
for line in OscarWinnersFemales_List:
if name in line:
actress_line = line
break
if actress_line == 0:
print("Sorry, not found.")
else:
print()
print(actress_line)

Trouble with matching variables to line in txt, and removing line

I am having trouble with matching variables to lines in txt, and removing the lines.
I am currently doing a hotel room booking program in which I am having trouble removing a booking from my text file.
This is how my lines in my text file are formatted:
first_name1, phonenumber1 and email 1 are linked to entry boxes
jeff;jeff#gmail.com;123123123;2019-06-09;2019-06-10;Single Room
def edit_details(self,controller):
f = open("Bookings.txt")
lines = f.readlines()
f.close()
x = -1
for i in lines:
x += 1
data = lines[x]
first_name1 = str(controller.editName.get())
phonenumber1 = str(controller.editPhone.get())
email1 = str(controller.editEmail.get())
checkfirst_name, checkemail, checkphone_num, checkclock_in_date, checkclock_out_date, checkroom = map(str, data.split(";"))
if checkfirst_name.upper() == first_name1.upper() and checkemail.upper() == email1.upper() and checkphone_num == phonenumber1:
controller.roomName.set(checkfirst_name)
controller.roomEmail.set(checkemail)
controller.roomPhone.set(checkphone_num)
controller.roomCheckin.set(checkclock_in_date)
controller.roomCheckout.set(checkclock_out_date)
controller.roomSelect.set(checkroom)
print(controller.roomName.get())
print(controller.roomSelect.get())
controller.show_frame("cancelBooking")
break
elif x > len(lines) - int(2):
messagebox.showerror("Error", "Please Enter Valid Details")
break
I have the user to enter their details to give me the variables but I don't know how to match these variables to the line in the text file to remove the booking.
Do I have to format these variables to match the line?
This is what i have tried but it deletes the last line in my file
line_to_match = ';'.join([controller.roomName.get(),controller.roomEmail.get(),controller.roomPhone.get()])
print(line_to_match)
with open("Bookings.txt", "r+") as f:
line = f.readlines()
f.seek(0)
for i in line:
if i.startswith(line_to_match):
f.write(i)
f.truncate()
I have kind of added a pseudocode here. You can join the variables using ; and validate if the line startswith those details, like below.
# The names on the left must line up with the values on the right:
# name, e-mail, phone.
first_name1, email1, phonenumber1 = 'jeff', 'jeff#gmail.com', '123123123'
# Join in the same field order the booking lines use: name;email;phone;...
line_to_match = ';'.join([first_name1, email1, phonenumber1])
for i in line:
...
if i.startswith(line_to_match):
# Add your removal code here
...

Python: Counting Occurrences of a Word From a txt file with Input

I am new to programming and I need help finding how many times the user input occurs inside of a txt file. The code I currently have is:
myfile = open("WorldSeriesWinners.txt")
count = 0
team = input("Enter in the team name that won the world series: ")
line = myfile.readline()
myfile.readline()
while team in line:
count += 1
myfile.readline()
print("The", team, "won the world series", count, "times")
myfile.close()
The output I get from this is:
Enter in the team name that won the world series: New York Yankees
The New York Yankees won the world series 0 times
How would I get it to show how many times a specific team won? Thanks in advance.
team = input('Enter team name: ')
count = 0
# Stream the file line by line; a line counts once if it contains the team name.
with open("WorldSeriesWinners.txt") as f:
    for line in f:
        if team in line:
            count += 1
print('{} won the world series {} times'.format(team, count))
Go through line by line and check each line using if statements
Try the following:
import re
def world_series_count(team):
    """Return how many times *team* occurs in WorldSeriesWinners.txt.

    The name is matched literally (case-sensitive), anywhere in the file.

    Args:
        team: The team name to count.

    Returns:
        The number of non-overlapping occurrences of *team* in the file.
    """
    with open("WorldSeriesWinners.txt") as f:
        data = f.read()
    # Escape the name so characters such as "." or "(" are matched literally
    # instead of being interpreted as regular-expression syntax.
    items = re.findall(re.escape(team), data)
    return len(items)
team = input('Enter a team name: ')
count = world_series_count(team)
print('{} won the world series {} times'.format(team, count))
Why are you people complicating matters way over necessary.
This will count occurrences of text in a txt file given by path, regardless of how the text is laid out across lines and whitespace (unless a word is hyphenated across a line break):
def count_in_file(path, text):
    """Count occurrences of *text* in the file at *path*, ignoring layout.

    Every run of whitespace in the file (spaces, tabs, line breaks) is
    collapsed to a single space before counting, so a phrase that is broken
    across lines is still found. Leading and trailing whitespace of *text*
    is ignored.

    Args:
        path: Path of the text file to read.
        text: The word or phrase to count.

    Returns:
        The number of non-overlapping occurrences.
    """
    # Open in text mode: the search text is a str, so the file content must be
    # decoded to str as well (bytes and str cannot be joined or compared).
    with open(path) as f:
        c = f.read()
    return " ".join(c.split()).count(text.strip())
A little tweak will be needed to support unicode txt files. But there. Simple and easy.
If the txt file is extremely big, then perform this using buffering with static chunk size:
def count_in_file(path, text, chunksize=4096):
    """Count occurrences of *text* in a large file, reading it in chunks.

    The result is the same as collapsing every run of whitespace in the whole
    file to a single space and counting non-overlapping occurrences of the
    stripped *text*, but only about ``chunksize + len(text)`` characters are
    held in memory at a time.

    Args:
        path: Path of the text file to read.
        text: The word or phrase to count; surrounding whitespace is ignored.
        chunksize: Number of characters to read per step (positive integer).

    Returns:
        The number of non-overlapping occurrences; 0 if *text* is blank.
    """
    import re

    text = text.strip()
    if not text:
        # An empty pattern has no meaningful count and would never advance.
        return 0
    # A match that straddles a chunk boundary can begin at most
    # len(text) - 1 characters before the end of the current buffer.
    keep = len(text) - 1
    count = 0
    carry = ""
    with open(path) as f:
        while True:
            chunk = f.read(chunksize)
            if not chunk:
                break
            # Normalise after joining so a whitespace run that is split
            # between two chunks still collapses to one space.
            buf = re.sub(r"\s+", " ", carry + chunk)
            pos = 0
            while True:
                hit = buf.find(text, pos)
                if hit == -1:
                    break
                count += 1
                pos = hit + len(text)
            # Keep only the unmatched tail that could start a later match;
            # starting after the last hit avoids counting anything twice.
            carry = buf[max(pos, len(buf) - keep):]
    return count
Well, this now is a bit complicated. But if a file is really, really big, and it is formatted over lines, this is a good way to do it.

Trying to only look at specific lines in txt file TypeError: 'in <string>' requires string as left operand, not dict raises when I try

ask questions for clarification
I think it would be best if I just posted my code for better understanding. So first I just simply ask the user to input a file name, just to see if it exists. And if it doesn't it exits the program.
I have a separate file containing a list of keywords, which I put into a dict
then I check if another file exists... similar to before. and with this file, Im checking to see if the keywords in the first file are contained in the second one. and calculating the sentiment value. But this is not what I need help with I just wanted to explain a little before hand
So my question is, in the section below:
for line in open('tweets.txt'):
line = line.split(" ")
lat = float(line[0][1:-1]) #Stripping the [ and the ,
long = float(line[1][:-1]) #Stripping the ]
if eastern.contains(lat, long):
eastScore += score(line)
elif central.contains(lat, long):
centralScore += score(line)
elif mountain.contains(lat, long):
mountainScore += score(line)
elif pacific.contains(lat, long):
pacificScore += score(line)
else:
continue
how would I be able to focus on only lines in the file that contain keywords and not the entire file?
like you see in this part where I ignore lines without values?
with open('tweets.txt') as f:
for line in f:
values = Counter(word for word in line.split() if word in sentiments)
if not values:
continue
I tried methods such as creating a new file and writing the lines that contain keywords into the new file, but that raised
TypeError: 'in <string>' requires string as left operand, not dict
but it wasnt really what I wanted to do anyways. So my first question is, how would I be able to focus on only lines that contain keywords for that section above?
[41.923916200000001, -88.777469199999999] 6 2011-08-28 19:24:18 My life is a moviee.
from collections import Counter
try:
keyW_Path = input("Enter file named keywords: ")
keyFile = open(keyW_Path, "r")
except IOError:
print("Error: file not found.")
exit()
# Read the keywords into a list
keywords = {}
wordFile = open('keywords.txt', 'r')
for line in wordFile.readlines():
word = line.replace('\n', '')
if not(word in keywords.keys()): #Checks that the word doesn't already exist.
keywords[word] = 0 # Adds the word to the DB.
wordFile.close()
# Read the file name from the user and open the file.
try:
tweet_path = input("Enter file named tweets: ")
tweetFile = open(tweet_path, "r")
except IOError:
print("Error: file not found.")
exit()
#Calculating Sentiment Values
with open('keywords.txt') as f:
sentiments = {word: int(value) for word, value in (line.split(",") for line in f)}
with open('tweets.txt') as f:
for line in f:
values = Counter(word for word in line.split() if word in sentiments)
if not values:
continue
happyScore_Tweet = (sum(values[word]*sentiments[word] for word in values)) // (len(values))
print(happyScore_Tweet)
def score(tweet):
total = 0
for word in tweet:
if word in sentiments:
total += 1
return total
#Classifying the regions
class Region:
def __init__(self, lat_range, long_range):
self.lat_range = lat_range
self.long_range = long_range
def contains(self, lat, long):
return self.lat_range[0] <= lat and lat < self.lat_range[1] and\
self.long_range[0] <= long and long < self.long_range[1]
eastern = Region((24.660845, 49.189787), (-87.518395, -67.444574))
central = Region((24.660845, 49.189787), (-101.998892, -87.518395))
mountain = Region((24.660845, 49.189787), (-115.236428, -101.998892))
pacific = Region((24.660845, 49.189787), (-125.242264, -115.236428))
eastScore = 0
centralScore = 0
pacificScore = 0
mountainScore = 0
happyScoreE = 0
for line in open('tweets.txt'):
line = line.split(" ")
lat = float(line[0][1:-1]) #Stripping the [ and the ,
long = float(line[1][:-1]) #Stripping the ]
if eastern.contains(lat, long):
eastScore += score(line)
elif central.contains(lat, long):
centralScore += score(line)
elif mountain.contains(lat, long):
mountainScore += score(line)
elif pacific.contains(lat, long):
pacificScore += score(line)
else:
continue
Use regex to extract the lat and long.
import re
# Read the file inside a with-block so the handle is always closed.
with open(filename, 'r') as text:
    # Raw string: the backslashes reach the regex engine untouched instead of
    # being treated as (invalid) string escapes.
    # Group 1 captures the latitude, group 2 the longitude.
    matches = re.findall(r"(\-?\d+\.\d+?),\s*(\-?\d+\.\d+)", text.read())
`matches` will be a list of (lat, long) tuples of strings, one tuple for each coordinate pair found in the file.
Also, there are some very good tools in python you can use for spatial queries, you should look them up.

Multiple Word Search not Working Correctly (Python)

I am working on a project that requires me to be able to search for multiple keywords in a file. For example, if I had a file with 100 occurrences of the word "Tomato", 500 for the word "Bread", and 20 for "Pickle", I would want to be able to search the file for "Tomato" and "Bread" and get the number of times it occurs in the file. I was able to find people with the same issue/question, but for other languages on this site.
I have a working program that allows me to search for the column name and tally how many times something shows up in that column, but I want to make something a bit more precise. Here is my code:
def start():
location = raw_input("What is the folder containing the data you like processed located? ")
#location = "C:/Code/Samples/Dates/2015-06-07/Large-Scale Data Parsing/Data Files"
if os.path.exists(location) == True: #Tests to see if user entered a valid path
file_extension = raw_input("What is the file type (.txt for example)? ")
search_for(location,file_extension)
else:
print "I'm sorry, but the file location you have entered does not exist. Please try again."
start()
def search_for(location,file_extension):
querylist = []
n = 5
while n == 5:
search_query = raw_input("What would you like to search for in each file? Use'Done' to indicate that you have finished your request. ")
#list = ["CD90-N5722-15C", "CD90-NB810-4C", "CP90-N2475-8", "CD90-VN530-22B"]
if search_query == "Done":
print "Your queries are:",querylist
print ""
content = os.listdir(location)
run(content,file_extension,location,querylist)
n = 0
else:
querylist.append(search_query)
continue
def run(content,file_extension,location,querylist):
for item in content:
if item.endswith(file_extension):
search(location,item,querylist)
quit()
def search(location,item,querylist):
with open(os.path.join(location,item), 'r') as f:
countlist = []
for search in querylist: #any search value after the first one is incorrectly reporting "0"
countsearch = 0
for line in f:
if search in line:
countsearch = countsearch + 1
countlist.append(search)
countlist.append(countsearch) #mechanism to update countsearch is not working for any value after the first
print item, countlist
start()
If I use that code, the last part (def search) is not working correctly. Any time I put a search in, any search after the first one I enter in returns "0", despite there being up to 500,000 occurrences of the search word in a file.
I was also wondering, since I have to index 5 files with 1,000,000 lines each, if there was a way I could write either an additional function or something to count how many times "Lettuce" occurs over all the files.
I cannot post the files here due to their size and content. Any help would be greatly appreciated.
Edit
I also have this piece of code here. If I use this, I get the correct count of each, but it would be much better to have a user be able to enter as many searches as they want:
def check_start():
#location = raw_input("What is the folder containing the data you like processed located? ")
location = "C:/Code/Samples/Dates/2015-06-07/Large-Scale Data Parsing/Data Files"
content = os.listdir(location)
for item in content:
if item.endswith("processed"):
countcol1 = 0
countcol2 = 0
countcol3 = 0
countcol4 = 0
#print os.path.join(currentdir,item)
with open(os.path.join(location,item), 'r') as f:
for line in f:
if "CD90-N5722-15C" in line:
countcol1 = countcol1 + 1
if "CD90-NB810-4C" in line:
countcol2 = countcol2 + 1
if "CP90-N2475-8" in line:
countcol3 = countcol3 + 1
if "CD90-VN530-22B" in line:
countcol4 = countcol4 + 1
print item, "CD90-N5722-15C", countcol1, "CD90-NB810-4C", countcol2, "CP90-N2475-8", countcol3, "CD90-VN530-22B", countcol4
You are trying to iterate over your file more than once. After the first time, the file pointer is at the end so subsequent searches will fail because there's nothing left to read.
If you add the line:
f.seek(0), this will reset the pointer before every read:
def search(location,item,querylist):
with open(os.path.join(location,item), 'r') as f:
countlist = []
for search in querylist: #any search value after the first one is incorrectly reporting "0"
countsearch = 0
for line in f:
if search in line:
countsearch = countsearch + 1
countlist.append(search)
countlist.append(countsearch) #mechanism to update countsearch is not working for any value after the first
f.seek(0)
print item, countlist
PS. I've guessed at the indentation... You really shouldn't use tabs.
I'm not sure I get your question completely, but how about something like this?
def check_start():
raw_search_terms = raw_input('Enter search terms seperated by a comma:')
search_term_list = raw_search_terms.split(',')
#location = raw_input("What is the folder containing the data you like processed located? ")
location = "C:/Code/Samples/Dates/2015-06-07/Large-Scale Data Parsing/Data Files"
content = os.listdir(location)
for item in content:
if item.endswith("processed"):
# create a dictionary of search terms with their counts (initialized to 0)
search_term_count_dict = dict(zip(search_term_list, [0 for s in search_term_list]))
for line in f:
for s in search_term_list:
if s in line:
search_term_count_dict[s] += 1
print item
for key, value in search_term_count_dict.iteritems() :
print key, value

Categories

Resources