Opening all text file & getting a string in python [duplicate] - python

I want to check if a string is in a text file. If it is, do X. If it's not, do Y. However, this code always returns True for some reason. Can anyone see what is wrong?
def check():
datafile = file('example.txt')
found = False
for line in datafile:
if blabla in line:
found = True
break
check()
if True:
print "true"
else:
print "false"

The reason why you always got True has already been given, so I'll just offer another suggestion:
If your file is not too large, you can read it into a string, and just use that (easier and often faster than reading and checking line per line):
with open('example.txt') as f:
if 'blabla' in f.read():
print("true")
Another trick: you can alleviate the possible memory problems by using mmap.mmap() to create a "string-like" object that uses the underlying file (instead of reading the whole file in memory):
import mmap
with open('example.txt') as f:
s = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
if s.find('blabla') != -1:
print('true')
NOTE: in python 3, mmaps behave like bytearray objects rather than strings, so the subsequence you look for with find() has to be a bytes object rather than a string as well, eg. s.find(b'blabla'):
#!/usr/bin/env python3
import mmap
with open('example.txt', 'rb', 0) as file, \
mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
if s.find(b'blabla') != -1:
print('true')
You could also use regular expressions on mmap e.g., case-insensitive search: if re.search(br'(?i)blabla', s):

As Jeffrey said, you are not checking the value of check(). In addition, your check() function is not returning anything. Note the difference:
def check():
    """Return True if any line of 'example.txt' contains 'blabla'.

    Returns:
        True as soon as a matching line is seen, otherwise False.

    Raises:
        IOError/OSError: if 'example.txt' cannot be opened.
    """
    with open('example.txt') as f:
        # No `found` flag is needed: returning from inside the loop ends the
        # search at the first match, and the `with` block still closes f.
        for line in f:
            # The search text is a string literal; a bare name `blabla`
            # would raise NameError because no such variable exists.
            if 'blabla' in line:
                return True
    return False  # Because you finished the search without finding
Then you can test the output of check():
if check():
print('True')
else:
print('False')

Here's another way to answer your question: the string find() method returns the numerical position (index) at which the text you are looking for first occurs.
open('file', 'r').read().find('')
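Pass the word you want to find as the argument to find(); it returns the index of the first occurrence, or -1 if the word is not present.
Replace 'file' with the name of your file.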

if True:
print "true"
This always happens because True is always True.
You want something like this:
if check():
print "true"
else:
print "false"
Good luck!

I made a little function for this purpose. It searches for a word in the input file and then adds it to the output file.
def searcher(outf, inf, string):
    """Append `string` to the file `outf` if it occurs in the file `inf`.

    Args:
        outf: path of the output file; opened in append mode, so it is
            created if it does not exist even when nothing is written.
        inf: path of the input file to search.
        string: the text to look for and, if found, append to `outf`.

    Raises:
        IOError/OSError: if either file cannot be opened.
    """
    # Both files are managed by the `with` statement so the input handle is
    # closed too (it used to be opened inline and never closed).
    with open(outf, 'a') as f1, open(inf) as f2:
        if string in f2.read():
            f1.write(string)
outf is the output file
inf is the input file
string is of course, the desired string that you wish to find and add to outf.

Your check function should return the found boolean and use that to determine what to print.
def check():
    """Report whether 'example.txt' contains the text 'blabla'.

    Returns:
        The `found` boolean: True if some line contains 'blabla',
        otherwise False.

    Raises:
        IOError/OSError: if 'example.txt' cannot be opened.
    """
    found = False
    # open() works on both Python 2 and 3 (file() exists only on Python 2),
    # and the `with` block closes the file when the search is over.
    with open('example.txt') as datafile:
        for line in datafile:
            # Quote the search text: a bare `blabla` is an undefined name.
            if 'blabla' in line:
                found = True
                break
    return found
found = check()
if found:
print "true"
else:
print "false"
the second block could also be condensed to:
if check():
print "true"
else:
print "false"

Two problems:
Your function does not return anything; a function that does not explicitly return anything returns None (which is falsy)
True is always True - you are not checking the result of your function
.
def check(fname, txt):
    """Return True if any line of the file `fname` contains `txt`.

    Args:
        fname: path of the file to search.
        txt: the text to look for within each line.

    Returns:
        True if at least one line contains `txt`, otherwise False. An empty
        file yields False because there are no lines to test.

    Raises:
        IOError/OSError: if `fname` cannot be opened.
    """
    with open(fname) as dataf:
        # any() stops reading at the first matching line.
        return any(txt in line for line in dataf)
if check('example.txt', 'blabla'):
print "true"
else:
print "false"

How to search for text in files and return the paths of the files in which the word is found
(Как искать часть текста в файле и возвращять путь к файлу в котором это слово найдено)
import os
import re
class Searcher:
    """Search the '.txt' files under a directory tree for a pattern.

    After find() has run, getResults() returns a dict mapping each matching
    file's path to the number of matches found in that file.
    """

    def __init__(self, path, query):
        """Store the search root and the query.

        Args:
            path: directory to search (recursively).
            query: pattern passed to the `re` module, i.e. it is interpreted
                as a regular expression, not as literal text.
        """
        # Normalise with os.path instead of hard-coding backslashes, so the
        # class also works where the separator is '/'. The trailing
        # separator is kept, as before; an empty path becomes the current
        # directory instead of raising IndexError.
        self.path = os.path.join(os.path.normpath(path), '')
        self.query = query
        self.searched = {}

    def find(self):
        """Walk self.path and record the match count of every '.txt' file.

        Files whose name does not end in '.txt', and files with no matches,
        are not recorded. Results are stored in self.searched.
        """
        pattern = re.compile(self.query)  # compile once, reuse per file
        for root, dirs, files in os.walk(self.path):
            for name in files:
                if not name.endswith('.txt'):
                    continue
                full_path = os.path.join(root, name)
                # `with` closes the file even if reading it fails.
                with open(full_path, 'rt') as f:
                    txt = f.read()
                count = len(pattern.findall(txt))
                if count > 0:
                    self.searched[full_path] = count

    def getResults(self):
        """Return the dict of {file path: match count} built by find()."""
        return self.searched
In Main()
# -*- coding: UTF-8 -*-
import sys
from search import Searcher
path = 'c:\\temp\\'
search = 'search string'
if __name__ == '__main__':
if len(sys.argv) == 3:
# создаем объект поисковика и передаем ему аргументы
Search = Searcher(sys.argv[1], sys.argv[2])
else:
Search = Searcher(path, search)
# начать поиск
Search.find()
# получаем результат
results = Search.getResults()
# выводим результат
print 'Found ', len(results), ' files:'
for file, count in results.items():
print 'File: ', file, ' Found entries:' , count

If user wants to search for the word in given text file.
# Print every line of 'logfile.txt' that contains a user-supplied string.
# The file is only read, so plain read mode is enough ('r+' would also
# demand write permission), and `with` closes the handle once the lines
# have been loaded.
with open('logfile.txt', mode='r') as fopen:
    fread = fopen.readlines()
x = input("Enter the search string: ")
for line in fread:
    if x in line:
        print(line)

found = False
def check():
    """Return True if any line of 'example.txt' contains 'blabla'.

    Returns:
        True if a matching line exists, otherwise False.

    Raises:
        IOError/OSError: if 'example.txt' cannot be opened.
    """
    # `found` must be initialised inside the function: assigning to it
    # anywhere in the body makes it a local name, so without this line
    # `return found` raises UnboundLocalError whenever nothing matches.
    found = False
    # open() instead of the Python-2-only file(); `with` closes the file.
    with open('example.txt') as datafile:
        for line in datafile:
            # Quote the search text: a bare `blabla` is an undefined name.
            if 'blabla' in line:
                found = True
                break
    return found
if check():
print "true"
else:
print "false"

found = False
def check():
    """Return True if any line of 'example.txt' contains "blabla".

    Returns:
        True if a matching line exists, otherwise False.

    Raises:
        IOError/OSError: if 'example.txt' cannot be opened.
    """
    # Initialise `found` locally: because the loop assigns to it, Python
    # treats it as a local variable, and returning it without a prior
    # assignment raises UnboundLocalError when no line matches.
    found = False
    # open() instead of the Python-2-only file(); `with` closes the file.
    with open('example.txt') as datafile:
        for line in datafile:
            if "blabla" in line:
                found = True
                break
    return found
if check():
print "found"
else:
print "not found"

Here's another. Takes an absolute file path and a given string and passes it to word_find(), uses readlines() method on the given file within the enumerate() method which gives an iterable count as it traverses line by line, in the end giving you the line with the matching string, plus the given line number. Cheers.
def word_find(file, word):
    """Print every line of `file` that contains `word`, with its line number.

    Args:
        file: path of the file to search.
        word: value to look for; it is converted with str() before matching.

    Prints '<Line N> ...' for each matching line (numbering starts at 1),
    or a single '> ... not found.' message when no line matches.

    Raises:
        IOError/OSError: if `file` cannot be opened.
    """
    needle = str(word)
    matched = False
    with open(file, 'r') as target_file:
        for num, line in enumerate(target_file, 1):
            if needle in line:
                print(f'<Line {num}> {line}')
                matched = True
    # Report "not found" once, after the whole file has been scanned,
    # rather than for each individual line that does not match.
    if not matched:
        print(f'> {word} not found.')
if __name__ == '__main__':
file_to_process = '/path/to/file'
string_to_find = input()
word_find(file_to_process, string_to_find)

"found" needs to be declared as a global variable inside the function, because the "if/else" statement that reads it is outside the function. You also don't need to use "break" to exit the loop.
The following should work to find out if the text file has desired string.
with open('text_text.txt') as f:
datafile = f.readlines()
def check():
global found
found = False
for line in datafile:
if 'the' in line:
found = True
check()
if found == True:
print("True")
else:
print("False")

Related

Python variable in re.match

I am trying to write a function that takes in a key (among other things) and returns the word after this key in the file. The code below works, but only if the key happens to be the first phrase in the file. Could anyone point out where I'm going wrong?
def findmatch(key, split_by, tempsl, filename, temp):
rx=r''+key+'(.*)'
f = open(tempsl + filename, 'r', encoding='windows-1252')
for eachline in f:
string=re.match(rx, eachline)
if string:
return (string.group().split(' ')[split_by])
else:
return "didn't work"
You end your for loop after the first iteration because
if string:
return (string.group().split(' ')[split_by])
else:
return "didn't work"
will always break the loop. It will return some result only if you have a keyword in first line. So I suggest this:
for eachline in f:
string=re.match(rx, eachline)
if string:
return (string.group().split(' ')[split_by])
else: # else statemant is a part of for loop (moved to the left)
return "didn't work"
and try this:
m = re.search('(?<=' + key + ')\w+', eachline)
m.group(0)

How to ignore case of a word while searching for it in a text file and copying into another

I am trying to write a program in python which searches for user specified words in a txt file and copies the selected lines containing that word into another file.
Also the user will have an option to exclude any word.
(e.g Suppose the user searches for the word "exception" and want to exclude the word "abc", then the code will only copy the lines which has "exception" in it but not "abc").
Now all the work will be done from the command prompt.
The input would be:
file.py test.txt(input file) test_mod.txt(output file) -e abc(exclude word denoted by -e)-s exception(search word denoted by -s)
Now the user will have an option to enter multiple exclude words and multiple search words.
I have done the program using the argparse module and it runs.
My problem is it only takes lower case words as search or exclude words. That is if I type "exception" as search word, it does not find "Exception" or "EXCEPTION". How do I solve this prob? I want to ignore case on both search and exclude words.
Here's my code as of now:
import sys
import os
import argparse
import tempfile
import re
def main(): #main method
try:
parser = argparse.ArgumentParser(description='Copies selected lines from files') #Defining the parser
parser.add_argument('input_file') #Adds the command line arguments to be given
parser.add_argument('output_file')
parser.add_argument('-e',action="append")
parser.add_argument('-s',action="append")
args = parser.parse_args() #Parses the Arguments
user_input1 = (args.e) #takes the word which is to be excluded.
user_input2 = (args.s) #takes the word which is to be included.
def include_exclude(input_file, output_file, exclusion_list=[], inclusion_list=[]): #Function which actually does the file writing and also handles exceptions
if input_file == output_file:
sys.exit("ERROR! Two file names cannot be the same.")
else:
try:
found_s = False #These 3 boolean variables will be used later to handle different exceptions.
found_e = False
found_e1 = True
with open(output_file, 'w') as fo: #opens the output file
with open(input_file, 'r') as fi: #opens the input file
for line in fi: #reads all the line in the input file
if user_input2 != None:
inclusion_words_in_line = map(lambda x: x in line, inclusion_list)#Mapping the inclusion and the exclusion list in a new list in the namespace
if user_input1 != None and user_input2 != None: #This list is defined as a single variable as condition operators cannot be applied to lists
exclusion_words_in_line = map(lambda x: x in line, exclusion_list)
if any(inclusion_words_in_line) and not any(exclusion_words_in_line): #Main argument which includes the search word and excludes the exclusion words
fo.write(line) #writes in the output file
found_s = True
elif user_input1 == None and user_input2 != None: #This portion executes if no exclude word is given,only the search word
if any(inclusion_words_in_line):
fo.write(line)
found_e = True
found_s = True
found_e1 = False
if user_input2 == None and user_input1 != None: #No search word entered
print("No search word entered.")
if not found_s and found_e: #If the search word is not found
print("The search word couldn't be found.")
fo.close()
os.remove(output_file)
elif not found_e and not found_s: #If both are not found
print("\nNOTE: \nCopy error.")
fo.close()
os.remove(output_file)
elif not found_e1: #If only the search word is entered
print("\nNOTE: \nThe exclusion word was not entered! \nWriting only the lines containing search words")
except IOError:
print("IO error or wrong file name.")
fo.close()
os.remove(output_file)
if user_input1 != user_input2 : #this part prevents the output file creation if someone inputs 2 same words creating an anomaly.
include_exclude(args.input_file, args.output_file, user_input1, user_input2);
if user_input1 == user_input2 : #This part prevents the program from running further if both of the words are same
sys.exit('\nERROR!!\nThe word to be excluded and the word to be included cannot be the same.')
except SystemExit as e: #Exception handles sys.exit()
sys.exit(e)
if __name__ == '__main__':
main()
The typical way to do this is to pick one case, and make all comparisons in that:
if word.lower() == "exception":
For your case, this could look like:
# Lower-case BOTH sides of the comparison: lower-casing only the line would
# still miss a search word that the user typed with capital letters.
inclusion_words_in_line = map(lambda x: x.lower() in line.lower(),
                              inclusion_list)
This looks like an attempt to build a search engine; you can achieve this using a library like PyLucene.
you can then be able to run queries like:
+include -exclude
And, of course, many more; it may be worth the learning curve.

Python Search function in a tab delimited column file

while True:
try:
OpenFile=raw_input(str("Please enter a file name: "))
infile=open(OpenFile,"r")
contents=infile.readlines()
infile.close()
user_input = raw_input(str("Enter A=<animal> for animal search or B=<where lives?> for place of living search: \n"))
if user_input.startswith("A="):
def find_animal(user_input,column):
return next(("\t".join(line) for line in contents
if line[column-1]==user_input),None)
find_animal(user_input[1:])
print str((find_animal(user_input[1:], "WHO?"))) #"Who?" is the name of the first column.
else:
print "Unknown option!"
except IOError:
print "File with this name does not exist!"
1.Enter the name of an animal.
2.Program searches for the lines that have this particular name in the first column.
3.Program prints lines that have this name in the first column.
My function can't seem to work properly here. Can you please help me find the mistake(s)? Thank you!
EDIT
def ask_for_filename():
filename=str(raw_input("Please enter file name: "))
return filename
def read_data(filename):
contents=open(filename,"r")
data=contents.read()
return data
def column_matches(line, substring, which_column):
for line in data:
if column_matches(line, substring, 0):
print line
Big chunks of code are hard to read and debug, try splitting your code into smaller functions, for example like this:
def ask_for_filename():
#left as an exercise
return filename
def read_data(filename):
#left as an exercise
return data
def column_matches(line, substring, which_column):
#left as an exercise
def show_by_name(name, data):
for line in data:
if column_matches(line, name, 0):
print line
def do_search(data):
    """Prompt for one search command and run it against `data`.

    Only the 'A=<animal>' form is handled: the text after 'A=' is passed to
    show_by_name(). Any other input is ignored.

    Args:
        data: the lines to search, forwarded unchanged to show_by_name().
    """
    # The variable must be spelled `prompt` to match its use below; the
    # misspelling `propmt` made raw_input(prompt) raise NameError.
    prompt = "Enter A=<animal> for animal search or B=<where lives?> for place of living search: \n"
    user_input = raw_input(prompt)
    if user_input.startswith('A='):
        show_by_name(user_input[2:], data)
# main program
filename = ask_for_filename()
data = read_data(filename)
while True:
do_search(data)
Test and debug these functions separately until you're sure they work properly. Then write and test the main program.
column_matches() is supposed to return true if some column (which_column) in a line is equal to substring. For example, column_matches("foo\tbar\tbaz", "bar", 1) is True. To achieve that
split a line by a delimiter - this gives us a list of values
get the n-th element of the list
compare it with the substring
return True if they are equal and False otherwise
Putting it all together:
def column_matches(line, substring, which_column):
    """Return True if one tab-separated column of `line` equals `substring`.

    Args:
        line: a string whose columns are separated by tab characters.
        substring: the exact value the column must equal.
        which_column: zero-based index of the column to compare.

    Returns:
        True if the selected column equals `substring`, otherwise False.

    Raises:
        IndexError: if `line` has no column at index `which_column`.
    """
    delimiter = '\t'
    columns = line.split(delimiter)
    value = columns[which_column]
    if value == substring:
        return True
    else:
        return False
or, in a more concise and "pythonic" form:
def column_matches(line, substring, which_column):
    """Return True if tab-separated column `which_column` equals `substring`.

    `which_column` is a zero-based index; IndexError is raised if `line`
    has no such column.
    """
    return line.split('\t')[which_column] == substring

Program that searches for words that use certain letters

I'm designing a program that looks through a list of words, and counts how many words only have the letters p, y, t, h, o and n in them.
So far, my code is:
def find_python(string, python):
"""searches for the letters 'python' in the word."""
for eachLetter in python:
if eachLetter not in string:
return False
return True
def main():
python = 'python'
how_many = 0
try:
fin = open('words.txt')#open the file
except:
print("No, no, file no here") #if file is not found
for eachLine in fin:
string = eachLine
find_python(string, python)
if find_python(string, python) == True:
how_many = how_many + 1#increment count if word found
print how_many#print out count
fin.close()#close the file
if __name__ == '__main__':
main()
However, my code is returning the incorrect number of words, for example, it will return the word 'xylophonist' if I put in the print statement for it because it has the letters python in it. What should I do so it will reject any word that has forbidden letters?
Correct your test function:
def find_python(string, python):
    """Check that a word is made up only of permitted letters.

    Args:
        string: the word to test. Surrounding whitespace is ignored, so a
            line read straight from a file (with its trailing newline) can
            be passed as-is.
        python: the permitted letters, e.g. 'python'.

    Returns:
        True if every character of the stripped word occurs in `python`,
        otherwise False. A blank string has no characters to reject and
        therefore returns True; callers should skip blank lines.
    """
    # Without stripping, the newline at the end of each file line is never
    # among the permitted letters, so every word would be rejected.
    for eachLetter in string.strip():
        if eachLetter not in python:
            return False
    return True
Welcome to regular expressions:
import re
line = "hello python said the xylophonist in the ythoonp"
words = re.findall(r'\b[python]+\b',line)
print words
returns
['python', 'ythoonp']
If what you want is to find how many times the actual word python appears, then you should issue a re.findall(r'\bpython\b')
If you don't want to go this route, I suggest you return false if any of the letters of the string is NOT p,y,t,h,o or n.
from os import listdir
def diagy(letters, li):
    """Count the words in `li` that consist only of characters in `letters`.

    Args:
        letters: string (or other container) of permitted characters.
        li: iterable of words to examine.

    Returns:
        The number of words in which every character is in `letters`.
    """
    # all(), not any(): a word qualifies only if none of its characters
    # falls outside `letters`. any() would also count words that merely
    # contain one permitted letter (e.g. 'xylophonist' for 'python').
    return sum(all(c in letters for c in word) for word in li)
def main():
dir_search = 'the_dir_in_which\\to_find\\the_file\\'
filename = 'words.txt'
if filename in listdir(dir_search):
with open(dir_search + 'words.txt',) as f:
li = f.read().split()
for what in ('pythona','pyth','py','ame'):
print '%s %d' % (what, diagy(what,li))
else:
print("No, no, filename %r is not in %s" % (filename,dir_search))
if __name__ == '__main__':
main()

Read file error in Python, even though print function is printing the list

I have been trying different ways of writing this code but cannot get past this. Currently the program will run all the way to the write_names(list) function and create the file, and the print function will print the sorted list. The program refuses to get the user input for the search_names() function but it will print anything I ask it to.
Debug highlights: while index < len(list) and in the debug I\O only states "read file error". Hopefully someone has an idea what I'm doing wrong.
'# Abstract: This program creates a list of names. The list is printed,
'# sorted, printed again, written to file, and searched.
'#=============================================================================
'#define the main function
def main():
#try:
##open data file for read
#infile = open('names.txt', 'r')
#call get_names function
list = get_names()
#call print function
print_names(list)
#sort list
list.sort()
#print sorted list
print_names(list)
#write sorted list to new file
write_names(list)
#allow user to search list
search_names(list)
def get_names():
try:
infile = open('names.txt', 'r')
#read file contents into a list
list = infile.readlines()
#close file
infile.close()
#strip \n from each element
index = 0
while index < len(list):
list[index] = list[index].rstrip('\n')
index += 1
return list
except IOError:
print 'Read file error'
def print_names(list):
#print header
print '******************'
#print list line by line
index = 0
while index < len(list):
print list[index]
index += 1
return
def write_names(list):
#open file for writing
outfile = open('sortedNames.txt', 'w')
#write the list to the file
for item in list:
outfile.write(str(item) + '\n')
#close file
outfile.close()
def search_names(list):
#set user test variable
again = 'Y'
while again.upper == 'Y':
#get search from user
search = raw_input('Enter a name to search for: ')
#open list for search
if search in list:
try:
item_index = list.index(search)
print search, 'found.', item_index
except ValueError:
print search, 'not found.'
main()
'
Thanks in advance!
Your issue is that upper is a function, and you are not calling it. Your while in search_names() should read:
while again.upper() == 'Y':
instead of:
#strip \n from each element
index = 0
while index < len(list):
list[index] = list[index].rstrip('\n')
index += 1
return list
just use this list comprehension:
lines = infile.readlines()
infile.close()
return [ line.strip() for line in lines ]
edit:
It looks like you are using an index and a while loop where a for loop can be used.
Instead of:
while index < len(list):
print list[index]
index += 1
use:
# using name_list instead of list
for name in name_list:
print name
also, your search_names() function looks flawed:
def search_names(list):
#set user test variable
again = 'Y'
while again.upper == 'Y':
#get search from user
search = raw_input('Enter a name to search for: ')
#open list for search
if search in list:
try:
item_index = list.index(search)
print search, 'found.', item_index
except ValueError:
print search, 'not found.'
would never exit (again is never reassigned). try:
def search_names(names_list):
again = 'Y'
while again.upper() == 'Y':
s_name = raw_input('Enter a name to search for: ')
if s_name in names_list:
print s_name, 'found.', names_list.index(s_name)
else:
print search, 'not found.'
again = raw_input('Search for another name (Y|N)?: ')
or:
def search_names(names_list):
again = 'Y'
while again == 'Y':
s_name = raw_input('Enter a name to search for: ')
try:
idx = names_list.index(s_name)
print s_name, 'found.', idx
except ValueError:
print search, 'not found.'
again = raw_input('Search for another name (Y|N)?: ').upper()
Which brings up the issue of when to catch exceptions vs using an if statement:
from msdn:
The method you choose depends on how
often you expect the event to occur.
If the event is truly exceptional and
is an error (such as an unexpected
end-of-file), using exception handling
is better because less code is
executed in the normal case. If the
event happens routinely, using the
programmatic method to check for
errors is better. In this case, if an
exception occurs, the exception will
take longer to handle.
Comments begin with #, not '# - you are making every other line of your header a docstring.
You are using an index to iterate across lists, which is inefficient - just iterate on the list items.
Calling a variable list is bad because it prevents you from accessing the list() datatype.
Using with is a more reliable replacement for open() .. close()
again.upper is a function reference - you have to call the function, ie again.upper().
You never change the value of again - this will be an infinite loop!
You test if search in list but then do a try..except block which will only fail if it is not in the list (ie you are testing for the same failure twice).
.
#
# Operate on a list of names
#
def load_names(fname):
try:
with open(fname, 'r') as inf:
return [line.strip() for line in inf]
except IOError:
print "Error reading file '{0}'".format(fname)
return []
def print_names(namelist):
print '******************'
print '\n'.join(namelist)
def write_names(namelist, fname):
with open(fname, 'w') as outf:
outf.write('\n'.join(namelist))
def search_names(namelist):
while True:
lookfor = raw_input('Enter a name to search for (or nothing to quit): ').strip()
if lookfor:
try:
ind = namelist.index(lookfor)
print("{0} found.".format(lookfor))
except ValueError:
print("{0} not found.".format(lookfor))
else:
break
def main():
namelist = load_names('names.txt')
print_names(namelist)
namelist.sort()
print_names(namelist)
write_names(namelist, 'sorted_names.txt')
search_names(namelist)
if __name__=="__main__":
main()

Categories

Resources