I am getting an error that says: UnboundLocalError: local variable 'words' referenced before assignment but I am unsure on why. The following is my code:
def hasher(fname):
try:
with open(fname, 'r') as f:
words = re.split('(["\'#&,;:\(\)\s+\*\?\.]|\w+)', f.read().lower())
except:
print 'Out'
while '' in words:
words.remove('')
But I'm getting the error when I try to reference words in the while statement and I'm not sure on why. Any help? Thanks!
You need to define a default value,
def hasher(fname):
words = []
try:
with open(fname, 'r') as f:
words = re.split('(["\'#&,;:\(\)\s+\*\?\.]|\w+)', f.read().lower())
except:
print 'Out'
while '' in words:
words.remove('')
return words
Related
okay. you didnt understand anything from the title. let me explain.
now ı have a file. There is some text in this file. for example "jack.123 jackie.321"
I want to check if the word jack exists in the file and ı wanna print "jack.123".
its my problem. ı didnt print all text.
def append(name,password):
f = open("myfile.txt", "w")
f.write("{},{}".format(name,password))
append("jack",".123")
append("jackie" , ".321")
f = open("myfile.txt" ,"r")
if "jack" in f.read():
print("query found")
Open the file and read all its contents then split on whitespace. That effectively gives you all the words in the file.
Iterate over the list of words checking to see if a word starts with the name you're searching for followed by '.'.
Note that there may be more than one occurrence so build a list.
def find_name(filename, name):
if not name[-1] == '.':
name += '.'
found = []
with open(filename) as myfile:
for word in myfile.read().split():
if word.startswith(name):
found.append(word)
return found
print(*find_name('myfile.txt', 'jack'))
def new_pass(name, passwd):
"creates file and write name and passwd to it"
with open("myfile1.txt", "a") as f:
f.write(name + "." + passwd + "\n")
new_pass("jack", "123")
new_pass("jack", "183")
new_pass("jack", "129")
new_pass("jack", "223")
def check_word(file, word):
"""checks if a word exists and returns its first occurence """
with open(file) as f:
l = f.read().split("\n")
for i in l:
if i.startswith(word):
print("query found")
return i
print(check_word("myfile1.txt", "jack"))
In python you can do it like that :
with open('file.txt', 'r') as file:
data = file.read()
if "jack" in data:
print("jack")
If I understood uncorrectly let me know
I'm trying to do something. I want to open multiple files and count the words in it for example, but I want to know how many of files couldn't be open.
Its what I tried:
i = 0
def word_count(file_name):
try:
with open(file_name) as f:
content = f.read()
except FileNotFoundError:
pass
i = 0
i += 1
else:
words = content.split()
word_count = len(words)
print(f'file {file_name} has {word_count} words.')
file_name = ['data1.txt','a.txt','data2w.txt','b.txt','data3w.txt','data4w.txt']
for names in file_name:
word_count(names)
print(len(file_name) - i , 'files weren\'t found')
print (i)
So, I get this error:
runfile('D:/~/my')
file data1.txt has 13 words.
file data2w.txt has 24 words.
file data3w.txt has 21 words.
file data4w.txt has 108 words.
Traceback (most recent call last):
File "D:\~\my\readtrydeffunc.py", line 27, in <module>
print(len(file_name) - i , 'files weren\'t found')
NameError: name 'i' is not defined
I tried something else also, but I think I don't understand the meaning of scopes well. I think its because i is assigned out of except scope, but when I assign i = 0 in except scope, I can't print it at the end, because it will be destroyed after execution.
Yes, you're on the right track. You need to define and increment i outside the function, or pass the value through the function, increment, and return the new value. Defining i outside the function is more common, and more Pythonic.
def count_words(file_name):
with open(file_name) as f:
content = f.read()
words = content.split()
word_count = len(words)
#print(f'file {file_name} has {word_count} words.')
return word_count
file_name = ['data1.txt','a.txt','data2w.txt','b.txt','data3w.txt','data4w.txt']
i = 0
for names in file_name:
try:
result = count_words(names)
except FileNotFoundError:
i += 1
print(i, 'files weren\'t found')
I would recommend breaking this into 2 functions; One to handle the word counting and a second to control the flow of the script. The control one should handle any errors that arise as well as handle and the feedback from said errors.
def word_count(file_name):
with open(file_name) as f:
content = f.read()
words = content.split()
word_count = len(words)
print(f'file {file_name} has {word_count} words.')
def file_parser(files):
i = 0
for file in files:
try:
word_count(file)
except FileNotFoundError:
i+=1
if i > 0:
print(f'{i} files were not found')
file_names = ['data1.txt','a.txt','data2w.txt','b.txt','data3w.txt','data4w.txt']
file_parser(file_names)
While refactoring your code to not use global variables should be the preferred approach (see edit for a possible refactoring), the minimal modification to get your code running is to remove pass and i = 0 within the except clause, and ask i to be used globally inside your function:
def word_count(file_name):
global i # use a `i` variable defined globally
try:
with open(file_name) as f:
content = f.read()
except FileNotFoundError:
i += 1 # increment `i` when the file is not found
else:
words = content.split()
word_count = len(words)
print(f'file {file_name} has {word_count} words.')
i = 0
file_name = ['data1.txt','a.txt','data2w.txt','b.txt','data3w.txt','data4w.txt']
for names in file_name:
word_count(names)
print(i, 'files weren\'t found')
Note that i will contain the number of files not found.
EDIT
A reasonably refactored code could look something like:
def word_count(filepath):
result = 0
with open(filepath) as file_obj:
for line in file_obj:
result += len(line.split())
return result
def process_files(filepaths):
result = {}
num_missing = 0
for filepath in filepaths:
try:
num_words = word_count(filepath)
except FileNotFoundError:
num_missing += 1
else:
result[filepath] = num_words
return result, num_missing
filenames = [
'data1.txt', 'a.txt', 'data2w.txt', 'b.txt', 'data3w.txt', 'data4w.txt']
wordcounts, num_missing = process_files(filenames)
for filepath, num_words in wordcounts.items():
print(f'File {filepath} has {num_words} words.')
print(f'{i} files weren\'t found')
Notes:
the word_count() function now only does one thing: word counting. This is done on a line by line basis to better handle potentially long files, which could fill the memory if loaded at once.
the process_files() function extract the essential information and stores them in a dict
all the printing of the results is done in one place, and could be easily wrapped up in a main() function.
num_missing (formerly i, circa) is now a local variable.
Finally note that while explicitly counting the number of exception is one way, the other being just getting this information by subtracting the number of elements in result from the number of input filepaths.
This could be done anywhere, there is no need to do this in process_files().
I used this code to delete a word from a text file.
f = open('./test.txt','r')
a = ['word1','word2','word3']
lst = []
for line in f:
for word in a:
if word in line:
line = line.replace(word,'')
lst.append(line)
f.close()
f = open('./test.txt','w')
for line in lst:
f.write(line)
f.close()
But for some reason if the words have the same characters, all those characters get deleted. So for e.g
in my code:
def cancel():
global refID
f1=open("refID.txt","r")
line=f1.readline()
flag = 0
while flag==0:
refID=input("Enter the reference ID or type 'q' to quit: ")
for i in line.split(','):
if refID == i:
flag=1
if flag ==1:
print("reference ID found")
cancelsub()
elif (len(refID))<1:
print("Reference ID not found, please re-enter your reference ID\n")
cancel()
elif refID=="q":
flag=1
else:
print("reference ID not found\n")
menu()
def cancelsub():
global refIDarr, index
refIDarr=[]
index=0
f = open('flightbooking.csv')
csv_f = csv.reader(f)
for row in csv_f:
refIDarr.append(row[1])
for i in range (len(refIDarr)):
if refID==refIDarr[i]:
index=i
print(index)
while True:
proceed=input("You are about to cancel your flight booking, are you sure you would like to proceed? y/n?: ")
while proceed>"y" or proceed<"n" or (proceed>"n" and proceed<"y") :
proceed=input("Invalid entry. \nPlease enter y or n: ")
if proceed=="y":
Continue()
break
elif proceed=="n":
main_menu
break
exit
break
def Continue():
lines = list()
with open('flightbooking.csv', 'r') as readFile:
reader = csv.reader(readFile)
for row in reader:
lines.append(row)
for field in row:
if field ==refID:
lines.remove(row)
break
with open('flightbooking.csv', 'w') as writeFile:
writer = csv.writer(writeFile)
writer.writerows(lines)
f = open('refID.txt','r')
a=refIDarr[index]
print(a)
lst = []
for line in f:
for word in a:
if word in line:
line = line.replace(word,'')
lst.append(line)
print(lst)
f.close()
f = open('refID.txt','w')
for line in lst:
f.write(line)
f.close()
print("Booking successfully cancelled")
menu()
When the code is run, the refID variable has one word stored in it, and it should replace just that word with a blank space, but it takes that word for e.g 'AB123', finds all other words which might have an 'A' or a 'B' or the numbers, and replace all of them. How do I make it so it only deletes the word?
Text file before running code:
AD123,AB123
Expected Output in the text file:
AD123,
Output in text file:
D,
Edit: I have added the entire code, and maybe you can help now after seeing that the array is being appended to and then being used to delete from a text file.
here's my opinion.
refIDarr = ["AB123"]
a = refIDarr[0] => a = "AB123"
strings in python are iterable, so when you do for word in a, you're getting 5 loops where each word is actually a letter.
Something like the following is being executed.
if "A" in line:
line = line.replace("A","")
if "B" in line:
line = line.replace("B","")
if "1" in line:
line = line.replace("1","")
if "2" in line:
line = line.replace("2","")
if "3" in line:
line = line.replace("3","")
they correct way to do this is loop over refIDarr
for word in refIDarr:
line = line.replace(word,'')
NOTE: You don't need the if statement, since if the word is not in the line it will return the same line as it was.
"abc".replace("bananan", "") => "abc"
Here's a working example:
refIDarr = ["hello", "world", "lol"]
with open('mytext.txt', "r") as f:
data = f.readlines()
for word in refIDarr:
data = [line.replace(word, "") for line in data]
with open("mytext.txt", "w") as newf:
newf.writelines(data)
The problem is here:
a=refIDarr[index]
If refIDarr is a list of words, accessing specific index makes a be a word. Later, when you iterate over a (for word in a:), word becomes a letter and not a word as you expect, which causes eventually replacing characters of word instead the word itself in your file.
To avoid that, remove a=refIDarr[index] and change your loop to be:
for line in f:
for word in refIDarr:
if word in line:
line = line.replace(word,'')
First of all I'm new to Python. what I'm trying to do is to lemmatize my data from a CSV. Used pandas to read the csv.
But while running this I am getting an error on the line lemmatized.append(temp). It's saying NameError: name 'temp' is not defined
I can't figure out what is causing this error. I am using python 2.7.
I will be grateful if anyone of you python expert could help me out with this simple problem and thus help me in learning.
data = pd.read_csv('TrainingSETNEGATIVE.csv')
list = data['text'].values
def get_pos_tag(tag):
if tag.startswith('V'):
return 'v'
elif tag.startswith('N'):
return 'n'
elif tag.startswith('J'):
return 'a'
elif tag.startswith('R'):
return 'r'
else:
return 'n'
lemmatizer = WordNetLemmatizer()
with open('new_file.csv', 'w+', newline='') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
for doc in list:
tok_doc = nltk.word_tokenize(doc)
pos_tag_doc = nltk.pos_tag(tok_doc)
lemmatized = []
for i in range(len(tok_doc)):
tag = get_pos_tag(pos_tag_doc[i][1])
if tag == 'r':
if tok_doc[i].endswith('ly'):
temp = tok_doc[i].replace("ly", "")
else:
temp = lemmatizer.lemmatize(tok_doc[i], pos=tag)
lemmatized.append(temp)
lemmatized = " ".join(lemmatized)
wr.writerow([lemmatized])
print(lemmatized)
Screentshot:
The Exception says it all: "name 'temp' is not defined". So the variable temp is not defined before it is used.
The problem with your code is here:
if tag == 'r':
if tok_doc[i].endswith('ly'):
temp = tok_doc[i].replace("ly", "")
# else: temp = None
else:
temp = lemmatizer.lemmatize(tok_doc[i], pos=tag)
lemmatized.append(temp)
If tag == 'r' is True and tok_doc[i].endswith('ly') is not True then temp never gets defined.
Consider adding an else clause like the one I inserted and commented out.
I wrote a function. Now I keep getting syntax errors within the try statement. I don't know if its the code I wrote or the try statement
Function:
def connector (links):
for links in infile:
avenues = links.rstrip()
words = []
dct = {}
cord = []
There is more to the code but the error keeps occurring in the try statement, where it says except, any ideas?
try:
infile = open("routes.txt", "r")
links = inf.readlines()
Connector(links)
except LookupError as exceptObj:
print("Error:", str(exceptObj))
connector should be lowercase
You indented wrong
try:
infile = open("routes.txt", "r")
links = inf.readlines()
connector(links)
except LookupError as exceptObj:
print("Error:", str(exceptObj))