First of all, I'm new to Python. What I'm trying to do is lemmatize my data from a CSV file, which I read with pandas.
While running the code below I get an error on the line lemmatized.append(temp): NameError: name 'temp' is not defined
I can't figure out what is causing this error. I am using Python 2.7.
I would be grateful if any of you Python experts could help me out with this simple problem and thus help me learn.
# Load the training CSV and pull out its 'text' column.
# NOTE(review): `pd` is presumably pandas imported as pd; the import is not shown in this snippet.
data = pd.read_csv('TrainingSETNEGATIVE.csv')
# NOTE: the name `list` shadows the built-in list type for the rest of the script.
list = data['text'].values
def get_pos_tag(tag):
    """Map a part-of-speech tag to the single-letter POS code passed to the lemmatizer.

    Tags starting with 'V', 'J' and 'R' map to 'v', 'a' and 'r' respectively.
    Every other tag (including those starting with 'N' and the empty string)
    maps to 'n'.

    Args:
        tag: The POS tag string, e.g. the second item of an nltk.pos_tag pair.

    Returns:
        One of 'v', 'n', 'a' or 'r'.
    """
    # Only the first character decides the result; 'n' is the fallback.
    return {'V': 'v', 'J': 'a', 'R': 'r'}.get(tag[:1], 'n')
# Lemmatize every document in `list` and write one fully-quoted row per document to new_file.csv.
lemmatizer = WordNetLemmatizer()
# NOTE(review): newline= is an argument of Python 3's open(), while the post mentions Python 2.7 -- confirm the interpreter.
with open('new_file.csv', 'w+', newline='') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
for doc in list:
tok_doc = nltk.word_tokenize(doc)
pos_tag_doc = nltk.pos_tag(tok_doc)
lemmatized = []
for i in range(len(tok_doc)):
# pos_tag_doc[i] is indexed with [1] to obtain the tag for token i.
tag = get_pos_tag(pos_tag_doc[i][1])
if tag == 'r':
if tok_doc[i].endswith('ly'):
# NOTE: replace() removes every "ly" occurrence in the token, not only the trailing one.
temp = tok_doc[i].replace("ly", "")
# NOTE: nothing is assigned when tag == 'r' and the token does not end in 'ly'; on that path
# `temp` is unset (or still holds the previous token's value) at the append further down.
else:
temp = lemmatizer.lemmatize(tok_doc[i], pos=tag)
lemmatized.append(temp)
# The list is replaced by a single space-joined string before being written and printed.
lemmatized = " ".join(lemmatized)
wr.writerow([lemmatized])
print(lemmatized)
Screenshot:
The Exception says it all: "name 'temp' is not defined". So the variable temp is not defined before it is used.
The problem with your code is here:
if tag == 'r':
if tok_doc[i].endswith('ly'):
temp = tok_doc[i].replace("ly", "")
# else: temp = None
else:
temp = lemmatizer.lemmatize(tok_doc[i], pos=tag)
lemmatized.append(temp)
If tag == 'r' is True and tok_doc[i].endswith('ly') is not True then temp never gets defined.
Consider adding an else clause like the one I inserted and commented out.
Related
I have been trying to debug my code for searching strings in two files, but I can't understand why the strings are not found all the time. I have been stuck here for half day, and probably you could help me to understand the error, please?
The logic is: (after filtering out line in "try_ID.txt" by this piece len(re.findall("Ca", row)) == 0 or len(re.findall("Co", row)) == 0), if ca and co in "try_ID.txt" do not appear in both "try.txt" and "try_C.txt", then we go into the first if condition in my code; if we only find either ca or co in "try.txt" or "try_C.txt", then it goes into the elif conditions in my code; if we find both ca and co in both files "try_C.txt" and "try.txt", then we go into else condition in my code.
The problem is that, with my code, all the items go into the first if conditions (both not found). I don't know why.
my code
import re
# Classify each row of try_ID.txt by whether its 2nd/3rd fields occur in try.txt / try_C.txt.
with open("try_ID.txt", 'r') as fin, \
open("try_C.txt", 'r') as co_splice, \
open("try.txt", 'r') as ca_splice:
for row in fin:
# Rows that lack either "Ca" or "Co" are skipped.
if len(re.findall("Ca", row)) == 0 or len(re.findall("Co", row)) == 0:
pass
else: # problem starts from here
name = str(row.split()[1]) + "_blast"
# NOTE: read() consumes the whole file; every later read() on the same handle returns ''
# so after the first row is processed the membership tests below search an empty string.
if not row.split()[1] in ca_splice.read() and not row.split()[2] in co_splice.read():
print(row.split()[0:2])
# NOTE: `col_splice` is not one of the names bound by the with statement (it binds co_splice).
elif row.split()[1] in ca_splice.read() and not row.split()[2] in col_splice.read():
print(row.split()[1] + "Ca")
# NOTE: `can_splice` and `col_splice` are not bound by the with statement (ca_splice / co_splice are).
elif not row.split()[1] in can_splice.read() and row.split()[2] in col_splice.read():
print(row.split()[2] + "Co")
else:
ne_name = name + "recip"
print(ne_name)
"try_ID.txt"
H21911 Ca29092.1t A05340.1
H21912 Ca19588.1t Co27353.1t A05270.1
H21913 Ca19590.1t Co14899.1t A05260.1
H21914 Ca19592.1t Co14897.1t A05240.1
H21915 Co14877.1t A05091.1
S25338 Ca12595.1t Co27352.1t A53970.1
S20778 Ca29091.1t Co24326.1t A61120.1
S26552 Ca20916.1t Co14730.1t A16155.1
"try_C.txt"
Co14730.1t;Co14730.2t
Co27352.1t;Co27352.2t;Co27352.3t;Co27352.4t;Co27352.5t
Co14732.1t;Co14732.2t
Co4217.1t;Co4217.2t
Co27353.1t;Co27353.2t
Co14733.1t;Co14733.2t
"try.txt"
Ca12595.1t;Ca12595.2t
Ca29091.1t;Ca29091.2t
Ca1440.1t;Ca1440.2t
Ca29092.1t;Ca29092.2t
Ca20916.1t;Ca20916.2t
The weird thing, though, is that when I try a small piece of code like the one below, it can find the strings.
# Stand-alone check: is the second whitespace-separated field of `row` a substring of try.txt?
row = "H20118 Ca12595.1t Co18779.1t A01010.1"
text_file = "try.txt"
with open(text_file, 'r') as fin:
# The file is read exactly once here, immediately after being opened.
if row.split()[1] in fin.read():
print(True)
else:
print(False)
I really don't understand.
Try to read and split and search only once wherever possible. Try to keep it simple.
# Classify each row of try_ID.txt by whether its Ca identifier occurs in
# try.txt and its Co identifier occurs in try_C.txt.
with open("try_ID.txt", 'r') as fin, \
        open("try_C.txt", 'r') as co_splice, \
        open("try.txt", 'r') as ca_splice:
    # Read each lookup file exactly once; a second read() on the same handle
    # would return an empty string.
    co_text = co_splice.read()
    ca_text = ca_splice.read()
    for row in fin:
        # Only rows that contain BOTH a Ca and a Co identifier are classified;
        # rows missing either one are skipped, as in the original filter.
        if 'Ca' not in row or 'Co' not in row:
            continue
        zero, one, two, *_ = row.split()
        name = one + "_blast"
        # Evaluate each membership test once and reuse the result below.
        one_in_ca = one in ca_text
        two_in_co = two in co_text
        if not one_in_ca and not two_in_co:
            print(zero, one, two)
        elif one_in_ca and not two_in_co:
            print(one + "Ca")
        elif not one_in_ca and two_in_co:
            print(two + "Co")
        else:
            ne_name = name + "recip"
            print(ne_name)
Hello Community Members,
I am getting the error NameError: name 'f' is not defined. The code is below. Please help; any sort of help is appreciated. I have been stuck on this for 3 days. The code is meant to extract the names of all the subcategories of a Wikipedia category in Python 3.
I have tried both the relative and absolute paths.
The code is as follows:
import httplib2
from bs4 import BeautifulSoup
import subprocess
import time, wget
import os, os.path
#declarations
# Base URL that category names are appended to before downloading.
catRoot = "http://en.wikipedia.org/wiki/Category:"
# Depth at which printTree stops recursing.
MAX_DEPTH = 100
# Category names printTree has already descended into.
done = []
# Not referenced by any code visible in this snippet.
ignore = []
# Passed as the download destination by printTree.
path = 'trivial'
def removeNewLines(in_text):
    """Return *in_text* with every newline character replaced by a space."""
    return in_text.replace('\n', ' ')
def download(link, dest):
    """Fetch *link* into the file *dest* with wget unless *dest* already has content.

    Args:
        link: URL to download.
        dest: Path of the file to write.

    Nothing is done when *dest* already exists and is non-empty. The output
    of the wget command is captured and discarded.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import shlex

    if os.path.exists(dest) and os.path.getsize(dest) > 0:
        return
    # Quote both values: link and dest are built from scraped category names
    # and must not be able to alter the shell command line.
    subprocess.getoutput(
        'wget {} -O {}'.format(shlex.quote(link), shlex.quote(dest))
    )
    print("Downloading")
def ensureDir(f):
    """Create directory *f* (and any missing parents) if nothing exists at that path.

    An existing path of any kind is left untouched.
    """
    if not os.path.exists(f):
        # exist_ok avoids an error if the directory appears between the
        # check above and this call.
        os.makedirs(f, exist_ok=True)
def clean(in_text):
    """Return *in_text* with markup tags and newline characters removed.

    Everything from a '<' up to and including the next '>' is dropped, except
    the literal tag '<br>', which is kept as-is. A '<' with no closing '>'
    discards the rest of the text instead of raising IndexError.

    Args:
        in_text: The text to clean.

    Returns:
        The cleaned text as a new string.
    """
    kept = []
    pos = 0
    length = len(in_text)
    while pos < length:
        char = in_text[pos]
        if char == '<':
            if in_text.startswith('<br>', pos):
                # '<br>' is the one tag that is preserved.
                kept.append('<br>')
                pos += 4
                continue
            close = in_text.find('>', pos)
            if close == -1:
                # Unterminated tag: nothing after it can be kept.
                break
            pos = close + 1
        elif char == '\n':
            pos += 1
        else:
            kept.append(char)
            pos += 1
    return ''.join(kept)
def getBullets(content):
    """Parse *content* as HTML with the built-in "html.parser".

    The parsed document is neither returned nor used further.
    """
    # The parameter is named `content`; the previous body read an undefined
    # name `contents`.
    mainSoup = BeautifulSoup(content, "html.parser")
def getAllBullets(content):
    """Collect subcategory names from the "CategoryTreeItem" divs in *content*.

    Args:
        content: HTML markup; it is converted with str() before parsing.

    Returns:
        A tuple (empty, full) of lists of names. A name comes from the first
        link of an item, with tags removed by clean() and spaces replaced by
        underscores. Items whose markup contains "CategoryTreeEmptyBullet" go
        to `empty`; otherwise items containing "CategoryTreeBullet" go to
        `full`. Items without any link are skipped.
    """
    mainSoup = BeautifulSoup(str(content), "html.parser")
    subcategories = mainSoup.findAll('div', attrs={"class": "CategoryTreeItem"})
    empty = []
    full = []
    for item in subcategories:
        markup = str(item)
        # Use the same explicit parser as above for the per-item parse.
        subSoup = BeautifulSoup(markup, "html.parser")
        anchors = subSoup.findAll('a')
        if not anchors:
            continue
        category = clean(str(anchors[0])).replace(" ", "_")
        if "CategoryTreeEmptyBullet" in markup:
            empty.append(category)
        elif "CategoryTreeBullet" in markup:
            full.append(category)
    return (empty, full)
# Prints the subcategory tree of catName, downloading category pages along the way.
# count is the current depth; the function returns immediately once it equals MAX_DEPTH.
def printTree(catName, count):
catName = catName.replace("\\'","'")
if count == MAX_DEPTH : return
download(catRoot+catName, path)
filepath = "categories/Category:"+catName+".html"
print(filepath)
# NOTE: this opens a file literally named 'filepath' (a string literal, not the variable above).
content = open('filepath', 'w+')
# NOTE: the list returned by readlines() is discarded.
content.readlines()
# NOTE: getAllBullets receives the file object itself and applies str() to it.
(emptyBullets,fullBullets) = getAllBullets(content)
# NOTE: no name `f` is defined in this function; the file object opened above is `content`.
f.close()
# Subcategories without children: print and download each one.
for x in emptyBullets:
for i in range(count):
print (" "),
download(catRoot+x, "categories/Category:"+x+".html")
print (x)
# Subcategories with children: print, then recurse unless already visited.
for x in fullBullets:
for i in range(count):
print (" "),
print (x)
if x in done:
print ("Done... "+x)
continue
done.append(x)
# NOTE: bare except -- any exception raised by the recursive call is reported as "ERROR: <name>" and swallowed.
try: printTree(x, count + 1)
except:
print ("ERROR: " + x)
# Entry point: start from the "Cricket" category at depth 0.
name = "Cricket"
printTree(name, 0)
The error encountered is as follows.
I think f.close() should be content.close().
It's common to use a context manager for such cases, though, like this:
with open(filepath, 'w+') as content:
(emptyBullets,fullBullets) = getAllBullets(content)
Then Python will close the file for you, even in case of an exception.
(I also changed 'filepath' to filepath, which I assume is the intent here.)
I'm trying to get my program to split each line of a file into 3 fields and then apply an "if row1 == x:" test to add to an existing class. That part is not my problem, I've gotten it to work, except for when row1 is ''. So I tried changing the input file so that the value was ' ', then '*', then 'k' (and so on), but nothing worked.
The thing is that most lines in the input file read: 1234565,'streetadress1','streetadress2', but for some lines there is no streetadress1, only ''. The program has no problem identifying the number or 'streetadress2'.
# Holds a street name, the ykord/xkord values, and several lists filled in by set_connections.
class adress(object):
def __init__(self,street,ykord,xkord):
self.street = street
self.ykord = ykord
self.xkord = xkord
self.connected = []
self.anlid = []
self.distances = []
self.parent = []
self.child =[]
# Reads kopplingar2.txt and files each "identity,streetA,streetB" line under child, anlid or parent.
def set_connections(self):
# NOTE: the file handle opened here is never closed.
input_file = open("kopplingar2.txt")
temp = input_file.read().splitlines()
for l in temp:
# Fields keep any quote characters present in the file; split(',') does not strip them.
row = l.split(',')
identity = row[0]
streetA = row[1]
streetB = row[2]
if streetA == self.street:
diction = {'street':streetB, 'identity':identity}
self.child.append(diction)
# NOTE: when the file stores a missing street as two apostrophe characters, streetA is the
# two-character string "''" and this comparison with the empty string does not match.
elif streetA == '':
self.anlid.append(identity)
print 'poop!'
elif streetB == self.street and streetA != '':
diction = {'street':streetA, 'identity':identity}
self.parent.append(diction)
print streetA
The print 'poop!' is only there to see whether that branch is ever reached, but it never is. It should print about 400 lines of poop, since about 75% of the lines in the input file contain ''.
I have no idea why it works for the other rows but not for row1 (except that row1 is sometimes '' instead of a full string).
'' is an empty string in Python. If you need to compare a value with a string consisting of two apostrophe characters, you need to write streetA == "''".
As @yole said, you need to compare with "''". If, for example, one of the lines in the file is 123,'','streetB', then l would be "123,'','streetB'" and what you get is
>>> l="123,'','streetB'"
>>> l.split(',')
['123', "''", "'streetB'"]
>>>
I am fairly new to Python (just started learning in the last two weeks) and am trying to write a script to parse a csv file to extract some of the fields into a List:
from string import Template
import csv
import string
# Site codes to accept; rows whose first field is not one of these are meant to be ignored.
site1 = 'D1'
site2 = 'D2'
site3 = 'D5'
site4 = 'K0'
site5 = 'K1'
site6 = 'K2'
site7 = '0'
site8 = '0'
site9 = '0'
# Column indexes into each comma-split line (used below as fields[lbl], fields[sw], ...).
lbl = 1
portField = 'y'
sw = 5
swpt = 6
cd = 0
pt = 0
natList = []
# NOTE(review): the `name=` keyword matches Python 2's open(); Python 3's open() names this parameter `file`.
with open(name=r'C:\Users\dtruman\Documents\PROJECTS\SCRIPTING - NATAERO DEPLOYER\NATAERO DEPLOYER V1\nataero_deploy.csv') as rcvr:
for line in rcvr:
fields = line.split(',')
Site = fields[0]
siteList = [site1,site2,site3,site4,site5,site6,site7,site8,site9]
# NOTE: this is a chained comparison, evaluated as (Site in siteList) and (siteList == True).
# A list never equals True, so the condition is always false and the body never runs.
# NOTE(review): nothing in the body changes Site or siteList, so a true `while` condition would not terminate -- `if` looks intended.
while Site in siteList == True:
Label = fields[lbl]
Switch = fields[sw]
if portField == 'y':
Switchport = fields[swpt]
natList.append([Switch,Switchport,Label])
else:
Card = fields[cd]
Port = fields[pt]
natList.append([Switch,Card,Port,Label])
print natList
Even if I strip the ELSE statement away and break into my code right after the IF clause-- i can verify that "Switchport" (first statement in IF clause) is successfully being populated with a Str from my csv file, as well as "Switch" and "Label". However, "natList" is not being appended with the fields parsed from each line of my csv for some reason. Python returns no errors-- just does not append "natList" at all.
This is actually going to be a function (once I get the code itself to work), but for now, I am simply setting the function parameters as global variables for the sake of being able to run it in an iPython console without having to call the function.
The "lbl", "sw", "swpt", "cd", and "pt" refer to column#'s in my csv (the finished function will allow user to enter values for these variables).
I assume I am running into some issue with "natList" scope-- but I have tried moving the "natList = []" statement to various places in my code to no avail.
I can run the above in a console, and then run "natList.append([Switch,Switchport,Label])" separately and it works for some reason....?
Thanks for any assistance!
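The while condition needs additional parentheses. Write it as while (Site in siteList) == True: or, in the much cleaner way suggested by Padraic, as while Site in siteList:.
Without the parentheses Python chains the two comparisons, so Site in siteList == True is evaluated as (Site in siteList) and (siteList == True). A list is never equal to True, so the condition was always false.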
Change
while Site in siteList == True:
to
if Site in siteList:
You might want to look into the csv module as this module attempts to make reading and writing csv files simpler, e.g.:
import csv
with open('<file>') as fp:
    ...
    reader = csv.reader(fp)
    # Column indexes to extract, in output order.
    if portfield == 'y':
        columns = [sw, swpt, lbl]
    else:
        columns = [sw, cd, pt, lbl]
    # Iterate the csv reader (parsed rows), not the raw file object, and
    # skip blank lines, which the reader yields as empty lists.
    natlist = [[row[i] for i in columns]
               for row in reader if row and row[0] in sitelist]
print(natlist)
Or alternatively using a csv.DictReader which takes the first row as the fieldnames and then returns dictionaries:
import csv
with open('<file>') as fp:
    ...
    reader = csv.DictReader(fp)
    # Header names of the columns to extract, in output order.
    if portfield == 'y':
        fields = ['Switch', 'card/port', 'Label']
    else:
        fields = ['Switch', '??', '??', 'Label']
    # Iterate the DictReader (one dict per row), not the raw file object.
    natlist = [[row[f] for f in fields]
               for row in reader if row['Building/Site'] in sitelist]
print(natlist)
I'm running Python 3 and I'm getting the following error:
AttributeError: 'AssemblyParser' object has no attribute 'hasMoreCommands'
Here is the code that is raising the error:
import sys
from Parser import AssemblyParser
from Code import Code
# Translate the assembly file named on the command line into 16-character
# binary strings written to a ".hack" file next to it.
parser = AssemblyParser(sys.argv[1])
translator = Code()
# Output path: the input path with its last extension replaced by ".hack".
# (str() of a list slice would have produced a name like "['prog'].hack".)
out_file = sys.argv[1].rsplit(".", 1)[0] + ".hack"
with open(out_file, 'w', encoding='utf-8') as f:
    while parser.hasMoreCommands():
        parser.advance()
        command_type = parser.commandType()
        if command_type == "A_COMMAND":
            # symbol() returns text, so convert it before binary formatting;
            # zero-pad to 16 digits to satisfy the length check below.
            # NOTE(review): a non-numeric symbol raises ValueError here.
            binary = "{0:016b}".format(int(parser.symbol()))
        elif command_type == "C_COMMAND":
            # Each part is assigned, not accumulated across commands.
            comp_bits = translator.comp(parser.comp())
            dest_bits = translator.dest(parser.dest())
            jump_bits = translator.jump(parser.jump())
            binary = "111" + comp_bits + dest_bits + jump_bits
        else:
            # No translation is defined here for other command types; skip
            # them so a stale or undefined `binary` is never written.
            continue
        assert len(binary) == 16
        # NOTE(review): no separator is written between instructions, as in
        # the original -- confirm the expected output format.
        f.write(binary)
Here is my Parser.py file:
class AssemblyParser:
"""
Encapsulates access to the input code. Reads an assembly language command,
parses it, and provides convenient access to the command's components (fields and symbols).
In addition, removes all whitespace and comments.
"""
# Opens input_file in r+ mode, strips whitespace and //-comments from each line, and writes
# every non-blank result back to the same handle while still iterating over it.
# NOTE: input_file is not stored on self, and the final readline() result is bound to a local
# name `next_command`, not to self.next_command.
def __init__(self, input_file):
self.current_command = ""
self.next_command = ""
with open(input_file,"r+", encoding='utf-8') as f:
for l in f:
line = "".join(l.split()) # Remove whitespace from the line
line = line.split('//') # Removes any comments from the line
clean_line = line[0]
if clean_line.strip(): # Removes any blank lines
f.write(clean_line)
next_command = f.readline()
# NOTE: the leading and trailing double underscores are part of this method's name, so it is
# reachable only as __hasMoreCommands__, not as hasMoreCommands. The same holds for every
# method of this class.
def __hasMoreCommands__(self):
if self.next_command:
# NOTE: `true` and `false` are not Python built-ins (the constants are True and False).
return true
return false
# Moves next_command into current_command and reads a new next_command.
# NOTE: `input_file` is not a name defined in this method's scope. The file is also reopened
# on every call, so readline() would return its first line each time.
def __advance__(self):
with open(input_file, encoding='utf-8') as f:
self.current_command = self.next_command
self.next_command = f.readline()
# Classifies current_command by its first character: "#" -> A_COMMAND, "(" -> L_COMMAND,
# anything else (including an empty command) -> C_COMMAND.
def __commandType__(self):
char_1 = self.current_command[:1]
if char_1 == "#":
return "A_COMMAND"
elif char_1 == "(":
return "L_COMMAND"
else:
return "C_COMMAND"
# NOTE: ("A_COMMAND" or "L_COMMAND") evaluates to "A_COMMAND", so the assert rejects L_COMMAND.
# NOTE: the local `symbol` is read on the right-hand side before it is ever assigned.
# NOTE: self.commandType is not defined on this class (the method is named __commandType__).
def __symbol__(self):
assert self.commandType() == ("A_COMMAND" or "L_COMMAND")
if self.commandType() == "A_COMMAND":
symbol = str(symbol[1:])
else:
symbol = str(symbol[1:len(symbol)-1])
return str(symbol)
# Returns the part of a C command before "=", or "" when there is no "=".
# NOTE: temp[:1] is a list slice, so str() yields the list's text form (e.g. "['D']"), not the element.
def __dest__(self):
assert self.commandType() == "C_COMMAND"
if "=" in self.current_command:
temp = self.current_command.split("=")
return str(temp[:1])
else:
return ""
# Returns the part of a C command after "=" and before ";".
# NOTE: both str(temp[1:]) and str(temp[:1]) stringify list slices, not their elements.
def __comp__(self):
assert self.commandType() == "C_COMMAND"
temp = self.current_command
if "=" in temp:
temp = temp.split("=")
temp = str(temp[1:])
if ";" in temp:
temp = temp.split(";")
temp = str(temp[:1])
return temp
# Returns the part of a C command after ";", or "" when there is no ";".
# NOTE: temp[1:] is a list slice, so str() yields the list's text form.
def __jump__(self):
assert self.commandType() == "C_COMMAND"
if ";" in self.current_command:
temp = self.current_command.split(";")
return str(temp[1:])
else:
return ""
I really don't know why I'm getting this error, I've looked at the import documentation, but I'm getting more and more confused. I'm fairly new to Python. Can anyone explain this error?
Thanks.
Well, there is no method in the Parser module named hasMoreCommands. The method defined there starts and ends with double underscores (__hasMoreCommands__), so that is the only name under which it can be called.
Names with two leading and two trailing underscores are reserved by convention for Python's "magic" (special) methods. You should not invent your own names of that form; use them only for the special methods that Python itself defines.
The following is what you probably want:
hasMoreCommands():
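If you have multiple classes with this function and want to mark it as internal, use a single leading underscore instead (note that real name mangling only applies to names with two leading underscores and no trailing ones, e.g. __hasMoreCommands):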
_hasMoreCommands():
See: https://stackoverflow.com/a/8689983/2030480
And: http://www.rafekettler.com/magicmethods.html