defined function got an unexpected keyword argument - python

I've got a problem with this line:
processed = process(cleaned, lemmatizer=nltk.stem.wordnet.WordNetLemmatizer());
Why is the unexpected keyword argument popping up?
Error: TypeError: process() got an unexpected keyword argument 'lemmatizer'
Here is my code:
def process(text, filters=nltk.corpus.stopwords.words('english')):
""" Normalizes case and handles punctuation
Inputs:
text: str: raw text
lemmatizer: an instance of a class implementing the lemmatize() method
(the default argument is of type nltk.stem.wordnet.WordNetLemmatizer)
Outputs:
list(str): tokenized text
"""
lemmatizer=nltk.stem.wordnet.WordNetLemmatizer()
word_list = nltk.word_tokenize(text);
lemma_list = [];
for i in word_list:
if i not in filters:
try:
lemma = lemmatizer.lemmatize(i);
lemma_list.append(str(lemma));
except:
pass
return " ".join(lemma_list)
if __name__ == '__main__':
#construct filter for processor
file = open("accountant.txt").read().lower()
filters = set(nltk.word_tokenize(file))
filters.update(nltk.corpus.stopwords.words('english'))
filters = list(filters)
#webcrawling
webContent = []
dataJobs = pd.read_csv("test.csv");
webContent = []
for i in dataJobs["url"]:
content = webCrawl(i);
webContent.append(content);
#clean the crawled text
cleaned_list = []
for j in webContent:
cleaned = extractUseful(j);
processed = process(cleaned, lemmatizer=nltk.stem.wordnet.WordNetLemmatizer());
cleaned_list.append(processed)
#save to csv
contents = pd.DataFrame({ "Content":webContent, "Cleaned": cleaned_list})
contents.to_csv("testwebcrawled.csv")
dataJobs[['jd']]= cleaned_list
dataJobs.to_csv("test_v2_crawled.csv")

You only define one keyword argument filters in the function signature for process (the def process(...) line). If the lemmatizer is what you intend to pass as the filter try:
processed = process(cleaned, filter=nltk.stem.wordnet.WordNetLemmatizer())
If you want to be able to pass a lemmatizer as well, you should change your function signature to something like this:
def process(text,
filters=nltk.corpus.stopwords.words('english'),
lemmatizer=nltk.stem.wordnet.WordNetLemmatizer()):
But note that you only need the = and the content that follows it in your function signature if you want the values after the = to be passed as the default arguments for these parameters. Otherwise, you can just do:
def process(text, filter, lemmatizer):
...
And call it like:
processed = process(cleaned,
filter=nltk.corpus.stopwords.words('english'),
lemmatizer=nltk.stem.wordnet.WordNetLemmatizer())

Related

What is the best way to return a variable or call a function to maximize code reuse?

I was wondering if i could get some input from some season python exports, i have a couple questions
I am extracting data from an api request and calculating the total vulnerabilities,
what is the best way i can return this data so that i can call it in another function
what is the way i can add up all the vulnerabilities (right now its just adding it per 500 at a time, id like to do the sum of every vulnerability
def _request():
third_party_patching_filer = {
"asset": "asset.agentKey IS NOT NULL",
"vulnerability" : "vulnerability.categories NOT IN ['microsoft patch']"}
headers = _headers()
print(headers)
url1 = f"https://us.api.insight.rapid7.com/vm/v4/integration/assets"
resp = requests.post(url=url1, headers=headers, json=third_party_patching_filer, verify=False).json()
jsonData = resp
#print(jsonData)
has_next_cursor = False
nextKey = ""
if "cursor" in jsonData["metadata"]:
has_next_cursor = True
nextKey = jsonData["metadata"]["cursor"]
while has_next_cursor:
url2 = f"https://us.api.insight.rapid7.com/vm/v4/integration/assets?&size=500&cursor={nextKey}"
resp2 = requests.post(url=url2, headers=headers, json=third_party_patching_filer, verify=False).json()
cursor = resp2["metadata"]
print(cursor)
if "cursor" in cursor:
nextKey = cursor["cursor"]
print(f"next key {nextKey}")
#print(desktop_support)
for data in resp2["data"]:
for tags in data['tags']:
total_critical_vul_osswin = []
total_severe_vul_osswin = []
total_modoer_vuln_osswin = []
if tags["name"] == 'OSSWIN':
print("OSSWIN")
critical_vuln_osswin = data['critical_vulnerabilities']
severe_vuln_osswin = data['severe_vulnerabilities']
modoer_vuln_osswin = data['moderate_vulnerabilities']
total_critical_vul_osswin.append(critical_vuln_osswin)
total_severe_vul_osswin.append(severe_vuln_osswin)
total_modoer_vuln_osswin.append(modoer_vuln_osswin)
print(sum(total_critical_vul_osswin))
print(sum(total_severe_vul_osswin))
print(sum(total_modoer_vuln_osswin))
if tags["name"] == 'DESKTOP_SUPPORT':
print("Desktop")
total_critical_vul_desktop = []
total_severe_vul_desktop = []
total_modorate_vuln_desktop = []
critical_vuln_desktop = data['critical_vulnerabilities']
severe_vuln_desktop = data['severe_vulnerabilities']
moderate_vuln_desktop = data['moderate_vulnerabilities']
total_critical_vul_desktop.append(critical_vuln_desktop)
total_severe_vul_desktop.append(severe_vuln_desktop)
total_modorate_vuln_desktop.append(moderate_vuln_desktop)
print(sum(total_critical_vul_desktop))
print(sum(total_severe_vul_desktop))
print(sum(total_modorate_vuln_desktop))
else:
pass
else:
has_next_cursor = False
If you have a lot of parameters to pass, consider using a dict to combine them. Then you can just return the dict and pass it along to the next function that needs that data. Another approach would be to create a class and either access the variables directly or have helper functions that do so. The latter is a cleaner solution vs a dict, since with a dict you have to quote every variable name, and with a class you can easily add additional functionally beyond just being a container for a bunch of instance variables.
If you want the total across all the data, you should put these initializations:
total_critical_vul_osswin = []
total_severe_vul_osswin = []
total_modoer_vuln_osswin = []
before the while has_next_cursor loop (and similarly for the desktop totals). The way your code is currently, they are initialized each cursor (ie, each 500 samples based on the URL).

appending text using loop

I need a help so basically I am making a function where we give URL as input with parameter and then it will modify the value of parameters and then print the modified URL.
def findingParams(url):
#url = url+'?name=aman&test=aman'
base_url = url.split('?')[0]
parameters = url.split('?')[1]
myparms = {}
myparms1 = []
payload = "modified_value"
if '&' in parameters:
#['name=123', 'test=321']
parameters = parameters.split('&')
for params in parameters:
myparms1.append(params.split('=')[0])
myparms[(params.split('=')[0])] = payload
#['name', '123']
#['test', '321']
print(f'url = {url}\nparams = {myparms}')
#here I want the ouput as modified parameter
else:
#print(params.split('='))
print(base_url)
print(parameters)
myparms1.append(parameters.split('=')[0])
myparms[(parameters.split('=')[0])] = payload
print(f'url = {url}\nparams = {myparms}')
#['name', '123']
#myparms = {'name':'123', 'test':'321'}
for i in myparms1:
print(f'{base_url}?{i}={myparms[i]}')
#base_url+?[params]=[payload]
I wrote this function and this function works fine with URL that have single parameter eg: testme.com/test.php?name=123 and I also have to make it work with multiple parameters but the problem is that I don't know How can I use for loop to append value in URL for example: If I'm using for loop then output is like this
testme.com/test.php?name=modified_Value
testme.com/test.php?test=modified_Value
but I want URL like this
testme.com/test.php?name=modified_value&test=modified_value.
Try using a temp variable to build the url in your loop, then printing at the end, like this:
result = f'{base_url}?'
for i in myparms1:
result = result + f'{i}=myparms1[i]&'
# remove extra & at end after last parameter
result = result[:-1]
print(result)
#!/usr/bin/env python3
def findingParams(url):
if "?" not in url:
print(url)
else:
base_url, parameters = url.split("?")
payload = "modified_value"
if "&" in parameters:
modif = ""
for params in parameters.split("&"):
name, value = params.split("=")
modif += name + "=" + payload + "&"
print(base_url + "?" + modif.strip("&"))
else:
print(url.split("=")[0] + "=" + payload)
findingParams("testme.com/test.php")
# testme.com/test.php
findingParams("testme.com/test.php?name=123")
# testme.com/test.php?name=modified_value
findingParams("testme.com/test.php?name=12345&test=12345")
# testme.com/test.php?name=modified_value&test=modified_value
findingParams("testme.com/test.php?name=12345&test=12345&foo=12345")
# testme.com/test.php?name=modified_value&test=modified_value&foo=modified_value
Like this ?

Python - Getting Attributes From A File of Constants

I have a file of constant variables that I need to query and I am not sure how to go about it.
I have a database query which is returning user names and I need to find the matching user name in the file of constant variables.
The file looks like this:
SALES_MANAGER_01 = {"user_name": "BO01", "password": "password", "attend_password": "BO001",
"csm_password": "SM001", "employee_num": "BOSM001"}
There is just a bunch of users just like the one above.
My function looks like this:
#attr("user_test")
def test_get_user_for_login(self):
application_code = 'BO'
user_from_view = self.select_user_for_login(application_code=application_code)
users = [d['USER'] for d in user_from_view]
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
password = ""
global_users = dir(gum)
for item in global_users:
if user_wo_ent not in item.__getattr__("user_name"):
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
else:
password = item.__getattr__("password")
print(user_wo_ent, password)
global_users = dir(gum) is my file of constants. So I know I am doing something wrong since I am getting an attribute error AttributeError: 'str' object has no attribute '__getattr__', I am just not sure how to go about resolving it.
You should reverse your looping as you want to compare each item to your match condition. Also, you have a dictionary, so use it to do some heavy lifting.
You need to add some imports
import re
from ast import literal_eval
I've changed the dir(gum) bit to be this function.
def get_global_users(filename):
gusers = {} # create a global users dict
p_key = re.compile(ur'\b\w*\b') # regex to get first part, e.g.. SALES_MANAGER_01
p_value = re.compile(ur'\{.*\}') # regex to grab everything in {}
with (open(filename)) as f: # open the file and work through it
for line in f: # for each line
gum_key = p_key.match(line) # pull out the key
gum_value = p_value.search(line) # pull out the value
''' Here is the real action. update a dictionary
with the match of gum_key and with match of gum_value'''
gusers[gum_key.group()] = literal_eval(gum_value.group())
return(gusers) # return the dictionary
The bottom of your existing code is replaced with this.
global_users = get_global_users(gum) # assign return to global_users
for key, value in global_users.iteritems(): # walk through all key, value pairs
if value['user_name'] != user_wo_ent:
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
else:
password = value['password']
So a very simple answer was get the dir of the constants file then parsing over it like so:
global_users = dir(gum)
for item in global_users:
o = gum.__dict__[item]
if type(o) is not dict:
continue
if gum.__dict__[item].get("user_name") == user_wo_ent:
print(user_wo_ent, o.get("password"))
else:
print("User was not in global_user_mappings")
I was able to find the answer by doing the following:
def get_user_for_login(application_code='BO'):
user_from_view = BaseServiceTest().select_user_for_login(application_code=application_code)
users = [d['USER'] for d in user_from_view]
user_with_ent = choice(users)
user_wo_ent = user_with_ent[4:]
global_users = dir(gum)
user_dict = {'user_name': '', 'password': ''}
for item in global_users:
o = gum.__dict__[item]
if type(o) is not dict:
continue
if user_wo_ent == o.get("user_name"):
user_dict['user_name'] = user_wo_ent
user_dict['password'] = o.get("password")
return user_dict

how to loop list to pass string to function name in python

I'm trying to find the most efficient way to create different function name myfunction_a ,.. b , c with slightly different code ( input file name 'app/data/mydata_a.csv' ) so here below is the a function I got
def myfunction_a(request):
os.getcwd() # Should get this Django project root (where manage.py is)
fn = os.path.abspath(os.path.join(os.getcwd(),'app/data/mydata_a.csv'))
# TODO: Move to helper module
response_data = {}
data_format = 'tsv'
if data_format == 'json':
with open(fn, 'rb') as tsvin:
tsvin = csv.reader(tsvin, delimiter='\t')
for row in tsvin:
print 'col1 = %s col2 = %s' % (row[0], row[1])
response_data[row[0]] = row[1]
result = HttpResponse(json.dumps(response_data), content_type = 'application/json')
else:
with open(fn, 'rb') as tsvin:
buff = tsvin.read()
result = HttpResponse(buff, content_type = 'text/tsv')
return result
I want to be able to loop through my list and create multiple function name:
mylist = ['a','b','c' ... 'z' ]
def myfunction_a(request):
... ( 'app/data/mydata_a.csv' )
return request
to get final result of :
def myfunction_a => taking 'app/data/mydata_a.csv'
def myfunction_b => taking 'app/data/mydata_b.csv'
def myfunction_c => taking 'app/data/mydata_c.csv'
right now I just copy and past and change it. is there a better to do this ? Any recommendation would be appreciated. Thanks.
you can add a variable to a string with
"app/data/mydata_%s.csv" % (character)
so
for character in mylist:
print "app/data/mydata_%s.csv" % (character)
should append everytime another charcter at the place of %s
So since you want for every function use another string to get another file you can
do something like this:
def myfunction(label, request):
return "app/data/mydata_%s.csv" % (label)
so you get the function label at the end of your documentpath. Since you described that you
only want to change the name so that it equals to the function label, you only need another parameter and not a new function name
If you must have a special function name, you could do this. Though why you'd need to I'm not sure.
import functools, sys
namespace = sys._getframe(0).f_globals
def myfunction(label, request):
print request
return "app/data/mydata_%s.csv" % (label)
my_labels = ['a','b','c']
for label in my_labels:
namespace['myfunction_%s'%label] = functools.partial(myfunction, label)
print myfunction_a('request1')
print myfunction_b('request2')
Output is this:
request1
app/data/mydata_a.csv
request2
app/data/mydata_b.csv
Or possibly a better implementation would be:
class MyClass(object):
def __init__(self, labels):
for label in labels:
setattr(self, label, functools.partial(self._myfunction, label))
def _myfunction(self, label, request):
print request
return "app/data/mydata_%s.csv" % (label)
myfunction = MyClass(['a','b','c'])
print myfunction.c('request3')
Output is this:
request3
app/data/mydata_c.csv

How to use the Typed unmarshaller in suds?

I have existing code that processes the output from suds.client.Client(...).service.GetFoo(). Now that part of the flow has changed and we are no longer using SOAP, instead receiving the same XML through other channels. I would like to re-use the existing code by using the suds Typed unmarshaller, but so far have not been successful.
I came 90% of the way using the Basic unmarshaller:
tree = suds.umx.basic.Basic().process(xmlroot)
This gives me the nice tree of objects with attributes, so that the pre-existing code can access tree[some_index].someAttribute, but the value will of course always be a string, rather than an integer or date or whatever, so the code can still not be re-used as-is.
The original class:
class SomeService(object):
def __init__(self):
self.soap_client = Client(some_wsdl_url)
def GetStuff(self):
return self.soap_client.service.GetStuff()
The drop-in replacement that almost works:
class SomeSourceUntyped(object):
def __init__(self):
self.url = some_url
def GetStuff(self):
xmlfile = urllib2.urlopen(self.url)
xmlroot = suds.sax.parser.Parser().parse(xmlfile)
if xmlroot:
# because the parser creates a document root above the document root
tree = suds.umx.basic.Basic().process(xmlroot)[0]
else:
tree = None
return tree
My vain effort to understand suds.umx.typed.Typed():
class SomeSourceTyped(object):
def __init__(self):
self.url = some_url
self.schema_file_name =
os.path.realpath(os.path.join(os.path.dirname(__file__),'schema.xsd'))
with open(self.schema_file_name) as f:
self.schema_node = suds.sax.parser.Parser().parse(f)
self.schema = suds.xsd.schema.Schema(self.schema_node, "", suds.options.Options())
self.schema_query = suds.xsd.query.ElementQuery(('http://example.com/namespace/','Stuff'))
self.xmltype = self.schema_query.execute(self.schema)
def GetStuff(self):
xmlfile = urllib2.urlopen(self.url)
xmlroot = suds.sax.parser.Parser().parse(xmlfile)
if xmlroot:
unmarshaller = suds.umx.typed.Typed(self.schema)
# I'm still running into an exception, so obviously something is missing:
# " Exception: (document, None, ), must be qref "
# Do I need to call the Parser differently?
tree = unmarshaller.process(xmlroot, self.xmltype)[0]
else:
tree = None
return tree
This is an obscure one.
Bonus caveat: Of course I am in a legacy system that uses suds 0.3.9.
EDIT: further evolution on the code, found how to create SchemaObjects.

Categories

Resources