How to fix errors for threadpool executor - python

I wrote the following function. Because there are so many keywords, I try to use Threadpool
class AllFind():
def __init__(
self
):
self._session = Session()
def find_kewword(
self,
keyword,
):
url = "https://xxxxxxxx.yy"
params = {
"keyword" : keyword,
"version" : 5,
}
res = self._session.get(url, params=params)
res_count = res.json().get("count")
return res_count
I tried this code but it failed. In this way, only the result value for the end of the keyword_list is displayed. What is wrong?
obj = AllFind()
keyword_list = [1,23,4,5,6.....]
with ThreadPoolExecutor(max_workers=5) as executor:
task = executor.submit(obj.find_keyword, keyword_list)

Related

python decorator takes 1 positional argument but 5 were given after modifying for pytest

I'd appreciate some help with the following code, as I'm still relatively new to Python, and despite countless days trying to figure out where i'm going wrong, i cant seem to spot the error i'm making.
I've adapted the following code from an article on medium to create a logging decorator and then enhanced it to try and "redact pandas df and dictionary" from the logs. Using functools caused me a problem with pytest and pytest fixtures. A post on stack overflow suggested dropping functools in favour of decorators.
def log_decorator(_func=None):
def log_decorator_info(func):
def log_decorator_wrapper(*args, **kwargs):
_logger = Logger()
logger_obj = _logger.get_logger()
args_passed_in_function = args_excl_df_dict(*args)
kwargs_passed_in_function = kwargs_excl_df_dict(**kwargs)
formatted_arguments = join_args_kwargs(args_passed_in_function,kwargs_passed_in_function)
py_file_caller = getframeinfo(stack()[1][0])
extra_args = { 'func_name_override': func.__name__,'file_name_override': os.path.basename(py_file_caller.filename) }
""" Before to the function execution, log function details."""
logger_obj.info(f"Begin function - Arguments: {formatted_arguments}", extra=extra_args)
try:
""" log return value from the function """
args_returned_from_function = args_excl_df_dict(func(*args))
kwargs_returned_from_function = []
formatted_arguments = join_args_kwargs(args_returned_from_function,kwargs_returned_from_function)
logger_obj.info(f"End function - Returned: {formatted_arguments}", extra=extra_args)
except:
"""log exception if occurs in function"""
error_raised = str(sys.exc_info()[1])
logger_obj.error(f"Exception: {str(sys.exc_info()[1])}",extra=extra_args)
msg_to_send = f"{func.__name__} {error_raised}"
send_alert(APP_NAME,msg_to_send,'error')
raise
return func(*args, **kwargs)
return decorator.decorator(log_decorator_wrapper, func)
if _func is None:
return log_decorator_info
else:
return log_decorator_info(_func)
Having adapted the above code i cant figure out what is causing the following error
args_returned_from_function = args_excl_df_dict(func(*args))
TypeError: test_me() takes 4 positional arguments but 5 were given
Other functions which the log decorator relies on
def args_excl_df_dict(*args):
args_list = []
for a in args:
if isinstance(a,(pd.DataFrame,dict)):
a = 'redacted from log'
args_list.append(repr(a))
else:
args_list.append(repr(a))
return args_list
def kwargs_excl_df_dict(**kwargs):
kwargs_list = []
for k, v in kwargs.items():
if isinstance(v,(dict,pd.DataFrame)):
v = 'redacted from log'
kwargs_list.append(f"{k}={v!r}")
else:
kwargs_list.append(f"{k}={v!r}")
return kwargs_list
def join_args_kwargs(args,kwargs):
formatted_arguments = ", ".join(args + kwargs)
return str(formatted_arguments)
This is the code calling the decorator
#log_decorator.log_decorator()
def test_me(a, b, c, d):
return a, b
test_me(string, number, dictionary, pandas_df)
I think the problem is that the wrapper is including the function as an argument to the function.
Try adding this line and see if it helps
args = args[1:]
intor your log_decorator_wrapper function towards the top. Like this.
def log_decorator(_func=None):
def log_decorator_info(func):
def log_decorator_wrapper(*args, **kwargs):
args = args[1:] # < -------------------here
_logger = Logger()
logger_obj = _logger.get_logger()
args_passed_in_function = args_excl_df_dict(*args)
kwargs_passed_in_function = kwargs_excl_df_dict(**kwargs)
formatted_arguments = join_args_kwargs(args_passed_in_function,kwargs_passed_in_function)
py_file_caller = getframeinfo(stack()[1][0])
extra_args = { 'func_name_override': func.__name__,'file_name_override': os.path.basename(py_file_caller.filename) }
""" Before to the function execution, log function details."""
logger_obj.info(f"Begin function - Arguments: {formatted_arguments}", extra=extra_args)
try:
""" log return value from the function """
args_returned_from_function = args_excl_df_dict(func(*args))
kwargs_returned_from_function = []
formatted_arguments = join_args_kwargs(args_returned_from_function,kwargs_returned_from_function)
logger_obj.info(f"End function - Returned: {formatted_arguments}", extra=extra_args)
except:
"""log exception if occurs in function"""
error_raised = str(sys.exc_info()[1])
logger_obj.error(f"Exception: {str(sys.exc_info()[1])}",extra=extra_args)
msg_to_send = f"{func.__name__} {error_raised}"
send_alert(APP_NAME,msg_to_send,'error')
raise
return func(*args, **kwargs)
return decorator.decorator(log_decorator_wrapper, func)
if _func is None:
return log_decorator_info
else:
return log_decorator_info(_func)
If your code is as is in your editor, maybe look at the indentation on the first three functions. Then start from there to move down

How to reduce the complexity of several for loops in python

I have the following function
import requests
children_dict = {}
def get_list_of_children(base_url, username, password, folder="1"):
token = get_token()
url = f"{base_url}/unix/repo/folders/{folder}/list"
json = requests_json(url,token)
for obj in json["list"]:
if obj['name'] == 'MainFolder':
folderId = obj['id']
url_parent = f"{base_url}/unix/repo/folders/{folderId}/list"
json_parent = requests_json(url_parent,token)
for obj_child in json_parent['list']:
if obj_child['folder'] == True:
folder_grand_child_id = obj_child['id']
url_grand_child = f"{base_url}/unix/repo/folders/{folder_grand_child_id}/list"
json_grand_child = requests_json(url_grand_child,token)
for obj_grand_child in json_grand_child["list"]:
if obj_grand_child['name'] == 'SubFolder':
folder_grand_grand_child = obj_grand_child['id']
url_grand_grand_child = f"{base_url}/unix/repo/folders/{folder_grand_grand_child}/list"
json_grand_grand_child = requests_json(url_grand_grand_child,token)
for obj_grand_grand_child in json_grand_grand_child["list"]:
if obj_grand_grand_child['name'] == 'MainTasks':
folder_grand_grand_grand_child = obj_grand_grand_child['id']
url_grand_grand_grand_child = f"{base_url}/unix/repo/folders/{folder_grand_grand_grand_child}/list"
json_grand_grand_grand_child = requests_json(url_grand_grand_grand_child,token)
for obj_grand_grand_grand_child in json_grand_grand_grand_child["list"]:
children_dict[[obj_grand_grand_grand_child['id']] = obj_grand_grand_grand_child['name']
return children_dict
What i am trying to accomplish here is to make repeated api calls to traverse through http folder structure to get the list of files in the last directory
The function works as intended but sonarlint is through the below error
Refactor this function to reduce its Cognitive Complexity from 45 to the 15 allowed. [+9 locations]sonarlint(python:S3776)
is there is a better way to handle this function ?
can anyone refactor this, pointing me in the right direct will do
This isn't a complete solution, but to answer your question "how can I simplify this" more generally, you need to look for repeated patterns in your code and generalize them into a function. Perhaps it's a function you can call recursively, or in a loop. For example, in that deeply-nested statement of yours it's just the same pattern over and over again like:
url = f"{base_url}/unix/repo/folders/{folder}/list"
json = requests_json(url,token)
for obj in json["list"]:
if obj['name'] == '<some folder name>':
folderId = obj['id']
# ...repeat...
So try generalizing this into a loop, maybe, like:
url_format = "{base_url}/unix/repo/folders/{folder_id}/list"
folder_hierarchy = ['MainFolder', 'SubFolder', 'MainTasks']
folder_id = '1' # this was the argument passed to your function
for subfolder in folder_hierarchy:
url = url_format.format(base_url=base_url, folder_id=folder_id)
folder_json = requests_json(url, token)
for obj in folder_json['list']:
if obj['name'] == subfolder:
folder_id = obj['id']
break
# Now the pattern repeats for the next level of the hierarchy but
# starting with the new folder_id
This is just schematic and you may need to generalize further, but it's one idea.
If your goal is to traverse a more complicated hierarchy you might want to look into tree-traversal algorithms.
There's plenty of repeating code. Once you identify the repeating patterns, you can extract them to the classes and functions. In particular, I find it useful to isolate all the web API logic from the rest of the code:
class Client:
def __init__(self, base_url, token):
self.base_url = base_url
self.token = token
def list_folder(self, folder_id):
return request_json(
f'{self.base_url}/unix/repo/folders/{folder_id}/list', self.token
)['list']
def get_subfolders(self, parent_id=1):
return [c for c in self.list_folder(parent_id) if c['folder']]
def get_subfolder(self, parent_id, name):
children = self.list_folder(parent_id)
for child in children:
if child['name'] == name:
return child
return None
def resolve_path(self, path, root_id=1):
parent_id = root_id
for p in path:
current = self.get_subfolder(parent_id, p)
if not current:
return None
parent_id = current['id']
return current
Now you can use the class above to simplify the main code:
client = Client(base_url, token)
for folder in client.get_subfolders():
child = client.resolve_path(folder['id'], ('SubFolder', 'MainTasks'))
if child:
# do the rest of the stuff
The code above is not guaranteed to work as is, just an illustration of the idea.
I can't really test it, but I'd make it like the following in order to easily build many levels of repeating code.
class NestedProcessing:
def __init__(self, base_url):
self.base_url = base_url
self.token = get_token()
self.obj_predicates = []
def next_level_predicate(self, obj_predicate):
self.obj_predicates.append(obj_predicate)
return self
def final_action(self, obj_action):
self.obj_action = obj_action
return self
def process(self, first_folder_id):
self.process_level(0, first_folder_id)
def process_level(self, index, folder_id):
obj_is_good = self.obj_predicates[index]
url = f"{self.base_url}/unix/repo/folders/{folder_id}/list"
json = requests_json(url, self.token)
for obj in json["list"]:
if index == len(self.obj_predicates) - 1: # last level
self.obj_action(obj)
elif obj_is_good(obj):
self.process_level(index + 1, obj['id'])
def get_list_of_children(base_url, username, password, folder="1"):
children_dict = {}
NestedProcessing(base_url)
.next_level_predicate(lambda obj: obj['name'] == 'MainFolder')
.next_level_predicate(lambda obj: obj['folder'] == True)
.next_level_predicate(lambda obj: obj['name'] == 'SubFolder')
.next_level_predicate(lambda obj: obj['name'] == 'MainTasks')
.final_action(lambda obj, storage=children_dict: storage.update({obj['id']: obj['name']})
.process(folder)
return children_dict

How do I use the functions within this Python script?

I have this Python script to control a PfSense router via FauxAPI. The problem is that when i call a function it gives an error. I think i'm calling the function wrong. Does anyone know how to call them?
Here is a link to the API i'm using: https://github.com/ndejong/pfsense_fauxapi
I have tried calling config_get(self, section=none) but that does not seem to work.
import os
import json
import base64
import urllib
import requests
import datetime
import hashlib
class PfsenseFauxapiException(Exception):
pass
class PfsenseFauxapi:
host = '172.16.1.1'
proto = None
debug = None
version = None
apikey = 'key'
apisecret = 'secret'
use_verified_https = None
def __init__(self, host, apikey, apisecret, use_verified_https=False, debug=False):
self.proto = 'https'
self.base_url = 'fauxapi/v1'
self.version = __version__
self.host = host
self.apikey = apikey
self.apisecret = apisecret
self.use_verified_https = use_verified_https
self.debug = debug
if self.use_verified_https is False:
requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
def config_get(self, section=None):
config = self._api_request('GET', 'config_get')
if section is None:
return config['data']['config']
elif section in config['data']['config']:
return config['data']['config'][section]
raise PfsenseFauxapiException('Unable to complete config_get request, section is unknown', section)
def config_set(self, config, section=None):
if section is None:
config_new = config
else:
config_new = self.config_get(section=None)
config_new[section] = config
return self._api_request('POST', 'config_set', data=config_new)
def config_patch(self, config):
return self._api_request('POST', 'config_patch', data=config)
def config_reload(self):
return self._api_request('GET', 'config_reload')
def config_backup(self):
return self._api_request('GET', 'config_backup')
def config_backup_list(self):
return self._api_request('GET', 'config_backup_list')
def config_restore(self, config_file):
return self._api_request('GET', 'config_restore', params={'config_file': config_file})
def send_event(self, command):
return self._api_request('POST', 'send_event', data=[command])
def system_reboot(self):
return self._api_request('GET', 'system_reboot')
def system_stats(self):
return self._api_request('GET', 'system_stats')
def interface_stats(self, interface):
return self._api_request('GET', 'interface_stats', params={'interface': interface})
def gateway_status(self):
return self._api_request('GET', 'gateway_status')
def rule_get(self, rule_number=None):
return self._api_request('GET', 'rule_get', params={'rule_number': rule_number})
def alias_update_urltables(self, table=None):
if table is not None:
return self._api_request('GET', 'alias_update_urltables', params={'table': table})
return self._api_request('GET', 'alias_update_urltables')
def function_call(self, data):
return self._api_request('POST', 'function_call', data=data)
def system_info(self):
return self._api_request('GET', 'system_info')
def _api_request(self, method, action, params=None, data=None):
if params is None:
params = {}
if self.debug:
params['__debug'] = 'true'
url = '{proto}://{host}/{base_url}/?action={action}&{params}'.format(
proto=self.proto, host=self.host, base_url=self.base_url, action=action, params=urllib.parse.urlencode(params))
if method.upper() == 'GET':
res = requests.get(
url,
headers={'fauxapi-auth': self._generate_auth()},
verify=self.use_verified_https
)
elif method.upper() == 'POST':
res = requests.post(
url,
headers={'fauxapi-auth': self._generate_auth()},
verify=self.use_verified_https,
data=json.dumps(data)
)
else:
raise PfsenseFauxapiException('Request method not supported!', method)
if res.status_code == 404:
raise PfsenseFauxapiException('Unable to find FauxAPI on target host, is it installed?')
elif res.status_code != 200:
raise PfsenseFauxapiException('Unable to complete {}() request'.format(action), json.loads(res.text))
return self._json_parse(res.text)
def _generate_auth(self):
# auth = apikey:timestamp:nonce:HASH(apisecret:timestamp:nonce)
nonce = base64.b64encode(os.urandom(40)).decode('utf-8').replace('=', '').replace('/', '').replace('+', '')[0:8]
timestamp = datetime.datetime.utcnow().strftime('%Y%m%dZ%H%M%S')
hash = hashlib.sha256('{}{}{}'.format(self.apisecret, timestamp, nonce).encode('utf-8')).hexdigest()
return '{}:{}:{}:{}'.format(self.apikey, timestamp, nonce, hash)
def _json_parse(self, data):
try:
return json.loads(data)
except json.JSONDecodeError:
pass
raise PfsenseFauxapiException('Unable to parse response data!', data)
Without having tested the above script myself, I can conclude that yes you are calling the function wrong. The above script is rather a class that must be instantiated before any function inside can be used.
For example you could first create an object with:
pfsense = PfsenseFauxapi(host='<host>', apikey='<API key>', apisecret='<API secret>')
replacing <host>, <API key> and <API secret> with the required values
Then call the function with:
pfsense.config_get() # self is not passed
where config_get can be replaced with any function
Also note
As soon as you call pfsense = PfsenseFauxapi(...), all the code in
the __init__ function is also run as it is the constructor (which
is used to initialize all the attributes of the class).
When a function has a parameter which is parameter=something, that something is the default value when nothing is passed for that parameter. Hence why use_verified_https, debug and section do not need to be passed (unless you want to change them of course)
Here is some more information on classes if you need.
You need to create an object of the class in order to call the functions of the class. For example
x = PfsenseFauxapi() (the init method is called during contructing the object)
and then go by x.'any function'. Maybe name the variable not x for a good naming quality.

Trouble passing variable as parameter between methods

I've created a script in python using class to log into a website making use of my credentials. When I run my script, I can see that it successfully logs in. What I can't do is find a suitable way to pass res.text being returned within login() method to get_data() method so that I can process it further. I don't wish to try like this return self.get_data(res.text) as it looks very awkward.
The bottom line is: when I run my script, It will automatically logs in like it is doing now. However, it will fetch data when I use this line scraper.get_data() within main function..
This is my try so far:
from lxml.html import fromstring
import requests
class CoffeeGuideBot(object):
login_url = "some url"
def __init__(self,session,username,password):
self.session = session
self.usrname = username
self.password = password
self.login(session,username,password)
def login(self,session,username,password):
session.headers['User-Agent'] = 'Mozilla/5.0'
payload = {
"Login1$UserName": username,
"Login1$Password": password,
"Login1$LoginButton": "Log on"
}
res = session.post(self.login_url,data=payload)
return res.text
def get_data(self,htmlcontent):
root = fromstring(htmlcontent,"lxml")
for iteminfo in root.cssselect("some selector"):
print(iteminfo.text)
if __name__ == '__main__':
session = requests.Session()
scraper = CoffeeGuideBot(session,"username","password")
#scraper.get_data() #This is how i wish to call this
What is the ideal way to pass variable as parameter between methods?
If I understood you requirement correctly, you want to access res.text inside get_data() without passing it as a method argument.
There are 2 options IMO.
Store res as a class instance variable of CoffeeGuideBot, access it in get_data()
def login(self,session,username,password):
<some code>
self.res = session.post(self.login_url,data=payload)
def get_data(self):
root = fromstring(self.res.text,"lxml")
<other code>
Almost same as above, but actually use the return value from login() to store res. In current code, the return statement is unnecessary.
def __init__(self,session,username,password):
<initializations>
self.res = self.login(session,username,password)
def login(self,session,username,password):
<some code>
return session.post(self.login_url,data=payload)
def get_data(self):
root = fromstring(self.res.text,"lxml")
<other code>
from lxml.html import fromstring
import requests
class CoffeeGuideBot(object):
login_url = "some url"
def __init__(self,session,username,password):
self.session = session
self.usrname = username
self.password = password
self._login = self.login(session,username,password)
def login(self,session,username,password):
session.headers['User-Agent'] = 'Mozilla/5.0'
payload = {
"Login1$UserName": username,
"Login1$Password": password,
"Login1$LoginButton": "Log on"
}
res = session.post(self.login_url,data=payload)
return res.text
def get_data(self):
htmlcontent = self._login
root = fromstring(htmlcontent,"lxml")
for iteminfo in root.cssselect("some selector"):
print(iteminfo.text)
if __name__ == '__main__':
session = requests.Session()
scraper = CoffeeGuideBot(session,"username","password")
scraper.get_data()

Calling regular functions from inside a class?

I'm new to Python and I have factory class that takes in API arguments and with my setup i get a NameError: 'self' is not defined. I understand that my functions calls are wrong. How would I refactor this to have the same logic
import requests
import json
class Call:
results = []
__response = None
__methods= dict(post=self.__post(), get=self.__get())
def __init__(self, root, endpoint, payload, header):
self.__root = root
self.__endpoint = endpoint
self.__payload = payload
self.__header = header
def __post(self):
self.__response = requests.post(
self.__root + self.__endpoint,
data = json.dumps(self.__payload),
headers = self.__header
)
self.__get_results()
def __get(self):
self.__response = requests.get(
self.__root + self.__endpoint,
data = json.dumps(self.__payload),
headers = self.__header
)
self.__get_results()
def __get_results(self):
if (self.__response.ok):
data = json.loads(self.__response.content)
results.append(
{
'result':data['result'],
'endpoint': self.__endpoint,
'status' : response.status_code
}
)
else:
results.append(
{
'result':'FAILED',
'endpoint': self.__endpoint,
'status' : response.status_code
}
)
def method(self, method):
return self.__methods[method]
login = Call(
Url.V1_PROD,
DriverEndpoint.LOGIN,
DriverPayload.LOGIN,
Request.HEADER
)
login.method('post')
You shouldn't use variable names starting with a __double __underscore, they are used to invoke name mangling, which you probably don't want.
Use a _single _underscore.
It's easier to declare your dictionary on the instance, in the __init__ method.
A common alternative would be to store the names of the methods you want to call, as strings, and use getattr to access the methods (see Call a Python method by name).
import requests
import json
class Call:
results = []
_response = None
def __init__(self, root, endpoint, payload, header):
self._root = root
self._endpoint = endpoint
self._payload = payload
self._header = header
# NO () after self._post, otherwise it would call the
# method and insert the return value in the dict
self._methods= dict(post=self._post, get=self._get)
def _post(self):
self._response = requests.post(
self._root + self._endpoint,
data = json.dumps(self._payload),
headers = self._header
)
self._get_results()
def _get(self):
self._response = requests.get(
self._root + self._endpoint,
data = json.dumps(self._payload),
headers = self._header
)
self._get_results()
def _get_results(self):
if (self.__response.ok):
data = json.loads(self.__response.content)
results.append(
{
'result':data['result'],
'endpoint': self._endpoint,
'status' : response.status_code
}
)
else:
results.append(
{
'result':'FAILED',
'endpoint': self._endpoint,
'status' : response.status_code
}
)
def method(self, method):
# Here, we have to effectively call the selected method,
# hence the () at the end
self._methods[method]()
login = Call(
Url.V1_PROD,
DriverEndpoint.LOGIN,
DriverPayload.LOGIN,
Request.HEADER
)
login.method('post')
__methods= dict(post=self.__post(), get=self.__get())
Your __methods variable is class variable, self is used to refer to an instance of Call, self cannot access to the class variable scope.
You can declare __method as an instance variable:
def __init__(self):
self.__methods= dict(post=self.__post(), get=self.__get())
def get_mothods(self):
return self.__methods

Categories

Resources