How to reduce the complexity of several for loops in Python

I have the following function
import requests

children_dict = {}

def get_list_of_children(base_url, username, password, folder="1"):
    token = get_token()
    url = f"{base_url}/unix/repo/folders/{folder}/list"
    json = requests_json(url, token)
    for obj in json["list"]:
        if obj['name'] == 'MainFolder':
            folderId = obj['id']
            url_parent = f"{base_url}/unix/repo/folders/{folderId}/list"
            json_parent = requests_json(url_parent, token)
            for obj_child in json_parent['list']:
                if obj_child['folder'] == True:
                    folder_grand_child_id = obj_child['id']
                    url_grand_child = f"{base_url}/unix/repo/folders/{folder_grand_child_id}/list"
                    json_grand_child = requests_json(url_grand_child, token)
                    for obj_grand_child in json_grand_child["list"]:
                        if obj_grand_child['name'] == 'SubFolder':
                            folder_grand_grand_child = obj_grand_child['id']
                            url_grand_grand_child = f"{base_url}/unix/repo/folders/{folder_grand_grand_child}/list"
                            json_grand_grand_child = requests_json(url_grand_grand_child, token)
                            for obj_grand_grand_child in json_grand_grand_child["list"]:
                                if obj_grand_grand_child['name'] == 'MainTasks':
                                    folder_grand_grand_grand_child = obj_grand_grand_child['id']
                                    url_grand_grand_grand_child = f"{base_url}/unix/repo/folders/{folder_grand_grand_grand_child}/list"
                                    json_grand_grand_grand_child = requests_json(url_grand_grand_grand_child, token)
                                    for obj_grand_grand_grand_child in json_grand_grand_grand_child["list"]:
                                        children_dict[obj_grand_grand_grand_child['id']] = obj_grand_grand_grand_child['name']
    return children_dict
What I am trying to accomplish here is to make repeated API calls to traverse an HTTP folder structure and get the list of files in the last directory.
The function works as intended, but SonarLint is throwing the error below:
Refactor this function to reduce its Cognitive Complexity from 45 to the 15 allowed. [+9 locations]sonarlint(python:S3776)
Is there a better way to handle this function? Can anyone refactor this? Pointing me in the right direction will do.

This isn't a complete solution, but to answer your question "how can I simplify this" more generally, you need to look for repeated patterns in your code and generalize them into a function. Perhaps it's a function you can call recursively, or in a loop. For example, in that deeply-nested statement of yours it's just the same pattern over and over again like:
url = f"{base_url}/unix/repo/folders/{folder}/list"
json = requests_json(url,token)
for obj in json["list"]:
if obj['name'] == '<some folder name>':
folderId = obj['id']
# ...repeat...
So try generalizing this into a loop, maybe, like:
url_format = "{base_url}/unix/repo/folders/{folder_id}/list"
folder_hierarchy = ['MainFolder', 'SubFolder', 'MainTasks']
folder_id = '1'  # this was the argument passed to your function

for subfolder in folder_hierarchy:
    url = url_format.format(base_url=base_url, folder_id=folder_id)
    folder_json = requests_json(url, token)
    for obj in folder_json['list']:
        if obj['name'] == subfolder:
            folder_id = obj['id']
            break
    # Now the pattern repeats for the next level of the hierarchy but
    # starting with the new folder_id
This is just schematic and you may need to generalize further, but it's one idea.
If your goal is to traverse a more complicated hierarchy you might want to look into tree-traversal algorithms.
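If you prefer a single helper over copy-pasted levels, a minimal sketch of that idea (reusing the requests_json helper from the question, untested) could be:
def resolve_folder_path(base_url, token, folder_names, folder_id="1"):
    # Descend one level per name, applying the same list-and-match
    # pattern at each step.
    for name in folder_names:
        url = f"{base_url}/unix/repo/folders/{folder_id}/list"
        listing = requests_json(url, token)
        match = next((obj for obj in listing["list"] if obj["name"] == name), None)
        if match is None:
            return None  # path does not exist
        folder_id = match["id"]
    return folder_id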

There's plenty of repeating code. Once you identify the repeating patterns, you can extract them into classes and functions. In particular, I find it useful to isolate all the web API logic from the rest of the code:
class Client:
    def __init__(self, base_url, token):
        self.base_url = base_url
        self.token = token

    def list_folder(self, folder_id):
        return requests_json(
            f'{self.base_url}/unix/repo/folders/{folder_id}/list', self.token
        )['list']

    def get_subfolders(self, parent_id=1):
        return [c for c in self.list_folder(parent_id) if c['folder']]

    def get_subfolder(self, parent_id, name):
        children = self.list_folder(parent_id)
        for child in children:
            if child['name'] == name:
                return child
        return None

    def resolve_path(self, path, root_id=1):
        parent_id = root_id
        for p in path:
            current = self.get_subfolder(parent_id, p)
            if not current:
                return None
            parent_id = current['id']
        return current
Now you can use the class above to simplify the main code:
client = Client(base_url, token)
for folder in client.get_subfolders():
    child = client.resolve_path(('SubFolder', 'MainTasks'), folder['id'])
    if child:
        # do the rest of the stuff
The code above is not guaranteed to work as is, just an illustration of the idea.
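To tie it back to the question, rebuilding children_dict on top of this class might look roughly like the following (same caveat: a sketch, untested against the real API):
client = Client(base_url, get_token())
children_dict = {}
main_folder = client.get_subfolder(1, 'MainFolder')
if main_folder:
    for folder in client.get_subfolders(main_folder['id']):
        tasks = client.resolve_path(('SubFolder', 'MainTasks'), folder['id'])
        if tasks:
            # the files live one level below the matched MainTasks folder
            for obj in client.list_folder(tasks['id']):
                children_dict[obj['id']] = obj['name']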

I can't really test it, but I'd structure it like the following, which makes it easy to build up many levels of repeating code.
class NestedProcessing:
    def __init__(self, base_url):
        self.base_url = base_url
        self.token = get_token()
        self.obj_predicates = []
        self.obj_action = None

    def next_level_predicate(self, obj_predicate):
        self.obj_predicates.append(obj_predicate)
        return self

    def final_action(self, obj_action):
        self.obj_action = obj_action
        return self

    def process(self, first_folder_id):
        self.process_level(0, first_folder_id)

    def process_level(self, index, folder_id):
        url = f"{self.base_url}/unix/repo/folders/{folder_id}/list"
        json = requests_json(url, self.token)
        if index == len(self.obj_predicates):  # past the last predicate: act on the files
            for obj in json["list"]:
                self.obj_action(obj)
            return
        obj_is_good = self.obj_predicates[index]
        for obj in json["list"]:
            if obj_is_good(obj):
                self.process_level(index + 1, obj['id'])

def get_list_of_children(base_url, username, password, folder="1"):
    children_dict = {}
    (NestedProcessing(base_url)
        .next_level_predicate(lambda obj: obj['name'] == 'MainFolder')
        .next_level_predicate(lambda obj: obj['folder'])
        .next_level_predicate(lambda obj: obj['name'] == 'SubFolder')
        .next_level_predicate(lambda obj: obj['name'] == 'MainTasks')
        .final_action(lambda obj, storage=children_dict: storage.update({obj['id']: obj['name']}))
        .process(folder))
    return children_dict

Attribute error when importing local Python file

I would like some help in figuring out an issue. The code below is attempting to import a file called game_getter.py to access its all_games dictionary variable.
from django.db import models
from catolog import game_getter

# Create your models here.
class Game(models.Model):
    url_clue = ["//images.igdb.com"]
    for game in game_getter.all_games:
        title = game_getter.all_games[game][0]
        if url_clue in game_getter.all_games[game][1]:
            cover_art = game_getter.all_games[game]
        else:
            pass
        if game_getter.all_games[game][2] == None:
            pass
        else:
            storyline = game_getter.all_games[game][2]
        if game_getter.all_games[game][3] == None:
            pass
        else:
            storyline = game_getter.all_games[game][3]
        genre_pac = game_getter.all_games[game][4]

    def __str__(self):
        return self.title

class UserSearch():
    user_input = str
At the bottom of this next section I used a return on the dictionary object all_games. I've even tried making it a global variable and the computer still won't see it.
# Create a search
def search_query(user_input, exp_time):
    # Generate token if needed
    generate_token(exp_time)
    # establish client_id for wrapper
    client_id = "not my client_id"
    wrapper = IGDBWrapper(client_id, token)
    # Query the API based on a search term.
    query = wrapper.api_request(
        'games',  # Requesting the name, storyline, genre name, and cover art url where the user input is the search term
        f'fields name, storyline, genres.slug, cover.url; offset 0; where name="{user_input}"*; sort first_release_date; limit: 100;',
        # Also sorted by release date with a limit of 100 results
    )
    # Load the binary data into json
    message_json = json.loads(query)
    # List of all games returned
    global all_games
    all_games = dict()
    key = 0
    # Grab each value by key and separate per game
    for game in message_json:
        name = game.get('name')
        cover_url = game.get('cover')
        storyline = game.get('storyline')
        summary = game.get('summary')
        genre_set = game.get('genres')
        # Genre possesses none to many tags which need to be sorted.
        genre_list = []
        if genre_set:
            for type in genre_set:
                for i in type:
                    genre_list.append(type[i])
            for i in genre_list:
                genre_list = [x for x in genre_list if not isinstance(x, int)]
        else:
            pass
        # Group together by game
        if game.get('cover') != None:
            result = [name, cover_url.get('url'), storyline, summary, genre_list]
            # Add the game to the collection of all games found
            all_games[key] = result
            key += 1
        else:
            result = [name, storyline, summary, genre_list]
            # Add the game to the collection of all games found
            all_games[key] = result
            key += 1
    return all_games
What am I missing?
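For what it's worth, a global created inside a function only exists as a module attribute after that function has actually run, so module-level code that touches it at import time raises AttributeError. A minimal sketch of that mechanism (hypothetical module, unrelated to the code above):
# mod.py
def make_data():
    global data
    data = {0: 'example'}

# elsewhere, after `import mod`:
mod.data         # AttributeError: 'module' object has no attribute 'data'
mod.make_data()
mod.data         # {0: 'example'} -- the attribute exists only now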

I'm trying to use a dictionary as the underlying data structure to create a simple file system, but I'm running into issues with accessing keys.

The main idea is that when we make a directory and then call ls on it, it should return whatever is in b. However, I'm running into the issue that my code goes into the if statement, meaning it knows there is a key in the defaultdict that matches our key (b), but it still returns None.
I'm not sure why, and I would appreciate it if anyone can tell me what is wrong with my code.
Here is the command I ran:
fileSystem = FileSystem()
fileSystem.mkdir("/a/b/c")
fileSystem.ls("/a/b");
This should output ['c'], but instead my code returns None.
from collections import defaultdict

class FileSystem(object):
    def __init__(self):
        self.file_system = defaultdict(list)
        self.saveData = dict()
        self.currentPath = []

    def ls(self, path):
        split_path = path.split('/')
        dest = split_path[-1]
        if dest in self.file_system.keys():
            return self.file_system.get(dest).sort()
        else:
            return self.currentPath

    def mkdir(self, path):
        split_path = path.split('/')
        parent = split_path[1]
        self.file_system[split_path[1]].append(split_path[1])
        self.currentPath.append(split_path[1])
        for folder in split_path[2:]:
            self.file_system[parent].append(folder)
            self.saveData[folder] = None
            parent = folder

    def addContentToFile(self, filePath, content):
        split_path = filePath.split('/')
        dest = split_path[-1]
        if dest in self.saveData.keys():
            history = self.saveData[dest]
            self.saveData[dest] = history + content
        else:
            self.saveData[dest] = content

    def readContentFromFile(self, filePath):
        split_path = filePath.split('/')
        dest = split_path[-1]
        if dest in self.saveData:
            return self.saveData[dest]

fileSystem = FileSystem()
fileSystem.mkdir("/a/b/c")
fileSystem.ls("/a/b");
In your ls method you have return self.file_system.get(dest).sort(). The sort method alters the existing list and returns None. You need
return sorted(self.file_system.get(dest))
Since you don't provide a default value for get this can be written with a simple dictionary access like
return sorted(self.file_system[dest])
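A quick demonstration of the difference:
nums = [3, 1, 2]
print(nums.sort())        # None -- sort() mutates nums in place
print(nums)               # [1, 2, 3]
print(sorted([3, 1, 2]))  # [1, 2, 3] -- sorted() returns a new list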

How to create a many-to-many relationship variable and increase code usability?

I'm trying to create a common utility for file transfer, from source to destination. The arguments are separated into dir and filename, which are the results of os.path.dirname(some_file) and os.path.basename(some_file), respectively. The destination filename is the same as the source filename by default if not specified.
I have created a script that works perfectly well with the specific requirements of my first project. But as you can see, the get_src_des method is so repetitive that I want to improve the code's re-usability and get rid of the nasty if...elif... statement. Does anyone have a better idea of how to rewrite this method?
class FileTransfer:
    def __init__(self, ftp_dir, local_dir, ftp_filename=None, local_filename=None):
        self.ftp_dir = ftp_dir
        self.ftp_filename = ftp_filename
        self.local_dir = local_dir
        self.local_filename = local_filename
        self.ftp_dict = self.get_group(ftp_dir, ftp_filename)
        self.local_dict = self.get_group(local_dir, local_filename)

    @staticmethod
    def get_group(dir, filename):
        group = {
            "dir": dir,
            "filename": filename,
        }
        return group

    def get_src_des(self, src):
        if src == "ftp":
            dict_src = self.ftp_dict
            dict_des = self.local_dict
        elif src == "local":
            dict_src = self.local_dict
            dict_des = self.ftp_dict
        else:
            dict_src = None
            dict_des = None
        return dict_src, dict_des

    # other methods, such as download_from_src_to_des, upload_from_src_to_des, ...
Yes, this is a classic use case for a dictionary. You can rewrite your code as follows:
class FileTransfer:
    def __init__(self, ftp_dir, local_dir, ftp_filename=None, local_filename=None):
        self.ftp_dir = ftp_dir
        self.ftp_filename = ftp_filename
        self.local_dir = local_dir
        self.local_filename = local_filename
        self.ftp_dict = self.get_group(ftp_dir, ftp_filename)
        self.local_dict = self.get_group(local_dir, local_filename)
        self.param_dict = {
            'ftp': (self.ftp_dict, self.local_dict),
            'local': (self.local_dict, self.ftp_dict)
        }

    @staticmethod
    def get_group(dir, filename):
        group = {
            "dir": dir,
            "filename": filename,
        }
        return group

    def get_src_des(self, src):
        if src in self.param_dict:
            return self.param_dict[src]
        else:
            return (None, None)
Next time, instead of adding another elif branch, you just add another entry to param_dict.
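For example, a hypothetical third location (the 's3' key and self.s3_dict below are invented for illustration) is just one more entry:
self.param_dict['s3'] = (self.s3_dict, self.local_dict)  # hypothetical extra source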
I agree that this could be done with a dictionary, as @Yakov Dan's answer suggests, but I would code it as shown below, which doesn't require any other changes to the class and is more dynamic.
The get_group() method could be written more concisely, as indicated.
class FileTransfer:
    ...

    @staticmethod
    def get_group(dir, filename):
        return dict(dir=dir, filename=filename)

    def get_src_des(self, src):
        return {
            'ftp': (self.ftp_dict, self.local_dict),
            'local': (self.local_dict, self.ftp_dict)
        }.get(src, (None, None))
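Either way the call site stays the same; roughly (directory and filename values invented for illustration):
ft = FileTransfer('/remote/dir', '/local/dir', ftp_filename='data.csv')
src, des = ft.get_src_des('ftp')    # ({'dir': '/remote/dir', ...}, {'dir': '/local/dir', ...})
src, des = ft.get_src_des('bogus')  # (None, None)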

Python class: The data is somehow lost

I am trying to define a Python class which analyzes subreddit data via the praw package.
I am fairly experienced with OOP in C++, but have not had much experience with OOP in Python. Here is the code I have so far:
import praw

class SubRedditAnalyzer:
    def __init__(self, reddit_session, name='dataisbeautiful'):
        self.name = name  # subreddit name
        self.reddit_session = reddit_session  # assign the reddit session
        self.subreddit = self.reddit_session.get_subreddit(self.name)  # create the subreddit object
        self.timeframe = 'day'
        self.max_post_count = 10
        self.submissions = self.subreddit.get_top_from_hour(limit=10)

    def __del__(self):
        class_name = self.__class__.__name__
        print class_name, "destroyed"

    def get_top_submissions(self, max_post_count):
        timeframe = self.timeframe
        if (timeframe == 'hour'):
            self.submissions = self.subreddit.get_top_from_hour(limit=max_post_count)
        elif (timeframe == 'day'):
            self.submissions = self.subreddit.get_top_from_day(limit=max_post_count)
        elif (timeframe == 'week'):
            self.submissions = self.subreddit.get_top_from_week(limit=max_post_count)
        elif (timeframe == 'month'):
            self.submissions = self.subreddit.get_top_from_month(limit=max_post_count)
        elif (timeframe == 'year'):
            self.submissions = self.subreddit.get_top_from_year(limit=max_post_count)
        elif (timeframe == 'all'):
            self.submissions = self.subreddit.get_top_from_all(limit=max_post_count)

    def combine_titles(self):
        titles = ""
        for submission in self.submissions:
            titles += submission.title
        self.titles = titles

    def display_titles(self):
        counter = 1
        ya = self.submissions
        for sub in self.submissions:
            sc = sub.score
            ti = sub.title
            print('T%d- [%d] %s \n' % (counter, sc, ti))
            counter += 1

def main():
    r = praw.Reddit('Request to fetch data by user')
    sr = SubRedditAnalyzer(r, 'dataisbeautiful')
    sr.get_top_submissions(15)  # top 15 from reddit
    sr.combine_titles()         # combine the top titles
    sr.display_titles()         # display all the titles

main()
For some unknown (to me) reason, it seems that the data in the instance 'sr' is lost after calling:
sr.combine_titles()
When I then try to call this method, the data in the class is empty:
sr.display_titles()
In fact, I do see the message that the class is destroyed:
SubRedditAnalyzer destroyed
What is it that I am doing wrong?
In advance, thanks for your attention.
It seems that self.submissions may be an iterable but not a collection (e.g. a list). The docs call get_top_from_hour() a generator method (although they also state that what is returned is a list...). If it is indeed a generator method, the result can be iterated over only once. All other attempts at iteration will fail silently (the loop in display_titles() executes nothing).
So, the solution would be:
self.submissions = list(self.subreddit.get_top_from_hour(limit=10))
in __init__() to convert an iterable into a permanent collection (list) that can be iterated over multiple times.
According to the PRAW docs, get_content and its associated methods like get_top_from_hour return a generator. A generator can only be iterated once, which you do in combine_titles. After that iteration, the generator is exhausted and cannot be iterated again.
You could presumably convert the submissions to a list when you get them in __init__:
self.submissions = list(self.subreddit.get_top_from_hour(limit=10))
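The effect is easy to reproduce with any generator:
gen = (n * n for n in range(3))
list(gen)  # [0, 1, 4]
list(gen)  # [] -- exhausted; a second pass silently yields nothing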

url builder for python

I know about urllib and urlparse, but I want to make sure I wouldn't be reinventing the wheel.
My problem is that I am going to be fetching a bunch of urls from the same domain via the urllib library. I basically want to be able to generate urls to use (as strings) with different paths and query params. I was hoping that something might have a syntax like:
url_builder = UrlBuilder("some.domain.com")
# should give me "http://some.domain.com/blah?foo=bar"
url_i_need_to_hit = url_builder.withPath("blah").withParams("foo=bar")  # maybe a ".build()" after this
Basically I want to be able to store defaults that get passed to urlparse.urlunsplit instead of constantly clouding up the code by passing in the whole tuple every time.
Does something like this exist? Do people agree it's worth throwing together?
Are you proposing an extension to http://docs.python.org/library/urlparse.html#urlparse.urlunparse that would substitute into the 6-item tuple?
Are you talking about something like this?
def myUnparse(someTuple, scheme=None, netloc=None, path=None, params=None, query=None, fragment=None):
    parts = list(someTuple)
    if scheme is not None: parts[0] = scheme
    if netloc is not None: parts[1] = netloc
    if path is not None: parts[2] = path
    if params is not None: parts[3] = params
    if query is not None: parts[4] = query
    if fragment is not None: parts[5] = fragment
    return urlunparse(parts)
Is that what you're proposing?
This?
class URLBuilder(object):
    def __init__(self, base):
        self.parts = list(urlparse(base))

    def __call__(self, scheme=None, netloc=None, path=None, params=None, query=None, fragment=None):
        if scheme is not None: self.parts[0] = scheme
        if netloc is not None: self.parts[1] = netloc
        if path is not None: self.parts[2] = path
        if params is not None: self.parts[3] = params
        if query is not None: self.parts[4] = query
        if fragment is not None: self.parts[5] = fragment
        return urlunparse(self.parts)

bldr = URLBuilder(someURL)
print bldr(scheme="ftp")
Something like that?
You might want to consider having a look at furl, because it might be an answer to your needs.
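For example, something along these lines (a sketch from memory of furl's API; double-check against its docs):
from furl import furl

f = furl('http://some.domain.com')
f.path = 'blah'        # set the path
f.args['foo'] = 'bar'  # add a query parameter
print f.url            # http://some.domain.com/blah?foo=bar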
Still not quite sure what you're looking for... But I'll give it a shot. If you're just looking to make a class that will keep your default values and such, it's simple enough to make your own class and use Python magic methods like __str__. Here's a scratched-out example (suboptimal):
class UrlBuilder:
    def __init__(self, domain, path="blah", params="foo=bar"):
        self.domain = domain
        self.path = path
        self.params = params

    def withPath(self, path):
        self.path = path
        return self

    def withParams(self, params):
        self.params = params
        return self

    def __str__(self):
        return 'http://' + self.domain + '/' + self.path + '?' + self.params
        # or return urlparse.urlunparse(("http", self.domain, self.path, "", self.params, ""))

    def build(self):
        return self.__str__()

if __name__ == '__main__':
    u = UrlBuilder('www.example.com')
    print u.withPath('bobloblaw')
    print u.withParams('lawyer=yes')
    print u.withPath('elvis').withParams('theking=true')
If you're looking for more of the Builder Design Pattern, the Wikipedia article has a reasonable Python example (as well as Java).
I think you want http://pythonhosted.org/uritools/.
Example from the docs:
from uritools import urisplit, uriunsplit

parts = urisplit('foo://user@example.com:8042/over/there?name=ferret#nose')
orig_uri = uriunsplit(parts)
The split value is a named tuple, not a regular list. It is accessible by name or index:
assert(parts[0] == parts.scheme)
assert(parts[1] == parts.authority)
assert(parts[2] == parts.path)
assert(parts[3] == parts.query)
assert(parts[4] == parts.fragment)
Make a copy to make changes:
new_parts = [part for part in parts]
new_parts[2] = "/some/other/path"
new_uri = uriunsplit(new_parts)
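Since the split result is a named tuple, its _replace method gives an even shorter copy-and-modify; the same trick also works with the standard library's urlsplit (shown here in the Python 2 spelling):
from urlparse import urlsplit, urlunsplit  # urllib.parse in Python 3

parts = urlsplit('http://some.domain.com/blah?foo=bar')
new_uri = urlunsplit(parts._replace(path='/other', query='foo=baz'))
# 'http://some.domain.com/other?foo=baz'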
