Speed up nested for loops in Python - python

I have a function as below & the inputs are given as:
Inputs:
# Sample inputs for get_results().
# NOTE: the dates are not zero-padded ('2021-11-8'), so they must be parsed
# before being compared -- plain string comparison orders them incorrectly.
items = ['17', '2', '8', '34']

# item -> list of items considered identical to it (the item itself included).
item_mapping = {
    '17': ['17', '5', '8'],
    '2': ['2', '0'],
    '8': ['8', '2', '90'],
    '34': ['34', '33'],
}

# region -> {item -> date on which the item was last sold in that region}.
item_history = {
    'India': {
        '17': '2021-11-14',
        '2': '2021-11-10',
        '8': '2021-11-8',
        '34': '2021-09-22',
        '90': '2021-11-5',
        '33': '2021-11-11',
        '56': '2021-09-22',
        '0': '2021-11-3',
        '5': '2021-11-8',
    },
    'USA': {
        '17': '2021-11-10',
        '2': '2021-11-20',
        '8': '2021-11-25',
        '34': '2021-09-22',
        '90': '2021-11-6',
        '33': '2021-11-30',
        '56': '2021-09-22',
        '0': '2021-11-1',
        '5': '2021-11-13',
    },
}
The function:
def _date_key(date_text):
    """Convert a 'YYYY-M-D' string into a (year, month, day) tuple of ints."""
    return tuple(int(part) for part in date_text.split('-'))


def get_results(items, item_mapping, item_history):
    """Return one output line per item naming its latest-sold identical item per region.

    Args:
        items: list of item ids to report on. Only ids present in this list
            are eligible to be selected as the "latest sold" item.
        item_mapping: dict mapping an item id to the list of ids identical to
            it. An item missing from the mapping is identical only to itself.
        item_history: dict mapping a region name to a dict of
            ``item id -> 'YYYY-M-D'`` sale date.

    Returns:
        A list of strings, one per entry of ``items``, of the form
        ``"<item> <region> <selected> <region> <selected> ..."``. A region in
        which none of the eligible identical items was sold is left out of
        that item's line.
    """
    # Build the membership set once: testing `in` against the list inside the
    # innermost loop is what makes the naive version slow on large inputs.
    allowed = set(items)
    result_list = []
    for item in items:
        candidates = [c for c in item_mapping.get(item, [item]) if c in allowed]
        current_line = [item]
        for area, history in item_history.items():
            sold = [c for c in candidates if c in history]
            if sold:
                # Dates are not zero-padded, so compare them numerically.
                # max() keeps the first candidate on ties, exactly like a
                # stable descending sort followed by taking element 0.
                latest = max(sold, key=lambda c: _date_key(history[c]))
                current_line += [area, latest]
        result_list.append(" ".join(current_line))
    return result_list
For each item in list - 'items', I have the list of items identical to it in the dictionary -'item_mapping'.
I also have a dictionary - 'item_history' which has the dates on which items were sold in different regions.
The objective I am trying to achieve is, for each item, I would like to get the latest sold similar item in each region.
For example, take item 17:
The similar items are ['17','5','8']
For region 'India', the latest sold similar item, for item 17 is 17.
For region 'USA', it is '8'.
Hence the output line will be : ['17' 'India' '17' 'USA' '8']
Similarly, I want to get for all items and write lines to a text file.
When I have hundreds of thousands of items, these nested for loops, with their repeated sorting and list searches, take hours to run. How can I optimize this code? Are there other data structures I could use?
I tried implementing multiprocessing like below: but it's still equally slow.
def get_results(item, item_mapping, item_history, allowed_items=None):
    """Build the output line for a single item (worker for ``Pool.map``).

    Args:
        item: the item id to report on.
        item_mapping: dict mapping an item id to the list of ids identical to
            it. An item missing from the mapping is identical only to itself.
        item_history: dict mapping a region name to a dict of
            ``item id -> 'YYYY-M-D'`` sale date.
        allowed_items: optional container of ids eligible for selection.
            Pass a ``set`` for O(1) membership tests. When omitted, the
            module-level ``items`` is used.

    Returns:
        A one-element list holding the line
        ``"<item> <region> <selected> ..."``. Regions in which no eligible
        identical item was sold are left out.
    """
    if allowed_items is None:
        # Fallback for existing callers that bind only the two mappings.
        allowed_items = items
    candidates = [c for c in item_mapping.get(item, [item]) if c in allowed_items]
    current_line = [item]
    for area, history in item_history.items():
        sold = [c for c in candidates if c in history]
        if sold:
            # Dates are not zero-padded, so compare them as integer tuples;
            # max() keeps the first candidate on ties.
            latest = max(sold, key=lambda c: tuple(map(int, history[c].split('-'))))
            current_line += [area, latest]
    return [" ".join(current_line)]
import multiprocessing as mp
import functools
with mp.Pool(28) as pool:
result = pool.map(functools.partial(get_results,item_mapping=item_mapping,item_history=item_history), items)

Related

can't return a list with desired order from a python function

I want to write code that adds the items of my_order to burgerlist in the following order: the first and last elements of burgerlist should be bread, the second and second-to-last elements should be mayonnaise (if it is among the arguments passed to the function), then beef / chicken, then vegetables.
Please help me understand what to change here:
def my_odrer(*g):
ingredients = [['long_bread', 'circle_bread'], ['mayonnaise', 'ketchup'], ['beef', 'chicken'],
['cucumber', 'tomato', 'onion']]
burgerlist = []
for i in g:
if i in ingredients[0]:
burgerlist.insert(0, i)
elif i in ingredients[1]:
burgerlist.insert(1, i)
elif i in ingredients[2]:
burgerlist.append(i)
elif i in ingredients[3]:
burgerlist.append(i)
if burgerlist[1] == 'mayonnaise':
burgerlist.append(burgerlist[1])
burgerlist.append(burgerlist[0])
return burgerlist
print(my_odrer('circle_bread', 'beef', 'tomato', 'mayonnaise', 'ketchup'))
the output is: ['circle_bread', 'ketchup', 'mayonnaise', 'beef', 'tomato', 'circle_bread']
but I want to get: ['circle_bread', 'mayonnaise', 'ketchup', 'beef', 'tomato','mayonnaise', 'circle_bread']
Create 3 lists containing the ingredients that should be at the beginning, middle, and end. Then concatenate them to produce the final result.
def my_odrer(*g):
    """Arrange burger ingredients as bread, mayonnaise, fillings, mayonnaise, bread.

    Args:
        *g: ingredient names in any order. Names that belong to no known
            category are ignored.

    Returns:
        A new list: the breads (plus mayonnaise, if ordered) at the start,
        then condiments, meats and vegetables in that order, then the closing
        layer in which mayonnaise is placed just before the last bread.
    """
    breads = {'long_bread', 'circle_bread'}
    condiments = {'ketchup'}  # mayonnaise not included, since it's handled specially
    meats = {'beef', 'chicken'}
    vegetables = {'cucumber', 'tomato', 'onion'}

    beginning = [item for item in g if item in breads]
    end = list(beginning)
    if "mayonnaise" in g:
        beginning.append("mayonnaise")
        # insert(-1, ...) puts the mayonnaise just before the closing bread
        # (or alone in the list when no bread was ordered).
        end.insert(-1, "mayonnaise")

    # Fillings keep the caller's relative order inside each category.
    middle = []
    for category in (condiments, meats, vegetables):
        middle.extend(item for item in g if item in category)
    return beginning + middle + end
It works after I added an additional condition in your final if statement.
def my_odrer(*g):
    """Arrange burger ingredients with bread outermost and mayonnaise next to it.

    Args:
        *g: ingredient names in any order. Names that belong to no known
            category are ignored.

    Returns:
        The ordered list of ingredients. When the order starts with a bread,
        that bread is repeated at the end; when mayonnaise was ordered it sits
        in second position and is repeated before the closing bread. An empty
        order yields an empty list.
    """
    ingredients = [['long_bread', 'circle_bread'], ['mayonnaise', 'ketchup'], ['beef', 'chicken'],
                   ['cucumber', 'tomato', 'onion']]
    burgerlist = []
    for i in g:
        if i in ingredients[0]:
            burgerlist.insert(0, i)
        elif i in ingredients[1]:
            burgerlist.insert(1, i)
        elif i in ingredients[2] or i in ingredients[3]:
            burgerlist.append(i)

    # Length guards: short orders used to raise IndexError on these lookups.
    if len(burgerlist) > 1 and burgerlist[1] == 'mayonnaise':
        burgerlist.append(burgerlist[1])
    elif len(burgerlist) > 2 and burgerlist[2] == 'mayonnaise':
        # Another sauce was inserted in front of the mayonnaise; swap them so
        # the mayonnaise ends up directly after the bread.
        burgerlist[1], burgerlist[2] = burgerlist[2], burgerlist[1]
        burgerlist.append(burgerlist[1])

    # Close the burger only with bread, never with whatever happens to be first.
    if burgerlist and burgerlist[0] in ingredients[0]:
        burgerlist.append(burgerlist[0])
    return burgerlist

Save text elements separated by tab and commas in lists

I have the following text format in python:
126 attr1,attr7,attr4 and attr8
1 attr6,attr2,attr9,attr78,attr23,attr56,attr75,attr77
5 attr5,attr3,attr2
7 attr0
67 attr12,attr13,attr14
I want to save the ids (126, 1, 5, etc.) in a list, and the attributes of every line in a list of lists or a dict. I managed to save the ids with the following code, but I can't save the attributes. Here is my code:
file = open("myfile.txt","r")
lines = file.readlines()
nodes = []
skills = [] #or dict()
for x in lines:
nodes.append(x.split('\t')[0])
skills.append(x.split(',')[0]) #i want a list of lists or a dict with attrs
I think that this will do the trick:
for x in lines:
    # readlines() keeps the trailing newline; strip it so the last attribute
    # of each row does not end in '\n'.
    fields = x.rstrip('\n').split('\t')
    nodes.append(fields[0])            # id column
    skills.append(fields[1].split(','))  # attributes as a list of strings
I would rather advise using a single dictionary, with the ids as keys and each line's attributes as a list of values:
import re

# Map each id to the list of its attributes.
d = {}
with open("myfile.txt", "r") as file:  # closed automatically on exit
    for line in file:
        # Split the id off at the first run of whitespace only, so attribute
        # text containing spaces ("attr4 and attr8") is kept whole.
        fields = line.split(None, 1)
        if len(fields) < 2:
            continue  # blank line, or an id without attributes
        node_id, attrs = fields
        # Attributes are separated by commas or by the word "and".
        d[node_id] = re.split(r'\s*,\s*|\s+and\s+', attrs.strip())
print(d)
# {'126': ['attr1', 'attr7', 'attr4', 'attr8'],
# '1': ['attr6', 'attr2', 'attr9', 'attr78', 'attr23', 'attr56', 'attr75', 'attr77'],
# '5': ['attr5', 'attr3', 'attr2'],
# '7': ['attr0'],
# '67': ['attr12', 'attr13', 'attr14']}
If you want each line's attributes kept as comma-joined strings (split only on whitespace), then:
attr_dict={}
with open('file.txt','r') as f:
for line in f:
attr_dict[line.split()[0]]=line.split()[1:]
print(attr_dict)
output:
{'126': ['attr1,attr7,attr4', 'and', 'attr8'], '7': ['attr0'], '5': ['attr5,attr3,attr2'], '1': ['attr6,attr2,attr9,attr78,attr23,attr56,attr75,attr77'], '67': ['attr12,attr13,attr14']}
If you want the individual elements, then:
attr_dict={}
with open('file.txt','r') as f:
for line in f:
data=line.split()
for sub_data in data:
attr_dict[line.split()[0]]=sub_data.split(',')
print(attr_dict)
output:
{'126': ['attr8'], '7': ['attr0'], '5': ['attr5', 'attr3', 'attr2'], '1': ['attr6', 'attr2', 'attr9', 'attr78', 'attr23', 'attr56', 'attr75', 'attr77'], '67': ['attr12', 'attr13', 'attr14']}

python list of dictionaries only updating 1 attribute and skipping others

I have a list of lists containing company objects:
companies_list = [companies1, companies2]
I have the following function:
def get_fund_amount_by_year(companies_list):
companies_length = len(companies_list)
for idx, companies in enumerate(companies_list):
companies1 = companies.values_list('id', flat=True)
funding_rounds = FundingRound.objects.filter(company_id__in=companies1).order_by('announced_on')
amount_per_year_list = []
for fr in funding_rounds:
fr_year = fr.announced_on.year
fr_amount = fr.raised_amount_usd
if not any(d['year'] == fr_year for d in amount_per_year_list):
year_amount = {}
year_amount['year'] = fr_year
for companies_idx in range(companies_length):
year_amount['amount'+str(companies_idx)] = 0
if companies_idx == idx:
year_amount['amount'+str(companies_idx)] = fr_amount
amount_per_year_list.append(year_amount)
else:
for year_amount in amount_per_year_list:
if year_amount['year'] == fr_year:
year_amount['amount'+str(idx)] += fr_amount
return amount_per_year_list
The problem is the resulting list of dictionaries has only one amount attribute updated.
As you can see "amount0" contains all "0" amounts:
[{'amount1': 12100000L, 'amount0': 0, 'year': 1999}, {'amount1':
8900000L, 'amount0': 0, 'year': 2000}]
What am I doing wrong?
I was creating the list of dictionaries inside the loop, so each iteration started from a fresh empty list and discarded what the previous iteration had built. I moved the initialization above the loop, so it now looks like:
def get_fund_amount_by_year(companies_list):
companies_length = len(companies_list)
amount_per_year_list = []  # moved out of the loop so it is created only once
for idx, companies in enumerate(companies_list):
companies1 = companies.values_list('id', flat=True)
funding_rounds = FundingRound.objects.filter(company_id__in=companies1).order_by('announced_on')

How to resolve AttributeError: 'list' object has no attribute 'next' in Python

I am trying to read a file where the odd lines are department numbers, and even ones are sales totals. I need to be able to read a line and append it to a variable to be used later.
def main():
with open('P2data.txt') as x:
data = x.readlines()
dept = (data)[::2]
sales = (data)[1::2]
if dept == '1':
total = sales.next()
total.append(total1)
elif dept == '2':
total = sales.next()
total.append(total2)
else:
total = sales.next()
total.append(total3)
print('Dept 1:', total1)
print('Dept 2:', total2)
print('Dept 3:', total3)
main()
Your code is going in the wrong direction. You also are doing things like checking an entire data structure against what should be compared to one of that structure's elements, and mixing up the syntax for appending to a list. Simply loop over the data structures you create and add to a dictionary:
def main():
    """Sum the sales per department from 'P2data.txt' and print the totals.

    The file alternates lines: a department number, then a sales amount.
    Departments '1', '2' and '3' are always reported, even with no sales; any
    other department found in the file is reported as well.

    Raises:
        OSError: if 'P2data.txt' cannot be opened.
        ValueError: if a sales line is not a number.
    """
    with open('P2data.txt') as x:
        # Skip blank lines so they cannot shift the department/sales pairing.
        data = [line.strip() for line in x if line.strip()]
    totals = {'1': 0, '2': 0, '3': 0}
    # Even positions hold departments, odd positions the matching sales.
    for dep, sale in zip(data[::2], data[1::2]):
        # .get() lets an unlisted department accumulate instead of raising KeyError.
        totals[dep] = totals.get(dep, 0) + float(sale)
    for dep in sorted(totals):
        print('Dept {}: {}'.format(dep, totals[dep]))
main()

Appending to create a list instead of creating a list inside a list

Currently my code looks as following:
data = ""
pattern1 = re.compile('')
pattern2 = re.compile('')
pattern3 = re.compile('')
items = re.findall(pattern1, data
mainlist = []
for item in items:
forename = re.findall(pattern2, item)
surname = re.findall(pattern3, item)
mainlist.append(surname)
The only problem with this layout is that I am getting lists like:
[['Smith', 'Patricks', 'Clark'], ['Austin', 'Hamilton', 'Day', 'Sidders'], ['Bennet']]
I'm wanting my lists to come out as follows:
['Smith', 'Patricks', 'Clark', 'Austin', 'Hamilton', 'Day', 'Sidders', 'Bennet']
Any ideas?
Thanks in advance
- Hy
Use extend:
for item in items:
forename = re.findall(pattern2, item)
surname = re.findall(pattern3, item)
mainlist.extend(surname)
myList.extend(L) adds the individual elements of L onto myList. It's similar to:
for element in L:
myList.append(element)

Categories

Resources