I am trying to use multiprocessing to speed up ML inference with PyTorch in AWS Lambda. I am going off a blog post on how to use multiprocessing effectively in AWS Lambda ("Parallel processing in python with aws lambda"): since Lambda is fairly bare-bones and has no shared memory (no /dev/shm), I have to use multiprocessing.Pipe and multiprocessing.Process. I have followed what the post and its comments say, but when I push the code below to AWS Lambda it deadlocks and hangs: the program stalls but keeps running, and no error is returned (Lambda then times out, of course) and no error message is left. The strange thing is that the same code runs perfectly fine on my MacBook and on Google Colab (link below).
If anybody has any ideas why this doesn't work in AWS Lambda, please let me know. Any help would be greatly appreciated.
GOOGLE COLAB
https://colab.research.google.com/drive/1UUEC9UDeNwHmkS2MwMAm8lGa2OqnZqYl?usp=sharing
CODE
# -*- coding: utf-8 -*-
from sentence_splitter import SentenceSplitter
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from re import sub
from multiprocessing import Process, Pipe, set_start_method
set_start_method('fork', force=True)
model_path = "sshleifer/distilbart-cnn-12-6"
print('getting tokenizer and model for summarization...')
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
print('got tokenizer and model for summarization')
def serverless_pipeline():
def predict(fulltext):
        # preprocessing text (not important)
def preprocess_plain_text(x):
x = sub(r"https*\S+", " ", x) # url
x = sub(r"#\S+", " ", x) # mentions
x = sub(r"\s{2,}", " ", x) # over spaces
x = x.replace('–', '-')
x = x.replace('–', '-')
x = x.replace('’', "'")
x = sub("[^.,[.*?\]!$#(%?'-)}&+{=;:A-Za-z0-9]+", " ", x) # special charachters except .,!$(%?'-)&+=;:
return x
# chunking text (not important)
def chunk_clean_text(text):
newtext = preprocess_plain_text(text)
splitter = SentenceSplitter(language='en')
sentences = (splitter.split(text=newtext))
current_chunk = 0
chunks = []
for sentence in sentences:
if len(chunks) == current_chunk + 1:
if len(chunks[current_chunk]) + len(sentence.split(" ")) <= 450:
chunks[current_chunk].extend(sentence.split(" "))
else:
current_chunk += 1
chunks.append(sentence.split(" "))
else:
chunks.append(sentence.split(" "))
for chunk_id in range(len(chunks)):
chunks[chunk_id] = " ".join(chunks[chunk_id])
return chunks
clean_text = chunk_clean_text(fulltext)
def sumamrize_text(chunked_text, minimum_length, maximum_length):
print("starting actully summarizing the chunk...")
print('getting inputs in tokenizer()...')
inputs = tokenizer(str(chunked_text), return_tensors="pt")
print('got inputs in tokenizer()')
print('getting outputs in model.generate()...')
# DEADLOCKS HERE IN AWS LAMBDA
modelgenerate = model.generate(inputs["input_ids"], min_length=minimum_length, max_length=maximum_length, num_beams=4)[0]
print('got outputs in model.generate()')
chunk_summary = tokenizer.decode(modelgenerate)
print("finished actully summarizing chunk")
summarychunk = str(chunk_summary)
summarychunkend = chunk_summary[:-4]
summarychunk = summarychunkend[7:]
return str(summarychunk)
def summarize_norm(num_words, chunked_text, id, conn):
if num_words > 100:
print('starting summarize_norm')
minimum_length = num_words / 4.5
minimum_length = int(minimum_length)
maximum_length = int(num_words*100)
summarytext = sumamrize_text(chunked_text, minimum_length, maximum_length)
result = id, summarytext
print('finished making summary from summarize_norm')
print('sending result')
conn.send([result])
print('result sent')
                print('closing connection')
conn.close()
print('connection closed')
def summarize_short(summary_list, num_words, chunked_text):
if num_words <= 100 and num_words > 40:
minimum_length = num_words / 2
minimum_length = int(minimum_length)
maximum_length = int(num_words*100)
summarytext = sumamrize_text(chunked_text, minimum_length, maximum_length)
print(summarytext)
summary_list.append(str(summarytext))
def appendsummary(summary_list, num_words, chunked_text):
if num_words <= 40:
print(chunked_text)
summary_list.append(chunked_text)
def summarize():
summary_list = []
processes = []
parent_connections = []
result_list = []
for chunked_text in clean_text:
parent_conn, child_conn = Pipe(duplex=True)
parent_connections.append(parent_conn)
num_words = chunked_text.split()
num_words = len(num_words)
if num_words > 975:
return 'Make sure your prompt is properly punctuated.. And try again.'
if sub(r'[^\w\s]', '', chunked_text) == chunked_text:
return 'Make sure your prompt is properly punctuated.. And try again.'
id = clean_text.index(chunked_text)
process = Process(target=summarize_norm, args=(num_words, chunked_text, id, child_conn,))
processes.append(process)
process.start()
print('process started')
child_conn.close()
for parent_connection in parent_connections:
                print('receiving parent_connection results and appending them...')
                result_list.append(parent_connection.recv()[0])
                print('received parent_connection results and appended them')
for process in processes:
                print('joining processes')
                process.join()
                print('joined processes')
appendsummary(summary_list, num_words, clean_text[-1])
summarize_short(summary_list, num_words, clean_text[-1])
for summaries in sorted(result_list):
summaries = list(summaries)
summaries.remove(summaries[0])
summaries = str(summaries)
summaries = summaries[2:]
summaries = summaries[:-2]
summary_list.append(summaries)
finalsummary = ' '.join(summary_list)
return finalsummary
summary = summarize()
return summary
return predict
summarizing_pipeline = serverless_pipeline()
def handler(event, context):
# context = event['text']
print(context)
context = str(context)
summary = summarizing_pipeline(context)
return {'summary' : summary}
# Test it on random article
handler(1, '''
An article to summarize and test here
''')
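For reference, the bare Process/Pipe fan-out pattern the code above follows (with the model and tokenizer stripped out and a trivial stand-in worker) is roughly this; worker and fan_out are my own placeholder names, mirroring summarize_norm and summarize():

from multiprocessing import Process, Pipe

def worker(idx, chunk, conn):
    # stand-in for summarize_norm: do the work, send the result back, close the pipe
    conn.send([(idx, chunk.upper())])
    conn.close()

def fan_out(chunks):
    processes, parent_conns, results = [], [], []
    for idx, chunk in enumerate(chunks):
        parent_conn, child_conn = Pipe()
        parent_conns.append(parent_conn)
        p = Process(target=worker, args=(idx, chunk, child_conn))
        processes.append(p)
        p.start()
        child_conn.close()              # the parent no longer needs its copy of the child end
    for conn in parent_conns:
        results.append(conn.recv()[0])  # recv before join, same ordering as above
    for p in processes:
        p.join()
    return [text for _, text in sorted(results)]

if __name__ == '__main__':
    print(fan_out(['chunk one', 'chunk two']))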
Related
I want to make an inverted index using multiprocessing to speed up its work. My idea is to split the files into groups, and each process will build its own inverted index, and then I want to merge all these indexes into one inverted index. But I don't know how to return them to the main process that will merge them.
import multiprocessing as mp
from pathlib import Path
import re
import time
class InvertedIndex:
def __init__(self):
self.index = dict()
def createIndex(self, path='data', threads_num=4):
pathList = list(Path(path).glob('**/*.txt'))
fileNum = len(pathList)
oneProcessNum = fileNum / threads_num
processes = []
for i in range(threads_num):
startIndex = int(i * oneProcessNum)
endIndex = int((i + 1) * oneProcessNum)
currLi = pathList[startIndex:endIndex]
p = mp.Process(target=self.oneProcessTask, args=(currLi,))
processes.append(p)
[x.start() for x in processes]
[x.join() for x in processes]
    @staticmethod
def oneProcessTask(listOfDoc):
#print(f'Start: {list[0]}, end: {list[-1]}') # temp
tempDict = dict()
for name in listOfDoc:
with open(name) as f:
text = f.read()
li = re.findall(r'\b\w+\b', text)
for w in li:
if tempDict.get(w) is None:
tempDict[w] = set()
tempDict[w].add(str(name))
def getListOfDoc(self, keyWord):
return self.index[keyWord]
if __name__ == '__main__':
ii = InvertedIndex()
start_time = time.time()
ii.createIndex()
print("--- %s seconds ---" % (time.time() - start_time))
I used multiprocessing.Manager to write everything into one dictionary, but that solution was too slow. So I went back to the idea of creating a separate inverted index in each process and then merging them. But I don't know how to return all the indexes to one process.
Take a look at concurrent.futures (in the standard library) with either ThreadPoolExecutor or ProcessPoolExecutor. FYI: I wrote this here without testing it, but this is more or less the gist of what I use all the time.
from concurrent.futures import ThreadPoolExecutor, as_completed
def foo(stuff: int) -> dict:
return {}
things_to_analyze = [1,2,3]
threads = []
results = []
with ThreadPoolExecutor() as executor:
    for thing in things_to_analyze:
        threads.append(executor.submit(foo, thing))
    for job in as_completed(threads):
        results.append(job.result())
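Since the goal here is to get each worker's dictionary back to the parent and merge them, a ProcessPoolExecutor version could look roughly like this (a minimal sketch; build_partial_index, merge_indexes, and the hard-coded file lists are placeholders, not your actual code):

from concurrent.futures import ProcessPoolExecutor, as_completed

def build_partial_index(paths):
    # worker: build a small {word: set_of_file_names} index for its chunk of files
    partial = {}
    for name in paths:
        with open(name) as f:
            for w in f.read().split():
                partial.setdefault(w, set()).add(str(name))
    return partial

def merge_indexes(partials):
    # parent: fold every worker's dictionary into one index
    merged = {}
    for partial in partials:
        for word, files in partial.items():
            merged.setdefault(word, set()).update(files)
    return merged

if __name__ == '__main__':
    chunks = [['a.txt'], ['b.txt']]  # however you split pathList
    with ProcessPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(build_partial_index, chunk) for chunk in chunks]
        partials = [f.result() for f in as_completed(futures)]
    index = merge_indexes(partials)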
I found a solution. I used pool.starmap to return a list of indexes.
My code:
class InvertedIndex:
def __init__(self):
self.smallIndexes = None
self.index = dict()
def createIndex(self, path='data', threads_num=4):
        pathList = list(Path(path).glob('**/*.txt'))  # Recursively walk all text files and collect them into a list
        fileNum = len(pathList)
        oneProcessNum = fileNum / threads_num  # Work out how many files each process should handle
processes_args = []
for i in range(threads_num):
startIndex = int(i * oneProcessNum)
endIndex = int((i + 1) * oneProcessNum)
processes_args.append((path, startIndex, endIndex))
pool = mp.Pool(threads_num)
self.smallIndexes = pool.starmap(self.oneProcessTask, processes_args)
self.mergeIndex()
    @staticmethod
def oneProcessTask(path, startIndex, endIndex):
pathList = list(Path(path).glob('**/*.txt'))
listOfDoc = pathList[startIndex:endIndex]
tempDict = dict()
for name in listOfDoc:
with open(name) as f:
text = f.read()
li = re.findall(r'\b\w+\b', text)
for w in li:
if tempDict.get(w) is None:
tempDict[w] = set()
tempDict[w].add(str(name))
return tempDict
Execution time decreased from 200 seconds (when I used shared memory and manager.dict) to 0.8 seconds (when I used pool.starmap).
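One thing the snippet doesn't show is mergeIndex(); a minimal sketch of what it might look like, assuming self.smallIndexes is the list of dictionaries returned by pool.starmap:

    def mergeIndex(self):
        # fold each worker's {word: set_of_file_names} dictionary into self.index
        for smallIndex in self.smallIndexes:
            for word, files in smallIndex.items():
                if word not in self.index:
                    self.index[word] = set()
                self.index[word].update(files)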
I've been working for around a week to learn SimPy for a discrete simulation I have to run. I've done my best, but I'm just not experienced enough to figure it out quickly. I am dying. Please help.
The system in question goes like this:
order arrives -> resource_1 (there are 2) performs take_order -> order broken into items -> resource_2 (there are 10) performs process_item
My code runs and performs the simulation, but I'm having a lot of trouble getting the queues on the resources to function. As in, queues do not build up on either resource when I run it, and I cannot find the reason why. I try resource.get_queue and get empty lists. There should absolutely be queues, as the orders arrive faster than they can be processed.
I think it has something to do with the logic for requesting resources, but I can't figure it out. Here's how I've structured the code:
import simpy
import random
import numpy as np
total_items = []
total_a = []
total_b = []
total_c = []
order_Q = []
item_Q = []
skipped_visits = []
order_time_dict = {}
order_time_dict2 = {}
total_order_time_dict = {}
var = []
class System:
def __init__(self,env,num_resource_1,num_resource_2):
self.env = env
self.resource_1 = simpy.Resource(env,num_resource_1)
self.resource_2 = simpy.Resource(env,num_resource_2)
def take_order(self, order):
self.time_to_order = random.triangular(30/60,60/60,120/60)
arrive = self.env.now
yield self.env.timeout(self.time_to_order)
def process_item(self,item):
total_process_time = 0
current = env.now
order_num = item[1][0]
for i in range(1,item[1][1]):
if 'a' in item[0]:
total_process_time += random.triangular(.05,7/60,1/6) #bagging time only
#here edit order time w x
if 'b' in item[0]:
total_process_time += random.triangular(.05,.3333,.75)
if 'c' in item[0]:
total_process_time += random.triangular(.05,7/60,1/6)
#the following is handling time: getting to station, waiting on car to arrive at window after finished, handing to cust
total_process_time += random.triangular(.05, 10/60, 15/60)
item_finish_time = current + total_process_time
if order_num in order_time_dict2.keys():
start = order_time_dict2[order_num][0]
if order_time_dict2[order_num][1] < item_finish_time:
order_time_dict2[order_num] = (start, item_finish_time)
else:
order_time_dict2[order_num] = (current, item_finish_time)
yield self.env.timeout(total_process_time)
class Order:
def __init__(self, order_dict,order_num):
self.order_dict = order_dict
self.order_num = order_num
self.order_stripped = {}
for x,y in list(self.order_dict.items()):
if x != 'total':
if y != 0:
self.order_stripped[x] = (order_num,y) #this gives dictionary format {item: (order number, number items) } but only including items in order
self.order_list = list(self.order_stripped.items())
def generate_order(num_orders):
print('running generate_order')
a_demand = .1914 ** 3
a_stdev = 43.684104
b_demand = .1153
b_stdev = 28.507782
c_demand = .0664
c_stdev = 15.5562624349
num_a = abs(round(np.random.normal(a_demand)))
num_b = abs(round(np.random.normal(b_demand)))
num_c = abs(round(np.random.normal(c_demand)))
total = num_orders
total_a.append(num_a)
total_b.append(num_b)
total_c.append(num_c)
total_num_items = num_a + num_b + num_c
total_items.append(total_num_items)
order_dict = {'num_a':num_a, 'num_b':num_b,'num_c':num_c, 'total': total}
return order_dict
def order_process(order_instance,system):
enter_system_at = system.env.now
print("order " + str(order_instance.order_num) + " arrives at " + str(enter_system_at))
if len(system.resource_1.get_queue) > 1:
print("WORKING HERE ******************")
if len(system.resource_1.get_queue) <= 25:
with system.resource_1.request() as req:
order_Q.append(order_instance)
yield req
yield env.process(system.take_order(order_instance))
order_Q.pop()
enter_workstation_at = system.env.now
print("order num " + str(order_instance.order_num) + " enters workstation at " + str(enter_workstation_at))
for item in order_instance.order_list:
item_Q.append(item)
with system.resource_2.request() as req:
yield req
yield env.process(system.process_item(item))
if len(system.resource_2.get_queue) >1:
var.append(1)
item_Q.pop()
leave_workstation_at = system.env.now
print("Order num " + str(order_instance.order_num) + " leaves at " + str(leave_workstation_at))
order_time_dict[order_instance.order_num] = leave_workstation_at-enter_workstation_at
total_order_time_dict[order_instance.order_num]=leave_workstation_at-enter_system_at
else:
skipped_visits.append(1)
def setup(env):
system = System(env,2,15)
order_num = 0
while True:
next_order = random.expovariate(3.5) #where 20 is order arrival mean (lambda)
yield env.timeout(next_order)
order_num+=1
env.process(order_process(Order(generate_order(order_num),order_num),system))
env = simpy.Environment()
env.process(setup(env))
env.run(until=15*60)
print("1: \n", order_time_dict)
I think you are looking at the wrong queue.
The API for getting the queued requests of a resource is just the queue attribute, so try using
len(system.resource_1.queue)
get_queue and put_queue come from the base class and are used to derive new resource classes.
But wait, they are not what a reasonable person would assume, and I find this confusing too. The docs say that requesting a resource is modeled as "putting a process' token into the resource", which means that when you call request(), the request goes into the put_queue, not the get_queue. And with a Resource, release() always succeeds immediately, so its queue (which is the get_queue) is always empty.
I think queue is just an alias for the put_queue, but queue is much less confusing.
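As a standalone toy illustration (not the asker's model), here queue is visibly non-empty while requests wait:

import simpy

def customer(env, counter):
    with counter.request() as req:
        # while waiting here, this request sits in counter.queue
        yield req
        yield env.timeout(5)

def monitor(env, counter):
    while True:
        # counter.queue holds the requests still waiting for the resource
        print(env.now, 'waiting requests:', len(counter.queue))
        yield env.timeout(1)

env = simpy.Environment()
counter = simpy.Resource(env, capacity=1)
for i in range(3):
    env.process(customer(env, counter))
env.process(monitor(env, counter))
env.run(until=12)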
Thanks for the earlier answer; I have changed the code as Alperen suggested, but I have another problem. My code:
import sys
import os
import itertools
import os.path
import random
from PIL import Image
from svmutil import *
DIMENSION = 200
sys.path.append("../train/")
ROOT_DIR = os.path.dirname(os.getcwd()) + "/train"
NEGATIVE = "negative"
POSITIVE = "positive"
CLASSES = [NEGATIVE, POSITIVE]
# libsvm constants
LINEAR = 0
RBF = 2
# Other
USE_LINEAR = False
IS_TUNING = False
def main():
try:
train, tune, test = getData(IS_TUNING)
models = getModels(train)
results = None
if IS_TUNING:
print ("!!! TUNING MODE !!!")
results = classify(models, tune)
else:
results = classify(models, test)
print
totalCount = 0
totalCorrect = 0
for clazz in CLASSES:
count, correct = results[clazz]
totalCount += count
totalCorrect += correct
print ("%s %d %d %f") % (clazz, correct, count, (float(correct) / count))
print ("%s %d %d %f") % ("Overall", totalCorrect, totalCount,(float(totalCorrect) / totalCount))
except Exception as e:
print (e)
return 5
def classify(models, dataSet):
results = {}
for trueClazz in CLASSES:
count = 0
correct = 0
for item in dataSet[trueClazz]:
predClazz, prob = predict(models, item)
print ("%s,%s,%f") % (trueClazz, predClazz, prob)
count += 1
if trueClazz == predClazz: correct += 1
results[trueClazz] = (count, correct)
return results
def predict(models, item):
maxProb = 0.0
bestClass = ""
for clazz, model in models.iteritems():
prob = predictSingle(model, item)
if prob > maxProb:
maxProb = prob
bestClass = clazz
return (bestClass, maxProb)
def predictSingle(model, item):
output = svm_predict([0], [item], model, "-q -b 1")
prob = output[2][0][0]
return prob
def getModels(trainingData):
models = {}
param = getParam(USE_LINEAR)
for c in CLASSES:
labels, data = getTrainingData(trainingData, c)
prob = svm_problem(labels, data)
m = svm_train(prob, param)
models[c] = m
return models
def getTrainingData(trainingData, clazz):
labeledData = getLabeledDataVector(trainingData, clazz, 1)
negClasses = [c for c in CLASSES if not c == clazz]
for c in negClasses:
ld = getLabeledDataVector(trainingData, c, -1)
labeledData += ld
random.shuffle(labeledData)
unzipped = [list(t) for t in zip(*labeledData)]
labels, data = unzipped[0], unzipped[1]
return (labels, data)
def getParam(linear = True):
param = svm_parameter("-q")
param.probability = 1
if(linear):
param.kernel_type = LINEAR
param.C = .01
else:
param.kernel_type = RBF
param.C = .01
param.gamma = .00000001
return param
def getLabeledDataVector(dataset, clazz, label):
data = dataset[clazz]
labels = [label] * len(data)
output = zip(labels, data)
return output
def getData(generateTuningData):
trainingData = {}
tuneData = {}
testData = {}
for clazz in CLASSES:
(train, tune, test) = buildTrainTestVectors(buildImageList(ROOT_DIR + clazz + "/"), generateTuningData)
trainingData[clazz] = train
tuneData[clazz] = tune
testData[clazz] = test
return (trainingData, tuneData, testData)
def buildImageList(dirName):
imgs = [Image.open(dirName + fileName).resize((DIMENSION, DIMENSION)) for fileName in os.listdir(dirName)]
imgs = [list(itertools.chain.from_iterable(img.getdata())) for img in imgs]
return imgs
def buildTrainTestVectors(imgs, generateTuningData):
# 70% for training, 30% for test.
testSplit = int(.7 * len(imgs))
baseTraining = imgs[:testSplit]
test = imgs[testSplit:]
training = None
tuning = None
if generateTuningData:
# 50% of training for true training, 50% for tuning.
tuneSplit = int(.5 * len(baseTraining))
training = baseTraining[:tuneSplit]
tuning = baseTraining[tuneSplit:]
else:
training = baseTraining
return (training, tuning, test)
if __name__ == "__main__":
sys.exit(main())
and I got a new error message:
Click this link to see the new error message
What should I do? I have searched through every answer but none of them solved it. I am using this code for my final project at university, so I hope someone can help me with this problem. Thank you again for the previous answer.
EDIT:
This line causes the error:
labeledData += ld
The += operator doesn't work on zip objects. You can convert the zip to a list:
def getLabeledDataVector(dataset, clazz, label):
...
return list(output)
Also, the unzipped list can be empty, so you should fix this line too (thanks to ShadowRanger for the comment):
labels, data = unzipped if unzipped else ([], [])
These changes will probably affect your code's logic, so you should adjust that on your own.
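For context, a quick illustration of why the original line fails in Python 3 and why converting to a list fixes it:

a = zip([1, -1], [[0.1], [0.2]])     # zip object, not a list, in Python 3
b = zip([1], [[0.3]])
try:
    a += b                           # TypeError: unsupported operand type(s) for +=
except TypeError as e:
    print(e)

a = list(zip([1, -1], [[0.1], [0.2]]))
b = list(zip([1], [[0.3]]))
a += b                               # works: plain list concatenation
print(a)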
BEFORE EDIT:
In the getData(generateTuningData) function, the ROOT_DIR + clazz expression causes the error, because ROOT_DIR is None.
sys.path.append doesn't return anything (it returns None).
You need to change your code to:
...
import os.path
...
sys.path.append("../train/")
ROOT_DIR = os.path.dirname(os.getcwd()) + "/train/" # parent directory and "/train/"
...
I assumed ROOT_DIR is your current working directory's parent + "/train/". If it is not, you can fix it.
Also, there may be other problems, but this solves the unsupported operand type(s) error.
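To see that failure mode concretely (the old line effectively set ROOT_DIR to None):

import sys

root = sys.path.append("../train/")
print(repr(root))       # None: append modifies sys.path in place and returns nothing
# root + "negative/"    # would raise: unsupported operand type(s) for +: 'NoneType' and 'str'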
I am trying to optimize this code. Right now it runs 340 requests in 10 minutes, and I am trying to get 1800 requests in 30 minutes, since the Amazon API allows me to run one request every second. Can I use multithreading with this code to increase the number of runs?
Also, right now I read the full data in the main function. Should I split it up, and how can I figure out how many items each thread should take?
def newhmac():
return hmac.new(AWS_SECRET_ACCESS_KEY, digestmod=sha256)
def getSignedUrl(params):
hmac = newhmac()
action = 'GET'
server = "webservices.amazon.com"
path = "/onca/xml"
params['Version'] = '2013-08-01'
params['AWSAccessKeyId'] = AWS_ACCESS_KEY_ID
params['Service'] = 'AWSECommerceService'
params['Timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
key_values = [(urllib.quote(k), urllib.quote(v)) for k,v in params.items()]
key_values.sort()
paramstring = '&'.join(['%s=%s' % (k, v) for k, v in key_values])
urlstring = "http://" + server + path + "?" + \
('&'.join(['%s=%s' % (k, v) for k, v in key_values]))
hmac.update(action + "\n" + server + "\n" + path + "\n" + paramstring)
urlstring = urlstring + "&Signature="+\
urllib.quote(base64.encodestring(hmac.digest()).strip())
return urlstring
def readData():
data = []
with open("ASIN.csv") as f:
reader = csv.reader(f)
for row in reader:
data.append(row[0])
return data
def writeData(data):
with open("data.csv", "a") as f:
writer = csv.writer(f)
writer.writerows(data)
def main():
data = readData()
filtData = []
i = 0
count = 0
while(i < len(data) -10 ):
if (count %4 == 0):
time.sleep(1)
asins = ','.join([data[x] for x in range(i,i+10)])
params = {'ResponseGroup':'OfferFull,Offers',
'AssociateTag':'4chin-20',
'Operation':'ItemLookup',
'IdType':'ASIN',
'ItemId':asins}
url = getSignedUrl(params)
resp = requests.get(url)
responseSoup=BeautifulSoup(resp.text)
quantity = ['' if product.amount is None else product.amount.text for product in responseSoup.findAll("offersummary")]
price = ['' if product.lowestnewprice is None else product.lowestnewprice.formattedprice.text for product in responseSoup.findAll("offersummary")]
prime = ['' if product.iseligibleforprime is None else product.iseligibleforprime.text for product in responseSoup("offer")]
for zz in zip(asins.split(","), price,quantity,prime):
print zz
filtData.append(zz)
print i, len(filtData)
i+=10
count +=1
writeData(filtData)
threading.Timer(1.0, main).start()
If you are using Python 3.2+, you can use the concurrent.futures library to make it easy to launch tasks in multiple threads. For example, here I am simulating 10 URL-parsing jobs running in parallel, each of which takes 1 second; run synchronously this would have taken 10 seconds, but with a thread pool of 10 it takes about 1 second:
import time
from concurrent.futures import ThreadPoolExecutor
def parse_url(url):
time.sleep(1)
print(url)
return "done."
st = time.time()
with ThreadPoolExecutor(max_workers=10) as executor:
for i in range(10):
future = executor.submit(parse_url, "http://google.com/%s"%i)
print("total time: %s"%(time.time() - st))
Output:
http://google.com/0
http://google.com/1
http://google.com/2
http://google.com/3
http://google.com/4
http://google.com/5
http://google.com/6
http://google.com/7
http://google.com/8
http://google.com/9
total time: 1.0066466331481934
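Applied to your code, you don't need to decide up front how many requests each thread handles: submit one batch of 10 ASINs per task and let the pool schedule them. A rough sketch (fetch_batch and run_all are hypothetical names; it reuses your getSignedUrl, and the sleep between submissions is a crude way to stay near one request per second):

import time
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests

def fetch_batch(asins):
    # one task = one signed ItemLookup request for a batch of up to 10 ASINs
    params = {'ResponseGroup': 'OfferFull,Offers',
              'AssociateTag': '4chin-20',
              'Operation': 'ItemLookup',
              'IdType': 'ASIN',
              'ItemId': ','.join(asins)}
    resp = requests.get(getSignedUrl(params))
    return asins, resp.text  # parse with BeautifulSoup as in main()

def run_all(data, batch_size=10, max_workers=4):
    batches = [data[i:i + batch_size] for i in range(0, len(data), batch_size)]
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for batch in batches:
            futures.append(executor.submit(fetch_batch, batch))
            time.sleep(1)  # throttle submissions to roughly one request per second
        for future in as_completed(futures):
            results.append(future.result())
    return results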
multi() freezes somewhere in the middle of its activity:
def current_proc(): print mp.current_process().name, 'started'
def multi(fn, func):
print 'Process started on',time.strftime('%H:%M:%S')
count = mp.cpu_count()*2
input = nohead(xlsx2array(fn))
parts = chunks(input, 10)
pool = mp.Pool(processes = count, initializer = current_proc, maxtasksperchild = 1)
for part in parts:
with stopwatch() as r: pool.map(func, part)
return r
pool.close()
pool.join()
I am using multiprocessing with this function to resolve the effective URLs:
def query(i):
attempts = 2
while attempts:
try:
q = requests.get(i, allow_redirects = True, verify = False, timeout = 2)
match = q.url
match = str(match)
break
except:
attempts -= 1
match = 'pattern not found'
pass
return [ i, match ]
Please advise how I can avoid this freezing. Thanks.