Multiprocessing In Django Function - python

Is it possible to use multiprocessing in Django on a request?
So if I send a request to http://127.0.0.1:8000/wallet_verify:
def wallet_verify(request):
    wallets = botactive.objects.all()
    # Check whether the user wants to be included in the process: if they set
    # active to True I include them, otherwise I ignore them.
    for active in wallets:
        check_active = active.active
        if check_active == True:
            user_is_active = active.user
            # For the ones that want to be included I then fetch their key data.
            # I need both the API key and the secret, so I loop through the
            # records of the active users.
            database = Bybitapidatas.objects.filter(user=user_is_active)
            for apikey in database:
                apikey = apikey.apikey
            for apisecret in database:
                apisecret = apisecret.apisecret
            # Since I am making a request to an exchange endpoint I can only
            # include one API key and secret at a time, i.e. one person at a
            # time. This is why I want to run it in parallel.
            for a, b in zip(
                list(Bybitapidatas.objects.filter(user=user_is_active).values("apikey")),
                list(Bybitapidatas.objects.filter(user=user_is_active).values("apisecret")),
            ):
                session = spot.HTTP(endpoint='https://api-testnet.bybit.com/', api_key=a['apikey'], api_secret=b['apisecret'])
                # Check whether they have enough balance to open trades, if
                # they have selected to be included.
                GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
                for i in GET_USDT_BALANCE:
                    if 'USDT' in i.values():
                        idx_USDT = GET_USDT_BALANCE.index(i)
                        GET_USDTBALANCE = GET_USDT_BALANCE[idx_USDT]['free']
                        print(round(float(GET_USDTBALANCE), 2))
                        # If they don't have enough balance I skip the user.
                        if round(float(GET_USDTBALANCE), 2) < 11:
                            pass
                        else:
                            session.place_active_order(
                                symbol="BTCUSDT",
                                side="Buy",
                                type="MARKET",
                                qty=10,
                                timeInForce="GTC"
                            )
How can I run this process in parallel while looping through the database to get the data for each individual user?
I am still new to coding, so I hope the explanation makes sense.
I have tried multiprocessing and pools, but then I get an error that the app has not started yet and that I have to run it outside of wallet_verify. Is there a way to do it inside wallet_verify, when I send the POST request?
Any help appreciated.

Filtering the database to get the users who have set active to True. listi ends up as [1, 3] (these are the user IDs returned):
processes = botactive.objects.filter(active=True).values_list('user')
listi = [row[0] for row in processes]
Get the users from listi and perform the action for each of them.
def wallet_verify(listi):
    # print(listi)
    database = Bybitapidatas.objects.filter(user=listi)
    print("---------------------------------------------------- START")
    for apikey in database:
        apikey = apikey.apikey
        print(apikey)
    for apisecret in database:
        apisecret = apisecret.apisecret
        print(apisecret)
    start_time = time.time()
    session = spot.HTTP(endpoint='https://api-testnet.bybit.com/', api_key=apikey, api_secret=apisecret)
    GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
    for i in GET_USDT_BALANCE:
        if 'USDT' in i.values():
            idx_USDT = GET_USDT_BALANCE.index(i)
            GET_USDTBALANCE = GET_USDT_BALANCE[idx_USDT]['free']
            print(round(float(GET_USDTBALANCE), 2))
            if round(float(GET_USDTBALANCE), 2) < 11:
                pass
            else:
                session.place_active_order(
                    symbol="BTCUSDT",
                    side="Buy",
                    type="MARKET",
                    qty=10,
                    timeInForce="GTC"
                )
    print("My program took", time.time() - start_time, "to run")
    print("---------------------------------------------------- END")
    return HttpResponse("Wallets verified")
verifyt is the view I use to trigger the multiprocessing, since I don't want the work to run unless it is requested. The initializer runs django.setup() so the apps are loaded in each worker process.
def verifyt(request):
    with ProcessPoolExecutor(max_workers=4, initializer=django.setup) as executor:
        results = executor.map(wallet_verify, listi)
    return HttpResponse("done")
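One detail worth knowing about this pattern (an observation on my part, not part of the original post): executor.map returns results lazily and only re-raises worker exceptions when the results are consumed, so if wallet_verify fails inside a child process the verifyt view will still happily report "done". A small variation that surfaces per-user failures, assuming the same wallet_verify function and listi list as above:

from concurrent.futures import ProcessPoolExecutor, as_completed
import django
from django.http import HttpResponse

def verifyt(request):
    # initializer=django.setup makes sure each worker process has loaded the
    # Django app registry before wallet_verify touches the ORM.
    with ProcessPoolExecutor(max_workers=4, initializer=django.setup) as executor:
        futures = {executor.submit(wallet_verify, user_id): user_id for user_id in listi}
        for future in as_completed(futures):
            user_id = futures[future]
            try:
                future.result()  # re-raises any exception from the worker
            except Exception as exc:
                print("wallet_verify failed for user {}: {}".format(user_id, exc))
    return HttpResponse("done")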

Related

Fetching data in realtime from database in python

I have a class for multiprocessing in Python which creates 3 different processes. The first process checks whether there is any signal from my hardware and pushes it into a queue, the second process gets the data out of the queue and pushes it into a database, and the third process gets the data out of the database and pushes it to a server.
obj = QE()
stdFunct = standardFunctions()
watchDogProcess = multiprocessing.Process(target=obj.watchDog)
watchDogProcess.start()
pushToDBSProcess = multiprocessing.Process(target=obj.pushToDBS)
pushToDBSProcess.start()
pushToCloud = multiprocessing.Process(target=stdFunct.uploadCycleTime)
pushToCloud.start()
watchDogProcess.join()
pushToDBSProcess.join()
pushToCloud.join()
My first two processes are running perfectly as desired; however, I am struggling with the third one. The following is the code of my third process:
def uploadCycleTime(self):
    while True:
        uploadCycles = []
        lastUpPointer = "SELECT id FROM lastUploaded"
        lastUpPointer = self.dbFetchone(lastUpPointer)
        lastUpPointer = lastUpPointer[0]
        # print("lastUploaded :" + str(lastUpPointer))
        cyclesToUploadSQL = "SELECT id,machineId,startDateTime,endDateTime,type FROM cycletimes WHERE id > " + str(lastUpPointer)
        cyclesToUpload = self.dbfetchMany(cyclesToUploadSQL, 15)
        cyclesUploadLength = len(cyclesToUpload)
        if cyclesUploadLength > 0:
            for cycles in cyclesToUpload:
                uploadCycles.append({
                    "dataId": cycles[0],
                    "machineId": cycles[1],
                    "startDateTime": cycles[2].strftime('%Y-%m-%d %H:%M:%S.%f'),
                    "endDateTime": cycles[3].strftime('%Y-%m-%d %H:%M:%S.%f'),
                    "type": cycles[4]
                })
            # print("length : " + str(cyclesUploadLength))
            lastUpPointer = uploadCycles[cyclesUploadLength - 1]["dataId"]
            uploadCycles = json.dumps(uploadCycles)
            api = self.dalUrl + "/cycle-times"
            uploadResponse = self.callPostAPI(api, str(uploadCycles))
            print(lastUpPointer)
            changePointerSQL = "UPDATE lastUploaded SET id=" + str(lastUpPointer)
            try:
                changePointerSQL = self.dbAbstraction(changePointerSQL)
            except Exception as errorPointer:
                print("Pointer change Error : " + str(errorPointer))
        time.sleep(2)
Now I am saving a pointer to remember the last id uploaded, and from there on I keep uploading 15 packets at a time. When data already exists in the DB the code works well; however, if there is no data when the process starts and data is sent afterwards, it fails to fetch it from the DB.
I tried printing the length in real time and it keeps giving me 0, in spite of data being continuously pushed into the DB.
In my upload process, I had missed out on a commit(); without it the connection keeps reading the same transaction snapshot and never sees the newly inserted rows:
def dbFetchAll(self, dataString):
    # dataToPush = self.cycletimeQueue.get()
    # print(dataToPush)
    dbTry = 1
    try:
        while dbTry == 1:  # This while is to ensure the data has been pushed
            sql = dataString
            self.conn.execute(sql)
            response = self.conn.fetchall()
            dbTry = 0
            return response
            # print(self.conn.rowcount, "record inserted.")
    except Exception as error:
        print("Error : " + str(error))
        return dbTry
    finally:
        self.mydb.commit()  # the commit that was missing
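An alternative, if the driver supports it, is to enable autocommit when the connection is created, so every SELECT starts a fresh transaction and sees rows committed by the other processes without an explicit commit() after each read. A rough sketch, assuming a mysql-connector-python style API; the host, credentials, and database name here are placeholders:

import mysql.connector

def make_connection():
    # With autocommit on, each statement runs in its own transaction, so the
    # polling loop sees rows committed by the other processes as they arrive.
    return mysql.connector.connect(
        host="localhost",          # placeholder connection details
        user="user",
        password="password",
        database="cycletimes_db",  # hypothetical database name
        autocommit=True,
    )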

Locust- repeat the user list

In the following Locust file we are sending two sets of user details. This works fine when we run the test with 2 users. However, when we run it with more than 2 users (for example 5), it starts failing for the remaining new users (i.e. the extra 3 users).
import json
from locust import HttpUser, task, between, SequentialTaskSet

USER_CREDENTIALS = [
    ("abc#xyz.com", "Yahoo#123"),
    ("xyz#xyz.com", "Yahh-69-Wrap")
]

class CLMLoginTaskSet(SequentialTaskSet):
    hostname_app2 = "http://app2.stage.cloud.local"
    hostname_app1 = "http://app1.stage.cloud.local"
    port = "8090"
    username = "NOT_FOUND"
    password = "NOT_FOUND"
    access_token_value1 = None

    @task
    def userlist(self):
        if len(USER_CREDENTIALS) > 0:
            self.username, self.password = USER_CREDENTIALS.pop()

    @task
    def generate_clm_token(self):
        print("username", self.username)
        print("password", self.password)
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        clm_response = self.client.post(
            self.hostname_app2 + ":" + self.port + "/oauth2/access?grant_type=password&username=" + self.username + "&password=" + self.password,
            headers=headers)
        print("CLM Response", clm_response.text)
        json_clm_response = clm_response.json()
        access_token_value = json_clm_response['access_token']
        print("This is the access token value", access_token_value)
        self.access_token_value1 = access_token_value

class CLMLogin(HttpUser):
    tasks = [CLMLoginTaskSet]
    host = ""
    wait_time = between(1, 2)
It should repeat and reuse the same user list if we start the load test with more users. How can I achieve this?
You have 2 credentials and you pop them at the start. pop removes the item from the list, so after your first 2 users the list is empty and there is nothing left to pop. Add more credentials if you still want to use the pop method, or just pick a credential with random.choice without popping, so the next user can still use it. Beware that some users will then share the same credential, so they can affect each other, depending on how the system works.
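A minimal sketch of the random.choice variant, reusing the USER_CREDENTIALS list from the question (only the userlist task is shown):

import random
from locust import SequentialTaskSet, task

USER_CREDENTIALS = [
    ("abc#xyz.com", "Yahoo#123"),
    ("xyz#xyz.com", "Yahh-69-Wrap"),
]

class CLMLoginTaskSet(SequentialTaskSet):
    username = "NOT_FOUND"
    password = "NOT_FOUND"

    @task
    def userlist(self):
        # Pick a credential at random instead of popping it, so the list is
        # never exhausted; concurrent users may share the same credential.
        self.username, self.password = random.choice(USER_CREDENTIALS)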

Fastest way to delete a Collection from Firestore?

I have an application that loads millions of documents to a collection, using 30-80 workers to load the data simultaneously. Sometimes the loading process doesn't complete smoothly; with other databases I could simply delete the table and start over, but not with Firestore collections. I have to list the documents and delete them, and I've not found a way to scale this with the same capacity as my loading process. What I'm doing now is running two App Engine hosted Flask/Python methods, one to get a page of 1000 documents and pass them to another method that deletes them. This way the process listing documents is not blocked by the process deleting them. It's still taking days to complete, which is too long.
Method to get list of documents and create a task to delete them, which is single threaded:
@app.route('/delete_collection/<collection_name>/<batch_size>', methods=['POST'])
def delete_collection(collection_name, batch_size):
    batch_size = int(batch_size)
    coll_ref = db.collection(collection_name)
    print('Received request to delete collection {} {} docs at a time'.format(
        collection_name,
        batch_size
    ))
    num_docs = batch_size
    while num_docs >= batch_size:
        docs = coll_ref.limit(batch_size).stream()
        found = 0
        deletion_request = {
            'doc_ids': []
        }
        for doc in docs:
            deletion_request['doc_ids'].append(doc.id)
            found += 1
        num_docs = found
        print('Creating request to delete docs: {}'.format(
            json.dumps(deletion_request)
        ))
        # Add to task queue
        queue = tasks_client.queue_path(PROJECT_ID, LOCATION, 'database-manager')
        task_meet = {
            'app_engine_http_request': {  # Specify the type of request.
                'http_method': 'POST',
                'relative_uri': '/delete_documents/{}'.format(
                    collection_name
                ),
                'body': json.dumps(deletion_request).encode(),
                'headers': {
                    'Content-Type': 'application/json'
                }
            }
        }
        task_response_meet = tasks_client.create_task(queue, task_meet)
        print('Created task to delete {} docs: {}'.format(
            batch_size,
            json.dumps(deletion_request)
        ))
Here is the method I use to delete the documents, which can scale. In effect it only processes 5-10 at a time, limited by the rate at which the other method passes pages of doc_ids to delete. Separating the two helps, but not that much.
@app.route('/delete_documents/<collection_name>', methods=['POST'])
def delete_documents(collection_name):
    # Validate we got a body in the POST
    if flask.request.json:
        print('Request received to delete docs from :{}'.format(collection_name))
    else:
        message = 'No json found in request: {}'.format(flask.request)
        print(message)
        return message, 400
    # Validate that the payload includes a list of doc_ids
    doc_ids = flask.request.json.get('doc_ids', None)
    if doc_ids is None:
        return 'No doc_ids specified in payload: {}'.format(flask.request.json), 400
    print('Received request to delete docs: {}'.format(doc_ids))
    for doc_id in doc_ids:
        db.collection(collection_name).document(doc_id).delete()
    return 'Finished'

if __name__ == '__main__':
    # Set environment variables for running locally
    app.run(host='127.0.0.1', port=8080, debug=True)
I've tried running multiple concurrent executions of delete_collection(), but I'm not certain that even helps, as I'm not sure whether each call to limit(batch_size).stream() gets a distinct set of documents or might return duplicates.
How can I make this run faster?
This is what I came up with. It's not super fast (120-150 docs per second), but all the other examples I found in python didn't work at all:
now = datetime.now()
then = now - timedelta(days=DOCUMENT_EXPIRATION_DAYS)
doc_counter = 0
commit_counter = 0
limit = 5000
while True:
    docs = []
    print('Getting next doc handler')
    docs = [snapshot for snapshot in db.collection(collection_name)
            .where('id.time', '<=', then)
            .limit(limit)
            .order_by('id.time', direction=firestore.Query.ASCENDING)
            .stream()]
    batch = db.batch()
    for doc in docs:
        doc_counter = doc_counter + 1
        if doc_counter % 500 == 0:
            commit_counter += 1
            print('Committing batch {} from {}'.format(commit_counter, doc.to_dict()['id']['time']))
            batch.commit()
        batch.delete(doc.reference)
    batch.commit()
    if len(docs) == limit:
        continue
    break
print('Deleted {} documents in {} seconds.'.format(doc_counter, datetime.now() - now))
As mentioned in the other comments, .stream() has a 60-second deadline. This iterative structure sets a limit of 5000, after which .stream() is called again, which keeps each query under the 60-second limit. If anybody knows how to speed this up, let me know.
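One way this could be sped up (a sketch of my own, not something from the thread): fan the 500-document batches out to a thread pool so the commits overlap with fetching the next page. The google-cloud-firestore client is generally treated as safe to share across threads, but each WriteBatch should be built and committed by a single thread; db and the filtered query are assumed to be the same as in the snippet above.

from concurrent.futures import ThreadPoolExecutor

def delete_chunk(refs):
    # One batch per chunk of at most 500 references (the batch write limit).
    batch = db.batch()
    for ref in refs:
        batch.delete(ref)
    batch.commit()
    return len(refs)

def delete_matching_in_parallel(query, page_size=5000, workers=8):
    deleted = 0
    with ThreadPoolExecutor(max_workers=workers) as executor:
        while True:
            refs = [snap.reference for snap in query.limit(page_size).stream()]
            if not refs:
                return deleted
            chunks = [refs[i:i + 500] for i in range(0, len(refs), 500)]
            # map blocks until every chunk in this page is committed, so the
            # next page query only sees documents that still exist.
            deleted += sum(executor.map(delete_chunk, chunks))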
Here is my simple Python script that I used to test batch deletes. As @Chris32 said, batch mode will delete thousands of documents per second if latency isn't too bad.
from time import time
from uuid import uuid4
from google.cloud import firestore

DB = firestore.Client()

def generate_user_data(entries=10):
    print('Creating {} documents'.format(entries))
    now = time()
    batch = DB.batch()
    for counter in range(entries):
        # Each transaction or batch of writes can write to a maximum of 500 documents.
        # https://cloud.google.com/firestore/quotas#writes_and_transactions
        if counter % 500 == 0 and counter > 0:
            batch.commit()
        user_id = str(uuid4())
        data = {
            "some_data": str(uuid4()),
            "expires_at": int(now)
        }
        user_ref = DB.collection(u'users').document(user_id)
        batch.set(user_ref, data)
    batch.commit()
    print('Wrote {} documents in {:.2f} seconds.'.format(entries, time() - now))

def delete_one_by_one():
    print('Deleting documents one by one')
    now = time()
    docs = DB.collection(u'users').where(u'expires_at', u'<=', int(now)).stream()
    counter = 0
    for doc in docs:
        doc.reference.delete()
        counter = counter + 1
    print('Deleted {} documents in {:.2f} seconds.'.format(counter, time() - now))

def delete_in_batch():
    print('Deleting documents in batch')
    now = time()
    docs = DB.collection(u'users').where(u'expires_at', u'<=', int(now)).stream()
    batch = DB.batch()
    counter = 0
    for doc in docs:
        counter = counter + 1
        if counter % 500 == 0:
            batch.commit()
        batch.delete(doc.reference)
    batch.commit()
    print('Deleted {} documents in {:.2f} seconds.'.format(counter, time() - now))

generate_user_data(10)
delete_one_by_one()
print('###')
generate_user_data(10)
delete_in_batch()
print('###')
generate_user_data(2000)
delete_in_batch()
This public documentation describes how, using a callable Cloud Function, you can take advantage of the firestore:delete command from the Firebase Command Line Interface and delete up to 4000 documents per second.

Search via Python Search API timing out intermittently

We have an application that is basically just a form submission for requesting a team drive to be created. It's hosted on Google App Engine.
This timeout error is coming from a single field in the form that simply does typeahead for an email address. All of the names on the domain are indexed in the datastore, about 300k entities - nothing is being pulled directly from the directory api. After 10 seconds of searching (via the Python Google Search API), it will time out. This is currently intermittent, but errors have been increasing in frequency.
Error: line 280, in get_result raise _ToSearchError(e) Timeout: Failed to complete request in 9975ms
Essentially, speeding up the searches would resolve the issue. I looked at the code and I don't believe there is any room for improvement there. I am not sure whether increasing the instance class would help; it is currently an F2. Perhaps there is another way to improve the index efficiency, but I'm not entirely sure how one would do that. Any thoughts would be appreciated.
Search Code:
class LookupUsersorGrpService(object):
    '''
    lookupUsersOrGrps accepts various params and performs search
    '''
    def lookupUsersOrGrps(self, params):
        search_results_json = {}
        search_results = []
        directory_users_grps = GoogleDirectoryUsers()
        error_msg = 'Technical error'
        query = ''
        try:
            # Default a few values if not present
            if ('offset' not in params) or (params['offset'] is None):
                params['offset'] = 0
            else:
                params['offset'] = int(params['offset'])
            if ('limit' not in params) or (params['limit'] is None):
                params['limit'] = 20
            else:
                params['limit'] = int(params['limit'])
            # Search related to field name
            query = self.appendQueryParam(q=query, p=params, qname='search_name', criteria=':', pname='query', isExactMatch=True, splitString=True)
            # Search related to field email
            query = self.appendQueryParam(q=query, p=params, qname='search_email', criteria=':', pname='query', isExactMatch=True, splitString=True)
            # Perform search
            log.info('Search initialized :"{}"'.format(query))
            # sort results by name ascending
            expr_list = [search.SortExpression(expression='name', default_value='', direction=search.SortExpression.ASCENDING)]
            # construct the sort options
            sort_opts = search.SortOptions(expressions=expr_list)
            # Prepare the search index
            index = search.Index(name="GoogleDirectoryUsers", namespace="1")
            search_query = search.Query(
                query_string=query.strip(),
                options=search.QueryOptions(
                    limit=params['limit'],
                    offset=params['offset'],
                    sort_options=sort_opts,
                    returned_fields=directory_users_grps.get_search_doc_return_fields()
                ))
            # Execute the search query
            search_result = index.search(search_query)
            # Start collecting the values
            total_cnt = search_result.number_found
            params['limit'] = len(search_result.results)
            # Prepare the response object
            for teamdriveDoc in search_result.results:
                teamdriveRecord = GoogleDirectoryUsers.query(GoogleDirectoryUsers.email == teamdriveDoc.doc_id).get()
                if teamdriveRecord:
                    if teamdriveRecord.suspended == False:
                        search_results.append(teamdriveRecord.to_dict())
            search_results_json.update({"users": search_results})
            search_results_json.update({"limit": params['limit'] if len(search_results) > 0 else '0'})
            search_results_json.update({"total_count": total_cnt if len(search_results) > 0 else '0'})
            search_results_json.update({"status": "success"})
        except Exception as e:
            log.exception("Error in performing search")
            search_results_json.update({"status": "failed"})
            search_results_json.update({"description": error_msg})
        return search_results_json

    def appendQueryParam(self, q='', p=[], qname=None, criteria='=', pname=None,
                         isExactMatch=False, splitString=False, defaultValue=None):
        ''' Retrieves the given param from dict and adds to query if exists
        '''
        if (pname in p) or (defaultValue is not None):
            if len(q) > 0:
                q += ' OR '
            q += qname
            if criteria:
                q += criteria
            if defaultValue is None:
                val = p[pname]
            else:
                val = defaultValue
            if splitString:
                val = val.replace("", "~")[1:-1]
            # Helps to retain the passed argument as it is, for example an email
            if isExactMatch:
                q += "\"" + val + "\""
            else:
                q += val
        return q
An Index instance's search method accepts a deadline parameter, so you could use that to increase the time that you are willing to wait for the search to respond:
search_result = index.search(search_query, deadline=30)
The documentation doesn't specify acceptable values for deadline, but other App Engine services tend to accept values up to 60 seconds.
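Wired into the code from the question, that would look roughly like the sketch below; the 30-second value and the bare re-raise are illustrative choices, log is the logger already used in the question's code, and search.Error is the module's base exception class.

try:
    # Allow up to 30 seconds instead of the default before the call is abandoned.
    search_result = index.search(search_query, deadline=30)
except search.Error:
    log.exception('Search failed or timed out')
    raise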

timeout and performance issues on redirecting inside django

I am currently having problems with timeouts and performance on Django redirects. The issue was not visible until I browsed to my locally hosted application with 2 devices, with only one worker enabled on my localhost and the timeout set to 30 seconds.
I have a views.py function that redirects to a page based on the given URL. I do a lookup for the pk in a table and return the URL. I also have a counter that keeps track of the number of forwards.
urls.py here:
url(r'^i/(?P<pk>[-\w]+)/$', frontendapp_views.item_view, name="item_view"),
The page redirects instantly to desired_url_forward; however, the connection with the user stays open while, in fact, the user has already left my Django environment. This somehow leaves my worker waiting for 30 seconds, even though the visitor has already been forwarded to an external page, so with one worker no other request can be processed in the meantime.
I could increase the number of workers or shorten the timeout, but that doesn't feel right, as it does not fix the core issue.
This is the only thing I found on this topic, but I am not skilled enough to understand it: https://github.com/requests/requests/issues/520
This is what the views.py looks like:
def item_view(request, pk):
    pk_binairy = urlsafe_base64_decode(pk)
    pk_int = int.from_bytes(pk_binairy, byteorder='little')
    desired_url_forward_object = get_object_or_404(forwards, pk=pk_int)
    channel_cleaned_utm = re.sub(' +', ' ', "".join([request.GET.get('utm_source', ''), ' ', request.GET.get('utm_medium', ''), ' ', request.GET.get('utm_campaign', ''), ' ', request.GET.get('utm_term', ''), ' ', request.GET.get('utm_content', '')]))
    channel_cleaned = request.META.get('HTTP_REFERER')
    if channel_cleaned is None:
        channel_cleaned = 'Direct Traffic'
    visitor_ip_request = get_client_ip(request)
    location_request = get_client_location(request, visitor_ip_request)
    clickstat = clickstats(
        urlid=pk_int,
        user=desired_url_forward_object.user,
        channel=channel_cleaned,
        visitor_ip=visitor_ip_request,
        city=location_request['city'],
        region=location_request['region'],
        country=location_request['country'],
        device_type=request.user_agent.device.family,
        browser=request.user_agent.browser.family,
        browser_version=request.user_agent.browser.version_string,
        operating_system=request.user_agent.os.family,
        operating_system_version=request.user_agent.os.version_string
    )
    clickstat.save()
    if desired_url_forward_object.counterA <= desired_url_forward_object.counterB:
        desired_url_forward = desired_url_forward_object.urlA
        desired_url_forward_object.counterA = F('counterA') + 1
    else:
        desired_url_forward = desired_url_forward_object.urlB
        desired_url_forward_object.counterB = F('counterB') + 1
    desired_url_forward_object.save()
    return redirect(desired_url_forward)
Any suggestions? Thanks for the help!
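No answer is recorded here, but a neutral first step would be to time each stage of item_view to see which call actually holds the worker (the get_client_location lookup and the two database writes are the usual suspects). A purely diagnostic sketch, assuming standard library logging; the step names are placeholders:

import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)

@contextmanager
def timed(step):
    # Log how long a step of the view takes, to find what keeps the worker busy.
    start = time.monotonic()
    try:
        yield
    finally:
        logger.info("%s took %.3fs", step, time.monotonic() - start)

# Inside item_view, wrap the suspect calls, for example:
#     with timed("geo lookup"):
#         location_request = get_client_location(request, visitor_ip_request)
#     with timed("clickstat save"):
#         clickstat.save()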
