Django database query round trips - python

I have the query below; as you can see, in my loop I create each message. I want to reduce the total number of round trips I make to the DB. Is there a way I can process the message creation in batches of, say, 20 at a time? Will this help with speed? Any suggestions welcome.
class ProcessRequests(Task):
    """
    Celery Task to start requests to process that are not scheduled.
    """
    name = "Request to Process"
    max_retries = 1
    default_retry_delay = 3

    def run(self, batch):
        # Only run this task on non-scheduled tasks
        if batch.status != "Scheduled":
            q = Contact.objects.filter(contact_owner=batch.user, subscribed=True)
            if batch.group is None:
                q = q.filter(id=batch.contact_id)
            else:
                q = q.filter(group=batch.group)
            for e in q:
                msg = Message.objects.create(
                    recipient_number=e.mobile,
                    content=batch.content,
                    sender=e.contact_owner,
                    billee=batch.user,
                    sender_name=batch.sender_name
                )
                gateway = Gateway.objects.get(pk=2)
                msg.send(gateway)

You can use bulk_create to insert all the messages in a single query (it even takes a batch_size argument if you want groups of 20).
Also note that you're fetching the same gateway object on every pass through the loop; it would be better to fetch it once outside the loop and reuse it.
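A minimal sketch of both suggestions combined. Two caveats: bulk_create() bypasses Model.save() and model signals, and on some backends it does not set primary keys on the created objects, so whether msg.send() works afterwards depends on what your Message model needs:

gateway = Gateway.objects.get(pk=2)  # fetched once, outside the loop
msgs = [
    Message(
        recipient_number=e.mobile,
        content=batch.content,
        sender=e.contact_owner,
        billee=batch.user,
        sender_name=batch.sender_name,
    )
    for e in q
]
Message.objects.bulk_create(msgs, batch_size=20)  # insert in groups of 20
for msg in msgs:
    msg.send(gateway)  # sending is still one call per message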

Related

Best way to output concurrent task results in shell on the go

This is maybe a very basic question, and I can think of solutions, but I wondered whether there is a more elegant one I don't know of (quick googling didn't turn up anything useful).
I wrote a script to communicate with a remote device. However, now that I have more than one device of that type, I thought I'd just run the communication through concurrent futures and handle them simultaneously:
with concurrent.futures.ThreadPoolExecutor(20) as executor:
    executor.map(device_ctl, ids, repeat(args))
So it just runs up to 20 threads of device_ctl with the respective IDs and the same args. device_ctl prints some results, but since the threads all run in parallel, the output gets mixed up and looks messy. Ideally I would have one line per ID that shows the current state of the communication and gets updated once it changes, e.g.:
Dev1 Connecting...
Dev2 Connected! Status: Idle
Dev3 Connected! Status: Updating
However, I don't really know how to solve this nicely. I can think of a status list that gets assembled into one status string outside of the threads and is frequently reprinted, but it feels like there could be a simpler method. Ideas?
Since there was no good answer, I made my own solution, which is quite compact but efficient: a class whose attributes I access globally. Each thread populates it or updates a value based on its ID, where the ID is the same list entry that was handed to the thread. Here is a simple example of how to use it:
import concurrent.futures
from itertools import repeat

class collect:
    # note: these methods are called on the class itself (collect.write(...)),
    # so they take no self parameter
    ids = []
    outs = []
    LINE_UP = "\033[1A"
    LINE_CLEAR = "\x1b[2K"
    printed = 0

    def init(id_list):
        collect.ids = [i for i in id_list]
        collect.outs = ["" for i in id_list]
        collect.printall()

    def write(id, out):
        if id not in collect.ids:
            collect.ids.append(id)
            collect.outs.append(out)
        else:
            collect.outs[collect.ids.index(id)] = out

    def writeout(id, out):
        if id not in collect.ids:
            collect.ids.append(str(id))
            collect.outs.append(str(out))
        else:
            collect.outs[collect.ids.index(id)] = str(out)
        collect.printall()

    def append(id, out):
        if id not in collect.ids:
            collect.ids.append(str(id))
            collect.outs.append(str(out))
        else:
            collect.outs[collect.ids.index(id)] += str(out)

    def appendout(id, out):
        if id not in collect.ids:
            collect.ids.append(id)
            collect.outs.append(out)
        else:
            collect.outs[collect.ids.index(id)] += str(out)
        collect.printall()

    def read(id):
        return collect.outs[collect.ids.index(str(id))]

    def readall():
        return collect.outs, "\n".join(collect.outs)

    def printall(filter=""):
        if collect.printed > 0:
            print(collect.LINE_CLEAR + collect.LINE_UP * len(collect.ids), end="")
        print(
            "\n".join(
                [
                    collect.ids[i] + "\t" + collect.outs[i] + " " * 30
                    for i in range(len(collect.outs))
                    if filter in collect.ids[i]
                ]
            )
        )
        collect.printed = len(collect.ids)

def device_ctl(id, args):
    collect.writeout(id, "Connecting...")
    if args.connected:
        collect.writeout(id, "Connected")

collect.init(ids)
with concurrent.futures.ThreadPoolExecutor(20) as executor:
    executor.map(device_ctl, ids, repeat(args))
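One caveat on this design: the collect methods mutate the shared ids/outs lists from many threads at once. Under CPython the individual list operations used here are effectively atomic, but concurrent printall() calls can still interleave their escape codes and tear the display. A threading.Lock around the update-and-redraw path would make it robust; a minimal sketch (the lock name is illustrative):

import threading

_screen_lock = threading.Lock()

# inside collect.writeout / collect.appendout:
#     with _screen_lock:      # serialize the update and the redraw
#         ...update collect.outs...
#         collect.printall()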

How to use other def values?

I want to use values from another method.
For example, I added a pt parameter to the clean_beds_process method and pass Patients in the run method.
I want to access the patient's information when the clean_beds_process function is called.
However, this raises the error AttributeError: type object 'Patients' has no attribute 'id'.
I don't know why this happens.
Maybe I have misunderstood some mechanism of simpy.
Please let me know how I can use a patient's information when the clean_beds_process function is called.
Thank you.
import simpy
import random

class Patients:
    def __init__(self, p_id):
        self.id = p_id
        self.bed_name = ""
        self.admission_decision = ""

    def admin_decision(self):
        admin_decision_prob = random.uniform(0, 1)
        if admin_decision_prob <= 0.7:
            self.admission_decision = "DIS"
        else:
            self.admission_decision = "IU"
        return self.admission_decision

class Model:
    def __init__(self, run_number):
        self.env = simpy.Environment()
        self.pt_ed_q = simpy.Store(self.env)
        self.pt_counter = 0
        self.tg = simpy.Resource(self.env, capacity=4)
        self.physician = simpy.Resource(self.env, capacity=4)
        self.bed_clean = simpy.Store(self.env)
        self.bed_dirty = simpy.Store(self.env)
        self.IU_bed = simpy.Resource(self.env, capacity=50)

    def generate_beds(self):
        for i in range(77):
            yield self.env.timeout(0)
            yield self.bed_clean.put(f'bed{i}')

    def generate_pt_arrivals(self):
        while True:
            self.pt_counter += 1
            pt = Patients(self.pt_counter)
            yield self.env.timeout(5)
            self.env.process(self.process(pt))

    def clean_beds_process(self, cleaner_id, pt):
        while True:
            print(pt.id)
            bed = yield self.bed_dirty.get()
            yield self.env.timeout(50)
            yield self.bed_clean.put(bed)

    def process(self, pt):
        with self.tg.request() as req:
            yield req
            yield self.env.timeout(10)
        bed = yield self.bed_clean.get()
        pt.bed_name = bed
        pt.admin_decision()
        if pt.admission_decision == "DIS":
            with self.IU_bed.request() as req:
                dirty_bed_name = pt.bed_name
                yield self.bed_dirty.put(dirty_bed_name)
                yield self.env.timeout(600)
        else:
            dirty_bed_name = pt.bed_name
            yield self.bed_dirty.put(dirty_bed_name)

    def run(self):
        self.env.process(self.generate_pt_arrivals())
        self.env.process(self.generate_beds())
        for i in range(2):
            # passing the class Patients here (not an instance) is what
            # triggers the AttributeError described above
            self.env.process(self.clean_beds_process(i + 1, Patients))
        self.env.run(until=650)

run_model = Model(0)
run_model.run()
So if a patient can use either a clean bed or a dirty bed, then the patient needs to make two requests (one for each type of bed) and use env.any_of to wait for the first request to fire. You also need to deal with the case where both events fire at the same time, and don't forget to cancel the request you do not use. If the request that fires is for a clean bed, things stay mostly the same. But if the request is for a dirty bed, then you need to add a step to clean the bed. For this I would make the cleaners Resources instead of processes: the patient requests a cleaner, does a timeout for the cleaning time, and releases the cleaner. A sketch of this idea follows below.
To collect patient data I would create a log with the patient id, key event, and time, and crunch these post-sim to get the stats I need. To process the log I often create one dataframe that filters the log for the first event and a second dataframe that filters for the second event, then join the two dataframes on patient id. Now both events for a patient are on one row, so I can get the delta; once I have the delta I can do a sum and a count. For example, if my two events are when a patient arrives and when a patient gets a bed, I take the sum of the deltas and divide by the count, and I have the average time to bed (see the second sketch below).
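Here is a minimal sketch of the two-request idea, not the asker's full model; get_bed, the store names, and the 50-tick cleaning time are illustrative:

import simpy

def get_bed(env, pt, bed_clean, bed_dirty, cleaners):
    # request both stores and take whichever bed becomes available first
    get_clean = bed_clean.get()
    get_dirty = bed_dirty.get()
    yield env.any_of([get_clean, get_dirty])
    if get_clean.triggered:
        if get_dirty.triggered:
            # both fired on the same tick: keep the clean bed, return the dirty one
            yield bed_dirty.put(get_dirty.value)
        else:
            get_dirty.cancel()  # cancel the request we did not use
        pt.bed_name = get_clean.value
    else:
        get_clean.cancel()
        bed = get_dirty.value
        # a dirty bed must be cleaned before use; cleaners are a Resource here
        with cleaners.request() as req:
            yield req
            yield env.timeout(50)  # cleaning time
        pt.bed_name = bed

And a sketch of the post-sim log crunch with pandas; the event names and log layout are assumptions:

import pandas as pd

# hypothetical log rows collected during the sim: (patient_id, event, time)
rows = [(1, "arrival", 0), (1, "got_bed", 12), (2, "arrival", 5), (2, "got_bed", 20)]
log = pd.DataFrame(rows, columns=["pt_id", "event", "time"])
arrivals = log[log.event == "arrival"][["pt_id", "time"]]
bedded = log[log.event == "got_bed"][["pt_id", "time"]]
joined = arrivals.merge(bedded, on="pt_id", suffixes=("_arr", "_bed"))
delta = joined["time_bed"] - joined["time_arr"]
print(delta.sum() / delta.count())  # average time to bed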
If you remember, one of the first answers I gave you a while ago had an example of getting the first available bed from two different queues.
I do not have a lot of time right now, but I hope this dissertation helps a bit.

Python multiprocessing on same method that deals with data

I have a Django application where I am trying to manage the data for one of my models, since the number of its table rows in the database has gotten quite unruly.
I have a model staticmethod that gets all the 'Vehicle' objects in my database, goes through them, checks whether their image url is still active, and if not deletes it.
There are 1.2 million records (and some records have more than one image to check), so it's going to take a pretty long time to go through all of them.
I know that you can use threading to run multiple units of work concurrently, but I also know that when a method deals with shared data, each thread has to be aware of the others. Is there a way I can use multithreading to cut down the time it takes to go through the queryset and make those checks? E.g. if thread 1 looks at queryset items 1 and 2, thread 2 starts looking at queryset item 3, and then thread 1 skips ahead to queryset item 4 when it's done if thread 2 hasn't finished with item 3?
Method
@staticmethod
def image_existence_check():
    import threading
    import requests
    from dealer.models import Dealer

    vehicles = Vehicle.objects.all()
    for index, veh in enumerate(vehicles):
        images = veh.images.all()
        if images.count() == 1:
            image = images[0]
            response = requests.get(image.image_url)
            if response.status_code == 200:
                veh.has_image = True
            else:
                veh.has_image = False
        elif images.count() > 1:
            has_image = True
            for img in images:
                response = requests.get(img.image_url)
                if response.status_code != 200:  # compare the status, not the Response object
                    has_image = False
            veh.has_image = has_image
        else:
            veh.has_image = False
        veh.save()
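No answer was recorded for this one, but since the work is I/O bound (HTTP requests), a thread pool is a natural fit: the executor hands each worker the next vehicle as soon as it finishes its current one, which gives exactly the interleaving the question describes. A minimal sketch, assuming Django's usual behavior of opening a separate DB connection per thread; check_vehicle and the pool size are illustrative:

from concurrent.futures import ThreadPoolExecutor
import requests

def check_vehicle(veh):
    # runs in a worker thread; Django gives each thread its own DB connection
    urls = [img.image_url for img in veh.images.all()]
    return veh, bool(urls) and all(
        requests.get(u).status_code == 200 for u in urls
    )

vehicles = Vehicle.objects.all()
with ThreadPoolExecutor(max_workers=10) as executor:
    for veh, has_image in executor.map(check_vehicle, vehicles):
        veh.has_image = has_image
        veh.save()  # writes stay in the main thread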

Issue with sharing data between Python processes with multiprocessing

I've seen several posts about this, so I know it is fairly straightforward to do, but I seem to be coming up short. I'm not sure if I need to create a worker pool, or use the Queue class. Basically, I want to be able to create several processes that each act autonomously (which is why they inherit from the Agent superclass).
At random ticks of my main loop I want to update each Agent. I'm using time.sleep with different values in the main loop and the Agent's run loop to simulate different processor speeds.
Here is my Agent superclass:
import multiprocessing as mpc
import random
import time

# Generic class to handle mpc of each agent
class Agent(mpc.Process):
    # initialize agent parameters
    def __init__(self):
        # init mpc
        mpc.Process.__init__(self)
        self.exit = mpc.Event()

    # an agent's main loop...generally should be overridden
    def run(self):
        while not self.exit.is_set():
            pass
        print "You exited!"

    # safely shutdown an agent
    def shutdown(self):
        print "Shutdown initiated"
        self.exit.set()

    # safely communicate values to this agent
    def communicate(self, value):
        print value
A specific agent's subclass (simulating an HVAC system):
class HVAC(Agent):
    def __init__(self, dt=70, dh=50.0):
        super(HVAC, self).__init__()  # was super(Agent, ...), which skips Agent.__init__
        self.exit = mpc.Event()
        self.__pref_heating = True
        self.__pref_cooling = True
        self.__desired_temperature = dt
        self.__desired_humidity = dh
        self.__meas_temperature = 0
        self.__meas_humidity = 0.0
        self.__hvac_status = ""  # heating, cooling, off
        self.start()

    def run(self):  # handle AC or heater on
        while not self.exit.is_set():
            ctemp = self.measureTemp()
            chum = self.measureHumidity()
            if ctemp < self.__desired_temperature:
                self.__hvac_status = 'heating'
                self.__meas_temperature += 1
            elif ctemp > self.__desired_temperature:
                self.__hvac_status = 'cooling'
                self.__meas_temperature += 1
            else:
                self.__hvac_status = 'off'
            print self.__hvac_status, self.__meas_temperature
            time.sleep(0.5)
        print "HVAC EXITED"

    def measureTemp(self):
        return self.__meas_temperature

    def measureHumidity(self):
        return self.__meas_humidity

    def communicate(self, updates):
        self.__meas_temperature = updates['temp']
        self.__meas_humidity = updates['humidity']
        print "Measured [%d] [%f]" % (self.__meas_temperature, self.__meas_humidity)
And my main loop:
if __name__ == "__main__":
    print "Initializing subsystems"
    agents = {}
    agents['HVAC'] = HVAC()

    # Run simulation
    timestep = 0
    while timestep < args.timesteps:
        print "Timestep %d" % timestep
        if timestep % 10 == 0:
            curr_temp = random.randrange(68, 72)
            curr_humidity = random.uniform(40.0, 60.0)
            agents['HVAC'].communicate({'temp': curr_temp, 'humidity': curr_humidity})
        time.sleep(1)
        timestep += 1
    agents['HVAC'].shutdown()
    print "HVAC process state: %d" % agents['HVAC'].is_alive()
So the issue is that, whenever I run agents['HVAC'].communicate(x) within the main loop, I can see the value being passed into the HVAC subclass in its run loop (so it prints the received value correctly). However, the value is never successfully stored.
So typical output looks like this:
Initializing subsystems
Timestep 0
Measured [68] [56.948675]
heating 1
heating 2
Timestep 1
heating 3
heating 4
Timestep 2
heating 5
heating 6
When in reality, as soon as Measured [68] appears, the internally stored value should have been updated to 68, and the count should continue from there (not heating 1, heating 2, etc.). So effectively, the HVAC's self.__meas_temperature is not being properly updated.
Edit: After a bit of research, I realized that I didn't necessarily understand what is happening behind the scenes. Each subprocess operates with its own virtual chunk of memory and is completely abstracted away from any data being shared this way, so passing the value in isn't going to work. My new issue is that I'm not necessarily sure how to share a global value with multiple processes.
I was looking at the Queue and JoinableQueue classes, but I'm not sure how to pass a Queue into the kind of superclass setup that I have (especially with the mpc.Process.__init__(self) call).
A side concern is whether I can have multiple agents reading values from the queue without removing them from it. For instance, if I wanted to share a temperature value with multiple agents, would a Queue work for this?
Pipe vs. Queue?
Here's a suggested solution assuming that you want the following:
a centralized manager / main process which controls lifetimes of the workers
worker processes to do something self-contained and then report results to the manager and other processes
Before I show it, though, for the record I want to say that in general, unless you are CPU bound, multiprocessing is not really the right fit, mainly because of the added complexity, and you'd probably be better off using a different high-level asynchronous framework. Also, you should use Python 3; it's so much better!
That said, multiprocessing.Manager makes this pretty easy to do using multiprocessing. I've done this in Python 3, but I don't see why it shouldn't "just work" in Python 2; I haven't checked.
from ctypes import c_bool
from multiprocessing import Manager, Process, Array, Value
from pprint import pprint
from time import sleep, time

class Agent(Process):
    def __init__(self, name, shared_dictionary, delay=0.5):
        """My take on your Agent.

        Key difference is that I've commonized the run-loop and used
        a shared value to signal when to stop, to demonstrate it.
        """
        super(Agent, self).__init__()
        self.name = name
        # This is going to be how we communicate between processes.
        self.shared_dictionary = shared_dictionary
        # Create a silo for us to use.
        shared_dictionary[name] = []
        self.should_stop = Value(c_bool, False)
        # Primarily for testing purposes, and for simulating
        # slower agents.
        self.delay = delay

    def get_next_results(self):
        # In the real world I'd use abc.ABCMeta as the metaclass to do
        # this properly.
        raise RuntimeError('Subclasses must implement this')

    def run(self):
        ii = 0
        while not self.should_stop.value:
            ii += 1
            # debugging / monitoring
            print('%s %s run loop execution %d' % (
                type(self).__name__, self.name, ii))
            next_results = self.get_next_results()
            # Add the results, along with a timestamp.
            self.shared_dictionary[self.name] += [(time(), next_results)]
            sleep(self.delay)

    def stop(self):
        self.should_stop.value = True
        print('%s %s stopped' % (type(self).__name__, self.name))

class HVACAgent(Agent):
    def get_next_results(self):
        # This is where you do your work, but for the sake of
        # the example just return a constant dictionary.
        return {'temperature': 5, 'pressure': 7, 'humidity': 9}

class DumbReadingAgent(Agent):
    """A dumb agent to demonstrate workers reading other worker values."""

    def get_next_results(self):
        # get hvac 1 results:
        hvac1_results = self.shared_dictionary.get('hvac 1')
        if hvac1_results is None:
            return None
        return hvac1_results[-1][1]['temperature']

# Script starts.
results = {}

# The "with" ensures we terminate the manager at the end.
with Manager() as manager:
    # the manager is a subprocess in its own right. We can ask
    # it to manage a dictionary (or other python types) for us
    # to be shared among the other children.
    shared_info = manager.dict()

    hvac_agent1 = HVACAgent('hvac 1', shared_info)
    hvac_agent2 = HVACAgent('hvac 2', shared_info, delay=0.1)
    dumb_agent = DumbReadingAgent('dumb hvac1 reader', shared_info)
    agents = (hvac_agent1, hvac_agent2, dumb_agent)

    list(map(lambda a: a.start(), agents))
    sleep(1)
    list(map(lambda a: a.stop(), agents))
    list(map(lambda a: a.join(), agents))

    # Not quite sure what happens to the shared dictionary after
    # the manager dies, so for safety make a local copy.
    results = dict(shared_info)

pprint(results)

Making a python program wait until Twisted deferred returns a value

I have a program that fetches info from other pages and parses them using BeautifulSoup and Twisted's getPage. Later on in the program I print info that the deferred process creates. Currently my program tries to print it before the deferred returns the info. How can I make it wait?
def twisAmaz(contents):  # This parses the page (amazon api xml file)
    stonesoup = BeautifulStoneSoup(contents)
    if stonesoup.find("mediumimage") == None:
        imageurl.append("/images/notfound.png")
    else:
        imageurl.append(stonesoup.find("mediumimage").url.contents[0])
    usedPdata = stonesoup.find("lowestusedprice")
    newPdata = stonesoup.find("lowestnewprice")
    titledata = stonesoup.find("title")
    reviewdata = stonesoup.find("editorialreview")
    if stonesoup.find("asin") != None:
        asin.append(stonesoup.find("asin").contents[0])
    else:
        asin.append("None")
    reactor.stop()

deferred = dict()
for tmpISBN in isbn:  # Go through ISBN numbers and get Amazon API information for each
    deferred[(tmpISBN)] = getPage(fetchInfo(tmpISBN))
    deferred[(tmpISBN)].addCallback(twisAmaz)
reactor.run()
.....print info on each ISBN
What it seems like is that you're trying to make/run multiple reactors; everything gets attached to the same reactor. Here's how to use a DeferredList to wait for all of your callbacks to finish.
Also note that twisAmazon returns a value. That value is passed through the DeferredList's callbacks and comes out as value in printResult. Since a DeferredList keeps the order of the things that are put into it, you can cross-reference the index of the results with the index of your ISBNs.
from twisted.internet import defer

def twisAmazon(contents):
    stonesoup = BeautifulStoneSoup(contents)
    ret = {}
    if stonesoup.find("mediumimage") is None:
        ret['imageurl'] = "/images/notfound.png"
    else:
        ret['imageurl'] = stonesoup.find("mediumimage").url.contents[0]
    ret['usedPdata'] = stonesoup.find("lowestusedprice")
    ret['newPdata'] = stonesoup.find("lowestnewprice")
    ret['titledata'] = stonesoup.find("title")
    ret['reviewdata'] = stonesoup.find("editorialreview")
    if stonesoup.find("asin") is not None:
        ret['asin'] = stonesoup.find("asin").contents[0]
    else:
        ret['asin'] = 'None'
    return ret

callbacks = []
for tmpISBN in isbn:  # Go through ISBN numbers and get Amazon API information for each
    callbacks.append(getPage(fetchInfo(tmpISBN)).addCallback(twisAmazon))

def printResult(result):
    for e, (success, value) in enumerate(result):
        print ('[%r]:' % isbn[e]),
        if success:
            print 'Success:', value
        else:
            print 'Failure:', value.getErrorMessage()

callbacks = defer.DeferredList(callbacks)
callbacks.addCallback(printResult)
reactor.run()
Another cool way to do this is with @defer.inlineCallbacks. It lets you write asynchronous code like a regular sequential function: http://twistedmatrix.com/documents/8.1.0/api/twisted.internet.defer.html#inlineCallbacks
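A minimal sketch of the inlineCallbacks style, reusing getPage, fetchInfo, and twisAmazon from above; note this version fetches the pages one after another rather than concurrently:

from twisted.internet import defer

@defer.inlineCallbacks
def fetchAll(isbns):
    results = []
    for tmpISBN in isbns:
        # yield suspends this function until the Deferred fires
        contents = yield getPage(fetchInfo(tmpISBN))
        results.append(twisAmazon(contents))
    defer.returnValue(results)  # Python 2 generators cannot "return results"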
First, you shouldn't put a reactor.stop() in your deferred method, as it kills everything.
Now, in Twisted, "waiting" is not allowed. To print the results of your callback, just add another callback after the first one.
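For example (printInfo is a hypothetical function that receives whatever the previous callback returns):

d = getPage(fetchInfo(tmpISBN))
d.addCallback(twisAmaz)    # parse the page first
d.addCallback(printInfo)   # then print; runs only after twisAmaz has returned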
