Plotting Celery canvas before running - python

There is documentation on how to produce a graph after running a canvas job in Celery.
However I'd like to generate a graph before I run the job.
Say I created a simple chain:
c = chain(add.s(1, 2), mul.s(4))
How can I generate a graph of the chain?
Thanks,
Miki

I had the exact same desire: generate the graph before running the job. So I worked a bit on it :)
It appears that Celery does not allow it. The reason (at least as I understood it when trying to do this) is that each node in the graph has to have a unique name. Once the canvas is executed, this unique name is the Celery task_id, but before execution there is nothing that allows such a distinction.
So the solution is to generate this graph yourself, and of course identify each node uniquely (a counter can do the work here).
This is the job of this function:
# -*- coding: utf-8 -*-
from celery.canvas import chain, group, Signature


def analyze_canvas(canvas):
    return _analyze_canvas(canvas)['dependencies']


def _analyze_canvas(canvas, previous=[], i=0):
    dependencies = []
    if isinstance(canvas, chain):
        for t in canvas.tasks:
            if not (isinstance(t, group) or isinstance(t, chain)):
                n = str(t) + " - (" + str(i) + ")"
                i += 1
                dependencies.append((n, previous))
                previous = [n]
            else:
                analysis = _analyze_canvas(t, previous, i)
                dependencies.extend(analysis['dependencies'])
                previous = analysis['previous']
    elif isinstance(canvas, group):
        new_previous = []
        for t in canvas.tasks:
            if not (isinstance(t, group) or isinstance(t, chain)):
                n = str(t) + " - (" + str(i) + ")"
                i += 1
                dependencies.append((n, previous))
                new_previous.append(n)
            else:
                analysis = _analyze_canvas(t, previous, i)
                dependencies.extend(analysis['dependencies'])
                new_previous = analysis['previous']
        previous = new_previous
    elif isinstance(canvas, Signature):
        # a bare Signature: use the canvas itself here (there is no loop variable t)
        n = str(canvas) + " - (" + str(i) + ")"
        i += 1
        dependencies.append((n, previous))
        previous = [n]
    return {"dependencies": dependencies,
            "previous": previous}
It generates the dependency graph of your canvas. The idea is just to iterate over the tasks of the canvas and identify group/chain/Signature instances to generate the right dependencies.
From this point you can use some more celery utils to generate the dot file. Here is a small usage example:
from celery_util import analyze_canvas
from celery.datastructures import DependencyGraph
from celery import Celery, group

app = Celery()


@app.task
def t1():
    pass


@app.task
def t2():
    pass


canvas = t1.si() | t2.si() | group(t1.si(), t1.si(), t2.si()) | t2.si()

d = analyze_canvas(canvas)
dg = DependencyGraph(it=d)

pipo = open("pipo.dot", "w+")
dg.to_dot(pipo)
In this example I just declare dummy tasks and chain/group them in a nice pretty canvas. I use the Celery util DependencyGraph to get the object representation and the ability to dump the graph in dot format, which I do with the to_dot method.
And the beautiful result is:

I have updated the code from https://stackoverflow.com/a/29105701/928489 to work with Celery 4. It works with chain, group and chord.
from celery.canvas import _chain, group, chord


def analyze_canvas(canvas):
    return _analyze_canvas(canvas)[0]


def _analyze_canvas(canvas, previous=[], i=0):
    dependencies = []
    if isinstance(canvas, _chain):
        for i, t in enumerate(canvas.tasks, i):
            dep, previous = _analyze_canvas(t, previous, i)
            dependencies.extend(dep)
    elif isinstance(canvas, group) or isinstance(canvas, chord):
        new_previous = []
        for i, t in enumerate(canvas.tasks, i):
            dep, p = _analyze_canvas(t, previous, i)
            dependencies.extend(dep)
            new_previous.extend(p)
        if isinstance(canvas, chord):
            dep, p = _analyze_canvas(canvas.body, new_previous, i)
            return dependencies + dep, p
    else:
        t = canvas.name + " - (" + str(i) + ")"
        dependencies = [(t, previous)]
        previous = [t]
    return dependencies, previous
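For Celery 4 the usage is the same as in the earlier answer, except that DependencyGraph has moved; to the best of my knowledge it now lives in celery.utils.graph rather than celery.datastructures (worth double-checking against your Celery version). A minimal sketch:
from celery import Celery, group
from celery.utils.graph import DependencyGraph

app = Celery()


@app.task
def t1():
    pass


@app.task
def t2():
    pass


canvas = t1.si() | t2.si() | group(t1.si(), t1.si(), t2.si()) | t2.si()

deps = analyze_canvas(canvas)      # list of (node, [dependencies]) tuples
dg = DependencyGraph(it=deps)
with open("pipo.dot", "w") as fh:
    dg.to_dot(fh)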

Related

How to return a dictionary from a process in Python?

I want to make an inverted index using multiprocessing to speed up its work. My idea is to split the files into groups, and each process will build its own inverted index, and then I want to merge all these indexes into one inverted index. But I don't know how to return them to the main process that will merge them.
import multiprocessing as mp
from pathlib import Path
import re
import time


class InvertedIndex:
    def __init__(self):
        self.index = dict()

    def createIndex(self, path='data', threads_num=4):
        pathList = list(Path(path).glob('**/*.txt'))
        fileNum = len(pathList)
        oneProcessNum = fileNum / threads_num

        processes = []
        for i in range(threads_num):
            startIndex = int(i * oneProcessNum)
            endIndex = int((i + 1) * oneProcessNum)
            currLi = pathList[startIndex:endIndex]
            p = mp.Process(target=self.oneProcessTask, args=(currLi,))
            processes.append(p)

        [x.start() for x in processes]
        [x.join() for x in processes]

    @staticmethod
    def oneProcessTask(listOfDoc):
        # print(f'Start: {listOfDoc[0]}, end: {listOfDoc[-1]}')  # temp
        tempDict = dict()
        for name in listOfDoc:
            with open(name) as f:
                text = f.read()
                li = re.findall(r'\b\w+\b', text)
                for w in li:
                    if tempDict.get(w) is None:
                        tempDict[w] = set()
                    tempDict[w].add(str(name))

    def getListOfDoc(self, keyWord):
        return self.index[keyWord]


if __name__ == '__main__':
    ii = InvertedIndex()
    start_time = time.time()
    ii.createIndex()
    print("--- %s seconds ---" % (time.time() - start_time))
I used a multiprocessing.Manager to write everything into one dictionary, but that solution was too slow. So I went back to the idea of creating an own inverted index for each process and then merging them. But I don't know how to return all the indexes to one process.
Take a look at concurrent.futures (standard library) with either ThreadPoolExecutor or ProcessPoolExecutor. FYI: I wrote about that here and did not test this, but it is more or less the gist of what I use all the time.
from concurrent.futures import ThreadPoolExecutor, as_completed


def foo(stuff: int) -> dict:
    return {}


things_to_analyze = [1, 2, 3]
threads = []
results = []

with ThreadPoolExecutor() as executor:
    for thing in things_to_analyze:
        threads.append(executor.submit(foo, thing))
    for job in as_completed(threads):
        results.append(job.result())
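Since building an inverted index is mostly CPU-bound, ProcessPoolExecutor is probably the better fit of the two; a minimal sketch along the same lines (foo and things_to_analyze are placeholders, as above):
from concurrent.futures import ProcessPoolExecutor, as_completed


def foo(stuff: int) -> dict:
    return {}


if __name__ == '__main__':
    things_to_analyze = [1, 2, 3]
    jobs = []
    results = []
    with ProcessPoolExecutor() as executor:
        for thing in things_to_analyze:
            jobs.append(executor.submit(foo, thing))
        for job in as_completed(jobs):
            results.append(job.result())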
I found a solution. I used pool.starmap to return a list of indexes.
My code:
class InvertedIndex:
    def __init__(self):
        self.smallIndexes = None
        self.index = dict()

    def createIndex(self, path='data', threads_num=4):
        pathList = list(Path(path).glob('**/*.txt'))  # Recursively walk all text files and build a list of them
        fileNum = len(pathList)
        oneProcessNum = fileNum / threads_num  # Calculate how many files one process should handle

        processes_args = []
        for i in range(threads_num):
            startIndex = int(i * oneProcessNum)
            endIndex = int((i + 1) * oneProcessNum)
            processes_args.append((path, startIndex, endIndex))

        pool = mp.Pool(threads_num)
        self.smallIndexes = pool.starmap(self.oneProcessTask, processes_args)
        self.mergeIndex()

    @staticmethod
    def oneProcessTask(path, startIndex, endIndex):
        pathList = list(Path(path).glob('**/*.txt'))
        listOfDoc = pathList[startIndex:endIndex]

        tempDict = dict()
        for name in listOfDoc:
            with open(name) as f:
                text = f.read()
                li = re.findall(r'\b\w+\b', text)
                for w in li:
                    if tempDict.get(w) is None:
                        tempDict[w] = set()
                    tempDict[w].add(str(name))
        return tempDict
Execution time decreased from 200 seconds (when I used shared memory and Manager().dict()) to 0.8 seconds (when I used pool.starmap).
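The mergeIndex method is not shown above; a minimal sketch of one way to fold the per-process dictionaries returned by pool.starmap into self.index, assuming each small index is a {word: set(file names)} dict as built in oneProcessTask:
    def mergeIndex(self):
        # Union every per-process {word: set(files)} dict into the main index.
        for smallIndex in self.smallIndexes:
            for word, files in smallIndex.items():
                if word not in self.index:
                    self.index[word] = set()
                self.index[word] |= files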

Why isn't my SimPy resource keeping a queue?

I've been working for around a week to learn SimPy for a discrete simulation I have to run. I've done my best, but I'm just not experienced enough to figure it out quickly. I am dying. Please help.
The system in question goes like this:
order arrives -> resource_1 (there are 2) performs take_order -> order broken into items -> resource_2 (there are 10) performs process_item
My code runs and performs the simulation, but I'm having a lot of trouble getting the queues on the resources to function. As in, queues do not build up on either resource when I run it, and I cannot find the reason why. I try resource.get_queue and get empty lists. There should absolutely be queues, as the orders arrive faster than they can be processed.
I think it has something to do with the logic for requesting resources, but I can't figure it out. Here's how I've structured the code:
import simpy
import random
import numpy as np

total_items = []
total_a = []
total_b = []
total_c = []
order_Q = []
item_Q = []
skipped_visits = []
order_time_dict = {}
order_time_dict2 = {}
total_order_time_dict = {}
var = []


class System:
    def __init__(self, env, num_resource_1, num_resource_2):
        self.env = env
        self.resource_1 = simpy.Resource(env, num_resource_1)
        self.resource_2 = simpy.Resource(env, num_resource_2)

    def take_order(self, order):
        self.time_to_order = random.triangular(30/60, 60/60, 120/60)
        arrive = self.env.now
        yield self.env.timeout(self.time_to_order)

    def process_item(self, item):
        total_process_time = 0
        current = env.now
        order_num = item[1][0]
        for i in range(1, item[1][1]):
            if 'a' in item[0]:
                total_process_time += random.triangular(.05, 7/60, 1/6)  # bagging time only
                # here edit order time w x
            if 'b' in item[0]:
                total_process_time += random.triangular(.05, .3333, .75)
            if 'c' in item[0]:
                total_process_time += random.triangular(.05, 7/60, 1/6)
        # the following is handling time: getting to station, waiting on car to arrive at window after finished, handing to cust
        total_process_time += random.triangular(.05, 10/60, 15/60)
        item_finish_time = current + total_process_time
        if order_num in order_time_dict2.keys():
            start = order_time_dict2[order_num][0]
            if order_time_dict2[order_num][1] < item_finish_time:
                order_time_dict2[order_num] = (start, item_finish_time)
        else:
            order_time_dict2[order_num] = (current, item_finish_time)
        yield self.env.timeout(total_process_time)


class Order:
    def __init__(self, order_dict, order_num):
        self.order_dict = order_dict
        self.order_num = order_num
        self.order_stripped = {}
        for x, y in list(self.order_dict.items()):
            if x != 'total':
                if y != 0:
                    # this gives dictionary format {item: (order number, number items)} but only including items in order
                    self.order_stripped[x] = (order_num, y)
        self.order_list = list(self.order_stripped.items())


def generate_order(num_orders):
    print('running generate_order')
    a_demand = .1914 ** 3
    a_stdev = 43.684104
    b_demand = .1153
    b_stdev = 28.507782
    c_demand = .0664
    c_stdev = 15.5562624349
    num_a = abs(round(np.random.normal(a_demand)))
    num_b = abs(round(np.random.normal(b_demand)))
    num_c = abs(round(np.random.normal(c_demand)))
    total = num_orders
    total_a.append(num_a)
    total_b.append(num_b)
    total_c.append(num_c)
    total_num_items = num_a + num_b + num_c
    total_items.append(total_num_items)
    order_dict = {'num_a': num_a, 'num_b': num_b, 'num_c': num_c, 'total': total}
    return order_dict


def order_process(order_instance, system):
    enter_system_at = system.env.now
    print("order " + str(order_instance.order_num) + " arrives at " + str(enter_system_at))
    if len(system.resource_1.get_queue) > 1:
        print("WORKING HERE ******************")
    if len(system.resource_1.get_queue) <= 25:
        with system.resource_1.request() as req:
            order_Q.append(order_instance)
            yield req
            yield env.process(system.take_order(order_instance))
            order_Q.pop()
        enter_workstation_at = system.env.now
        print("order num " + str(order_instance.order_num) + " enters workstation at " + str(enter_workstation_at))
        for item in order_instance.order_list:
            item_Q.append(item)
            with system.resource_2.request() as req:
                yield req
                yield env.process(system.process_item(item))
                if len(system.resource_2.get_queue) > 1:
                    var.append(1)
            item_Q.pop()
        leave_workstation_at = system.env.now
        print("Order num " + str(order_instance.order_num) + " leaves at " + str(leave_workstation_at))
        order_time_dict[order_instance.order_num] = leave_workstation_at - enter_workstation_at
        total_order_time_dict[order_instance.order_num] = leave_workstation_at - enter_system_at
    else:
        skipped_visits.append(1)


def setup(env):
    system = System(env, 2, 15)
    order_num = 0
    while True:
        next_order = random.expovariate(3.5)  # where 3.5 is the order arrival rate (lambda)
        yield env.timeout(next_order)
        order_num += 1
        env.process(order_process(Order(generate_order(order_num), order_num), system))


env = simpy.Environment()
env.process(setup(env))
env.run(until=15*60)
print("1: \n", order_time_dict)
I think you are looking at the wrong queue.
The API for getting the queued requests of a resource is simply the queue attribute, so try using
len(system.resource_1.queue)
get_queue and put_queue come from the base class and are used to derive new resource classes.
But wait, they are not what any reasonable person would assume, and I find this confusing too. The docs say:
Requesting a resource is modeled as "putting a process' token into the resource", which means that when you call request() the process is put into the put_queue, not the get_queue. And with a Resource, release always succeeds immediately, so its queue (which is the get_queue) is always empty.
I think queue is just an alias for the put_queue, but queue is much less confusing.
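A tiny, self-contained sketch (not the original model) that shows where waiting requests show up: they accumulate in resource.queue, while get_queue stays empty for a plain Resource:
import simpy


def user(env, res):
    with res.request() as req:
        yield req
        yield env.timeout(5)  # hold the resource long enough for a queue to form


def monitor(env, res):
    while True:
        print(env.now, 'waiting:', len(res.queue), 'get_queue:', len(res.get_queue))
        yield env.timeout(1)


env = simpy.Environment()
res = simpy.Resource(env, capacity=1)
for _ in range(3):
    env.process(user(env, res))
env.process(monitor(env, res))
env.run(until=6)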

Running Two Objects at Once

I am trying to see if Python can run two objects at once, i.e. simultaneously. I have researched multithreading and it seems like it can handle this. All the posts I am seeing pass functions as the target, but not an actual instantiation of an object.
file1.py:
from threading import Thread
import os


class Move_Images:
    def __init__(self, rps_img_archive_fp, destination_fp, images_count, set_capturebatchno):
        self.rps_img_archive_fp = rps_img_archive_fp
        self.destination_fp = destination_fp
        self.images_count = images_count
        self.set_capturebatchno = set_capturebatchno

    def get_fp_elem(self):
        count = 0
        capturebatchno_l = []
        img_fn_l = []
        final_dest_fp_l = []
        input_img_fp_l = []
        for path, directories, files in os.walk(self.rps_img_archive_fp):
            for file in files:
                count += 1
                # if count <= self.images_count:
                img_fp = os.path.join(path, file)
                capturebatchno = img_fp.split("\\")[7]
                get_cbn = [cbn for cbn in self.set_capturebatchno if cbn in capturebatchno]
                if get_cbn:
                    capturebatchno_l.append(get_cbn[0])
                    filename = img_fp.split("\\")[8]
                    img_fn_l.append(filename)
                    input_img_fp_l.append(img_fp)
        res = []
        for i in capturebatchno_l:
            if i not in res:
                res.append(i)
        for cbn, fn in zip(capturebatchno_l, img_fn_l):
            dest_fp = os.path.join(str(self.destination_fp) + "\\" + str(cbn) + "\\" + str(fn))
            final_dest_fp_l.append(dest_fp)
        return res, self.destination_fp, img_fn_l, self.rps_img_archive_fp, input_img_fp_l, final_dest_fp_l


move_images = Move_Images(rps_img_archive_fp=r'c:\\test\test\input',
                          destination_fp=r'c:\\test\test\output', images_count=100,
                          set_capturebatchno=['00004002', '00004005'])
res, destination_fp, img_fn_l, rps_img_archive_fp, input_img_fp_l, final_dest_fp_l = \
    move_images.get_fp_elem()

move_images_2 = Move_Images(rps_img_archive_fp=r'c:\\test\test\input',
                            destination_fp=r'c:\\test\test\output', images_count=100,
                            set_capturebatchno=['000040010', '000040012'])

if __name__ == '__main__':
    Thread(target=move_images).start()
    Thread(target=move_images_2).start()
You can, for example, create a class (Move_Images) that inherits from Thread itself and implements a run method. Look into the accepted answer on this post; maybe this is what you want.
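A hedged sketch of the two usual patterns, reusing the Move_Images instances from the question (the new class and attribute names are illustrative): either pass a bound method as target, or subclass Thread and put the work in run.
from threading import Thread

# Pattern 1: target must be callable, so pass a bound method, not the instance itself.
t1 = Thread(target=move_images.get_fp_elem)
t2 = Thread(target=move_images_2.get_fp_elem)
t1.start()
t2.start()
t1.join()
t2.join()


# Pattern 2: subclass Thread and put the work in run().
class MoveImagesThread(Thread):
    def __init__(self, mover):
        super().__init__()
        self.mover = mover      # an existing Move_Images instance
        self.result = None

    def run(self):
        self.result = self.mover.get_fp_elem()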

Arc constraints for jobshop scheduling

I have built a job shop scheduling algorithm using the ortools optimization library for Python.
The problem is that when I build a flexible jobshop model with setup times it doesn't work, and I think it is due to the arcs that I made. If there is anybody here who can explain the circuit constraint in more detail, that would help me. By the way, when I use a single machine it works.
Code:
from __future__ import print_function

import collections

from google.protobuf import text_format
# Import Python wrapper for or-tools CP-SAT solver.
from ortools.sat.python import cp_model


# Intermediate solution printer
class SolutionPrinter(cp_model.CpSolverSolutionCallback):
    """Print intermediate solutions."""

    def __init__(self):
        cp_model.CpSolverSolutionCallback.__init__(self)
        self.__solution_count = 0

    def on_solution_callback(self):
        """Called after each new solution found."""
        print('Solution %i, time = %f s, objective = %i' %
              (self.__solution_count, self.WallTime(), self.ObjectiveValue()))
        self.__solution_count += 1


def MinimalJobshopSat():
    """Minimal jobshop problem."""
    # Create the model.
    model = cp_model.CpModel()

    jobs_data = [[(0, 2546), (1, 2000), (2, 1400)],
                 [(0, 1289), (1, 2546), (2, 2546)],
                 [(0, 2839), (1, 1576), (2, 1200)]]

    setup_times = [
        [[3559, 1638, 2000],
         [1442, 3010, 1641],
         [1728, 3583, 3243]],
        [[3559, 1638, 2000],
         [1442, 3010, 1641],
         [1728, 3583, 3243]],
        [[3559, 1638, 2000],
         [1442, 3010, 1641],
         [1728, 3583, 3243]]]

    all_jobs = range(len(jobs_data))
    machines_count = 1 + max(task[0] for job in jobs_data for task in job)
    all_machines = range(machines_count)

    for machine in all_machines:
        for job_id in all_jobs:
            min_incoming_setup = min(
                setup_times[machine][j][job_id] for j in all_jobs)
            if min_incoming_setup == 0:
                continue
            print('job %i at machine %i has a min incoming setup of %i' %
                  (job_id, machine, min_incoming_setup))
            # We can transfer some setup times to the duration of the job.
            jobs_data[job_id][machine] = (machine,
                                          jobs_data[job_id][machine][1] + min_incoming_setup)
            # Decrease corresponding incoming setup times.
            for j in all_jobs:
                setup_times[machine][j][job_id] -= min_incoming_setup

    # Computes horizon dynamically as the sum of all durations.
    horizon = sum(task[1] for job in jobs_data for task in job)
    for times in setup_times:
        for time in times:
            horizon += max(time)

    # Named tuple to store information about created variables.
    task_type = collections.namedtuple('task_type', 'start end interval')
    # Named tuple to manipulate solution information.
    assigned_task_type = collections.namedtuple('assigned_task_type',
                                                'start job index duration')

    # Creates job intervals and adds them to the corresponding machine lists.
    all_tasks = {}
    machine_to_intervals = collections.defaultdict(list)
    starts = collections.defaultdict(list)
    ends = collections.defaultdict(list)

    for job_id, job in enumerate(jobs_data):
        for task_id, task in enumerate(job):
            machine = task[0]
            duration = task[1]
            suffix = '_%i_%i' % (job_id, task_id)
            start_var = model.NewIntVar(0, horizon, 'start' + suffix)
            end_var = model.NewIntVar(0, horizon, 'end' + suffix)
            interval_var = model.NewIntervalVar(start_var, duration, end_var,
                                                'interval' + suffix)
            all_tasks[job_id, task_id] = task_type(
                start=start_var, end=end_var, interval=interval_var)
            machine_to_intervals[machine].append(interval_var)
            starts[machine].append(start_var)
            ends[machine].append(end_var)

    # Create and add disjunctive constraints.
    for machine in all_machines:
        model.AddNoOverlap(machine_to_intervals[machine])

    # ------------------------------------------------------------------
    # Transition times using a circuit constraint.
    list_arcs = []
    for machine in all_machines:
        arcs = []
        for i in all_jobs:
            # Initial arc from the dummy node (0) to a task.
            start_lit = model.NewBoolVar('')
            arcs.append([0, i + 1, start_lit])
            # If this task is the first, set to minimum starting time.
            min_start_time = min(0, setup_times[machine][0][i])
            model.Add(starts[machine][i] == min_start_time).OnlyEnforceIf(start_lit)
            # Final arc from a task to the dummy node.
            arcs.append([i + 1, 0, model.NewBoolVar('')])

            for j in all_jobs:
                if i == j:
                    continue
                lit = model.NewBoolVar('%i_%i follows %i_%i' % (j, machine, i, machine))
                arcs.append([i + 1, j + 1, lit])
                # We add the reified precedence to link the literal with the times of the
                # two tasks.
                # If release_dates[j] == 0, we can strengthen this precedence into an
                # equality as we are minimizing the makespan.
                model.Add(starts[machine][j] >=
                          ends[machine][i] + setup_times[machine][i][j]).OnlyEnforceIf(lit)
        list_arcs.append(arcs)
        model.AddCircuit(arcs)
    # ------------------------------------------------------------------

    # Precedences inside a job.
    for job_id, job in enumerate(jobs_data):
        for task_id in range(len(job) - 1):
            model.Add(all_tasks[job_id, task_id + 1].start >=
                      all_tasks[job_id, task_id].end)

    # Makespan objective.
    obj_var = model.NewIntVar(0, horizon, 'makespan')
    model.AddMaxEquality(obj_var, [
        all_tasks[job_id, len(job) - 1].end
        for job_id, job in enumerate(jobs_data)
    ])
    model.Minimize(obj_var)

    # Solve model.
    solver = cp_model.CpSolver()
    solver.parameters.max_time_in_seconds = 60
    status = solver.Solve(model)
    solution_printer = SolutionPrinter()
    solver.SolveWithSolutionCallback(model, solution_printer)
    print(solver.ResponseStats())

    if status == cp_model.FEASIBLE:
        # Create one list of assigned tasks per machine.
        assigned_jobs = collections.defaultdict(list)
        for job_id, job in enumerate(jobs_data):
            for task_id, task in enumerate(job):
                machine = task[0]
                assigned_jobs[machine].append(
                    assigned_task_type(
                        start=solver.Value(all_tasks[job_id, task_id].start),
                        job=job_id,
                        index=task_id,
                        duration=task[1]))

        # Create per machine output lines.
        output = ''
        for machine in all_machines:
            # Sort by starting time.
            assigned_jobs[machine].sort()
            sol_line_tasks = 'Machine ' + str(machine) + ': '
            sol_line = ' '
            for assigned_task in assigned_jobs[machine]:
                name = 'job_%i_%i' % (assigned_task.job, assigned_task.index)
                # Add spaces to output to align columns.
                sol_line_tasks += '%-10s' % name
                start = assigned_task.start
                duration = assigned_task.duration
                sol_tmp = '[%i,%i]' % (start, start + duration)
                # Add spaces to output to align columns.
                sol_line += '%-10s' % sol_tmp
            sol_line += '\n'
            sol_line_tasks += '\n'
            output += sol_line_tasks
            output += sol_line

        # Finally print the solution found.
        print('Optimal Schedule Length: %i' % solver.ObjectiveValue())
        print(output)


if __name__ == '__main__':
    MinimalJobshopSat()
If a task is optional, you need to add a self-looping arc on the node that corresponds to this task.
So, assuming task_i has a Boolean presence literal lit_i, you need to add:
arcs.append([i + 1, i + 1, lit_i.Not()])
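For context, a hedged sketch of where such a self-loop would sit inside the per-machine arcs loop above, assuming each task was created as an optional interval with a presence literal (the presences dict is hypothetical, not part of the original code):
for i in all_jobs:
    lit_i = presences[machine][i]                   # hypothetical presence literal of task i
    arcs.append([0, i + 1, model.NewBoolVar('')])   # dummy node -> task i (i is scheduled first)
    arcs.append([i + 1, 0, model.NewBoolVar('')])   # task i -> dummy node (i is scheduled last)
    arcs.append([i + 1, i + 1, lit_i.Not()])        # self-loop: the circuit skips i when it is absent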

Performance improvement - Looping with GET method

I've built a program to fill up a databank and, for the time being, it's working. Basically, the program makes a request to the app I'm using (via REST API), returns the data I want and then manipulates it into a form acceptable for the databank.
The problem is: the GET method makes the algorithm too slow, because I'm accessing the details of particular entries, so for each entry I have to make 1 request. I have something close to 15000 requests to do and each row in the bank is taking 1 second to be made.
Is there any possible way to make these requests faster? How can I improve the performance of this method? And by the way, any tips on how to measure the performance of the code?
Thanks in advance!!
here's the code:
import json
from datetime import datetime, timedelta

import requests
# serializers, formaters, modelExample and Model_Example2 come from the Django project.

# Retrieving all the IDs I want to get the detailed info for
abc_ids = serializers.serialize('json', modelExample.objects.all(), fields=('id'))
abc_ids = json.loads(abc_ids)
abc_ids_size = len(abc_ids)

# Had to declare these guys right here because at the end of the code I use them in the
# functions that create and update the bank, and Python was complaining that they were
# used before assignment. Picked random values for them.
age = 0
time_to_won = 0
data = '2016-01-01 00:00:00'

# First loop -> request the detailed info of ABC
for x in range(0, abc_ids_size):
    id = abc_ids[x]['fields']['id']
    response = requests.get(
        'https://api.example.com/v3/abc/' + str(id) + '?api_token=123123123')
    info = response.json()
    dealx = dict(info)

    # Second loop -> picking the info I want to update and create in the bank
    for key, result in dealx['data'].items():
        # Relevant only for ModelExample -> UPDATE
        if key == 'age':
            result = dict(result)
            age = result['total_seconds']
        # Relevant only for ModelExample -> UPDATE
        elif key == 'average_time_to_won':
            result = dict(result)
            time_to_won = result['total_seconds']
        # Relevant for Model_Example2 -> CREATE
        # Storing a date here to use further forward in a datetime manipulation
        if key == 'add_time':
            data = str(result)
        elif key == 'time_stage':
            # Each stage has a total of seconds that the user stayed in it.
            y = result['times_in_stages']
            # The user can be in any stage he wants, there's no rule about the order.
            # But there's a record of the order he chose.
            z = result['order_of_stages']
            # Creating a list to fill up with all stages info and use in the bulk_create.
            data_set = []
            index = 0
            # Setting the number of repetitions based on the number of stages in the list.
            for elemento in range(0, len(z)):
                data_set_i = {}
                # The index is to define the order of the stages.
                index = index + 1
                for key_1, result_1 in y.items():
                    if int(key_1) == z[elemento]:
                        data_set_i['stage_id'] = int(z[elemento])
                        data_set_i['index'] = int(index)
                        data_set_i['abc_id'] = id
                        # Datetime manipulation
                        if result_1 == 0 and index == 1:
                            data_set_i['add_date'] = data
                        # I know that I totally repeated the code here, I was trying to get
                        # this part shorter but I could not get it right.
                        elif result_1 > 0 and index == 1:
                            data_t = datetime.strptime(data, "%Y-%m-%d %H:%M:%S")
                            data_sum = data_t + timedelta(seconds=result_1)
                            data_sum += timedelta(seconds=3)
                            data_nova = str(data_sum.year) + '-' + str(formaters.DateNine(
                                data_sum.month)) + '-' + str(formaters.DateNine(data_sum.day)) + ' ' + str(
                                data_sum.hour) + ':' + str(formaters.DateNine(data_sum.minute)) + ':' + str(
                                formaters.DateNine(data_sum.second))
                            data_set_i['add_date'] = str(data_nova)
                        else:
                            data_t = datetime.strptime(data_set[elemento - 1]['add_date'], "%Y-%m-%d %H:%M:%S")
                            data_sum = data_t + timedelta(seconds=result_1)
                            data_sum += timedelta(seconds=3)
                            data_nova = str(data_sum.year) + '-' + str(formaters.DateNine(
                                data_sum.month)) + '-' + str(formaters.DateNine(data_sum.day)) + ' ' + str(
                                data_sum.hour) + ':' + str(formaters.DateNine(data_sum.minute)) + ':' + str(
                                formaters.DateNine(data_sum.second))
                            data_set_i['add_date'] = str(data_nova)
                data_set.append(data_set_i)

    Model_Example2_List = [Model_Example2(**vals) for vals in data_set]
    Model_Example2.objects.bulk_create(Model_Example2_List)
    ModelExample.objects.filter(abc_id=id).update(age=age, time_to_won=time_to_won)
If the bottleneck is in your network requests, there isn't much you can do except perhaps use gzip or deflate, but with requests...
The gzip and deflate transfer-encodings are automatically decoded for you.
If you want to be doubly sure, you can add the following header to the GET request:
{'Accept-Encoding': 'gzip,deflate'}
The other alternative is to use threading and have many requests operate in parallel, a good option if you have lots of bandwidth and multiple cores.
Lastly, there are lots of different ways to profile Python, including the cProfile + KCachegrind combo.
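As a rough illustration of the threading suggestion (a sketch only, reusing the abc_ids list and the example endpoint/token from the question; tune max_workers to what the API tolerates):
from concurrent.futures import ThreadPoolExecutor

import requests


def fetch(abc_id):
    r = requests.get('https://api.example.com/v3/abc/' + str(abc_id) + '?api_token=123123123')
    return abc_id, r.json()


ids = [row['fields']['id'] for row in abc_ids]
with ThreadPoolExecutor(max_workers=20) as pool:
    for abc_id, info in pool.map(fetch, ids):
        pass  # run the same per-entry processing as in the question here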
