Optimization of python multithreading script - huge memory consumption

Optimization of python multithreading script - huge memory consumption - python

I have a script (a Django management command) with over 800 lines of code.
It should import data from an external web service, manipulate it, and write it to a Postgres DB.
I use multithreading because fetching data from the web service is not very fast.
There is one thread that fetches the data with a bulk command (64 data sets per call) and writes each data set to a queue.
At the same time, there is initially one worker thread which manipulates the data and writes it to the DB.
In the main (handle) method, there is a while-loop that checks every 5 seconds the number of elements in the queue and the number of running worker threads.
If there are more than 500 elements in the queue and fewer than 5 worker threads, it starts a new worker thread.
Each worker thread gets one item from the queue, manipulates it, writes the data set to the DB and appends one string (up to 14 chars) to a different queue (#2).
Queue #2 is necessary to have all imported objects at the end of the import, so that they can be marked as new and all other items in the DB that were not part of the current import can be deleted.
For DBs with no more than 200,000 data sets everything works fine.
But if there is, for example, a DB with 1,000,000 data sets, the memory consumption increases during the run of the whole script up to 8 GB of RAM.
Is there a way to monitor the memory consumption of threads and/or queues?
Is there a way to "clean" memory after each while-loop iteration?
# -*- coding: utf-8 -*-
import os
import threading
import Queue
import time
from optparse import OptionParser, make_option
from decimal import Decimal
from datetime import datetime
from django.core.management import call_command
from django.core.management.base import BaseCommand
from django.conf import settings
def is_someone_alive(thread_list):
    """Return True if at least one thread in ``thread_list`` is still running.

    Args:
        thread_list: iterable of objects exposing an ``is_alive()`` method.

    Returns:
        bool: True when any thread reports alive, False otherwise
        (including for an empty iterable).
    """
    # any() stops at the first live thread instead of polling every entry.
    return any(t.is_alive() for t in thread_list)
class insert_item(threading.Thread):
    """Worker thread: takes items from ``queue1`` and stores them as ``Artikel`` rows.

    For every processed item the article number is put on ``item_still_exist2``
    so the caller can later tell which articles were part of this import.
    """

    VarLock2 = threading.Lock()

    def __init__(self, queue1, item_still_exist2, name, *args, **options):
        """Store the queues and options; ``name`` becomes the thread name."""
        threading.Thread.__init__(self)
        self.options = options
        self.name = name
        self.queue1 = queue1
        self.item_still_exist2 = item_still_exist2

    def run(self):
        """Process queue items until the producer has stopped and the queue is drained."""
        while not self.queue1.empty() or getItemBulkThread.isrunning:
            # A plain blocking get() can hang forever: another worker may take
            # the last item between the empty() check above and the get().
            # Polling with a timeout lets the loop condition be re-evaluated.
            try:
                item = self.queue1.get(timeout=1)
            except Queue.Empty:
                continue
            try:
                artikelobj, created = Artikel.objects.get_or_create(artikelnr=item['Nr'])
                # ... manipulate data (abbreviated in the original) ...
                self.item_still_exist2.put(artikelobj.artikelnr)
                artikelobj.save()
            finally:
                # Always balance the get(); otherwise queue1.join() in the
                # main thread never returns after a failed item.
                self.queue1.task_done()
class getItemBulkThread(threading.Thread):
    """Producer thread: fetches article bulks and puts each article on ``queue1``.

    The class attribute ``isrunning`` is read by the worker threads and by the
    main loop to decide whether more items may still arrive.
    """

    isrunning = True
    VarLock = threading.Lock()

    def __init__(self, queue1, name, *args, **options):
        """Store the queue/options and initialise bulk-size and skip counters.

        ``options['nrStart']`` is used as the initial ``nrab`` value.
        """
        threading.Thread.__init__(self)
        self.options = options
        if self.options['nrStart'] != '':
            self.nrab = self.options['nrStart']
        else:
            self.nrab = ''
        self.name = name
        self.queue1 = queue1
        self.anz_artikel = 64   # current bulk size
        self.max_artikel = 64   # upper bound for the bulk size
        self.skipped = 0        # consecutive skips at bulk size 1
        self.max_skip = 20      # skips tolerated before giving up

    def run(self):
        """Run the fetch loop and always clear ``isrunning`` when it ends.

        Previously the flag stayed True when the thread died from an
        exception, so workers and the main loop waited forever.
        """
        try:
            self._produce()
        finally:
            with getItemBulkThread.VarLock:
                getItemBulkThread.isrunning = False

    def _produce(self):
        """Fetch bulks until the service signals the normal abort condition.

        Raises:
            Exception: when more than ``max_skip`` consecutive fetches fail
                at bulk size 1.
        """
        count_sleep = 0
        while True:
            # Back-pressure: pause fetching while the queue is large.
            while self.queue1.qsize() > 5000:
                time.sleep(5)
                count_sleep += 1
            if count_sleep > 0:
                print("~ Artikel-Import %(csleep)sx für 5s pausiert, da Queue-Size > 5000" % {'csleep': count_sleep})
                count_sleep = 0
            try:
                items = getItemBulk()  # from external service
            except Exception as exc1:
                if '"normal" abort-condition' in str(exc1):
                    # Regular end of data; run() clears isrunning.
                    return
                if self.anz_artikel > 1:
                    # Retry with half the bulk size.
                    self.anz_artikel //= 2
                    continue
                if self.skipped <= self.max_skip:
                    # NOTE(review): nrab is set from options/item['Nr'] elsewhere;
                    # "+= 1" only works if those values are numeric - confirm.
                    self.nrab += 1
                    self.skipped += 1
                    time.sleep(5)
                    continue
                raise Exception("[EXCEPTION] Fehler im Thread: too much items skipped")
            # NOTE(review): an empty ``items`` list raises IndexError here.
            last_item = len(items) - 1
            self.nrab = items[last_item]['Nr']
            for artikel in items:
                artikel['katItem'] = False
                self.queue1.put(artikel)
            if self.anz_artikel < self.max_artikel:
                self.anz_artikel *= 2
            self.skipped = 0
# Management command that starts one producer thread and a growing pool of
# worker threads, reports progress every few seconds, and finally removes
# articles listed in ``items_vorher`` that were not re-imported.
class Command(BaseCommand):
"""
Django-mgm-command
"""
help = u'Import'
def create_parser(self, prog_name, subcommand):
"""
Create and return the ``OptionParser`` which will be used to
parse the arguments to this command.
"""
return OptionParser(prog=prog_name, usage=self.usage(subcommand),
version=self.get_version(),
option_list=self.option_list,
conflict_handler="resolve")
def handle(self, *args, **options):
startzeit = datetime.now()
anzahl_Artikel_vorher = Artikel.objects.all().count() # Artikel is a model
self.options = options
# NOTE(review): items_vorher is never filled in the visible code, so the
# membership test and the delete further down are no-ops as shown here;
# presumably the population step was abbreviated - confirm.
items_vorher = []
# queue1: fetched articles waiting for a worker; created without maxsize.
queue1 = Queue.Queue()
# item_still_exists2: article numbers of everything the workers processed.
item_still_exists2 = Queue.Queue()
running_threads = []
# Start the single producer thread.
thread = getItemBulkThread(queue1, name="Artikel", *args, **options)
running_threads.append(thread)
thread.daemon = True
thread.start()
anz_worker_threads = 1
anz_max_worker_threads = 5
# Start the initial worker(s).
insert_threads = [insert_item(queue1, item_still_exists2, name="Worker-%(anz)s" % {'anz': i + 1}, *args, **options) for i in range(anz_worker_threads)]
for thread in insert_threads:
running_threads.append(thread)
thread.setDaemon(True)
thread.start()
add_seconds = 5
element_grenze = 500
lastelemente = 0
asc_elemente = 0
anz_abgearbeitet = 0
# Monitoring loop: runs while the producer is active or items are pending.
while getItemBulkThread.isrunning or not queue1.empty():
time.sleep(add_seconds)
elemente = queue1.qsize()
akt_zeit = datetime.now()
diff_zeit = akt_zeit - startzeit
# Change in queue length since the previous check.
diff = elemente - lastelemente
anz_abgearbeitet = item_still_exists2.qsize()
# NOTE(review): timedelta_total_seconds is not defined in the visible code.
art_speed = (anz_abgearbeitet / timedelta_total_seconds(diff_zeit)) * 60
ersetz_var = {'anz': elemente, 'zeit': diff_zeit, 'tstamp': akt_zeit.strftime('%Y.%m.%d-%H:%M:%S'), 'anzw': anz_worker_threads, 'diff': diff, 'anza': anz_abgearbeitet, 'art_speed': art_speed}
print("%(zeit)s vergangen - %(tstamp)s - %(anz)s Elemente in Queue, Veränderung: %(diff)s - Anz Worker: %(anzw)s - Artikel importiert: %(anza)s - Speed: %(art_speed)02d Art/Min" % ersetz_var)
# asc_elemente counts consecutive checks in which the queue grew.
if diff > 0:
asc_elemente += 1
else:
asc_elemente = 0
# Add a worker when the queue keeps growing, is above the threshold, and
# the worker limit has not been reached.
if asc_elemente > 2 and anz_worker_threads < anz_max_worker_threads and elemente > element_grenze:
ersetz_var = {'maxw': anz_max_worker_threads, 'nr': anz_worker_threads + 1, 'element_grenze': element_grenze}
print "~~ 2x in Folge mehr Queue-Elemente als vorher, die max. Anzahl an Workern %(maxw)s noch nicht erreicht und mehr als %(element_grenze)s Elemente in der Queue, daher Start eines neuen Workers (Nr %(nr)s)" % ersetz_var
anz_worker_threads += 1
thread = insert_item(queue1, item_still_exists2, name="Worker-%(anz)s" % {'anz': anz_worker_threads}, *args, **options)
running_threads.append(thread)
thread.setDaemon(True)
thread.start()
asc_elemente = 0
lastelemente = elemente
# Wait until every queued item has been marked done by a worker.
queue1.join()
items_nachher = []
# Drain queue #2: entries found in items_vorher are moved to items_nachher,
# so items_vorher ends up holding only entries that were not re-imported.
while not item_still_exists2.empty():
item = item_still_exists2.get()
if item in items_vorher:
items_nachher.append(item)
items_vorher.remove(item)
item_still_exists2.task_done()
item_still_exists2.join()
# Delete the articles that remain in items_vorher.
if len(items_vorher) > 0:
Artikel.objects.filter(artikelnr__in=items_vorher).delete()
anzahl_Artikel_nachher = Artikel.objects.all().count()
anzahl_Artikel_diff = anzahl_Artikel_nachher - anzahl_Artikel_vorher
endzeit = datetime.now()
dauer = endzeit - startzeit
I've abbreviated the Code at some positions :)

A possible cause for excessive memory consumption is that you don't set a maximum size for the input queue. See the maxsize parameter.
On a related note, you write:
In the main (handle) class, there is a while-loop that looks every 5
seconds for the quantity of elements in the queue and the quantity of
running worker-threads. If there are more than 500 elements in the
queue and there are less then 5 worker-threads, it starts a new
worker-thread.
Creating a new thread does not necessarily increase the throughput. You should rather do some tests to determine the optimal number of threads, which may turn out to be 1.

Related

How to send and receive messages between processes in Python (through channels)?

I am trying to implement a simple program in which there are several processes that concurrently communicate with each other by sending and receiving messages. In the program, there are 4 participants (each of which corresponds to a process) and communicate with each other as follows:
P1 sends P2 some_message then P2 sends P3 another_message then P3 sends P4 a_message. Based on the messages each participant receives, they perform a specific action.
Obviously, when, for instance, P1 sends P2 a message, P2 is receiving that message from P1, so they are paired.
I have found different approaches, none of which are suitable, as they seem more complicated than what I am looking for. For example,
Python MPI which has a restriction of "There are not enough slots available in the system". There are a few ways suggested to sort out the issue but the solutions are a bit complicated.
Socket programming which mostly suits server and client scenario. But my program doesn't have a server. I also checked this answer, which is again based on socket programming.
My question is that isn't there any simpler approach than the above ones so that I can implement what I explained? Is it possible to create communication channels in Python fairly similar to the ones in Golang?

This code I wrote a while ago to get to grips with os.pipe - it is self contained but not "minimally reproducible" since I don't have the time to redo it. It uses tkinter Uis to simulate processes and sends and receives data between them. Note that the code was written only for my private purpose.
"""Test run of the use of pipes between processes.
.. processes are control, startup , send and receive.
.. pipes from control to startup and send
.. pipe from startup to send
.. pipe from send to receive
.. startup, user input of run mode
... prompt, timer (seconds) or number of runs
.. send, user input of data
.. receive, display of data received
. each process operates independently of, and in isolation from, the other processes until data is transferred through pipes
"""
# fr read file descriptor
# fw write file descriptor
# wbs write bytes
# snb string length of output filled with 0 to write as header
# bsnb for number of bytes written, needed for read
# maxbuf number of bytes of header, 4 digits, max 9999 characters in a string/byte literal
# onb output number of bytes
# dbs data read in bytes
import tkinter as tk
from os import pipe as ospipe
from os import read as osread
from os import write as oswrite
from os import close as osclose
from datetime import datetime as dt
from time import monotonic as clock
from functools import partial
BG = '#fa4'
TBG = '#fe8'
SndBG = '#f91'
BLK = '#000'
STOP = '#d30'
START = '#0b0'
start = clock()
def timer(halt):
    """Return True once more than ``halt`` whole seconds have passed since ``start``.

    ``start`` is the module-level timestamp taken when the module was loaded.
    """
    elapsed = int(int(clock()) - start)
    return elapsed > halt
def piperead(r):
    """Read one length-prefixed message from file descriptor ``r`` and close it.

    The message starts with a 4-digit ASCII header giving the number of
    payload bytes, followed by the UTF-8 payload.

    Args:
        r: readable file descriptor (e.g. the read end of ``os.pipe()``).

    Returns:
        str: the decoded payload.

    Raises:
        ValueError: if the header is not a decimal number (this includes an
            empty pipe at EOF).
    """
    maxbuf = 4  # header width in bytes

    def _read_exact(count):
        # os.read may return fewer bytes than requested; keep reading until
        # ``count`` bytes were collected or the writer closed the pipe.
        chunks = []
        remaining = count
        while remaining > 0:
            chunk = osread(r, remaining)
            if not chunk:
                break
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    try:
        size = int(_read_exact(maxbuf).decode())
        return _read_exact(size).decode()  # bytes to string
    finally:
        # Close the descriptor even when parsing fails, so it does not leak.
        osclose(r)
def pipewrite(w, s):
    """Write string ``s`` to file descriptor ``w`` as a length-prefixed message, then close it.

    The message is a 4-digit, zero-padded ASCII header holding the payload
    size in bytes, followed by the UTF-8 encoded payload.

    Args:
        w: writable file descriptor (e.g. the write end of ``os.pipe()``).
        s: text to send.
    """
    wbs = bytes(s, encoding='utf-8')
    # The reader consumes exactly this many *bytes*, so the header must hold
    # the encoded length, not the character count (they differ for non-ASCII).
    # NOTE: payloads above 9999 bytes do not fit the 4-digit header.
    snb = str(len(wbs)).zfill(4)
    bsnb = bytes(snb, encoding='utf-8')
    try:
        oswrite(w, bsnb + wbs)
    finally:
        # Close even if the write fails so the descriptor is not leaked.
        osclose(w)
def setpipe(process, sub=None, vars=None):
    """Create a pipe, hand its write end to ``process`` and return what was written.

    ``process`` is called with the write descriptor; when ``sub`` is truthy it
    additionally receives ``proc=(sub, vars)``. The message is then read from
    the read end with ``piperead``.
    """
    read_fd, write_fd = ospipe()
    if not sub:
        process(write_fd)
    else:
        process(write_fd, proc=(sub, vars))
    return piperead(read_fd)
# Sender window: a borderless Tk window with a text box. The entered text,
# or the literal 'stop', is written to the pipe descriptor passed in, after
# which the window is destroyed.
class Sloop():
# pipewrite: write-end file descriptor the message is sent to.
# Constructing the object builds the widgets and blocks in mainloop().
def __init__(sl, pipewrite=None):
sl.fw = pipewrite
sl.w = tk.Tk()
sl.w.geometry('400x200-100+80')
sl.w.overrideredirect(1)
sl.w['bg'] = BG
# Creates a named Tk font 'uifnt'; the returned name is not used in this class.
uifnt = sl.w.tk.call('font', 'create', 'uifnt', '-family','Consolas', '-size',11)
# Small black button in the corner: sends 'stop'.
sl.lvb = tk.Button(sl.w, bg=BLK, activebackground=BG, relief='flat', command=sl.stop)
sl.lvb.pack()
sl.lvb.place(width=15,height=15, x=380,y=10)
# Send button.
sl.sndb = tk.Button(sl.w,bg=SndBG,activebackground=BG,fg=TBG, text=chr(11166), command=sl.send)
sl.sndb.pack()
sl.sndb.place(width=25,height=25, x=20,y=160)
sl.tlbl = tk.Label(sl.w,bg=BG, text='write data to send...')
sl.tlbl.pack()
sl.tlbl.place(x=20,y=20)
# Text box holding the data to send.
sl.t = tk.Text(sl.w,bg=TBG)
sl.t.pack()
sl.t.place(width=300,height=100, x=20,y=45)
sl.t.focus_set()
sl.w.mainloop()
# Send the text box content through the pipe, or show an error if the box
# only contains the newline the Text widget always returns.
def send(sl):
sl.output = sl.t.get('1.0','end')
if sl.output != '\n':
pipewrite(sl.fw,sl.output)
sl.close()
else:
sl.error()
# Show a "nothing to send" label; it is removed on the next key press.
def error(sl):
def _clearlbl(ev):
sl.erlbl.destroy()
sl.erlbl = tk.Label(sl.w,bg=TBG,text='there is nothing to send')
sl.erlbl.pack()
sl.erlbl.place(x=20,y=160)
sl.t.focus_set()
sl.t.bind('<KeyPress>',_clearlbl)
# Send the 'stop' message and close the window.
def stop(sl):
pipewrite(sl.fw,'stop')
sl.close()
# Destroy the Tk window.
def close(sl):
sl.w.destroy()
# Receiver window: reads one message from the pipe descriptor passed in and
# shows it with a timestamp, or closes itself when the message is 'stop'.
class Rloop():
# pipefread: read-end file descriptor the message is read from.
# Constructing the object builds the widgets, reads the pipe once and then
# enters mainloop().
def __init__(rl, pipefread=None):
rl.fr = pipefread
rl.w = tk.Tk()
rl.w.geometry('400x200-100+320')
rl.w.overrideredirect(1)
rl.w['bg'] = BG
# Named Tk font, applied to the text box below.
uifnt = rl.w.tk.call('font', 'create', 'uifnt', '-family','Consolas', '-size',10)
# Small black button in the corner: closes the window.
rl.lvb = tk.Button(rl.w, bg=BLK, activebackground=BG, relief='flat', command=rl.close)
rl.lvb.pack()
rl.lvb.place(width=15,height=15, x=380,y=10)
rl.tlbl = tk.Label(rl.w,bg=BG, text='received...')
rl.tlbl.pack()
rl.tlbl.place(x=20,y=20)
rl.t = tk.Text(rl.w,bg=TBG)
rl.t['font'] = uifnt
rl.t.pack()
rl.t.place(width=300,height=100, x=20,y=45)
rl.t.focus_set()
# piperead() is called here, before the event loop starts.
rl.receive()
# NOTE(review): when receive() got 'stop' the window is already destroyed
# at this point - confirm mainloop() behaves as intended in that case.
rl.w.mainloop()
# Read the message; display "<now>\n<message>" unless it is 'stop'.
def receive(rl):
rec = piperead(rl.fr)
if rec != 'stop':
rl.t.insert('end', '\n'.join([str(dt.now()), rec]))
else: rl.close()
# Destroy the Tk window.
def close(rl):
rl.w.destroy()
# Start-up window: lets the user pick a run mode (prompt, timer or runs) and
# writes "<mode>,<value>" - or 'stop' - to the pipe descriptor passed in.
class Startup():
# pipefwrite: write-end file descriptor the chosen mode is sent to.
# Constructing the object builds the widgets and blocks in mainloop().
def __init__(su, pipefwrite=None):
su.fw = pipefwrite
# Selected mode string; stays '' until the user picks a mode.
su.mode = ''
su.w = tk.Tk()
su.w.geometry('400x200-100+500')
su.w.overrideredirect(1)
su.w['bg'] = BG
# Creates a named Tk font 'uifnt'; the returned name is not used in this class.
uifnt = su.w.tk.call('font', 'create', 'uifnt', '-family','Consolas', '-size',11)
# Small black button in the corner: sends 'stop'.
su.lvb = tk.Button(su.w, bg=BLK, activebackground=BG, relief='flat', command=su.stop)
su.lvb.pack()
su.lvb.place(width=15,height=15, x=380,y=10)
# Send button: writes su.mode to the pipe.
su.sndb = tk.Button(su.w,bg=SndBG,activebackground=BG,fg=TBG, text=chr(11166), command=su.send)
su.sndb.pack()
su.sndb.place(width=25,height=25, x=20,y=160)
su.title = tk.Label(su.w,bg=BG, text='Modes to continue data input')
su.title.pack()
su.titley = 10
su.title.place(x=20,y=su.titley)
# Vertical spacing unit and x position of the entry fields.
su.ysp = 20
su.margin = 200
# One flat button per mode; 'prompt' selects immediately, the other two
# first open an entry field via _enter().
ptxt = 'prompt'
su.pb = tk.Button(su.w,bg=BG, activebackground=BG, text=ptxt, relief='flat', cursor='hand2', command=partial(su._get,e=None, nm='su.pb', ent=None))
tmtxt = ' timer '
su.tmb = tk.Button(su.w,bg=BG, activebackground=BG, text=tmtxt, relief='flat', cursor='hand2', command=partial(su._enter,ent='su.tmb'))
rntxt = ' runs '
su.rnb = tk.Button(su.w,bg=BG, activebackground=BG, text=rntxt, relief='flat', cursor='hand2', command=partial(su._enter,ent='su.rnb'))
su.pb.pack()
su.pby = su.titley + 1.5*su.ysp
su.pb.place(x=25,y=su.pby)
su.tmb.pack()
su.tmby = su.pby + 2*su.ysp
su.tmb.place(x=25,y=su.tmby)
su.rnb.pack()
su.rnby = su.pby + 4*su.ysp
su.rnb.place(x=25,y=su.rnby)
# Registry of form widgets by name, used by _able() and _get().
su.formd = {'su.pb':su.pb, 'su.tmb':su.tmb, 'su.rnb':su.rnb}
su.w.mainloop()
# Enable the widgets whose key contains the first four characters of nm and
# disable all others.
def _able(su,nm):
for key in su.formd:
if nm[0:4] not in key:
su.formd[key]['state'] = 'disabled'
else:
su.formd[key]['state'] = 'normal'
# Create the label and entry field for the 'timer' or 'runs' mode; pressing
# Return in the entry calls _get() with that entry.
def _enter(su,ent):
if ent == 'su.tmb':
tmtxt = 'seconds'
su.tmlbl = tk.Label(su.w,bg=BG, text=tmtxt)
su.tment = tk.Entry(su.w,bg=TBG)
su.tmlbl.pack()
su.tment.pack()
tmlbly = su.tmby
su.tmlbl.place(x=su._margin(tmtxt), y=tmlbly)
su.tment.place(x=su.margin, y=tmlbly)
su.tment.focus_set()
su.tment.bind('<Return>', partial(su._get,nm='su.tment', ent=su.tment))
su.formd = su.formd | {'su.tmlbl':su.tmlbl, 'su.tment':su.tment}
elif ent == 'su.rnb':
rntxt = 'number'
su.rnlbl = tk.Label(su.w,bg=BG, text=rntxt)
su.rnent = tk.Entry(su.w,bg=TBG)
su.rnlbl.pack()
su.rnent.pack()
rnlbly = su.rnby
su.rnlbl.place(x=su._margin(rntxt), y=rnlbly)
su.rnent.place(x=su.margin, y=rnlbly)
su.rnent.focus_set()
su.rnent.bind('<Return>', partial(su._get,nm='su.rnent', ent=su.rnent))
su.formd = su.formd | {'su.rnlbl':su.rnlbl, 'su.rnent':su.rnent}
# Record the chosen mode in su.mode as "<mode>,<value>".
# e is the Tk event (None when called from the prompt button), nm the widget
# key, ent the Entry whose content supplies the value.
def _get(su,e,nm,ent):
if nm == 'su.pb':
su._able('su.pb')
su.mode = 'prompt,'+'1'
else:
su._able(nm)
for key in su.formd:
if key == nm:
if 'tm' in key: modestr = 'timer'
elif 'rn' in key: modestr = 'runs'
su.formd[key]['bg']=BG
# The entry text is sent as typed; it is not validated here.
su.mode = ','.join([modestr,str(ent.get())])
break
# x position for a label so that it ends left of the entry column
# (8 pixels are assumed per character).
def _margin(su,txt):
return su.margin-(len(txt)*8)
# Send the selected mode (possibly still '') and close the window.
def send(su):
pipewrite(su.fw,su.mode)
su.close()
# Send the 'stop' message and close the window.
def stop(su):
pipewrite(su.fw,'stop')
su.close()
# Destroy the Tk window.
def close(su):
su.w.destroy()
class Control():
    """Control panel window that reports the user's decision through a pipe.

    Depending on ``proc`` it shows a start button, a "next" button, or a
    confirmation for a timer/run count. The chosen action ('startup',
    'prompt', 'confirm' or 'stop') is written to ``pipefwrite`` and the
    window is destroyed.
    """

    def __init__(c, pipefwrite=None, proc=None):
        """Build the window and block in ``mainloop()``.

        Args:
            pipefwrite: write-end file descriptor for the reply.
            proc: optional ``(mode, value)`` tuple; mode is one of
                'start', 'prompt', 'timer', 'runs'.
        """
        c.fw = pipefwrite
        c.proc = proc
        # Always define procv: the original assigned a misspelled attribute
        # (procvl) in the falsy case, so _tmui/_rnui raised AttributeError
        # for a value of 0.
        c.procv = None
        if c.proc:
            c.proc = proc[0]
            c.procv = proc[1]
        # Mode name -> method that adds the mode-specific widgets.
        c.procd = {'start':c._strtui, 'prompt':c._prui, 'timer':c._tmui, 'runs':c._rnui}
        c.w = tk.Tk()
        c.w.geometry('100x200-60+80')
        c.w.overrideredirect(1)
        c.w['bg'] = BG
        # Registers the named Tk font 'uifnt'.
        c.w.tk.call('font', 'create', 'uifnt', '-family','Consolas', '-size',11)
        c.lvb = tk.Button(c.w, bg=BLK, activebackground=BG, relief='flat', command=c.stop)
        c.lvb.pack()
        c.lvb.place(width=15,height=15, x=80,y=10)
        c.title = tk.Label(c.w,bg=BG, text='pipe test\nControl')
        c.title.pack()
        c.title.place(x=5,y=5)
        c.stpclr = tk.Label(c.w,bg=STOP)
        c.stpclr.pack()
        stpy = 160
        c.stpclr.place(width=7,height=7,x=2,y=stpy+10)
        c.stopb = tk.Button(c.w, bg=BG, text='stop', cursor='hand2', relief='flat', activebackground=BG, command=c.stop)
        c.stopb.pack()
        c.stopb.place(x=10,y=160)
        # Without a mode only the stop controls are shown (previously a
        # KeyError on None).
        if c.proc is not None:
            c.procd[c.proc]()
        c.w.mainloop()

    def _strtui(c):
        """Add the green marker and the 'start' button."""
        c.strtclr = tk.Label(c.w,bg=START)
        c.strtclr.pack()
        strty = 60
        c.strtclr.place(width=7,height=7,x=2,y=strty+10)
        c.startb = tk.Button(c.w, bg=BG, text='start', cursor='hand2', relief='flat', activebackground=BG, command=c.strtup)
        c.startb.pack()
        c.startb.place(x=10,y=strty)

    def __write(c,s):
        """Write ``s`` to the pipe and close the window."""
        pipewrite(c.fw,s)
        c.close()

    def _prui(c):
        """Add the '--- next ---' button, which replies 'prompt'."""
        prb = tk.Button(c.w,bg=TBG, text='--- next ---', activebackground=BG, relief='flat',cursor='hand2', command=partial(c.__write,'prompt'))
        prb.pack()
        prb.place(x=10,y=80)

    def __confirm(c):
        """Add the 'confirm' button, which replies 'confirm'."""
        cb = tk.Button(c.w, bg=TBG, text='confirm', activebackground=BG, relief= 'flat', cursor='hand2', command=partial(c.__write,'confirm'))
        cb.pack()
        cb.place(x=20,y=120)

    def _tmui(c):
        """Show the requested number of seconds and a confirm button."""
        tmt = ''.join(['run for\n',str(c.procv),' seconds'])
        tmlbl = tk.Label(c.w,bg=BG, text=tmt)
        tmlbl.pack()
        tmlbl.place(x=10,y=80)
        c.__confirm()

    def _rnui(c):
        """Show the requested number of runs and a confirm button."""
        rnt = ''.join(['run\n ',str(c.procv),' times'])
        rnlbl = tk.Label(c.w,bg=BG, text=rnt)
        rnlbl.pack()
        rnlbl.place(x=10,y=80)
        c.__confirm()

    def strtup(c):
        """Reply 'startup' and close the window."""
        pipewrite(c.fw,'startup')
        c.close()

    def stop(c):
        """Reply 'stop' and close the window."""
        pipewrite(c.fw,'stop')
        c.close()

    def close(c):
        """Destroy the Tk window."""
        c.w.destroy()
def once():
    """Run one send/receive round trip over a fresh pipe.

    The sender window gets the write end first; the receiver window then
    reads from the read end.
    """
    read_end, write_end = ospipe()
    Sloop(write_end)
    Rloop(read_end)
def many(mkey, mint=1):
    """Repeat send/receive rounds according to the selected mode.

    Modes are ('prompt', 1), ('timer', secs), ('runs', runs).

    Args:
        mkey: mode name, 'timer', 'runs' or 'prompt'.
        mint: seconds for 'timer', number of rounds for 'runs'.

    Returns:
        True when the rounds were carried out, a falsy value when the user
        stopped at the confirmation or the mode/reply is not recognised.
    """
    if mkey == 'timer':
        rec = setpipe(Control, sub='timer', vars=mint)
        if rec == 'confirm':
            # Measure from the confirmation. The module-level ``start`` used
            # by timer() is taken at import time, so the time the user spent
            # in the dialogs would already count against the window.
            began = clock()
            while int(clock() - began) <= mint:
                once()
            return True
        return False
    if mkey == 'runs':
        rec = setpipe(Control, sub='runs', vars=mint)
        if rec == 'confirm':
            for _ in range(mint):
                once()
            return True
        return False
    if mkey == 'prompt':
        # Keep going for as long as the user answers with 'prompt' (next).
        while True:
            once()
            if setpipe(Control, sub='prompt') != 'prompt':
                break
        return True
    return False
# Top-level driver: shows the Control window, then the Startup window, and
# runs the chosen mode via many() until incontrol becomes falsy.
# NOTE(review): indentation was lost in this listing, so it is unclear
# whether the single ``else`` belongs to ``if rec == 'startup'`` or to
# ``if rec != 'stop'`` - confirm against the original source.
def testui():
incontrol = True
while incontrol:
rec = setpipe(Control,sub='start')
if rec == 'startup':
rec = setpipe(Startup)
if rec != 'stop':
# Startup replies "<mode>,<value>"; split at the first comma.
modes, p, ns = rec.partition(',')
# NOTE(review): int(ns) raises ValueError when no mode was selected
# (empty reply) or the entry text is not a number.
incontrol = many(modes,int(ns))
else:
incontrol = False
# Run the UI test only when executed as a script.
if __name__ == '__main__':
testui()

Creating a function from current source code "list comprehension"

Outcome 1 required:
The first batch of code below is in its working form.
Please assist in creating a function " def Calculations():" inclusive of all the list calculations to return the same results with the static list. With the calculations in proper functions I will be able to refine the problem and might be able to move forward ...
Outcome 2 required for those that want to go in depth...:
When I run the code on a live list that appends every x intervals it stalls the information feed. I believe it could be the creating of the appending lists in batches of increasing numbers... but I don't have a solution for it... Below is the working code...
I am getting my live data from Binance in a appending list of closes only for those who would like to test it in the live status...
The data could be coming from any source , does not need to be Binance as long as its an appending list of closes in float format...
See code below...
import itertools
l = [16.329,16.331, 16.3705, 16.3965, 16.44, 16.4227, 16.4028, 16.37, 16.3829, 16.3482, 16.3614, 16.4191, 16.4008, 16.4048, 16.4076, 16.3724, 16.3599, 16.3872, 16.3794, 16.3528, 16.3886, 16.3904, 16.3815, 16.3864, 16.4254, 16.4411, 16.4151, 16.4338, 16.4212, 16.3819, 16.2857, 16.2703, 16.2408, 16.1938, 16.2038, 16.2035, 16.217, 16.2374, 16.2414, 16.2238, 16.1787, 16.2725, 16.2964, 16.3155, 16.238, 16.2149, 16.2992, 16.3568, 16.2793, 16.2467, 16.312, 16.3117, 16.3017, 16.3465, 16.3882, 16.3698, 16.307, 16.3328, 16.3311, 16.3466, 16.3382, 16.3703, 16.3502, 16.3661, 16.38, 16.3972, 16.4141, 16.393, 16.3769, 16.3683, 16.4136, 16.3774, 16.3709, 16.3179, 16.3019, 16.3149, 16.2838, 16.2689, 16.2602, 16.2679, 16.2921, 16.312, 16.3158, 16.3198, 16.2955, 16.303, 16.327, 16.356, 16.313, 16.3, 16.2806, 16.2634, 16.2856, 16.2702, 16.2136, 16.2782, 16.276, 16.2231, 16.2255, 16.1314, 16.0796, 16.1192, 16.0977, 16.1358, 16.1408, 16.1703]
#### VARIABLES & SETTINGS ####
# Number of most recent entries of ``l`` used for the "timeframed" statistics.
dataingestperiod = 17
original_list_count = len(l)
# The last ``dataingestperiod`` entries of ``l`` (fewer if ``l`` is shorter).
timeframed_list = l[-dataingestperiod:]
timeframed_list_count = len(timeframed_list)
def groupSequence(x):
    """Yield each run of strictly increasing consecutive values in ``x``.

    Only runs of at least two values are yielded, each as a new list.

    Args:
        x: iterable of mutually comparable values. ``None`` is used
            internally as the end marker, so the input must not contain it.

    Yields:
        list: one list per increasing run, in input order.
    """
    it = iter(x)
    # A default avoids StopIteration on empty input, which would surface as
    # RuntimeError inside a generator.
    prev, res = next(it, None), []
    while prev is not None:
        start = next(it, None)
        # Compare against None explicitly: a plain truthiness test treats a
        # value of 0 as "end of data" and breaks the run there.
        if start is not None and start > prev:
            res.append(prev)
        elif res:
            yield res + [prev]
            res = []
        prev = start
def divbyZero(increasingcount, decreasingcount):
    """Return ``increasingcount / decreasingcount``, or 0 when the divisor is zero."""
    try:
        ratio = increasingcount / decreasingcount
    except ZeroDivisionError:
        ratio = 0
    return ratio
def divbyZeroMain(increasingcountMain, decreasingcountMain):
    """Divide the main-list increase count by the decrease count.

    Returns 0 instead of raising when ``decreasingcountMain`` is zero.
    """
    try:
        quotient = increasingcountMain / decreasingcountMain
    except ZeroDivisionError:
        return 0
    else:
        return quotient
#### TIMEFRAMED LIST CALCULATIONS #####
# Increasing runs inside the recent window, flattened so they can be counted.
listA_1 = list(groupSequence(timeframed_list))
listA = list(itertools.chain(*listA_1))
increasingcount = len(listA)
decreasingcount = timeframed_list_count - increasingcount
trend = divbyZero(increasingcount, decreasingcount)

#### MAIN APPENDING LIST CALCULATIONS #####
# Same figures for the complete list.
listMain_1 = list(groupSequence(l))
listMain = list(itertools.chain(*listMain_1))
increasingcountMain = len(listMain)
decreasingcountMain = original_list_count - increasingcountMain
trendMain = divbyZeroMain(increasingcountMain, decreasingcountMain)

### Copies holding only the values that belong to increasing runs ###
increase_list_timeframed = list(listA)
increase_list_Main = list(listMain)

### Prints ON TIMEFRAMED LIST ####
print("---------------")
print("---------------")
for template, value in (
    ("Timeframed list count set at max {}", timeframed_list_count),
    ("Count of decreases in timeframed list is {}", decreasingcount),
    ("Count of increases in timeframed list is {}", increasingcount),
    ("Current timeframed trend is {}", trend),
):
    print(template.format(value))
print("---------------")
print("---------------")

### Prints ON MAIN LIST ####
for template, value in (
    ("Main appending list count so far is {}", original_list_count),
    ("Count of decreases in Main appending list is {}", decreasingcountMain),
    ("Count of increases in Main appending list is {}", increasingcountMain),
    ("Current Main trend is {}", trendMain),
):
    print(template.format(value))
The actual code as live to binance is listed below with the above code included. You also need to install "pip install python-binance" and "pip install websocket_client" got the binance access code from ParttimeLarry
Outcome 2 required: When run live that all calculations run uninterruptedly...
import itertools
import copy
import websocket, json, pprint, talib, numpy
from binance.client import Client
from binance.enums import *
#DATA FROM WEBSOCKETS########
# Raw stream endpoint. Binance stream names use "@" between symbol and stream
# type (<symbol>@kline_<interval>); with "#" the remainder is a URL fragment
# and no kline stream is selected.
SOCKET = "wss://stream.binance.com:9443/ws/linkusdt@kline_1m"
# Placeholders - replace with real credentials.
API_KEY = 'yourAPI_KEY'
API_SECRET ='yourAPI_Secret'
closes = [] # created for RSI indicator only using closes
in_position = False
client = Client(API_KEY, API_SECRET) # tld='us'
def order(side, quantity, symbol, order_type=ORDER_TYPE_MARKET):
    """Submit an order through the module-level ``client`` and report success.

    Prints the response on success. Any exception is printed and reported as
    failure rather than propagated.

    Returns:
        bool: True if the order call completed, False if it raised.
    """
    succeeded = True
    try:
        print("sending order")
        response = client.create_order(symbol=symbol, side=side, type=order_type, quantity=quantity)
        print(response)
    except Exception as err:
        print("an exception occured - {}".format(err))
        succeeded = False
    return succeeded
def on_open(ws):
    """Websocket callback: announce that the connection was opened."""
    # (Writing a session-start line to a stats file is disabled.)
    notice = 'opened connection'
    print(notice)
def on_close(ws, close_status_code=None, close_msg=None):
    """Websocket callback: announce that the connection was closed.

    Args:
        ws: the websocket application object.
        close_status_code: status code passed by newer websocket-client
            versions; optional so one-argument callers keep working.
        close_msg: close message passed by newer websocket-client versions.
    """
    print('closed connection')
# Websocket callback: parses a kline message, records the close price of a
# finished candle in the global ``closes`` list, and prints increase/decrease
# statistics for the whole list and for its most recent entries.
# NOTE(review): indentation was lost in this listing, so it cannot be told
# which statements are nested under ``if is_candle_closed`` - confirm against
# the original source. If the calculations run while ``closes`` is still
# empty, ``next(it)`` in groupSequence has nothing to return.
def on_message(ws, message):
global closes, in_position
print('received message')
json_message = json.loads(message)
pprint.pprint(json_message)
# 'k' holds the candle; 'x' is read as the "candle closed" flag and 'c' as
# the close price.
candle = json_message['k']
is_candle_closed = candle['x']
close = candle['c']
if is_candle_closed:
print("candle closed at {}".format(close))
closes.append(float(close))
print("closes")
print(closes)
####################################################################################
########CALCULATIONS ON INDICATORS #################################################
# dataingestperiod = 5
# Work on a copy of the closes collected so far.
l = copy.deepcopy(closes)
maincountofcloses = len(l)
print ("Total count of closes so far {}".format(maincountofcloses))
#### VARIABLES & SETTINGS ####
# The list is copied a second time here.
l = copy.deepcopy(closes)
# Number of most recent closes used for the "timeframed" statistics.
dataingestperiod = 3
original_list_count = len(l)
#print ("Main list count so far is {}".format(original_list_count))
timeframed_list = l[-dataingestperiod:]
timeframed_list_count = len(timeframed_list)
#print ("Timeframed list count set at max {}".format(timeframed_list_count))
# Generator yielding each run of consecutive increasing values as a list.
# ``if start and ...`` is a truthiness test, so a value of 0 ends a run.
def groupSequence(x):
it = iter(x)
prev, res = next(it), []
while prev is not None:
start = next(it, None)
if start and start > prev:
res.append(prev)
elif res:
yield list(res + [prev])
res = []
prev = start
# Ratio of increases to decreases; 0 when there are no decreases.
def divbyZero(increasingcount,decreasingcount):
try: return increasingcount/decreasingcount
except ZeroDivisionError: return 0
# Same computation as divbyZero, used for the full list.
def divbyZeroMain(increasingcountMain,decreasingcountMain):
try: return increasingcountMain/decreasingcountMain
except ZeroDivisionError: return 0
#### TIMEFRAMED LIST CALCULATIONS#####
listA_1 = (list(groupSequence(timeframed_list))) # obtain numbers in increasing sequence
# print(len(listA_1)) # number of increases in mixed format
listA = list(itertools.chain.from_iterable(listA_1)) # remove double brackets to enable list count
increasingcount = len(listA)
decreasingcount = timeframed_list_count - increasingcount
trend = divbyZero(increasingcount,decreasingcount)
#### MAIN APPENDING LIST CALCULATIONS #####
listMain_1 = (list(groupSequence(l)))
listMain = list(itertools.chain.from_iterable(listMain_1))
increasingcountMain = len(listMain)
decreasingcountMain = original_list_count - increasingcountMain
trendMain = divbyZeroMain(increasingcountMain,decreasingcountMain)
# Element-by-element copies of the flattened lists; they are not used again
# in the visible code.
increase_list_timeframed = []
for x in listA:
increase_list_timeframed.append(x)
increase_list_Main = []
for x in listMain:
increase_list_Main.append(x)
###Prints ON TIMEFRAMED LIST ####
print ("---------------")
print ("---------------")
print ("Timeframed list count set at max {}".format(timeframed_list_count))
print ("Count of decreases in timeframed list is {}".format(decreasingcount))
print ("Count of increases in timeframed list is {}".format(increasingcount))
print ("Current timeframed trend is {}".format(trend))
print ("---------------")
print ("---------------")
###Prints ON MAIN LIST ####
print ("Main appending list count so far is {}".format(original_list_count))
print ("Count of decreases in Main appending list is {}".format(decreasingcountMain))
print ("Count of increases in Main appending list is {}".format(increasingcountMain))
print ("Current Main trend is {}".format(trendMain))
# threading.Timer(10.0, divbyZeroMain).start()
# threading.Timer(10.0, divbyZero).start()
# ws = websocket.WebSocketApp(SOCKET, on_open=on_open, on_close=on_close, on_message=on_message)
# ws.run_forever()
# Create the websocket application with the three callbacks and block in its
# receive loop.
ws = websocket.WebSocketApp(SOCKET, on_open=on_open, on_close=on_close, on_message=on_message)
ws.run_forever()

How do you model multiple arrival distributions?

Python:
I am simulating a call-centre with 2 types of incoming calls: Sales calls, and service calls.
These calls have different, independent distributions, which enter the same system.
I have function, arrivals which contains:
iat_sales = random.expovariate(1/3)
yield env.timeout(iat_sales)
I want to incorporate:
iat_service = random.triangular(0,0,6)
yield env.timeout(iat_service)
how can I yield each event simultaneously?
This is the solution I have come up with:
def arrival_list():
    """Build the merged, time-ordered list of sales and service arrivals.

    Sales inter-arrival times are drawn from ``random.expovariate(sales_rate)``
    and service inter-arrival times from ``random.triangular(0, 6, 0)``. Each
    stream is generated until its running time reaches the module-level
    ``sim_end``.

    Returns:
        list: entries ``[arrival_time, arrival_type, iat]`` sorted by arrival
        time, where ``iat`` is the gap to the previous arrival of either type
        (for the first entry, its own arrival time). Empty when no arrival
        is generated (``sim_end <= 0``).
    """
    sales_time = 0    # sim time of the latest sales arrival
    service_time = 0  # sim time of the latest service arrival
    sales_list = []   # sequential sales arrivals [arrival time, 'sales']
    service_list = [] # sequential service arrivals [arrival time, 'service']

    while sales_time < sim_end:
        sales_time += random.expovariate(sales_rate)
        sales_list.append([sales_time, 'sales'])
    while service_time < sim_end:
        service_time += random.triangular(0, 6, 0)
        service_list.append([service_time, 'service'])

    # Merge both streams and order them by arrival time.
    arrivals = sales_list + service_list
    arrivals.sort()

    # Append the inter-arrival time to every entry. Tracking the previous
    # time from 0 gives the first entry its own arrival time and avoids the
    # IndexError the fixed ``arrivals[0]`` access raised on an empty list.
    previous_time = 0
    for arrival in arrivals:
        arrival.append(arrival[0] - previous_time)
        previous_time = arrival[0]
    return arrivals

As a reference, a simple implementation can be done like this, where a simulation is run indefinitely with 1 second intervals and calls are considered to arrive if their random values exceed some thresholds:
import random
import time
def generate_calls():
    """Draw one random value for a sales call and one for a services call.

    Returns:
        tuple: ``(sales_value, services_value)`` drawn from
        ``random.expovariate(1/3)`` and ``random.triangular(10, 20, 5)``.
    """
    sales_value = random.expovariate(1/3)
    services_value = random.triangular(10, 20, 5)
    return sales_value, services_value
def simulation(count, sales_acceptance, services_acceptance):
    """Run the call simulation forever in one-second steps.

    Each step prints the step number, draws a pair of values with
    ``generate_calls()`` and announces a call for every value that exceeds
    its acceptance threshold. The function never returns.
    """
    tick = count
    while True:
        print('Time: {}'.format(tick))
        sales_value, services_value = generate_calls()
        # A call "arrives" when its drawn value is above the threshold.
        checks = (
            (sales_value, sales_acceptance, 'Sales call arrived!'),
            (services_value, services_acceptance, 'Services call arrived!'),
        )
        for value, threshold, notice in checks:
            if value > threshold:
                print(notice)
        time.sleep(1)
        tick += 1
# Start at step 1 with thresholds 2 (sales) and 13 (services); this call does not return.
simulation(1, 2, 13)

You can have three separate parallel processes.
1- One process for making Sales calls.
2- One process for making service calls.
3- One process for handling calls.
import simpy
import random
# Simulation time after which the generators stop producing calls.
sim_end = 1000
def generateSalesCall(env, call_pipe):
    """Simulation process: put a "sales" call into ``call_pipe`` at random intervals.

    Runs while ``env.now`` is below the module-level ``sim_end``; the gap
    between calls is drawn from ``random.expovariate(1/3)``.
    """
    rate = 1/3
    while env.now < sim_end:
        # put call in the pipe, then wait for the next arrival
        yield call_pipe.put("sales")
        pause = random.expovariate(rate)
        yield env.timeout(pause)
def generateServiceCall(env, call_pipe):
    """Simulation process: put a "service" call into ``call_pipe`` at random intervals.

    Runs while ``env.now`` is below the module-level ``sim_end``; the gap
    between calls is drawn from ``random.triangular(0, 6, 0)``.
    """
    while env.now < sim_end:
        # put call in the pipe, then wait for the next arrival
        yield call_pipe.put("service")
        pause = random.triangular(0, 6, 0)
        yield env.timeout(pause)
def handleCalls(env, call_pipe):
    """Simulation process: take calls from ``call_pipe`` and print each with the current time.

    Calls other than "sales" and "service" are ignored. The loop never ends
    on its own.
    """
    while True:
        call = yield call_pipe.get()
        if call == "sales":
            label = "sales call"
        elif call == "service":
            label = "service call"
        else:
            continue
        print(env.now, label)
# Wire up the simulation: one shared store, two call generators, one handler.
env = simpy.Environment()
call_pipe = simpy.Store(env)
for make_process in (generateSalesCall, generateServiceCall, handleCalls):
    env.process(make_process(env, call_pipe))
env.run()

Python 3 verification script not checking properly

I've been working on a Python script and am having issues with some verifications I set up. I have a procedure file with a function that uses an order number and a customer number to check some past history about the customer's orders. I've been testing live on our server and I keep failing the last if statement. The order number and customer number I'm using do have more than one order, and some are over 60 days old, so it should pass the test, but it doesn't. I've been looking over my code and I just can't see what could be causing this.
edit: here are the print results of current and retrieved timestamps:
current_timestamp = 1531849617.921927
retrieved_timestamp = 1489622400
two_month_seconds = 5184000
one_month_seconds = 2592000
Python3
from classes import helper
from classes import api
from classes import order
from procedures import orderReleaseProcedure
import time
import datetime
import re
def verifyCustomer(customer_id, order_id):
    """Check a customer's order history and decide how to treat an order.

    The customer's orders are fetched through the API. When more than one
    order exists they are walked in the order returned, and the loop stops at
    the first order matching one of the three rules commented inline.

    Args:
        customer_id: Customer identifier interpolated into the orders query.
        order_id: Identifier of the order whose status may be put on hold.

    Returns:
        dict with the keys 'success' (0 or 1) and 'message'. The initial
        value ``{'success': 0, 'message': 'verify_payment_method'}`` is
        returned unchanged when there is at most one order or when no rule
        matches any order.
    """
    self_helper = helper.Helper()
    customer_blocked_reasons = self_helper.getConfig('customer_blocked_reasons')
    order_statuses = self_helper.getConfig('order_statuses')
    self_api = api.Api()
    self_order = order.Order(order_id)
    status = {
        'success': 0,
        'message': 'verify_payment_method',
    }

    results = self_api.which_api('orders?customer_id={}'.format(customer_id))
    if results['total_count'] <= 1:
        return status

    # Age thresholds, expressed as epoch-second cut-offs relative to now.
    seconds_per_day = 3600 * 24
    current_timestamp = time.time()
    one_month_cutoff = current_timestamp - seconds_per_day * 30
    two_month_cutoff = current_timestamp - seconds_per_day * 60

    # Sticky on purpose: once any order has a blocking status, every later
    # order in the loop is evaluated as "blocked" too.
    customer_is_blocked = False

    for result in results['orders']:
        order_status_info = self_api.which_api(
            'order_statuses/%d' % result['order_status_id'])
        status_name = order_status_info['name']
        if any(reason in status_name for reason in customer_blocked_reasons):
            customer_is_blocked = True

        # Keep only the date part in front of the "T". partition() leaves a
        # date-only string intact, whereas slicing with find() would drop its
        # last character when no "T" is present.
        stripped_date = result['ordered_at'].partition("T")[0]
        # datetime.timestamp() replaces strftime("%s"), which is a
        # platform-specific extension; both treat the naive date as local time.
        retrieved_timestamp = int(
            datetime.datetime.strptime(stripped_date, '%Y-%m-%d').timestamp())

        # NOTE(review): this condition is true for orders NEWER than 30 days,
        # while the message talks about orders OLDER than 30 days. Confirm
        # which one is intended before changing the comparison.
        if retrieved_timestamp > one_month_cutoff and not customer_is_blocked:
            status['success'] = 1
            status['message'] = "Customer Verified with orders older than 30 days and no blocking reasons"
            print(' 30 day check was triggered ')
            print(status)
            break
        elif customer_is_blocked:
            status_change_result = self_order.update_status(
                order_statuses['order_hold_manager_review'])
            status['success'] = 1
            status['message'] = "Changed order status to Order Hold - Manager Review"
            print(' Customer block was triggered ')
            print(status_change_result)
            break
        # NOTE(review): reached only for an unblocked order that is at least
        # 30 days old; it fires when that order is not older than 60 days and
        # stops the loop without looking at the remaining orders. An order
        # older than 60 days matches no branch at all -- confirm the intent.
        elif retrieved_timestamp >= two_month_cutoff:
            status['success'] = 0
            status['message'] = "There is more than 1 order, and none are greater than 60 days, we need to check manually"
            print(' 60 day check was triggered ')
            print(status)
            break

    return status

Python script error sqlite3.OperationalError: no such column:

I get this error when I run the script and I cannot see the solution. This program is supposed to draw a giveaway from a sqlite3 file which has the number of raffle tickets for a user. Recently the program that creates the sqlite3 file updated some stuff (the script itself is made by me), and I can't figure out the solution.
Traceback (most recent call last):
File "C:\Users\Admin\Desktop\Draw\Test\dave-draw.py", line 244, in <module>
dd = DaveDraw()
File "C:\Users\Admin\Desktop\Draw\Test\dave-draw.py", line 64, in __init__
self.get_viewers()
File "C:\Users\Admin\Desktop\Draw\Test\dave-draw.py", line 215, in
get_viewers
''').fetchall()
sqlite3.OperationalError: no such column: viewer_id
there's the code
#!/usr/bin/env python3
import pdb
import random
import sqlite3
class Viewer(object):
    """Plain record holding the values of one viewer.

    Every constructor argument is stored unchanged on the instance under the
    attribute of the same name.
    """

    def __init__(self, viewer_id, twitch_name, beam_name, beam_id,
                 viewer_type, rank, points, points2, hours, raids,
                 gains_currency, gains_hours, in_giveaways, last_seen, sub,
                 entrance_message, entrance_message_type, entrance_sfx):
        # Identity
        self.viewer_id = viewer_id
        self.twitch_name = twitch_name
        self.beam_name = beam_name
        self.beam_id = beam_id
        self.viewer_type = viewer_type
        self.rank = rank
        # Balances and activity counters
        self.points = points
        self.points2 = points2
        self.hours = hours
        self.raids = raids
        self.gains_currency = gains_currency
        self.gains_hours = gains_hours
        self.in_giveaways = in_giveaways
        self.last_seen = last_seen
        self.sub = sub
        # Entrance settings
        self.entrance_message = entrance_message
        self.entrance_message_type = entrance_message_type
        self.entrance_sfx = entrance_sfx

    def win_chance(self, total_tickets):
        """Return this viewer's chance of winning, in percent.

        ``total_tickets`` is the overall number of tickets; the viewer's own
        share is ``self.points2``.
        """
        one_percent = total_tickets / 100.00
        return self.points2 / one_percent
class DaveDraw(object):
    """Raffle draw over the viewers stored in a SQLite database.

    On construction the viewers are loaded from ``Viewers3DB.sqlite``, their
    ``points2`` balances are summed, and each viewer receives a contiguous
    range of ticket numbers, one ticket per point.
    """

    def __init__(self):
        """Open the database, load the viewers and hand out the tickets."""
        self.debug = False
        self.database_path = 'Viewers3DB.sqlite'
        self.db_conn = sqlite3.connect(self.database_path)
        self.get_viewers()
        self.calculate_total_points()
        self.assign_tickets()

    def assign_tickets(self):
        """
        Assigns each user a number range based on the number of
        tickets they have.
        e.g.
        10 1-10
        10 11-20
        30 21-50
        1 51

        Fills ``self.tickets`` (ticket number -> viewer) and sets
        ``viewer.tickets`` to the viewer's ``range`` of ticket numbers.
        Viewers without a positive ``points2`` balance get no tickets.
        """
        self.tickets = {}
        latest_ticket = 0
        for viewer in self.viewers:
            # skip anyone with no points
            if viewer.points2 <= 0:
                continue
            ticket_range_beg = latest_ticket + 1
            # latest_ticket is the last ticket actually handed out, so the
            # next viewer starts right after it and no number is skipped.
            latest_ticket += viewer.points2
            viewer.tickets = range(ticket_range_beg, latest_ticket + 1)
            if self.debug:
                print("Assigning viewer twitch: %s beam: %s tickets %i-%i" % (viewer.twitch_name, viewer.beam_name, ticket_range_beg, latest_ticket))
            for ticket in viewer.tickets:
                if self.debug:
                    print("Assigning ticket {} to {}".format(ticket, viewer.twitch_name))
                self.tickets[ticket] = viewer

    def calculate_total_points(self):
        """
        Gets the total amount of points awarded to all
        viewers.

        Stores the sum of every viewer's ``points2`` in ``self.total_points``
        and a hundredth of it in ``self.total_points_percent``.
        """
        self.total_points = sum(viewer.points2 for viewer in self.viewers)
        self.total_points_percent = self.total_points / 100
        print("Total points awarded (total tickets): %s" % self.total_points)

    def draw(self):
        """
        Picks a random number between 1 and total tickets, finds
        the user that has been assigned tickets within that range and
        returns the user.

        Raises:
            ValueError: if no tickets have been awarded at all.
        """
        if self.total_points < 1:
            raise ValueError("Cannot draw a winner: no tickets have been awarded")
        ticket = random.randint(1, self.total_points)
        # Ticket numbers are handed out without gaps, so the lookup succeeds
        # for every number that can be drawn.
        winner = self.tickets[ticket]
        print("\n===== WINNER Twitch: {} / Beam: {} =====\n".format(winner.twitch_name, winner.beam_id))
        print("Picked ticket {}\n".format(ticket))
        print("Winner win chance: {:f}".format(winner.win_chance(self.total_points)))
        # range.stop is exclusive; show the last ticket the winner holds.
        print("Winner's ticket range: {}-{}".format(winner.tickets[0], winner.tickets[-1]))
        print("Winner's ticket amount: {}\n".format(winner.points2))
        self.display_viewer(winner)
        return winner

    def display_random_viewer(self):
        """
        Displays random viewer.
        """
        self.display_viewer(self.get_random_viewer())

    def display_viewer(self, viewer):
        """
        Outputs the data of the given viewer.
        """
        print("""Viewer ID: %s\nTwitch Name: %s\nBeam Name: %s\nBeam ID: %s\nRank: %s\nPoints: %s\nPoints2: %s\nHours: %s\nRaids: %s\nGains Currency: %s\nGains Hours: %s\nInGiveaways: %s\nLastSeen: %s\nEntrance Message: %s\nEntranceMsgType: %s\nEntranceSFX: %s"""
              % (
                  viewer.viewer_id,
                  viewer.twitch_name,
                  viewer.beam_name,
                  viewer.beam_id,
                  viewer.rank,
                  viewer.points,
                  viewer.points2,
                  viewer.hours,
                  viewer.raids,
                  viewer.gains_currency,
                  viewer.gains_hours,
                  viewer.in_giveaways,
                  viewer.last_seen,
                  viewer.entrance_message,
                  viewer.entrance_message_type,
                  viewer.entrance_sfx,
              ))

    def get_random_viewer(self):
        """
        Gets a completely random viewer.
        """
        return random.choice(self.viewers)

    def get_viewers(self):
        """
        Gets data on all the viewers in the database and stores
        the data in self.viewers.

        The selected columns must stay in the same order as the parameters
        of ``Viewer.__init__`` because each row is passed on positionally.
        """
        c = self.db_conn.cursor()
        # NOTE(review): sqlite3 reports "no such column: viewer_id" for this
        # query when the Viewer table has no column of that name. Verify the
        # current schema (e.g. PRAGMA table_info(Viewer)) and adjust the
        # column list accordingly.
        rows = c.execute('''
            SELECT
                viewer_id,
                TwitchName,
                BeamName,
                BeamID,
                Type,
                Rank,
                Points,
                Points2,
                Hours,
                Raids,
                GainsCurrency,
                GainsHours,
                InGiveaways,
                LastSeen,
                Sub,
                EntranceMessage,
                EntranceMsgType,
                EntranceSFX
            FROM Viewer
            WHERE Type != 1
            AND TwitchName NOT IN (
                'treeboydave',
                'treebotdave'
            );
        ''').fetchall()
        self.viewers = [Viewer(*row) for row in rows]
if __name__ == "__main__":
    # Load the viewers, hand out the tickets, then pick and print one winner.
    dd = DaveDraw()
    dd.draw()

All your other SQL columns are capitalised, any chance that's why it's not finding the viewer_id column? Maybe it's Viewer_Id or similar?
If you execute the SQL statement 'HELP TABLE Viewer' (in SQLite the equivalent is `PRAGMA table_info(Viewer);`) and print what it returns, it will give you an outline of all of the columns in that database table, so you can check whether you have the capitalisation correct, or whether the column actually isn't there at all.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Optimization of python multithreading script - huge memory consumption - python

Related

How to send and receive messages between processes in Python (through channels)?

Creating a function from current source code "list comprehension"

How do you model multiple arrival distributions?

Python 3 verification script not checking properly

Python script error sqlite3.OperationalError: no such column:

Categories

Resources