Unable to update nested dictionary value in multiprocessing's manager.dict() - python

I am trying to update a key in a nested dictionary stored in the multiprocessing module's manager.dict(), but I am not able to do so. It doesn't update the value, and it doesn't throw any error either.
Code:
import time
import random
from multiprocessing import Pool, Manager

def spammer_task(d, token, repeat):
    success = 0
    fail = 0
    while success + fail < repeat:
        time.sleep(random.random() * 2.0)
        if (random.random() * 100) > 98.0:
            fail += 1
        else:
            success += 1
        d[token] = {
            'status': 'ongoing',
            'fail': fail,
            'success': success,
            'repeat': repeat
        }
    print d[token]['status']
    d[token]['status'] = 'complete'
    return

p = Pool()
m = Manager()
d = m.dict()
p.apply_async(spammer_task(d, 'abc', 5))
print d
Output:
ongoing
{'abc': {'status': 'ongoing', 'fail': 0, 'repeat': 5, 'success': 5}}
My expectation is that as soon as the while loop ends, it should set d['abc']['status'] = 'complete'. But on the final print, it still shows the status as 'ongoing'.

Not sure why, but the Manager DictProxy object can't seem to handle mutating a nested part. This code works:
import time
import random
from multiprocessing import Pool, Manager

def spammer_task(d, token, repeat):
    success = 0
    fail = 0
    while success + fail < repeat:
        time.sleep(random.random() * 2.0)
        if (random.random() * 100) > 98.0:
            fail += 1
        else:
            success += 1
        d[token] = {
            'status': 'ongoing',
            'fail': fail,
            'success': success,
            'repeat': repeat,
        }
    print d[token]['status']
    foo = d[token]
    foo['status'] = 'complete'
    d[token] = foo
    return

p = Pool()
m = Manager()
d = m.dict()
p.apply_async(spammer_task(d, 'abc', 5))
print d
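The fix generalizes to a small read-modify-reassign helper. This is just a sketch, not part of the original answer, for a manager dict d whose values are plain dicts:

def update_nested(d, key, **changes):
    entry = d[key]         # pull the plain dict out of the DictProxy
    entry.update(changes)  # mutate the local copy
    d[key] = entry         # reassign so the manager registers the change

# e.g. update_nested(d, 'abc', status='complete')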

It looks like this issue remains, per the code below:
import multiprocessing, sys

if __name__ == '__main__':
    print(sys.version)
    mpd = multiprocessing.Manager().dict()
    mpd['prcss'] = {'q': 'queue_1', 'ctlg': 'ctlg_1'}
    # update 1 - doesn't work!
    mpd['prcss'].update({'name': 'concfun_1'})
    print('Result of failed update 1:', mpd['prcss'])
    # update 2 - doesn't work!
    mpd['prcss']['name'] = 'concfun_1'
    print('Result of failed update 2:', mpd['prcss'])
    # update 3 - works!
    mpd_prcss = mpd['prcss']
    mpd_prcss['name'] = 'concfun_1'
    mpd['prcss'] = mpd_prcss
    print('Result of successful update 3:', mpd['prcss'])
Output:
3.6.1 (v3.6.1:69c0db5, Mar 21 2017, 17:54:52) [MSC v.1900 32 bit (Intel)]
Result of failed update 1: {'q': 'queue_1', 'ctlg': 'ctlg_1'}
Result of failed update 2: {'q': 'queue_1', 'ctlg': 'ctlg_1'}
Result of successful update 3: {'q': 'queue_1', 'ctlg': 'ctlg_1', 'name': 'concfun_1'}
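An alternative that, as far as I know, works on Python 3.6+ (where nested manager proxies are supported) is to store a managed dict as the nested value instead of a plain dict, so in-place mutations propagate. A minimal sketch, not taken from the answers above:

from multiprocessing import Manager

if __name__ == '__main__':
    m = Manager()
    outer = m.dict()
    outer['prcss'] = m.dict({'q': 'queue_1', 'ctlg': 'ctlg_1'})  # nested proxy, not a plain dict
    outer['prcss']['name'] = 'concfun_1'  # the mutation goes through the nested proxy
    print(dict(outer['prcss']))  # expected to include 'name' on 3.6+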

Related

Multiprocessing and relationship traversal?

I am trying to implement multiprocessing to speed up traversing a relationship graph. I want to capture items that have a total less than 1000. If the parent is over 1000, process the children until there are none left to check.
I've mocked up an illustration showing that ThreadPoolExecutor only processes the initial items provided to the class, even though the class's search_queue_list is still populated. I also tried using a Queue instead of a list, with similar results. Synchronous processing works as expected for both the list and the Queue. Is there a way to make multiprocessing work here when the initial array of items can change?
from concurrent.futures import ThreadPoolExecutor
from queue import Queue
from time import sleep

dummy_data = {
    'id1': {'total': 1001, 'children': ['id101', 'id102']},  # over 1000, children will be processed
    'id2': {'total': 999, 'children': ['id201', 'id202']},   # under 1000, children won't be processed
    'id101': {'total': 501, 'children': ['more_children']},
    'id102': {'total': 500, 'children': ['more_children']},
    'id201': {'total': 499, 'children': ['more_children']},
    'id202': {'total': 500, 'children': ['more_children']},
}

class SearchDummy(object):
    def __init__(self, start_list):
        # with list
        self.search_queue_list = start_list
        # with Queue
        self.search_queue_queue = Queue()
        for item in self.search_queue_list:
            self.search_queue_queue.put(item)
        self.good_ids = []

    def get_total(self, search_id):
        # artificial delay
        sleep(0.5)
        return dummy_data[search_id]['total']

    def get_children(self, search_id):
        # artificial delay
        sleep(0.5)
        return dummy_data[search_id]['children']

    # START LIST
    def current_check_list(self):
        # get first element in search_queue_list
        current_id = self.search_queue_list.pop(0)
        # check if current_id['total'] is over 1000
        if self.get_total(current_id) <= 1000:
            self.good_ids.append(current_id)
        else:
            # prepend children to search_queue_list
            self.search_queue_list.extend(self.get_children(current_id))

    def search_list(self):
        while self.search_queue_list:
            self.current_check_list()

    def multi_search_list(self):
        with ThreadPoolExecutor() as e:
            while self.search_queue_list:
                e.submit(self.current_check_list)
    # END LIST

    # START QUEUE
    def current_check_queue(self):
        # get item from search_queue_queue
        current_id = self.search_queue_queue.get()
        # check if current_id['total'] is over 1000
        if self.get_total(current_id) <= 1000:
            self.good_ids.append(current_id)
        else:
            # put children in search_queue_queue
            for child in self.get_children(current_id):
                self.search_queue_queue.put(child)

    def search_queue(self):
        while not self.search_queue_queue.empty():
            self.current_check_queue()

    def multi_search_queue(self):
        with ThreadPoolExecutor() as e:
            while not self.search_queue_queue.empty():
                e.submit(self.current_check_queue)
    # END QUEUE

# synchronous list
s = SearchDummy(['id1', 'id2'])
s.search_list()
print('List output', s.good_ids)  # returns ['id101', 'id102', 'id2']
print('Remaining list size', len(s.search_queue_list))  # returns 0

# synchronous queue
s = SearchDummy(['id1', 'id2'])
s.search_queue()
print('Queue output', s.good_ids)  # returns ['id101', 'id102', 'id2']
print('Remaining queue size', s.search_queue_queue.qsize())  # returns 0

# multiprocessing list
s = SearchDummy(['id1', 'id2'])
s.multi_search_list()
print('Multi list output', s.good_ids)  # returns ['id2']
print('Multi list remaining', s.search_queue_list)  # returns ['id101', 'id102']

# multiprocessing queue
s = SearchDummy(['id1', 'id2'])
s.multi_search_queue()
print('Multi queue output', s.good_ids)  # returns ['id2']
print('Multi queue remaining', list(s.search_queue_queue.queue))  # returns ['id101', 'id102']
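Not a definitive answer, but the output above is consistent with the while self.search_queue_list loop exiting as soon as the workers pop the two initial ids: by the time id1's children are appended, nothing re-enters the loop to submit them. A minimal batch-and-wait sketch, assuming the SearchDummy class from the question (a drop-in replacement for multi_search_list, not the original code):

from concurrent.futures import ThreadPoolExecutor, wait

def multi_search_list(self):
    with ThreadPoolExecutor() as e:
        while self.search_queue_list:
            # submit one task per item currently queued (a snapshot of the frontier)
            batch = [e.submit(self.current_check_list)
                     for _ in range(len(self.search_queue_list))]
            # wait for the batch so children appended by the workers are seen
            # by the next pass of the while loop
            wait(batch)

# hypothetical wiring: SearchDummy.multi_search_list = multi_search_list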

Thread returning same value in infinite loop

I am new to threading. I'm able to read data from the database using the value() function and pass that data to main(). I've created an infinite loop so that whenever a new value is added to the database, the value() function can read it and pass the new data along, which is then sent to another device. But new data is not picked up automatically; I have to restart the program in order to load the new data and pass it to the other device. I'm confused about why I'm not getting the new value while the loop is still running.
Here is the data format:
data = {'data': 'xyz', 'code': '<:c:605445> **[Code](https://traindata/35.6547,56475', 'time': '2021-12-30T09:56:53.547', 'value': 'True', 'stats': '96/23', 'dupe_id': 'S<:c-74.18'}
Here is the code:
import json
import os
import requests
from threading import Thread

def value(id):
    headers = {'authorization': 'xyz'}  # authorization token here
    r = requests.get(f'https://traindata.com/api/v9/{id}', headers=headers)  # website link along with variable passed here
    jsonn = json.loads(r.text)  # read the values
    i = 0
    d = {}
    while i < len(jsonn):
        data = jsonn[i]["content"].split("")[1]  # splitting information which is needed
        d[str(i)] = {}
        d[str(i)]["data"] = data
        d[str(i)]["code"] = code
        i += 1
    return d

X = value('987654')  # passing the id which gives data from specific file

def main(cds):
    data = X  # passing value which was outside the loop
    for s in data.values():
        print(s)
    while data != "stop":  # unless I press stop the program should run
        if data == "stop":  # when I press stop the program stops
            os.system("disconnect")
        else:
            cds = d['code'].split('/')[-1].split(',')
            # splits value which is needed to be passed onto another device
            return cds

m = main(cds)

if __name__ == "__main__":
    t1 = Thread(target=value, args=(987654, ))  # thread 1
    t2 = Thread(target=main, args=(m, ))  # thread 2
    t1.setDaemon(True)
    t2.setDaemon(True)
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    while True:
        pass
The output I'm getting is this:
Number of active threads: 1
Number of active threads: 1
{'data': 'xyz', 'code': '<:c:605445> **[Code](https://traindata/35.6547,56475', 'time': '2021-12-30T09:56:53.547', 'value': 'True', 'stats': '95/23', 'dupe_id': 'S<:c-74.18'}
35.6547,56475
Number of active threads ae: 2
Number of active threads: 3
{'data': 'xyz', 'code': '<:c:605445> **[Code](https://traindata/35.6547,56475', 'time': '2021-12-30T09:56:53.547', 'value': 'True', 'stats': '95/23', 'dupe_id': 'S<:c-74.18'} #same data is printed as previous one
The same data is printed even though the thread is running in an infinite loop. I want something like this:
Number of active threads: 1
Number of active threads: 1
{'data': 'xyz', 'code': '<:c:605445> **[Code](https://traindata/35.6547,56475', 'time': '2021-12-30T09:56:53.547', 'value': 'True', 'stats': '95/23', 'dupe_id': 'S<:c-74.18'}
35.6547,56475
Number of active threads ae: 2
Number of active threads: 3
{'data': 'xyz', 'code': '<:c:605455> **[Code](https://traindata/42.6247,28.47023', 'time': '2021-12-30T09:59:57.758', 'value': 'True', 'stats': '90/110', 'dupe_id': 'S<:c-74.18'} #different data should be printed (having different time stamp, and code)
42.6247,28.47023
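One thing that may explain it: X = value('987654') runs exactly once at import time, so main() keeps re-reading that single snapshot; nothing in the loop ever calls value() again. A minimal polling sketch that re-fetches inside the loop, assuming the value() function from the question (the 5-second interval is an arbitrary choice, not from the original code):

import time

def poll(id, interval=5):
    previous = None
    while True:
        current = value(id)  # re-fetch on every pass instead of reusing X
        if current != previous:
            for entry in current.values():
                print(entry)
            previous = current
        time.sleep(interval)  # hypothetical poll interval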

Run a function in background using thread in Flask

I am trying to implement a limit order book using Flask, and I am working on the backend part right now. I am new to Flask, so I am still learning, and I am not very familiar with how the backend of trading works, but I am trying to learn via this small project.
I have created 3 endpoints in my application which add an order, remove an order, and return the order status; these three endpoints are working fine (checked with Postman). Now I am trying to run a function in the background which will continuously check for new orders (buy/sell) in a JSON file that stores all new orders. It will pick them up one by one and find a match based on price: if a user's buy order matches a different user's sell order, it will process it and store it in a dict of successful orders, which I want to return to the user.
Here is my code for the class I have created:
import json
import bisect
import random
import os

class Process(object):
    def __init__(self):
        self.trade_book = []
        self.bid_prices = []
        self.ask_prices = []
        self.ask_book = {}
        self.bid_book = {}
        self.confirm_traded = []
        self.orders_history = {}
        self.traded = False
        self.counter = 0

    def save_userdata(self, order, newId):
        orderid = order['order']['trader'] + "_" + str(newId)
        user_list = order
        newJson = {
            "orders": [
                {orderid: order['order']}
            ]
        }
        with open('data_user.json', 'a+') as jsonFile:
            with open('data_user.json', 'r') as readableJson:
                try:
                    jsonObj = json.load(readableJson)
                except Exception as e:
                    jsonObj = {}
            if jsonObj == {}:
                json.dump(newJson, jsonFile)
            else:
                with open('data_user.json', 'w+') as writeFile:
                    exists = False
                    for item in jsonObj['orders']:
                        if item.get(orderid, None) is not None:
                            item[orderid] = order['order']
                            exists = True
                            break
                    if not exists:
                        jsonObj['orders'].append(newJson['orders'][0])
                    json.dump(jsonObj, writeFile)
        return orderid

    def get_userdata(self):
        with open('data_user.json', 'r') as readableJson:
            return json.load(readableJson)

    def removeOrder(self, orderid):
        order_id = list(orderid.values())[0]
        with open('data_user.json') as data_file:
            data = json.load(data_file)
        newData = []
        for item in data['orders']:
            if item.get(order_id, None) is not None:
                del item[order_id]
            else:
                newData.append(item)
        data['orders'] = newData
        with open('data_user.json', 'w') as data_file:
            data = json.dump(data, data_file)
        return order_id

    def add_order_to_book(self, order):
        index = list(order.keys())[0]
        book_order = order[index]
        print(index)
        if order[index]['side'] == 'buy':
            book_prices = self.bid_prices
            book = self.bid_book
        else:  # order[index]['side'] == 'sell'
            book_prices = self.ask_prices
            book = self.ask_book
        if order[index]['price'] in book_prices:
            book[order[index]['price']]['num_orders'] += 1
            book[order[index]['price']]['size'] += order[index]['quantity']
            book[order[index]['price']]['order_ids'].append(index)
            book[order[index]['price']]['orders'][index] = book_order
        else:
            bisect.insort(book_prices, order[index]['price'])
            book[order[index]['price']] = {'num_orders': 1, 'size': order[index]['quantity'],
                                           'order_ids': [index],
                                           'orders': {index: book_order}}

    def confirm_trade(self, order_id, timestamp, order_quantity, order_price, order_side):
        trader = order_id.partition('_')[0]
        self.confirm_traded.append({'trader': trader, 'quantity': order_quantity, 'side': order_side,
                                    'price': order_price,
                                    'status': 'Successful'})
        return self.confirm_traded

    def process_trade_orders(self, order):
        self.traded = False
        index = list(order.keys())[0]
        if order[index]['side'] == 'buy':
            book = self.ask_book
            if order[index]['price'] in self.ask_prices:
                remainder = order[index]['quantity']
                while remainder > 0:
                    book_order_id = book[order[index]['price']]['order_ids'][0]
                    book_order = book[order[index]['price']]['orders'][book_order_id]
                    if remainder >= book_order['quantity']:
                        self.trade_book.append({'order_id': book_order_id, 'timestamp': order[index]['timestamp'],
                                                'price': order[index]['price'],
                                                'quantity': order[index]['quantity'], 'side': book_order['side']})
                        self.confirm_trade(index, order[index]['timestamp'], order[index]['quantity'], order[index]['price'], order[index]['side'])
                        self.traded = True
                        remainder = remainder - book_order['quantity']
                        self.save_historty_orders(index, order[index])
                        break
                    else:
                        self.traded = True
                        self.trade_book.append({'order_id': index, 'timestamp': order[index]['timestamp'],
                                                'price': order[index]['price'],
                                                'quantity': order[index]['quantity'], 'side': order[index]['side']})
                        self.confirm_trade(index, order[index]['timestamp'], order[index]['quantity'], order[index]['price'], order[index]['side'])
                        self.save_historty_orders(index, order[index])
                        break
            else:
                self.add_order_to_book(order)
                self.save_historty_orders(index, order[index])
        else:  # order['side'] == 'sell'
            book = self.bid_book
            if order[index]['price'] in self.bid_prices:
                remainder = order[index]['quantity']
                while remainder > 0:
                    book_order_id = book[order[index]['price']]['order_ids'][0]
                    book_order = book[order[index]['price']]['orders'][book_order_id]
                    if remainder >= book_order['quantity']:
                        self.trade_book.append({'order_id': book_order_id, 'timestamp': order[index]['timestamp'],
                                                'price': order[index]['price'],
                                                'quantity': order[index]['quantity'], 'side': order[index]['side']})
                        self.traded = True
                        self.confirm_trade(index, order[index]['timestamp'], order[index]['quantity'], order[index]['price'], order[index]['side'])
                        remainder = remainder - book_order['quantity']
                        self.save_historty_orders(index, order[index])
                        break
                    else:
                        self.traded = True
                        self.trade_book.append({'order_id': book_order_id, 'timestamp': order[index]['timestamp'],
                                                'price': order[index]['price'],
                                                'quantity': order[index]['quantity'], 'side': order[index]['side']})
                        self.confirm_trade(index, order[index]['timestamp'], order[index]['quantity'], order[index]['price'], order[index]['side'])
                        self.save_historty_orders(index, order[index])
                        break
            else:
                self.add_order_to_book(order)
                self.save_historty_orders(index, order[index])
I create an object of this Process class in my app.py and call process_trade_orders inside a function processing():
from flask import Flask

app = Flask(__name__)
app.config['DEBUG'] = True

newUser = Process()
succorder = Success()
# sched = BackgroundScheduler()

def generate_orderid():
    num = 0
    while num < 1000:
        yield num
        num = num + 1

genid = generate_orderid()
proc = Process()
sucorder = Success()

# Processing orders to find if they have a match
def processing():
    get_orders_data = proc.get_userdata()
    print(get_orders_data)
    print("\n")
    for data in get_orders_data['orders']:
        index = list(data.keys())[0]
        if data[index]['status'] == 'Successful':
            sucorder.add_trader_orders(data[index], index)
        else:
            proc.process_trade_orders(data)

# sched = BackgroundScheduler()
# sched.add_job(func=processing, trigger="interval", seconds=2)
# sched.start()
I did use APScheduler's BackgroundScheduler for this, but I want to use a thread instead. I was thinking of running a main thread in an infinite loop as a daemon and using a worker thread to run the processing() function in app.py, called every few seconds. If there are any successful orders, it would return them (a list of dicts) to the main thread, and for every new one I could send a response or some other notification to the user that the order was matched.
Note that this will be running at short intervals (about 5 seconds), multiple orders will keep being added, and the checks will run continuously and asynchronously, so I am not sure how to return those values. I am just confused, so if anyone can help I will be grateful.
If you want to make a threaded function that runs in the background, just use the threading module, like this:
from threading import Thread

def bg_func():
    doSomething

t = Thread(target=bg_func)
t.start()  # will start the function and continue, even if the function still runs
doSomethingelseAtSameTime  # runs with bg_func
You can also have multiple background threads.
Check the documentation for more info.
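As a rough extension of the above to the periodic-check case in the question, here is a sketch of a daemon thread that calls processing() every few seconds and hands any matches back through a queue. The 5-second interval, the results queue, and draining proc.confirm_traded are assumptions for illustration, not part of the original code:

import time
from queue import Queue
from threading import Thread

results = Queue()  # matched orders can be passed back to the main thread here

def processing_worker(interval=5):
    while True:
        processing()  # the question's function that matches pending orders
        for trade in proc.confirm_traded:
            results.put(trade)
        proc.confirm_traded.clear()
        time.sleep(interval)  # hypothetical poll interval

worker = Thread(target=processing_worker, daemon=True)
worker.start()  # Flask keeps serving requests in the main thread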

Time difference in function with similar logic

I am writing scripts to test a SQL Server DB. I'm trying to do that in an optimal way and toying with SQLite.
The code snippet just compares a DB query result to a pre-determined value, doing that with two different approaches but similar logic:
a) tcount() function
b) newcount() and test_test() functions
I could not figure out why this difference in time taken occurs. Or is it too small to matter?
import sqlite3
import sys, time, re, timeit
import pytest

def timing(f):
    def wrap(*args):
        time1 = time.time()
        ret = f(*args)
        time2 = time.time()
        print 'Function :%s took %0.3f ms' % (f.func_name, (time2-time1)*1000.0)
        return ret
    return wrap

conn = sqlite3.connect(r'E:\Python_Projects\Demo\sqlite-DBS\newdb.db')

@timing
def tcount():
    table_list = ['Album', 'Artist', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist']
    count_query = """select count(*) from %s;"""
    count = {'Album': 347, 'Playlist': 18, 'Artist': 275, 'MediaType': 5, 'Genre': 25, 'Invoice': 412, 'InvoiceLine': 2240, 'Employee': 8}
    table_count = {}
    for table in table_list:
        try:
            result = conn.execute(count_query % table)
            for x in result:
                table_count[table] = x[0]
        except:
            e = sys.exc_info()[0]
            print e
    return (table_count == count)

@timing
def newcount():
    table_list = ['Album', 'Artist', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist']
    count_query = """select count(*) from %s;"""
    table_count = {}
    for table in table_list:
        try:
            result = conn.execute(count_query % table)
            for x in result:
                table_count[table] = x[0]
        except:
            e = sys.exc_info()[0]
            print e
    return table_count

@timing
def test_test():
    count = {'Album': 347, 'Playlist': 18, 'Artist': 275, 'MediaType': 5, 'Genre': 25, 'Invoice': 412, 'InvoiceLine': 2240, 'Employee': 8}
    return (newcount() == count)

print tcount()
print test_test()
conn.close()
output:
Function :tcount took 0.000 ms
True
Function :newcount took 0.000 ms
Function :test_test took 16.000 ms
True
You should probably use the timeit module (https://docs.python.org/3.5/library/timeit.html), since it is far better suited to benchmarking than time.
But I think every SQL command has some info command that can be called after the actual execution and prints the status of that query and how long it took to execute, which will be even more accurate than timeit.
But since I haven't used SQL in a long time, I can't give you more information on that.
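For completeness, a minimal timeit sketch for this case, assuming the two functions can be imported from a module named mymodule (a hypothetical name):

import timeit

print(timeit.timeit('tcount()', setup='from mymodule import tcount', number=100))
print(timeit.timeit('test_test()', setup='from mymodule import test_test', number=100))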

Python 2.7 yield is creating strange behavior

Using Python 2.7....
The print thread["Subject"] statement should print:
please ignore - test2
and
Please ignore - test
It does successfully with this:
def thread_generator(threads):
    tz = pytz.timezone('America/Chicago')
    POSTS_SINCE_HOURS = 24
    now_date = datetime.datetime.now(tz)
    time_stamp = now_date - datetime.timedelta(hours=POSTS_SINCE_HOURS)
    for thread in threads:
        last_post_time = convert_time(thread["LatestPostDate"])
        if last_post_time > time_stamp:
            print thread["Subject"]
            row = {
                "url": thread["Url"],
                "author_username": thread["Author"]["Username"],
                "author_name": thread["Author"]["DisplayName"],
                "thread_id": thread["Id"],
                "forum_id": thread["ForumId"],
                "subject": thread["Subject"],
                "created_date": thread["Content"]["CreatedDate"],
                "reply_count": thread["ReplyCount"],
                "latest_post_date": thread["LatestPostDate"],
                "latest_reply_author": thread["LatestForumReplyAuthorId"]}
But when I add yield row, the print thread["Subject"] statement does not show "Please ignore - test" as it should.
def thread_generator(threads):
    tz = pytz.timezone('America/Chicago')
    POSTS_SINCE_HOURS = 24
    now_date = datetime.datetime.now(tz)
    time_stamp = now_date - datetime.timedelta(hours=POSTS_SINCE_HOURS)
    for thread in threads:
        last_post_time = convert_time(thread["LatestPostDate"])
        if last_post_time > time_stamp:
            print thread["Subject"]
            row = {
                "url": thread["Url"],
                "author_username": thread["Author"]["Username"],
                "author_name": thread["Author"]["DisplayName"],
                "thread_id": thread["Id"],
                "forum_id": thread["ForumId"],
                "subject": thread["Subject"],
                "created_date": thread["Content"]["CreatedDate"],
                "reply_count": thread["ReplyCount"],
                "latest_post_date": thread["LatestPostDate"],
                "latest_reply_author": thread["LatestForumReplyAuthorId"]}
            yield row
Why is this? "Please ignore - test" should still show from print thread["Subject"]. It makes no sense to me.
UPDATE: How the generator is called:
def sanitize_threads(threads):
    for thread in thread_generator(threads):
        # do stuff
        thread_batch.append(thread)
    return thread_batch

def get_unanswered_threads():
    slug = 'forums/threads/unanswered.json?PageSize=100'
    status_code, threads = do_request(slug)
    if status_code == 200:
        threads = threads["Threads"]
        thread_batch = sanitize_threads(threads)
        database_update(thread_batch)
Have you tried actually calling next() on the resulting generator? If you call the function containing yield the same way you call the function without it, in the yield case you'll get a generator object as the result. A generator doesn't evaluate what's inside it until you actually require a value from it, which can be done with next(generator).
For example:
>>> def nogen():
...     '''Create a list of values 0-3 and print each one as you build the list.'''
...     r = []
...     for i in range(3):
...         print(i)
...         r.append(i)
...     return r
...
>>> def firstgen():
...     '''Create an iterator of values 0-3 and print each one as you yield the value.'''
...     for i in range(3):
...         print(i)
...         yield i
...
>>> a = nogen() # This function is eager and does everything at once.
0
1
2
>>> a
[0, 1, 2]
>>> b = firstgen() # Note there's no output immediately after this line.
>>> list(b) # The output happens as the list coercion causes the generator to execute.
0
1
2
[0, 1, 2]
>>> c = firstgen() # You can also see this a step at a time using `next`
>>> next(c)
0
0
>>> next(c)
1
1
>>> next(c)
2
2
>>> next(c)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
StopIteration
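To tie that back to the question's code (a sketch, assuming thread_generator and threads from above): nothing is printed until something iterates the generator, for example the for loop in sanitize_threads or an explicit next() call.
gen = thread_generator(threads)  # nothing printed yet; the body hasn't run
first_row = next(gen)            # the loop now runs up to the first yield,
                                 # so print thread["Subject"] fires here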
