I am new to threading. I'm able to read the data from the database using the value() function and pass that data to main(). I've created an infinite loop so that whenever a new value is added to the database, value() can read it and pass the new data on, which will then be sent to another device. But new data is not picked up automatically; I have to restart the program in order to load the new data and pass it to the other device. I'm confused about why I'm not getting the new value while the loop is still running.
Here is the data format:
data = {'data': 'xyz', 'code': '<:c:605445> **[Code](https://traindata/35.6547,56475', 'time': '2021-12-30T09:56:53.547', 'value': 'True', 'stats': '96/23', 'dupe_id': 'S<:c-74.18'}
Here is the code:
def value(id):
    headers = {'authorization': 'xyz'}  # authorization token here
    r = requests.get(f'https://traindata.com/api/v9/{id}', headers=headers)  # website link along with the id passed in
    jsonn = json.loads(r.text)  # read the values
    i = 0
    d = {}
    while i < len(jsonn):
        data = jsonn[i]["content"].split("")[1]  # splitting out the information which is needed
        d[str(i)] = {}
        d[str(i)]["data"] = data
        d[str(i)]["code"] = code
        i += 1
    return d

X = value('987654')  # passing the id which selects data from a specific file

def main(cds):
    data = X  # value which was fetched outside the loop
    for s in data.values():
        print(s)
    while data != "stop":  # unless I press stop the program should run
        if data == "stop":  # when I press stop the program stops
            os.system("disconnect")
        else:
            cds = d['code'].split('/')[-1].split(',')
            # splits the value which needs to be passed on to another device
            return cds

m = main(cds)

if __name__ == "__main__":
    t1 = Thread(target=value, args=(987654,))  # thread 1
    t2 = Thread(target=main, args=(m,))  # thread 2
    t1.setDaemon(True)
    t2.setDaemon(True)
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    while True:
        pass
The output I'm getting is this:
Number of active threads: 1
Number of active threads: 1
{'data': 'xyz', 'code': '<:c:605445> **[Code](https://traindata/35.6547,56475', 'time': '2021-12-30T09:56:53.547', 'value': 'True', 'stats': '95/23', 'dupe_id': 'S<:c-74.18'}
35.6547,56475
Number of active threads ae: 2
Number of active threads: 3
{'data': 'xyz', 'code': '<:c:605445> **[Code](https://traindata/35.6547,56475', 'time': '2021-12-30T09:56:53.547', 'value': 'True', 'stats': '95/23', 'dupe_id': 'S<:c-74.18'} #same data is printed as previous one
The same data is printed even though the thread is running in an infinite loop. I want something like this:
Number of active threads: 1
Number of active threads: 1
{'data': 'xyz', 'code': '<:c:605445> **[Code](https://traindata/35.6547,56475', 'time': '2021-12-30T09:56:53.547', 'value': 'True', 'stats': '95/23', 'dupe_id': 'S<:c-74.18'}
35.6547,56475
Number of active threads ae: 2
Number of active threads: 3
{'data': 'xyz', 'code': '<:c:605455> **[Code](https://traindata/42.6247,28.47023', 'time': '2021-12-30T09:59:57.758', 'value': 'True', 'stats': '90/110', 'dupe_id': 'S<:c-74.18'} #different data should be printed (having different time stamp, and code)
42.6247,28.47023
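For reference, the root of the problem: X = value('987654') runs exactly once at import time, so main() keeps re-reading that single snapshot forever. The t1 thread does call value() again, but its return value is discarded. Re-fetching inside the loop is what picks up new rows. A minimal sketch of that pattern, assuming value() from above; the 5-second poll interval and the changed-data check are my own choices:

import time
from threading import Thread

def poll(record_id, interval=5):
    """Re-fetch inside the loop so every pass sees fresh data."""
    last = None
    while True:
        d = value(record_id)   # call the fetch function on every pass
        if d != last:          # only react when the data actually changed
            for s in d.values():
                print(s)
            last = d
        time.sleep(interval)   # don't hammer the API

t = Thread(target=poll, args=('987654',), daemon=True)
t.start()
t.join()  # keep the main thread alive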
I am trying to implement multiprocessing to speed up traversing a relationship graph. I want to capture items that have a total less than 1000. If the parent is over 1000, process the children until there's no more to check.
I've mocked up an example that shows ThreadPoolExecutor only processing the initial items provided to the class, even while the class's search_queue_list is still populated. I also tried using a Queue instead of a list, with similar results. Synchronous processing works as expected for both the list and the Queue. Is there a way to make the concurrent version work here when the initial list of items can change?
from concurrent.futures import ThreadPoolExecutor
from queue import Queue
from time import sleep

dummy_data = {
    'id1': {'total': 1001, 'children': ['id101', 'id102']},  # over 1000, children will be processed
    'id2': {'total': 999, 'children': ['id201', 'id202']},   # under 1000, children won't be processed
    'id101': {'total': 501, 'children': ['more_children']},
    'id102': {'total': 500, 'children': ['more_children']},
    'id201': {'total': 499, 'children': ['more_children']},
    'id202': {'total': 500, 'children': ['more_children']},
}

class SearchDummy(object):
    def __init__(self, start_list):
        # with list
        self.search_queue_list = start_list
        # with Queue
        self.search_queue_queue = Queue()
        for item in self.search_queue_list:
            self.search_queue_queue.put(item)
        self.good_ids = []

    def get_total(self, search_id):
        # artificial delay
        sleep(0.5)
        return dummy_data[search_id]['total']

    def get_children(self, search_id):
        # artificial delay
        sleep(0.5)
        return dummy_data[search_id]['children']

    # START LIST
    def current_check_list(self):
        # get first element in search_queue_list
        current_id = self.search_queue_list.pop(0)
        # check if current_id's total is over 1000
        if self.get_total(current_id) <= 1000:
            self.good_ids.append(current_id)
        else:
            # append children to search_queue_list
            self.search_queue_list.extend(self.get_children(current_id))

    def search_list(self):
        while self.search_queue_list:
            self.current_check_list()

    def multi_search_list(self):
        with ThreadPoolExecutor() as e:
            while self.search_queue_list:
                e.submit(self.current_check_list)
    # END LIST

    # START QUEUE
    def current_check_queue(self):
        # get item from search_queue_queue
        current_id = self.search_queue_queue.get()
        # check if current_id's total is over 1000
        if self.get_total(current_id) <= 1000:
            self.good_ids.append(current_id)
        else:
            # put children in search_queue_queue
            for child in self.get_children(current_id):
                self.search_queue_queue.put(child)

    def search_queue(self):
        while not self.search_queue_queue.empty():
            self.current_check_queue()

    def multi_search_queue(self):
        with ThreadPoolExecutor() as e:
            while not self.search_queue_queue.empty():
                e.submit(self.current_check_queue)
    # END QUEUE

# synchronous list
s = SearchDummy(['id1', 'id2'])
s.search_list()
print('List output', s.good_ids)  # returns ['id101', 'id102', 'id2']
print('Remaining list size', len(s.search_queue_list))  # returns 0

# synchronous queue
s = SearchDummy(['id1', 'id2'])
s.search_queue()
print('Queue output', s.good_ids)  # returns ['id101', 'id102', 'id2']
print('Remaining queue size', s.search_queue_queue.qsize())  # returns 0

# threaded list
s = SearchDummy(['id1', 'id2'])
s.multi_search_list()
print('Multi list output', s.good_ids)  # returns ['id2']
print('Multi list remaining', s.search_queue_list)  # returns ['id101', 'id102']

# threaded queue
s = SearchDummy(['id1', 'id2'])
s.multi_search_queue()
print('Multi queue output', s.good_ids)  # returns ['id2']
print('Multi queue remaining', list(s.search_queue_queue.queue))  # returns ['id101', 'id102']
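One way to make the threaded version drain a queue that grows while it runs (a sketch, not the only approach): instead of racing a while loop against the workers, submit a fixed number of worker loops that each keep pulling from the Queue until it stays empty. The worker count and the one-second timeout below are arbitrary choices; a timeout this short can exit early if get_total/get_children ever take longer than it:

from concurrent.futures import ThreadPoolExecutor
from queue import Empty

def multi_search_queue_workers(search, workers=4):
    """Drain search.search_queue_queue with worker loops that survive re-queued children."""
    def worker():
        while True:
            try:
                # time out so workers stop once the queue stays empty
                current_id = search.search_queue_queue.get(timeout=1)
            except Empty:
                return
            if search.get_total(current_id) <= 1000:
                search.good_ids.append(current_id)
            else:
                for child in search.get_children(current_id):
                    search.search_queue_queue.put(child)

    with ThreadPoolExecutor(max_workers=workers) as e:
        for _ in range(workers):
            e.submit(worker)

s = SearchDummy(['id1', 'id2'])
multi_search_queue_workers(s)
print('Worker queue output', s.good_ids)  # expected: 'id2', 'id101', 'id102' in some order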
I am trying to implement a limit order book using Flask, and I am working on the backend part right now. I am new to Flask, so I am still learning, and I am not very familiar with how trading backends work, but I am trying to learn via this small project.
I have created 3 endpoints in my application which add an order, remove an order, and return the order status; these three endpoints are working fine (checked with Postman). Now I am trying to run a function in the background which will continuously check for new orders (buy/sell) in a JSON file that stores all new orders. It will pick them one by one and find a match based on price: if one user's buy order matches a different user's sell order, it will process it and store it in a dict, which I want to return (or otherwise report all those successful orders) to the user.
Here is my code for the class I have created:
import json
import bisect
import random
import os

class Process(object):
    def __init__(self):
        self.trade_book = []
        self.bid_prices = []
        self.ask_prices = []
        self.ask_book = {}
        self.bid_book = {}
        self.confirm_traded = []
        self.orders_history = {}
        self.traded = False
        self.counter = 0

    def save_userdata(self, order, newId):
        orderid = order['order']['trader'] + "_" + str(newId)
        user_list = order
        newJson = {
            "orders": [
                {orderid: order['order']}
            ]
        }
        with open('data_user.json', 'a+') as jsonFile:
            with open('data_user.json', 'r') as readableJson:
                try:
                    jsonObj = json.load(readableJson)
                except Exception as e:
                    jsonObj = {}
            if jsonObj == {}:
                json.dump(newJson, jsonFile)
            else:
                with open('data_user.json', 'w+') as writeFile:
                    exists = False
                    for item in jsonObj['orders']:
                        if item.get(orderid, None) is not None:
                            item[orderid] = order['order']
                            exists = True
                            break
                    if not exists:
                        jsonObj['orders'].append(newJson['orders'][0])
                    json.dump(jsonObj, writeFile)
        return orderid

    def get_userdata(self):
        with open('data_user.json', 'r') as readableJson:
            return json.load(readableJson)

    def removeOrder(self, orderid):
        order_id = list(orderid.values())[0]
        with open('data_user.json') as data_file:
            data = json.load(data_file)
        newData = []
        for item in data['orders']:
            if item.get(order_id, None) is not None:
                del item[order_id]
            else:
                newData.append(item)
        data['orders'] = newData
        with open('data_user.json', 'w') as data_file:
            json.dump(data, data_file)
        return order_id

    def add_order_to_book(self, order):
        index = list(order.keys())[0]
        book_order = order[index]
        print(index)
        if order[index]['side'] == 'buy':
            book_prices = self.bid_prices
            book = self.bid_book
        else:  # order[index]['side'] == 'sell'
            book_prices = self.ask_prices
            book = self.ask_book
        if order[index]['price'] in book_prices:
            book[order[index]['price']]['num_orders'] += 1
            book[order[index]['price']]['size'] += order[index]['quantity']
            book[order[index]['price']]['order_ids'].append(index)
            book[order[index]['price']]['orders'][index] = book_order
        else:
            bisect.insort(book_prices, order[index]['price'])
            book[order[index]['price']] = {'num_orders': 1,
                                           'size': order[index]['quantity'],
                                           'order_ids': [index],
                                           'orders': {index: book_order}}

    def confirm_trade(self, order_id, timestamp, order_quantity, order_price, order_side):
        trader = order_id.partition('_')[0]
        self.confirm_traded.append({'trader': trader,
                                    'quantity': order_quantity,
                                    'side': order_side,
                                    'price': order_price,
                                    'status': 'Successful'})
        return self.confirm_traded

    def process_trade_orders(self, order):
        self.traded = False
        index = list(order.keys())[0]
        if order[index]['side'] == 'buy':
            book = self.ask_book
            if order[index]['price'] in self.ask_prices:
                remainder = order[index]['quantity']
                while remainder > 0:
                    book_order_id = book[order[index]['price']]['order_ids'][0]
                    book_order = book[order[index]['price']]['orders'][book_order_id]
                    if remainder >= book_order['quantity']:
                        self.trade_book.append({'order_id': book_order_id,
                                                'timestamp': order[index]['timestamp'],
                                                'price': order[index]['price'],
                                                'quantity': order[index]['quantity'],
                                                'side': book_order['side']})
                        self.confirm_trade(index, order[index]['timestamp'], order[index]['quantity'],
                                           order[index]['price'], order[index]['side'])
                        self.traded = True
                        remainder = remainder - book_order['quantity']
                        self.save_historty_orders(index, order[index])
                        break
                    else:
                        self.traded = True
                        self.trade_book.append({'order_id': index,
                                                'timestamp': order[index]['timestamp'],
                                                'price': order[index]['price'],
                                                'quantity': order[index]['quantity'],
                                                'side': order[index]['side']})
                        self.confirm_trade(index, order[index]['timestamp'], order[index]['quantity'],
                                           order[index]['price'], order[index]['side'])
                        self.save_historty_orders(index, order[index])
                        break
            else:
                self.add_order_to_book(order)
                self.save_historty_orders(index, order[index])
        else:  # order[index]['side'] == 'sell'
            book = self.bid_book
            if order[index]['price'] in self.bid_prices:
                remainder = order[index]['quantity']
                while remainder > 0:
                    book_order_id = book[order[index]['price']]['order_ids'][0]
                    book_order = book[order[index]['price']]['orders'][book_order_id]
                    if remainder >= book_order['quantity']:
                        self.trade_book.append({'order_id': book_order_id,
                                                'timestamp': order[index]['timestamp'],
                                                'price': order[index]['price'],
                                                'quantity': order[index]['quantity'],
                                                'side': order[index]['side']})
                        self.traded = True
                        self.confirm_trade(index, order[index]['timestamp'], order[index]['quantity'],
                                           order[index]['price'], order[index]['side'])
                        remainder = remainder - book_order['quantity']
                        self.save_historty_orders(index, order[index])
                        break
                    else:
                        self.traded = True
                        self.trade_book.append({'order_id': book_order_id,
                                                'timestamp': order[index]['timestamp'],
                                                'price': order[index]['price'],
                                                'quantity': order[index]['quantity'],
                                                'side': order[index]['side']})
                        self.confirm_trade(index, order[index]['timestamp'], order[index]['quantity'],
                                           order[index]['price'], order[index]['side'])
                        self.save_historty_orders(index, order[index])
                        break
            else:
                self.add_order_to_book(order)
                self.save_historty_orders(index, order[index])
I create an object of this Process class in my app.py and call its process_trade_orders function inside a function processing():
app = Flask(__name__)
app.config['DEBUG'] = True

newUser = Process()
succorder = Success()
# sched = BackgroundScheduler()

def generate_orderid():
    num = 0
    while num < 1000:
        yield num
        num = num + 1

genid = generate_orderid()
proc = Process()
sucorder = Success()

# Processing orders to find if they have a match
def processing():
    get_orders_data = proc.get_userdata()
    print(get_orders_data)
    print("\n")
    for data in get_orders_data['orders']:
        index = list(data.keys())[0]
        if data[index]['status'] == 'Successful':
            sucorder.add_trader_orders(data[index], index)
        else:
            proc.process_trade_orders(data)

# sched = BackgroundScheduler()
# sched.add_job(func=processing, trigger="interval", seconds=2)
# sched.start()
I did use APScheduler's BackgroundScheduler for this, but I want to use a thread instead. I was thinking of running a main thread in an infinite loop as a daemon and using a worker thread to run this processing() function in app.py, calling it every few seconds to check whether there are any successful orders; it would return the value to the main thread, and for each new dict in that list I could send a response (or use some other way) to tell the user about the successful order getting matched.
Note that this will run at short intervals, like 5 seconds, and multiple add-order requests will keep coming in while the checks run continuously and asynchronously, so I am not sure how I will return those values. I am just confused, so I would be grateful for any help.
If you want to make a threaded function that runs in background, just use the threading module, like this:
from threading import Thread
import time

def bg_func():
    # placeholder background work
    while True:
        print('background tick')
        time.sleep(1)

t = Thread(target=bg_func)
t.start()  # starts the function and continues, even while it still runs

print('main thread keeps going')  # runs at the same time as bg_func
You can also have multiple background threads.
Check the documentation for more info.
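For the interval use case in the question above, a daemon thread that sleeps between calls is a common shape. A minimal sketch; processing is the question's function, and the 5-second interval mirrors what the question describes:

import time
from threading import Thread

def start_processing_loop(interval=5):
    def loop():
        while True:
            processing()          # the question's order-matching function
            time.sleep(interval)
    # daemon=True so the thread dies with the Flask process
    t = Thread(target=loop, daemon=True)
    t.start()
    return t

Rather than "returning" matches to a main thread, the loop can push them onto a thread-safe structure such as a queue.Queue that an endpoint then drains.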
I've got code with a for loop which works perfectly fine. However, I'm struggling to implement a while loop; it looks like I'm getting empty JSON objects. How could I get the while version working, bearing in mind that at some point the JSON object becomes {"data":[],"result":"ok"}?
My while loop:
def after_login(self, response):
    if "smg" in response.body:
        # for i in range(0, 100, 10):
        minime = 2
        i = 10
        while len(self.parse_firstcall(response)['data']) > 1 or minime > 1:
            print('------------------------------------')
            print(len(self.parse_firstcall(response)['data']))
            print(str(minime))
            print(str(i))
            print('-------------------------------------')
            yield FormRequest(
                url='URL',
                formdata={'act': 'serial', 'type': 'search', 'o': str(i), 's': '3', 't': '0'},
                callback=self.parse_firstcall
            )
            minime = 0
            i += 10
            time.sleep(5)

def parse_firstcall(self, response):
    try:
        firstc = response.body
        self.serialj = json.loads(firstc)
    except:
        self.serialj = {"data": ['why', 'always', 'me'], "result": "ok"}
    return self.serialj
The solution I've found:
There's no need for a while loop here. Scrapy schedules yielded requests asynchronously, so the while condition never sees the new responses. Instead, simply make one call and, in the callback, check whether len() of the data is bigger than 1 before requesting the next page:
def after_login(self, response):
    if "smg" in response.body:
        yield FormRequest(
            url='url',
            formdata={'act': 'serial', 'type': 'search', 'o': str(self.req), 's': '3', 't': '0'},
            callback=self.parse_firstcall
        )

def parse_firstcall(self, response):
    firstc = response.body
    serialj = json.loads(firstc)
    if len(serialj['data']) > 1:
        print('///////////////////////////////////////////')
        print('Request number: ' + str(self.req) + ' been made')
        print('///////////////////////////////////////////')
        for i in serialj['data']:
            self.series[i['title_orig']] = i
        self.req += 10
        yield FormRequest(
            url='url',
            formdata={'act': 'serial', 'type': 'search', 'o': str(self.req), 's': '3', 't': '0'},
            callback=self.parse_firstcall
        )
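For completeness, self.req and self.series are used above but never shown being initialized; a hypothetical spider skeleton (the class name and attribute defaults are my own) would set them up like this:

import scrapy

class SerialSpider(scrapy.Spider):  # hypothetical name for the spider above
    name = 'serial'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.req = 0      # paging offset passed as the 'o' form field
        self.series = {}  # results accumulated by parse_firstcall, keyed by title_orig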
I have a complex function which takes forever to run on a large pandas DataFrame, and I cannot find a way to accelerate it. Do you have any tips?
I have used numba, but this is clearly not enough. I have also tried using index references to exploit pandas' capabilities to the maximum, but I am sure there are other approaches I have not tried.
What this function does is basically take a DataFrame of randomly spaced events in time and normalize it into second-spaced events. There are three different types of events (TRADE, BEST_BID, BEST_ASK), so for each second I should have three rows (one per event type). If no event of a specific type happened during that second, we reuse the previous value.
Thank you for your help!
import pandas

# @numba.jit
def convertTicksToSeconds(dataFrame_df):
    idxs = dataFrame_df['data_all'][dataFrame_df['data_all']['time_change']].index.tolist()
    previous_idx = 0
    progress_i = 0
    # Creation of the df to hold the normalized data
    normalized_Data = pandas.DataFrame(columns=['timestamp', 'B.A.T', 'price', 'volume', 'asset'])
    BAT_type = ['TRADE', 'BEST_BID', 'BEST_ASK']
    tmp_time = dataFrame_df['data_all']['timestamp'][0]
    data_TRADE = {'timestamp': tmp_time, 'B.A.T': 'TRADE', 'price': 0, 'volume': 0, 'asset': dataFrame_df['data_all']['asset'][0]}
    data_BID = {'timestamp': tmp_time, 'B.A.T': 'BEST_BID', 'price': 0, 'volume': 0, 'asset': dataFrame_df['data_all']['asset'][0]}
    data_ASK = {'timestamp': tmp_time, 'B.A.T': 'BEST_ASK', 'price': 0, 'volume': 0, 'asset': dataFrame_df['data_all']['asset'][0]}
    for BAT in BAT_type:
        for idx in idxs:
            if dataFrame_df['data_all'][previous_idx:idx - 1][dataFrame_df['data_all']['B.A.T'] == BAT].empty == False:
                timestamp = dataFrame_df['data_all']['timestamp'][idx]
                price = dataFrame_df['data_all']['price'][previous_idx:idx - 1][dataFrame_df['data_all']['B.A.T'] == BAT]
                volume = dataFrame_df['data_all']['volume'][previous_idx:idx - 1][dataFrame_df['data_all']['B.A.T'] == BAT]
                total_volume = volume.sum()
                weighted_price = price * volume
                weighted_price = weighted_price.sum() / total_volume
                volume = volume.mean()
                asset = dataFrame_df['data_all']['asset'][idx]
                if BAT == 'TRADE':
                    data_TRADE = {'timestamp': timestamp, 'B.A.T': BAT, 'price': weighted_price, 'volume': volume, 'asset': asset}
                elif BAT == 'BEST_BID':
                    data_BID = {'timestamp': timestamp, 'B.A.T': BAT, 'price': weighted_price, 'volume': volume, 'asset': asset}
                elif BAT == 'BEST_ASK':
                    data_ASK = {'timestamp': timestamp, 'B.A.T': BAT, 'price': weighted_price, 'volume': volume, 'asset': asset}
            print(data_TRADE)
            print(data_BID)
            print(data_ASK)
            # append returns a new DataFrame, so the result must be reassigned
            normalized_Data = normalized_Data.append(data_TRADE, ignore_index=True)
            normalized_Data = normalized_Data.append(data_BID, ignore_index=True)
            normalized_Data = normalized_Data.append(data_ASK, ignore_index=True)
            previous_idx = idx
            progress_i += 1
            tmp = (progress_i / len(idxs)) * 100
            print('Progress : ' + str(tmp) + ' %')
    return normalized_Data
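For what it's worth, numba rarely helps code that is dominated by pandas indexing, and DataFrame.append in a loop is quadratic because each call copies the whole frame; the real speedup usually comes from replacing the loop with groupby/resample. A rough sketch under my own assumptions (data_all is a single DataFrame with timestamp, B.A.T, price, and volume columns; per-second price is volume-weighted; the asset column is left out for brevity):

import pandas as pd

def normalize_to_seconds(df):
    """Sketch: one row per second per event type, reusing the previous value for empty seconds."""
    df = df.copy()
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['pv'] = df['price'] * df['volume']  # numerator of the volume-weighted price
    frames = []
    for bat, grp in df.groupby('B.A.T'):
        g = grp.set_index('timestamp').sort_index().resample('1s')
        sec = pd.DataFrame({
            'price': g['pv'].sum() / g['volume'].sum(),  # VWAP per second
            'volume': g['volume'].mean(),
        })
        sec['B.A.T'] = bat
        sec = sec.ffill()  # seconds with no event reuse the previous value
        frames.append(sec.reset_index())
    return pd.concat(frames, ignore_index=True)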
I am trying to update a key in a nested dictionary of the multiprocessing module's manager.dict(), but I'm not able to do so. It doesn't update the value and doesn't throw any error either.
Code:
import time
import random
from multiprocessing import Pool, Manager

def spammer_task(d, token, repeat):
    success = 0
    fail = 0
    while success + fail < repeat:
        time.sleep(random.random() * 2.0)
        if (random.random() * 100) > 98.0:
            fail += 1
        else:
            success += 1
    d[token] = {
        'status': 'ongoing',
        'fail': fail,
        'success': success,
        'repeat': repeat
    }
    print(d[token]['status'])
    d[token]['status'] = 'complete'
    return

p = Pool()
m = Manager()
d = m.dict()
p.apply_async(spammer_task(d, 'abc', 5))
print(d)
Output:
ongoing
{'abc': {'status': 'ongoing', 'fail': 0, 'repeat': 5, 'success': 5}}
My expectation is that as soon as the while loop ends, it should set d['abc']['status'] = 'complete'. But the final print shows its status as 'ongoing' only.
Not sure of the internals, but the Manager's DictProxy can't handle mutating a nested part: d[token] returns a plain local copy of the inner dict, so changes to that copy are never sent back to the manager process. Reading the value out, modifying it, and reassigning it works:
import time
import random
from multiprocessing import Pool, Manager

def spammer_task(d, token, repeat):
    success = 0
    fail = 0
    while success + fail < repeat:
        time.sleep(random.random() * 2.0)
        if (random.random() * 100) > 98.0:
            fail += 1
        else:
            success += 1
    d[token] = {
        'status': 'ongoing',
        'fail': fail,
        'success': success,
        'repeat': repeat,
    }
    print(d[token]['status'])
    foo = d[token]           # read a local copy of the nested dict
    foo['status'] = 'complete'
    d[token] = foo           # reassign so the proxy sees the change
    return

p = Pool()
m = Manager()
d = m.dict()
p.apply_async(spammer_task(d, 'abc', 5))
print(d)
Looks like this issue remains, per the code below:
import multiprocessing, sys

if __name__ == '__main__':
    print(sys.version)
    mpd = multiprocessing.Manager().dict()
    mpd['prcss'] = {'q': 'queue_1', 'ctlg': 'ctlg_1'}
    # update 1 - doesn't work!
    mpd['prcss'].update({'name': 'concfun_1'})
    print('Result of failed update 1:', mpd['prcss'])
    # update 2 - doesn't work!
    mpd['prcss']['name'] = 'concfun_1'
    print('Result of failed update 2:', mpd['prcss'])
    # update 3 - works!
    mpd_prcss = mpd['prcss']
    mpd_prcss['name'] = 'concfun_1'
    mpd['prcss'] = mpd_prcss
    print('Result of successful update 3:', mpd['prcss'])
Output:
3.6.1 (v3.6.1:69c0db5, Mar 21 2017, 17:54:52) [MSC v.1900 32 bit (Intel)]
Result of failed update 1: {'q': 'queue_1', 'ctlg': 'ctlg_1'}
Result of failed update 2: {'q': 'queue_1', 'ctlg': 'ctlg_1'}
Result of successful update 3: {'q': 'queue_1', 'ctlg': 'ctlg_1', 'name': 'concfun_1'}
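One more option worth noting, assuming you can require Python 3.6+ (where nested manager proxies are supported): store a managed dict inside the managed dict, so mutating the nested level also goes through a proxy:

import multiprocessing

if __name__ == '__main__':
    m = multiprocessing.Manager()
    mpd = m.dict()
    # the inner value is itself a proxy, not a plain dict
    mpd['prcss'] = m.dict({'q': 'queue_1', 'ctlg': 'ctlg_1'})
    mpd['prcss']['name'] = 'concfun_1'  # mutates through the nested proxy
    print(dict(mpd['prcss']))  # {'q': 'queue_1', 'ctlg': 'ctlg_1', 'name': 'concfun_1'}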