How do I call a function after a specified period of time? - Python

I am trying to build a learner that calls a function and stores the weights in the DB. The problem is that learning takes at least 30 to 60 seconds, so before storing I need to wait. I decided to call the function with a threading Timer, so that it runs after the specified time period.
Example of code:
def learn(myConnection):
    '''
    Derive all the names, images where state = 1
    Learn and store
    Delete all the columns where state is 1
    '''
    id = 0
    with myConnection:
        cur = myConnection.cursor()
        cur.execute("Select name, image FROM images WHERE state = 1")
        rows = cur.fetchall()
        for row in rows:
            print "%s, %s" % (row[0], row[1])
            name = 'images/Output%d.jpg' % (id,)
            names = row[0]
            with open(name, "wb") as output_file:
                output_file.write(row[1])
            unknown_image = face_recognition.load_image_file(name)
            unknown_encoding = face_recognition.face_encodings(unknown_image)[0]
            # here I give a timer and call the function
            threading = Timer(60, storeIntoSQL(names, unknown_encoding))
            threading.start()
            id += 1
The problem is that this behaves as if I had not specified a timer at all: it does not wait 60 seconds, it just runs as if I had called the function directly. Any ideas on how I can make this work, or what alternatives I can use? ... PS: I have already tried time.sleep, but it just blocks the main thread, and I need the project to keep running while this is training.
Example of the function that is being called:
def storeIntoSQL(name, unknown_face_encoding):
    print 'i am printing'
    # connect to the database
    con = lite.connect('users2.db')
    # store the new person into the database
    with con:
        cur = con.cursor()
        # get the new id
        cur.execute("SELECT DISTINCT id FROM Users")
        rows = cur.fetchall()
        newId = len(rows) + 1
        # store into the database
        query = "INSERT INTO Users VALUES (?,?,?)"
        cur.executemany(query, [(newId, name, r) for r in unknown_face_encoding])
I was also told that mutex synchronization could help, where one thread runs only after the other thread has finished its job, but I am not sure how to implement it and am open to any suggestions.

I would suggest using Python's threading library and putting a time.sleep(60) inside your function or in a wrapper function. For example:
import time
import threading
def delayed_func(name, unknown_face_encoding):
    time.sleep(60)
    storeIntoSQL(name, unknown_face_encoding)

timer_thread = threading.Thread(target=delayed_func, args=(name, unknown_face_encoding))
timer_thread.start()
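For completeness: the Timer approach from the question can also work. The reason it fired immediately is that Timer(60, storeIntoSQL(names, unknown_encoding)) calls storeIntoSQL right away and hands its return value to the Timer, instead of handing it the callable itself. A minimal sketch of the corrected call, reusing the names from the question:

from threading import Timer

# pass the function and its arguments separately; Timer invokes it after 60 seconds
t = Timer(60, storeIntoSQL, args=(names, unknown_encoding))
t.start()

Like the wrapper-thread version above, this runs storeIntoSQL in a separate thread, so the main program keeps running in the meantime.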

Related

Multiprocessing In Django Function

Is it possible to use multiprocessing in Django within a request?
# so if I send a request to http://127.0.0.1:8000/wallet_verify
def wallet_verify(request):
    walelts = botactive.objects.all()
    # here I check if the user wants to be included in the process or not;
    # if they set it to True then I'll include them, else ignore.
    for active in walelts:
        check_active = active.active
        if check_active == True:
            user_is_active = active.user
            # for the ones that want to be included I then go and get their key data.
            # I need both the api key and the secret, so I loop through to get the data for active users.
            database = Bybitapidatas.objects.filter(user=user_is_active)
            for apikey in database:
                apikey = apikey.apikey
            for apisecret in database:
                apisecret = apisecret.apisecret
            # since I am making a request to an exchange endpoint I can only include one API key and
            # secret at a time, i.e. one person at a time. This is why I want to run this in parallel.
            for a, b in zip(list(Bybitapidatas.objects.filter(user=user_is_active).values("apikey")), list(Bybitapidatas.objects.filter(user=user_is_active).values("apisecret"))):
                session = spot.HTTP(endpoint='https://api-testnet.bybit.com/', api_key=a['apikey'], api_secret=b['apisecret'])
            # here I check whether they have enough balance to open trades, if they have selected to be included.
            GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
            for i in GET_USDT_BALANCE:
                if 'USDT' in i.values():
                    GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
                    idx_USDT = GET_USDT_BALANCE.index(i)
                    GET_USDTBALANCE = session.get_wallet_balance()['result']['balances'][idx_USDT]['free']
                    print(round(float(GET_USDTBALANCE), 2))
            # if they don't have enough balance I skip the user.
            if round(float(GET_USDTBALANCE), 2) < 11:
                pass
            else:
                session.place_active_order(
                    symbol="BTCUSDT",
                    side="Buy",
                    type="MARKET",
                    qty=10,
                    timeInForce="GTC"
                )
How can I run this process in parallel while looping through the database to get the data for each individual user?
I am still new to coding, so I hope I explained it in a way that makes sense.
I have tried multiprocessing and pools, but then I get an error that the app has not started yet, and I have to run it outside of wallet_verify. Is there a way to do it inside wallet_verify, when I send the POST request?
Any help appreciated.
Filtering the database to get the users who have set it to True.
listi = [1, 3] (these are the user IDs returned):
processess = botactive.objects.filter(active=True).values_list('user')
listi = [row[0] for row in processess]
Get the Users from the listi and perform the action.
def wallet_verify(listi):
    # print(listi)
    database = Bybitapidatas.objects.filter(user=listi)
    print("---------------------------------------------------- START")
    for apikey in database:
        apikey = apikey.apikey
        print(apikey)
    for apisecret in database:
        apisecret = apisecret.apisecret
        print(apisecret)
    start_time = time.time()
    session = spot.HTTP(endpoint='https://api-testnet.bybit.com/', api_key=apikey, api_secret=apisecret)
    GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
    for i in GET_USDT_BALANCE:
        if 'USDT' in i.values():
            GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
            idx_USDT = GET_USDT_BALANCE.index(i)
            GET_USDTBALANCE = session.get_wallet_balance()['result']['balances'][idx_USDT]['free']
            print(round(float(GET_USDTBALANCE), 2))
    if round(float(GET_USDTBALANCE), 2) < 11:
        pass
    else:
        session.place_active_order(
            symbol="BTCUSDT",
            side="Buy",
            type="MARKET",
            qty=10,
            timeInForce="GTC"
        )
    print("My program took", time.time() - start_time, "to run")
    print("---------------------------------------------------- END")
    return HttpResponse("Wallets verified")
verifyt is what I use for the multiprocessing, since I don't want it to run without being requested to run; the initializer also starts the Django apps in each worker.
def verifyt(request):
    with ProcessPoolExecutor(max_workers=4, initializer=django.setup) as executor:
        results = executor.map(wallet_verify, listi)
    return HttpResponse("done")

python cassandra: get a big SELECT * result as a generator (without storing the result in RAM)

I want to get all the data in the Cassandra table "user".
I have 840,000 users and I don't want to load all of them into a Python list; I want to get the users in batches of 100.
In the Cassandra docs (https://datastax.github.io/python-driver/query_paging.html) I see that I can use fetch_size, but in my Python code I have a database object that contains all the CQL instructions:
from cassandra.cluster import Cluster
from cassandra.query import SimpleStatement

class Database:
    def __init__(self, name, salary):
        self.cluster = Cluster(['192.168.1.1', '192.168.1.2'])
        self.session = self.cluster.connect()

    def get_users(self):
        users_list = []
        query = "SELECT * FROM users"
        statement = SimpleStatement(query, fetch_size=10)
        for user_row in self.session.execute(statement):
            users_list.append(user_row.name)
        return users_list
Currently get_users returns a very big list of user names, but I want to turn get_users into a "generator": I don't want all the user names in one list from a single call to get_users; I want to call get_users many times and have it return a list of at most 100 users on each call.
For example:
list1 = database.get_users()
list2 = database.get_users()
...
listn = database.get_users()
list1 contains the first 100 users of the query,
list2 contains the second 100 users,
listn contains the last elements of the query (<= 100).
Is this possible?
Thanks in advance for your answer.
According to Paging Large Queries:
Whenever there are no more rows in the current page, the next page
will be fetched transparently.
So, if you execute your code like this, you will still get the whole result set, but it is paged in a transparent manner.
In order to achieve what you need, you have to use callbacks. You can also find a code sample at the link above.
I have added the full code below for reference.
from cassandra.cluster import Cluster
from cassandra.query import SimpleStatement
from threading import Event

class PagedResultHandler(object):
    def __init__(self, future):
        self.error = None
        self.finished_event = Event()
        self.future = future
        self.future.add_callbacks(
            callback=self.handle_page,
            errback=self.handle_error)

    def handle_page(self, rows):
        for row in rows:
            process_row(row)
        if self.future.has_more_pages:
            self.future.start_fetching_next_page()
        else:
            self.finished_event.set()

    def handle_error(self, exc):
        self.error = exc
        self.finished_event.set()

def process_row(user_row):
    print user_row.name, user_row.age, user_row.email

cluster = Cluster()
session = cluster.connect()
query = "SELECT * FROM myschema.users"
statement = SimpleStatement(query, fetch_size=5)
future = session.execute_async(statement)
handler = PagedResultHandler(future)
handler.finished_event.wait()
if handler.error:
    raise handler.error
cluster.shutdown()
Moving to the next page is done in handle_page when start_fetching_next_page is called.
If you replace the if statement with just self.finished_event.set(), you will see that the iteration stops after the first 5 rows, as defined by fetch_size.
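If a plain synchronous generator is enough, the transparent paging described above can also be wrapped directly: iterating over the result set fetches one page at a time, so yielding from it gives get_users the lazy behaviour the question asks for. A minimal sketch, assuming the Database class from the question:

    def get_users(self):
        # the driver fetches pages of `fetch_size` rows transparently as we iterate,
        # so the 840,000 rows are never all held in one Python list
        statement = SimpleStatement("SELECT * FROM users", fetch_size=100)
        for user_row in self.session.execute(statement):
            yield user_row.name

Usage then becomes "for name in database.get_users(): ..." instead of collecting everything into a single list.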

Why is Twisted's adbapi failing to recover data from within unittests?

Overview
Context
I am writing unit tests for some higher-order logic that depends on writing to an SQLite3 database. For this I am using twisted.trial.unittest and twisted.enterprise.adbapi.ConnectionPool.
Problem statement
I am able to create a persistent sqlite3 database and store data therein. Using sqlitebrowser, I am able to verify that the data has been persisted as expected.
The issue is that calls to t.e.a.ConnectionPool.run* (e.g.: runQuery) return an empty set of results, but only when called from within a TestCase.
Notes and significant details
The problem I am experiencing occurs only within Twisted's trial framework. My first attempt at debugging was to pull the database code out of the unit test and place it into an independent test/debug script. Said script works as expected while the unit test code does not (see examples below).
Case 1: misbehaving unit test
init.sql
This is the script used to initialize the database. There are no (apparent) errors stemming from this file.
CREATE TABLE ajxp_changes ( seq INTEGER PRIMARY KEY AUTOINCREMENT, node_id NUMERIC, type TEXT, source TEXT, target TEXT, deleted_md5 TEXT );
CREATE TABLE ajxp_index ( node_id INTEGER PRIMARY KEY AUTOINCREMENT, node_path TEXT, bytesize NUMERIC, md5 TEXT, mtime NUMERIC, stat_result BLOB);
CREATE TABLE ajxp_last_buffer ( id INTEGER PRIMARY KEY AUTOINCREMENT, type TEXT, location TEXT, source TEXT, target TEXT );
CREATE TABLE ajxp_node_status ("node_id" INTEGER PRIMARY KEY NOT NULL , "status" TEXT NOT NULL DEFAULT 'NEW', "detail" TEXT);
CREATE TABLE events (id INTEGER PRIMARY KEY AUTOINCREMENT, type text, message text, source text, target text, action text, status text, date text);
CREATE TRIGGER LOG_DELETE AFTER DELETE ON ajxp_index BEGIN INSERT INTO ajxp_changes (node_id,source,target,type,deleted_md5) VALUES (old.node_id, old.node_path, "NULL", "delete", old.md5); END;
CREATE TRIGGER LOG_INSERT AFTER INSERT ON ajxp_index BEGIN INSERT INTO ajxp_changes (node_id,source,target,type) VALUES (new.node_id, "NULL", new.node_path, "create"); END;
CREATE TRIGGER LOG_UPDATE_CONTENT AFTER UPDATE ON "ajxp_index" FOR EACH ROW BEGIN INSERT INTO "ajxp_changes" (node_id,source,target,type) VALUES (new.node_id, old.node_path, new.node_path, CASE WHEN old.node_path = new.node_path THEN "content" ELSE "path" END);END;
CREATE TRIGGER STATUS_DELETE AFTER DELETE ON "ajxp_index" BEGIN DELETE FROM ajxp_node_status WHERE node_id=old.node_id; END;
CREATE TRIGGER STATUS_INSERT AFTER INSERT ON "ajxp_index" BEGIN INSERT INTO ajxp_node_status (node_id) VALUES (new.node_id); END;
CREATE INDEX changes_node_id ON ajxp_changes( node_id );
CREATE INDEX changes_type ON ajxp_changes( type );
CREATE INDEX changes_node_source ON ajxp_changes( source );
CREATE INDEX index_node_id ON ajxp_index( node_id );
CREATE INDEX index_node_path ON ajxp_index( node_path );
CREATE INDEX index_bytesize ON ajxp_index( bytesize );
CREATE INDEX index_md5 ON ajxp_index( md5 );
CREATE INDEX node_status_status ON ajxp_node_status( status );
test_sqlite.py
This is the unit test class that fails unexpectedly. TestStateManagement.test_db_clean passes, indicating that the tables were properly created. TestStateManagement.test_inode_create_file fails, reporting that zero results were retrieved.
import os.path as osp
from shutil import rmtree
from tempfile import mkdtemp

from twisted.internet import defer
from twisted.enterprise import adbapi
from twisted.trial.unittest import TestCase

import sqlengine  # see below

class TestStateManagement(TestCase):
    def setUp(self):
        self.meta = mkdtemp()
        self.db = adbapi.ConnectionPool(
            "sqlite3", osp.join(self.meta, "db.sqlite"), check_same_thread=False,
        )
        self.stateman = sqlengine.StateManager(self.db)
        with open("init.sql") as f:
            script = f.read()
        self.d = self.db.runInteraction(lambda c, s: c.executescript(s), script)

    def tearDown(self):
        self.db.close()
        del self.db
        del self.stateman
        del self.d
        rmtree(self.meta)

    @defer.inlineCallbacks
    def test_db_clean(self):
        """Canary test to ensure that the db is initialized in a blank state"""
        yield self.d  # wait for db to be initialized
        q = "SELECT name FROM sqlite_master WHERE type='table' AND name=?;"
        for table in ("ajxp_index", "ajxp_changes"):
            res = yield self.db.runQuery(q, (table,))
            self.assertTrue(
                len(res) == 1,
                "table {0} does not exist".format(table)
            )

    @defer.inlineCallbacks
    def test_inode_create_file(self):
        yield self.d
        path = osp.join(self.ws, "test.txt")
        with open(path, "wt") as f:
            pass
        inode = mk_dummy_inode(path)
        yield self.stateman.create(inode, directory=False)
        entry = yield self.db.runQuery("SELECT * FROM ajxp_index")
        emsg = "got {0} results, expected 1. Are canary tests failing?"
        lentry = len(entry)
        self.assertTrue(lentry == 1, emsg.format(lentry))
sqlengine.py
These are the artefacts being tested by the above unit tests.
def values_as_tuple(d, *param):
    """Return the values for each key in `param` as a tuple"""
    return tuple(map(d.get, param))

class StateManager:
    """Manages the SQLite database's state, ensuring that it reflects the state
    of the filesystem.
    """
    log = Logger()

    def __init__(self, db):
        self._db = db

    def create(self, inode, directory=False):
        params = values_as_tuple(
            inode, "node_path", "bytesize", "md5", "mtime", "stat_result"
        )
        directive = (
            "INSERT INTO ajxp_index (node_path,bytesize,md5,mtime,stat_result) "
            "VALUES (?,?,?,?,?);"
        )
        return self._db.runOperation(directive, params)
Case 2: bug disappears outside of twisted.trial
#! /usr/bin/env python
import os.path as osp
from tempfile import mkdtemp

from twisted.enterprise import adbapi
from twisted.internet.task import react
from twisted.internet.defer import inlineCallbacks

INIT_FILE = "example.sql"

def values_as_tuple(d, *param):
    """Return the values for each key in `param` as a tuple"""
    return tuple(map(d.get, param))

def create(db, inode):
    params = values_as_tuple(
        inode, "node_path", "bytesize", "md5", "mtime", "stat_result"
    )
    directive = (
        "INSERT INTO ajxp_index (node_path,bytesize,md5,mtime,stat_result) "
        "VALUES (?,?,?,?,?);"
    )
    return db.runOperation(directive, params)

def init_database(db):
    with open(INIT_FILE) as f:
        script = f.read()
    return db.runInteraction(lambda c, s: c.executescript(s), script)

@react
@inlineCallbacks
def main(reactor):
    meta = mkdtemp()
    db = adbapi.ConnectionPool(
        "sqlite3", osp.join(meta, "db.sqlite"), check_same_thread=False,
    )
    yield init_database(db)

    # Let's make sure the tables were created as expected and that we're
    # starting from a blank slate
    res = yield db.runQuery("SELECT * FROM ajxp_index LIMIT 1")
    assert not res, "database is not empty [ajxp_index]"
    res = yield db.runQuery("SELECT * FROM ajxp_changes LIMIT 1")
    assert not res, "database is not empty [ajxp_changes]"

    # The details of this are not important. Suffice to say they (should)
    # conform to the DB schema for ajxp_index.
    test_data = {
        "node_path": "/this/is/some/arbitrary/path.ext",
        "bytesize": 0,
        "mtime": 179273.0,
        "stat_result": b"this simulates a blob of raw binary data",
        "md5": "d41d8cd98f00b204e9800998ecf8427e",  # arbitrary
    }

    # store the test data in the ajxp_index table
    yield create(db, test_data)

    # test if the entry exists in the db
    entry = yield db.runQuery("SELECT * FROM ajxp_index")
    assert len(entry) == 1, "got {0} results, expected 1".format(len(entry))
    print("OK")
Closing remarks
Again, upon checking with sqlitebrowser, it seems as though the data is being written to db.sqlite, so this looks like a retrieval problem. From here, I'm sort of stumped... any ideas?
EDIT
This code will produce an inode that can be used for testing:
def mk_dummy_inode(path, isdir=False):
    return {
        "node_path": path,
        "bytesize": osp.getsize(path),
        "mtime": osp.getmtime(path),
        "stat_result": dumps(stat(path), protocol=4),
        "md5": "directory" if isdir else "d41d8cd98f00b204e9800998ecf8427e",
    }
Okay, it turns out that this is a bit of a tricky one. Running the tests in isolation (as was posted to this question) makes it such that the bug only rarely occurs. However, when running in the context of an entire test suite, it fails almost 100% of the time.
I added yield task.deferLater(reactor, .00001, lambda: None) after writing to the db and before reading from the db, and this solves the issue.
From there, I suspected this might be a race condition stemming from the connection pool and SQLite's limited tolerance for concurrency. I tried setting the cp_min and cp_max parameters of ConnectionPool to 1, and this also solved the issue.
In short: it seems as though sqlite doesn't play very nicely with multiple connections, and that the appropriate fix is to avoid concurrency to the extent possible.
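For illustration, that last workaround applied to the setUp above would look roughly like this (a sketch, assuming the cp_min/cp_max keyword arguments of twisted.enterprise.adbapi.ConnectionPool):

        self.db = adbapi.ConnectionPool(
            "sqlite3", osp.join(self.meta, "db.sqlite"),
            check_same_thread=False,
            cp_min=1, cp_max=1,  # a single connection, so reads cannot race the write
        )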
If you take a look at your setUp function, you're returning self.db.runInteraction(...), which returns a deferred. As you've noted, you assume that it waits for the deferred to finish. However this is not the case and it's a trap that most fall victim to (myself included). I'll be honest with you, for situations like this, especially for unit tests, I just execute the synchronous code outside the TestCase class to initialize the database. For example:
def init_db():
    import sqlite3
    conn = sqlite3.connect('db.sqlite')
    c = conn.cursor()
    with open("init.sql") as f:
        c.executescript(f.read())

init_db()  # call outside the test case

class TestStateManagement(TestCase):
    """
    My test cases
    """
Alternatively, you could decorate the setup and yield runOperation(...) but something tells me that it wouldn't work... In any case, it's surprising that no errors were raised.
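For what it's worth, the decorated-setUp variant alluded to here would look roughly like the sketch below; this is my own guess at it, not something tested in the answer. trial does wait on a Deferred returned from setUp, and inlineCallbacks turns the yield into such a Deferred:

from twisted.internet import defer

class TestStateManagement(TestCase):
    @defer.inlineCallbacks
    def setUp(self):
        self.meta = mkdtemp()
        self.db = adbapi.ConnectionPool(
            "sqlite3", osp.join(self.meta, "db.sqlite"), check_same_thread=False,
        )
        self.stateman = sqlengine.StateManager(self.db)
        with open("init.sql") as f:
            script = f.read()
        # trial waits on the Deferred returned by inlineCallbacks, so the schema
        # exists before any test method runs
        yield self.db.runInteraction(lambda c, s: c.executescript(s), script)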
PS
I've been eyeballing this question for a while and it's been in the back of my head for days now. A potential reason for this finally dawned on me at nearly 1am. However, I'm too tired/lazy to actually test this out :D but it's a pretty damn good hunch. I'd like to commend you on your level of detail in this question.

PySQLPool and Celery, proper way to use it?

I am wondering what is the proper way to use the mysql pool with celery tasks.
At the moment, this is what the relevant portion of my tasks module looks like:
from start import celery
import PySQLPool as pool

dbcfg = config.get_config('inputdb')
input_db = pool.getNewConnection(username=dbcfg['user'], password=dbcfg['passwd'], host=dbcfg['host'], port=dbcfg['port'], db=dbcfg['db'], charset='utf8')

dbcfg = config.get_config('outputdb')
output_db = pool.getNewConnection(username=dbcfg['user'], password=dbcfg['passwd'], host=dbcfg['host'], port=dbcfg['port'], db=dbcfg['db'], charset='utf8')

@celery.task
def fetch():
    ic = pool.getNewQuery(input_db)
    oc = pool.getNewQuery(output_db)
    count = 1
    for e in get_new_stuff():
        # do stuff with new stuff
        # read the db with ic
        # write to db using oc
        # commit from time to time
        if count % 1000:
            pool.commitPool()
    # commit whatever's left
    pool.commitPool()
On one machine there can be at most 4 fetch() tasks running at the same time (1 per core).
I notice, however, that sometimes a task will hang, and I suspect MySQL is the cause.
Any tips on how to use MySQL with Celery?
Thank you!
I am also using celery and PySQLPool.
maria = PySQLPool.getNewConnection(username=app.config["MYSQL_USER"],
                                   password=app.config["MYSQL_PASSWORD"],
                                   host=app.config["MYSQL_HOST"],
                                   db='configuration')

def myfunc(self, param1, param2):
    query = PySQLPool.getNewQuery(maria, True)
    try:
        sSql = """
            SELECT * FROM table
            WHERE col1 = %s AND col2
        """
        tDatas = (var1, var2)
        query.Query(sSql, tDatas)
        return query.record
    except Exception, e:
        logger.info(e)
        return False

@celery.task
def fetch():
    myfunc('hello', 'world')

Task Chaining with Cursor issue on app engine. Exception: Too big query offset. Anyone else get this issue?

I'm not sure if anyone else has this problem, but I'm getting an exception "Too big query offset" when using a cursor for chaining tasks on appengine development server (not sure if it happens on live).
The error occurs when requesting a cursor after 4000+ records have been processed in a single query.
I wasn't aware that offsets had anything to do with cursors, and perhaps it's just a quirk in the SDK for App Engine.
To fix it, either shorten the time allowed before the task is deferred (so fewer records get processed at a time), or, when checking the elapsed time, also check that the number of records processed is still within range, e.g. if time.time() > end_time or count == 2000: reset count and defer the task. 2000 is an arbitrary number; I'm not sure what the limit should be.
EDIT:
After making the above-mentioned changes, the task never finishes executing. The with_cursor(cursor) code is being called, but it seems to start from the beginning each time. Am I missing something obvious?
The code that causes the exception is as follows:
The table "Transact" has 4800 rows. The error occurs when transacts.cursor() is called once time.time() > end_time is true. 4510 records have been processed by the time the cursor is requested, which seems to cause the error (on the development server; I haven't tested elsewhere).
def some_task(trans):
    tts = db.get(trans)
    for t in tts:
        #logging.info('in some_task')
        pass

def test_cursor(request):
    ret = test_cursor_task()
    return HttpResponse('')

def test_cursor_task(cursor=None):
    startDate = datetime.datetime(2010, 7, 30)
    endDate = datetime.datetime(2010, 8, 30)
    end_time = time.time() + 20.0
    transacts = Transact.all().filter('transactionDate >', startDate).filter('transactionDate <=', endDate)
    count = 0
    if cursor:
        transacts.with_cursor(cursor)
    trans = []
    logging.info('queue_trans')
    for tran in transacts:
        count += 1
        #trans.append(str(tran))
        trans.append(str(tran.key()))
        if len(trans) == 20:
            deferred.defer(some_task, trans, _countdown=500)
            trans = []
        if time.time() > end_time:
            logging.info(count)
            if len(trans) > 0:
                deferred.defer(some_task, trans, _countdown=500)
                trans = []
            logging.info('time limit exceeded setting next call to queue')
            cursor = transacts.cursor()
            deferred.defer(test_cursor_task, cursor)
            logging.info('returning false')
            return False
    return True
Hope this helps someone.
Thanks
Bert
Try this again without using the iter functionality:
#...
CHUNK = 500
objs = transacts.fetch(CHUNK)
for tran in objs:
    do_your_stuff
if len(objs) == CHUNK:
    deferred.defer(my_task_again, cursor=str(transacts.cursor()))
This works for me.
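Putting the answer's fetch-based loop together with the question's query setup, the re-deferred task might look roughly like this; it is only a sketch, and do_your_stuff / my_task_again are the answer's placeholders:

CHUNK = 500

def my_task_again(cursor=None):
    startDate = datetime.datetime(2010, 7, 30)
    endDate = datetime.datetime(2010, 8, 30)
    transacts = Transact.all().filter('transactionDate >', startDate).filter('transactionDate <=', endDate)
    if cursor:
        transacts.with_cursor(cursor)  # resume where the previous batch stopped
    objs = transacts.fetch(CHUNK)      # explicit fetch instead of iterating the query
    for tran in objs:
        do_your_stuff(tran)
    if len(objs) == CHUNK:             # a full batch means there may be more rows left
        deferred.defer(my_task_again, cursor=str(transacts.cursor()))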
