python multiprocessing mssql cursor

Is there any way to use a connection pool or share a connection across multiple processes?
I am trying to use one connection across multiple processes. Here is the code (running on Python 2.7 with pyodbc).
# Import custom python packages
import pathos.multiprocessing as mp
import pyodbc

class MyManagerClass(object):
    def __init__(self):
        self.conn = None
        self.result = []

    def connect_to_db(self):
        conn = pyodbc.connect("DSN=cpmeast;UID=dntcore;PWD=dntcorevs2")
        cursor = conn.cursor()
        self.conn = conn
        return cursor

    def read_data(self, *args):
        cursor = args[0][0]
        data = args[0][1]
        print 'Running query'
        cursor.execute("WAITFOR DELAY '00:00:02';select GETDATE(), '"+data+"';")
        self.result.append(cursor.fetchall())

def read_data(*args):
    print 'Running query', args
    # cursor.execute("WAITFOR DELAY '00:00:02';select GETDATE(), '"+data+"';")

def main():
    dbm = MyManagerClass()
    conn = pyodbc.connect("DSN=cpmeast;UID=dntcore;PWD=dntcorevs2")
    cursor = conn.cursor()

    pool = mp.ProcessingPool(4)
    for i in pool.imap(dbm.read_data, ((cursor, 'foo'), (cursor, 'bar'))):
        print i
    pool.close()
    pool.join()

    cursor.close();
    dbm.conn.close()
    print 'Result', dbm.result
    print 'Closed'

if __name__ == '__main__':
    main()
I am getting the following error:
Process PoolWorker-1:
Traceback (most recent call last):
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/processing/process.py", line 227, in _bootstrap
self.run()
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/processing/process.py", line 85, in run
self._target(*self._args, **self._kwargs)
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/processing/pool.py", line 54, in worker
for job, i, func, args, kwds in iter(inqueue.get, None):
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/processing/queue.py", line 327, in get
return recv()
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/dill-0.2.4-py2.7.egg/dill/dill.py", line 209, in loads
return load(file)
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/dill-0.2.4-py2.7.egg/dill/dill.py", line 199, in load
obj = pik.load()
File "/home/amit/envs/py_env_clink/lib/python2.7/pickle.py", line 858, in load
dispatch[key](self)
File "/home/amit/envs/py_env_clink/lib/python2.7/pickle.py", line 1083, in load_newobj
obj = cls.__new__(cls, *args)
TypeError: object.__new__(pyodbc.Cursor) is not safe, use pyodbc.Cursor.__new__()
Process PoolWorker-2:
Traceback (most recent call last):
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/processing/process.py", line 227, in _bootstrap
self.run()
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/processing/process.py", line 85, in run
self._target(*self._args, **self._kwargs)
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/processing/pool.py", line 54, in worker
for job, i, func, args, kwds in iter(inqueue.get, None):
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/processing/queue.py", line 327, in get
return recv()
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/dill-0.2.4-py2.7.egg/dill/dill.py", line 209, in loads
return load(file)
File "/home/amit/envs/py_env_clink/lib/python2.7/site-packages/dill-0.2.4-py2.7.egg/dill/dill.py", line 199, in load
obj = pik.load()
File "/home/amit/envs/py_env_clink/lib/python2.7/pickle.py", line 858, in load
dispatch[key](self)
File "/home/amit/envs/py_env_clink/lib/python2.7/pickle.py", line 1083, in load_newobj
obj = cls.__new__(cls, *args)
TypeError: object.__new__(pyodbc.Cursor) is not safe, use pyodbc.Cursor.__new__()

The problem is with the Pickle stage. Pickle doesn't know inherently how to serialize a connection. Consider:
import pickle
import pymssql

a = {'hello': 'world'}

server = 'server'
username = 'username'
password = 'password'
database = 'database'
conn = pymssql.connect(host=server, user=username, password=password, database=database)

with open('filename.pickle', 'wb') as handle:
    pickle.dump(conn, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('filename.pickle', 'rb') as handle:
    b = pickle.load(handle)

print(a == b)
This results in the following error message:
Traceback (most recent call last):
File "pickle_ex.py", line 10, in <module>
pickle.dump(conn, handle, protocol=pickle.HIGHEST_PROTOCOL)
File "stringsource", line 2, in _mssql.MSSQLConnection.__reduce_cython__
TypeError: no default __reduce__ due to non-trivial __cinit__
But if you replace conn with a in pickle.dump, the code will run and print out True.
You may be able to define a custom __reduce__ method in your class, but I wouldn't try it: sharing one connection like that would make temp tables behave like global temp tables that are visible across these processes, which shouldn't be allowed to happen anyway.
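A more practical route is to give each worker its own connection rather than passing a cursor between processes. Here is a minimal sketch, assuming pyodbc and a plain multiprocessing.Pool with a per-worker initializer; the DSN string is the one from the question and the query is simplified to a single SELECT:
import multiprocessing
import pyodbc

_conn = None  # one connection per worker process

def init_worker():
    # Runs once in each child process, so the connection is never pickled
    global _conn
    _conn = pyodbc.connect("DSN=cpmeast;UID=dntcore;PWD=dntcorevs2")

def read_data(data):
    cursor = _conn.cursor()
    cursor.execute("select GETDATE(), ?;", data)
    return cursor.fetchall()

if __name__ == '__main__':
    pool = multiprocessing.Pool(4, initializer=init_worker)
    for rows in pool.imap(read_data, ['foo', 'bar']):
        print(rows)
    pool.close()
    pool.join()
Only plain strings and the fetched rows cross the process boundary here, so nothing unpicklable ever has to be sent to the workers.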
Links:
My pickle code is from here: How can I use pickle to save a dict?

Related

TypeError while joining a room with Flask-SocketIO 5.0.1

I am trying to set up a web app in Python using Flask, with multiple rooms for different users. However, when I use join_room provided by Flask-SocketIO and execute the script, this error is returned:
Exception in thread Thread-10:
Traceback (most recent call last):
File "D:\Python\Python39\lib\threading.py", line 954, in _bootstrap_inner
self.run()
File "D:\Python\Python39\lib\threading.py", line 892, in run
self._target(*self._args, **self._kwargs)
File "D:\Python\Python39\lib\site-packages\socketio\server.py", line 688, in _handle_event_internal
r = server._trigger_event(data[0], namespace, sid, *data[1:])
File "D:\Python\Python39\lib\site-packages\socketio\server.py", line 712, in _trigger_event
return self.handlers[namespace][event](*args)
File "D:\Python\Python39\lib\site-packages\flask_socketio\__init__.py", line 283, in _handler
return self._handle_event(handler, message, namespace, sid,
File "D:\Python\Python39\lib\site-packages\flask_socketio\__init__.py", line 751, in _handle_event
ret = handler(*args)
File "D:\master-thesis\safety-detector\server.py", line 30, in join_room
join_room(roomId)
File "D:\master-thesis\safety-detector\server.py", line 28, in join_room
roomId = data['roomId']
TypeError: string indices must be integers
If I comment out join_room(roomId) the assignment for camId works as expected, so I don't know why this error happens.
Backend code:
@socketio.on('connect')
def connection():
    @socketio.on('join-room')
    def join_room(data):
        roomId = data['roomId']
        camId = data['camId']
        join_room(roomId)
        emit('cam-connected', {'camId': camId}, broadcast=True)

    @socketio.on('disconnect')
    def on_disconnect():
        leave_room(roomId)
        emit('cam-disconnected', {'camId': camId}, broadcast=True)
You have a function named join_room() in your code, which is shadowing the join_room() function from Flask-SocketIO.
You also have a very strange structure for your Socket.IO handlers with inner functions that is not likely to work (or maybe the indentation got messed up when you copy/pasted the code in your question?). Try something like this:
@socketio.on('connect')
def connection():
    pass

@socketio.on('join-room')
def my_join_room(data):  # <--- rename this to something other than join_room
    roomId = data['roomId']
    camId = data['camId']
    join_room(roomId)
    emit('cam-connected', {'camId': camId}, broadcast=True)

@socketio.on('disconnect')
def on_disconnect():
    leave_room(roomId)
    emit('cam-disconnected', {'camId': camId}, broadcast=True)
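To see why the original code fails: inside a handler named join_room, the name join_room refers to the handler itself, so join_room(roomId) calls the handler again with a plain string, and data['roomId'] then indexes a string instead of a dict. A minimal illustration of the shadowing, independent of Flask-SocketIO:
def join_room(data):              # shadows flask_socketio.join_room
    roomId = data['roomId']       # fine while data is a dict
    join_room(roomId)             # recursively calls THIS handler with a string

join_room({'roomId': 'abc'})      # second call runs 'abc'['roomId'] -> TypeError: string indices must be integers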

python multiprocessing manager cannot load list from distributed node

After several tests, I found that this problem is caused by the nesting of manager.list(manager.list(...)). But I really need it to be two-dimensional. Any suggestion would be appreciated!
I'm trying to build a server and multiple clients across multiple nodes.
One node acts as the server, which initializes a manager.list() for the clients to use.
The other nodes act as clients, which attach to the server to get the list and work with it.
The firewall is off, and when the server and client run on a single node, it works fine.
Got problem like this:
Traceback (most recent call last):
File "main.py", line 352, in <module>
train(args)
File "main.py", line 296, in train
args, proc_manager, device)
File "main.py", line 267, in make_gossip_buffer
mng,sync_freq=args.sync_freq, num_nodes=args.num_nodes)
File "/home/think/gala-master-distprocess-changing_to_multinodes/gala/gpu_gossip_buffer.py", line 49, in __init__
r_events = read_events[rank]
File "<string>", line 2, in __getitem__
File "/home/think/anaconda3/envs/AC/lib/python3.7/multiprocessing/managers.py", line 819, in _callmethod
kind, result = conn.recv()
File "/home/think/anaconda3/envs/AC/lib/python3.7/multiprocessing/connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
File "/home/think/anaconda3/envs/AC/lib/python3.7/multiprocessing/managers.py", line 943, in RebuildProxy
return func(token, serializer, incref=incref, **kwds)
File "/home/think/anaconda3/envs/AC/lib/python3.7/multiprocessing/managers.py", line 793, in __init__
self._incref()
File "/home/think/anaconda3/envs/AC/lib/python3.7/multiprocessing/managers.py", line 847, in _incref
conn = self._Client(self._token.address, authkey=self._authkey)
File "/home/think/anaconda3/envs/AC/lib/python3.7/multiprocessing/connection.py", line 492, in Client
c = SocketClient(address)
File "/home/think/anaconda3/envs/AC/lib/python3.7/multiprocessing/connection.py", line 620, in SocketClient
s.connect(address)
FileNotFoundError: [Errno 2] No such file or directory
The server runs on a single node.
The server code is shown below:
import torch.multiprocessing as mp
from multiprocessing.managers import ListProxy, BarrierProxy, AcquirerProxy, EventProxy
from gala.arguments import get_args

mp.current_process().authkey = b'abc'

def server(manager, host, port, key, args):
    read_events = manager.list([manager.list([manager.Event() for _ in range(num_learners)])
                                for _ in range(num_learners)])
    manager.register('get_read_events', callable=lambda: read_events, proxytype=ListProxy)
    print('start service at', host)
    s = manager.get_server()
    s.serve_forever()

if __name__ == '__main__':
    mp.set_start_method('spawn')
    args = get_args()
    manager = mp.Manager()
    server(manager, '10.107.13.120', 5000, b'abc', args)
The clients run on other nodes; those nodes connect to the server over Ethernet. The client IP is 10.107.13.80.
The client code is shown below:
import torch.multiprocessing as mp

mp.current_process().authkey = b'abc'

def make_gossip_buffer(mng):
    read_events = mng.get_read_events()
    gossip_buffer = GossipBuffer(parameters)

def train(args):
    proc_manager = mp.Manager()
    proc_manager.register('get_read_events')
    proc_manager.__init__(address=('10.107.13.120', 5000), authkey=b'abc')
    proc_manager.connect()
    make_gossip_buffer(proc_manager)

if __name__ == "__main__":
    mp.set_start_method('spawn')
    train(args)
Any help would be appreciated!

Error on writing to Google cloud spanner using Google cloud functions

I am trying to insert data into a Cloud Spanner table using Cloud Functions, but it throws the error given below. Reading data from Cloud Spanner works properly, but writing, using both DML statements and the batch.insert method, throws the same error. I am thinking it is some kind of permissions problem, but I don't know how to fix it.
Requirements file contains only google-cloud-spanner==1.7.1
Code running in cloud functions
import json
from google.cloud import spanner

INSTANCE_ID = 'AARISTA'
DATABASE_ID = 'main'
TABLE_NAME = 'userinfo'
dataDict = None

def new_user(request):
    dataDict = json.loads(request.data)  # Data is available in dict format
    if dataDict['USER_ID'] == None:
        return "User id empty"
    elif dataDict['IMEI'] == None:
        return "Imei number empty"
    elif dataDict['DEVICE_ID'] == None:
        return "Device ID empty"
    elif dataDict['NAME'] == None:
        return "Name field is empty"
    elif dataDict['VIRTUAL_PRIVATE_KEY'] == None:
        return "User's private key cant be empty"
    else:
        return insert_data(INSTANCE_ID, DATABASE_ID)

def insert_data(instance_id, database_id):
    spanner_client = spanner.Client()
    instance = spanner_client.instance(instance_id)
    database = instance.database(database_id)

    def insert_user(transcation):
        row_ct = transcation.execute_update(
            "INSERT userinfo (USER_ID,DEVICE_ID,IMEI,NAME,VIRTUAL_PRIVATE_KEY) VALUES "
            "(" + dataDict['USER_ID'] + ", " + dataDict['DEVICE_ID'] + ", " + dataDict['IMEI'] + ", "
            + dataDict['NAME'] + ", " + dataDict['VIRTUAL_PRIVATE_KEY'] + ")")

    database.run_in_transaction(insert_user)
    return 'Inserted data.'
Error logs on Cloud Functions
Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/spanner_v1/pool.py", line 265, in get session = self._sessions.get_nowait()
File "/opt/python3.7/lib/python3.7/queue.py", line 198, in get_nowait return self.get(block=False)
File "/opt/python3.7/lib/python3.7/queue.py", line 167, in get raise Empty _queue.Empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/api_core/grpc_helpers.py", line 57, in error_remapped_callable return callable_(*args, **kwargs)
File "/env/local/lib/python3.7/site-packages/grpc/_channel.py", line 547, in __call__ return _end_unary_response_blocking(state, call, False, None)
File "/env/local/lib/python3.7/site-packages/grpc/_channel.py", line 466, in _end_unary_response_blocking raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with: status = StatusCode.INVALID_ARGUMENT details = "Invalid CreateSession request." debug_error_string = "{"created":"#1547373361.398535906","description":"Error received from peer","file":"src/core/lib/surface/call.cc","file_line":1036,"grpc_message":"Invalid> CreateSession request.","grpc_status":3}" >
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 297, in run_http_function result = _function_handler.invoke_user_function(flask.request)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 199, in invoke_user_function return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 192, in call_user_function return self._user_function(request_or_event)
File "/user_code/main.py", line 21, in new_user return insert_data(INSTANCE_ID,DATABASE_ID)
File "/user_code/main.py", line 31, in insert_data database.run_in_transaction(insert_user)
File "/env/local/lib/python3.7/site-packages/google/cloud/spanner_v1/database.py", line 438, in run_in_transaction with SessionCheckout(self._pool) as session:
File "/env/local/lib/python3.7/site-packages/google/cloud/spanner_v1/pool.py", line 519, in __enter__ self._session = self._pool.get(**self._kwargs)
File "/env/local/lib/python3.7/site-packages/google/cloud/spanner_v1/pool.py", line 268, in get session.create()
File "/env/local/lib/python3.7/site-packages/google/cloud/spanner_v1/session.py", line 116, in create session_pb = api.create_session(self._database.name, metadata=metadata, **kw)
File "/env/local/lib/python3.7/site-packages/google/cloud/spanner_v1/gapic/spanner_client.py", line 276, in create_session request, retry=retry, timeout=timeout, metadata=metadata
File "/env/local/lib/python3.7/site-packages/google/api_core/gapic_v1/method.py", line 143, in __call__ return wrapped_func(*args, **kwargs)
File "/env/local/lib/python3.7/site-packages/google/api_core/retry.py", line 270, in retry_wrapped_func on_error=on_error,
File "/env/local/lib/python3.7/site-packages/google/api_core/retry.py", line 179, in retry_target return target()
File "/env/local/lib/python3.7/site-packages/google/api_core/timeout.py", line 214, in func_with_timeout return func(*args, **kwargs)
File "/env/local/lib/python3.7/site-packages/google/api_core/grpc_helpers.py", line 59, in error_remapped_callable six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.InvalidArgument: 400 Invalid CreateSession request.
I tried to reproduce this but it seems to work for me as a Python 3.7 function. I used the latest google-cloud-spanner library in requirements.txt.
While I am unsure what is causing your error, I did notice a few other things.
It seemed odd to declare a global dataDict and not use the one constructed in new_user; instead, I added it as a parameter to the insert method.
The spacing of the query and the mix of single and double quotes made it hard to parse visually. Since the function runs on Python 3.7, you can also use f-strings, which would likely make it even more readable.
Here is the code I ran in a function that seemed to work.
import json
from google.cloud import spanner

INSTANCE_ID = 'testinstance'
DATABASE_ID = 'testdatabase'
TABLE_ID = 'userinfo'

def new_user(request):
    data = {'USER_ID': '10', 'DEVICE_ID': '11'}
    return insert_data(INSTANCE_ID, DATABASE_ID, data)

def insert_data(instance_id, database_id, data):
    spanner_client = spanner.Client()
    instance = spanner_client.instance(instance_id)
    database = instance.database(database_id)

    def insert_user(transaction):
        query = f"INSERT {TABLE_ID} (USER_ID,DEVICE_ID) VALUES ({data['USER_ID']},{data['DEVICE_ID']})"
        row_ct = transaction.execute_update(query)

    database.run_in_transaction(insert_user)
    return 'Inserted data.'
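If the quoting ever becomes a problem again, another option is to bind the values as query parameters instead of splicing them into the SQL string. This is a sketch, assuming the params/param_types arguments of execute_update in the google-cloud-spanner client and assuming the two columns are STRING; the table and column names are the ones from the question:
from google.cloud import spanner
from google.cloud.spanner_v1 import param_types

def insert_data(instance_id, database_id, data):
    spanner_client = spanner.Client()
    database = spanner_client.instance(instance_id).database(database_id)

    def insert_user(transaction):
        # Values are bound server-side, so no manual quoting is needed
        transaction.execute_update(
            "INSERT userinfo (USER_ID, DEVICE_ID) VALUES (@user_id, @device_id)",
            params={'user_id': data['USER_ID'], 'device_id': data['DEVICE_ID']},
            param_types={'user_id': param_types.STRING,
                         'device_id': param_types.STRING},
        )

    database.run_in_transaction(insert_user)
    return 'Inserted data.'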

Python 3.x multiprocess TypeError: can't pickle _thread.lock objects

I am testing Python multiprocessing. I use pymongo to manage the queue; my code and the error are below. I can't solve the issue and I don't know the root cause, please help me, thank you very much. I know the multithreading version works, and everything else works too; I had a test line saying:
process_crawler(seed_url, scrape_callback=scrape_callback, cache=cache, max_threads=max_threads, timeout=10)
import time
import threading
from mongo_queue import MongoQueue
from downloader import Downloader
import multiprocessing

SLEEP_TIME = 1

def threaded_crawler(seed_url, delay=5, cache=None, scrape_callback=None, user_agent='wswp', proxies=None, num_retries=1, max_threads=10, timeout=60):
    """Crawl a website in multiple threads"""
    # url queues to be crawled
    crawl_queue = MongoQueue()
    crawl_queue.clear()
    crawl_queue.push(seed_url)
    downloader = Downloader(delay=delay, user_agent=user_agent, proxies=proxies, num_retries=num_retries, cache=cache, timeout=timeout)

    def process_queue():
        while True:
            try:
                url = crawl_queue.pop()
            except KeyError:
                # crawl queue is empty
                break
            else:
                html = downloader(url)
                if scrape_callback:
                    try:
                        links = scrape_callback(url, html) or []
                    except Exception as e:
                        print('Error in call back for %s, %s' % (url, e))
                    else:
                        for link in links:
                            crawl_queue.push(link)

    threads = []
    while threads or crawl_queue:
        # the crawl is still active
        for thread in threads:
            if not thread.is_alive():
                threads.remove(thread)
        while len(threads) < max_threads and crawl_queue.peek():
            # can start some more threads
            thread = threading.Thread(target=process_queue)
            thread.setDaemon(True)
            thread.start()
            threads.append(thread)
        time.sleep(SLEEP_TIME)

def process_crawler(args, **kwargs):
    num_cpus = multiprocessing.cpu_count()
    print('Starting Multiprocessing.... CPU Number is ', num_cpus)
    processes = []
    for i in range(num_cpus):
        p = multiprocessing.Process(target=threaded_crawler, args=[args], kwargs=kwargs)
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
Traceback (most recent call last):
Starting Multiprocessing.... CPU Number is 8
File "C:/Users/Michael Qian/Desktop/Python/MyScraper/process_test.py", line 15, in <module>
test(1)
File "C:/Users/Michael Qian/Desktop/Python/MyScraper/process_test.py", line 10, in test
process_crawler(scrape_callback.seed_url, scrape_callback=scrape_callback, cache=cache, max_threads=max_threads, timeout=10)
File "C:\Users\Michael Qian\Desktop\Python\MyScraper\process_crawler.py", line 58, in process_crawler
p.start()
File "C:\Program Files\Python35\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Program Files\Python35\lib\multiprocessing\context.py", line 212, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Program Files\Python35\lib\multiprocessing\context.py", line 313, in _Popen
return Popen(process_obj)
File "C:\Program Files\Python35\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
reduction.dump(process_obj, to_child)
File "C:\Program Files\Python35\lib\multiprocessing\reduction.py", line 59, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.lock objects
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Program Files\Python35\lib\multiprocessing\spawn.py", line 106, in spawn_main
exitcode = _main(fd)
File "C:\Program Files\Python35\lib\multiprocessing\spawn.py", line 116, in _main
self = pickle.load(from_parent)
EOFError: Ran out of input
I've just tried multiprocessing and ran into the very same problem. The problem was caused by sharing the MongoClient object between the processes.
Have a look at FAQ: Using PyMongo with Multiprocessing
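In practice that means each worker process should build its own MongoClient (and therefore its own queue wrapper) after it has been spawned, rather than inheriting one from the parent. A minimal sketch of the pattern, assuming pymongo; the database/collection names and the idea of using a collection as a queue are hypothetical stand-ins for MongoQueue:
import multiprocessing
from pymongo import MongoClient

def worker(seed_url):
    # Create the client inside the child process, never in the parent
    client = MongoClient('localhost', 27017)
    queue = client.crawl_db.queue  # hypothetical collection used as a queue
    queue.insert_one({'url': seed_url, 'status': 'outstanding'})

if __name__ == '__main__':
    processes = [multiprocessing.Process(target=worker, args=('http://example.com',))
                 for _ in range(4)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
Because nothing holding a lock (the client, its connection pool) is passed to Process, there is nothing for the ForkingPickler to choke on.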

How to refer to a specific client's service from the RPyC server's service

CORE
The server part is Core, which is responsible for registering modules and for the interaction between them. Core runs as a ThreadedServer. CoreService provides module registration. When a module registers, I keep its connection in a list so I can use it later. A module calls a function on the core, asking it to call another module. But using the list of connections does not work: execution goes into an infinite loop.
class CoreService(rpyc.Service):
    __modules = {}

    def exposed_register_module(self, module_name):
        if module_name in self.__modules:
            return False
        self.__modules[module_name] = self._conn
        return True

    def exposed_execute_query_module(self, module_name, attribute_name, args):
        # TTTTTTTTTTHHHHHHHHHIIIIIIIISSSSSSSSSSSSSS
        if module_name in self.__modules:
            self.__modules[module_name].root
        # return None
Run test
When I run the test, it gets stuck in a loop; I interrupt it with a key combination and get the following output:
^CTraceback (most recent call last):
File "/home/kpv/perseus/control-lib/perseus_control_lib/module.py", line 67, in __getattr__
return self.__core_connector.root.execute_query_module(self.__proxy_module_name, name, args)
File "/usr/local/lib/python2.7/dist-packages/rpyc/core/netref.py", line 196, in __call__
return syncreq(_self, consts.HANDLE_CALL, args, kwargs)
File "/usr/local/lib/python2.7/dist-packages/rpyc/core/netref.py", line 71, in syncreq
return conn.sync_request(handler, oid, *args)
File "/usr/local/lib/python2.7/dist-packages/rpyc/core/protocol.py", line 438, in sync_request
self.serve(0.1)
File "/usr/local/lib/python2.7/dist-packages/rpyc/core/protocol.py", line 387, in serve
data = self._recv(timeout, wait_for_lock = True)
File "/usr/local/lib/python2.7/dist-packages/rpyc/core/protocol.py", line 344, in _recv
if self._channel.poll(timeout):
File "/usr/local/lib/python2.7/dist-packages/rpyc/core/channel.py", line 43, in poll
return self.stream.poll(timeout)
File "/usr/local/lib/python2.7/dist-packages/rpyc/core/stream.py", line 41, in poll
rl, _, _ = select([self], [], [], timeout)
KeyboardInterrupt
