pymysql.err.OperationalError: 2013 with pymysql and SQLAlchemy - python

Hi guys! I hope someone can help me with this issue.
I am executing a query through SQLAlchemy that returns ~6 million rows (it's historical data) that I need to process in a Python script. I have some functions to read the data and do some processing on it using a pandas DataFrame. Here are the functions:
def consulta_db_cancelamentos(db_con, query):
    """Stream a cancellation-history query into a processed DataFrame.

    Opens a SQLAlchemy connection with server-side streaming enabled,
    pulls rows in 10k-row chunks to bound memory, and hands the collected
    tuples to ``corrige_cancelamentos_df`` for post-processing.

    :param db_con: SQLAlchemy database URL string.
    :param query: SQL text whose result set contains the columns
        data, plano, usuario_id, timestamp_cancelamentos, timestamp, status.
    :return: whatever ``corrige_cancelamentos_df`` returns (a DataFrame).
    """
    engine = create_engine(db_con, pool_recycle=3600)
    con = engine.connect()
    try:
        # stream_results=True requests an unbuffered (server-side) cursor,
        # so rows are fetched from MySQL as we consume them instead of
        # being loaded into memory all at once.
        query_result = con.execution_options(stream_results=True).execute(query)
        query_result_list = []
        while True:
            rows = query_result.fetchmany(10000)
            if not rows:
                break
            query_result_list.extend(
                (
                    row['data'],
                    row['plano'],
                    row['usuario_id'],
                    row['timestamp_cancelamentos'],
                    row['timestamp'],
                    row['status'],
                )
                for row in rows
            )
        # BUG FIX: the original created an empty DataFrame immediately
        # before testing `df.empty`, so the else branch was dead code, and
        # its `df.append(...)` discarded the returned frame anyway.
        # Build the DataFrame once from the accumulated tuples.
        df = pd.DataFrame(query_result_list)
        return corrige_cancelamentos_df(df, '2017-01-01', '2017-12-15')
    finally:
        # Close the connection even if fetching fails mid-stream
        # (the original leaked it on any exception).
        con.close()
As you can see, I'm already trying to read the data and process/store it in 10k rows chunk. When I try to execute the whole script I got this error on the function (I'm also including the error raised on the main()):
Traceback (most recent call last):
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/result.py", line 1159, in fetchmany
l = self.process_rows(self._fetchmany_impl(size))
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/result.py", line 1318, in _fetchmany_impl
row = self._fetchone_impl()
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/result.py", line 1308, in _fetchone_impl
self.__buffer_rows()
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/result.py", line 1295, in __buffer_rows
self.__rowbuffer = collections.deque(self.cursor.fetchmany(size))
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 485, in fetchmany
row = self.read_next()
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 446, in read_next
return self._conv_row(self._result._read_rowdata_packet_unbuffered())
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1430, in _read_rowdata_packet_unbuffered
packet = self.connection._read_packet()
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1008, in _read_packet
recv_data = self._read_bytes(bytes_to_read)
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1037, in _read_bytes
CR.CR_SERVER_LOST, "Lost connection to MySQL server during query")
pymysql.err.OperationalError: (2013, 'Lost connection to MySQL server during query')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/aiquis/EI/cancelamentos_testes5.py", line 180, in <module>
main()
File "/home/aiquis/EI/cancelamentos_testes5.py", line 164, in main
cancelamentos_df_corrigido = consulta_db_cancelamentos(db_param, query_cancelamentos)
File "/home/aiquis/EI/cancelamentos_testes5.py", line 14, in consulta_db_cancelamentos
rows = query_result.fetchmany(1000)
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/result.py", line 1166, in fetchmany
self.cursor, self.context)
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/base.py", line 1413, in _handle_dbapi_exception
exc_info
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/util/compat.py", line 203, in raise_from_cause
reraise(type(exception), exception, tb=exc_tb, cause=cause)
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/util/compat.py", line 186, in reraise
raise value.with_traceback(tb)
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/result.py", line 1159, in fetchmany
l = self.process_rows(self._fetchmany_impl(size))
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/result.py", line 1318, in _fetchmany_impl
row = self._fetchone_impl()
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/result.py", line 1308, in _fetchone_impl
self.__buffer_rows()
File "/home/aiquis/.local/lib/python3.5/site-packages/sqlalchemy/engine/result.py", line 1295, in __buffer_rows
self.__rowbuffer = collections.deque(self.cursor.fetchmany(size))
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 485, in fetchmany
row = self.read_next()
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 446, in read_next
return self._conv_row(self._result._read_rowdata_packet_unbuffered())
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1430, in _read_rowdata_packet_unbuffered
packet = self.connection._read_packet()
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1008, in _read_packet
recv_data = self._read_bytes(bytes_to_read)
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1037, in _read_bytes
CR.CR_SERVER_LOST, "Lost connection to MySQL server during query")
sqlalchemy.exc.OperationalError: (pymysql.err.OperationalError) (2013, 'Lost connection to MySQL server during query') (Background on this error at: http://sqlalche.me/e/e3q8)
Exception ignored in: <bound method MySQLResult.__del__ of <pymysql.connections.MySQLResult object at 0x7f8c543dc198>>
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1345, in __del__
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1447, in _finish_unbuffered_query
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 991, in _read_packet
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1022, in _read_bytes
AttributeError: 'NoneType' object has no attribute 'settimeout'
[Finished in 602.4s]
The way I wrote consulta_db_cancelamentos is already a result of some search on SO and SQLAlchemy documentation. Suppose I have no access to my MySQL Server administration.
When I limit my query to bring results for only one usuario_id, for example (something like ~50 rows), it works fine. I executed the same query in MySQL Workbench and the Duration/Fetch was 251.998 sec/357.541 sec.

Solved executing this command in MySQL Server:
set global max_allowed_packet = 67108864;
This solution was suggested here Lost connection to MySQL server during query

I faced a problem like this one. I faced the problem after I added stream_results=True to the query.
For me, the parameter net_write_timeout was causing a trouble. When stream_results=True and the app does not immediately read all the data being sent from the MySQL server, then on the MySQL server side, the write communication (sending data packet) to the app gets blocked. net_write_timeout seems to be the parameter which controls how many seconds the connection is allowed for the MySQL server's write to the socket operation being blocked.
So, I've changed net_write_timeout parameter from 60 (default) to 3600 and solved the problem.

Related

csv file to MOngoDB, with `pymongo.errors.ServerSelectionTimeoutError: Connection refused`

I'm running the script wish to save the csv file to MOngoDB, and face pymongo.errors.ServerSelectionTimeoutError: Connection refused
(ps. '1xx.xx.xx.1:27017' is correct mongoDB ip)
the python script
# BUG FIX: the original had a bare "import" statement; `json` is the
# module actually used below.
import json

import pandas as pd
from pymongo import MongoClient

# Connect to the MongoDB server and select database/collection.
client = MongoClient('1xx.xx.xx.1:27017')
db = client['(practice_12_29)img_to_text-001_new']
collection = db['img_to_text_listOfElems']

# Load the CSV, then round-trip through JSON to get a list of plain
# record dicts (one per row) suitable for insertion.
data = pd.read_csv('file_listOfElems.csv', encoding='UTF-8')
data_json = json.loads(data.to_json(orient='records'))

# Collection.insert() was deprecated and removed in PyMongo 4;
# insert_many() is the supported bulk-insert API for a list of docs.
collection.insert_many(data_json)
this is the output
jetson#jetson-desktop:~/Desktop/test_12.26$ python3 csv_to_mongoDB.py
Traceback (most recent call last):
File "csv_to_mongoDB.py", line 13, in <module>
collection.insert(data_json)
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/collection.py", line 3182, in insert
check_keys, manipulate, write_concern)
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/collection.py", line 646, in _insert
blk.execute(write_concern, session=session)
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/bulk.py", line 511, in execute
return self.execute_command(generator, write_concern, session)
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/bulk.py", line 344, in execute_command
with client._tmp_session(session) as s:
File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/mongo_client.py", line 1820, in _tmp_session
s = self._ensure_session(session)
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/mongo_client.py", line 1807, in _ensure_session
return self.__start_session(True, causal_consistency=False)
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/mongo_client.py", line 1760, in __start_session
server_session = self._get_server_session()
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/mongo_client.py", line 1793, in _get_server_session
return self._topology.get_server_session()
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/topology.py", line 477, in get_server_session
None)
File "/home/jetson/.local/lib/python3.6/site-packages/pymongo/topology.py", line 205, in _select_servers_loop
self._error_message(selector))
pymongo.errors.ServerSelectionTimeoutError: 1xx.xx.xx.1:27017: [Errno 111] Connection refused
(tried 01)
I tried one of the similar issue's solution,but is not work too
Pymongo keeps refusing the connection at 27017
jetson#jetson-desktop:~/Desktop/test_12.26$ sudo rm /var/lib/mongodb/mongod.lock
rm: cannot remove '/var/lib/mongodb/mongod.lock': No such file or directory
(tried 02)
I also find this https://stackoverflow.com/a/68608172/20861658
but how do I know the <username> , <password> and cluster-details , my last time experience with other computer can just upload excel with below code
# this worked fine, that I don't remember I put user name and password
import pandas as pd
from pymongo import MongoClient

# Load the spreadsheet into a DataFrame.
aaa = pd.read_excel("T1_new.xls")
# BUG FIX: `print(aaa.head)` printed the bound method object itself;
# calling head() actually shows the first rows.
print(aaa.head())

# Connect without credentials (the server accepted anonymous access).
client = MongoClient('1xx.xx.xx.1:27017')
db = client['sample_data_in_DB']
collection = db['sample_collection']
# One document per DataFrame row.
collection.insert_many(aaa.to_dict('records'))
If you have any idea, please let me know.

SQLAlchemy switch to python multiprocessing

I am currently working on a web crawler. It works fine, but I want to maximise the resources, so I am trying to switch to multi-processing. The second I try that, however, I run into a wall of tracebacks, and I can't seem to find what I am doing wrong, as I am still a novice with both SQLAlchemy and Python multi-processing.
Here is how the parent loop looks like:
...
def crawler(url=False):
# Sequential driver: one URL is processed at a time, and the next URL is
# only fetched after the current crawl() call returns.
...
while url:
crawl(url.id)
# get_new_url() presumably returns a falsy value (e.g. None) when no
# unscraped URL remains, which ends the loop -- confirm against its
# definition.
url = get_new_url()
I am trying to turn this into a parallel processing function where I dont have to wait for the previous crawl/scrape to be finished:
from multiprocessing import Process
...
def crawler(url=False):
# Parallel driver: one Process per URL, started without join(), so the
# loop immediately moves on to fetch the next URL.
while url:
p = Process(target=crawl, args=(url.id,))
p.start()
# NOTE(review): get_new_url() runs in the parent on the same
# module-level session the forked children inherited -- presumably the
# trigger for the shared-connection errors reported below; confirm.
url = get_new_url()
Here is how I make my database connection:
# Module-level engine shared by the whole application; pool_recycle=3600
# refreshes pooled connections older than an hour so the server's idle
# timeout does not kill them mid-use.
engine = create_engine('mysql://user:password#domain:3306/mdb01?charset=utf8mb4', pool_recycle=3600)
# Session factory bound to the shared engine; autoflush pushes pending
# changes to the database before each query.
Session = sessionmaker(bind=engine, autoflush=True)
# Base class for the declarative ORM models.
Base = declarative_base()
Here is the modules doing the crawling's database interactions and the importing of the database factory (I removed the bulk as I feel the issue is how I interact with sqlalchemy and not the rest of the code):
from news_models.base import Base, Session, engine
# NOTE(review): a single module-level session is shared by every forked
# worker process -- parent and children then reuse the same MySQL
# socket, which is presumably what produces the "Commands out of sync"
# and 2013 errors below; confirm by giving each process its own Session.
database = Session()
def crawl(urlid):
# Re-load the Url row by primary key inside this process.
url = database.query(Url).filter_by(id=urlid).first()
print(f"Starting to work on {url.id}: {url.url}")
... scrape page ....
scrape = scrape_url(url)
... running beautifull soup ...
# Retrieve all of the anchor tags
tags = soup('a')
for tag in tags:
... validation ...
make_url(url)
def make_url(url):
...
#domain = ex. abc.com
# Look up the Domain row for the URL's host, then persist the new Url
# on the shared session and commit immediately.
domain = database.query(Domain).filter_by(domain=domain).first()
database.add(Url(url, domain, vetted))
database.commit()
def scrape_url(url):
# Persist the fetched page and return the ORM object to the caller.
scrape = Scrape(page = html, url = url)
database.add(scrape)
database.commit()
return scrape
Here is the dialog:
Starting to work on 179226: https://bbc.co.uk/sport/football/53891604
Starting to work on 110232: https://theweathernetwork.com/ca/weather/saskatchewan/carragana
Starting to work on 152054: https://ca.images.search.yahoo.com/search/images?p=barack+obama&fr=fp-tts&th=110.1&tw=162.6&imgurl=https%3a%2f%2fimage.cnbcfm.com%2fapi%2fv1%2fimage%2f105055178-gettyimages-680143744rr.jpg%3fv%3d1576513702%26w%3d1400%26h%3d950&rurl=https%3a%2f%2fwww.cnbc.com%2f2019%2f12%2f16%2fbarack-obama-how-women-are-better-leaders-than-men.html&size=123kb&name=barack+obama%3a+how+women+are+better+leaders+than+men&oid=1&h=950&w=1400&turl=https%3a%2f%2ftse1.mm.bing.net%2fth%3fid%3doip.btjoweh9kdcuxxcdksvoiwhafb%26amp%3bpid%3dapi%26rs%3d1%26c%3d1%26qlt%3d95%26w%3d162%26h%3d110&tt=barack+obama%3a+how+women+are+better+leaders+than+men&sigr=4nejz_6_wyyo&sigit=.iypm9cqprc9&sigi=9sv3ee5szhdl&sign=eqzxpc3ps9fm&sigt=eqzxpc3ps9fm
Exception during reset or similar
Traceback (most recent call last):
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 321, in scrape_url
database.add(scrape)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2008, in add
self._save_or_update_state(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2021, in _save_or_update_state
self._save_or_update_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2371, in _save_or_update_impl
self._save_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2324, in _save_impl
to_attach = self._before_attach(state, obj)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2441, in _before_attach
raise sa_exc.InvalidRequestError(
sqlalchemy.exc.InvalidRequestError: Object '<Scrape at 0x7f4f7e1975b0>' is already attached to session '3' (this is '2')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/pool/base.py", line 697, in _finalize_fairy
fairy._reset(pool)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/pool/base.py", line 893, in _reset
pool._dialect.do_rollback(self)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
MySQLdb._exceptions.ProgrammingError: (2014, "Commands out of sync; you can't run this command now")
Traceback (most recent call last):
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1276, in _execute_context
Process Process-3:
Process Process-1:
self.dialect.do_execute(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 593, in do_execute
cursor.execute(statement, parameters)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 206, in execute
res = self._query(query)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 319, in _query
db.query(q)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/connections.py", line 259, in query
_mysql.connection.query(self, query)
MySQLdb._exceptions.OperationalError: (2013, 'Lost connection to MySQL server during query')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "./crawler.py", line 138, in <module>
main()
File "./crawler.py", line 49, in main
crawler(url=url)
File "./crawler.py", line 135, in crawler
url = get_new_url()
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 482, in get_new_url
url = database.query(Url).filter_by(scrape=None, error=False).order_by(sqlalchemy.func.rand()).first()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3402, in first
ret = list(self[0:1])
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3176, in __getitem__
Traceback (most recent call last):
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 321, in scrape_url
database.add(scrape)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2008, in add
self._save_or_update_state(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2021, in _save_or_update_state
self._save_or_update_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2371, in _save_or_update_impl
self._save_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2324, in _save_impl
to_attach = self._before_attach(state, obj)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2441, in _before_attach
raise sa_exc.InvalidRequestError(
sqlalchemy.exc.InvalidRequestError: Object '<Scrape at 0x7f4f7e1e3790>' is already attached to session '3' (this is '2')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 749, in _rollback_impl
self.engine.dialect.do_rollback(self.connection)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
MySQLdb._exceptions.OperationalError: (2013, 'Lost connection to MySQL server during query')
return list(res)
The above exception was the direct cause of the following exception:
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3508, in __iter__
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 62, in crawl
soup = scrape_and_soup(url)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 331, in scrape_and_soup
scrape = scrape_url(url)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 325, in scrape_url
database.rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1006, in rollback
self.transaction.rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 574, in rollback
util.raise_(rollback_err[1], with_traceback=rollback_err[2])
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 534, in rollback
t[1].rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1753, in rollback
self._do_rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1791, in _do_rollback
self.connection._rollback_impl()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 751, in _rollback_impl
self._handle_dbapi_exception(e, None, None, None, None)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1510, in _handle_dbapi_exception
util.raise_(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 749, in _rollback_impl
self.engine.dialect.do_rollback(self.connection)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
sqlalchemy.exc.OperationalError: (MySQLdb._exceptions.OperationalError) (2013, 'Lost connection to MySQL server during query')
(Background on this error at: http://sqlalche.me/e/13/e3q8)
Traceback (most recent call last):
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 321, in scrape_url
database.add(scrape)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2008, in add
self._save_or_update_state(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2021, in _save_or_update_state
self._save_or_update_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2371, in _save_or_update_impl
self._save_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2324, in _save_impl
to_attach = self._before_attach(state, obj)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2441, in _before_attach
raise sa_exc.InvalidRequestError(
sqlalchemy.exc.InvalidRequestError: Object '<Scrape at 0x7f4f7e1e3a60>' is already attached to session '3' (this is '2')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 749, in _rollback_impl
self.engine.dialect.do_rollback(self.connection)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
MySQLdb._exceptions.OperationalError: (2013, 'Lost connection to MySQL server during query')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 62, in crawl
soup = scrape_and_soup(url)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 331, in scrape_and_soup
scrape = scrape_url(url)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 325, in scrape_url
database.rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1006, in rollback
self.transaction.rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 574, in rollback
util.raise_(rollback_err[1], with_traceback=rollback_err[2])
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 534, in rollback
t[1].rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1753, in rollback
self._do_rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1791, in _do_rollback
self.connection._rollback_impl()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 751, in _rollback_impl
self._handle_dbapi_exception(e, None, None, None, None)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1510, in _handle_dbapi_exception
util.raise_(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 749, in _rollback_impl
self.engine.dialect.do_rollback(self.connection)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
sqlalchemy.exc.OperationalError: (MySQLdb._exceptions.OperationalError) (2013, 'Lost connection to MySQL server during query')
(Background on this error at: http://sqlalche.me/e/13/e3q8)
return self._execute_and_instances(context)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3533, in _execute_and_instances
result = conn.execute(querycontext.statement, self._params)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1011, in execute
return meth(self, multiparams, params)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/sql/elements.py", line 298, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1124, in _execute_clauseelement
ret = self._execute_context(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1316, in _execute_context
self._handle_dbapi_exception(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1510, in _handle_dbapi_exception
util.raise_(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1276, in _execute_context
self.dialect.do_execute(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 593, in do_execute
cursor.execute(statement, parameters)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 206, in execute
res = self._query(query)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 319, in _query
db.query(q)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/connections.py", line 259, in query
_mysql.connection.query(self, query)
sqlalchemy.exc.OperationalError: (MySQLdb._exceptions.OperationalError) (2013, 'Lost connection to MySQL server during query')
[SQL: SELECT urls.id AS urls_id, urls.url AS urls_url, urls.error AS urls_error, urls.vetted AS urls_vetted, urls.useful AS urls_useful, urls.date_discovered AS urls_date_discovered, urls.last_parse AS urls_last_parse, urls.domain_id AS urls_domain_id, urls.publisher_id AS urls_publisher_id
FROM urls
WHERE NOT (EXISTS (SELECT 1
FROM scrapes
WHERE urls.id = scrapes.url_id)) AND urls.error = false ORDER BY rand()
LIMIT %s]
[parameters: (1,)]
(Background on this error at: http://sqlalche.me/e/13/e3q8)
I've tried playing with create_engine, adding pool_size=20, max_overflow=0 or autoflush=True/False, with no success.
Could someone please indicate what I am doing wrong ?
The solution is to make a new database session in each process, at the start of the crawl function (then pass it into make_url and scrape_url, either as a separate parameter or by making them all methods of one object). You should use a with closing(...) statement to make sure the session is closed when crawl finishes.
You have another problem in the code: the while url loop needs to also wait for all the scrapers to finish, in case one of them finds additional URLs that also need to be scraped.
As a suggestion for improvement, rather than using Process directly, you could use multiprocessing.Pool; that would let you control the number of scrapers running in parallel, which you'll probably eventually want to do (to avoid overloading the CPU, RAM, network and/or database). At that point, you could either still use a separate database session for each crawl call, or one per pool worker.

PyMySQL connection timeout - code structure

I have written a script which gets data from a sqlite db to upload into MySQL db. The structure of the code is as below.
def insert_to_mysql(_row):
    """Insert one row into MySQL over a freshly opened PyMySQL connection.

    A new connection is opened (and closed) per row; ``connect_timeout``
    only bounds how long the TCP/handshake phase may take, not how long
    the connection stays usable afterwards.

    :param _row: the row data to insert.
    """
    # get mysql connection by Pymysql
    connection = pymysql.connect(..., connect_timeout=31536000)
    # insert data
    # BUG FIX: the original read `connection.close` without parentheses,
    # which merely references the bound method and never closes the
    # connection -- every processed row leaked an open connection.
    connection.close()
def get_data_from_sqlite(_db):
    """Walk every record produced by the sqlite generator and push each
    one to MySQL via insert_to_mysql()."""
    for record in sqlite_generator:
        insert_to_mysql(record)
The code works fine, but the connection gets timed out very often, with no clear pattern, producing the traceback below — despite the connect_timeout in the connection object, and despite using one connection object per row. My logic was to create one connection per row so that there would not be a timeout issue.
Can anyone help me in understanding what's going wrong here?
Traceback (most recent call last):
File "/home/santhosh/.local/lib/python3.5/site-packages/pymysql/connections.py", line 691, in _read_bytes
data = self._rfile.read(num_bytes)
File "/usr/lib/python3.5/socket.py", line 575, in readinto
return self._sock.recv_into(b)
TimeoutError: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "up_data_upload.py", line 85, in <module>
import_data(record)
File "up_data_upload.py", line 43, in import_data
db='up_scrape_data')
File "up_data_upload.py", line 31, in get_mysql_connection
connect_timeout=31536000)
File "/home/santhosh/.local/lib/python3.5/site-packages/pymysql/__init__.py", line 94, in Connect
return Connection(*args, **kwargs)
File "/home/santhosh/.local/lib/python3.5/site-packages/pymysql/connections.py", line 325, in __init__
self.connect()
File "/home/santhosh/.local/lib/python3.5/site-packages/pymysql/connections.py", line 598, in connect
self._get_server_information()
File "/home/santhosh/.local/lib/python3.5/site-packages/pymysql/connections.py", line 975, in _get_server_information
packet = self._read_packet()
File "/home/santhosh/.local/lib/python3.5/site-packages/pymysql/connections.py", line 657, in _read_packet
packet_header = self._read_bytes(4)
File "/home/santhosh/.local/lib/python3.5/site-packages/pymysql/connections.py", line 699, in _read_bytes
"Lost connection to MySQL server during query (%s)" % (e,))
pymysql.err.OperationalError: (2013, 'Lost connection to MySQL server during query ([Errno 110] Connection timed out)')
Thanks in advance.

How to create and delete database using mssql+pyodbc connection string using python

I have a database engine as the below:
from sqlalchemy import create_engine
import pydoc
# connect db
engine = create_engine('mssql+pyodbc://xxxx\MARTRNO_EXPRESS/toolDB?driver=SQL+Server+Native+Client+11.0')
connection = engine.connect()
I tried to use something like the below code as to create a database using this connection as the below code:
from database import connec
import pandas as pd
def delete_all_tables_from_db():
    """Run the CREATE DATABASE statement against the engine in connec.

    NOTE(review): despite the function name, the statement executed here is
    CREATE DATABASE (kept byte-for-byte from the original); the name is kept
    so existing callers keep working.

    Fixes versus the original:
    - ``pd.read_sql`` expects a result-returning query; a DDL statement like
      CREATE DATABASE returns no rows, and the DataFrame was then wrongly
      passed back to ``engine.execute``. We execute the SQL string directly.
    - SQL Server refuses CREATE DATABASE inside a multi-statement
      transaction, so the connection must run in AUTOCOMMIT isolation.
    """
    delete_all_tables_query = "CREATE DATABASE MyNewDatabase"
    # AUTOCOMMIT keeps SQLAlchemy from wrapping the DDL in a transaction,
    # which is what caused error 226 in the original traceback.
    with connec.engine.connect().execution_options(
        isolation_level="AUTOCOMMIT"
    ) as conn:
        conn.execute(delete_all_tables_query)


delete_all_tables_from_db()
but I find this error:
Traceback (most recent call last):
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\engine\base.py", line 1245, in _execute_context
self.dialect.do_execute(
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\engine\default.py", line 588, in do_execute
cursor.execute(statement, parameters)
pyodbc.ProgrammingError: ('42000', '[42000] [Microsoft][SQL Server Native Client 11.0][SQL Server]CREATE DATABASE statement not allowed within multi-statement transaction. (226) (SQLExecDirectW)')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/Users/haroo501/PycharmProjects/ToolUpdated/database/delete_all_tables_from_db.py", line 10, in <module>
delete_all_tables_from_db()
File "C:/Users/haroo501/PycharmProjects/ToolUpdated/database/delete_all_tables_from_db.py", line 7, in delete_all_tables_from_db
delete_all_tables_df = pd.read_sql(delete_all_tables_query, connec.engine)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\sql.py", line 432, in read_sql
return pandas_sql.read_query(
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\sql.py", line 1218, in read_query
result = self.execute(*args)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\io\sql.py", line 1087, in execute
return self.connectable.execute(*args, **kwargs)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\engine\base.py", line 2182, in execute
return connection.execute(statement, *multiparams, **params)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\engine\base.py", line 976, in execute
return self._execute_text(object_, multiparams, params)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\engine\base.py", line 1143, in _execute_text
ret = self._execute_context(
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\engine\base.py", line 1249, in _execute_context
self._handle_dbapi_exception(
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\engine\base.py", line 1476, in _handle_dbapi_exception
util.raise_from_cause(sqlalchemy_exception, exc_info)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\util\compat.py", line 398, in raise_from_cause
reraise(type(exception), exception, tb=exc_tb, cause=cause)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\util\compat.py", line 152, in reraise
raise value.with_traceback(tb)
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\engine\base.py", line 1245, in _execute_context
self.dialect.do_execute(
File "C:\Users\haroo501\AppData\Local\Programs\Python\Python38\lib\site-packages\sqlalchemy\engine\default.py", line 588, in do_execute
cursor.execute(statement, parameters)
sqlalchemy.exc.ProgrammingError: (pyodbc.ProgrammingError) ('42000', '[42000] [Microsoft][SQL Server Native Client 11.0][SQL Server]CREATE DATABASE statement not allowed within multi-statement transaction. (226) (SQLExecDirectW)')
[SQL: CREATE DATABASE MyNewDatabase]
(Background on this error at: http://sqlalche.me/e/f405)
Process finished with exit code 1
I tried to modify this database and it works fine, but I have to assume the permissions for the user.
I am using MicroSoft SQL Managment Studio SQL EXPRESS:
Server Type Database Engine
Authentication: Windows Authentication — I don't have a user name and password for the database.
I think the problem is now in this part:
'mssql+pyodbc://xxxx\SMARTRNO_EXPRESS/toolDB?driver=SQL+Server+Native+Client+11.0'
That I use this database connection string to connect directly to the toolDB
So I need something like a connection String as the below one:
# connect db
engine = create_engine('mssql+pyodbc://xxxx\SMARTRNO_EXPRESS?driver=SQL+Server+Native+Client+11.0')
connection = engine.connect()
so as to be able to create a database on this server, and to delete, create, or even modify databases
Well! I solved this by creating a new connection in the database.py file as in the below code, by adding autocommit=True:
conn = pyodbc.connect("driver={SQL Server};server=WINKPN-3B5JTT2\SMARTRNO_EXPRESS; database=master; trusted_connection=true",
autocommit=True)
And Tried to access this connection by the below code:
from database import connec
def create_all_tables_from_db():
    """Create MyNewDatabase using the shared autocommit pyodbc connection."""
    # The connection in connec was opened with autocommit=True, so the DDL
    # runs outside any transaction, which SQL Server requires here.
    ddl = "CREATE DATABASE MyNewDatabase"
    connec.conn.execute(ddl)


create_all_tables_from_db()

cannot connect to in-memory SQLite DB using SQLAlchemy with Python 2.7.3 on Windows

I am trying to use the in-memory SQLite database using SQLAlchemy with Python 2.7.3 on Windows. I can connect to the engine, but when I try to execute the second statement I am getting the following error:
>>> engine=create_engine('sqlite:///memory:',echo=True)
>>> engine.execute("select 1").scalar()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python27\lib\site-packages\sqlalchemy\engine\base.py", line 2445, in execute
connection = self.contextual_connect(close_with_result=True)
File "C:\Python27\lib\site-packages\sqlalchemy\engine\base.py", line 2489, in contextual_connect
self.pool.connect(),
File "C:\Python27\lib\site-packages\sqlalchemy\pool.py", line 236, in connect
return _ConnectionFairy(self).checkout()
File "C:\Python27\lib\site-packages\sqlalchemy\pool.py", line 401, in __init__
rec = self._connection_record = pool._do_get()
File "C:\Python27\lib\site-packages\sqlalchemy\pool.py", line 822, in _do_get
return self._create_connection()
File "C:\Python27\lib\site-packages\sqlalchemy\pool.py", line 189, in _create_connection
return _ConnectionRecord(self)
File "C:\Python27\lib\site-packages\sqlalchemy\pool.py", line 282, in __init__
self.connection = self.__connect()
File "C:\Python27\lib\site-packages\sqlalchemy\pool.py", line 344, in __connect
connection = self.__pool._creator()
File "C:\Python27\lib\site-packages\sqlalchemy\engine\strategies.py", line 80, in connect
return dialect.connect(*cargs, **cparams)
File "C:\Python27\lib\site-packages\sqlalchemy\engine\default.py", line 281, in connect
return self.dbapi.connect(*cargs, **cparams)
sqlalchemy.exc.OperationalError: (OperationalError) unable to open database file
None None
The filename should be :memory:, not memory:. (See the docs for in-memory databases). The relevant SQLAlchemy docs mention that's the default path, so you should use:
engine=create_engine('sqlite://',echo=True)
The error you're getting is presumably because memory: isn't a valid filename on Windows.
The following worked for me:
from sqlalchemy import create_engine
from sqlalchemy.pool import StaticPool
engine = create_engine("sqlite://", poolclass=StaticPool)

Categories

Resources