SQLAlchemy switch to python multiprocessing - python

I am currently working on a web crawler. It works fine, but to maximise the resources I am trying to switch to multi-processing. The second I try that, though, I run into a wall of tracebacks, and I can't seem to find what I am doing wrong, as I am still a novice with both SQLAlchemy and Python multi-processing.
Here is what the parent loop looks like:
...
def crawler(url=False):
...
while url:
crawl(url.id)
url = get_new_url()
I am trying to turn this into a parallel processing function where I don't have to wait for the previous crawl/scrape to finish:
from multiprocessing import Process
...
def crawler(url=False):
while url:
p = Process(target=crawl, args=(url.id,))
p.start()
url = get_new_url()
Here is how I make my database connection:
engine = create_engine('mysql://user:password@domain:3306/mdb01?charset=utf8mb4', pool_recycle=3600)
Session = sessionmaker(bind=engine, autoflush=True)
Base = declarative_base()
Here is the module that handles the crawler's database interactions, including the import of the database factory (I removed the bulk of it, as I feel the issue is how I interact with SQLAlchemy and not the rest of the code):
from news_models.base import Base, Session, engine
database = Session()
def crawl(urlid):
url = database.query(Url).filter_by(id=urlid).first()
print(f"Starting to work on {url.id}: {url.url}")
... scrape page ....
scrape = scrape_url(url)
... running beautifull soup ...
# Retrieve all of the anchor tags
tags = soup('a')
for tag in tags:
... validation ...
make_url(url)
def make_url(url):
...
#domain = ex. abc.com
domain = database.query(Domain).filter_by(domain=domain).first()
database.add(Url(url, domain, vetted))
database.commit()
def scrape_url(url):
scrape = Scrape(page = html, url = url)
database.add(scrape)
database.commit()
return scrape
Here is the dialog:
Starting to work on 179226: https://bbc.co.uk/sport/football/53891604
Starting to work on 110232: https://theweathernetwork.com/ca/weather/saskatchewan/carragana
Starting to work on 152054: https://ca.images.search.yahoo.com/search/images?p=barack+obama&fr=fp-tts&th=110.1&tw=162.6&imgurl=https%3a%2f%2fimage.cnbcfm.com%2fapi%2fv1%2fimage%2f105055178-gettyimages-680143744rr.jpg%3fv%3d1576513702%26w%3d1400%26h%3d950&rurl=https%3a%2f%2fwww.cnbc.com%2f2019%2f12%2f16%2fbarack-obama-how-women-are-better-leaders-than-men.html&size=123kb&name=barack+obama%3a+how+women+are+better+leaders+than+men&oid=1&h=950&w=1400&turl=https%3a%2f%2ftse1.mm.bing.net%2fth%3fid%3doip.btjoweh9kdcuxxcdksvoiwhafb%26amp%3bpid%3dapi%26rs%3d1%26c%3d1%26qlt%3d95%26w%3d162%26h%3d110&tt=barack+obama%3a+how+women+are+better+leaders+than+men&sigr=4nejz_6_wyyo&sigit=.iypm9cqprc9&sigi=9sv3ee5szhdl&sign=eqzxpc3ps9fm&sigt=eqzxpc3ps9fm
Exception during reset or similar
Traceback (most recent call last):
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 321, in scrape_url
database.add(scrape)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2008, in add
self._save_or_update_state(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2021, in _save_or_update_state
self._save_or_update_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2371, in _save_or_update_impl
self._save_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2324, in _save_impl
to_attach = self._before_attach(state, obj)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2441, in _before_attach
raise sa_exc.InvalidRequestError(
sqlalchemy.exc.InvalidRequestError: Object '<Scrape at 0x7f4f7e1975b0>' is already attached to session '3' (this is '2')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/pool/base.py", line 697, in _finalize_fairy
fairy._reset(pool)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/pool/base.py", line 893, in _reset
pool._dialect.do_rollback(self)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
MySQLdb._exceptions.ProgrammingError: (2014, "Commands out of sync; you can't run this command now")
Traceback (most recent call last):
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1276, in _execute_context
Process Process-3:
Process Process-1:
self.dialect.do_execute(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 593, in do_execute
cursor.execute(statement, parameters)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 206, in execute
res = self._query(query)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 319, in _query
db.query(q)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/connections.py", line 259, in query
_mysql.connection.query(self, query)
MySQLdb._exceptions.OperationalError: (2013, 'Lost connection to MySQL server during query')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "./crawler.py", line 138, in <module>
main()
File "./crawler.py", line 49, in main
crawler(url=url)
File "./crawler.py", line 135, in crawler
url = get_new_url()
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 482, in get_new_url
url = database.query(Url).filter_by(scrape=None, error=False).order_by(sqlalchemy.func.rand()).first()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3402, in first
ret = list(self[0:1])
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3176, in __getitem__
Traceback (most recent call last):
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 321, in scrape_url
database.add(scrape)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2008, in add
self._save_or_update_state(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2021, in _save_or_update_state
self._save_or_update_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2371, in _save_or_update_impl
self._save_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2324, in _save_impl
to_attach = self._before_attach(state, obj)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2441, in _before_attach
raise sa_exc.InvalidRequestError(
sqlalchemy.exc.InvalidRequestError: Object '<Scrape at 0x7f4f7e1e3790>' is already attached to session '3' (this is '2')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 749, in _rollback_impl
self.engine.dialect.do_rollback(self.connection)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
MySQLdb._exceptions.OperationalError: (2013, 'Lost connection to MySQL server during query')
return list(res)
The above exception was the direct cause of the following exception:
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3508, in __iter__
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 62, in crawl
soup = scrape_and_soup(url)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 331, in scrape_and_soup
scrape = scrape_url(url)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 325, in scrape_url
database.rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1006, in rollback
self.transaction.rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 574, in rollback
util.raise_(rollback_err[1], with_traceback=rollback_err[2])
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 534, in rollback
t[1].rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1753, in rollback
self._do_rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1791, in _do_rollback
self.connection._rollback_impl()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 751, in _rollback_impl
self._handle_dbapi_exception(e, None, None, None, None)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1510, in _handle_dbapi_exception
util.raise_(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 749, in _rollback_impl
self.engine.dialect.do_rollback(self.connection)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
sqlalchemy.exc.OperationalError: (MySQLdb._exceptions.OperationalError) (2013, 'Lost connection to MySQL server during query')
(Background on this error at: http://sqlalche.me/e/13/e3q8)
Traceback (most recent call last):
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 321, in scrape_url
database.add(scrape)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2008, in add
self._save_or_update_state(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2021, in _save_or_update_state
self._save_or_update_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2371, in _save_or_update_impl
self._save_impl(state)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2324, in _save_impl
to_attach = self._before_attach(state, obj)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2441, in _before_attach
raise sa_exc.InvalidRequestError(
sqlalchemy.exc.InvalidRequestError: Object '<Scrape at 0x7f4f7e1e3a60>' is already attached to session '3' (this is '2')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 749, in _rollback_impl
self.engine.dialect.do_rollback(self.connection)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
MySQLdb._exceptions.OperationalError: (2013, 'Lost connection to MySQL server during query')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 62, in crawl
soup = scrape_and_soup(url)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 331, in scrape_and_soup
scrape = scrape_url(url)
File "/home/fabrice/workbench/news/news_crawler/crawl_tools.py", line 325, in scrape_url
database.rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1006, in rollback
self.transaction.rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 574, in rollback
util.raise_(rollback_err[1], with_traceback=rollback_err[2])
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 534, in rollback
t[1].rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1753, in rollback
self._do_rollback()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1791, in _do_rollback
self.connection._rollback_impl()
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 751, in _rollback_impl
self._handle_dbapi_exception(e, None, None, None, None)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1510, in _handle_dbapi_exception
util.raise_(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 749, in _rollback_impl
self.engine.dialect.do_rollback(self.connection)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/dialects/mysql/base.py", line 2475, in do_rollback
dbapi_connection.rollback()
sqlalchemy.exc.OperationalError: (MySQLdb._exceptions.OperationalError) (2013, 'Lost connection to MySQL server during query')
(Background on this error at: http://sqlalche.me/e/13/e3q8)
return self._execute_and_instances(context)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3533, in _execute_and_instances
result = conn.execute(querycontext.statement, self._params)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1011, in execute
return meth(self, multiparams, params)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/sql/elements.py", line 298, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1124, in _execute_clauseelement
ret = self._execute_context(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1316, in _execute_context
self._handle_dbapi_exception(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1510, in _handle_dbapi_exception
util.raise_(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1276, in _execute_context
self.dialect.do_execute(
File "/home/fabrice/.local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 593, in do_execute
cursor.execute(statement, parameters)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 206, in execute
res = self._query(query)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 319, in _query
db.query(q)
File "/home/fabrice/.local/lib/python3.8/site-packages/MySQLdb/connections.py", line 259, in query
_mysql.connection.query(self, query)
sqlalchemy.exc.OperationalError: (MySQLdb._exceptions.OperationalError) (2013, 'Lost connection to MySQL server during query')
[SQL: SELECT urls.id AS urls_id, urls.url AS urls_url, urls.error AS urls_error, urls.vetted AS urls_vetted, urls.useful AS urls_useful, urls.date_discovered AS urls_date_discovered, urls.last_parse AS urls_last_parse, urls.domain_id AS urls_domain_id, urls.publisher_id AS urls_publisher_id
FROM urls
WHERE NOT (EXISTS (SELECT 1
FROM scrapes
WHERE urls.id = scrapes.url_id)) AND urls.error = false ORDER BY rand()
LIMIT %s]
[parameters: (1,)]
(Background on this error at: http://sqlalche.me/e/13/e3q8)
I've tried playing with create_engine, adding pool_size=20, max_overflow=0, or autoflush=True/False, with no success.
Could someone please point out what I am doing wrong?

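The module-level database = Session() is created once, before the child processes are started, and the parent already uses it in get_new_url(), so every forked process inherits and shares the same session and the same underlying MySQL connection; concurrent use of that single connection is what produces the "Commands out of sync" and "Lost connection to MySQL server" errors. The solution is to make a new database session in each process, at the start of the crawl function (then pass it into make_url and scrape_url, either as a separate parameter or by making them all methods of one object). You should use a with closing(...) statement to make sure the session is closed when crawl finishes.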
You have another problem in the code: the while url loop needs to also wait for all the scrapers to finish, in case one of them finds additional URLs that also need to be scraped.
As a suggestion for improvement, rather than using Process directly, you could use multiprocessing.Pool; that would let you control the number of scrapers running in parallel, which you'll probably eventually want to do (to avoid overloading the CPU, RAM, network and/or database). At that point, you could either still use a separate database session for each crawl call, or one per pool worker.

Related

Error while upgrading superset to 2.0.0 psycopg2.errors.UndefinedColumn

After updating superset to 2.0.0 I cannot access sqllab nor view databases (Oracle DB).
I'm running superset on openshift with custom image built from apache/superset:2.0.0
Installed dependencies:
cx_Oracle certifi flask-oidc==1.4.0 flask_openid gevent psycopg2 psycopg2-binary==2.9.1 redis==3.5.3 itsdangerous==2.0.1
I tried running superset db upgrade with following result:
$ superset db upgrade
Loaded your LOCAL configuration at [/app/pythonpath/superset_config.py]
logging was configured successfully
2022-11-21 14:21:41,835:INFO:superset.utils.logging_configurator:logging was configured successfully
2022-11-21 14:21:41,841:INFO:root:Configured event logger of type <class 'superset.utils.log.DBEventLogger'>
INFO [alembic.runtime.migration] Context impl PostgresqlImpl.
INFO [alembic.runtime.migration] Will assume transactional DDL.
and superset init throws error:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1276, in _execute_context
self.dialect.do_execute(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 608, in do_execute
cursor.execute(statement, parameters)
psycopg2.errors.UndefinedColumn: column dbs.allow_multi_schema_metadata_fetch does not exist
LINE 1: ..., dbs.force_ctas_schema AS dbs_force_ctas_schema, dbs.allow_...
^
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/bin/superset", line 33, in \<module\>
sys.exit(load_entry_point('apache-superset', 'console_scripts', 'superset')())
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1128, in __call__
return self.main(\*args, \*\*kwargs)
File "/usr/local/lib/python3.8/site-packages/flask/cli.py", line 601, in main
return super().main(\*args, \*\*kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1053, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
return \_process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1395, in invoke
return ctx.invoke(self.callback, \*\*ctx.params)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 754, in invoke
return \__callback(\*args, \*\*kwargs)
File "/usr/local/lib/python3.8/site-packages/click/decorators.py", line 26, in new_func
return f(get_current_context(), \*args, \*\*kwargs)
File "/usr/local/lib/python3.8/site-packages/flask/cli.py", line 445, in decorator
return \__ctx.invoke(f, \*args, \*\*kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 754, in invoke
return \__callback(\*args, \*\*kwargs)
File "/usr/local/lib/python3.8/site-packages/click/decorators.py", line 26, in new_func
return f(get_current_context(), \*args, \*\*kwargs)
File "/usr/local/lib/python3.8/site-packages/flask/cli.py", line 445, in decorator
return \__ctx.invoke(f, \*args, \*\*kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 754, in invoke
return _callback(\*args, \*\*kwargs)
File "/app/superset/cli/main.py", line 62, in init
security_manager.sync_role_definitions()
File "/app/superset/security/manager.py", line 731, in sync_role_definitions
self.create_missing_perms()
File "/app/superset/security/manager.py", line 679, in create_missing_perms
merge_pv("datasource_access", datasource.get_perm())
File "/app/superset/connectors/sqla/models.py", line 826, in get_perm
if self.database is None:
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/attributes.py", line 294, in __get__
return self.impl.get(instance_state(instance), dict_)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/attributes.py", line 730, in get
value = self.callable(state, passive)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/strategies.py", line 759, in \_load_for_state
return self.\_emit_lazyload(
File "\<string\>", line 1, in \<lambda\>
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/strategies.py", line 847, in \_emit_lazyload
q(session)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/ext/baked.py", line 615, in \_load_on_pk_identity
result = list(bq.for_session(self.session).params(\*\*params))
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/ext/baked.py", line 444, in __iter__
return q.\_execute_and_instances(context)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 3560, in \_execute_and_instances
result = conn.execute(querycontext.statement, self.\_params)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1011, in execute
return meth(self, multiparams, params)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/sql/elements.py", line 298, in \_execute_on_connection
return connection.\_execute_clauseelement(self, multiparams, params)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1124, in \_execute_clauseelement
ret = self.\_execute_context(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1316, in \_execute_context
self._handle_dbapi_exception(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1510, in handle_dbapi_exception
util.raise_(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 182, in raise
raise exception
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1276, in _execute_context
self.dialect.do_execute(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 608, in do_execute
cursor.execute(statement, parameters)
sqlalchemy.exc.ProgrammingError: (psycopg2.errors.UndefinedColumn) column dbs.allow_multi_schema_metadata_fetch does not exist
LINE 1: ..., dbs.force_ctas_schema AS dbs_force_ctas_schema, dbs.allow_...
^
\[SQL: SELECT dbs.uuid AS dbs_uuid, dbs.created_on AS dbs_created_on, dbs.changed_on AS dbs_changed_on, dbs.id AS dbs_id, dbs.verbose_name AS dbs_verbose_name, dbs.database_name AS dbs_database_name, dbs.sqlalchemy_uri AS dbs_sqlalchemy_uri, dbs.password AS dbs_password, dbs.cache_timeout AS dbs_cache_timeout, dbs.select_as_create_table_as AS dbs_select_as_create_table_as, dbs.expose_in_sqllab AS dbs_expose_in_sqllab, dbs.configuration_method AS dbs_configuration_method, dbs.allow_run_async AS dbs_allow_run_async, dbs.allow_file_upload AS dbs_allow_file_upload, dbs.allow_ctas AS dbs_allow_ctas, dbs.allow_cvas AS dbs_allow_cvas, dbs.allow_dml AS dbs_allow_dml, dbs.force_ctas_schema AS dbs_force_ctas_schema, dbs.allow_multi_schema_metadata_fetch AS dbs_allow_multi_schema_metadata_fetch, dbs.extra AS dbs_extra, dbs.encrypted_extra AS dbs_encrypted_extra, dbs.impersonate_user AS dbs_impersonate_user, dbs.server_cert AS dbs_server_cert, dbs.is_managed_externally AS dbs_is_managed_externally, dbs.external_url AS dbs_external_url, dbs.created_by_fk AS dbs_created_by_fk, dbs.changed_by_fk AS dbs_changed_by_fk
FROM dbs
WHERE dbs.id = %(param_1)s\]
\[parameters: {'param_1': 1}\]
(Background on this error at: http://sqlalche.me/e/13/f405)
Has anyone found a solution for this error?
Tried:
superset db upgrade
superset init
updating dependencies
restarting superset
Expect:
successfully migrate to 2.0.0 version

MySQL connection problem when deploying a website

I use ubuntu 20.04 in AWS.
I have created a Python asyncio webapp.
I use Nginx.
The Nginx default page could load successfully.
However, after deploying my webapp, an error occurs:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/aiomysql/connection.py", line 503, in _connect
await self._request_authentication()
File "/usr/local/lib/python3.8/dist-packages/aiomysql/connection.py", line 796, in _request_authentication
await self._process_auth(plugin_name, auth_packet)
File "/usr/local/lib/python3.8/dist-packages/aiomysql/connection.py", line 819, in _process_auth
await self.sha256_password_auth(auth_packet)
File "/usr/local/lib/python3.8/dist-packages/aiomysql/connection.py", line 970, in sha256_password_auth
pkt = await self._read_packet()
File "/usr/local/lib/python3.8/dist-packages/aiomysql/connection.py", line 593, in _read_packet
packet.check_error()
File "/usr/local/lib/python3.8/dist-packages/pymysql/protocol.py", line 220, in check_error
err.raise_mysql_exception(self._data)
File "/usr/local/lib/python3.8/dist-packages/pymysql/err.py", line 109, in raise_mysql_exception
raise errorclass(errno, errval)
pymysql.err.OperationalError: (1045, "Access denied for user 'www'@'localhost' (using password: YES)")
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "app.py", line 154, in <module>
loop.run_until_complete(init(loop))
File "/usr/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete
return future.result()
File "app.py", line 142, in init
yield from orm.create_pool(loop=loop, **configs.db)
File "/home/ubuntu/srv/awesome/www/orm.py", line 17, in create_pool
__pool = yield from aiomysql.create_pool(
File "/usr/local/lib/python3.8/dist-packages/aiomysql/pool.py", line 29, in _create_pool
await pool._fill_free_pool(False)
File "/usr/local/lib/python3.8/dist-packages/aiomysql/pool.py", line 167, in _fill_free_pool
conn = await connect(echo=self._echo, loop=self._loop,
File "/usr/local/lib/python3.8/dist-packages/aiomysql/connection.py", line 75, in _connect
await conn._connect()
File "/usr/local/lib/python3.8/dist-packages/aiomysql/connection.py", line 521, in _connect
raise OperationalError(2003,
pymysql.err.OperationalError: (2003, "Can't connect to MySQL server on '127.0.0.1'")
How can I fix this problem?

500 error when deleting entities from Apache Superset

I am stuck with a problem where, when I try to delete a chart, a user, or a dashboard from Apache Superset through the web UI (installed using helm on Kubernetes), the browser gets a 500 error and the logs indicate the following:
superset-prod-6785cd75df-zqp5j superset] [SQL: SELECT report_schedule.created_on AS report_schedule_created_on, report_schedule.changed_on AS report_schedule_changed_on, report_schedule.id AS report_schedule_id, report_schedule.type AS report_schedule_type, report_schedule.name AS report_schedule_name, report_schedule.description AS report_schedule_description, report_schedule.context_markdown AS report_schedule_context_markdown, report_schedule.active AS report_schedule_active, report_schedule.crontab AS report_schedule_crontab, report_schedule.sql AS report_schedule_sql, report_schedule.chart_id AS report_schedule_chart_id, report_schedule.dashboard_id AS report_schedule_dashboard_id, report_schedule.database_id AS report_schedule_database_id, report_schedule.last_eval_dttm AS report_schedule_last_eval_dttm, report_schedule.last_state AS report_schedule_last_state, report_schedule.last_value AS report_schedule_last_value, report_schedule.last_value_row_json AS report_schedule_last_value_row_json, report_schedule.validator_type AS report_schedule_validator_type, report_schedule.validator_config_json AS report_schedule_validator_config_json, report_schedule.log_retention AS report_schedule_log_retention, report_schedule.grace_period AS report_schedule_grace_period, report_schedule.working_timeout AS report_schedule_working_timeout, report_schedule.created_by_fk AS report_schedule_created_by_fk, report_schedule.changed_by_fk AS report_schedule_changed_by_fk
FROM report_schedule
WHERE report_schedule.chart_id = %(chart_id_1)s]
[parameters: {'chart_id_1': '51'}]
(Background on this error at: http://sqlalche.me/e/13/f405)
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/base.py", line 1277, in _execute_context
cursor, statement, parameters, context
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/default.py", line 593, in do_execute
cursor.execute(statement, parameters)
psycopg2.errors.UndefinedColumn: column report_schedule.working_timeout does not exist
LINE 1: ...ule.grace_period AS report_schedule_grace_period, report_sch...
^
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/flask_appbuilder/api/__init__.py", line 84, in wraps
return f(self, *args, **kwargs)
File "/app/superset/views/base_api.py", line 80, in wraps
duration, response = time_function(f, self, *args, **kwargs)
File "/app/superset/utils/core.py", line 1484, in time_function
response = func(*args, **kwargs)
File "/app/superset/utils/log.py", line 125, in wrapper
value = f(*args, **kwargs)
File "/app/superset/charts/api.py", line 383, in delete
DeleteChartCommand(g.user, pk).run()
File "/app/superset/charts/commands/delete.py", line 49, in run
self.validate()
File "/app/superset/charts/commands/delete.py", line 64, in validate
reports = ReportScheduleDAO.find_by_chart_id(self._model_id)
File "/app/superset/reports/dao.py", line 45, in find_by_chart_id
.filter(ReportSchedule.chart_id == chart_id)
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/orm/query.py", line 3373, in all
return list(self)
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/orm/query.py", line 3535, in __iter__
return self._execute_and_instances(context)
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/orm/query.py", line 3560, in _execute_and_instances
result = conn.execute(querycontext.statement, self._params)
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/base.py", line 1011, in execute
return meth(self, multiparams, params)
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/sql/elements.py", line 298, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/base.py", line 1130, in _execute_clauseelement
distilled_params,
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/base.py", line 1317, in _execute_context
e, statement, parameters, cursor, context
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/base.py", line 1511, in _handle_dbapi_exception
sqlalchemy_exception, with_traceback=exc_info[2], from_=e
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
raise exception
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/base.py", line 1277, in _execute_context
cursor, statement, parameters, context
File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/default.py", line 593, in do_execute
cursor.execute(statement, parameters)
sqlalchemy.exc.ProgrammingError: (psycopg2.errors.UndefinedColumn) column report_schedule.working_timeout does not exist
LINE 1: ...ule.grace_period AS report_schedule_grace_period, report_sch...
^
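It seems like there is a mismatch between the schema of Superset's backing database and the Python code: the code queries the column report_schedule.working_timeout, which does not exist in the database.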
This issue seems to have been resolved after upgrading the superset docker image version to apache/superset:latest around 17 Jan 2021. Open to more in-depth answers, but felt it important to state this issue is no longer an issue for me.

MongoDB crashes when the same collection is accessed by two different Python scripts using pymongo

I have two Python scripts, currentdataupload.py and productioncount.py, on my Ubuntu server. currentdataupload.py uploads dummy current data, and productioncount.py reads that data to do some calculations. The code runs perfectly in other scenarios without disrupting MongoDB, but when I run both scripts simultaneously using nohup, the MongoDB service exits after a few seconds and I am no longer able to upload or retrieve data. I would like to know whether my overall approach is correct or whether I am making a serious mistake somewhere.
Error log
Traceback (most recent call last):
File "/python_codes/currentdataupload.py", line 38, in <module>
result = posts.insert_one(post_data)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/collection.py", line 630, in insert_one
bypass_doc_val=bypass_document_validation),
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/collection.py", line 535, in _insert
check_keys, manipulate, write_concern, op_id, bypass_doc_val)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/collection.py", line 516, in _insert_one
check_keys=check_keys)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 244, in command
self._raise_connection_failure(error)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 372, in _raise_connection_failure
raise error
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 239, in command
read_concern)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/network.py", line 96, in command
response = receive_message(sock, 1, request_id)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/network.py", line 123, in receive_message
header = _receive_data_on_socket(sock, 16)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/network.py", line 161, in _receive_data_on_socket
raise AutoReconnect("connection closed")
pymongo.errors.AutoReconnect: connection closed
Traceback (most recent call last):
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 543, in connect
sock = _configured_socket(self.address, self.opts)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 452, in _configured_socket
sock = _create_connection(address, options)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 436, in _create_connection
raise err
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 429, in _create_connection
sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/python_codes/productioncount.py", line 267, in <module>
result = col2.find_one({"date": date, "machine": machine})
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/collection.py", line 1014, in find_one
for result in cursor.limit(-1):
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/cursor.py", line 1090, in next
if len(self.__data) or self._refresh():
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/cursor.py", line 1012, in _refresh
self.__read_concern))
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/cursor.py", line 850, in __send_message
**kwargs)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/mongo_client.py", line 844, in _send_message_with_response
exhaust)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/mongo_client.py", line 855, in _reset_on_error
return func(*args, **kwargs)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/server.py", line 99, in send_message_with_response
with self.get_socket(all_credentials, exhaust) as sock_info:
File "/root/anaconda3/lib/python3.5/contextlib.py", line 59, in __enter__
return next(self.gen)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/server.py", line 163, in get_socket
with self.pool.get_socket(all_credentials, checkout) as sock_info:
File "/root/anaconda3/lib/python3.5/contextlib.py", line 59, in __enter__
return next(self.gen)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 582, in get_socket
sock_info = self._get_socket_no_auth()
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 628, in _get_socket_no_auth
sock_info = self._check(sock_info)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 682, in _check
return self.connect()
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 555, in connect
_raise_connection_failure(self.address, error)
File "/root/anaconda3/lib/python3.5/site-packages/pymongo/pool.py", line 65, in _raise_connection_failure
raise AutoReconnect(msg)
pymongo.errors.AutoReconnect: localhost:27017: [Errno 111] Connection refused
MongoDB shouldn't crash. Please file a bug report, including the Python script you run and a complete MongoDB logfile, at jira.mongodb.org.

Celery throwing long error message

I'm trying to run the starter code for celery on their website (http://docs.celeryproject.org/en/latest/getting-started/first-steps-with-celery.html), and I'm running an instance of a RabbitMQ server in the background. However, I'm getting a long error message:
>>> from celery import Celery
>>> app = Celery('tasks', broker='pyamqp://guest@localhost//')
>>> @app.task
... def add(x, y):
... return x + y
...
>>>
>>> add.delay(4, 4)
Traceback (most recent call last):
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\utils\functional.py", line 36, in __call__
return self.__value__
AttributeError: 'ChannelPromise' object has no attribute '__value__'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 494, in _ensured
return fun(*args, **kwargs)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\messaging.py", line 187, in _publish
channel = self.channel
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\messaging.py", line 209, in _get_channel
channel = self._channel = channel()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\utils\functional.py", line 38, in __call__
value = self.__value__ = self.__contract__()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\messaging.py", line 224, in <lambda>
channel = ChannelPromise(lambda: connection.default_channel)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 819, in default_channel
self.connection
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 802, in connection
self._connection = self._establish_connection()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 757, in _establish_connection
conn = self.transport.establish_connection()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\transport\pyamqp.py", line 130, in establish_connection
conn.connect()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\connection.py", line 294, in connect
self.transport.connect()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\transport.py", line 122, in connect
self.socket_settings, self.read_timeout, self.write_timeout,
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\transport.py", line 174, in _init_socket
self._set_socket_options(socket_settings)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\transport.py", line 204, in _set_socket_options
self.sock.setsockopt(SOL_TCP, opt, val)
OSError: [WinError 10042] An unknown, invalid, or unsupported option or level was specified in a getsockopt or setsockopt call
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 414, in _reraise_as_library_errors
yield
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 515, in _ensured
reraise_as_library_errors=False,
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 405, in ensure_connection
callback)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\utils\functional.py", line 333, in retry_over_time
return fun(*args, **kwargs)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 261, in connect
return self.connection
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 802, in connection
self._connection = self._establish_connection()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 757, in _establish_connection
conn = self.transport.establish_connection()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\transport\pyamqp.py", line 130, in establish_connection
conn.connect()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\connection.py", line 294, in connect
self.transport.connect()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\transport.py", line 122, in connect
self.socket_settings, self.read_timeout, self.write_timeout,
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\transport.py", line 174, in _init_socket
self._set_socket_options(socket_settings)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\transport.py", line 204, in _set_socket_options
self.sock.setsockopt(SOL_TCP, opt, val)
OSError: [WinError 10042] An unknown, invalid, or unsupported option or level was specified in a getsockopt or setsockopt call
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Users\Alexander\Anaconda3\lib\site-packages\celery\app\task.py", line 412, in delay
return self.apply_async(args, kwargs)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\celery\app\task.py", line 535, in apply_async
**options
File "C:\Users\Alexander\Anaconda3\lib\site-packages\celery\app\base.py", line 737, in send_task
amqp.send_task_message(P, name, message, **options)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\celery\app\amqp.py", line 558, in send_task_message
**properties
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\messaging.py", line 181, in publish
exchange_name, declare,
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 527, in _ensured
errback and errback(exc, 0)
File "C:\Users\Alexander\Anaconda3\lib\contextlib.py", line 77, in __exit__
self.gen.throw(type, value, traceback)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 419, in _reraise_as_library_errors
sys.exc_info()[2])
File "C:\Users\Alexander\Anaconda3\lib\site-packages\vine\five.py", line 175, in reraise
raise value.with_traceback(tb)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 414, in _reraise_as_library_errors
yield
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 515, in _ensured
reraise_as_library_errors=False,
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 405, in ensure_connection
callback)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\utils\functional.py", line 333, in retry_over_time
return fun(*args, **kwargs)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 261, in connect
return self.connection
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 802, in connection
self._connection = self._establish_connection()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\connection.py", line 757, in _establish_connection
conn = self.transport.establish_connection()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\kombu\transport\pyamqp.py", line 130, in establish_connection
conn.connect()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\connection.py", line 294, in connect
self.transport.connect()
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\transport.py", line 122, in connect
self.socket_settings, self.read_timeout, self.write_timeout,
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\transport.py", line 174, in _init_socket
self._set_socket_options(socket_settings)
File "C:\Users\Alexander\Anaconda3\lib\site-packages\amqp\transport.py", line 204, in _set_socket_options
self.sock.setsockopt(SOL_TCP, opt, val)
kombu.exceptions.OperationalError: [WinError 10042] An unknown, invalid, or unsupported option or level was specified in a getsockopt or setsockopt call
There is an open issue on github where the same OS-error has been seen:
https://github.com/celery/py-amqp/issues/130
Someone suggested in the comments on that issue that the problem can be temporarily worked around by downgrading amqp to version 2.1.3.
Steps to take in order to downgrade:
Uninstall amqp using $ pip uninstall amqp.
Install amqp using $ pip install -Iv amqp==2.1.3.

Categories

Resources