trino-python-client OAuth in JupyterLab - Python

I have installed trino-python-client in a custom JupyterLab image and it works if I use basic authentication.
I want to use the OAuth service I have set up in Trino to make things simpler for users.
This also works fine in ipython3 (see below), but it doesn't work in JupyterLab: the redirect URL is never printed.
In ipython3 I see this:
In [1]: import trino
...: conn = trino.dbapi.connect(
...: host='trino.somedomain.net',
...: port=443,
...: user='first.last',
...: catalog='iceberg',
...: schema='ds_scratch',
...: http_scheme='https',
...: auth=trino.auth.OAuth2Authentication(),
...: )
...: cur = conn.cursor()
In [2]: cur.execute('SELECT * FROM system.runtime.nodes')
Open the following URL in the browser for the external authentication:
https://trino.somedomain.net/oauth2/token/initiate/042f6e4167d4e6a3f70068ec4389037c2b9c34f3ec356ddc5522a3e13e179fd9
In [3]: rows = cur.fetchall()
In [4]: print(rows)
[['trino-coordinator-69dffc6f9f-pvpg4',...]]
I can see the redirection URL and complete the OAuth flow and everything works.
But in JupyterLab it never prints the URL. How do I change this?
In JupyterLab I see the following:
In [1]: import trino
In [2]: conn = trino.dbapi.connect(
host='trino.somedomain.net',
port=443,
user='first.last',
catalog='iceberg',
schema='ds_scratch',
http_scheme='https',
auth=trino.auth.OAuth2Authentication(),
)
In [3]: cur = conn.cursor()
In [4]: cur.execute('show tables from iceberg.ds_scratch')
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
/tmp/ipykernel_170/588296735.py in <module>
----> 1 cur.execute('show tables from iceberg.ds_scratch')
/opt/conda/lib/python3.9/site-packages/trino/dbapi.py in execute(self, operation, params)
434 else:
435 self._query = trino.client.TrinoQuery(self._request, sql=operation)
--> 436 result = self._query.execute()
437 self._iterator = iter(result)
438 return result
/opt/conda/lib/python3.9/site-packages/trino/client.py in execute(self, additional_http_headers)
523 raise exceptions.TrinoUserError("Query has been cancelled", self.query_id)
524
--> 525 response = self._request.post(self._sql, additional_http_headers)
526 status = self._request.process(response)
527 self._info_uri = status.info_uri
/opt/conda/lib/python3.9/site-packages/trino/client.py in post(self, sql, additional_http_headers)
337 http_headers.update(additional_http_headers or {})
338
--> 339 http_response = self._post(
340 self.statement_url,
341 data=data,
/opt/conda/lib/python3.9/site-packages/trino/exceptions.py in decorated(*args, **kwargs)
121 for attempt in range(1, max_attempts + 1):
122 try:
--> 123 result = func(*args, **kwargs)
124 if any(guard(result) for guard in conditions):
125 handle_retry.retry(func, args, kwargs, None, attempt)
/opt/conda/lib/python3.9/site-packages/requests/sessions.py in post(self, url, data, json, **kwargs)
588 """
589
--> 590 return self.request('POST', url, data=data, json=json, **kwargs)
591
592 def put(self, url, data=None, **kwargs):
/opt/conda/lib/python3.9/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
540 }
541 send_kwargs.update(settings)
--> 542 resp = self.send(prep, **send_kwargs)
543
544 return resp
/opt/conda/lib/python3.9/site-packages/requests/sessions.py in send(self, request, **kwargs)
653
654 # Send the request
--> 655 r = adapter.send(request, **kwargs)
656
657 # Total elapsed time of the request (approximately)
/opt/conda/lib/python3.9/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
437 try:
438 if not chunked:
--> 439 resp = conn.urlopen(
440 method=request.method,
441 url=url,
/opt/conda/lib/python3.9/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
668
669 # Make the request on the httplib connection object.
--> 670 httplib_response = self._make_request(
671 conn,
672 method,
/opt/conda/lib/python3.9/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
379 # Trigger any extra validation we need to do.
380 try:
--> 381 self._validate_conn(conn)
382 except (SocketTimeout, BaseSSLError) as e:
383 # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
/opt/conda/lib/python3.9/site-packages/urllib3/connectionpool.py in _validate_conn(self, conn)
976 # Force connect early to allow us to validate the connection.
977 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
--> 978 conn.connect()
979
980 if not conn.is_verified:
/opt/conda/lib/python3.9/site-packages/urllib3/connection.py in connect(self)
307 def connect(self):
308 # Add certificate verification
--> 309 conn = self._new_conn()
310 hostname = self.host
311
/opt/conda/lib/python3.9/site-packages/urllib3/connection.py in _new_conn(self)
157
158 try:
--> 159 conn = connection.create_connection(
160 (self._dns_host, self.port), self.timeout, **extra_kw
161 )
/opt/conda/lib/python3.9/site-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options)
72 if source_address:
73 sock.bind(source_address)
---> 74 sock.connect(sa)
75 return sock
76
KeyboardInterrupt:
In [5]: rows = cur.fetchall()
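A possible workaround (not verified against the exact client version used above): newer releases of trino-python-client let you pass a redirect handler to OAuth2Authentication, so the authorization URL can be rendered in the notebook output instead of being written to the kernel's stdout. A minimal sketch, assuming the installed client accepts a redirect_auth_url_handler callable that is invoked with the URL:
import trino
from IPython.display import display, HTML

# Hypothetical handler: called by the client with the authorization URL (assumption:
# the installed trino-python-client supports the redirect_auth_url_handler argument).
def show_auth_url(url):
    # Render a clickable link in the notebook cell output instead of relying on stdout.
    display(HTML(f'<a href="{url}" target="_blank">Click here to authenticate with Trino</a>'))

conn = trino.dbapi.connect(
    host='trino.somedomain.net',
    port=443,
    user='first.last',
    catalog='iceberg',
    schema='ds_scratch',
    http_scheme='https',
    auth=trino.auth.OAuth2Authentication(redirect_auth_url_handler=show_auth_url),
)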

Related

Error while connecting to snowflake via python "UnpicklingError: invalid load key, '\x00'"

I am getting the following error while connecting to Snowflake via Python using snowflake.connector.connect:
import snowflake.connector  # pip install snowflake-connector-python
# credentials are read from a local .env file
cnx = snowflake.connector.connect(
    user=os.getenv('USER'), password=os.getenv('PASSWORD'), account=os.getenv('ACCOUNT'),
    warehouse=os.getenv('WAREHOUSE'), database=db, schema=schema)
This was working fine until today, when my system crashed while running a Python script and I had to hard reboot. I have tried many things, like deleting Python, Anaconda, and all related files in the Users folder and reinstalling a fresh Anaconda version, but I still get the same error. The complete error message is below. Help appreciated.
I even tried hardcoding the username, password, and the rest as variables, but I still get the same error, so it has nothing to do with the .env file.
---------------------------------------------------------------------------
UnpicklingError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_18464\4059644960.py in <module>
23 db='DB_SANDBOX'
24 schema='PUBLIC'
---> 25 cnx = snowflake.connector.connect(user=os.getenv('USER'),password=os.getenv('PASSWORD'),account=os.getenv('ACCOUNT'),warehouse=os.getenv('WAREHOUSE'), database=db,schema=schema )
26
27 query ='''
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\__init__.py in Connect(**kwargs)
49
50 def Connect(**kwargs) -> SnowflakeConnection:
---> 51 return SnowflakeConnection(**kwargs)
52
53
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\connection.py in __init__(self, **kwargs)
295 self.converter = None
296 self.__set_error_attributes()
--> 297 self.connect(**kwargs)
298 self._telemetry = TelemetryClient(self._rest)
299
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\connection.py in connect(self, **kwargs)
548 connection_diag.generate_report()
549 else:
--> 550 self.__open_connection()
551
552 def close(self, retry=True):
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\connection.py in __open_connection(self)
787 auth = Auth(self.rest)
788 auth.read_temporary_credentials(self.host, self.user, self._session_parameters)
--> 789 self._authenticate(auth_instance)
790
791 self._password = None # ensure password won't persist
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\connection.py in _authenticate(self, auth_instance)
1050 # make some changes if needed before real __authenticate
1051 try:
-> 1052 self.__authenticate(self.__preprocess_auth_instance(auth_instance))
1053 except ReauthenticationRequest as ex:
1054 # cached id_token expiration error, we have cleaned id_token and try to authenticate again
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\connection.py in __authenticate(self, auth_instance)
1070 auth = Auth(self.rest)
1071 try:
-> 1072 auth.authenticate(
1073 auth_instance=auth_instance,
1074 account=self.account,
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\auth.py in authenticate(self, auth_instance, account, user, database, schema, warehouse, role, passcode, passcode_in_password, mfa_callback, password_callback, session_parameters, timeout)
255
256 try:
--> 257 ret = self._rest._post_request(
258 url,
259 headers,
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\network.py in _post_request(self, url, headers, body, token, timeout, _no_results, no_retry, socket_timeout, _include_retry_params)
702 pprint(ret)
703
--> 704 ret = self.fetch(
705 "post",
706 full_url,
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\network.py in fetch(self, method, full_url, headers, data, timeout, **kwargs)
792 retry_ctx = RetryCtx(timeout, include_retry_params)
793 while True:
--> 794 ret = self._request_exec_wrapper(
795 session, method, full_url, headers, data, retry_ctx, **kwargs
796 )
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\network.py in _request_exec_wrapper(self, session, method, full_url, headers, data, retry_ctx, no_retry, token, **kwargs)
915 except Exception as e:
916 if not no_retry:
--> 917 raise e
918 logger.debug("Ignored error", exc_info=True)
919 return {}
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\network.py in _request_exec_wrapper(self, session, method, full_url, headers, data, retry_ctx, no_retry, token, **kwargs)
835 full_url = SnowflakeRestful.add_request_guid(full_url)
836 try:
--> 837 return_object = self._request_exec(
838 session=session,
839 method=method,
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\network.py in _request_exec(self, session, method, full_url, headers, data, token, catch_okta_unauthorized_error, is_raw_text, is_raw_binary, binary_data_handler, socket_timeout)
1114 stack_trace=traceback.format_exc(),
1115 )
-> 1116 raise err
1117
1118 def make_requests_session(self):
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\network.py in _request_exec(self, session, method, full_url, headers, data, token, catch_okta_unauthorized_error, is_raw_text, is_raw_binary, binary_data_handler, socket_timeout)
1016 # the response within the time. If not, ConnectReadTimeout or
1017 # ReadTimeout is raised.
-> 1018 raw_ret = session.request(
1019 method=method,
1020 url=full_url,
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\vendored\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
585 }
586 send_kwargs.update(settings)
--> 587 resp = self.send(prep, **send_kwargs)
588
589 return resp
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\vendored\requests\sessions.py in send(self, request, **kwargs)
699
700 # Send the request
--> 701 r = adapter.send(request, **kwargs)
702
703 # Total elapsed time of the request (approximately)
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\vendored\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
487 try:
488 if not chunked:
--> 489 resp = conn.urlopen(
490 method=request.method,
491 url=url,
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\vendored\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
701
702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
704 conn,
705 method,
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\vendored\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
384 # Trigger any extra validation we need to do.
385 try:
--> 386 self._validate_conn(conn)
387 except (SocketTimeout, BaseSSLError) as e:
388 # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\vendored\urllib3\connectionpool.py in _validate_conn(self, conn)
1040 # Force connect early to allow us to validate the connection.
1041 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
-> 1042 conn.connect()
1043
1044 if not conn.is_verified:
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\vendored\urllib3\connection.py in connect(self)
412 context.load_default_certs()
413
--> 414 self.sock = ssl_wrap_socket(
415 sock=conn,
416 keyfile=self.key_file,
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\ssl_wrap_socket.py in ssl_wrap_socket_with_ocsp(*args, **kwargs)
76 ret = ssl_.ssl_wrap_socket(*args, **kwargs)
77
---> 78 from .ocsp_asn1crypto import SnowflakeOCSPAsn1Crypto as SFOCSP
79
80 log.debug(
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\ocsp_asn1crypto.py in <module>
45 )
46 from snowflake.connector.errors import RevocationCheckError
---> 47 from snowflake.connector.ocsp_snowflake import SnowflakeOCSP, generate_cache_key
48
49 with warnings.catch_warnings():
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\ocsp_snowflake.py in <module>
79 tuple[bytes, bytes, bytes],
80 OCSPResponseValidationResult,
---> 81 ] = SFDictFileCache(
82 entry_lifetime=constants.DAY_IN_SECONDS,
83 file_path={
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\cache.py in __init__(self, file_path, entry_lifetime, file_timeout)
404 self.last_loaded: datetime.datetime | None = None
405 if os.path.exists(self.file_path):
--> 406 self._load()
407
408 def _getitem_non_locking(
~\.conda\envs\py_38_env\lib\site-packages\snowflake\connector\cache.py in _load(self)
485 try:
486 with open(self.file_path, "rb") as r_file:
--> 487 other = pickle.load(r_file)
488 self._update(
489 other,
UnpicklingError: invalid load key, '\x00'.
This is probably a corrupted cache, which you should try deleting. The default cache directories are documented here. On Windows the default cache location is
%USERPROFILE%\AppData\Local\Snowflake\Caches
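As a minimal sketch (assuming the Windows default path above; adjust the path for macOS or Linux), you could remove the cache directory and let the connector rebuild it on the next connection:
import os
import shutil

# Assumed default Snowflake cache location on Windows (adjust for your platform).
cache_dir = os.path.expandvars(r"%USERPROFILE%\AppData\Local\Snowflake\Caches")

if os.path.isdir(cache_dir):
    shutil.rmtree(cache_dir)  # the connector recreates the cache on the next connect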

Is my code crashing because of an internet connection issue? If so, is there a way I can write a snippet of code to check the connection before trying the API?

I'm using Jupyter Notebook to run a fairly basic crypto trading bot against the Coinbase API, and after an arbitrary amount of time, sometimes 6 hours, other times 18 hours, the code errors and stops working. Has anyone had this problem, or can anyone determine the cause from the error output produced?
The error is given below.
OSError Traceback (most recent call last)
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
596 # Make the request on the httplib connection object.
--> 597 httplib_response = self._make_request(conn, method, url,
598 timeout=timeout_obj,
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
385 # otherwise it looks like a programming error was the cause.
--> 386 six.raise_from(e, None)
387 except (SocketTimeout, BaseSSLError, SocketError) as e:
~\anaconda3\lib\site-packages\requests\packages\urllib3\packages\six.py in raise_from(value, from_value)
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
381 try:
--> 382 httplib_response = conn.getresponse()
383 except Exception as e:
~\anaconda3\lib\http\client.py in getresponse(self)
1370 try:
-> 1371 response.begin()
1372 except ConnectionError:
~\anaconda3\lib\http\client.py in begin(self)
318 while True:
--> 319 version, status, reason = self._read_status()
320 if status != CONTINUE:
~\anaconda3\lib\http\client.py in _read_status(self)
279 def _read_status(self):
--> 280 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
281 if len(line) > _MAXLINE:
~\anaconda3\lib\socket.py in readinto(self, b)
703 try:
--> 704 return self._sock.recv_into(b)
705 except timeout:
~\anaconda3\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs)
292 else:
--> 293 return self.recv_into(*args, **kwargs)
294
~\anaconda3\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs)
281 else:
--> 282 raise SocketError(str(e))
283 except OpenSSL.SSL.ZeroReturnError as e:
OSError: (10054, 'WSAECONNRESET')
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~\anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
412 if not chunked:
--> 413 resp = conn.urlopen(
414 method=request.method,
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
647
--> 648 retries = retries.increment(method, url, error=e, _pool=self,
649 _stacktrace=sys.exc_info()[2])
~\anaconda3\lib\site-packages\requests\packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
346 if read is False or not self._is_method_retryable(method):
--> 347 raise six.reraise(type(error), error, _stacktrace)
348 elif read is not None:
~\anaconda3\lib\site-packages\requests\packages\urllib3\packages\six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
596 # Make the request on the httplib connection object.
--> 597 httplib_response = self._make_request(conn, method, url,
598 timeout=timeout_obj,
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
385 # otherwise it looks like a programming error was the cause.
--> 386 six.raise_from(e, None)
387 except (SocketTimeout, BaseSSLError, SocketError) as e:
~\anaconda3\lib\site-packages\requests\packages\urllib3\packages\six.py in raise_from(value, from_value)
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
381 try:
--> 382 httplib_response = conn.getresponse()
383 except Exception as e:
~\anaconda3\lib\http\client.py in getresponse(self)
1370 try:
-> 1371 response.begin()
1372 except ConnectionError:
~\anaconda3\lib\http\client.py in begin(self)
318 while True:
--> 319 version, status, reason = self._read_status()
320 if status != CONTINUE:
~\anaconda3\lib\http\client.py in _read_status(self)
279 def _read_status(self):
--> 280 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
281 if len(line) > _MAXLINE:
~\anaconda3\lib\socket.py in readinto(self, b)
703 try:
--> 704 return self._sock.recv_into(b)
705 except timeout:
~\anaconda3\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs)
292 else:
--> 293 return self.recv_into(*args, **kwargs)
294
~\anaconda3\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs)
281 else:
--> 282 raise SocketError(str(e))
283 except OpenSSL.SSL.ZeroReturnError as e:
ProtocolError: ('Connection aborted.', OSError("(10054, 'WSAECONNRESET')"))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_26992/2760925491.py in <module>
36 # dead zone
37 if (price_diff < buy_diff) and (price_diff > sell_diff) and (wait_time<run_time):
---> 38 price_difference()
39 wait_time = (run_time+180)
40 run_log.logger.info("price difference in dead zone")
~\AppData\Local\Temp/ipykernel_26992/3172399590.py in price_difference()
46 global difference
47 global lastDiff
---> 48 price()
49 historic_price()
50 if current_price < hist_avg:
~\AppData\Local\Temp/ipykernel_26992/3172399590.py in price()
23 global current_ask
24 global last_price
---> 25 var=auth_client.get_product_order_book(trade_pair)
26 bids=var.get('bids')
27 asks=var.get('asks')
~\anaconda3\lib\site-packages\cbpro\public_client.py in get_product_order_book(self, product_id, level)
86 """
87 params = {'level': level}
---> 88 return self._send_message('get',
89 '/products/{}/book'.format(product_id),
90 params=params)
~\anaconda3\lib\site-packages\cbpro\public_client.py in _send_message(self, method, endpoint, params, data)
266 """
267 url = self.url + endpoint
--> 268 r = self.session.request(method, url, params=params, data=data,
269 auth=self.auth, timeout=30)
270 return r.json()
~\anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
486 }
487 send_kwargs.update(settings)
--> 488 resp = self.send(prep, **send_kwargs)
489
490 return resp
~\anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
607
608 # Send the request
--> 609 r = adapter.send(request, **kwargs)
610
611 # Total elapsed time of the request (approximately)
~\anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
471
472 except (ProtocolError, socket.error) as err:
--> 473 raise ConnectionError(err, request=request)
474
475 except MaxRetryError as e:
ConnectionError: ('Connection aborted.', OSError("(10054, 'WSAECONNRESET')"))
A “connection reset” error usually means that the network is ok but the thing at the other end isn’t, or doesn’t want to talk to you, though with today’s “unclean” networks with proxies and firewalls and layering violations everywhere, there are probably more exotic causes as well.
In any case, I would not try to predict it (which is going to be hopelessly brittle) but deal with it when it happens by catching the exception.
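For example, a minimal sketch of that approach (the wrapper name and retry counts are illustrative; auth_client and trade_pair come from the bot code in the question):
import time
import requests

# Illustrative wrapper: retry a flaky call a few times before giving up,
# sleeping between attempts so the host isn't hammered.
def call_with_retries(func, attempts=5, delay=10):
    for attempt in range(1, attempts + 1):
        try:
            return func()
        except (requests.exceptions.ConnectionError, OSError) as exc:
            if attempt == attempts:
                raise  # out of retries, re-raise the last error
            print(f"attempt {attempt} failed ({exc}); retrying in {delay}s")
            time.sleep(delay)

# usage sketch:
# order_book = call_with_retries(lambda: auth_client.get_product_order_book(trade_pair))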
It might not be the perfect solution, but it's going to work for me in this case: have a wrapper script simply restart the main script if/when it crashes.
from subprocess import run
from time import sleep

# Path and name of the script you are trying to start
file_path = "CBPRO_BTC_USDC_V3.py"

print('program started')
restart_timer = 60

def start_script():
    try:
        # Make sure the 'python' command is available
        run("python " + file_path, check=True)
    except:
        # Script crashed, lets restart it!
        handle_crash()

def handle_crash():
    print('Restarting in 60 seconds')
    sleep(restart_timer)  # Restarts the script after 60 seconds
    start_script()

start_script()
I don't know if it crashes because of the internet connection. But if you want to check whether you have an internet connection, you could ping a server, for example 8.8.8.8 (Google's DNS server). If the command returns 0 it was successful; if it returns 1 it failed.
import os

# -n 1 sends a single ping on Windows (use -c 1 on Linux/macOS); 0 means success
response_code = os.system("ping 8.8.8.8 -n 1")
print(response_code)
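A more portable sketch using only the standard library, opening a short TCP connection to a public DNS server instead of shelling out to ping:
import socket

def internet_available(host="8.8.8.8", port=53, timeout=3):
    # Return True if a TCP connection to the given host/port succeeds within the timeout.
    try:
        socket.create_connection((host, port), timeout=timeout).close()
        return True
    except OSError:
        return False

print(internet_available())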

AWS S3 - boto3 LibraryNotFoundError HTTPClientError

I tried to print out the files I have in an S3 bucket. It worked well yesterday, but somehow I got a LibraryNotFoundError and an HTTPClientError today (I already made sure I have the boto3 library installed). I would appreciate any guidance. Thank you.
My code:
import boto3
s3 = boto3.client('s3', aws_access_key_id='my_access_key',aws_secret_access_key='my_secret_key')
bucketname='bucket_name'
s3_client = boto3.resource('s3')
bucket = s3_client.Bucket(bucketname)
for obj in bucket.objects.all():
    key = obj.key
    print(key)
The error message:
---------------------------------------------------------------------------
LibraryNotFoundError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.7/site-packages/botocore/httpsession.py in send(self, request)
261 decode_content=False,
--> 262 chunked=self._chunked(request.headers),
263 )
~/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
671 headers=headers,
--> 672 chunked=chunked,
673 )
~/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
375 try:
--> 376 self._validate_conn(conn)
377 except (SocketTimeout, BaseSSLError) as e:
~/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in _validate_conn(self, conn)
993 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
--> 994 conn.connect()
995
~/opt/anaconda3/lib/python3.7/site-packages/urllib3/connection.py in connect(self)
359 server_hostname=server_hostname,
--> 360 ssl_context=context,
361 )
~/opt/anaconda3/lib/python3.7/site-packages/snowflake/connector/ssl_wrap_socket.py in ssl_wrap_socket_with_ocsp(*args, **kwargs)
400
--> 401 from .ocsp_asn1crypto import SnowflakeOCSPAsn1Crypto as SFOCSP
402
~/opt/anaconda3/lib/python3.7/site-packages/snowflake/connector/ocsp_asn1crypto.py in <module>
33 use_openssl(libcrypto_path='/usr/lib/libcrypto.35.dylib', libssl_path='/usr/lib/libssl.35.dylib')
---> 34 from oscrypto import asymmetric
35
~/opt/anaconda3/lib/python3.7/site-packages/oscrypto/asymmetric.py in <module>
18 )
---> 19 from ._asymmetric import _unwrap_private_key_info
20 from ._errors import pretty_message
~/opt/anaconda3/lib/python3.7/site-packages/oscrypto/_asymmetric.py in <module>
26
---> 27 from .kdf import pbkdf1, pbkdf2, pkcs12_kdf
28 from .symmetric import (
~/opt/anaconda3/lib/python3.7/site-packages/oscrypto/kdf.py in <module>
8 from . import backend
----> 9 from .util import rand_bytes
10 from ._types import type_name, byte_cls, int_types
~/opt/anaconda3/lib/python3.7/site-packages/oscrypto/util.py in <module>
9 if sys.platform == 'darwin':
---> 10 from ._mac.util import rand_bytes
11 elif sys.platform == 'win32':
~/opt/anaconda3/lib/python3.7/site-packages/oscrypto/_mac/util.py in <module>
10 from ._common_crypto import CommonCrypto, CommonCryptoConst
---> 11 from ._security import Security
12
~/opt/anaconda3/lib/python3.7/site-packages/oscrypto/_mac/_security.py in <module>
8 if ffi() == 'cffi':
----> 9 from ._security_cffi import Security, version_info as osx_version_info
10 from ._core_foundation_cffi import CoreFoundation, CFHelpers
~/opt/anaconda3/lib/python3.7/site-packages/oscrypto/_mac/_security_cffi.py in <module>
238 if not security_path:
--> 239 raise LibraryNotFoundError('The library Security could not be found')
240
LibraryNotFoundError: The library Security could not be found
During handling of the above exception, another exception occurred:
HTTPClientError Traceback (most recent call last)
<ipython-input-6-4a14fd7aca9a> in <module>
3 bucket = s3_client.Bucket(bucketname)
4
----> 5 for obj in bucket.objects.all():
6 key = obj.key
7 print(key)
~/opt/anaconda3/lib/python3.7/site-packages/boto3/resources/collection.py in __iter__(self)
81
82 count = 0
---> 83 for page in self.pages():
84 for item in page:
85 yield item
~/opt/anaconda3/lib/python3.7/site-packages/boto3/resources/collection.py in pages(self)
164 # we start processing and yielding individual items.
165 count = 0
--> 166 for page in pages:
167 page_items = []
168 for item in self._handler(self._parent, params, page):
~/opt/anaconda3/lib/python3.7/site-packages/botocore/paginate.py in __iter__(self)
253 self._inject_starting_params(current_kwargs)
254 while True:
--> 255 response = self._make_request(current_kwargs)
256 parsed = self._extract_parsed_response(response)
257 if first_request:
~/opt/anaconda3/lib/python3.7/site-packages/botocore/paginate.py in _make_request(self, current_kwargs)
330
331 def _make_request(self, current_kwargs):
--> 332 return self._method(**current_kwargs)
333
334 def _extract_parsed_response(self, response):
~/opt/anaconda3/lib/python3.7/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
355 "%s() only accepts keyword arguments." % py_operation_name)
356 # The "self" in this scope is referring to the BaseClient.
--> 357 return self._make_api_call(operation_name, kwargs)
358
359 _api_call.__name__ = str(py_operation_name)
~/opt/anaconda3/lib/python3.7/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
646 else:
647 http, parsed_response = self._make_request(
--> 648 operation_model, request_dict, request_context)
649
650 self.meta.events.emit(
~/opt/anaconda3/lib/python3.7/site-packages/botocore/client.py in _make_request(self, operation_model, request_dict, request_context)
665 def _make_request(self, operation_model, request_dict, request_context):
666 try:
--> 667 return self._endpoint.make_request(operation_model, request_dict)
668 except Exception as e:
669 self.meta.events.emit(
~/opt/anaconda3/lib/python3.7/site-packages/botocore/endpoint.py in make_request(self, operation_model, request_dict)
100 logger.debug("Making request for %s with params: %s",
101 operation_model, request_dict)
--> 102 return self._send_request(request_dict, operation_model)
103
104 def create_request(self, params, operation_model=None):
~/opt/anaconda3/lib/python3.7/site-packages/botocore/endpoint.py in _send_request(self, request_dict, operation_model)
135 request, operation_model, context)
136 while self._needs_retry(attempts, operation_model, request_dict,
--> 137 success_response, exception):
138 attempts += 1
139 # If there is a stream associated with the request, we need
~/opt/anaconda3/lib/python3.7/site-packages/botocore/endpoint.py in _needs_retry(self, attempts, operation_model, request_dict, response, caught_exception)
229 event_name, response=response, endpoint=self,
230 operation=operation_model, attempts=attempts,
--> 231 caught_exception=caught_exception, request_dict=request_dict)
232 handler_response = first_non_none_response(responses)
233 if handler_response is None:
~/opt/anaconda3/lib/python3.7/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
354 def emit(self, event_name, **kwargs):
355 aliased_event_name = self._alias_event_name(event_name)
--> 356 return self._emitter.emit(aliased_event_name, **kwargs)
357
358 def emit_until_response(self, event_name, **kwargs):
~/opt/anaconda3/lib/python3.7/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
226 handlers.
227 """
--> 228 return self._emit(event_name, kwargs)
229
230 def emit_until_response(self, event_name, **kwargs):
~/opt/anaconda3/lib/python3.7/site-packages/botocore/hooks.py in _emit(self, event_name, kwargs, stop_on_response)
209 for handler in handlers_to_call:
210 logger.debug('Event %s: calling handler %s', event_name, handler)
--> 211 response = handler(**kwargs)
212 responses.append((handler, response))
213 if stop_on_response and response is not None:
~/opt/anaconda3/lib/python3.7/site-packages/botocore/retryhandler.py in __call__(self, attempts, response, caught_exception, **kwargs)
181
182 """
--> 183 if self._checker(attempts, response, caught_exception):
184 result = self._action(attempts=attempts)
185 logger.debug("Retry needed, action of: %s", result)
~/opt/anaconda3/lib/python3.7/site-packages/botocore/retryhandler.py in __call__(self, attempt_number, response, caught_exception)
249 def __call__(self, attempt_number, response, caught_exception):
250 should_retry = self._should_retry(attempt_number, response,
--> 251 caught_exception)
252 if should_retry:
253 if attempt_number >= self._max_attempts:
~/opt/anaconda3/lib/python3.7/site-packages/botocore/retryhandler.py in _should_retry(self, attempt_number, response, caught_exception)
267 attempt_number < self._max_attempts:
268 try:
--> 269 return self._checker(attempt_number, response, caught_exception)
270 except self._retryable_exceptions as e:
271 logger.debug("retry needed, retryable exception caught: %s",
~/opt/anaconda3/lib/python3.7/site-packages/botocore/retryhandler.py in __call__(self, attempt_number, response, caught_exception)
315 for checker in self._checkers:
316 checker_response = checker(attempt_number, response,
--> 317 caught_exception)
318 if checker_response:
319 return checker_response
~/opt/anaconda3/lib/python3.7/site-packages/botocore/retryhandler.py in __call__(self, attempt_number, response, caught_exception)
221 elif caught_exception is not None:
222 return self._check_caught_exception(
--> 223 attempt_number, caught_exception)
224 else:
225 raise ValueError("Both response and caught_exception are None.")
~/opt/anaconda3/lib/python3.7/site-packages/botocore/retryhandler.py in _check_caught_exception(self, attempt_number, caught_exception)
357 # the MaxAttemptsDecorator is not interested in retrying the exception
358 # then this exception just propogates out past the retry code.
--> 359 raise caught_exception
~/opt/anaconda3/lib/python3.7/site-packages/botocore/endpoint.py in _do_get_response(self, request, operation_model)
198 http_response = first_non_none_response(responses)
199 if http_response is None:
--> 200 http_response = self._send(request)
201 except HTTPClientError as e:
202 return (None, e)
~/opt/anaconda3/lib/python3.7/site-packages/botocore/endpoint.py in _send(self, request)
242
243 def _send(self, request):
--> 244 return self.http_session.send(request)
245
246
~/opt/anaconda3/lib/python3.7/site-packages/botocore/httpsession.py in send(self, request)
296 message = 'Exception received when sending urllib3 HTTP request'
297 logger.debug(message, exc_info=True)
--> 298 raise HTTPClientError(error=e)
HTTPClientError: An HTTP Client raised and unhandled exception: The library Security could not be found

How to make Python requests persist in getting head

I have a problem where Python requests is throwing an exception after a few seconds. The website is being very slow, but only intermittently.
Chrome and Safari both fail to load the page. (E.g. Chrome displays "This site can't be reached", ERR_CONNECTION_RESET). However, Firefox is consistently able to access the page, although it takes around 20 secs to load. This behaviour is repeatable from several different machines, located in different countries. It seems like Firefox is "trying harder", and not timing out.
I'd like to get Python's requests to behave more like Firefox in this case. I have set the timeout argument to be a large number (60-seconds), but the exception is thrown long before that. It seems like there is some kind of handshake timeout, whereas maybe the timeout parameter controls the wait time for the response, post-handshake?
import requests
target='https://nomads.ncep.noaa.gov/pub/data/nccf/com/gens/prod/gefs.20191113/00/pgrb2a/'
request = requests.head(target, timeout=60)
print(request.status_code)
^^ Replace 20191113 with yesterday's date, as these links expire after 7 days.
The exception arrives after around 5 seconds, and is the "standard" requests exception when it can't access a page:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
383 # otherwise it looks like a programming error was the cause.
--> 384 six.raise_from(e, None)
385 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
379 try:
--> 380 httplib_response = conn.getresponse()
381 except Exception as e:
~/miniconda/envs/basics/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/miniconda/envs/basics/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/miniconda/envs/basics/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/miniconda/envs/basics/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
299 else:
--> 300 return self.recv_into(*args, **kwargs)
301
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
289 else:
--> 290 raise SocketError(str(e))
291 except OpenSSL.SSL.ZeroReturnError as e:
OSError: (54, 'ECONNRESET')
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
444 retries=self.max_retries,
--> 445 timeout=timeout
446 )
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
637 retries = retries.increment(method, url, error=e, _pool=self,
--> 638 _stacktrace=sys.exc_info()[2])
639 retries.sleep()
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
366 if read is False or not self._is_method_retryable(method):
--> 367 raise six.reraise(type(error), error, _stacktrace)
368 elif read is not None:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
383 # otherwise it looks like a programming error was the cause.
--> 384 six.raise_from(e, None)
385 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
379 try:
--> 380 httplib_response = conn.getresponse()
381 except Exception as e:
~/miniconda/envs/basics/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/miniconda/envs/basics/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/miniconda/envs/basics/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/miniconda/envs/basics/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
299 else:
--> 300 return self.recv_into(*args, **kwargs)
301
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
289 else:
--> 290 raise SocketError(str(e))
291 except OpenSSL.SSL.ZeroReturnError as e:
ProtocolError: ('Connection aborted.', OSError("(54, 'ECONNRESET')",))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-2-e4852eeb80e3> in <module>()
2 import requests
3 target='https://nomads.ncep.noaa.gov/pub/data/nccf/com/gens/prod/gefs.20191113/00/pgrb2a/'
----> 4 request = requests.head(target, timeout=60)
5 print(request.status_code)
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/api.py in head(url, **kwargs)
96
97 kwargs.setdefault('allow_redirects', False)
---> 98 return request('head', url, **kwargs)
99
100
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/api.py in request(method, url, **kwargs)
56 # cases, and look like a memory leak in others.
57 with sessions.Session() as session:
---> 58 return session.request(method=method, url=url, **kwargs)
59
60
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
510 }
511 send_kwargs.update(settings)
--> 512 resp = self.send(prep, **send_kwargs)
513
514 return resp
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
620
621 # Send the request
--> 622 r = adapter.send(request, **kwargs)
623
624 # Total elapsed time of the request (approximately)
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
493
494 except (ProtocolError, socket.error) as err:
--> 495 raise ConnectionError(err, request=request)
496
497 except MaxRetryError as e:
ConnectionError: ('Connection aborted.', OSError("(54, 'ECONNRESET')",))
Is there a way to get Requests to "try harder" for slow pages?
This is Python 3.6 and requests 2.19.1.
Based on the stack trace, the connection does not time out but is reset by the host ('ECONNRESET'). For more information about the error see the question "What does "connection reset by peer" mean?".
Instead of increasing the timeout you need to retry the request. To avoid spamming the host there should be some time between the retries. You could write your own retry logic or use a library like backoff.
Below is an example from backoff's documentation which will retry on any request exception for up to 60 seconds using an exponential backoff strategy.
@backoff.on_exception(backoff.expo,
                      requests.exceptions.RequestException,
                      max_time=60)
def get_url(url):
    return requests.get(url)
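If you would rather not add a dependency, a minimal hand-rolled alternative is to mount urllib3's Retry on a requests Session (the retry counts and backoff factor below are illustrative):
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Retry connect and read failures a few times with exponential backoff between attempts.
retry = Retry(total=5, connect=5, read=5, backoff_factor=1)
session = requests.Session()
session.mount('https://', HTTPAdapter(max_retries=retry))

target = 'https://nomads.ncep.noaa.gov/pub/data/nccf/com/gens/prod/gefs.20191113/00/pgrb2a/'
response = session.head(target, timeout=60)
print(response.status_code)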

BigQuery code failing due to MemoryError using pandas python client library

I am querying Google BigQuery using the Python client library, converting the result into a pandas DataFrame, and then ultimately writing it to a CSV file, but the code is failing with a MemoryError.
The table currently has 74,567 rows; after unnesting it is around 1.8 million rows.
I am currently using the Python client library to perform these steps.
from google.cloud import bigquery
import pandas as pd
import pandas_gbq
import os
from google.oauth2 import service_account

credentials = service_account.Credentials.from_service_account_file(json_key)

def query_to_dataframe(sql_statement):
    return pd.read_gbq(sql_statement,
                       project_id=project_id,
                       credentials=credentials,
                       dialect='standard')
sql_statement = """SELECT visitorId,
visitNumber,
visitId,
visitStartTime,
date,
totals.visits,
totals.hits,
totals.pageviews,
totals.timeOnSite,
totals.bounces,
totals.transactions,
totals.transactionRevenue,
totals.newVisits,
totals.screenviews,
totals.uniqueScreenviews,
totals.timeOnScreen,
totals.totalTransactionRevenue,
totals.sessionQualityDim,
trafficSource.referralPath,
trafficSource.campaign,
trafficSource.source,
trafficSource.medium,
trafficSource.keyword,
trafficSource.adContent,
trafficSource.adwordsClickInfo.campaignId,
trafficSource.adwordsClickInfo.adGroupId,
trafficSource.adwordsClickInfo.creativeId,
trafficSource.adwordsClickInfo.criteriaId,
trafficSource.adwordsClickInfo.page,
trafficSource.adwordsClickInfo.slot,
trafficSource.adwordsClickInfo.criteriaParameters,
trafficSource.adwordsClickInfo.gclId,
trafficSource.adwordsClickInfo.customerId,
trafficSource.adwordsClickInfo.adNetworkType,
trafficSource.adwordsClickInfo.targetingCriteria.boomUserlistId,
trafficSource.adwordsClickInfo.isVideoAd,
trafficSource.isTrueDirect,
trafficSource.campaignCode,
device.browser,
device.browserVersion,
device.browserSize,
device.operatingSystem,
device.operatingSystemVersion,
device.isMobile,
device.mobileDeviceBranding,
device.mobileDeviceModel,
device.mobileInputSelector,
device.mobileDeviceInfo,
device.mobileDeviceMarketingName,
device.flashVersion,
device.javaEnabled,
device.language,
device.screenColors,
device.screenResolution,
device.deviceCategory,
geoNetwork.continent,
geoNetwork.subContinent,
geoNetwork.country,
geoNetwork.region,
geoNetwork.metro,
geoNetwork.city,
geoNetwork.cityId,
geoNetwork.networkDomain,
geoNetwork.latitude,
geoNetwork.longitude,
geoNetwork.networkLocation,
cd.index,
cd.value,
h.hitNumber,
h.time,
h.hour,
h.minute,
h.isSecure,
h.isInteraction,
h.isEntrance,
h.isExit,
h.referer,
h.page.pagePath,
h.page.hostname,
h.page.pageTitle,
h.page.searchKeyword,
h.page.searchCategory,
h.page.pagePathLevel1,
h.page.pagePathLevel2,
h.page.pagePathLevel3,
h.page.pagePathLevel4,
h.transaction.transactionId,
h.transaction.transactionRevenue as tRevenue,
h.transaction.transactionTax,
h.transaction.transactionShipping,
h.transaction.affiliation,
h.transaction.currencyCode,
h.transaction.localTransactionRevenue,
h.transaction.localTransactionTax,
h.transaction.localTransactionShipping,
h.transaction.transactionCoupon,
h.item.transactionId as tId,
h.item.productName,
h.item.productCategory,
h.item.productSku,
h.item.itemQuantity,
h.item.itemRevenue,
h.item.currencyCode as cCode,
h.item.localItemRevenue,
h.contentInfo.contentDescription,
h.appInfo.name,
h.appInfo.version,
h.appInfo.id,
h.appInfo.installerId,
h.appInfo.appInstallerId,
h.appInfo.appName,
h.appInfo.appVersion,
h.appInfo.appId,
h.appInfo.screenName,
h.appInfo.landingScreenName,
h.appInfo.exitScreenName,
h.appInfo.screenDepth,
h.exceptionInfo.description,
h.exceptionInfo.isFatal,
h.exceptionInfo.exceptions,
h.exceptionInfo.fatalExceptions,
h.eventInfo.eventCategory,
h.eventInfo.eventAction,
h.eventInfo.eventLabel,
h.eventInfo.eventValue,
hp.productSKU as pSKU,
hp.v2ProductName,
hp.v2ProductCategory,
hp.productVariant,
hp.productBrand,
hp.productRevenue,
hp.localProductRevenue,
hp.productPrice,
hp.localProductPrice,
hp.productQuantity,
hp.productRefundAmount,
hp.localProductRefundAmount,
hp.isImpression,
hp.isClick,
hpc.index as hpcIndex,
hpc.value as hpcValue,
hpCustomMetrics.index as cusomMetricsIndex,
hpCustomMetrics.value as cusomMetricsValue,
hp.productListName,
hp.productListPosition,
hp.productCouponCode,
hpromotion.promoId,
hpromotion.promoName,
hpromotion.promoCreative,
hpromotion.promoPosition,
h.promotionActionInfo.promoIsView,
h.promotionActionInfo.promoIsClick,
h.refund.refundAmount,
h.refund.localRefundAmount,
h.eCommerceAction.action_type,
h.eCommerceAction.step,
h.eCommerceAction.option,
hExperiment.experimentId,
hExperiment.experimentVariant,
h.publisher.dfpClicks,
h.publisher.dfpImpressions,
h.publisher.dfpMatchedQueries,
h.publisher.dfpMeasurableImpressions,
h.publisher.dfpQueries,
h.publisher.dfpRevenueCpm,
h.publisher.dfpRevenueCpc,
h.publisher.dfpViewableImpressions,
h.publisher.dfpPagesViewed,
h.publisher.adsenseBackfillDfpClicks,
h.publisher.adsenseBackfillDfpImpressions,
h.publisher.adsenseBackfillDfpMatchedQueries,
h.publisher.adsenseBackfillDfpMeasurableImpressions,
h.publisher.adsenseBackfillDfpQueries,
h.publisher.adsenseBackfillDfpRevenueCpm,
h.publisher.adsenseBackfillDfpRevenueCpc,
h.publisher.adsenseBackfillDfpViewableImpressions,
h.publisher.adsenseBackfillDfpPagesViewed,
h.publisher.adxBackfillDfpClicks,
h.publisher.adxBackfillDfpImpressions,
h.publisher.adxBackfillDfpMatchedQueries,
h.publisher.adxBackfillDfpMeasurableImpressions,
h.publisher.adxBackfillDfpQueries,
h.publisher.adxBackfillDfpRevenueCpm,
h.publisher.adxBackfillDfpRevenueCpc,
h.publisher.adxBackfillDfpViewableImpressions,
h.publisher.adxBackfillDfpPagesViewed,
h.publisher.adxClicks,
h.publisher.adxImpressions,
h.publisher.adxMatchedQueries,
h.publisher.adxMeasurableImpressions,
h.publisher.adxQueries,
h.publisher.adxRevenue,
h.publisher.adxViewableImpressions,
h.publisher.adxPagesViewed,
h.publisher.adsViewed,
h.publisher.adsUnitsViewed,
h.publisher.adsUnitsMatched,
h.publisher.viewableAdsViewed,
h.publisher.measurableAdsViewed,
h.publisher.adsPagesViewed,
h.publisher.adsClicked,
h.publisher.adsRevenue,
h.publisher.dfpAdGroup,
h.publisher.dfpAdUnits,
h.publisher.dfpNetworkId,
hcustomVariables.index as hcustomVariableIndex,
hcustomVariables.customVarName,
hcustomVariables.customVarValue,
hcustomDimensions.index as customDimensionsIndex,
hcustomDimensions.value as customDimensionsvalue,
hcustomMetrics.index as hcustoMetricsIndex,
hcustomMetrics.value as hcustomMetricsValue,
h.type,
h.social.socialInteractionNetwork,
h.social.socialInteractionAction,
h.social.socialInteractions,
h.social.socialInteractionTarget,
h.social.socialNetwork,
h.social.uniqueSocialInteractions,
h.social.hasSocialSourceReferral,
h.social.socialInteractionNetworkAction,
h.latencyTracking.pageLoadSample,
h.latencyTracking.pageLoadTime,
h.latencyTracking.pageDownloadTime,
h.latencyTracking.redirectionTime,
h.latencyTracking.speedMetricsSample,
h.latencyTracking.domainLookupTime,
h.latencyTracking.serverConnectionTime,
h.latencyTracking.serverResponseTime,
h.latencyTracking.domLatencyMetricsSample,
h.latencyTracking.domInteractiveTime,
h.latencyTracking.domContentLoadedTime,
h.latencyTracking.userTimingValue,
h.latencyTracking.userTimingSample,
h.latencyTracking.userTimingVariable,
h.latencyTracking.userTimingCategory,
h.latencyTracking.userTimingLabel,
sourcePropertyInfo.sourcePropertyDisplayName,
sourcePropertyInfo.sourcePropertyTrackingId,
h.contentGroup.contentGroup1,
h.contentGroup.contentGroup2,
h.contentGroup.contentGroup3,
h.contentGroup.contentGroup4,
h.contentGroup.contentGroup5,
h.contentGroup.previousContentGroup1,
h.contentGroup.previousContentGroup2,
h.contentGroup.previousContentGroup3,
h.contentGroup.previousContentGroup4,
h.contentGroup.previousContentGroup5,
h.contentGroup.contentGroupUniqueViews1,
h.contentGroup.contentGroupUniqueViews2,
h.contentGroup.contentGroupUniqueViews3,
h.contentGroup.contentGroupUniqueViews4,
h.contentGroup.contentGroupUniqueViews5,
h.dataSource,
hpublisher.dfpClicks as hpublisherDfpclicks,
hpublisher.dfpImpressions as hpublisherDfpImpressions,
hpublisher.dfpMatchedQueries as hpublisherDfpMatchedQueries,
hpublisher.dfpMeasurableImpressions as hpublisherDfpMeasurableImpressions,
hpublisher.dfpQueries as hpublisherDfpQueries,
hpublisher.dfpRevenueCpm as hpublisherDfpRevenueCpm,
hpublisher.dfpRevenueCpc as hpublisherDfpRevenueCpc,
hpublisher.dfpViewableImpressions as hpublisherDfpViewableImpressions,
hpublisher.dfpPagesViewed as hpublisherDfpPagesViewed,
hpublisher.adsenseBackfillDfpClicks as hpublisherAdsenseBackfillDfpClicks,
hpublisher.adsenseBackfillDfpImpressions as hpublisherAdsenseBackfillDfpImpressions,
hpublisher.adsenseBackfillDfpMatchedQueries as hpublisherAdsenseBackfillDfpMatchedQueries,
hpublisher.adsenseBackfillDfpMeasurableImpressions as hpublisherAdsenseBackfillDfpMeasurableImpressions,
hpublisher.adsenseBackfillDfpQueries as hpublisherAdsenseBackfillDfpQueries,
hpublisher.adsenseBackfillDfpRevenueCpm as hpublisherAdsenseBackfillDfpRevenueCpm,
hpublisher.adsenseBackfillDfpRevenueCpc as hpublisherAdsenseBackfillDfpRevenueCpc,
hpublisher.adsenseBackfillDfpViewableImpressions as hpublisherAdsenseBackfillDfpViewableImpressions,
hpublisher.adsenseBackfillDfpPagesViewed as hpublisherAdsenseBackfillDfpPagesViewed,
hpublisher.adxBackfillDfpClicks as hpublisherAdxBackfillDfpClicks,
hpublisher.adxBackfillDfpImpressions as hpublisherAdxBackfillDfpImpressions,
hpublisher.adxBackfillDfpMatchedQueries as hpublisherAdxBackfillDfpMatchedQueries,
hpublisher.adxBackfillDfpMeasurableImpressions as hpublisherAdxBackfillDfpMeasurableImpressions,
hpublisher.adxBackfillDfpQueries as hpublisherAdxBackfillDfpQueries,
hpublisher.adxBackfillDfpRevenueCpm as hpublisherAdxBackfillDfpRevenueCpm,
hpublisher.adxBackfillDfpRevenueCpc as hpublisherAdxBackfillDfpRevenueCpc,
hpublisher.adxBackfillDfpViewableImpressions as hpublisherAdxBackfillDfpViewableImpressions,
hpublisher.adxBackfillDfpPagesViewed as hpublisherAdxBackfillDfpPagesViewed,
hpublisher.adxClicks as hpublisherAdxClicks,
hpublisher.adxImpressions as hpublisherAdxImpressions,
hpublisher.adxMatchedQueries as hpublisherAdxMatchedQueries,
hpublisher.adxMeasurableImpressions as hpublisherAdxMeasurableImpressions,
hpublisher.adxQueries as hpublisherAdxQueries,
hpublisher.adxRevenue as hpublisherAdxRevenue,
hpublisher.adxViewableImpressions as hpublisherAdxViewableImpressions,
hpublisher.adxPagesViewed as hpublisherAdxPagesViewed,
hpublisher.adsViewed as hpublisherAdsViewed,
hpublisher.adsUnitsViewed as hpublisherAdsUnitsViewed,
hpublisher.adsUnitsMatched as hpublisherAdsUnitsMatched,
hpublisher.viewableAdsViewed as hpublisherViewableAdsViewed,
hpublisher.measurableAdsViewed as hpublisherMeasurableAdsViewed,
hpublisher.adsPagesViewed as hpublisherAdsPagesViewed,
hpublisher.adsClicked as hpublisherAdsClicked,
hpublisher.adsRevenue as hpublisherAdsRevenue,
hpublisher.dfpAdGroup as hpublisherDfpAdGroup,
hpublisher.dfpAdUnits as hpublisherDfpAdUnits,
hpublisher.dfpNetworkId as hpublisherDfpNetworkId,
fullVisitorId,
userId,
clientId,
channelGrouping,
socialEngagementType
FROM `project_id.dataset.table`
LEFT JOIN UNNEST(customDimensions) as cd
LEFT JOIN UNNEST(hits) as h
LEFT JOIN UNNEST(h.product) as hp
LEFT JOIN UNNEST(hp.customDimensions) as hpc
LEFT JOIN UNNEST(hp.customMetrics) as hpCustomMetrics
LEFT JOIN UNNEST(h.promotion) as hpromotion
LEFT JOIN UNNEST(h.experiment) as hExperiment
LEFT JOIN UNNEST(h.customVariables) as hcustomVariables
LEFT JOIN UNNEST(h.customDimensions) as hcustomDimensions
LEFT JOIN UNNEST(h.customMetrics) as hcustomMetrics
LEFT JOIN UNNEST(h.publisher_infos) as hpublisher"""
df = query_to_dataframe(sql_statement)
df.to_csv("sample1.csv")
ERROR:
ConnectionError
Traceback (most recent call last)
<ipython-input-60-a51dc6a6e704> in <module>()
340 LEFT JOIN UNNEST(h.publisher_infos) as hpublisher"""
341
--> 342 df = query_to_dataframe(sql_statement)
343 df.to_csv("sample1.csv")
344
<ipython-input-60-a51dc6a6e704> in query_to_dataframe(sql_statement)
16 project_id=project_id,
17 credentials=credentials,
---> 18 dialect='standard')
19
20
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\gbq.pyc in read_gbq(query, project_id, index_col, col_order, reauth, auth_local_webserver, dialect, location, configuration, credentials, private_key, verbose)
147 auth_local_webserver=auth_local_webserver, dialect=dialect,
148 location=location, configuration=configuration,
--> 149 credentials=credentials, verbose=verbose, private_key=private_key)
150
151
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas_gbq\gbq.pyc in read_gbq(query, project_id, index_col, col_order, reauth, auth_local_webserver, dialect, location, configuration, credentials, verbose, private_key)
834 private_key=private_key,
835 )
--> 836 schema, rows = connector.run_query(query, configuration=configuration)
837 final_df = _parse_data(schema, rows)
838
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas_gbq\gbq.pyc in run_query(self, query, **kwargs)
484 except self.http_error as ex:
485 self.process_http_error(ex)
--> 486 result_rows = list(rows_iter)
487 total_rows = rows_iter.total_rows
488 schema = {
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\page_iterator.pyc in _items_iter(self)
202 def _items_iter(self):
203 """Iterator for each item returned."""
--> 204 for page in self._page_iter(increment=False):
205 for item in page:
206 self.num_results += 1
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\page_iterator.pyc in _page_iter(self, increment)
239 self.num_results += page.num_items
240 yield page
--> 241 page = self._next_page()
242
243 #abc.abstractmethod
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\page_iterator.pyc in _next_page(self)
359 """
360 if self._has_next_page():
--> 361 response = self._get_next_page_response()
362 items = response.get(self._items_key, ())
363 page = Page(self, items, self.item_to_value)
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.pyc in _get_next_page_response(self)
1304 params["maxResults"] = self._page_size
1305 return self.api_request(
-> 1306 method=self._HTTP_METHOD, path=self.path, query_params=params
1307 )
1308
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\client.pyc in _call_api(self, retry, **kwargs)
379 if retry:
380 call = retry(call)
--> 381 return call()
382
383 def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY):
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\retry.pyc in retry_wrapped_func(*args, **kwargs)
268 sleep_generator,
269 self._deadline,
--> 270 on_error=on_error,
271 )
272
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\retry.pyc in retry_target(target, predicate, sleep_generator, deadline, on_error)
177 for sleep in sleep_generator:
178 try:
--> 179 return target()
180
181 # pylint: disable=broad-except
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\_http.pyc in api_request(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object)
313 content_type=content_type,
314 headers=headers,
--> 315 target_object=_target_object,
316 )
317
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\_http.pyc in _make_request(self, method, url, data, content_type, headers, target_object)
190 headers["User-Agent"] = self.USER_AGENT
191
--> 192 return self._do_request(method, url, headers, data, target_object)
193
194 def _do_request(
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\_http.pyc in _do_request(self, method, url, headers, data, target_object)
219 :returns: The HTTP response.
220 """
--> 221 return self.http.request(url=url, method=method, headers=headers, data=data)
222
223 def api_request(
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\google\auth\transport\requests.pyc in request(self, method, url, data, headers, **kwargs)
206
207 response = super(AuthorizedSession, self).request(
--> 208 method, url, data=data, headers=request_headers, **kwargs)
209
210 # If the response indicated that the credentials needed to be
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\requests\sessions.pyc in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\requests\sessions.pyc in send(self, request, **kwargs)
644
645 # Send the request
--> 646 r = adapter.send(request, **kwargs)
647
648 # Total elapsed time of the request (approximately)
C:\Users\asmohammad\AppData\Local\Continuum\anaconda3\lib\site-packages\requests\adapters.pyc in send(self, request, stream, timeout, verify, cert, proxies)
496
497 except (ProtocolError, socket.error) as err:
--> 498 raise ConnectionError(err, request=request)
499
500 except MaxRetryError as e:
ConnectionError: ('Connection aborted.', error("(10060, 'WSAETIMEDOUT')",))
It seems pd.read_gbq() is trying to load everything into memory, causing the MemoryError.
You may try iterating over the results of the same query using just the google-cloud-bigquery facilities, e.g.:
from google.cloud.bigquery import Client

client = Client()
query = client.query(sql)
row_count = 0
for row in query:
    row_count += 1
You could also try converting to a dataframe directly using google-cloud-bigquery:
from google.cloud.bigquery import Client
client = Client()
query = client.query(sql)
dataframe = query.to_dataframe()
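If even the full DataFrame does not fit in memory, a minimal sketch that streams rows straight to CSV (the output filename matches the question; the header is taken from the result schema) so the whole result never has to be held at once:
import csv
from google.cloud.bigquery import Client

client = Client()
rows = client.query(sql).result()  # RowIterator: pages are fetched lazily

with open("sample1.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow([field.name for field in rows.schema])  # header from the schema
    for row in rows:
        writer.writerow(list(row.values()))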
