Python BigQuery really strange timeout - python

I am building a service to stream data into BigQuery. The following code works flawlessly if I remove the part that takes 4-5 minutes to load (I am precaching some mappings):
from googleapiclient import discovery
from oauth2client import file
from oauth2client import client
from oauth2client import tools
from oauth2client.client import SignedJwtAssertionCredentials
## load email and key
credentials = SignedJwtAssertionCredentials(email, key, scope='https://www.googleapis.com/auth/bigquery')
if credentials is None or credentials.invalid:
raw_input('invalid key')
exit(0)
http = httplib2.Http()
http = credentials.authorize(http)
service = discovery.build('bigquery', 'v2', http=http)
## this does not hang, because it is before the long operation
service.tabledata().insertAll(...)
## some code that takes 5 minutes to execute
r = load_mappings()
## aka long operation
## this hangs
service.tabledata().insertAll(...)
If I leave in the part that takes 5 minutes to execute, the Google API stops responding to the requests I make afterwards. It simply hangs and doesn't even return an error. I left it for 10-20 minutes to see what would happen, and it just sat there. If I hit Ctrl+C, I get this:
^CTraceback (most recent call last):
File "./to_bigquery.py", line 116, in <module>
main(sys.argv)
File "./to_bigquery.py", line 101, in main
print service.tabledata().insertAll(projectId=p_n, datasetId="XXX", tableId="%s_XXXX" % str(shop), body=_mybody).execute()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 716, in execute
body=self.body, headers=self.headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 490, in new_request
redirections, connection_type)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1593, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1335, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1291, in _conn_request
response = conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1030, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline()
File "/usr/lib/python2.7/socket.py", line 430, in readline
data = recv(1)
File "/usr/lib/python2.7/ssl.py", line 241, in recv
return self.read(buflen)
File "/usr/lib/python2.7/ssl.py", line 160, in read
return self._sslobj.read(len)
I have managed to temporarily fix it by placing the big loading operation BEFORE the credentials authorization, but it seems like a bug to me. What am I missing?
EDIT: I have managed to get an error, while waiting:
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 716, in execute
body=self.body, headers=self.headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 490, in new_request
redirections, connection_type)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1593, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1335, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1291, in _conn_request
response = conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1030, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline()
File "/usr/lib/python2.7/socket.py", line 430, in readline
data = recv(1)
File "/usr/lib/python2.7/ssl.py", line 241, in recv
return self.read(buflen)
File "/usr/lib/python2.7/ssl.py", line 160, in read
return self._sslobj.read(len)
socket.error: [Errno 110] Connection timed out
It reported a timeout. This seems to happen with cold tables.

def refresh_bq(self):
    """Recreate the BigQuery service object and store it on ``self.service``.

    Fresh service-account credentials are created, a brand-new
    ``httplib2.Http`` object is authorized with them, and a ``bigquery``
    ``v2`` discovery client is built on top of that object.

    ``email`` and ``key`` are read from the enclosing scope; they are not
    defined in this function.

    If the credentials are missing or flagged invalid, the function waits
    for the user to press Enter and then terminates the process with exit
    status 0.
    """
    creds = SignedJwtAssertionCredentials(
        email, key, scope='https://www.googleapis.com/auth/bigquery')
    if creds is None or creds.invalid:
        # Pause so the message can be read before the process exits.
        raw_input('invalid key')
        exit(0)
    # A new Http object is created on every call rather than reusing an
    # earlier one.
    authorized_http = creds.authorize(httplib2.Http())
    self.service = discovery.build('bigquery', 'v2', http=authorized_http)
I am running self.refresh_bq() every time I do some inserts that do not require preprocessing, and it works flawlessly. It is a messy hack, but I needed to make it work ASAP. There is definitely a bug somewhere.

Related

BrokenPipeError when using Gmail API

I'm using Gmail API to send an email with attachments in Python 3.
I'm trying the same code as google developers as shown below:
https://developers.google.com/gmail/api/guides/sending
The problem is that when the attachments are 4.2KB or 2.6MB, the code works well; but when the attachments are 3.0MB, 9.6MB or bigger, an error occurs:
Traceback (most recent call last):
File "quickstart2.py", line 184, in <module>
main()
File "quickstart2.py", line 170, in main
send_message(service, "me", message)
File "quickstart2.py", line 147, in send_message
message = (service.users().messages().send(userId=user_id, body=message).execute())
File "/home/yizhu/anaconda3/lib/python3.6/site-packages/oauth2client/_helpers.py", line 133, in positional_wrapper
return wrapped(*args, **kwargs)
File "/home/yizhu/anaconda3/lib/python3.6/site-packages/googleapiclient/http.py", line 837, in execute
method=str(self.method), body=self.body, headers=self.headers)
File "/home/yizhu/anaconda3/lib/python3.6/site-packages/googleapiclient/http.py", line 176, in _retry_request
raise exception
File "/home/yizhu/anaconda3/lib/python3.6/site-packages/googleapiclient/http.py", line 163, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File "/home/yizhu/anaconda3/lib/python3.6/site-packages/oauth2client/transport.py", line 175, in new_request
redirections, connection_type)
File "/home/yizhu/anaconda3/lib/python3.6/site-packages/oauth2client/transport.py", line 282, in request
connection_type=connection_type)
File "/home/yizhu/anaconda3/lib/python3.6/site-packages/httplib2/__init__.py", line 1322, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/home/yizhu/anaconda3/lib/python3.6/site-packages/httplib2/__init__.py", line 1072, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/home/yizhu/anaconda3/lib/python3.6/site-packages/httplib2/__init__.py", line 996, in _conn_request
conn.request(method, request_uri, body, headers)
File "/home/yizhu/anaconda3/lib/python3.6/http/client.py", line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/home/yizhu/anaconda3/lib/python3.6/http/client.py", line 1285, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/home/yizhu/anaconda3/lib/python3.6/http/client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/home/yizhu/anaconda3/lib/python3.6/http/client.py", line 1065, in _send_output
self.send(chunk)
File "/home/yizhu/anaconda3/lib/python3.6/http/client.py", line 986, in send
self.sock.sendall(data)
File "/home/yizhu/anaconda3/lib/python3.6/ssl.py", line 972, in sendall
v = self.send(byte_view[count:])
File "/home/yizhu/anaconda3/lib/python3.6/ssl.py", line 941, in send
return self._sslobj.write(data)
File "/home/yizhu/anaconda3/lib/python3.6/ssl.py", line 642, in write
return self._sslobj.write(data)
BrokenPipeError: [Errno 32] Broken pipe
What's the problem here?
Thanks
It seems an exception is raised at _retry_request.
I haven't encountered this error myself, but there's a thread on GitHub discussing the same error:
https://github.com/google/google-api-python-client/issues/218
Try using httplib2shim; it seems oauth2client has still not been replaced by google-auth.
Another suggestion I found was to use the MEDIA /upload option for files over 10 MB. Docs for how to use /upload: https://developers.google.com/gmail/api/v1/reference/users/messages/send

Pull timeout with google-api-python-client

I am trying to set a user defined timeout on message pull with 'returnImmediately' = False :
PUBSUB_SCOPES = ['https://www.googleapis.com/auth/pubsub']
credentials = oauth2client.GoogleCredentials.get_application_default()
if credentials.create_scoped_required():
credentials = credentials.create_scoped(PUBSUB_SCOPES)
http = httplib2.Http(timeout=timeout)
credentials.authorize(http)
return discovery.build('pubsub', 'v1', http=http)
When the timeout is < 90 seconds I get the following errors:
resp = client.projects().subscriptions().pull(subscription=subscription, body=body).execute()
File "venv\lib\site-packages\oauth2client\util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "venv\lib\site-packages\googleapiclient\http.py", line 755, in execute
method=str(self.method), body=self.body, headers=self.headers)
File "venv\lib\site-packages\googleapiclient\http.py", line 93, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File "venv\lib\site-packages\oauth2client\client.py", line 622, in new_request
redirections, connection_type)
File "venv\lib\site-packages\httplib2\__init__.py", line 1609, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "venv\lib\site-packages\httplib2\__init__.py", line 1351, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "venv\lib\site-packages\httplib2\__init__.py", line 1307, in _conn_request
response = conn.getresponse()
File "C:\python27\Lib\httplib.py", line 1074, in getresponse
response.begin()
File "C:\python27\Lib\httplib.py", line 415, in begin
version, status, reason = self._read_status()
File "C:\python27\Lib\httplib.py", line 371, in _read_status
line = self.fp.readline(_MAXLINE + 1)
File "C:\python27\Lib\socket.py", line 476, in readline
data = self._sock.recv(self._rbufsize)
File "C:\python27\Lib\ssl.py", line 714, in recv
return self.read(buflen)
File "C:\python27\Lib\ssl.py", line 608, in read
v = self._sslobj.read(len or 1024)
SSLError: ('The read operation timed out',)
Thanks.
Unfortunately these client libraries do not support forwarding the timeout values to the server; however, we have just announced the gRPC client libraries, which correctly pass the deadline to the server.
As a workaround for the current libraries, either use returnImmediately=true, or set a deadline higher than 90 seconds, as your question implies.

Google Cloud Storage api performance regressions

I have a python 3.5 app that makes calls to Google Cloud Storage using the python sdk.
Every once in a while, for 10-30 minutes, all calls to the API fail with BrokenPipeError or ssl.SSLError errors. After some time they just start working again; I have not noticed a pattern as to why.
Is this a known issue? Is it specific to the python sdk or is this a real performance regression on the side of google?
It should also be noted that these errors will emanate from the same code running on my local machine as well as from a GCE machine.
The trace for BrokenPipe:
Traceback (most recent call last):
File "oauth2client/util.py", line 140, in positional_wrapper
return wrapped(*args, **kwargs)
File "googleapiclient/http.py", line 722, in execute
body=self.body, headers=self.headers)
File "oauth2client/client.py", line 596, in new_request
redirections, connection_type)
File "httplib2/__init__.py", line 1314, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "httplib2/__init__.py", line 1064, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "httplib2/__init__.py", line 988, in _conn_request
conn.request(method, request_uri, body, headers)
File "/usr/lib/python3.5/http/client.py", line 1083, in request
self._send_request(method, url, body, headers)
File "/usr/lib/python3.5/http/client.py", line 1128, in _send_request
self.endheaders(body)
File "/usr/lib/python3.5/http/client.py", line 1079, in endheaders
self._send_output(message_body)
File "/usr/lib/python3.5/http/client.py", line 911, in _send_output
self.send(msg)
File "/usr/lib/python3.5/http/client.py", line 885, in send
self.sock.sendall(data)
File "/usr/lib/python3.5/ssl.py", line 886, in sendall
v = self.send(data[count:])
File "/usr/lib/python3.5/ssl.py", line 856, in send
return self._sslobj.write(data)
File "/usr/lib/python3.5/ssl.py", line 581, in write
return self._sslobj.write(data)
BrokenPipeError: [Errno 32] Broken pipe
The trace for ssl.SSLError:
File "oauth2client/util.py", line 140, in positional_wrapper
return wrapped(*args, **kwargs)
File "googleapiclient/http.py", line 722, in execute
body=self.body, headers=self.headers)
File "oauth2client/client.py", line 596, in new_request
redirections, connection_type)
File "httplib2/__init__.py", line 1314, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "httplib2/__init__.py", line 1064, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "httplib2/__init__.py", line 1017, in _conn_request
response = conn.getresponse()
File "/usr/lib/python3.5/http/client.py", line 1174, in getresponse
response.begin()
File "/usr/lib/python3.5/http/client.py", line 282, in begin
version, status, reason = self._read_status()
File "/usr/lib/python3.5/http/client.py", line 243, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/usr/lib/python3.5/socket.py", line 575, in readinto
return self._sock.recv_into(b)
File "/usr/lib/python3.5/ssl.py", line 924, in recv_into
return self.read(nbytes, buffer)
File "/usr/lib/python3.5/ssl.py", line 786, in read
return self._sslobj.read(len, buffer)
File "/usr/lib/python3.5/ssl.py", line 570, in read
v = self._sslobj.read(len, buffer)
ssl.SSLError: [SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC] decryption failed or bad record mac (_ssl.c:1974)
This definitely looks like an intermittent issue on Google's side.
The broken pipe issue relates to httplib2 being unable to re-establish an existing connection to their API; this is the error that has the greatest impact on our services. On a few occasions we also received "503 Backend Error".
Our "solution" was basically to allow the connections to close themselves by releasing the client once done and creating a new one for the next request.
Bear in mind, though, that our requests are very sparse; services using Cloud Storage as primary storage probably want to keep the connections open for as long as possible.

pandas read_gbq returns httplib.ResponseNotReady

I am using python with google bigquery to do some operations.
I have a Google BigQuery project named data-wagon.
I created a dataset 'vols'
And a table 'flights'.
This is the code I'm testing:
#
import pandas as pd
projectid = "data-wagon"
data_frame = pd.read_gbq('SELECT * FROM vols.flights', project_id = projectid)
print data_frame.head()
#
When I run it from eclipse, a web page is displayed to ask for authorization, I click Yes but then I have this error message:
Your browser has been opened to visit:
https://accounts.google.com/o/oauth2/auth?scope=....................
If your browser is on a different machine then exit and re-run this
application with the command-line parameter
--noauth_local_webserver
Traceback (most recent call last):
File "C:\Users\a452618\workspace\BigDataTutos\script_big_query.py", line 16, in <module>
data_frame = pd.read_gbq('SELECT * FROM vols.flights', project_id = projectid)
File "C:\Python27\lib\site-packages\pandas\io\gbq.py", line 334, in read_gbq
connector = GbqConnector(project_id, reauth = reauth)
File "C:\Python27\lib\site-packages\pandas\io\gbq.py", line 88, in __init__
self.credentials = self.get_credentials()
File "C:\Python27\lib\site-packages\pandas\io\gbq.py", line 111, in get_credentials
credentials = run_flow(flow, storage, argparser.parse_args([]))
File "C:\Python27\lib\site-packages\oauth2client\util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Python27\lib\site-packages\oauth2client\tools.py", line 225, in run_flow
credential = flow.step2_exchange(code, http=http)
File "C:\Python27\lib\site-packages\oauth2client\util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Python27\lib\site-packages\oauth2client\client.py", line 1982, in step2_exchange
headers=headers)
File "C:\Python27\lib\site-packages\httplib2\__init__.py", line 1608, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "C:\Python27\lib\site-packages\httplib2\__init__.py", line 1350, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "C:\Python27\lib\site-packages\httplib2\__init__.py", line 1306, in _conn_request
response = conn.getresponse()
File "C:\Python27\lib\httplib.py", line 1018, in getresponse
raise ResponseNotReady()
httplib.ResponseNotReady
Could any one help me with this?
Best regards,
Ayoub
Chances are you need to do this or do gcloud auth login.

Upload file larger than 70 MB to google drive using Python

Hi, I tried to upload files to Google Drive using the following code:
def upload_file(self, file_name, path):
    """Insert ``file_name`` into Drive, or update it if it already exists.

    Args:
        file_name: Local path handed to ``MediaFileUpload``; also used as
            the Drive ``title`` of the new file.
        path: Destination passed to ``self.create_path``; ``None`` skips
            that call and leaves the file without a ``parents`` entry.

    Returns:
        None.

    The lookup is delegated to ``self.check_file_exist``; when it returns
    a non-``None`` id, the work is handed to ``self.update_file`` instead
    of inserting a new file.
    """
    parents = None
    if path is not None:
        # presumably returns the id of the destination folder -- confirm
        # against create_path; it is used as a parent id below.
        parents = self.create_path(path)
    mime_type = self.get_mime_type_for(file_name)
    file_id = self.check_file_exist(file_name, parents, mime_type)

    if file_id is not None:
        print("file exists........... updating")
        self.update_file(file_id, file_name)
        return

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("creating file........... %s" % (file_name,))
    print("mime_type %s" % (mime_type,))
    media = MediaFileUpload(file_name, mimetype=mime_type, resumable=True)
    body = {
        'title': file_name,
        'description': 'A test document',
        'mimeType': mime_type,
    }
    if parents is not None:
        body['parents'] = [{'id': parents}]
    # The API response is not used by this method, so it is not kept.
    self.drive_service.files().insert(body=body, media_body=media).execute()
This code works for smaller files (tested up to 25MB), but if I try to upload large files (70MB) the system gives this error message:
Traceback (most recent call last):
File "googledrive.py", line 176, in <module>
gd.upload_file("test.mp4","/media/media")
File "googledrive.py", line 122, in upload_file
f = self.drive_service.files().insert(body=body, media_body=media).execute()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/apiclient/http.py", line 688, in execute
_, body = self.next_chunk(http=http, num_retries=num_retries)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/apiclient/http.py", line 867, in next_chunk
headers=headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 490, in new_request
redirections, connection_type)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1570, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1317, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1286, in _conn_request
response = conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1045, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 409, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 373, in _read_status
raise BadStatusLine(line)
httplib.BadStatusLine: ''
If your upload is taking longer than about an hour, your token might expire and your upload will fail. This is a known issue.
Also, see Google Mirror API throwing BadStatusLine exception (Python)

Categories

Resources