Upload file larger than 70 MB to google drive using Python - python

Hi, I tried to upload files to Google Drive using the following code:
def upload_file(self,file_name,path):
parents = None
if not path == None:
parents = self.create_path(path)
mime_type = self.get_mime_type_for(file_name)
file_id = self.check_file_exist(file_name,parents,mime_type)
if file_id == None:
print "creating file...........",file_name
print "mime_type",mime_type
media = MediaFileUpload(file_name, mimetype=mime_type, resumable=True)
body = {
'title': file_name,
'description': 'A test document',
'mimeType': mime_type
}
if not parents == None:
body['parents'] = [{'id': parents}]
f = self.drive_service.files().insert(body=body, media_body=media).execute()
else:
print "file exists........... updating"
self.update_file(file_id, file_name)
This code works for smaller files (tested up to 25 MB). But when I try to upload a large file (70 MB), the system gives this error message:
Traceback (most recent call last):
File "googledrive.py", line 176, in <module>
gd.upload_file("test.mp4","/media/media")
File "googledrive.py", line 122, in upload_file
f = self.drive_service.files().insert(body=body, media_body=media).execute()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/apiclient/http.py", line 688, in execute
_, body = self.next_chunk(http=http, num_retries=num_retries)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/apiclient/http.py", line 867, in next_chunk
headers=headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 490, in new_request
redirections, connection_type)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1570, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1317, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1286, in _conn_request
response = conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1045, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 409, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 373, in _read_status
raise BadStatusLine(line)
httplib.BadStatusLine: ''

If your upload takes longer than about an hour, your token might expire and the upload will fail. This is a known issue.
Also, see Google Mirror API throwing BadStatusLine exception (Python)

Related

Using PyDrive with proxy for authenticating from a server

I've been trying to authenticate from a server using PyDrive. I'm trying to use a proxy but I keep getting a 403 Forbidden error. I'm not sure if my code for using the proxy is correct, or if this is even possible.
The error:
Traceback (most recent call last):
File "/Users/user/Desktop/Python/files/test_post.py", line 67, in <module>
file1.Upload(param={'supportsAllDrives': True, "http": gauth.http})
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pydrive/files.py", line 285, in Upload
self._FilesInsert(param=param)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pydrive/auth.py", line 75, in _decorated
return decoratee(self, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pydrive/files.py", line 368, in _FilesInsert
metadata = self.auth.service.files().insert(**param).execute(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/_helpers.py", line 131, in positional_wrapper
return wrapped(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/http.py", line 901, in execute
_, body = self.next_chunk(http=http, num_retries=num_retries)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/_helpers.py", line 131, in positional_wrapper
return wrapped(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/http.py", line 1006, in next_chunk
resp, content = _retry_request(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/http.py", line 190, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/oauth2client/transport.py", line 173, in new_request
resp, content = request(orig_request_method, uri, method, body,
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/oauth2client/transport.py", line 280, in request
return http_callable(uri, method=method, body=body, headers=headers,
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/__init__.py", line 1701, in request
(response, content) = self._request(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/__init__.py", line 1421, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/__init__.py", line 1343, in _conn_request
conn.connect()
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/__init__.py", line 1133, in connect
sock.connect((self.host, self.port))
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/socks.py", line 512, in connect
self.__negotiatehttp(destpair[0], destpair[1])
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/socks.py", line 465, in __negotiatehttp
raise HTTPError((statuscode, statusline[2]))
httplib2.socks.HTTPError: (403, b'Forbidden')
I should note: the script seems to work fine until it reaches the file upload command with PyDrive. I've tried passing http in the params and also leaving it out; neither works.
Here is the code for setting up the proxy:
# Configure an HTTP proxy for PyDrive's GoogleAuth object, then load, obtain or
# refresh the OAuth credentials.
# NOTE(review): indentation was lost when this snippet was pasted, so the
# bodies of the if/elif/else below appear flat; the exact end of the `else`
# body cannot be determined from the text shown here.
# Proxy description: HTTP proxy type "no tunnel", host 'myproxyhost', port 8080.
proxy_info = httplib2.ProxyInfo(proxy_type=httplib2.socks.PROXY_TYPE_HTTP_NO_TUNNEL,
proxy_host='myproxyhost',
proxy_port=8080)
print("Proxy info variable set")
# Attach an httplib2.Http built with the proxy settings to gauth; the same
# object is later handed to Upload() through param["http"] (see the traceback).
gauth.http = httplib2.Http(proxy_info=proxy_info)
print("gauth.http is set")
# Try to load saved client credentials from mycreds.txt
gauth.LoadCredentialsFile("mycreds.txt")
if gauth.credentials is None:
# No saved credentials: authenticate from scratch, requesting offline access
# and forcing the approval prompt before running the command-line flow.
print("Gauth credentials is none conditional")
gauth.GetFlow()
gauth.flow.params.update({'access_type': 'offline'})
gauth.flow.params.update({'approval_prompt': 'force'})
gauth.CommandLineAuth()
elif gauth.access_token_expired:
print("Gauth access token expired conditional.")
# Saved credentials exist but the access token expired: refresh them
gauth.Refresh()
else:
print("Gauth authroized conditional")
# Saved credentials are present and not expired.
# NOTE(review): this branch calls CommandLineAuth() again even though the
# credentials were already loaded — confirm that this is intended.
gauth.CommandLineAuth()
print("Commandlineauth allowed!")
gauth.Authorize()

Timeout on spreadsheet api

Do you know why I get this error?
timeout: The read operation timed out
How can I catch it, please? Which package provides the timeout exception?
File "C:\Users\root\Desktop\bot\sheetapi.py", line 58, in
update
spreadsheetId=SAMPLE_SPREADSHEET_ID, range=SAMPLE_RANGE_NAME, valueInputOption='USER_ENTERED', body=body).execute()
File
"C:\Users\root\Anaconda3\lib\site-packages\googleapiclient\_helpers.py",
line 134, in positional_wrapper
return wrapped(*args, **kwargs)
File
"C:\Users\root\Anaconda3\lib\site-packages\googleapiclient\http.py",
line 892, in execute
headers=self.headers,
File
"C:\Users\root\Anaconda3\lib\site-packages\googleapiclient\http.py",
line 195, in _retry_request
raise exception
File
"C:\Users\root\Anaconda3\lib\site-packages\googleapiclient\http.py",
line 172, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File
"C:\Users\root\Anaconda3\lib\site-packages\google_auth_httplib2.py",
line 198, in request
uri, method, body=body, headers=request_headers, **kwargs)
File
"C:\Users\root\Anaconda3\lib\site-packages\httplib2\__init__.py", line
1514, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File
"C:\Users\root\Anaconda3\lib\site-packages\httplib2\__init__.py", line
1264, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File
"C:\Users\root\Anaconda3\lib\site-packages\httplib2\__init__.py", line
1217, in _conn_request
response = conn.getresponse()
File "C:\Users\root\Anaconda3\lib\http\client.py", line 1331, in
getresponse
response.begin()
File "C:\Users\root\Anaconda3\lib\http\client.py", line 297, in
begin
version, status, reason = self._read_status()
File "C:\Users\root\Anaconda3\lib\http\client.py", line 258, in
_read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\root\Anaconda3\lib\socket.py", line 586, in readinto
return self._sock.recv_into(b)
File "C:\Users\root\Anaconda3\lib\ssl.py", line 1009, in recv_into
return self.read(nbytes, buffer)
File "C:\Users\root\Anaconda3\lib\ssl.py", line 871, in read
return self._sslobj.read(len, buffer)
File "C:\Users\root\Anaconda3\lib\ssl.py", line 631, in read
v = self._sslobj.read(len, buffer)
timeout: The read operation timed out

Pull timeout with google-api-python-client

I am trying to set a user defined timeout on message pull with 'returnImmediately' = False :
PUBSUB_SCOPES = ['https://www.googleapis.com/auth/pubsub']
credentials = oauth2client.GoogleCredentials.get_application_default()
if credentials.create_scoped_required():
credentials = credentials.create_scoped(PUBSUB_SCOPES)
http = httplib2.Http(timeout=timeout)
credentials.authorize(http)
return discovery.build('pubsub', 'v1', http=http)
When the timeout is < 90 seconds I get the following errors:
resp = client.projects().subscriptions().pull(subscription=subscription, body=body).execute()
File "venv\lib\site-packages\oauth2client\util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "venv\lib\site-packages\googleapiclient\http.py", line 755, in execute
method=str(self.method), body=self.body, headers=self.headers)
File "venv\lib\site-packages\googleapiclient\http.py", line 93, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File "venv\lib\site-packages\oauth2client\client.py", line 622, in new_request
redirections, connection_type)
File "venv\lib\site-packages\httplib2\__init__.py", line 1609, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "venv\lib\site-packages\httplib2\__init__.py", line 1351, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "venv\lib\site-packages\httplib2\__init__.py", line 1307, in _conn_request
response = conn.getresponse()
File "C:\python27\Lib\httplib.py", line 1074, in getresponse
response.begin()
File "C:\python27\Lib\httplib.py", line 415, in begin
version, status, reason = self._read_status()
File "C:\python27\Lib\httplib.py", line 371, in _read_status
line = self.fp.readline(_MAXLINE + 1)
File "C:\python27\Lib\socket.py", line 476, in readline
data = self._sock.recv(self._rbufsize)
File "C:\python27\Lib\ssl.py", line 714, in recv
return self.read(buflen)
File "C:\python27\Lib\ssl.py", line 608, in read
v = self._sslobj.read(len or 1024)
SSLError: ('The read operation timed out',)
Thanks.
Unfortunately these client libraries do not support forwarding the timeout values to the server; however, we have just announced the gRPC client libraries, which correctly pass the deadline to the server.
As a workaround for the current libraries, either use returnImmediately=true, or set a deadline higher than 90 seconds, as your question implies.

pandas read_gbq returns httplib.ResponseNotReady

I am using python with google bigquery to do some operations.
I have a Google BigQuery project named data-wagon.
I created a dataset 'vols'
And a table 'flights'.
This is the code I'm testing:
#
# Minimal script: run one query against BigQuery through pandas and print the
# first rows of the result.
import pandas as pd
# BigQuery project id passed to read_gbq as project_id.
projectid = "data-wagon"
# Query the `flights` table of the `vols` dataset; per the traceback below,
# this call is where the OAuth flow (oauth2client run_flow) is triggered.
data_frame = pd.read_gbq('SELECT * FROM vols.flights', project_id = projectid)
# Python 2 print statement.
print data_frame.head()
#
When I run it from Eclipse, a web page is displayed asking for authorization. I click Yes, but then I get this error message:
Your browser has been opened to visit:
https://accounts.google.com/o/oauth2/auth?scope=....................
If your browser is on a different machine then exit and re-run this
application with the command-line parameter
--noauth_local_webserver
Traceback (most recent call last):
File "C:\Users\a452618\workspace\BigDataTutos\script_big_query.py", line 16, in <module>
data_frame = pd.read_gbq('SELECT * FROM vols.flights', project_id = projectid)
File "C:\Python27\lib\site-packages\pandas\io\gbq.py", line 334, in read_gbq
connector = GbqConnector(project_id, reauth = reauth)
File "C:\Python27\lib\site-packages\pandas\io\gbq.py", line 88, in __init__
self.credentials = self.get_credentials()
File "C:\Python27\lib\site-packages\pandas\io\gbq.py", line 111, in get_credentials
credentials = run_flow(flow, storage, argparser.parse_args([]))
File "C:\Python27\lib\site-packages\oauth2client\util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Python27\lib\site-packages\oauth2client\tools.py", line 225, in run_flow
credential = flow.step2_exchange(code, http=http)
File "C:\Python27\lib\site-packages\oauth2client\util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Python27\lib\site-packages\oauth2client\client.py", line 1982, in step2_exchange
headers=headers)
File "C:\Python27\lib\site-packages\httplib2\__init__.py", line 1608, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "C:\Python27\lib\site-packages\httplib2\__init__.py", line 1350, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "C:\Python27\lib\site-packages\httplib2\__init__.py", line 1306, in _conn_request
response = conn.getresponse()
File "C:\Python27\lib\httplib.py", line 1018, in getresponse
raise ResponseNotReady()
httplib.ResponseNotReady
Could anyone help me with this?
Best regards,
Ayoub
Chances are you need to do this or do gcloud auth login.

Python BigQuery really strange timeout

I am building a service to stream data into BigQuery. The following code works flawlessly if I remove the part that takes 4-5 minutes to load (I am precaching some mappings).
from googleapiclient import discovery
from oauth2client import file
from oauth2client import client
from oauth2client import tools
from oauth2client.client import SignedJwtAssertionCredentials
## load email and key (service-account address and private key)
credentials = SignedJwtAssertionCredentials(
    email,
    key,
    scope='https://www.googleapis.com/auth/bigquery'
)
if credentials is None or credentials.invalid:
    # Pause so the message can be read, then quit.
    raw_input('invalid key')
    exit(0)

# Build an authorized HTTP object and the BigQuery v2 service on top of it.
http = credentials.authorize(httplib2.Http())
service = discovery.build('bigquery', 'v2', http=http)

## first insert: issued before the long operation, so it does not hang
service.tabledata().insertAll(...)

## long operation: some code that takes 5 minutes to execute
r = load_mappings()

## second insert: issued after the long operation -- this one hangs
service.tabledata().insertAll(...)
If I leave in the part that takes 5 minutes to execute, the Google API stops responding to the requests I make afterwards. It simply hangs and doesn't even return an error. I even left it for 10-20 minutes to see what would happen, and it just sits there. If I hit Ctrl+C, I get this:
^CTraceback (most recent call last):
File "./to_bigquery.py", line 116, in <module>
main(sys.argv)
File "./to_bigquery.py", line 101, in main
print service.tabledata().insertAll(projectId=p_n, datasetId="XXX", tableId="%s_XXXX" % str(shop), body=_mybody).execute()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 716, in execute
body=self.body, headers=self.headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 490, in new_request
redirections, connection_type)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1593, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1335, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1291, in _conn_request
response = conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1030, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline()
File "/usr/lib/python2.7/socket.py", line 430, in readline
data = recv(1)
File "/usr/lib/python2.7/ssl.py", line 241, in recv
return self.read(buflen)
File "/usr/lib/python2.7/ssl.py", line 160, in read
return self._sslobj.read(len)
I have managed to temporarily fix it by placing the big loading operation BEFORE the credentials authorization, but it seems like a bug to me. What am I missing?
EDIT: I have managed to get an error while waiting:
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 716, in execute
body=self.body, headers=self.headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 490, in new_request
redirections, connection_type)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1593, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1335, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1291, in _conn_request
response = conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1030, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline()
File "/usr/lib/python2.7/socket.py", line 430, in readline
data = recv(1)
File "/usr/lib/python2.7/ssl.py", line 241, in recv
return self.read(buflen)
File "/usr/lib/python2.7/ssl.py", line 160, in read
return self._sslobj.read(len)
socket.error: [Errno 110] Connection timed out
It reported a timeout. This seems to happen with cold tables.
def refresh_bq(self):
    """Rebuild the BigQuery service object and store it on ``self.service``.

    Fresh ``SignedJwtAssertionCredentials`` are created from the
    module-level ``email`` and ``key``, a new ``httplib2.Http`` is
    authorized with them, and a new 'bigquery' v2 service is built on that
    connection.  If the credentials are missing or flagged invalid, the
    user is prompted with 'invalid key' and the process exits.
    """
    creds = SignedJwtAssertionCredentials(
        email, key, scope='https://www.googleapis.com/auth/bigquery')
    if creds is None or creds.invalid:
        raw_input('invalid key')
        exit(0)

    # A brand-new Http object is used on every call, so each refresh starts
    # from a fresh connection.
    authorized_http = creds.authorize(httplib2.Http())
    self.service = discovery.build('bigquery', 'v2', http=authorized_http)
I am running self.refresh_bq() every time I do some inserts that do not require preprocessing, and it works flawlessly. It is a messy hack, but I needed to make it work ASAP. There is definitely a bug somewhere.

Categories

Resources