Using PyDrive with proxy for authenticating from a server - python

I've been trying to authenticate from a server using PyDrive. I'm trying to use a proxy but I keep getting a 403 Forbidden error. I'm not sure if my code for using the proxy is correct, or if this is even possible.
The error:
Traceback (most recent call last):
File "/Users/user/Desktop/Python/files/test_post.py", line 67, in <module>
file1.Upload(param={'supportsAllDrives': True, "http": gauth.http})
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pydrive/files.py", line 285, in Upload
self._FilesInsert(param=param)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pydrive/auth.py", line 75, in _decorated
return decoratee(self, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pydrive/files.py", line 368, in _FilesInsert
metadata = self.auth.service.files().insert(**param).execute(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/_helpers.py", line 131, in positional_wrapper
return wrapped(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/http.py", line 901, in execute
_, body = self.next_chunk(http=http, num_retries=num_retries)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/_helpers.py", line 131, in positional_wrapper
return wrapped(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/http.py", line 1006, in next_chunk
resp, content = _retry_request(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/googleapiclient/http.py", line 190, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/oauth2client/transport.py", line 173, in new_request
resp, content = request(orig_request_method, uri, method, body,
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/oauth2client/transport.py", line 280, in request
return http_callable(uri, method=method, body=body, headers=headers,
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/__init__.py", line 1701, in request
(response, content) = self._request(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/__init__.py", line 1421, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/__init__.py", line 1343, in _conn_request
conn.connect()
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/__init__.py", line 1133, in connect
sock.connect((self.host, self.port))
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/socks.py", line 512, in connect
self.__negotiatehttp(destpair[0], destpair[1])
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/httplib2/socks.py", line 465, in __negotiatehttp
raise HTTPError((statuscode, statusline[2]))
httplib2.socks.HTTPError: (403, b'Forbidden')
I should note: the script seems to work fine up until it gets to the file upload command with PyDrive. I've tried passing http in the params and also tried without it. Neither works.
Here is the code for setting up the proxy:
# Build an httplib2 client that goes through an HTTP proxy and attach it to
# the GoogleAuth object before running the credential flow.
# NOTE(review): indentation was lost when this snippet was pasted; the bodies
# of the if/elif/else branches below are presumably meant to be nested, and it
# is unclear where the final else-branch ends — confirm before running.
proxy_info = httplib2.ProxyInfo(proxy_type=httplib2.socks.PROXY_TYPE_HTTP_NO_TUNNEL,
proxy_host='myproxyhost',
proxy_port=8080)
print("Proxy info variable set")
# Every request made through gauth.http is created with the proxy settings above.
gauth.http = httplib2.Http(proxy_info=proxy_info)
print("gauth.http is set")
# Try to load saved client credentials
gauth.LoadCredentialsFile("mycreds.txt")
if gauth.credentials is None:
# Authenticate if they're not there
print("Gauth credentials is none conditional")
gauth.GetFlow()
# Extra OAuth flow parameters set before the command-line auth step.
gauth.flow.params.update({'access_type': 'offline'})
gauth.flow.params.update({'approval_prompt': 'force'})
gauth.CommandLineAuth()
elif gauth.access_token_expired:
print("Gauth access token expired conditional.")
# Refresh them if expired
gauth.Refresh()
else:
print("Gauth authroized conditional")
# Initialize the saved creds
gauth.CommandLineAuth()
print("Commandlineauth allowed!")
# NOTE(review): the trailing ``` on the next line looks like leftover markdown
# fencing rather than Python — confirm and remove.
gauth.Authorize()```

Related

httplib2.socks.HTTPError: (403, b'Forbidden') python apache-beam dataflow

I work in a Google Cloud environment where I don't have internet access. I'm trying to launch a Dataflow job, and I'm using a proxy to access the internet.
When I run a simple wordcount.py with Dataflow, I get this error:
WARNING:apache_beam.utils.retry:Retry with exponential backoff: waiting for 4.750968074377858 seconds before retrying _uncached_gcs_file_copy because we caught exception: httplib2.socks.HTTPError: (403, b'Forbidden')
Traceback for above exception (most recent call last):
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/utils/retry.py", line 275, in wrapper
return fun(*args, **kwargs)
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/runners/dataflow/internal/apiclient.py", line 631, in _uncached_gcs_file_copy
self.stage_file(to_folder, to_name, f, total_size=total_size)
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/runners/dataflow/internal/apiclient.py", line 735, in stage_file
response = self._storage_client.objects.Insert(request, upload=upload)
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/io/gcp/internal/clients/storage/storage_v1_client.py", line 1152, in Insert
return self._RunMethod(
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/base_api.py", line 728, in _RunMethod
http_response = http_wrapper.MakeRequest(
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/http_wrapper.py", line 359, in MakeRequest
retry_func(ExceptionRetryArgs(http, http_request, e, retry,
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/io/gcp/gcsio_overrides.py", line 45, in retry_func
return http_wrapper.HandleExceptionsAndRebuildHttpConnections(retry_args)
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/http_wrapper.py", line 304, in HandleExceptionsAndRebuildHttpConnections
raise retry_args.exc
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/http_wrapper.py", line 348, in MakeRequest
return _MakeRequestNoRetry(
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/http_wrapper.py", line 397, in _MakeRequestNoRetry
info, content = http.request(
File "/opt/py38/lib64/python3.8/site-packages/google_auth_httplib2.py", line 209, in request
self.credentials.before_request(self._request, method, uri, request_headers)
File "/opt/py38/lib64/python3.8/site-packages/google/auth/credentials.py", line 134, in before_request
self.refresh(request)
File "/opt/py38/lib64/python3.8/site-packages/google/auth/compute_engine/credentials.py", line 111, in refresh
self._retrieve_info(request)
File "/opt/py38/lib64/python3.8/site-packages/google/auth/compute_engine/credentials.py", line 87, in _retrieve_info
info = _metadata.get_service_account_info(
File "/opt/py38/lib64/python3.8/site-packages/google/auth/compute_engine/_metadata.py", line 234, in get_service_account_info
return get(request, path, params={"recursive": "true"})
File "/opt/py38/lib64/python3.8/site-packages/google/auth/compute_engine/_metadata.py", line 150, in get
response = request(url=url, method="GET", headers=_METADATA_HEADERS)
File "/opt/py38/lib64/python3.8/site-packages/google_auth_httplib2.py", line 119, in __call__
response, data = self.http.request(
File "/opt/py38/lib64/python3.8/site-packages/httplib2/__init__.py", line 1701, in request
(response, content) = self._request(
File "/opt/py38/lib64/python3.8/site-packages/httplib2/__init__.py", line 1421, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/opt/py38/lib64/python3.8/site-packages/httplib2/__init__.py", line 1343, in _conn_request
conn.connect()
File "/opt/py38/lib64/python3.8/site-packages/httplib2/__init__.py", line 1026, in connect
self.sock.connect((self.host, self.port) + sa[2:])
File "/opt/py38/lib64/python3.8/site-packages/httplib2/socks.py", line 504, in connect
self.__negotiatehttp(destpair[0], destpair[1])
File "/opt/py38/lib64/python3.8/site-packages/httplib2/socks.py", line 465, in __negotiatehttp
raise HTTPError((statuscode, statusline[2]))
My service account has these roles:
BigQuery Data Editor
BigQuery User
Dataflow Developer
Dataflow Worker
Service Account User
Storage Admin
The instance has the Cloud API access scope: Allow full access to all Cloud APIs
What is the problem?
Based on the comment from @luca, the above error is solved by using an internal proxy that allows access to the internet. Add the --no_use_public_ip flag to the command and set no_proxy="metadata.google.internal,www.googleapis.com,dataflow.googleapis.com,bigquery.googleapis.com".

pydrive: why has this verification error suddenly appeared?

I have a Python script that downloads files from Google Drive using pydrive. This is the relevant bit of code:
def get_drive_object():
    """Return a GoogleDrive client built from credentials cached in mycreds.txt.

    Adapted from http://stackoverflow.com/a/24542604/170243
    """
    creds_path = "mycreds.txt"
    auth = GoogleAuth()
    auth.LoadCredentialsFile(creds_path)

    if auth.credentials is not None:
        if auth.access_token_expired:
            # Cached credentials exist but the access token is stale.
            auth.Refresh()
        else:
            # Cached credentials are still usable as-is.
            auth.Authorize()
    else:
        # Nothing was loaded from the cache file: run the local-webserver flow.
        auth.LocalWebserverAuth()

    # Write the current credentials back to the cache file, then make the
    # same final LocalWebserverAuth() call the original snippet makes.
    auth.SaveCredentialsFile(creds_path)
    auth.LocalWebserverAuth()
    return GoogleDrive(auth)
Last week it suddenly stopped working, and gives an error of:
quitman$python update.py
Traceback (most recent call last):
File "update.py", line 264, in <module>
chapters = create_chapter_set(get_file_list(), home)
File "update.py", line 131, in get_file_list
drive = get_drive_object()
File "update.py", line 105, in get_drive_object
gauth.Authorize()
File "/home/toomanycooks/env/PyDrive-1.0.1-py2.7.egg/pydrive/auth.py", line 422, in Authorize
self.service = build('drive', 'v2', http=self.http)
File "/home/toomanycooks/env/oauth2client-1.4.9-py2.7.egg/oauth2client/util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "/home/toomanycooks/env/google_api_python_client-1.4.0-py2.7.egg/googleapiclient/discovery.py", line 196, in build
resp, content = http.request(requested_url)
File "/home/toomanycooks/env/oauth2client-1.4.9-py2.7.egg/oauth2client/util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "/home/toomanycooks/env/oauth2client-1.4.9-py2.7.egg/oauth2client/client.py", line 563, in new_request
redirections, connection_type)
File "/home/toomanycooks/env/httplib2-0.9.1-py2.7.egg/httplib2/__init__.py", line 1608, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/home/toomanycooks/env/httplib2-0.9.1-py2.7.egg/httplib2/__init__.py", line 1350, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/home/toomanycooks/env/httplib2-0.9.1-py2.7.egg/httplib2/__init__.py", line 1272, in _conn_request
conn.connect()
File "/home/toomanycooks/env/httplib2-0.9.1-py2.7.egg/httplib2/__init__.py", line 1059, in connect
raise SSLHandshakeError(e)
httplib2.SSLHandshakeError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:727)
quitman$vim update.py
I'm completely confused. I've tried generating a new mycreds.txt file, but I'm mystified as to how this error suddenly appeared in code that had literally been running for years as a cron job...
EDIT: if I download the whole folder from server (dreamhost) to my local machine, the code runs fine... :s

pandas read_gbq returns httplib.ResponseNotReady

I am using python with google bigquery to do some operations.
I have a Google BigQuery project named data-wagon.
I created a dataset 'vols'
And a table 'flights'.
This is the code I'm testing:
#
import pandas as pd

# Project that owns the vols.flights table queried below.
projectid = "data-wagon"
data_frame = pd.read_gbq('SELECT * FROM vols.flights', project_id=projectid)
# Parenthesised single-argument form prints the same on Python 2 and Python 3.
print(data_frame.head())
#
When I run it from eclipse, a web page is displayed to ask for authorization, I click Yes but then I have this error message:
Your browser has been opened to visit:
https://accounts.google.com/o/oauth2/auth?scope=....................
If your browser is on a different machine then exit and re-run this
application with the command-line parameter
--noauth_local_webserver
Traceback (most recent call last):
File "C:\Users\a452618\workspace\BigDataTutos\script_big_query.py", line 16, in <module>
data_frame = pd.read_gbq('SELECT * FROM vols.flights', project_id = projectid)
File "C:\Python27\lib\site-packages\pandas\io\gbq.py", line 334, in read_gbq
connector = GbqConnector(project_id, reauth = reauth)
File "C:\Python27\lib\site-packages\pandas\io\gbq.py", line 88, in __init__
self.credentials = self.get_credentials()
File "C:\Python27\lib\site-packages\pandas\io\gbq.py", line 111, in get_credentials
credentials = run_flow(flow, storage, argparser.parse_args([]))
File "C:\Python27\lib\site-packages\oauth2client\util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Python27\lib\site-packages\oauth2client\tools.py", line 225, in run_flow
credential = flow.step2_exchange(code, http=http)
File "C:\Python27\lib\site-packages\oauth2client\util.py", line 137, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Python27\lib\site-packages\oauth2client\client.py", line 1982, in step2_exchange
headers=headers)
File "C:\Python27\lib\site-packages\httplib2\__init__.py", line 1608, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "C:\Python27\lib\site-packages\httplib2\__init__.py", line 1350, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "C:\Python27\lib\site-packages\httplib2\__init__.py", line 1306, in _conn_request
response = conn.getresponse()
File "C:\Python27\lib\httplib.py", line 1018, in getresponse
raise ResponseNotReady()
httplib.ResponseNotReady
Could anyone help me with this?
Best regards,
Ayoub
Chances are you need to do this or do gcloud auth login.

Python BigQuery really strange timeout

I am building a service to stream data into BigQuery. The following code works flawlessly if I remove the part that takes 4-5 minutes to load (I am pre-caching some mappings):
import httplib2  # needed for httplib2.Http() below; the original snippet never imported it
from googleapiclient import discovery
from oauth2client import file
from oauth2client import client
from oauth2client import tools
from oauth2client.client import SignedJwtAssertionCredentials

## load email and key
credentials = SignedJwtAssertionCredentials(email, key, scope='https://www.googleapis.com/auth/bigquery')
if credentials is None or credentials.invalid:
    # Pause so the message can be read, then stop the script.
    raw_input('invalid key')
    exit(0)
# Wrap a plain Http object with the credentials and build the client on top of it.
http = httplib2.Http()
http = credentials.authorize(http)
service = discovery.build('bigquery', 'v2', http=http)
## this does not hang, because it is before the long operation
service.tabledata().insertAll(...)
## some code that takes 5 minutes to execute
r = load_mappings()
## aka long operation
## this hangs
service.tabledata().insertAll(...)
If I leave in the part that takes 5 minutes to execute, the Google API stops responding to the requests I make afterwards. It simply hangs there and doesn't even return an error. I even left it for 10-20 minutes to see what happens, and it just sits there. If I hit Ctrl+C, I get this:
^CTraceback (most recent call last):
File "./to_bigquery.py", line 116, in <module>
main(sys.argv)
File "./to_bigquery.py", line 101, in main
print service.tabledata().insertAll(projectId=p_n, datasetId="XXX", tableId="%s_XXXX" % str(shop), body=_mybody).execute()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 716, in execute
body=self.body, headers=self.headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 490, in new_request
redirections, connection_type)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1593, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1335, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1291, in _conn_request
response = conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1030, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline()
File "/usr/lib/python2.7/socket.py", line 430, in readline
data = recv(1)
File "/usr/lib/python2.7/ssl.py", line 241, in recv
return self.read(buflen)
File "/usr/lib/python2.7/ssl.py", line 160, in read
return self._sslobj.read(len)
I have managed to temporarily fix it by placing the big loading operation BEFORE the credentials authorization, but it seems like a bug to me. What am I missing?
EDIT: I have managed to get an error, while waiting:
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 716, in execute
body=self.body, headers=self.headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 490, in new_request
redirections, connection_type)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1593, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1335, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1291, in _conn_request
response = conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1030, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline()
File "/usr/lib/python2.7/socket.py", line 430, in readline
data = recv(1)
File "/usr/lib/python2.7/ssl.py", line 241, in recv
return self.read(buflen)
File "/usr/lib/python2.7/ssl.py", line 160, in read
return self._sslobj.read(len)
socket.error: [Errno 110] Connection timed out
It said timeout. This seems to happen with cold tables..
def refresh_bq(self):
    """Create new credentials and a new BigQuery client, stored on self.service.

    If the credentials are missing or invalid, waits for a key press and
    then exits the process.
    """
    creds = SignedJwtAssertionCredentials(
        email,
        key,
        scope='https://www.googleapis.com/auth/bigquery',
    )
    if creds is None or creds.invalid:
        raw_input('invalid key')
        exit(0)

    # A brand-new Http object is created on every call and wrapped with the credentials.
    authorized_http = creds.authorize(httplib2.Http())
    self.service = discovery.build('bigquery', 'v2', http=authorized_http)
I am running self.refresh_bq() every time I do some inserts that do not require preprocessing, and it works flawlessly. It's a messy hack, but I needed to make it work ASAP. There is definitely a bug somewhere.

Upload file larger than 70 MB to google drive using Python

Hi, I tried to upload files to Google Drive using the following code:
def upload_file(self, file_name, path):
    """Upload file_name to Drive, or update it if it already exists.

    Args:
        file_name: Local file to upload; also used as the Drive title.
        path: Destination folder path passed to self.create_path, or None
            to upload without a parent folder.

    Returns:
        None. The result of the insert request is not returned.
    """
    parents = None
    if path is not None:
        parents = self.create_path(path)
    mime_type = self.get_mime_type_for(file_name)
    file_id = self.check_file_exist(file_name, parents, mime_type)

    if file_id is not None:
        print("file exists........... updating")
        self.update_file(file_id, file_name)
        return

    # %-formatting keeps the printed text identical on Python 2 and Python 3.
    print("creating file........... %s" % (file_name,))
    print("mime_type %s" % (mime_type,))
    media = MediaFileUpload(file_name, mimetype=mime_type, resumable=True)
    body = {
        'title': file_name,
        'description': 'A test document',
        'mimeType': mime_type,
    }
    if parents is not None:
        body['parents'] = [{'id': parents}]
    self.drive_service.files().insert(body=body, media_body=media).execute()
This code works for smaller files (tested up to 25 MB). But if I try to upload large files (70 MB), the system gives this error message:
Traceback (most recent call last):
File "googledrive.py", line 176, in <module>
gd.upload_file("test.mp4","/media/media")
File "googledrive.py", line 122, in upload_file
f = self.drive_service.files().insert(body=body, media_body=media).execute()
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/apiclient/http.py", line 688, in execute
_, body = self.next_chunk(http=http, num_retries=num_retries)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/apiclient/http.py", line 867, in next_chunk
headers=headers)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/util.py", line 132, in positional_wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/oauth2client/client.py", line 490, in new_request
redirections, connection_type)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1570, in request
(response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1317, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/usr/local/lib/python2.7/dist-packages/httplib2/__init__.py", line 1286, in _conn_request
response = conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1045, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 409, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 373, in _read_status
raise BadStatusLine(line)
httplib.BadStatusLine: ''
If your upload is taking longer than about an hour, your token might expire and your upload will fail. This is a known issue.
Also, see Google Mirror API throwing BadStatusLine exception (Python)

Categories

Resources