I have tried a number of ways to download files from google drive via oauth and the API, however I am not able to get the files downloaded. I believe I have properly authenticated. After running my code, it looks like there was success with downloading the file (no errors), but no files were downloaded.
This is the code I have tried so far:
def download_file(file_id, mimeType):
if "google-apps" in mimeType:
return
request = drive_service.files().get(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print "Download %d%%." % int(status.progress() * 100)
However, this results in "Download 100%." being printed to the console, but no file downloaded.
I have also tried:
def download2(download_url):
resp, content = drive_service._http.request(download_url)
if resp.status == 200:
print 'Status: %s' % resp
return content
else:
print 'An error occurred: %s' % resp
return None
This also does not produce a downloaded file, but it does give me a 200 message.
Both of these seem like they are properly making contact with the API. Is there an additional step I have to do to actually get the files on my computer?
Edit:
this was the remainder of my code:
import json
import webbrowser
import httplib2
import io
from apiclient.http import MediaIoBaseDownload
from apiclient import discovery
from oauth2client import client
if __name__ == '__main__':
flow = client.flow_from_clientsecrets(
'client_secrets.json',
scope='https://www.googleapis.com/auth/drive.readonly',
redirect_uri='urn:ietf:wg:oauth:2.0:oob')
auth_uri = flow.step1_get_authorize_url()
webbrowser.open(auth_uri)
auth_code = raw_input('Enter the auth code: ')
credentials = flow.step2_exchange(auth_code)
http_auth = credentials.authorize(httplib2.Http())
drive_service = discovery.build('drive', 'v3', http_auth) #also tried v2
files = drive_service.files().list().execute()
for f in files['files']:
#call one of the two download methods with the proper arguments
Changing from BytesIO to FileIO allowed the file to actually be downloaded. This was the line I modified my code to:
fh = io.FileIO(filename, 'wb')
Here is the complete code that allowed me to download the file:
def download_file(file_id, mimeType, filename):
if "google-apps" in mimeType:
# skip google files
return
request = drive_service.files().get_media(fileId=file_id)
fh = io.FileIO(filename, 'wb')
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print "Download %d%%." % int(status.progress() * 100)
if __name__ == '__main__':
flow = client.flow_from_clientsecrets(
'client_secrets.json',
scope='https://www.googleapis.com/auth/drive.readonly',
redirect_uri='urn:ietf:wg:oauth:2.0:oob')
auth_uri = flow.step1_get_authorize_url()
webbrowser.open(auth_uri)
print auth_uri
auth_code = raw_input('Enter the auth code: ')
credentials = flow.step2_exchange(auth_code)
http_auth = credentials.authorize(httplib2.Http())
drive_service = discovery.build('drive', 'v3', http_auth)
files = drive_service.files().list().execute()
for f in files['files']:
print f['name']
download_file(f['id'], f['mimeType'], f['name'])
The file is downloading, but the example given by google doesn't do anything with the file.
You simply need to return the contents of the BytesIO buffer like this (just adding a return at the end)...
def download_file(service, file_id):
request = service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print("Download %d%%." % int(status.progress() * 100))
return fh.getvalue()
Related
I'm attempting to download a file from Google Drive using Python, and I'm not sure where the file is being stored.
Following the example here: https://developers.google.com/drive/api/v3/manage-downloads#python
Code:
def DownloadGoogleFile(id: int):
file = str(id) + '.txt'
creds = GetGoogleCredentials()
service = build('drive', 'v3', credentials=creds)
# Call the Drive v3 API
FileSearch = service.files().list(q="name='{0}'".format(file), fields="nextPageToken, files(id, name)").execute()
FoundFiles = FileSearch.get('files', [])
if FoundFiles:
FileID = FoundFiles[0]['id']
request = service.files().get_media(fileId=FileID)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print ("Download %d%%." % int(status.progress() * 100))
else:
output = 'No file found'
I'm getting output of Download 100% but that's it. I can't find the file anywhere. I was thinking it'd be in the same directory as the python file, but there isn't anything there. I also though it may need to be fh=io.FileIO(file) as a way to specify where I want to save the file, but I'm getting a 'no file exists' error when doing that so I'm not sure.
Following the example from the docs, you should be able to just replace
fh = io.BytesIO()
With
fh = io.FileIO('filename.extension', mode='wb')
io.BytesIO() is an in memory file-like object and is not written to disk
I am trying to upload files to Google drive by Google API using the following code
import httplib2
from apiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools
try:
import argparse
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
flags = None
SCOPES =['https://www.googleapis.com/auth/drive','https://www.googleapis.com/auth/drive.file','https://www.googleapis.com/auth/drive.appdata', 'https://www.googleapis.com/auth/drive.apps.readonly']
store = file.Storage('scope.json')
creds = store.get()
if not creds or creds.invalid:
flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
creds = tools.run_flow(flow, store, flags) if flags else tools.run(flow, store)
DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))
else:
credentials = creds
http = credentials.authorize(httplib2.Http())
DRIVE = discovery.build('drive', 'v3', http=http)
FILES = (
('/home/vkm/mayur/Demo_Google_API.zip', 'application/vmd.google-apps.document'),
)
for filename, mimeType in FILES:
metadata = {'name': filename}
if mimeType:
metadata['mimeType'] = mimeType
res = DRIVE.files().create(body=metadata, media_body=filename).execute()
if res:
print('Uploaded "%s" (%s)' % (filename, res['mimeType']))
I am able to upload the small files but when I am trying with 8GB of the file, it is giving MemorryErro.Please find the error message that I am getting.
Traceback (most recent call last):
File "demo.py", line 46, in <module>
res = DRIVE.files().create(body=metadata, media_body=filename).execute()
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/discovery.py", line 853, in method
payload = media_upload.getbytes(0, media_upload.size())
File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 482, in getbytes
return self._fd.read(length)
MemoryError
Vikram's comment revealed a problem in mhawke's answer: next_chunk needs to be called upon the return value of:
request = DRIVE.files().create(body=metadata, media_body=media)
not on the return value of request.execute().
Here is a snippet of Python code I verified as working on files up to 10MB to my Google Drive account:
# Upload some file that just happens to be binary (we
# don't care about metadata, just upload it without
# translation):
the_file_to_upload = 'some_binary_file'
metadata = {'name': the_file_to_upload}
# Note the chunksize restrictions given in
# https://developers.google.com/api-client-library/python/guide/media_upload
media = MediaFileUpload(the_file_to_upload,
chunksize=1024 * 1024,
# Not sure whether or not this mimetypes is necessary:
mimetype='text/plain',
resumable=True)
request = drive_service.files().create(body=metadata, media_body=media)
response = None
while response is None:
status, response = request.next_chunk()
if status:
print("Uploaded %d%%." % int(status.progress() * 100))
print("Upload of {} is complete.".format(the_file_to_upload))
Here is a snippet of Python code that downloads the same file, but to a different file, so that I can use sha1sum to verify file has not been altered by Google Drive going in and out.
# Verify downloading works without translation:
request = drive_service.files().get_media(fileId=response['id'])
# Use io.FileIO. Refer to:
# https://google.github.io/google-api-python-client/docs/epy/googleapiclient.http.MediaIoBaseDownload-class.html
out_filename = the_file_to_upload + ".out"
fh = io.FileIO(out_filename, mode='wb')
downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
done = False
while done is False:
status, done = downloader.next_chunk()
if status:
print("Download %d%%." % int(status.progress() * 100))
print("Download Complete!")
You could upload the file using a resumable media upload. This will send the file in chunks and should not max out your memory, which I assume is happening because your client is trying to send the whole file at once.
To do this you need to pass a MediaFileUpload object to the create() method in which the resumable flag is set to True. Optionally you can also set the chunksize.
metadata = {'name': filename}
media = MediaFileUpload(filename, mimetype=mimetype, resumable=True)
request = DRIVE.files().create(body=metadata, media_body=media)
response = None
while response is None:
status, response = request.next_chunk()
if status:
print "Uploaded %d%%." % int(status.progress() * 100)
print "Upload Complete!"
Try reducing the chunksize if needed.
The easiest way to upload large files to Google drive with python is just to add resumable=True
from googleapiclient.http import MediaFileUpload
media = MediaFileUpload(filename, resumable=True)
I have a python script to match files on google drive in specific folder. It worked fine with my GSuite account in the company. But If i want to use it at home with my simple Google Drive account it's not working. No files found:
Here is my code:
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
from apiclient.http import MediaFileUpload
from apiclient import errors
import io
from apiclient.http import MediaIoBaseDownload
from apiclient import errors
import os
import csv
try :
import argparse
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
flags = None
folder_id = 'XYZ00000'
SCOPES = 'https://www.googleapis.com/auth/drive.file'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
print("Create new data storage file ...")
flow = client.flow_from_clientsecrets('client_secrets.json', SCOPES)
flow.redirect_uri = client.OOB_CALLBACK_URN
authorize_url = flow.step1_get_authorize_url()
creds = tools.run_flow(flow, store, flags) \
if flags else tools.run(flow, store)
print ("Storage")
DRIVE = build('drive', 'v2', http=creds.authorize(Http()))
def getKeywordsID():
page_token = None
while True:
response = DRIVE.files().list(q="'XYZ00000' in parents and trashed = false",
spaces='drive',
fields='nextPageToken, items(id, title)',
pageToken=page_token).execute()
for file in response.get('items', []):
if ('test.txt' == file.get('title')):
print ('test.txt on Google Drive found')
page_token = response.get('nextPageToken', None)
if page_token is None:
break;
return file_id
# Download file
file_id = getKeywordsID()
if file_id != None:
request = DRIVE.files().get_media(fileId=file_id)
fh = io.FileIO('test.txt', 'wb')
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print ("Download test.txt %d%%." % int(status.progress() * 100) )
else:
print ("File not found.")
I have no idea why it's not working. I enabled Google Drive API, created a project, added an OAuth2.0 client ID (select “Other” as option) and downloaded the json file for storage.
From the traceback posted, I can see that in the getKeywordsID function you are returning the file_id variable, which you have not initialized in the getKeywordsID function scope. Hence the UnboundLocalError: local variable 'file_id' referenced before assignment is thrown.
I'm trying to read large CSV files that are dropped on Google Drive using the google-api-python-client https://google.github.io/google-api-python-client/docs/epy/googleapiclient.http.MediaIoBaseDownload-class.html
I was able to download the file on the hard drive doing this:
request = drive_service.files().get_media(fileId=file_id)
fh = io.FileIO('test.csv', mode='w')
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
But I was wondering if there's a simple way to read it in chunks in memory.
api_service_object = self.service
request = api_service_object.files().get_media(fileId=file_id)
stream = io.BytesIO()
downloader = MediaIoBaseDownload(stream, request)
done = False
# Retry if we received HttpError
for retry in range(0, 5):
try:
while done is False:
status, done = downloader.next_chunk()
print "Download %d%%." % int(status.progress() * 100)
return stream.getvalue()
except HTTPError as error:
print 'There was an API error: {}. Try # {} failed.'.format(
error.response,
retry,
)
This script works for small files, but not when I try to upload a large file (250MB). When I manually upload the same large file to GD it takes less than 10 seconds, so I assume my connection is not the problem.
upload.py
from __future__ import print_function
import os
import sys
from apiclient.http import MediaFileUpload
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
try:
import argparse
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
flags = None
SCOPES = 'https://www.googleapis.com/auth/drive.file'
store = file.Storage(r'C:\Users\lucas.rezende\.credentials\storage.json')
creds = store.get()
if not creds or creds.invalid:
flow = client.flow_from_clientsecrets(r'C:\Users\lucas.rezende\.credentials\client_secret.json', scope=SCOPES)
creds = tools.run_flow(flow, store, flags) if flags else tools.run(flow, store)
DRIVE = build('drive', 'v3', http=creds.authorize(Http()))
FILES = (
('OfertasMensais_20170418_n.xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'),
)
for filename, mimeType in FILES:
media_body = MediaFileUpload(filename, chunksize=1024*256, resumable = True)
folder_id = '0000'
metadata = {'name': filename, 'parents': [folder_id]}
if mimeType:
metadata['mimeType'] = mimeType
res = DRIVE.files().create(body=metadata, media_body=filename).execute()
if res:
print('Uploaded "%s" (%s)' % (filename, res['mimeType']))
When I run python uploadfile.py cmd screen stays like that eternally:
Can someone help to discover how to make this work? I am not a professional programmer and I am stuck into this for almost two hours trying to make this work.
Following the chunked paradigm, you need to specifically call the next_chunk() to continue with an upload. see here: https://developers.google.com/api-client-library/python/guide/media_upload#resumable-media-chunked-upload
for filename, mimeType in FILES:
media_body = MediaFileUpload(filename, chunksize=1024*256, resumable = True)
if mimeType:
metadata['mimeType'] = mimeType
req = DRIVE.files().insert(body=metadata, media_body=filename)
res = None
while res is None:
status, res = req.next_chunk()
if status :
print('Uploading %d%% "%s" (%s)' % (status.progress(), filename, res['mimeType']))
print("Upload Complete!")
The solution for this with v3 is to use the chunked approach, but with the create() function rather than insert()
res = None
media_body = MediaFileUpload(filename, chunksize=1024*256, resumable = True)
drive_request = self.drive.files().create(body=metadata,media_body=media_body)
while res is None:
status, res = drive_request.next_chunk()