Upload large file not working - Google Drive Python API - python

This script works for small files, but not when I try to upload a large file (250MB). When I manually upload the same large file to Google Drive it takes less than 10 seconds, so I assume my connection is not the problem.
upload.py
from __future__ import print_function
import os
import sys
from apiclient.http import MediaFileUpload
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

SCOPES = 'https://www.googleapis.com/auth/drive.file'
store = file.Storage(r'C:\Users\lucas.rezende\.credentials\storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets(r'C:\Users\lucas.rezende\.credentials\client_secret.json', scope=SCOPES)
    creds = tools.run_flow(flow, store, flags) if flags else tools.run(flow, store)
DRIVE = build('drive', 'v3', http=creds.authorize(Http()))

FILES = (
    ('OfertasMensais_20170418_n.xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'),
)

for filename, mimeType in FILES:
    media_body = MediaFileUpload(filename, chunksize=1024*256, resumable=True)
    folder_id = '0000'
    metadata = {'name': filename, 'parents': [folder_id]}
    if mimeType:
        metadata['mimeType'] = mimeType
    res = DRIVE.files().create(body=metadata, media_body=filename).execute()
    if res:
        print('Uploaded "%s" (%s)' % (filename, res['mimeType']))
When I run python uploadfile.py, the cmd screen just stays like that forever:
Can someone help me figure out how to make this work? I am not a professional programmer and I have been stuck on this for almost two hours.

Following the chunked upload paradigm, you need to explicitly call next_chunk() to continue an upload. See here: https://developers.google.com/api-client-library/python/guide/media_upload#resumable-media-chunked-upload
for filename, mimeType in FILES:
    media_body = MediaFileUpload(filename, chunksize=1024*256, resumable=True)
    # metadata as in your original loop
    metadata = {'name': filename, 'parents': [folder_id]}
    if mimeType:
        metadata['mimeType'] = mimeType
    # Pass the MediaFileUpload object as media_body, then drive the upload with next_chunk().
    req = DRIVE.files().insert(body=metadata, media_body=media_body)
    res = None
    while res is None:
        status, res = req.next_chunk()
        if status:
            print('Uploading %d%% "%s"' % (int(status.progress() * 100), filename))
    print("Upload Complete!")

The solution for this with v3 is to use the chunked approach, but with the create() function rather than insert()
res = None
media_body = MediaFileUpload(filename, chunksize=1024*256, resumable=True)
drive_request = self.drive.files().create(body=metadata, media_body=media_body)
while res is None:
    status, res = drive_request.next_chunk()
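For reference, a more complete sketch of the v3 resumable upload for the original script might look like this (it reuses the DRIVE service, FILES tuple and folder_id from the question; the chunk size is just an example):
for filename, mimeType in FILES:
    metadata = {'name': filename, 'parents': [folder_id]}
    if mimeType:
        metadata['mimeType'] = mimeType
    # Pass the MediaFileUpload object itself as media_body, not the filename.
    media_body = MediaFileUpload(filename, chunksize=1024 * 256, resumable=True)
    request = DRIVE.files().create(body=metadata, media_body=media_body)
    response = None
    while response is None:
        status, response = request.next_chunk()
        if status:
            print('Uploaded %d%% of "%s"' % (int(status.progress() * 100), filename))
    print('Upload of "%s" complete (file id: %s)' % (filename, response['id']))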

Related

Downloading file from Google Drive using API (NameError: name 'service' is not defined)

from __future__ import print_function
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools

# If modifying these scopes, delete the file token.json.
SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly'

def main():
    """Shows basic usage of the Drive v3 API.
    Prints the names and ids of the first 10 files the user has access to.
    """
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))

    # Call the Drive v3 API
    results = service.files().list(
        pageSize=10, fields="nextPageToken, files(id, name)").execute()
    items = results.get('files', [])
    if not items:
        print('No files found.')
    else:
        print('Files:')
        for item in items:
            print('{0} ({1})'.format(item['name'], item['id']))

if __name__ == '__main__':
    main()
I saved this as quickstart.py and ran it; the authentication flow completed and token.json was generated in the directory.
Now, to download a doc file:
file_id = '1wzCjl51u131v1KBgpbiKLJs8DPPakhXCFosfYjp7BY0'
request = service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    print("Download %d%%." % int(status.progress() * 100))
The id was copied from
https://docs.google.com/document/d/1wzCjl51u131v1KBgpbiKLJs8DPPakhXCFosfYjp7BY0/edit?usp=sharing
and I changed
request = drive_service.files().get_media(fileId=file_id)
from the example to
request = service.files().get_media(fileId=file_id)
and saved it as p.py. When executed, I get:
  line 2, in <module>
    request = service.files().get_media(fileId=file_id)
NameError: name 'service' is not defined
I am going to have to assume you have not merged these two files. The second script needs the service variable from your first script to run, so they should be merged.
from __future__ import print_function
import io
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from httplib2 import Http
from oauth2client import file, client, tools

# If modifying these scopes, delete the file token.json.
# Note: the metadata-only scope is enough for listing, but downloading file
# content will most likely need a broader scope such as
# https://www.googleapis.com/auth/drive.readonly.
SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly'

def main():
    """Downloads a file from the Drive v3 API by its ID."""
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))

    # Call the Drive v3 API
    file_id = '1wzCjl51u131v1KBgpbiKLJs8DPPakhXCFosfYjp7BY0'
    request = service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

if __name__ == '__main__':
    main()
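If you also want the downloaded content on disk rather than just in the io.BytesIO buffer, you could add something like this after the download loop inside main() (the output file name here is only a placeholder):
    # Persist the in-memory buffer to a local file.
    with open('downloaded_file.bin', 'wb') as out:
        out.write(fh.getvalue())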
Here is how I do it
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file

SCOPES = ['https://www.googleapis.com/auth/drive.readonly']


def login_gdrive(SCOPES):
    store = file.Storage('../personal_token.json')
    creds = store.get()
    return build('drive', 'v3', http=creds.authorize(Http()))


def gdrive_download(file_id):
    # will return metadata of file but I will only get file name
    request = drive_service.files().get(fileId=file_id)
    result = request.execute()
    file_name = result['name']
    print(f"File name is {file_name}")

    # will get actual file
    request = drive_service.files().get_media(fileId=file_id)
    result = request.execute()
    print("Downloading " + file_name)

    # will write file using the file_name
    with open(file_name, mode="wb") as f:
        f.write(result)
        print("Finished writing " + file_name)


drive_service = login_gdrive(SCOPES)

gdrive_download('1oiD6h-ixAUTIWebEdN-jV8MO0sssoQTI')
out... File name is sample_data_2018-10-21.csv
out... Downloading sample_data_2018-10-21.csv
out... Finished writing sample_data_2018-10-21.csv

How to upload a file to Google Drive with service account credentials

I want to upload files to my Google Drive using Google service account credentials.
I downloaded the credential as a JSON file from the Google Developer Console and obtained the credentials from it.
Here is my code snippet:
google_drive_service = discovery.build(
    'drive', 'v3',
    credentials=ServiceAccountCredentials.from_json_keyfile_name(
        os.path.join(settings.CLIENT_PATH, settings.CLIENT_SECRET_FILE),
        scopes=settings.SCOPES))
media = MediaFileUpload(tmp_file_path, mimetype=tmp_file.content_type, resumable=True)
google_drive_service.files().create(body=file_metadata, media_body=media, fields='id').execute()
The code runs without errors, but I can't find the uploaded files in my Google Drive account. I am not sure why the files are not showing up. Could you help me fix this problem?
The issue you are having is that a service account is not you. You have uploaded the file to the service account's own Google Drive, not to your personal Drive. Try doing a file list with the service account's credentials and you should see the file.
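For example, a quick list call with the same service account credentials (a sketch reusing the google_drive_service object from your snippet) should show the uploaded file sitting in the service account's own Drive:
# List files visible to the service account itself.
results = google_drive_service.files().list(
    pageSize=10, fields='files(id, name)').execute()
for f in results.get('files', []):
    print(f['name'], f['id'])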
Suggestion: take the service account's email address and share a directory on your personal Google Drive with it, just as you would share with any other user. The service account will then be able to upload into that directory. Just make sure to set the permissions on the file after you upload it, granting your personal account access, because the uploaded file will be owned by the service account.
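As a rough sketch of that last step (file_metadata and media are the variables from your snippet; the email address is a placeholder), you could grant your personal account access right after the upload:
uploaded = google_drive_service.files().create(
    body=file_metadata, media_body=media, fields='id').execute()

# Give your personal Google account permission on the file the
# service account just created and owns.
permission = {
    'type': 'user',
    'role': 'writer',
    'emailAddress': 'your.name@gmail.com'  # placeholder address
}
google_drive_service.permissions().create(
    fileId=uploaded['id'], body=permission).execute()
To place the upload inside the folder you shared with the service account, also add 'parents': [<shared_folder_id>] to file_metadata.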
Try this Python terminal client for Google Drive for easy uploading, deleting, listing, sharing files or folders.
client_secret.json
{"installed":{"client_id":"698477346386-5kbs1fh3c6eu46op4qvf30ehp6md8o56.apps.googleusercontent.com","project_id":"proven-dryad-122714","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://accounts.google.com/o/oauth2/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"9j4oMk4HI0ZyPvQrz0jFFA4q","redirect_uris":["urn:ietf:wg:oauth:2.0:oob","http://localhost"]}}
GDrive.py
from __future__ import print_function
import sys
import io
import pip
import httplib2
import os
from mimetypes import MimeTypes

try:
    from googleapiclient.errors import HttpError
    from apiclient import discovery
    import oauth2client
    from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload
    from oauth2client import client
    from oauth2client import tools
except ImportError:
    print('google-api-python-client is not installed. Try:')
    print('sudo pip install --upgrade google-api-python-client')
    sys.exit(1)


class Flag:
    auth_host_name = 'localhost'
    noauth_local_webserver = False
    auth_host_port = [8080, 8090]
    logging_level = 'ERROR'


try:
    import argparse
    # flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
    flags = Flag()
except ImportError:
    flags = None

# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/drive-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/drive'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'GDrive'


def get_credentials():
    home_dir = os.path.expanduser('~')
    credential_dir = os.path.join(home_dir, '.credentials')
    if not os.path.exists(credential_dir):
        os.makedirs(credential_dir)
    credential_path = os.path.join(credential_dir,
                                   'drive-python-quickstart.json')
    store = oauth2client.file.Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        # if flags:
        credentials = tools.run_flow(flow, store, flags)
        # else:  # Needed only for compatibility with Python 2.6
        #     credentials = tools.run(flow, store)
        print('Storing credentials to ' + credential_path)
    return credentials


def upload(path, parent_id=None):
    mime = MimeTypes()
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)
    file_metadata = {
        'name': os.path.basename(path),
        # 'mimeType': 'application/vnd.google-apps.spreadsheet'
    }
    if parent_id:
        file_metadata['parents'] = [parent_id]
    media = MediaFileUpload(path,
                            mimetype=mime.guess_type(os.path.basename(path))[0],
                            resumable=True)
    try:
        file = service.files().create(body=file_metadata,
                                      media_body=media,
                                      fields='id').execute()
    except HttpError:
        print('corrupted file')
        pass
    print(file.get('id'))


def share(file_id, email):
    def callback(request_id, response, exception):
        if exception:
            # Handle error
            print(exception)
        else:
            print(response.get('id'))

    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)
    batch = service.new_batch_http_request(callback=callback)
    user_permission = {
        'type': 'user',
        'role': 'reader',
        'emailAddress': email
    }
    batch.add(service.permissions().create(
        fileId=file_id,
        body=user_permission,
        fields='id',
    ))
    batch.execute()


def listfiles():
    results = service.files().list(fields="nextPageToken, files(id, name, mimeType)").execute()
    items = results.get('files', [])
    if not items:
        print('No files found.')
    else:
        print('Files:')
        print('Filename (File ID)')
        for item in items:
            print('{0} ({1})'.format(item['name'].encode('utf-8'), item['id']))
        print('Total=', len(items))


def delete(fileid):
    service.files().delete(fileId=fileid).execute()


def download(file_id, path=os.getcwd()):
    request = service.files().get_media(fileId=file_id)
    name = service.files().get(fileId=file_id).execute()['name']
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print(int(status.progress() * 100))
    f = open(path + '/' + name, 'wb')
    f.write(fh.getvalue())
    print('File downloaded at', path)
    f.close()


def createfolder(folder, recursive=False):
    if recursive:
        print('recursive ON')
        ids = {}
        for root, sub, files in os.walk(folder):
            par = os.path.dirname(root)
            file_metadata = {
                'name': os.path.basename(root),
                'mimeType': 'application/vnd.google-apps.folder'
            }
            if par in ids.keys():
                file_metadata['parents'] = [ids[par]]
            print(root)
            file = service.files().create(body=file_metadata,
                                          fields='id').execute()
            id = file.get('id')
            print(id)
            ids[root] = id
            for f in files:
                print(root + '/' + f)
                upload(root + '/' + f, id)
    else:
        print('recursive OFF')
        file_metadata = {
            'name': os.path.basename(folder),
            'mimeType': 'application/vnd.google-apps.folder'
        }
        file = service.files().create(body=file_metadata,
                                      fields='id').execute()
        print(file.get('id'))


if __name__ == '__main__':
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)
    method = sys.argv[1]
    if method == 'upload':
        if os.path.isdir(sys.argv[2]):
            if len(sys.argv) == 4 and sys.argv[3] == 'R':
                createfolder(sys.argv[2], True)
            else:
                createfolder(os.path.basename(sys.argv[2]))
        else:
            upload(sys.argv[2])
    elif method == 'list':
        listfiles()
    elif method == 'delete':
        delete(sys.argv[2])
    elif method == 'download':
        download(sys.argv[2], sys.argv[3])
    elif method == 'share':
        share(sys.argv[2], sys.argv[3])
    elif method == 'folder':
        createfolder(sys.argv[2])
    elif method == 'debug':
        print(os.getcwd())
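Going by the sys.argv handling in the __main__ block, typical invocations look roughly like this (file names and IDs are placeholders):
python GDrive.py upload myfile.txt
python GDrive.py upload myfolder R
python GDrive.py list
python GDrive.py download <file_id> /some/dir
python GDrive.py share <file_id> user@example.com
python GDrive.py delete <file_id>
python GDrive.py folder myfolder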

Unable to upload huge file on google drive using python

I am trying to upload files to Google Drive via the Google API using the following code:
import httplib2
from apiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

SCOPES = ['https://www.googleapis.com/auth/drive',
          'https://www.googleapis.com/auth/drive.file',
          'https://www.googleapis.com/auth/drive.appdata',
          'https://www.googleapis.com/auth/drive.apps.readonly']
store = file.Storage('scope.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
    creds = tools.run_flow(flow, store, flags) if flags else tools.run(flow, store)
    DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))
else:
    credentials = creds
    http = credentials.authorize(httplib2.Http())
    DRIVE = discovery.build('drive', 'v3', http=http)

FILES = (
    ('/home/vkm/mayur/Demo_Google_API.zip', 'application/vmd.google-apps.document'),
)

for filename, mimeType in FILES:
    metadata = {'name': filename}
    if mimeType:
        metadata['mimeType'] = mimeType
    res = DRIVE.files().create(body=metadata, media_body=filename).execute()
    if res:
        print('Uploaded "%s" (%s)' % (filename, res['mimeType']))
I am able to upload small files, but when I try with an 8GB file it gives a MemoryError. Please find the error message below:
Traceback (most recent call last):
  File "demo.py", line 46, in <module>
    res = DRIVE.files().create(body=metadata, media_body=filename).execute()
  File "/usr/local/lib/python2.7/dist-packages/googleapiclient/discovery.py", line 853, in method
    payload = media_upload.getbytes(0, media_upload.size())
  File "/usr/local/lib/python2.7/dist-packages/googleapiclient/http.py", line 482, in getbytes
    return self._fd.read(length)
MemoryError
Vikram's comment revealed a problem in mhawke's answer: next_chunk() needs to be called on the return value of
request = DRIVE.files().create(body=metadata, media_body=media)
not on the return value of request.execute().
Here is a snippet of Python code I verified as working on files up to 10MB to my Google Drive account:
# Upload some file that just happens to be binary (we
# don't care about metadata, just upload it without
# translation):
the_file_to_upload = 'some_binary_file'
metadata = {'name': the_file_to_upload}

# Note the chunksize restrictions given in
# https://developers.google.com/api-client-library/python/guide/media_upload
media = MediaFileUpload(the_file_to_upload,
                        chunksize=1024 * 1024,
                        # Not sure whether or not this mimetype is necessary:
                        mimetype='text/plain',
                        resumable=True)
request = drive_service.files().create(body=metadata, media_body=media)
response = None
while response is None:
    status, response = request.next_chunk()
    if status:
        print("Uploaded %d%%." % int(status.progress() * 100))
print("Upload of {} is complete.".format(the_file_to_upload))
Here is a snippet of Python code that downloads the same file, but to a different file name, so that I can use sha1sum to verify the file has not been altered by Google Drive going in and out.
# Verify downloading works without translation:
request = drive_service.files().get_media(fileId=response['id'])
# Use io.FileIO. Refer to:
# https://google.github.io/google-api-python-client/docs/epy/googleapiclient.http.MediaIoBaseDownload-class.html
out_filename = the_file_to_upload + ".out"
fh = io.FileIO(out_filename, mode='wb')
downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
done = False
while done is False:
    status, done = downloader.next_chunk()
    if status:
        print("Download %d%%." % int(status.progress() * 100))
print("Download Complete!")
You could upload the file using a resumable media upload. This will send the file in chunks and should not max out your memory, which I assume is happening because your client is trying to send the whole file at once.
To do this you need to pass a MediaFileUpload object to the create() method in which the resumable flag is set to True. Optionally you can also set the chunksize.
metadata = {'name': filename}
media = MediaFileUpload(filename, mimetype=mimetype, resumable=True)
request = DRIVE.files().create(body=metadata, media_body=media)
response = None
while response is None:
    status, response = request.next_chunk()
    if status:
        print "Uploaded %d%%." % int(status.progress() * 100)
print "Upload Complete!"
Try reducing the chunksize if needed.
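One detail worth knowing if you do set chunksize explicitly: as far as I know, the Drive API's resumable upload protocol expects every chunk except the last to be a multiple of 256 KB, so a typical value would be:
media = MediaFileUpload(filename, mimetype=mimetype, chunksize=256 * 1024, resumable=True)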
The easiest way to upload large files to Google Drive with Python is just to add resumable=True:
from googleapiclient.http import MediaFileUpload
media = MediaFileUpload(filename, resumable=True)
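With resumable=True you can still just call execute() on the create() request, and the client library will drive the chunked upload loop internally. A minimal sketch, assuming a DRIVE service object and a filename as in the question:
media = MediaFileUpload(filename, resumable=True)
response = DRIVE.files().create(body={'name': filename}, media_body=media).execute()
print(response.get('id'))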

Python: Google Drive Authentication and folder

I have a Python script that matches files in a specific folder on Google Drive. It worked fine with my G Suite account at the company, but when I use it at home with my plain Google Drive account it does not work; no files are found.
Here is my code:
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
from apiclient.http import MediaFileUpload
from apiclient import errors
import io
from apiclient.http import MediaIoBaseDownload
from apiclient import errors
import os
import csv

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

folder_id = 'XYZ00000'
SCOPES = 'https://www.googleapis.com/auth/drive.file'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    print("Create new data storage file ...")
    flow = client.flow_from_clientsecrets('client_secrets.json', SCOPES)
    flow.redirect_uri = client.OOB_CALLBACK_URN
    authorize_url = flow.step1_get_authorize_url()
    creds = tools.run_flow(flow, store, flags) \
        if flags else tools.run(flow, store)
    print("Storage")
DRIVE = build('drive', 'v2', http=creds.authorize(Http()))

def getKeywordsID():
    page_token = None
    while True:
        response = DRIVE.files().list(q="'XYZ00000' in parents and trashed = false",
                                      spaces='drive',
                                      fields='nextPageToken, items(id, title)',
                                      pageToken=page_token).execute()
        for file in response.get('items', []):
            if ('test.txt' == file.get('title')):
                print('test.txt on Google Drive found')
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
    return file_id

# Download file
file_id = getKeywordsID()
if file_id != None:
    request = DRIVE.files().get_media(fileId=file_id)
    fh = io.FileIO('test.txt', 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download test.txt %d%%." % int(status.progress() * 100))
else:
    print("File not found.")
I have no idea why it's not working. I enabled the Google Drive API, created a project, added an OAuth 2.0 client ID (selecting "Other" as the type) and downloaded the JSON file for storage.
From the traceback posted, I can see that in the getKeywordsID function you are returning the file_id variable, which you have not initialized in the getKeywordsID function scope. Hence the UnboundLocalError: local variable 'file_id' referenced before assignment is thrown.
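A minimal fix, sketched along the lines of your original function, is to initialize file_id before the loop and assign it when the title matches (the loop variable is renamed to f so it does not shadow the oauth2client file module):
def getKeywordsID():
    page_token = None
    file_id = None
    while True:
        response = DRIVE.files().list(q="'XYZ00000' in parents and trashed = false",
                                      spaces='drive',
                                      fields='nextPageToken, items(id, title)',
                                      pageToken=page_token).execute()
        for f in response.get('items', []):
            if f.get('title') == 'test.txt':
                print('test.txt on Google Drive found')
                file_id = f.get('id')
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
    return file_id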

Not able to download google spreadsheet by google drive API using python

I am trying to download a spreadsheet file from my Drive to my computer.
I am able to authenticate, get a list of files and even get metadata successfully.
But when I try to download the file, I get the following error:
downloading file starts
An error occurred: <HttpError 400 when requesting https://www.googleapis.com/drive/v2/files/1vJetI_p8YEYiKvPVl0LtXGS5uIAx1eRGUupsXoh7UbI?alt=media returned "The specified file does not support the requested alternate representation.">
downloading file ends
I couldn't find any such problem or question on SO, and the other methods or solutions provided on SO for downloading a spreadsheet are outdated; they have been deprecated by Google.
Here is the code I am using to download the file:
import httplib2
import os

from apiclient import discovery
import oauth2client
from oauth2client import client
from oauth2client import tools
from apiclient import errors
from apiclient import http

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly'
SCOPES = 'https://www.googleapis.com/auth/drive'
CLIENT_SECRET_FILE = 'client_secrets.json'
APPLICATION_NAME = 'Drive API Quickstart'

def get_credentials():
    home_dir = os.path.expanduser('~')
    credential_dir = os.path.join(home_dir, '.credentials')
    if not os.path.exists(credential_dir):
        os.makedirs(credential_dir)
    credential_path = os.path.join(credential_dir,
                                   'drive-quickstart.json')
    store = oauth2client.file.Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        if flags:
            credentials = tools.run_flow(flow, store, flags)
        else:  # Needed only for compatability with Python 2.6
            credentials = tools.run(flow, store)
        print 'Storing credentials to ' + credential_path
    return credentials

def main():
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v2', http=http)
    file_id = '1vJetI_p8YEYiKvPVl0LtXGS5uIAx1eRGUupsXoh7UbI'
    print "downloading file starts"
    download_file(service, file_id)
    print "downloading file ends "

def download_file(service, file_id):
    local_fd = open("foo.csv", "w+")
    request = service.files().get_media(fileId=file_id)
    media_request = http.MediaIoBaseDownload(local_fd, request)
    while True:
        try:
            download_progress, done = media_request.next_chunk()
        except errors.HttpError, error:
            print 'An error occurred: %s' % error
            return
        if download_progress:
            print 'Download Progress: %d%%' % int(download_progress.progress() * 100)
        if done:
            print 'Download Complete'
            return

if __name__ == '__main__':
    main()
Google spreadsheets don't have media. Instead they have exportLinks. Get the file metadata, then look in the exportlinks and pick an appropriate URL.
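A rough sketch of that approach with the v2 service built in the question (picking the CSV export link as an example; the metadata should expose other MIME types in the same dict) could look like:
file_metadata = service.files().get(fileId=file_id).execute()
# For Google-native files, exportLinks maps MIME types to download URLs.
export_url = file_metadata['exportLinks']['text/csv']
# Reuse an authorized httplib2.Http object, e.g. the one created in main().
resp, content = credentials.authorize(httplib2.Http()).request(export_url)
with open('foo.csv', 'wb') as out:
    out.write(content)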
This code worked for me. I only had to download client_secret.json from the Google developers dashboard and keep it in the same directory as the Python script.
In the list_of_lists variable I got a list with each row as a list.
import gspread
import json
from oauth2client.client import SignedJwtAssertionCredentials

json_key = json.load(open('client_secret.json'))
scope = ['https://spreadsheets.google.com/feeds']

credentials = SignedJwtAssertionCredentials(json_key['client_email'], json_key['private_key'], scope)
gc = gspread.authorize(credentials)

sht1 = gc.open_by_key('<id_of_sheet>')
worksheet_list = sht1.worksheets()
worksheet = sht1.sheet1
list_of_lists = worksheet.get_all_values()

for row in list_of_lists:
    print row
