I want to add a time string to the end of the filenames I am uploading to Google Drive using PyDrive. Basically I tried the code below, but I have no idea how to adapt the new file variable:
import time
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from time import sleep
gauth = GoogleAuth()
drive = GoogleDrive(gauth)
timestring = time.strftime("%Y%m%d-%H%M")
upload_file_list = ['Secrets.kdbx']
for upload_file in upload_file_list:
    gfile = drive.CreateFile({'parents': [{'id': 'folder_id'}]})
    # Read file and set it as the content of this instance.
    upload_file = drive.CreateFile({'title': 'Secrets.kdbx' + timestring, 'mimeType': 'application/x-kdbx'})  # here I try to set new filename
    gfile.SetContentFile(upload_file)
    gfile.Upload()  # Upload the file.
This raises: TypeError: expected str, bytes or os.PathLike object, not GoogleDriveFile
Actually I found the mistake. I changed the name of the file inside CreateFile() and used string slicing to keep the file extension. Of course, this solution cannot be applied to other files with different names.
upload_file_list = ['Secrets.kdbx']
for upload_file in upload_file_list:
    gfile = drive.CreateFile({'title': upload_file[:7] + timestring + "." + upload_file[-4:],  # file name is changed here
                              'parents': [{'id': '1Ln6ptJ4bTlYoGdxczIgmD-7xcRlJa_7m'}]})
    # Read file and set it as the content of this instance.
    gfile.SetContentFile(upload_file)
    gfile.Upload()  # Upload the file. Output is something like Secrets20211212-1627.kdbx - that's what I wanted :)
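For reference, a sketch that generalizes this beyond the hard-coded slicing by using os.path.splitext, so the extension is kept for any filename (the drive setup and the placeholder folder id are reused from the snippets above):

import os
import time
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

drive = GoogleDrive(GoogleAuth())
timestring = time.strftime("%Y%m%d-%H%M")

for upload_file in ['Secrets.kdbx', 'notes.txt']:
    # splitext keeps whatever extension the file has, e.g. ('Secrets', '.kdbx').
    stem, ext = os.path.splitext(upload_file)
    gfile = drive.CreateFile({
        'title': stem + timestring + ext,
        'parents': [{'id': 'folder_id'}],  # placeholder folder id from the question
    })
    gfile.SetContentFile(upload_file)  # path of the local file, a plain string
    gfile.Upload()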
I'm writing a simple Python application where the user selects a file from their local file manager and uploads it using Streamlit.
I'm able to successfully take the file the user selected using st.file_uploader and store it in a temp directory inside the Streamlit app folder, but the issue is I can't pass the path of the file stored in the newly created directory in order to send it to my GCP Cloud Storage bucket.
Adding my snippet below, any help is appreciated :)
import streamlit as st
from google.oauth2 import service_account
from google.cloud import storage
import os
from os import listdir
from os.path import isfile, join
from pathlib import Path
from PIL import Image, ImageOps
bucketName = 'survey-appl-dev-public'
# Create API client.
credentials = service_account.Credentials.from_service_account_info(
    st.secrets["gcp_service_account"]
)
client = storage.Client(credentials=credentials)
#create a bucket object to get bucket details
bucket = client.get_bucket(bucketName)
file = st.file_uploader("Upload a file")
def main():
    if file is not None:
        file_details = {"FileName": file.name, "FileType": file.type}
        st.write(file_details)
        #img = load_image(image_file)
        #st.image(img, caption='Sunrise by the mountains')
        with open(os.path.join("tempDir", file.name), "wb") as f:
            f.write(file.getbuffer())
        st.success("Saved File")
        object_name_in_gcs_bucket = bucket.blob(".", file.name)  # this line raises the TypeError below
        object_name_in_gcs_bucket.upload_from_filename("tempDir", file.name)

if __name__ == "__main__":
    main()
I've tried getting the path of the file with the cwd command and also tried the os library for the file path, but nothing worked.
Edited:
All I want to implement is a file upload where the file is selected by the customer using the file_uploader drop box. I'm able to save the file into a temporary directory after it is selected, using file.getbuffer() as shown in the code, but I couldn't get it uploaded into the GCS bucket: when I press the upload button it fails, complaining that a str cannot be compared with an int.
Maybe it's a path issue ("the code is unable to find the path of the file stored in the temp directory"), but I'm unable to figure out how to give the path to the upload function.
The error I'm facing:
TypeError: '>' not supported between instances of 'str' and 'int'

Traceback:
File "/home/raviteja/.local/lib/python3.10/site-packages/streamlit/runtime/scriptrunner/script_runner.py", line 564, in _run_script
    exec(code, module.__dict__)
File "/home/raviteja/test/streamlit/test.py", line 43, in <module>
    main()
File "/home/raviteja/test/streamlit/test.py", line 29, in main
    object_name_in_gcs_bucket = bucket.blob(".",file.name)
File "/home/raviteja/.local/lib/python3.10/site-packages/google/cloud/storage/bucket.py", line 795, in blob
    return Blob(
File "/home/raviteja/.local/lib/python3.10/site-packages/google/cloud/storage/blob.py", line 219, in __init__
    self.chunk_size = chunk_size  # Check that setter accepts value.
File "/home/raviteja/.local/lib/python3.10/site-packages/google/cloud/storage/blob.py", line 262, in chunk_size
    if value is not None and value > 0 and value % self._CHUNK_SIZE_MULTIPLE != 0:
Thanks all for the responses. After days of struggle, at last I've figured out the mistake I was making.
I don't know if I'm right or wrong, correct me if I'm wrong, but this worked for me:
object_name_in_gcs_bucket = bucket.blob("path-to-upload" + file.name)
Changing the , to + between the file path and the file name solved my issue. (That matches the traceback: Bucket.blob() treats its second positional argument as chunk_size, so the comma was passing file.name, a str, where an int was expected, hence the '>' not supported error.)
Sorry for the small issue. Happy that I could solve it.
You have some variables in your code and I guess you know what they represent. Try this out; otherwise, make sure you add all relevant information to the question and the code snippet.
def main():
    file = st.file_uploader("Upload file")
    if file is not None:
        file_details = {"FileName": file.name, "FileType": file.type}
        st.write(file_details)
        file_path = os.path.join("tempDir/", file.name)
        with open(file_path, "wb") as f:
            f.write(file.getbuffer())
        st.success("Saved File")
        print(file_path)

        def upload():
            # upload() closes over file_path from main().
            file_name = file_path
            read_file(file_name)  # read_file() is assumed to be defined elsewhere
            st.write(file_name)
            st.session_state["upload_state"] = "Saved successfully!"
            object_name_in_gcs_bucket = bucket.blob("gcp-bucket-destination-path" + file.name)
            object_name_in_gcs_bucket.upload_from_filename(file_path)
            st.write("You're uploading to bucket", bucketName)

        st.button("Upload file to GoogleCloud", on_click=upload)

if __name__ == "__main__":
    main()
This one works for me.
Solution 1
import streamlit as st
from google.oauth2 import service_account
from google.cloud import storage
import os
STREAMLIT_SCRIPT_FILE_PATH = os.path.dirname(os.path.abspath(__file__))
credentials = service_account.Credentials.from_service_account_info(
    st.secrets["gcp_service_account"]
)
client = storage.Client(credentials=credentials)
def main():
    bucketName = 'survey-appl-dev-public'
    file = st.file_uploader("Upload file")
    if file is not None:
        file_details = {"FileName": file.name, "FileType": file.type}
        st.write(file_details)
        with open(os.path.join("tempDir", file.name), "wb") as f:
            f.write(file.getbuffer())
        st.success("Saved File")

        bucket = client.bucket(bucketName)
        object_name_in_gcs_bucket = bucket.blob(file.name)
        # src_relative = f'./tempDir/{file.name}'  # also works
        src_absolute = f'{STREAMLIT_SCRIPT_FILE_PATH}/tempDir/{file.name}'
        object_name_in_gcs_bucket.upload_from_filename(src_absolute)

if __name__ == '__main__':
    main()
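One caveat worth noting (an assumption about the runtime environment, not something stated in the original answer): open() raises FileNotFoundError if tempDir does not exist yet, so a guard such as os.makedirs("tempDir", exist_ok=True) before writing keeps the snippet self-contained.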
Solution 2
Instead of saving the file to disk, use the file bytes directly using upload_from_string().
References:
Google Cloud upload_from_string
Streamlit file uploader
credentials = service_account.Credentials.from_service_account_info(
    st.secrets["gcp_service_account"]
)
client = storage.Client(credentials=credentials)
def gcs_upload_data():
    bucket_name = 'your_gcs_bucket_name'
    file = st.file_uploader("Upload file")
    if file is not None:
        fname = file.name
        ftype = file.type
        file_details = {"FileName": fname, "FileType": ftype}
        st.write(file_details)
        # Define gcs bucket.
        bucket = client.bucket(bucket_name)
        bblob = bucket.blob(fname)
        # Upload the bytes directly instead of a disk file.
        bblob.upload_from_string(file.getvalue(), ftype)

if __name__ == '__main__':
    gcs_upload_data()
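A note on that last call: file.getvalue() returns the upload as bytes, and upload_from_string() accepts bytes as well as str, so no decoding step is needed; the second argument becomes the blob's Content-Type.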
Consider the following code that uses the PyDrive module:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
file = drive.CreateFile({'title': 'test.txt'})
file.Upload()
file.SetContentString('hello')
file.Upload()
file.SetContentString('')
file.Upload() # This throws an exception.
Creating the file and changing its contents works fine until I try to erase the contents by setting the content string to an empty one. Doing so throws this exception:
pydrive.files.ApiRequestError: <HttpError 400 when requesting
https://www.googleapis.com/upload/drive/v2/files/{LONG_ID}?alt=json&uploadType=resumable
returned "Bad Request">
When I look at my Drive, I see the test.txt file successfully created with the text hello in it. However, I expected it to be empty.
If I change the empty string to any other text, the file is changed twice without errors, though this doesn't clear the contents, so it's not what I want.
When I looked up the error on the Internet, I found this issue on the PyDrive GitHub that may be related, though it has remained unsolved for almost a year.
If you want to reproduce the error, you have to create your own project that uses the Google Drive API, following this tutorial from the PyDrive docs.
How can one erase the contents of a file through PyDrive?
Issue and workaround:
When resumable=True is used, it seems that 0-byte data cannot be uploaded, so in this case the empty data has to be uploaded without resumable=True. But looking at the PyDrive source, resumable=True appears to be used as the default (ref). As a workaround, I would like to propose using the requests module; the access token is retrieved from gauth of PyDrive.
When your script is modified, it becomes as follows.
Modified script:
import io
import requests
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
file = drive.CreateFile({'title': 'test.txt'})
file.Upload()
file.SetContentString('hello')
file.Upload()
# file.SetContentString()
# file.Upload() # This throws an exception.
# I added the script below.
res = requests.patch(
    "https://www.googleapis.com/upload/drive/v3/files/" + file['id'] + "?uploadType=multipart",
    headers={"Authorization": "Bearer " + gauth.credentials.token_response['access_token']},
    files={
        'data': ('metadata', '{}', 'application/json'),
        'file': io.BytesIO()
    }
)
print(res.text)
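An alternative sketch that stays within the API client PyDrive already wraps: build an empty, non-resumable media body with MediaIoBaseUpload and send it through PyDrive's underlying v2 service object (drive.auth.service). This is an untested assumption based on the issue described above (0 bytes only failing for resumable uploads), not a documented PyDrive API:

import io
from googleapiclient.http import MediaIoBaseUpload

# Empty media body; resumable=False is the point, since 0-byte
# content is only rejected for resumable uploads.
media = MediaIoBaseUpload(io.BytesIO(b''), mimetype='text/plain', resumable=False)
drive.auth.service.files().update(fileId=file['id'], media_body=media).execute()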
References:
PyDrive
Files: update
I have trained and saved a model with Doc2Vec in Colab as follows:
model = gensim.models.Doc2Vec(vector_size=size_of_vector, window=10, min_count=5, workers=16,alpha=0.025, min_alpha=0.025, epochs=40)
model.build_vocab(allXs)
model.train(allXs, epochs=model.epochs, total_examples=model.corpus_count)
The model is saved in a folder that is not accessible from my Drive, but which I can list with:
from os import listdir
from os.path import isfile, getsize
from operator import itemgetter
files = [(f, getsize(f)) for f in listdir('.') if isfile(f)]
files.sort(key=itemgetter(1), reverse=True)
for f, size in files:
    print('{} {}'.format(size, f))
print('({} files {} total size)'.format(len(files), sum(f[1] for f in files)))
The output is:
79434928 Model_after_train.docvecs.vectors_docs.npy
9155086 Model_after_train
1024 .rnd
(3 files 88591038 total size)
To move the two files to the same shared directory as the notebook:
folder_id = FolderID
for f, size in files:
    if 'our_first_lda' in f:
        file = drive.CreateFile({'parents': [{u'id': folder_id}]})
        file.SetContentFile(f)
        file.Upload()
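A small note on the snippet above: when CreateFile() is given no 'title', PyDrive's SetContentFile() fills in the title from the local filename, so the uploaded files keep their original names.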
I am now facing two problems:
1) gensim creates two files when saving the model. Which one should I load?
2) When I try to load one file or the other with:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
from googleapiclient.discovery import build
drive_service = build('drive', 'v3')
file_id = FileID
import io
from googleapiclient.http import MediaIoBaseDownload
request = drive_service.files().get_media(fileId=file_id)
downloaded = io.BytesIO()
downloader = MediaIoBaseDownload(downloaded, request)
done = False
while done is False:
    _, done = downloader.next_chunk()
model = doc2vec.Doc2Vec.load(downloaded.read())
I am not able to load the model; I get the error:
TypeError: file() argument 1 must be encoded string without null bytes, not str
Any suggestions?
I've never used gensim, but from a look at the docs, here's what I think is going on:
You're getting two files because you passed separately=True to save, which is saving large numpy arrays in the output as separate files. You'll want to copy both files around.
Based on the load docs, you want to pass a filename, not the contents of the file. So when fetching the file from Drive, save to a file, and pass mmap='r' to load.
If that doesn't get you up and running, it'd be helpful to see a complete example (eg with fake data).
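Putting the answer's two points together, a minimal sketch (the file IDs are hypothetical placeholders; drive_service is the client built in the question): download each Drive file to disk under its original name, then hand load() the filename of the main model file.

import io
from googleapiclient.http import MediaIoBaseDownload
from gensim.models import doc2vec

# Hypothetical IDs of the two files that gensim saved.
model_files = {
    'Model_after_train': 'FILE_ID_OF_MAIN_FILE',
    'Model_after_train.docvecs.vectors_docs.npy': 'FILE_ID_OF_NPY_FILE',
}

# Download each Drive file to a local file with its original name.
for name, file_id in model_files.items():
    request = drive_service.files().get_media(fileId=file_id)
    with open(name, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            _, done = downloader.next_chunk()

# load() wants a filename, not file contents. Point it at the main
# file; gensim locates the companion .npy automatically because the
# filenames match, and mmap='r' memory-maps the large array.
model = doc2vec.Doc2Vec.load('Model_after_train', mmap='r')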
Here, under "Update file metadata", it shows how to change the title of a created file. I'm looking for how to change the title if I know the ID or key. Or is there a way to do it in gspread?
You can also do this directly with PyDrive. CreateFile() only creates a local Python object to represent the state of a new or existing file:
# CreateFile() can be called with an existing id.
file1 = drive.CreateFile({'id': 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'})
file1['title'] = '<new title>' # Change title.
file1.Upload() # Upload new title.
I found it:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
file_id = 'xxxxxxxxxxxxxxxxxxxxxxxx'
a = drive.auth.service.files().get(fileId=file_id).execute()
a['title'] = "new title"
update = drive.auth.service.files().update(fileId=file_id, body=a).execute()
I am using Python 2.7 and I am trying to upload a file (*.txt) into a folder that is shared with me.
So far I have been able to upload it to my Drive, but not to set which folder it goes into. I am given the URL of the place where I must put this file.
Thank you.
This is my code so far:
def Upload(file_name, file_path, upload_url):
    upload_url = upload_url
    client = gdata.docs.client.DocsClient(source=upload_url)
    client.api_version = "3"
    client.ssl = True
    client.ClientLogin(username, passwd, client.source)
    filePath = file_path
    newResource = gdata.docs.data.Resource(filePath, file_name)
    media = gdata.data.MediaSource()
    media.SetFileHandle(filePath, 'mime/type')
    newDocument = client.CreateResource(
        newResource,
        create_uri=gdata.docs.client.RESOURCE_UPLOAD_URI,
        media=media
    )
The API you are using (gdata) is deprecated. Use google-api-python-client instead.
Follow this official Python quickstart guide to upload a file to a folder. Additionally, send the parents parameter in the request body like this: body['parents'] = [{'id': parent_id}]
Or, you can use PyDrive, a Python wrapper library which simplifies a lot of the work of dealing with the Google Drive API. The whole code is as simple as this:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

gauth = GoogleAuth()
drive = GoogleDrive(gauth)

f = drive.CreateFile({'parents': [{'id': parent_id}]})  # Drive API v2 parents format
f.SetContentFile('cat.png')  # Read local file
f.Upload()  # Upload it
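For completeness, a minimal sketch of the same upload with the non-deprecated google-api-python-client (Drive API v3, where parents is a plain list of folder IDs; creds and folder_id are placeholders you would supply):

from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

# 'creds' is assumed to be an authorized google-auth credentials object,
# and 'folder_id' the ID of the shared folder.
service = build('drive', 'v3', credentials=creds)

file_metadata = {
    'name': 'file.txt',
    'parents': [folder_id],  # v3 uses a plain list of folder IDs
}
media = MediaFileUpload('file.txt', mimetype='text/plain')
uploaded = service.files().create(
    body=file_metadata, media_body=media, fields='id'
).execute()
print(uploaded.get('id'))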