create html download link to download dataframe as excel in python - python

Can the code below be modified to create a download link for an .xlsx file with multiple dataframes saved as different sheets of one Excel file?
import base64
from IPython.display import HTML

def create_download_link(MM_df_pd, title="Download CSV file", filename="case_review.csv"):
    csv = MM_df_pd.to_csv()
    b64 = base64.b64encode(csv.encode())
    payload = b64.decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload, title=title, filename=filename)
    return HTML(html)

create_download_link(MM_df_pd)
I tried saving the Excel file to an S3 location first, but that is a long way around and doesn't create a download link. I also replaced csv with xlsx in the code, but it didn't work.

You need to use boto3 to upload your file to S3 and then create a presigned URL:
import boto3

def get_presigned_url(bucket: str, key: str, exp: int = 3600):
    s3 = boto3.client('s3')
    return s3.generate_presigned_url('get_object',
                                     Params={'Bucket': bucket,
                                             'Key': key},
                                     ExpiresIn=exp)

def s3_upload_file(bucket: str, source_file: str, target_path: str):
    s3 = boto3.client('s3')
    s3.upload_file(source_file, bucket, target_path)

df.to_csv("filename.csv")  # save your csv
s3_upload_file("your_bucket", "filename.csv", "your_s3_path_target")
presigned_url = get_presigned_url("your_bucket", "your_s3_path_target")
print(presigned_url)
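Since the question asks for one .xlsx file with several dataframes as separate sheets, the same two helpers can be reused once the workbook has been built. A minimal sketch, assuming pandas has an Excel engine such as openpyxl or xlsxwriter installed; the dataframe names, bucket and key are placeholders:
import pandas as pd

# write several dataframes into one workbook, one sheet each (df_summary / df_details are placeholders)
with pd.ExcelWriter("case_review.xlsx") as writer:
    df_summary.to_excel(writer, sheet_name="summary", index=False)
    df_details.to_excel(writer, sheet_name="details", index=False)

# upload the workbook and hand out a time-limited link using the helpers above
s3_upload_file("your_bucket", "case_review.xlsx", "reports/case_review.xlsx")
print(get_presigned_url("your_bucket", "reports/case_review.xlsx"))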

Related

How to use pd.read_csv() with Blobs from GCS?

I have a Python script reading files from a GCS bucket:
from google.cloud import storage
import pandas as pd
client = storage.Client.from_service_account_json('sa.json')
BUCKET_NAME = 'sleep-accel'
bucket = client.get_bucket(BUCKET_NAME)
blobs_all = list(bucket.list_blobs())
blobs_specific = list(bucket.list_blobs(prefix='physionet.org/files/sleep-accel/1.0.0/motion/'))
main_df = pd.DataFrame({})
txtFile = blobs_specific[0].download_to_file('/tmp')
main_df = pd.concat([main_df, pd.read_csv(txtFile)])
blobs_specific returns an array of blobs, which contains a blob with some metadata and the .txt file that I need parsed by .read_csv().
I'm trying to figure out which GCS library function I'm supposed to use so that pd.read_csv() can read it.
All the files are .txt, and that's why I'm trying to parse them as CSV here.
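One way to let pd.read_csv() read a blob, sketched below with the question's own bucket and prefix, is to pull each blob into memory with download_as_bytes() (download_as_string() on older versions of google-cloud-storage) and wrap the bytes in an io.BytesIO buffer; the .txt filter is an assumption:
import io
import pandas as pd
from google.cloud import storage

client = storage.Client.from_service_account_json('sa.json')
bucket = client.get_bucket('sleep-accel')

frames = []
for blob in bucket.list_blobs(prefix='physionet.org/files/sleep-accel/1.0.0/motion/'):
    if not blob.name.endswith('.txt'):
        continue  # skip metadata blobs
    data = blob.download_as_bytes()               # read the blob into memory
    frames.append(pd.read_csv(io.BytesIO(data)))  # parse the text as CSV

main_df = pd.concat(frames, ignore_index=True)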

File conversion XML to JSON in S3 through AWS Glue

I have my bucket structured like below, and XML files land in this S3 bucket folder:
S3:/Fin-app-ops/data-ops/raw-d
I need to convert those XML files to JSON files and put them back into S3 in the same bucket but a different folder:
S3:/Fin-app-ops/data-ops/con-d
I tried it this way, but it did not work:
import os
import json
import boto3
import xmltodict

s3 = boto3.resource('s3')
s3_bucket = s3.bucket('Fin-app-ops')
file_in_path = 'data-ops/raw-d/'
file_dest_path = 'data-ops/con-d/'

Datafiles = [f.key for f in s3_bucket.objects.filter(prefix=file_in_path)]

for datafile in datafiles:
    if "xml" in obj.key:
        datafile = obj.get()['Body']
        data_dict = xmltodict.parse(datafile.read())
        datafile.close()
        json_data = json.dumps(data_dict)
        s3.Object(bucket_name, file_dest_path + '.json').put(Body=json.dumps(data_dict))
Is there any other way I can achieve this? Help please, I'm new to Python and Glue.
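A corrected sketch of the same idea (not Glue-specific, just plain boto3 and xmltodict): it filters objects under the raw-d prefix, parses each XML body, and writes the JSON under con-d. The bucket and prefixes are taken from the question; everything else is an assumption:
import json
import boto3
import xmltodict

s3 = boto3.resource('s3')
bucket = s3.Bucket('Fin-app-ops')
file_in_path = 'data-ops/raw-d/'
file_dest_path = 'data-ops/con-d/'

for obj in bucket.objects.filter(Prefix=file_in_path):
    if not obj.key.endswith('.xml'):
        continue
    body = obj.get()['Body'].read()    # download the XML
    data_dict = xmltodict.parse(body)  # XML -> dict
    out_key = file_dest_path + obj.key.split('/')[-1].replace('.xml', '.json')
    s3.Object('Fin-app-ops', out_key).put(Body=json.dumps(data_dict))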

How to write pyarrow table as csv to s3 directly? [duplicate]

In boto 2, you can write to an S3 object using these methods:
Key.set_contents_from_string()
Key.set_contents_from_file()
Key.set_contents_from_filename()
Key.set_contents_from_stream()
Is there a boto 3 equivalent? What is the boto3 method for saving data to an object stored on S3?
In boto 3, the 'Key.set_contents_from_' methods were replaced by
Object.put()
Client.put_object()
For example:
import boto3
some_binary_data = b'Here we have some data'
more_binary_data = b'Here we have some more data'
# Method 1: Object.put()
s3 = boto3.resource('s3')
object = s3.Object('my_bucket_name', 'my/key/including/filename.txt')
object.put(Body=some_binary_data)
# Method 2: Client.put_object()
client = boto3.client('s3')
client.put_object(Body=more_binary_data, Bucket='my_bucket_name', Key='my/key/including/anotherfilename.txt')
Alternatively, the binary data can come from reading a file, as described in the official docs comparing boto 2 and boto 3:
Storing Data
Storing data from a file, stream, or string is easy:
# Boto 2.x
from boto.s3.key import Key
key = Key('hello.txt')
key.set_contents_from_file('/tmp/hello.txt')
# Boto 3
s3.Object('mybucket', 'hello.txt').put(Body=open('/tmp/hello.txt', 'rb'))
boto3 also has a method for uploading a file directly:
s3 = boto3.resource('s3')
s3.Bucket('bucketname').upload_file('/local/file/here.txt','folder/sub/path/to/s3key')
http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Bucket.upload_file
You no longer have to convert the contents to binary before writing to the file in S3. The following example creates a new text file (called newfile.txt) in an S3 bucket with string contents:
import boto3
s3 = boto3.resource(
    's3',
    region_name='us-east-1',
    aws_access_key_id=KEY_ID,
    aws_secret_access_key=ACCESS_KEY
)
content="String content to write to a new S3 file"
s3.Object('my-bucket-name', 'newfile.txt').put(Body=content)
Here's a nice trick to read JSON from s3:
import json, boto3
s3 = boto3.resource("s3").Bucket("bucket")
json.load_s3 = lambda f: json.load(s3.Object(key=f).get()["Body"])
json.dump_s3 = lambda obj, f: s3.Object(key=f).put(Body=json.dumps(obj))
Now you can use json.load_s3 and json.dump_s3 with the same API as load and dump
data = {"test":0}
json.dump_s3(data, "key") # saves json to s3://bucket/key
data = json.load_s3("key") # read json from s3://bucket/key
A cleaner and more concise version, which I use to upload files on the fly to a given S3 bucket and sub-folder:
import boto3
BUCKET_NAME = 'sample_bucket_name'
PREFIX = 'sub-folder/'
s3 = boto3.resource('s3')
# Creating an empty file called "_DONE" and putting it in the S3 bucket
s3.Object(BUCKET_NAME, PREFIX + '_DONE').put(Body="")
Note: You should ALWAYS put your AWS credentials (aws_access_key_id and aws_secret_access_key) in a separate file, for example ~/.aws/credentials.
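With the keys in ~/.aws/credentials, nothing has to be hard-coded; a short sketch that loads them through a named profile ('default' is a placeholder for whatever profile you configured):
import boto3

# credentials are resolved from ~/.aws/credentials instead of being hard-coded
session = boto3.Session(profile_name='default')
s3 = session.resource('s3')
s3.Object('sample_bucket_name', 'sub-folder/_DONE').put(Body="")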
After some research, I found that writing a dictionary as CSV directly to an S3 bucket can be achieved with a simple csv writer,
e.g. data_dict = [{"Key1": "value1", "Key2": "value2"}, {"Key1": "value4", "Key2": "value3"}],
assuming the keys in all the dictionaries are uniform.
import csv
import boto3
from io import StringIO

# Sample input dictionary
data_dict = [{"Key1": "value1", "Key2": "value2"}, {"Key1": "value4", "Key2": "value3"}]
data_dict_keys = data_dict[0].keys()

# creating a file buffer
file_buff = StringIO()
# writing csv data to file buffer
writer = csv.DictWriter(file_buff, fieldnames=data_dict_keys)
writer.writeheader()
for data in data_dict:
    writer.writerow(data)

# creating s3 client connection
client = boto3.client('s3')
# placing file to S3, file_buff.getvalue() is the CSV body for the file
client.put_object(Body=file_buff.getvalue(), Bucket='my_bucket_name', Key='my/key/including/anotherfilename.txt')
It is worth mentioning smart-open, which uses boto3 as a back-end.
smart-open is a drop-in replacement for Python's open that can open files from S3, as well as FTP, HTTP and many other protocols.
For example:
from smart_open import open
import json
with open("s3://your_bucket/your_key.json", 'r') as f:
data = json.load(f)
The AWS credentials are loaded via boto3 credentials, usually a file in the ~/.aws/ dir or an environment variable.
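Writing works the same way; a minimal sketch that dumps a dict as JSON straight to an S3 key (the bucket and key are placeholders):
from smart_open import open
import json

data = {"test": 0}
with open("s3://your_bucket/your_key.json", 'w') as f:
    json.dump(data, f)  # streamed to S3 when the block exits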
You may use the code below to write, for example, an image to S3 (as of 2019). To be able to connect to S3 you will have to install the AWS CLI using pip install awscli, then enter your credentials using aws configure:
import boto3
import urllib3
import uuid
from pathlib import Path
from io import BytesIO
from errors import custom_exceptions as cex

BUCKET_NAME = "xxx.yyy.zzz"
POSTERS_BASE_PATH = "assets/wallcontent"
CLOUDFRONT_BASE_URL = "https://xxx.cloudfront.net/"


class S3(object):
    def __init__(self):
        self.client = boto3.client('s3')
        self.bucket_name = BUCKET_NAME
        self.posters_base_path = POSTERS_BASE_PATH

    def __download_image(self, url):
        manager = urllib3.PoolManager()
        try:
            res = manager.request('GET', url)
        except Exception:
            print("Could not download the image from URL: ", url)
            raise cex.ImageDownloadFailed
        return BytesIO(res.data)  # any file-like object that implements read()

    def upload_image(self, url):
        try:
            image_file = self.__download_image(url)
        except cex.ImageDownloadFailed:
            raise cex.ImageUploadFailed

        extension = Path(url).suffix
        id = uuid.uuid1().hex + extension
        final_path = self.posters_base_path + "/" + id

        try:
            self.client.upload_fileobj(image_file,
                                       self.bucket_name,
                                       final_path)
        except Exception:
            print("Image Upload Error for URL: ", url)
            raise cex.ImageUploadFailed

        return CLOUDFRONT_BASE_URL + id
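For the pyarrow table in the question title, the same put_object pattern works once the table has been serialized to CSV in memory; a minimal sketch, assuming pyarrow is installed (the table, bucket and key are placeholders):
import io
import boto3
import pyarrow as pa
import pyarrow.csv as pacsv

table = pa.table({"a": [1, 2, 3], "b": ["x", "y", "z"]})  # placeholder table

buf = io.BytesIO()
pacsv.write_csv(table, buf)  # serialize the pyarrow table to CSV in memory
buf.seek(0)

boto3.client('s3').put_object(Body=buf.getvalue(), Bucket='my_bucket_name', Key='my/key/table.csv')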

Download multiple files from S3 bucket using boto3

I have a CSV file containing numerous UUIDs.
I'd like to write a Python script using boto3 which:
Connects to an AWS S3 bucket
Uses each UUID contained in the CSV to copy the file contained
Files are all contained in a filepath like this: BUCKET/ORG/FOLDER1/UUID/DATA/FILE.PNG
However, the file contained in DATA/ can be a different file type.
Puts the copied file in a new S3 bucket
So far, I have successfully connected to the S3 bucket and checked its contents in Python using boto3, but I need help implementing the rest.
import boto3

# Create Session
session = boto3.Session(
    aws_access_key_id='ACCESS_KEY_ID',
    aws_secret_access_key='SECRET_ACCESS_KEY',
)

# Initiate S3 Resource
s3 = session.resource('s3')
your_bucket = s3.Bucket('BUCKET-NAME')

for s3_file in your_bucket.objects.all():
    print(s3_file.key)  # prints the contents of bucket
To read the CSV file you can use the csv library (see: https://docs.python.org/fr/3.6/library/csv.html).
Example:
import csv

with open('file.csv', 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        print(row)
To push files to the new bucket, you can use the copy method (see: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.copy)
Example:
import boto3

s3 = boto3.resource('s3')
source = {
    'Bucket': 'BUCKET-NAME',
    'Key': 'mykey'
}
bucket = s3.Bucket('SECOND_BUCKET-NAME')
bucket.copy(source, 'mykey')  # the second argument is the destination key, not the bucket name
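Putting the pieces together, a sketch that reads the UUIDs from the CSV, lists everything under each UUID's DATA/ prefix and copies it into the second bucket; the column index, prefix layout and bucket names are assumptions based on the question:
import csv
import boto3

s3 = boto3.resource('s3')
source_bucket = s3.Bucket('BUCKET-NAME')
target_bucket = s3.Bucket('SECOND_BUCKET-NAME')

with open('file.csv', 'r') as f:
    uuids = [row[0] for row in csv.reader(f)]  # assumes the UUID is in the first column

for uuid in uuids:
    prefix = 'ORG/FOLDER1/' + uuid + '/DATA/'
    for obj in source_bucket.objects.filter(Prefix=prefix):
        target_bucket.copy({'Bucket': 'BUCKET-NAME', 'Key': obj.key}, obj.key)  # keep the same key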

Upload Base64 Image to S3 and return URL

I am trying to upload a base64 image to an S3 bucket using Python.
I have googled and found a few answers, but none of them works for me, and some of them use boto rather than boto3, so they are of no use to me.
I have also tried this link: Boto3: upload file from base64 to S3, but it is not working for me, as the Object method is unknown to s3.
Following is my code so far:
import base64
import boto3

s3 = boto3.client('s3')
filename = photo.personId + '.png'
bucket_name = 'photos-collection'
dataToPutInS3 = base64.b64decode(photo.url[23:])
What is the correct way to upload the data in this dataToPutInS3 variable to an S3 bucket and get a URL back for it?
You didn't mention how you get the base64. In order to reproduce this, my code snippet gets the image from the internet using the requests library and later converts it to base64 using the base64 library.
The trick here is to make sure the base64 string you want to upload doesn't include the data:image/jpeg;base64 prefix.
And, as #dmigo mentioned in the comments, you should work with boto3.resource and not boto3.client.
from botocore.vendored import requests
import base64
import boto3

s3 = boto3.resource('s3')
bucket_name = 'BucketName'
# where the file will be uploaded; if you want to upload the file to a folder use 'Folder Name/FileName.jpeg'
file_name_with_extention = 'FileName.jpeg'
url_to_download = 'URL'

# make sure there is no data:image/jpeg;base64 in the string that returns
def get_as_base64(url):
    return base64.b64encode(requests.get(url).content)

def lambda_handler(event, context):
    image_base64 = get_as_base64(url_to_download)
    obj = s3.Object(bucket_name, file_name_with_extention)
    obj.put(Body=base64.b64decode(image_base64))
    # get bucket location
    location = boto3.client('s3').get_bucket_location(Bucket=bucket_name)['LocationConstraint']
    # get object url
    object_url = "https://%s.s3-%s.amazonaws.com/%s" % (bucket_name, location, file_name_with_extention)
    print(object_url)
More about S3.Object.put.
You can convert your base64 to IO bytes and use upload_fileobj to upload to an S3 bucket.
import base64
import six
import uuid
import imghdr
import io
import boto3


def get_file_extension(file_name, decoded_file):
    extension = imghdr.what(file_name, decoded_file)
    extension = "jpg" if extension == "jpeg" else extension
    return extension


def decode_base64_file(data):
    """
    Function to convert base64 to readable IO bytes and auto-generate a file name with extension
    :param data: base64 file input
    :return: tuple containing IO bytes file and filename
    """
    # Check if this is a base64 string
    if isinstance(data, six.string_types):
        # Check if the base64 string is in the "data:" format
        if 'data:' in data and ';base64,' in data:
            # Break out the header from the base64 content
            header, data = data.split(';base64,')

        # Try to decode the file. Raise a validation error if it fails.
        try:
            decoded_file = base64.b64decode(data)
        except TypeError:
            raise TypeError('invalid_image')

        # Generate file name:
        file_name = str(uuid.uuid4())[:12]  # 12 characters are more than enough.
        # Get the file name extension:
        file_extension = get_file_extension(file_name, decoded_file)
        complete_file_name = "%s.%s" % (file_name, file_extension,)

        return io.BytesIO(decoded_file), complete_file_name


def upload_base64_file(base64_file):
    bucket_name = 'your_bucket_name'
    file, file_name = decode_base64_file(base64_file)

    client = boto3.client('s3', aws_access_key_id='aws_access_key_id',
                          aws_secret_access_key='aws_secret_access_key')
    client.upload_fileobj(
        file,
        bucket_name,
        file_name,
        ExtraArgs={'ACL': 'public-read'}
    )
    return f"https://{bucket_name}.s3.amazonaws.com/{file_name}"
