How to upload a CSV into my folder in an S3 bucket? - python

My requirement is to upload a CSV file from my local machine to a folder in my S3 bucket, but I don't have any idea how to give the folder name in the code below.
Can somebody help me with this?
import boto3
import os

def upload_files(path):
    session = boto3.Session(
        aws_access_key_id='',
        aws_secret_access_key='',
        region_name='us-east-1'
    )
    s3 = session.resource('s3')
    bucket = s3.Bucket('ag-tpch')

    for subdir, dirs, files in os.walk(path):
        for file in files:
            full_path = os.path.join(subdir, file)
            with open(full_path, 'rb') as data:
                bucket.put_object(Key=full_path[len(path)+1:], Body=data)

if __name__ == "__main__":
    upload_files('E:/expenses/shape')

In your code you are trying to upload all the files under "E:/expenses/shape" to S3.
I have changed it to a single file; you can modify it later according to your requirements.
import boto3
import os

def upload_file(path):
    session = boto3.Session(
        aws_access_key_id='',
        aws_secret_access_key='',
        region_name='us-east-1'
    )
    s3 = session.resource('s3')
    bucket = s3.Bucket('ag-tpch')

    with open(path, 'rb') as data:
        # your S3 path will be some/path/to-s3/test-x.csv
        bucket.put_object(Key='some/path/to-s3/test1.csv', Body=data)

if __name__ == "__main__":
    upload_file('E:/expenses/shape/test1.csv')
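If you still want to walk the whole local directory as in your original code and keep everything under one "folder" in the bucket, a minimal sketch could look like the following. Note that S3 has no real folders, so the folder is just a key prefix, and 'my-folder' is a placeholder name:
import boto3
import os

def upload_folder(path, prefix='my-folder'):
    # the "folder" in S3 is just a key prefix
    session = boto3.Session(
        aws_access_key_id='',
        aws_secret_access_key='',
        region_name='us-east-1'
    )
    bucket = session.resource('s3').Bucket('ag-tpch')

    for subdir, dirs, files in os.walk(path):
        for file in files:
            full_path = os.path.join(subdir, file)
            # build the key as <prefix>/<path relative to the local root>
            key = prefix + '/' + os.path.relpath(full_path, path).replace('\\', '/')
            bucket.upload_file(full_path, key)

if __name__ == "__main__":
    upload_folder('E:/expenses/shape')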

Related

How to upload an Excel file to AWS S3 using an AWS Lambda function in Python

I have an Excel file generated from a Lambda function, stored in the /tmp/ folder. I want to store it in an S3 bucket. I have set up the permissions and the bucket, but when the function completes it creates a damaged Excel file in the bucket which cannot be opened.
The code I used:
import boto3

def uploadtoS3(filename=str):
    s3 = boto3.client('s3')
    bucket = 'aws-day-ahead-estimations'
    DirName = '/tmp/' + filename
    s3.put_object(Bucket=bucket, Body=DirName, Key=filename)
    print('put complete')
When you use the put_object() method, the Body parameter expects the actual content of the file, not the file path.
You can fix this:
import boto3

def uploadtoS3(filename: str):
    s3 = boto3.client('s3')
    bucket = 'aws-day-ahead-estimations'
    file_path = '/tmp/' + filename
    try:
        # open the file and pass its contents (not its path) as the Body
        with open(file_path, 'rb') as f:
            s3.put_object(Bucket=bucket, Body=f, Key=filename)
            print('put complete')
    except Exception as e:
        print(f"An error occurred: {e}")
Another approach is to use the upload_file() method of the S3 client instead of the put_object() method.
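For example, a minimal sketch of that approach, reusing the same bucket name and /tmp/ path from above, could be:
import boto3

def uploadtoS3(filename: str):
    s3 = boto3.client('s3')
    bucket = 'aws-day-ahead-estimations'
    file_path = '/tmp/' + filename
    # upload_file takes the local path and streams the contents for you,
    # using multipart uploads for large files
    s3.upload_file(file_path, bucket, filename)
    print('put complete')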

Uploading folder from local system to a particular folder in S3

What should I change in my code so that I can upload my entire folder from my local system to a particular folder in my S3 bucket?
import os
import boto3

s3_resource = boto3.resource("s3", region_name="ap-south-1")

def upload_objects():
    try:
        bucket_name = "<S3 bucket-name>"
        root_path = '<local folder path>'
        bucket_folder = '<S3 folder name>'
        my_bucket = s3_resource.Bucket(bucket_name)
        # s3 = boto3.resource('s3')

        for path, subdirs, files in os.walk(root_path):
            path = path.replace("\\", "/")
            directory_name = path.replace(root_path, "")
            for file in files:
                my_bucket.upload_file(os.path.join(path, file), directory_name + '/' + file)
    except Exception as err:
        print(err)

if __name__ == '__main__':
    upload_objects()
You are not using your bucket_folder at all. It should be the beginning of your S3 key, since there are no folders in S3; it is all about key names and prefixes.
So it should be something like the following:
my_bucket.upload_file(os.path.join(path, file), bucket_folder + '/' + directory_name+'/'+file)
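Putting it together, a minimal sketch of the corrected loop might look like this (the strip("/") is an assumption to avoid a double '/' in the key when directory_name is empty):
import os
import boto3

s3_resource = boto3.resource("s3", region_name="ap-south-1")

def upload_objects(bucket_name, root_path, bucket_folder):
    my_bucket = s3_resource.Bucket(bucket_name)
    for path, subdirs, files in os.walk(root_path):
        path = path.replace("\\", "/")
        directory_name = path.replace(root_path, "").strip("/")
        for file in files:
            # key becomes <bucket_folder>/<relative dir>/<file>
            key = "/".join(part for part in (bucket_folder, directory_name, file) if part)
            my_bucket.upload_file(os.path.join(path, file), key)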

Unable to upload a PDF containing only images to an S3 bucket using Python

I have images which have been converted into PDFs and stored in a folder named 'test'.
I need to upload all files inside the test folder to S3.
Current situation: files get created in S3 but are empty. I'm assuming this is because the source PDF file only contains an image. I'm unable to figure out a way to ensure the images in the PDF also get uploaded.
Here's my code:
import os
import boto3
import botocore
import sys

SERVICE_NAME = 's3'
AWS_ACCESS_KEY_ID = 'XXXX'
AWS_SECRET_ACCESS_KEY = 'XXXXX+A'
AWS_S3_ENDPOINT_URL = 'https://s3.amazonaws.com'
AWS_STORAGE_BUCKET_NAME = 'resolution-medical/resolution_scanner'
AWS_STORAGE_BUCKET_NAME = 'resolution-medical'

source_folder = '/home/irfan/Downloads/test'
arr = os.listdir(source_folder)

for each in arr:
    try:
        arr2 = os.listdir(source_folder + '/' + each)
    except:
        arr2 = []
    if len(arr2) == 0:
        filepath = 'resolution_scanner/' + each
        fileobject = source_folder + '/' + each
        conn = boto3.resource(
            service_name=SERVICE_NAME,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
            endpoint_url=AWS_S3_ENDPOINT_URL)
        conn.Object(AWS_STORAGE_BUCKET_NAME, filepath).put(Body=fileobject, ACL='public-read', ContentType='application/pdf')

sys.exit()
It seems that the encoding issue occurs with put(). This SO Q&A solves it with Java. For me, just using upload_file() works, like this:
import boto3
file = "PDF/aaaaa.pdf"
s3 = boto3.resource("s3")
s3.Object("my-bucket-test", "test.pdf").upload_file(file)

Upload files to a subdirectory in S3 with boto3 in Python

I want to upload files to a subdirectory in a bucket. When I upload to the bucket root it works well, but I don't know how to add the subdirectory (prefix?).
def dlImgs():
    s3 = boto3.resource("s3")
    if gmNew is not None:
        reqImg = requests.get(gmNew, stream=True)
        fileObj = reqImg.raw
        reqData = fileObj.read()
        # upload to S3
        s3.Bucket(_BUCKET_NAME_IMG).put_object(Key=ccvName, Body=reqData)

dlImgs()
But how do I add the prefix?
EDIT: I found the solution by creating the path directly in the ccvName variable.
I had written this long ago.
import os
import boto3

def upload_file(file_name, in_sub_folder, bucket_name):
    client = boto3.client('s3')
    fname = os.path.basename(file_name)
    # the key places the file inside the given sub-folder (prefix)
    key = f'{in_sub_folder}/{fname}'
    try:
        client.upload_file(file_name, Bucket=bucket_name, Key=key)
    except Exception:
        print(f'{file_name} not uploaded')
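Usage would then look something like this (path, folder, and bucket names are placeholders):
upload_file('/home/user/data/report.csv', 'my-subfolder', 'my-bucket')
# resulting key: my-subfolder/report.csv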

Cannot Upload Multiple Files to AWS S3 using Python Script and wildcards

I am new to working with Python as well as AWS.
I am attempting to move various files, usually with specific formats, from my local PC to an S3 folder via a Python script. I am having an issue implementing a wildcard within the script to catch various files at once. I am able to move one file at a time using the line "data = open('file_example_here.csv', 'rb')", but I am stuck on adjusting my script to capture all files of a type (i.e. all .csv or all .json files). An example set of files is detailed below; if I wanted to move all .json files to my S3 bucket using a wildcard in the script, how could I adjust my script to handle that?
Any help would really be appreciated; my implementation is shared below.
/home/user/folder1/c_log_1-10-19.csv
/home/user/folder1/c_log_2-10-19.csv
/home/user/folder1/c_log_3-10-19.csv
/home/user/folder1/c_log_4-10-19.csv
/home/user/folder1/c_log_5-10-19.csv
/home/user/folder1/c_log_6-10-19.csv
/home/user/folder1/h_log_1-11-18.json
/home/user/folder1/h_log_2-11-18.json
/home/user/folder1/h_log_3-11-18.json
/home/user/folder1/h_log_4-11-18.json
/home/user/folder1/h_log_5-11-18.json
/home/user/folder1/h_log_6-11-18.json
import boto3
from botocore.client import Config

ACCESS_KEY_ID = 'key_id_here'
ACCESS_SECRET_KEY = 'secret_key_here'
BUCKET_NAME = 'bucket_name_here'

data = open('test_file.csv', 'rb')

s3 = boto3.resource(
    's3',
    aws_access_key_id=ACCESS_KEY_ID,
    aws_secret_access_key=ACCESS_SECRET_KEY,
    config=Config(signature_version='s3v4')
)
s3.Bucket(BUCKET_NAME).put_object(Key='folder_test/folder_test_2/test_file.csv', Body=data)

print("All_Done")
################################################
############## UPDATED CODE BELOW ############
################################################
import glob
import boto3
from botocore.client import Config

ACCESS_KEY_ID = 'some_key'
ACCESS_SECRET_KEY = 'some_key'
BUCKET_NAME = 'some_bucket'

#session = boto3.Session(profile_name='default')
s3 = boto3.resource(
    's3',
    aws_access_key_id=ACCESS_KEY_ID,
    aws_secret_access_key=ACCESS_SECRET_KEY,
    config=Config(signature_version='s3v4')
)

csv_files = glob.glob("/home/user/Desktop/*.csv")
#json_files = glob.glob("/home/user/folder1/h_log_*.json")

for filename in csv_files:
    print("Putting %s" % filename)
    s3.upload_file(filename, BUCKET_NAME, filename)

#for filename in json_files:
#    print("Putting %s" % filename)
#    s3.upload_file(filename, BUCKET_NAME, filename)

s3.Bucket(BUCKET_NAME).put_object(Key='folder1/folder1', Body=csv_files)

print("All_Done")
You can use something as simple as Python's glob module to find all files matching a specified pattern as in this example below:
#!/usr/bin/env python

import glob
import boto3
import os

BUCKET_NAME = 'MyBucket'
FOLDER_NAME = 'folder1/folder1'

session = boto3.Session(profile_name='default')
s3 = session.client('s3')

csv_files = glob.glob("/home/user/folder1/c_log_*.csv")
json_files = glob.glob("/home/user/folder1/h_log_*.json")

for filename in csv_files:
    key = "%s/%s" % (FOLDER_NAME, os.path.basename(filename))
    print("Putting %s as %s" % (filename, key))
    s3.upload_file(filename, BUCKET_NAME, key)

for filename in json_files:
    key = "%s/%s" % (FOLDER_NAME, os.path.basename(filename))
    print("Putting %s as %s" % (filename, key))
    s3.upload_file(filename, BUCKET_NAME, key)

print("All_Done")
The above code assumes you have AWS CLI installed with an access key configured under the default profile. If not, you can use the various methods of authenticating with boto3.
There's probably a more pythonic way to do this but this simple script works.
Check out the glob module (https://docs.python.org/3/library/glob.html).
import glob
csv_files = glob.glob('/home/user/folder_1/*.csv')
json_files = glob.glob('/home/user/folder_1/*.json')
Then iterate over these lists and upload as you were doing.
Also, there's no need to read in the data from the file. Just use the upload_file method on the bucket: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Bucket.upload_file
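For instance, a minimal sketch combining glob with the bucket's upload_file method (the bucket name and target prefix are placeholders):
import glob
import os
import boto3

bucket = boto3.resource('s3').Bucket('bucket_name_here')
files = glob.glob('/home/user/folder_1/*.csv') + glob.glob('/home/user/folder_1/*.json')

for filename in files:
    # no need to open() the file; upload_file streams it from disk
    bucket.upload_file(filename, 'folder_test/' + os.path.basename(filename))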
