I have a core class that reads a zip file from S3, unzips it, picks out a specific file, and returns that file's contents. This works fine, but now I am trying to mock everything I used along the way.
import io
import json
import re
import zipfile

import boto3

class ZipService:
    def __init__(self, path: str):
        self.path = path

    def get_manifest_json_object(self):
        s3 = boto3.resource('s3')
        bucket_name, key = self.__get_bucket_and_key()
        bucket = s3.Bucket(bucket_name)
        zip_object_reference = bucket.Object(key).get()["Body"]
        zip_object_bytes_stream = self.__get_zip_object_bytes_stream(zip_object_reference)
        zipf = zipfile.ZipFile(zip_object_bytes_stream, mode='r')
        return self.__get_manifest_json(zipf)

    def __get_bucket_and_key(self):
        pattern = r"https:\/\/(.?[^\.]*)\.(.?[^\/]*)\/(.*)"  # this regex is working but I don't know how :D
        result = re.split(pattern, self.path)
        return result[1], result[3]

    def __get_zip_object_bytes_stream(self, zip_object_reference):
        return io.BytesIO(zip_object_reference.read())

    def __get_manifest_json(self, zipf):
        manifest_json_text = [zipf.read(name) for name in zipf.namelist() if "/manifest.json" in name][0].decode("utf-8")
        return json.loads(manifest_json_text)
For this I have written a test case that throws an error:
@patch('boto3.resource')
class TestZipService(TestCase):
    def test_zip_service(self, mock):
        s3 = boto3.resource('s3')
        bucket = s3.Bucket("abc")
        bucket.Object.get.return_value = "some-value"
        zipfile.ZipFile.return_value = "/some-path"
        inst = ZipService("/some-path")
        with mock.patch.object(inst, "_ZipService__get_manifest_json", return_value={"a": "b"}) as some_object:
            expected = {"a": "b"}
            actual = inst.get_manifest_json_object()
            self.assertIsInstance(expected, actual)
Error:
bucket_name, key = self.__get_bucket_and_key()
File "/Us.tox/py38/lib/python3.8/site-packages/services/zip_service.py", line 29, in __get_bucket_and_key
return result[1], result[3]
IndexError: list index out of range
What exactly is wrong here? Any hints would also be appreciated. TIA
You are giving your ZipService a path of "/some-path".
Then you test its get_manifest_json_object method, whose second statement calls __get_bucket_and_key.
You are not mocking __get_bucket_and_key, so when it is called it runs the regex split on that input path, and "/some-path" does not produce the four-element result that return result[1], result[3] needs.
Hence, IndexError: list index out of range.
Either give your ZipService a path in the format the regex expects, or mock all the private methods used in get_manifest_json_object.
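As an illustration of the second option, here is a minimal sketch (the services.zip_service module path comes from your traceback; the bucket URL is made up so the regex can split it into its four parts):

import io
from unittest import TestCase
from unittest.mock import patch

from services.zip_service import ZipService

class TestZipService(TestCase):
    @patch('services.zip_service.zipfile.ZipFile')
    @patch('services.zip_service.boto3')
    def test_zip_service(self, mock_boto3, mock_zipfile):
        # make the mocked S3 object return real bytes for its "Body"
        mock_boto3.resource.return_value.Bucket.return_value \
            .Object.return_value.get.return_value = {"Body": io.BytesIO(b"")}
        # a path in the https://<bucket>.<host>/<key> shape the regex expects
        inst = ZipService("https://my-bucket.s3.amazonaws.com/some/key.zip")
        with patch.object(inst, "_ZipService__get_manifest_json", return_value={"a": "b"}):
            actual = inst.get_manifest_json_object()
        self.assertEqual({"a": "b"}, actual)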
I have function that looks like this:
def file1_exists(directory):
file1_path = os.path.join(directory, 'file1.json')
return os.path.exists(file1_path)
def file2_exists(directory):
log_path = os.path.join(directory, 'file2.log')
return os.path.exists(file2_path)
def create_file1(directory):
if file1_exists(directory):
return
if not file2_exists(directory):
return
mod_time = os.stat(os.path.join(directory, 'file2.log')).st_mtime
timestamp = {
"creation_timestamp": datetime.datetime.fromtimestamp(mod_time).isoformat()
}
with open(os.path.join(directory, "file1.json"), "w") as f:
json.dump(timestamp, f)
And I need to create a unittest that uses mock files.
The 3 unit tests that I need are:
1. A mock file1.json file where I will assert that the function returns None (based on the first if statement, since the file exists)
2. A way to mock-hide the file2.log item in order to assert that the function returns None (based on the second if statement)
3. A mock file1.json file where I write the required data and then assert that the return matches the expected outcome.
So far I've tried tests 1. and 2. with variations of this but I've been unsuccessful:
class TestAdminJsonCreation(unittest.TestCase):

    @patch('os.path.exists', return_value=True)
    def test_existing_admin_json(self, mock_exists):
        self.assertIsNone(postprocess_results.create_json_file())
I've also read about other solutions such as:
Python testing: using a fake file with mock & io.StringIO
But I haven't found a way to successfully do what I need...
You want to provide a different return value for each call to os.path.exists. Since you know the order of the calls, you can use side_effect to supply a list of values to be used in order.
class TestAdminJsonCreation(unittest.TestCase):

    # JSON file already exists
    @patch('os.path.exists', return_value=True)
    def test_existing_admin_json(self, mock_exists):
        self.assertIsNone(postprocess_results.create_json_file())

    # no JSON file, no log file
    @patch('os.path.exists', side_effect=[False, False])
    def test_missing_log_file(self, mock_exists):
        self.assertIsNone(postprocess_results.create_json_file())

    # no JSON file, log file exists
    @patch('os.path.exists', side_effect=[False, True])
    def test_creates_admin_json(self, mock_exists):
        ...
The third test requires an actual file system, or for you to mock open.
So, I ended up breaking my original function into 3 different functions for easier testing.
The tests work by checking what create_file1 does when we feed it different return values from the other two functions, and whether it writes the expected data.
import json
import unittest
from unittest.mock import mock_open, patch

from mymodule import create_file1  # 'mymodule' is a stand-in for the module that defines create_file1

class TestFile1JsonCreation(unittest.TestCase):

    @patch('builtins.open', new_callable=mock_open())
    @patch('os.stat')
    @patch('mymodule.file1_exists', return_value=True)
    @patch('mymodule.file2_exists', return_value=False)
    def test_existing_file1_json(self, file2_exists, file1_existsmock, stat, mopen):
        create_file1('.')

        # file1.json should not have been written
        mopen.assert_not_called()

    @patch('builtins.open', new_callable=mock_open())
    @patch('os.stat')
    @patch('mymodule.file1_exists', return_value=False)
    @patch('mymodule.file2_exists', return_value=False)
    def test_missing_file2(self, file2_exists, file1_existsmock, stat, mopen):
        create_file1('.')

        # file1.json should not have been written
        mopen.assert_not_called()

    @patch('builtins.open', new_callable=mock_open())
    @patch('os.stat')
    @patch('mymodule.file1_exists', return_value=False)
    @patch('mymodule.file2_exists', return_value=True)
    def test_write_data(self, file2_exists, file1_existsmock, stat, mopen):
        class FakeStat:
            st_mtime = 1641992788
        stat.return_value = FakeStat()

        create_file1('.')

        # file1.json should have been written
        mopen.assert_called_once_with('./file1.json', 'w')
        written_data = ''.join(
            c[1][0]
            for c in mopen().__enter__().write.mock_calls
        )
        expected_data = {"creation_timestamp": "2022-01-12T13:06:28"}
        written_dict_data = json.loads(written_data)
        self.assertEqual(written_dict_data, expected_data)
I have written a test as below:
class TestLoader(TestCase):

    @pytest.fixture(autouse=True)
    @patch('loaders.myloader.DSFactory')
    def _initialize_(self, mock_ds_factory):
        self.loader = MyLoader()
        mock_ds = Mock()
        mock_ds_factory.get_ds_for_env.return_value = mock_ds
        self.loader.ds = mock_ds

    def test_load(self):
        self.loader.ds.read_file.return_value = json.dumps(self.get_data())
        self.loader.load("test_s3_key")  # #### IN THIS LINE I AM GETTING THE ERROR MENTIONED BELOW ####

    @staticmethod
    def get_data():
        return {"key1": "value1", "key2": "value2"}
The associated source lives at loaders/myloader.py, which looks like this:
import json

from common.ds_factory import DSFactory

class MyLoader:
    def __init__(self):
        self.ds = DSFactory.get_ds_for_env()

    def load(self, file_key):
        print(f"ds : {self.ds}")
        print(f"file read is : {self.ds.read_file(S3_BUCKET, file_key)}")
        data_dict = json.loads(self.ds.read_file(S3_BUCKET, file_key))
But when I run the test, I get the following error:
ds is :<MagicMock name='DSFactory.get_ds_for_env()' id='140634163567528'>
file read is :<MagicMock name='DSFactory.get_ds_for_env().read_file()' id='140635257259568'>
E TypeError: the JSON object must be str, bytes or bytearray, not 'MagicMock'
I don't understand why, even after mocking the return value of read_file with
self.loader.ds.read_file.return_value = json.dumps(self.get_data())
I still get a MagicMock object. I am stuck and have no clue how to resolve this.
Your code:
from common.ds_factory import DSFactory

class MyLoader:
    def __init__(self):
        self.ds = DSFactory.get_ds_for_env()

    def load(self, file_key):
        data_dict = json.loads(self.datastore.read_file(S3_BUCKET, file_key))
The issue I can see is that self.datastore is not the attribute you mocked; it should be self.ds.read_file.
Print self.datastore.read_file(S3_BUCKET, file_key) and verify the output.
This error comes from the JSON parsing step: json.loads is receiving a MagicMock rather than a JSON string, because the read_file that actually runs is still an unmocked MagicMock, not the one whose return value you configured.
To read more about Mock and MagicMock, please visit here: https://medium.com/ryans-dev-notes/python-mock-and-magicmock-b3295c2cc7eb
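As a sketch of a working test (assuming, as in the question, that load really reads self.ds): patch DSFactory where MyLoader looks it up, and stub the exact attribute that load calls. This uses plain unittest setUp instead of a pytest fixture:

import json
from unittest import TestCase
from unittest.mock import Mock, patch

from loaders.myloader import MyLoader

class TestLoader(TestCase):
    def setUp(self):
        # patch DSFactory in the module under test, for the whole test
        patcher = patch('loaders.myloader.DSFactory')
        self.addCleanup(patcher.stop)
        patcher.start().get_ds_for_env.return_value = Mock()
        self.loader = MyLoader()  # its .ds is now the mock created above

    def test_load(self):
        data = {"key1": "value1", "key2": "value2"}
        # stub the exact attribute load() calls: self.ds.read_file
        self.loader.ds.read_file.return_value = json.dumps(data)
        self.loader.load("test_s3_key")  # json.loads now receives a str
        self.loader.ds.read_file.assert_called_once()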
I have a versioned bucket and would like to delete the object (and all of its versions) from the bucket. However, when I try to delete the object from the console, S3 simply adds a delete marker but does not perform a hard delete.
Is it possible to delete all versions of the object (hard delete) with a particular key?:
s3resource = boto3.resource('s3')
bucket = s3resource.Bucket('my_bucket')
obj = bucket.Object('my_object_key')
# I would like to delete all versions for the object like so:
obj.delete_all_versions()
# or delete all versions for all objects like so:
bucket.objects.delete_all_versions()
The other answers delete objects individually. It is more efficient to use the delete_objects boto3 call and batch process your deletes. The code below collects all versions and delete markers, then deletes them in batches of 1,000:
import boto3

bucket = 'bucket-name'
s3_client = boto3.client('s3')
object_response_paginator = s3_client.get_paginator('list_object_versions')

delete_marker_list = []
version_list = []

for object_response_itr in object_response_paginator.paginate(Bucket=bucket):
    if 'DeleteMarkers' in object_response_itr:
        for delete_marker in object_response_itr['DeleteMarkers']:
            delete_marker_list.append({'Key': delete_marker['Key'], 'VersionId': delete_marker['VersionId']})
    if 'Versions' in object_response_itr:
        for version in object_response_itr['Versions']:
            version_list.append({'Key': version['Key'], 'VersionId': version['VersionId']})

for i in range(0, len(delete_marker_list), 1000):
    response = s3_client.delete_objects(
        Bucket=bucket,
        Delete={
            'Objects': delete_marker_list[i:i + 1000],
            'Quiet': True
        }
    )
    print(response)

for i in range(0, len(version_list), 1000):
    response = s3_client.delete_objects(
        Bucket=bucket,
        Delete={
            'Objects': version_list[i:i + 1000],
            'Quiet': True
        }
    )
    print(response)
The documentation is helpful here:
When versioning is enabled in an S3 bucket, a simple DeleteObject request cannot permanently delete an object from that bucket. Instead, Amazon S3 inserts a delete marker (which is effectively a new version of the object with its own version ID).
When you try to GET an object whose current version is a delete marker, S3 behaves as if the object has been deleted (even though it has not) and returns a 404 error.
To permanently delete an object from a versioned bucket, use DeleteObject, with the relevant version ID, for each and every version of the object (and that includes the delete markers).
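A minimal sketch of what the docs describe (bucket, key, and version ID are placeholders), one call per version:

import boto3

s3 = boto3.client('s3')
# a delete marker is itself a version, so it is removed the same way
s3.delete_object(Bucket='my-bucket', Key='my-object-key', VersionId='some-version-id')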
I had trouble using the other solutions to this question so here's mine.
import boto3

bucket = "bucket name goes here"
filename = "filename goes here"

client = boto3.client('s3')
paginator = client.get_paginator('list_object_versions')
response_iterator = paginator.paginate(Bucket=bucket)
for response in response_iterator:
    versions = response.get('Versions', [])
    versions.extend(response.get('DeleteMarkers', []))
    for version_id in [x['VersionId'] for x in versions
                       if x['Key'] == filename and x['VersionId'] != 'null']:
        print('Deleting {} version {}'.format(filename, version_id))
        client.delete_object(Bucket=bucket, Key=filename, VersionId=version_id)
This code deals with the cases where
object versioning isn't actually turned on
there are DeleteMarkers
there are no DeleteMarkers
there are more versions of a given file than fit in a single API response
Mahesh Mogal's answer doesn't delete DeleteMarkers. Mangohero1's answer fails if the object is missing a DeleteMarker. Hari's answer repeats 10 times (to work around missing pagination logic).
You can use object_versions.
import boto3

def delete_all_versions(bucket_name: str, prefix: str):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    if prefix is None:
        bucket.object_versions.delete()
    else:
        bucket.object_versions.filter(Prefix=prefix).delete()

delete_all_versions("my_bucket", None)          # empties the entire bucket
delete_all_versions("my_bucket", "my_prefix/")  # deletes all objects matching the prefix (possibly just one)
As a supplement to @jarmod's answer, here is the workaround I developed for hard-deleting an object, delete markers included:
import boto3

s3 = boto3.client('s3')

def get_all_versions(bucket, filename):
    keys = ["Versions", "DeleteMarkers"]
    results = []
    for k in keys:
        response = s3.list_object_versions(Bucket=bucket).get(k, [])
        to_delete = [r["VersionId"] for r in response if r["Key"] == filename]
        results.extend(to_delete)
    return results

bucket = "YOUR BUCKET NAME"
file = "YOUR FILE"

for version in get_all_versions(bucket, file):
    s3.delete_object(Bucket=bucket, Key=file, VersionId=version)
A solution in fewer lines:
import boto3

def delete_versions(bucket, objects=None):  # `objects` is either a list of str or None
    bucket = boto3.resource('s3').Bucket(bucket)
    if objects:  # delete specified objects
        [version.delete() for version in bucket.object_versions.all() if version.object_key in objects]
    else:  # or delete all objects in `bucket`
        [version.delete() for version in bucket.object_versions.all()]
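Usage might look like this (bucket and key names are placeholders):

delete_versions('my-bucket', objects=['folder/a.txt', 'folder/b.txt'])  # all versions of two keys
delete_versions('my-bucket')  # or wipe every version in the bucket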
To delete all versions of an object, or of all objects under a prefix:
Pass the object key folder/filename or the prefix folder/subfolder/ as the Prefix argument.
import boto3
s3 = boto3.resource('s3')
bucket = s3.Bucket("my-bucket-name")
bucket.object_versions.filter(Prefix="folder/subfolder/").delete()
This post was super helpful; without it we would have spent a tremendous amount of time cleaning up our S3 folders.
We had a requirement to clean up specific folders only, so I tried the following code and it worked like a charm. Also note that I am iterating 10 times to get around the 1,000-object limit of each list_object_versions call. Feel free to modify the limit as you wish.
import boto3

session = boto3.Session(aws_access_key_id='<YOUR ACCESS KEY>', aws_secret_access_key='<YOUR SECRET KEY>')
bucket_name = '<BUCKET NAME>'
object_name = '<KEY NAME>'

s3 = session.client('s3')
for i in range(10):
    versions = s3.list_object_versions(Bucket=bucket_name, Prefix=object_name)
    # print(versions)
    version_list = versions.get('Versions', [])
    for version in version_list:
        keyName = version.get('Key')
        versionId = version.get('VersionId')
        print(keyName + ':' + versionId)
        s3.delete_object(Bucket=bucket_name, Key=keyName, VersionId=versionId)
    marker_list = versions.get('DeleteMarkers', [])
    # print(marker_list)
    for marker in marker_list:
        keyName1 = marker.get('Key')
        versionId1 = marker.get('VersionId')
        print(keyName1 + ':' + versionId1)
        s3.delete_object(Bucket=bucket_name, Key=keyName1, VersionId=versionId1)
This script will delete all versions of all objects under a prefix:
import boto3

bucket_name = "my-bucket"  # your bucket name here

s3 = boto3.resource("s3")
client = boto3.client("s3")

s3_bucket = s3.Bucket(bucket_name)
for obj in s3_bucket.objects.filter(Prefix=""):
    response = client.list_object_versions(Bucket=bucket_name, Prefix=obj.key)
    while "Versions" in response:
        to_delete = [
            {"Key": ver["Key"], "VersionId": ver["VersionId"]}
            for ver in response["Versions"]
        ]
        delete = {"Objects": to_delete}
        client.delete_objects(Bucket=bucket_name, Delete=delete)
        response = client.list_object_versions(Bucket=bucket_name, Prefix=obj.key)
    client.delete_object(Bucket=bucket_name, Key=obj.key)
Easiest way:
import boto3
bucket = boto3.resource("s3").Bucket("mybucket")
bucket.object_versions.all().delete()
You can delete an object with all of its versions using the following code:
import boto3

session = boto3.Session(aws_access_key_id, aws_secret_access_key)
bucket_name = 'bucket_name'
object_name = 'object_name'

s3 = session.client('s3')
versions = s3.list_object_versions(Bucket=bucket_name, Prefix=object_name)
version_list = versions.get('Versions', [])
for version in version_list:
    versionId = version.get('VersionId')
    s3.delete_object(Bucket=bucket_name, Key=object_name, VersionId=versionId)
The rest of the answers all miss something. Either using the Prefix parameter, or deleting delete markers, or handling errors...
import itertools

import boto3

# bucket_name and key are assumed to be set already
s3 = boto3.client('s3')
response = s3.list_object_versions(Bucket=bucket_name, Prefix=key)

objects_to_delete = []

# Note that we do not use pagination because we assume the file has fewer than the max versions (something like 300).
# Note that we also traverse delete markers.
for obj in itertools.chain(response.get("Versions", []), response.get("DeleteMarkers", [])):
    # NOTE: This is super stupid, but AWS has no API to list versions for a single object, only by prefix.
    # So other objects that share the same prefix (e.g. "blaze/a.txt" and "bla.json" when asking for "bla")
    # will also be listed. So we need to be careful here.
    if obj["Key"] != key:
        break
    objects_to_delete.append({"Key": obj["Key"], "VersionId": obj["VersionId"]})

if len(objects_to_delete) == 0:
    raise FileNotFoundError(f'File {key} not found at bucket {bucket_name}')

deletion_response = s3.delete_objects(Bucket=bucket_name, Delete={"Objects": objects_to_delete, "Quiet": False})
errors = deletion_response.get("Errors", [])
if len(errors) > 0:
    raise Exception(f'Failed deleting file {key} from bucket {bucket_name}. Result: {deletion_response}')
I am not able to find any solution for recursively copying contents from one prefix to another in an S3 bucket using boto in Python.
Suppose a bucket B1 contains a key structure like:
B1/x/*
I want to copy all the objects recursively from keys like B1/x/* to B1/y/*
There is not "directory" in S3. Those "/" separator is just part of object name, that's why boto doesn't have such features. Either write a script to deal with it or use third party tools.
AWS customerapps show s3browser that provide such arbitrary directory copying functionality. The typical free version only spawn two threads to move file, the paid version allow you to specify more threads and run faster.
Or just write a script that uses s3.client.copy_object to copy each file to its new name, then delete the originals afterwards, e.g.:
import boto3

s3 = boto3.client("s3")

# list_objects_v2() gives more info
more_objects = True
found_token = None
while more_objects:
    if found_token is None:  # first page: no continuation token yet
        response = s3.list_objects_v2(
            Bucket="mybucket",
            Prefix="B1/x/",
            Delimiter="/")
    else:  # subsequent pages: pass the token from the previous response
        response = s3.list_objects_v2(
            Bucket="mybucket",
            ContinuationToken=found_token,
            Prefix="B1/x/",
            Delimiter="/")
    # use copy_object or copy_from
    for source in response["Contents"]:
        raw_name = source["Key"].split("/")[-1]
        new_name = "new_structure/{}".format(raw_name)
        s3.copy_object(
            ....
        )
    # Now check whether there are more objects to list
    if "NextContinuationToken" in response:
        found_token = response["NextContinuationToken"]
        more_objects = True
    else:
        more_objects = False
** IMPORTANT NOTES **: list_objects only returns a maximum of 1,000 keys per call, and MaxKeys will not raise that limit. So you must use list_objects_v2 and check whether NextContinuationToken is returned to know if there are more objects, repeating until the listing is exhausted.
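For what it's worth, boto3's built-in paginator does that continuation-token bookkeeping for you; a minimal sketch of the same copy loop (bucket and prefixes are placeholders):

import boto3

s3 = boto3.client("s3")

# the paginator follows NextContinuationToken internally, so every
# page beyond the first 1,000 keys is fetched automatically
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket="mybucket", Prefix="B1/x/"):
    for source in page.get("Contents", []):
        new_key = "B1/y/" + source["Key"].split("/")[-1]
        s3.copy_object(
            Bucket="mybucket",
            Key=new_key,
            CopySource={"Bucket": "mybucket", "Key": source["Key"]},
        )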
Just trying to build on previous answer:
import os

import boto3

s3 = boto3.client('s3')

def copyFolderFromS3(pathFrom, bucketTo, locationTo):
    response = {}
    response['status'] = 'failed'
    getBucket = pathFrom.split('/')[2]
    location = '/'.join(pathFrom.split('/')[3:])
    if pathFrom.startswith('s3://'):
        copy_source = {'Bucket': getBucket, 'Key': location}
        uploadKey = locationTo
        recursiveCopyFolderToS3(copy_source, bucketTo, uploadKey)

def recursiveCopyFolderToS3(src, uplB, uplK):
    more_objects = True
    found_token = None
    while more_objects:
        if found_token is None:  # first page: no continuation token yet
            response = s3.list_objects_v2(
                Bucket=src['Bucket'],
                Prefix=src['Key'],
                Delimiter="/")
        else:  # subsequent pages: pass the token along
            response = s3.list_objects_v2(
                Bucket=src['Bucket'],
                ContinuationToken=found_token,
                Prefix=src['Key'],
                Delimiter="/")
        for source in response["Contents"]:
            raw_name = source["Key"].split("/")[-1]
            new_name = os.path.join(uplK, raw_name)
            if raw_name.endswith('_$folder$'):
                src["Key"] = source["Key"].replace('_$folder$', '/')
                new_name = new_name.replace('_$folder$', '')
                recursiveCopyFolderToS3(src, uplB, new_name)
            else:
                src['Key'] = source["Key"]
                s3.copy_object(CopySource=src, Bucket=uplB, Key=new_name)
        if "NextContinuationToken" in response:
            found_token = response["NextContinuationToken"]
            more_objects = True
        else:
            more_objects = False
Or you can also use the simple awscli, which is installed by default on EC2/EMR machines.
import subprocess

cmd = 'aws s3 cp ' + path + ' ' + uploadUrl + ' --recursive'
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
p.communicate()
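If you want to avoid shell=True and string concatenation (which break on paths containing spaces), a variant using subprocess.run with an argument list might look like this (path and uploadUrl are the same placeholders as above):

import subprocess

# an argument list sidesteps shell quoting issues entirely
subprocess.run(
    ["aws", "s3", "cp", path, uploadUrl, "--recursive"],
    check=True,  # raise CalledProcessError on a non-zero exit
)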
Instead of using boto3, I opt for aws-cli and sh. See the aws s3 cp docs for the full list of arguments, which you can include as kwargs in the following (reworked from my own code), which can be used to copy to / from / between S3 buckets and / or local targets:
import sh  # also assumes aws-cli has been installed

def s3_cp(source, target, **kwargs):
    """
    Copy data from source to target. Include flags as kwargs
    such as recursive=True and include=xyz
    """
    args = []
    for flag_name, flag_value in kwargs.items():
        if flag_value is not False:  # i.e. --quiet=False means omit --quiet
            args.append(f"--{flag_name}")
            if flag_value is not True:  # i.e. --quiet=True means --quiet
                args.append(flag_value)
    args += [source, target]
    sh.aws("s3", "cp", *args)
bucket to bucket (as per the OP's question):
s3_cp("s3://B1/x/", "s3://B1/y/", quiet=True, recursive=True)
or bucket to local:
s3_cp("s3://B1/x/", "my-local-dir/", quiet=True, recursive=True)
Personally I found that this method gave improved transfer time (a few GB over 20k small files) from a couple of hours down to a few minutes compared to boto3. Perhaps under the hood it's doing some threading or simply opening a few persistent connections, but that's just speculation.
Warning: it won't work on Windows.
Related: https://stackoverflow.com/a/46680575/1571593
Another boto3 alternative, using the higher level resource API rather than client:
import os

import boto3

def copy_prefix_within_s3_bucket(
    endpoint_url: str,
    bucket_name: str,
    old_prefix: str,
    new_prefix: str,
) -> None:
    bucket = boto3.resource(
        "s3",
        endpoint_url=endpoint_url,
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    ).Bucket(bucket_name)
    for obj in bucket.objects.filter(Prefix=old_prefix):
        old_key = obj.key
        new_key = old_key.replace(old_prefix, new_prefix)
        copy_source = {"Bucket": bucket_name, "Key": old_key}
        bucket.copy(copy_source, new_key)

if __name__ == "__main__":
    copy_prefix_within_s3_bucket(
        endpoint_url="my_endpoint_url",
        bucket_name="my_bucket_name",
        old_prefix="my_old_prefix",
        new_prefix="my_new_prefix",
    )
I've saved a document in the blobstore and am trying to retrieve it within a handler (which handles a task). I've read the App Engine documentation on how to use the blobstore, but I'm struggling to get it to work for my case. I've tried the following within the handler, but cannot seem to get the object returned as the saved file (e.g. a .pdf or .txt):
class SendDocuments(webapp2.RequestHandler):
    def post(self):
        document_key = self.request.get("document_key")
        document_key = Key(str(document_key))
        the_document = DocumentsModel.all().filter("__key__ =", document_key).get()
        file_data = blobstore.BlobInfo.get(str(the_document.blobstore_key))  # returns a BlobInfo object
        file_data.open()  # returns a BlobReader object
        file_data.open().read()  # returns a string
I've also tried
class ServeSavedDocument(blobstore_handlers.BlobstoreDownloadHandler):
    def get(self, blob_key):
        self.send_blob(blob_key, save_as=True)
        return

class SendDocuments(webapp2.RequestHandler):
    def post(self):
        document_key = self.request.get("document_key")
        document_key = Key(str(document_key))
        the_document = DocumentsModel.all().filter("__key__ =", document_key).get()
        grab_blob = ServeSavedDocument()
        file_data = grab_blob.get(self, str(the_document.blobstore_key))
But the call to ServeSavedDocument fails with
'NoneType' object has no attribute 'headers'
I've had a look at the Files API, but the only example that doesn't save a file simply seems to return the blob key, i.e.:
blob_key = files.blobstore.get_blob_key(file_name)
What is the best way to grab a saved file in the blobstore from within a handler?
EDIT 1:
I'm trying to retrieve the txt or pdf file from the blobstore in a format / state that can be encoded as a file in a POST request, using the following code:
from google.appengine.api import urlfetch
from poster.encode import multipart_encode, MultipartParam

# assuming here that file_data is the file object
payload = {}
payload['user_id'] = '1234123412341234'
payload['test_file'] = MultipartParam('test_file', filename=file_data.filename,
                                      filetype=file_data.type,
                                      fileobj=file_data.file)
data, headers = multipart_encode(payload)
send_url = "http://127.0.0.0/"
t = urlfetch.fetch(url=send_url, payload="".join(data), method=urlfetch.POST, headers=headers)
Okay, after a lot of messing around I've found that this works! The key was to simply call open() on the BlobInfo object.
class SendDocuments(webapp2.RequestHandler):
    def post(self):
        document_key = self.request.get("document_key")
        document_key = Key(str(document_key))
        the_document = DocumentsModel.all().filter("__key__ =", document_key).get()
        file_data = blobstore.BlobInfo.get(str(the_document.blobstore_key))
        payload = {}
        payload['user_id'] = '1234123412341234'
        payload['test_file'] = MultipartParam('the_file', filename="something",
                                              filetype=file_data.content_type,
                                              fileobj=file_data.open())
Have a look at the BlobReader class, I think that's what you are looking for:
https://developers.google.com/appengine/docs/python/blobstore/blobreaderclass
It allows file-like read access to blobstore data:
# blob_key = ..
# Instantiate a BlobReader for a given Blobstore value.
blob_reader = blobstore.BlobReader(blob_key)
# Read the entire value into memory. This may take a while depending
# on the size of the value and the size of the read buffer, and is not
# recommended for large values.
value = blob_reader.read()
Try this: specify the file name and its MIME type in send_blob(), as follows:
from mimetypes import guess_type

def mime_type(filename):
    return guess_type(filename)[0]

class ServeSavedDocument(blobstore_handlers.BlobstoreDownloadHandler):
    def get(self):
        blob_key = self.request.get("blob_key")
        if blob_key:
            blob_info = blobstore.get(blob_key)
            if blob_info:
                save_as1 = blob_info.filename
                type1 = mime_type(blob_info.filename)
                self.send_blob(blob_info, content_type=type1, save_as=save_as1)