Save excel file from main directory to database model in Django - python

How can I save the Excel file generated from a pandas DataFrame to a specific database record?
This is the view..
#csrf_exempt
def Data_Communication(request):
    """Write the posted data sets to ``output.xlsx``.

    The POST payload is read as one ``id`` entry plus the entries
    ``Data1`` ... ``DataN``; that is why the number of data sets is taken
    as ``len(request.POST) - 1``. Each ``Data<k>`` list becomes one row of
    the spreadsheet.
    """
    if request.method == 'POST':
        data_sets_number = len(request.POST) - 1
        Data_set_id = request.POST.get('id')
        # Record the generated file is meant to be attached to.
        Data_instance = Data_Sets.objects.get(pk=Data_set_id)
        # Keys are 1-based. The counter used to be reset to 1 on every pass,
        # so 'Data1' was read repeatedly instead of Data1..DataN.
        Data_Sets_asNestedList = [
            request.POST.getlist('Data' + str(index))
            for index in range(1, data_sets_number + 1)
        ]
        pd.DataFrame(Data_Sets_asNestedList).to_excel(
            'output.xlsx', header=False, index=False)
        print(Data_Sets_asNestedList)
        return HttpResponse('1')

If you're looking to associate the generated Excel file with the model Data_Sets, then you'd probably want to add a FileField to that model:
class Data_Sets(models.Model):
    """Data set record that can carry the Excel file generated for it."""

    # FileField is reached through ``models`` (already in scope for the base
    # class); the name ``fields`` is not known to be imported here.
    excel_file = models.FileField()
Once you've created the Excel file in your view, you can then associate it with the new field:
from django.core.files import File
#csrf_exempt
def Data_Communication(request):
    """Write the posted data sets to ``output.xlsx`` and attach it to the record.

    The POST payload is read as one ``id`` entry plus the entries
    ``Data1`` ... ``DataN``; that is why the number of data sets is taken
    as ``len(request.POST) - 1``. The resulting workbook is saved into the
    ``excel_file`` field of the ``Data_Sets`` row selected by ``id``.
    """
    if request.method == 'POST':
        data_sets_number = len(request.POST) - 1
        Data_set_id = request.POST.get('id')
        Data_instance = Data_Sets.objects.get(pk=Data_set_id)
        # Keys are 1-based. The counter used to be reset to 1 on every pass,
        # so 'Data1' was read repeatedly instead of Data1..DataN.
        Data_Sets_asNestedList = [
            request.POST.getlist('Data' + str(index))
            for index in range(1, data_sets_number + 1)
        ]
        pd.DataFrame(Data_Sets_asNestedList).to_excel(
            'output.xlsx', header=False, index=False)
        # Associate the Excel file with the model
        with open('output.xlsx', 'rb') as excel:
            Data_instance.excel_file.save('output.xlsx', File(excel))
        print(Data_Sets_asNestedList)
        return HttpResponse('1')
The excel file itself will be saved into the folder specified by the MEDIA_ROOT setting in your settings.py, and the model will point to that file via the excel_file attribute.
Note that you may want to generate a unique filename for output.xlsx to avoid requests from treading on each other.
Additional info on saving a file can be found here.

Don't insert your data into the database unchecked; use Django's validation system to validate it first.
Also check the bulk_create API for storing large batches of records.

Related

How to get values from different tables from DB faster?

Our database was designed very badly: no foreign keys are used to link the tables.
I need to fetch the complete information and export it to CSV. The challenge is that the information has to be queried from multiple tables (for example, the user table only stores a sectionid, so to get the section details I have to query the section table and match it against the sectionid taken from the user table).
I did this with a serializer, because there are multiple such fields.
The problem with my current method is that it is very slow, because for every object in the queryset it has to query the other tables to match on uuid/userid/some other id.
this is my views
class FileDownloaderSerializer(APIView):
    """Export every ``Userstable`` row as a CSV attachment."""

    def get(self, request, **kwargs):
        """Serialize all users and return them as ``All-users.csv``.

        The rows are written straight into the response instead of going
        through a file in the working directory: that file name was shared
        by all concurrent requests and the handle reopened for the response
        was never closed.
        """
        import csv  # local import: the module's import block is not visible here

        filename = "All-users.csv"
        datas = Userstable.objects.using(dbname).all()
        serializer = UserSerializer(datas, context={'sector': sector}, many=True)

        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = "attachment; filename=%s" % filename
        writer = csv.writer(response)
        # serializer.data is a list of mappings, not a DataFrame, so it has
        # no to_csv(); emit the values in field order without a header row.
        for row in serializer.data:
            writer.writerow(row.values())
        response['Content-Length'] = len(response.content)
        return response
so notice that i need one file exported which is .csv.
this is my serializer
class UserSerializer(serializers.ModelSerializer):
    """Serialize a ``Userstable`` row plus names looked up in other tables.

    Each method field runs one extra query per serialized object.
    """

    section = serializers.SerializerMethodField()
    department = serializers.SerializerMethodField()

    class Meta:
        model = Userstable
        # The special value is the string '__all__'; the bare name _all_ is undefined.
        fields = '__all__'

    def get_section(self, obj):
        """Return the name of the section referenced by ``obj.sectionid``."""
        # using() must be closed before get(); the parenthesis was misplaced.
        return section.objects.using(dbname).get(pk=obj.sectionid).sectionname

    def get_department(self, obj):
        """Return the department name referenced by ``obj.deptid``."""
        # NOTE(review): this looks up ``deptid`` on the ``section`` model;
        # presumably a department model was intended. Confirm with the schema.
        return section.objects.using(dbname).get(pk=obj.deptid).deptname
I'm showing only two tables here, but in my code I have a total of 5 different tables.
I tried limiting the export to 100 rows and it succeeded; I then tried to fetch 300000 rows and it took 3 hours to download the CSV, which is certainly not efficient. How can I solve this?

PdfFileReader.getFields() returns {} | django

I'm trying to read a PDF form with Django. In another view of my views.py I have succeeded in doing this by using PyPDF2 and its PdfFileReader.getFields() method.
The problem is that here the reading is not working properly: I've checked with Adobe Acrobat and the file is still a form with actual fields, so I don't really have any idea what the problem could be.
I'm attaching here the relevant portion of the code:
if request.method == "POST":
form = Form(request.POST, request.FILES) # the form refer to a model called 'New Request'
if form.is_valid():
form.save()
File = request.FILES['File'].name
full_filename = os.path.join(BASE_DIR, 'media/media', File)
f = PdfFileReader(full_filename)
fields = f.getFields()
fdfinfo = dict((k, v.get('/V', '')) for k, v in fields.items())
k = creare_from_pdf2(request, fdfinfo, pk) # this is a custom function
nr = NewRequest.objects.all() #I'm deleting the object uploaded because it won't be useful anymore
nr.delete()
os.remove(full_filename)
If I call print(fdfinfo) it actually shows {}. This of course leads to an error when fdfinfo is passed into the custom function called above. I don't really know what the problem could be, especially because in another view I did exactly the same thing and it works:
if request.method=='POST':
form = Form(request.POST, request.FILES)
if form.is_valid():
form.save()
uploaded_filename = request.FILES['File'].name
full_filename = os.path.join(BASE_DIR, 'media/media', uploaded_filename)
f = PdfFileReader(full_filename)
fields = f.getFields()
fdfinfo = dict((k, v.get('/V', '')) for k, v in fields.items())
k=create_from_pdf1(request, fdfinfo)
if k==1:
return HttpResponse('<html><body>Something went wrong</html></body>')
nr = NewRequest.objects.all()
nr.delete()
os.remove(full_filename)
Is there maybe a way to display the errors of PdfFileReader?
UPDATE
The new file that I'm trying to read has first been modified: some (BUT NOT ALL!) fields are filled in with PdfFileWriter, and the filled ones are then set to read-only. Could this operation have affected the behaviour of PdfFileReader? I'm attaching the corresponding view:
att = MAIN.objects.get(id=pk)
file_path = os.path.join(BASE_DIR, 'nuova_form.pdf')
input_stream = open(file_path, "rb")
pdf_reader = PdfFileReader(input_stream, strict = False)
if "/AcroForm" in pdf_reader.trailer["/Root"]:
pdf_reader.trailer["/Root"]["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
pdf_writer = PdfFileWriter()
set_need_appearances_writer(pdf_writer)
if "/AcroForm" in pdf_writer._root_object:
# Acro form is form field, set needs appearances to fix printing issues
pdf_writer._root_object["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
data_dict1 = { # my text fields
}
data_dict2 = { # my booleancheckbox fields }
for i in range(0,6): #The pdf file has 6 pages
pdf_writer.addPage(pdf_reader.getPage(i))
page = pdf_writer.getPage(i)
# update form fields
pdf_writer.updatePageFormFieldValues(page, data_dict1)
for j in range(0, len(page['/Annots'])):
writer_annot = page['/Annots'][j].getObject()
for field in data_dict1:
if writer_annot.get('/T') == field:
writer_annot.update({
NameObject("/Ff"): NumberObject(1) # make ReadOnly
})
# update checkbox fields
updateCheckboxValues(page, data_dict2)
output_stream = BytesIO()
pdf_writer.write(output_stream)
return output_stream
def updateCheckboxValues(page, fields):
    """Set the value and appearance state of the checkbox fields on ``page``.

    page: page object whose ``/Annots`` entries are checked against ``fields``.
    fields: mapping of field name (the annotation's ``/T`` entry) to the
        state name written into both ``/V`` and ``/AS``.

    A page without an ``/Annots`` entry is left untouched instead of
    raising ``KeyError``.
    """
    if '/Annots' not in page:
        return
    for annotation_ref in page['/Annots']:
        writer_annot = annotation_ref.getObject()
        field_name = writer_annot.get('/T')
        # One mapping lookup replaces the scan over every field name.
        if field_name in fields:
            writer_annot.update({
                NameObject("/V"): NameObject(fields[field_name]),
                NameObject("/AS"): NameObject(fields[field_name])
            })
I got similar results when trying to do a straightforward read of a PDF form using Python and PyPDF2. The PDF form had been created using Libre Writer and was a single page with about 50 text fields on it. When I ran the getFields() method on the reader object I was getting the same issue -- it was returning an empty dict object.
I thought there might be a limitation on the number of fields and tried removing some for testing, but got the same result. Then when looking at it I noticed the fieldnames were all pretty long: txtLabMemberFirstName01, txtLabMemberLastName01, txtPrincipalInvestigatorFirstName, etc.
I shortened all the fields' names (e.g., "txtLMFN01") and PyPDF2 started working again as expected.

django view that imports a FileField object and processes data using a .py file in the app

I am building an app that uploads a .vbo file (a text file), which is later processed through a different view (DataListView). In this view I need to import the file and pass its data to a function in a .py file. The function returns a pandas DataFrame (df) and a Matplotlib graph.
I'm struggling to import the file and pass it to the function. I have yet to work out how to send the graph to the HTML, but there seem to be some good tutorials on that.
Help on passing the document to the function would be great.
Thanks
Views:
class BasicCreateView(OwnerCreateView):
    """Create view for ``Basic`` records; its success URL is 'coach:basic_list'."""

    model = Basic
    fields = ['upload_file', 'created_at', 'crew', 'session', 'session_date']
    template_name = 'coach/basic_form.html'
    success_url = reverse_lazy('coach:basic_list')
class BasicDetailView(OwnerDetailView):
    """Detail view for a single ``Basic`` record."""

    template_name = "coach/basic_detail.html"
    model = Basic
class DataListView(OwnerListView):
    """List view over ``Basic`` records."""

    # Lower-case ``model`` matches the sibling views in this module; the
    # capitalised ``Model`` attribute was a different, unused name.
    model = Basic
#open file and read dataframe to model
def handle_file(request):
    """Build a context dict holding a base64-encoded velocity chart.

    On a POST that carries an ``upload_file``, the file is handed to
    ``readvbox.VBox.get_data`` and the returned frame's ``time_of_day`` and
    ``velocity`` columns are drawn as a scatter plot. The rendered image is
    stored under ``context['chart']``. Any other request yields an empty
    context.
    """
    context = {}
    # .get() avoids an exception when the upload is missing from the request.
    if request.method == "POST" and request.FILES.get("upload_file"):
        file_data = request.FILES["upload_file"]
        # get_data returns the frame; keep it instead of discarding the result.
        df = readvbox.VBox.get_data(file_data)
        # plt.scatter returns a single collection, not (fig, ax), so the
        # figure and axes are created explicitly.
        fig, ax = plt.subplots()
        ax.scatter(df.time_of_day, df.velocity)
        flike = io.BytesIO()
        fig.savefig(flike)
        # Figures created through pyplot stay registered until closed.
        plt.close(fig)
        b64 = base64.b64encode(flike.getvalue()).decode()
        context['chart'] = b64
    return context

Store odoo images into filesystem

Hope all stack members are alright .I am able to fetch binary data of Product image using code
p_ids=self.env.context.get('active_ids')
produtc_templates = self.env['product.template']
for p_id in p_ids:
binaryData = produtc_templates.search([('id', '=',p_id)]).image
data=base64.b64decode(binaryData)
file="marketplaces/rakuten_ftp/static/imageToSave_"+str(p_id)+".png"
with open(file, "wb") as imgFile:
imgFile.write(data)
The code above creates files from the binary data, but I have failed to apply a condition based on the mimetype, because when I query the ir_attachment table with the product ids it returns False.
for p_id in p_ids:
attachments = self.env['ir.attachment']
mimetype=attachments.search([('res_id','=',p_id)])
I am treating res_id as the product id, but Odoo fails to find any record for that id. If anybody has an idea how I can get the mimetype for my product id, please help me.
Your code looks good! However, on the ir.attachment object the binary data is stored in the datas field, so you can decode that field to get the image file.
I have already tried the code below in Odoo v11, and it works: it creates a new image file from the binary data stored in the datas field.
product_image = self.env['ir.attachment']
product_images = product_image.search([('id', 'in', p_ids)])
for rec in product_images:
with open("imageToSave.jpg", "wb") as imgFile:
imgFile.write(base64.b64decode(rec.datas))
You can also add the condition for mimetype as, p_ids can contains multiple ids, so taking only those ids which have mimetype of image/jpeg or image/png
EDIT #1
Below code snippet already checked with Odoo v11.0
import base64
from odoo.tools.mimetypes import guess_mimetype
p_ids = [16, 18, 11, 38, 39, 40] # Taking random ids of product.template
produtc_templates = self.env['product.template']
for p_id in p_ids:
binary_data = produtc_templates.search([('id', '=', p_id)]).image
mimetype = guess_mimetype(base64.b64decode(binary_data))
file_path = ""
if mimetype == 'image/png':
file_path = "/home/Downloads/" + str(p_id) + ".png"
elif mimetype == 'image/jpeg':
file_path = "/home/Downloads/" + str(p_id) + ".jpeg"
if file_path:
with open(file_path, "wb") as imgFile:
imgFile.write(base64.b64decode(binary_data))
Product images aren't saved as instances/records of ir.attachment. Maybe that has changed, but I couldn't find anything on it quickly.
What you can do is use ir.attachment's method _compute_mimetype().
The following example wasn't tested:
def get_mimetype_of_product_image(self, product_id):
    """Return the mimetype computed for the image of product ``product_id``."""
    product = self.env['product.product'].browse(product_id)
    # _compute_mimetype inspects an attachment values dict and reads the
    # base64 payload from its 'datas' key; under any other key the payload
    # is ignored.
    mimetype = self.env['ir.attachment']._compute_mimetype(
        {'datas': product.image})
    return mimetype

How to store HDF5 (HDF Store) in a Django model field

I am currently working on a project where I generate pandas DataFrames as results of analysis. I am developing in Django and would like to use a "data" field in a "Results" model to store the pandas DataFrame.
It appears that HDF5(HDF Store) is the most efficient way to store my pandas DataFrames. However, I do not know how to create the custom field in my model to save it. I will show simplified views.py and models.py below to illustrate.
models.py
class Result(models.Model):
    """Analysis result belonging to one scenario."""

    # on_delete is spelled out; CASCADE is what Django applied implicitly
    # before the argument became mandatory.
    scenario = models.ForeignKey(Scenario, on_delete=models.CASCADE)
    # HOW DO I Store HDFStore
    # NOTE(review): HDF5Field is the placeholder this question asks about;
    # Django does not ship such a field.
    data = models.HDF5Field()
views.py
class AnalysisAPI(View):
    """Read and store the analysis results of a scenario."""

    model = Result

    def get(self, request):
        """Return the stored analysis results of ``scenario_id`` as JSON records."""
        import json  # local import: the module's import block is not visible here

        request_dict = request.GET.dict()
        scenario_id = request_dict['scenario_id']
        scenario = Scenario.objects.get(pk=scenario_id)
        result = self.model.objects.get(scenario=scenario)
        analysis_results_df = result.data['analysis_results_df']
        # to_json() already returns a JSON string. Decode it so the response
        # does not encode it a second time, and pass safe=False because the
        # payload is a list rather than a dict.
        records = json.loads(analysis_results_df.to_json(orient="records"))
        return JsonResponse(records, safe=False)

    def post(self, request):
        """Run the analysis on ``record_list`` and save it for ``scenario_id``."""
        request_dict = request.POST.dict()
        scenario_id = request_dict['scenario_id']
        scenario = Scenario.objects.get(pk=scenario_id)
        record_list = request_dict['record_list']
        analysis_results_df = run_analysis(record_list)
        data = HDFStore('store.h5')
        data['analysis_results_df'] = analysis_results_df
        new_result = self.model(scenario=scenario, data=data)
        new_result.save()
        return JsonResponse(
            dict(status="OK", message="Analysis results saved.")
        )
I appreciate any help and I am also open to another storage method, such as Pickle, with similar performance provided I can use it with Django.
You can create a custom Model field that saves your data to a file in storage and saves the relative file path to the database.
Here is how you could subclass models.CharField in your app's fields.py:
import os

from django.core.exceptions import ValidationError
from django.core.files.storage import default_storage
from django.db import models
from django.utils.translation import gettext_lazy as _
from pandas import DataFrame, read_hdf
class DataFrameField(models.CharField):
    """
    custom field to save Pandas DataFrame to the hdf5 file format
    as advised in the official pandas documentation:
    http://pandas.pydata.org/pandas-docs/stable/io.html#io-perf
    """

    attr_class = DataFrame

    default_error_messages = {
        "invalid": _("Please provide a DataFrame object"),
    }

    def __init__(
        self,
        verbose_name=None,
        name=None,
        upload_to="data",
        storage=None,
        unique_fields=None,
        **kwargs
    ):
        """
        upload_to: directory, relative to the storage, that receives the files.
        storage: storage backend; falls back to ``default_storage``.
        unique_fields: names of model attributes used to build the file name.
        """
        self.storage = storage or default_storage
        self.upload_to = upload_to
        # A fresh list per field: a mutable default argument would be shared
        # by every field instance created without ``unique_fields``.
        self.unique_fields = list(unique_fields) if unique_fields is not None else []
        kwargs.setdefault("max_length", 100)
        super().__init__(verbose_name, name, **kwargs)

    def deconstruct(self):
        """Return the constructor arguments, omitting values left at their defaults."""
        name, path, args, kwargs = super().deconstruct()
        if kwargs.get("max_length") == 100:
            del kwargs["max_length"]
        if self.upload_to != "data":
            kwargs["upload_to"] = self.upload_to
        if self.storage is not default_storage:
            kwargs["storage"] = self.storage
        kwargs["unique_fields"] = self.unique_fields
        return name, path, args, kwargs
The __init__ and deconstruct methods are very much inspired by the Django original FileField. There is an additional unique_fields parameter that is useful for creating predictable unique file names.
def from_db_value(self, value, expression, connection):
    """
    turn the filepath stored in the DB back into a DataFrame object;
    None is passed through unchanged
    """
    return None if value is None else self.retrieve_dataframe(value)
def get_absolute_path(self, value):
    """
    resolve the path saved in the Database through the field's storage
    """
    storage = self.storage
    return storage.path(value)
def retrieve_dataframe(self, value):
    """
    load the pandas DataFrame stored at ``value`` and tag it with that path
    """
    loaded = read_hdf(self.get_absolute_path(value))
    # keep the relative filepath on the object so a later save can reuse it
    loaded.filepath = value
    return loaded
You load the DataFrame to memory from storage with the from_db_value method based on the file path saved in the database.
When retrieving the DataFrame, you also add the file path as instance property to it, so that you can use that value when saving the DataFrame back to the database.
def pre_save(self, model_instance, add):
    """
    write the field's dataframe to its hdf5 file ahead of the model save;
    anything that is neither None nor a DataFrame is rejected
    """
    candidate = super().pre_save(model_instance, add)
    if candidate is not None:
        if not isinstance(candidate, DataFrame):
            raise ValidationError(
                self.error_messages["invalid"], code="invalid",
            )
        self.save_dataframe_to_file(candidate, model_instance)
    return candidate
def get_prep_value(self, value):
    """
    return the filepath to store in the database for ``value``

    ``value`` is normally the DataFrame whose ``filepath`` attribute was set
    in pre_save. A plain string is returned as-is, so that a path can be
    given directly (for instance in a query lookup) instead of failing on
    the missing ``filepath`` attribute. None stays None, and an object with
    no usable filepath yields None.
    """
    if value is None or isinstance(value, str):
        return value
    # save only the filepath to the database
    filepath = getattr(value, "filepath", None)
    return filepath or None
def save_dataframe_to_file(self, dataframe, model_instance):
    """
    write the Dataframe into an hdf5 file in storage at filepath

    The path already attached to the dataframe is reused when present;
    otherwise a new one is generated from ``model_instance``.
    Raises IOError when the target directory path exists but is not a
    directory.
    """
    # The filepath is a plain attribute set when loading from the database.
    # DataFrame.get() looks up a *column*, so it never saw that attribute.
    if not getattr(dataframe, "filepath", None):
        dataframe.filepath = self.generate_filepath(model_instance)
    full_filepath = self.storage.path(dataframe.filepath)
    # Create any intermediate directories that do not exist.
    # shamelessly copied from Django's original Storage class
    directory = os.path.dirname(full_filepath)
    if not os.path.exists(directory):
        try:
            if self.storage.directory_permissions_mode is not None:
                # os.makedirs applies the global umask, so we reset it,
                # for consistency with file_permissions_mode behavior.
                old_umask = os.umask(0)
                try:
                    os.makedirs(directory, self.storage.directory_permissions_mode)
                finally:
                    os.umask(old_umask)
            else:
                os.makedirs(directory)
        except FileExistsError:
            # There's a race between os.path.exists() and os.makedirs().
            # If os.makedirs() fails with FileExistsError, the directory
            # was created concurrently.
            pass
    if not os.path.isdir(directory):
        raise IOError("%s exists and is not a directory." % directory)
    # save to storage; the key is passed by keyword because newer pandas
    # releases deprecate giving it positionally
    dataframe.to_hdf(full_filepath, key="df", mode="w", format="fixed")
def generate_filepath(self, instance):
    """
    build the storage filepath from the model's class name, this field's
    name and the values of the configured unique fields
    """

    def as_id_text(raw):
        # a related model instance contributes its id, anything else itself
        return str(getattr(raw, "id", raw))

    unique_id = "".join(
        as_id_text(getattr(instance, attribute))
        for attribute in self.unique_fields
    )
    # filename, for example: route_data_<uuid>.h5
    filename = "{class_name}_{field_name}_{unique_id}.h5".format(
        class_name=instance.__class__.__name__.lower(),
        field_name=self.name,
        unique_id=unique_id,
    )
    relative_path = os.path.join(self.upload_to, filename)
    return self.storage.generate_filename(relative_path)
Save the DataFrame to an hdf5 file with the pre_save method and save the file path to the Database in get_prep_value.
In my case it helped to use a uuid Model Field to create the unique file name, because for new model instances, the pk was not yet available in the pre-save method, but the uuid value was.
You can then use this field in your models.py:
from .fields import DataFrameField
# track data as a pandas DataFrame
data = DataFrameField(null=True, upload_to="data", unique_fields=["uuid"])
Please note that you cannot use this field in the Django admin or in a Model form. That would require additional work on a custom form Widget to edit the DataFrame content in the front-end, probably as a table.
Also beware that for tests, I had to override the MEDIA_ROOT setting with a temporary directory using tempfile to prevent creating useless files in the actual media folder.
It's not HDF5, but check out picklefield:
from picklefield.fields import PickledObjectField
class Result(models.Model):
    """Analysis result belonging to one scenario, stored as a pickled object."""

    # on_delete is spelled out; CASCADE is what Django applied implicitly
    # before the argument became mandatory.
    scenario = models.ForeignKey(Scenario, on_delete=models.CASCADE)
    data = PickledObjectField(blank=True, null=True)
https://pypi.python.org/pypi/django-picklefield

Categories

Resources