How to save a **python-doc** object to a **DRF** model - python

I have a Django Rest Framework view that generates python-docx objects, and I want to save such files in a model. I read in the documentation of python-docx that you can specify a "file-like" object to save the object, but I don't understand what it means.
This is my view:
class RoomingWordView(viewsets.ViewSet):
def list(self, request, *args, **kwargs):
... some code
documents_to_return = self.get_hotel_document(start_date, end_date, confirmed, hotel, bookings)
return Response(documents_to_return)
def get_hotel_document(self, start_date, end_date, confirmed, hotel, bookings):
from django.core.files import File
from docx import Document
from docx.shared import Inches, Pt
document = Document()
section = document.sections[-1]
section.left_margin = Inches(0.5)
section.right_margin = Inches(0.5)
style = document.styles['Normal']
font = style.font
font.name ='Arial'
font.size = Pt(10)
document.add_heading("MY COMPANY")
if confirmed:
document.add_paragraph("ROOMING LIST DEL 01-12-2018 AL 31-01-2019 INCLUYE RESERVAS CONFIRMADAS")
else:
document.add_paragraph("ROOMING LIST DEL 01-12-2018 AL 31-01-2019")
document.add_paragraph("Hotel: {}".format(hotel))
table = document.add_table(rows=len(bookings), cols=10)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Booking'
hdr_cells[1].text = 'Reservado a'
hdr_cells[2].text = '# Pax'
hdr_cells[3].text = 'Agencia'
hdr_cells[4].text = 'Habs'
hdr_cells[5].text = 'Hab./Plan'
hdr_cells[6].text = 'Entrada'
hdr_cells[7].text = 'Salida'
hdr_cells[8].text = 'Confirmación'
hdr_cells[9].text = 'Producción'
for cell in table.rows[0].cells:
paragraphs = cell.paragraphs
for paragraph in paragraphs:
for run in paragraph.runs:
run.underline = True
for booking in bookings['bookings']:
row_cells = table.add_row().cells
row_cells[0].text = booking['booking']
row_cells[1].text = "\n".join(booking['people'])
row_cells[2].text = booking['pax']
row_cells[3].text = booking['agency']
row_cells[4].text = booking['rooms']
row_cells[5].text = "{}\n{}".format(booking['room_type'], booking['plan_type'])
row_cells[6].text = booking['check_in']
row_cells[7].text = booking['check_out']
row_cells[8].text = booking['confirmation']
row_cells[9].text = str(booking['production'])
for row in table.rows:
for cell in row.cells:
paragraphs = cell.paragraphs
for paragraph in paragraphs:
for run in paragraph.runs:
font = run.font
font.size = Pt(8)
file_object = "rooming_reports/Rooming {} {}-{}.docx".format(hotel, start_date, end_date)
document.save(file_object)
return file_object
I want the view to save the resulting document object in a model called RoomingWordDocument, instead of saving it as a file, but I don't know how to assign document variable to RoomingWordDocument.file field.

Assuming you have a model as
class RoomingWordDocument(models.Model):
doc = models.FileField(null=True)
name = models.CharField(max_length=50)
Then try this snippet
class RoomingWordView(viewsets.ViewSet):
def list(self, request, *args, **kwargs):
# your list method
# return Response(documents_to_return)
def get_hotel_document(self, start_date, end_date, confirmed, hotel, bookings):
# your code
# ......
file_object = "rooming_reports/Rooming {} {}-{}.docx".format(hotel, start_date, end_date)
document.save(file_object)
# here is the new snippet
# found a simple way now
model_object = RoomingWordDocument.objects.create(name="somename")
model_object.doc.name = file_object
model_object.save() # this is importent
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
from django.core.files import File
fp = open(file_object, 'rb')
model_object = RoomingWordDocument.objects.create(name="somename")
model_object.doc.save(file_object, File(fp))
return Response("some response")
Reference
1. StringIo in Python2.X and 3.X
2. How to create a file and save it to a model's FileField?
3. Set Django's FileField to an existing file

Related

Is there a faster way of uploading multiple files in Django?

I have a django project where the client can upload multiple files at once. My problem is that for each uploaded file I'm creating a model object - one at a time. Is there a way to do this with bulk create or some other method that is faster.
Views.py:
images = request.FILES.getlist('images')
xmls = request.FILES.getlist('xmls')
jsons = request.FILES.getlist('jsons')
for image in images:
img_name = (str(image).split('.')[0])
dp_name = dataset_name+'-'+img_name
if [xml for xml in xmls if img_name+'.' in str(xml)]:
xml = [xml for xml in xmls if img_name+'.' in str(xml)][0]
else:
xml = None
if [json for json in jsons if img_name+'.' in str(json)]:
json = [json for json in jsons if img_name+'.' in str(json)][0]
else:
json = None
dataset.create_datapoint(image, xml, json, username, dp_name)
Models.py:
def create_datapoint(self, image, xml, json, username, dp_name):
datapoint = Datapoint.objects.create(
xml = xml,
json = json,
name = dp_name,
uploaded_by = username,
img = image,
belongs_to = self,
)
self.num_datapoints += 1
datapoint.parse_xml()
self.datapoints.add(datapoint)
self.save()
return
#mnislam01 is right but there is a small mistake in the code provided.
Here it is fixed:
data_point_create_list = []
# First create a list of objects.
for image in images:
dp = Datapoint(xml = xml,
json = json,
name = dp_name,
uploaded_by = username,
img = image,
belongs_to = self)
data_point_create_list.append(dp)
# Then bulk create all the objects.
if data_point_create_list:
Datapoint.objects.bulk_create(data_point_create_list)
Just needed to assign the newly made datapoint before appending.
You can use .bulk_create() method. For example.
data_point_create_list = []
# First create a list of objects.
for image in images:
dp = Datapoint(xml = xml,
json = json,
name = dp_name,
uploaded_by = username,
img = image,
belongs_to = self)
data_point_create_list.append(dp)
# Then bulk create all the objects.
if data_point_create_list:
Datapoint.objects.bulk_create(data_point_create_list)

How do I import various Data Frames from different python file?

I have a python file called 'clean_data.py' which has all the data frames I need, and I want to import them for use in another python file called 'main.py' to use in creating a dashboard.
Is it possible to create a class in my clean_data.py, and if so can someone direct me to an article (which I struggled to find so far) so that I can figure it out?
The aim is to shift from CSV to an API overtime, so I wanted to keep data side wrangling side of things in a different file while the web app components in the main.py file.
Any help would be much appreciated.
The code from the clean_data.py is:
import pandas as pd
import csv
import os # To access my file directory
print(os.getcwd()) # Let's me know the Current Work Directory
fdi_data = pd.read_csv(r'Data/fdi_data.csv')
fdi_meta = pd.read_csv(r'Data/fdi_metadata.csv')
debt_data = pd.read_csv(r'Data/debt_data.csv')
debt_meta = pd.read_csv(r'Data/debt_metadata.csv')
gdp_percap_data = pd.read_csv(r'Data/gdp_percap_data.csv', header=2)
gdp_percap_meta = pd.read_csv(r'Data/gdp_percap_metadata.csv')
gov_exp_data = pd.read_csv(r'Data/gov_exp_data.csv', header=2)
gov_exp_meta = pd.read_csv(r'Data/gov_exp_metadata.csv')
pop_data = pd.read_csv(r'Data/pop_data.csv', header=2)
pop_meta = pd.read_csv(r'Data/pop_metadata.csv')
"""
'wb' stands for World Bank
"""
def wb_merge_data(data, metadata):
merge = pd.merge(
data,
metadata,
on = 'Country Code',
how = 'inner'
)
return merge
fdi_merge = wb_merge_data(fdi_data, fdi_meta)
debt_merge = wb_merge_data(debt_data, debt_meta)
gdp_percap_merge = wb_merge_data(gdp_percap_data, gdp_percap_meta)
gov_exp_merge = wb_merge_data(gov_exp_data, gov_exp_meta)
pop_merge = wb_merge_data(pop_data, pop_meta)
def wb_drop_data(data):
drop = data.drop(['Country Code','Indicator Name','Indicator Code','TableName','SpecialNotes','Unnamed: 5'], axis=1)
return drop
fdi_merge = wb_drop_data(fdi_merge)
debt_merge = wb_drop_data(debt_merge)
gdp_percap_merge = wb_drop_data(gdp_percap_merge)
gov_exp_merge = wb_drop_data(gov_exp_merge)
pop_merge = wb_drop_data(pop_merge)
def wb_mr_data(data, value_name):
data = data.melt(['Country Name','Region','IncomeGroup']).reset_index()
data = data.rename(columns={'variable': 'Year', 'value': value_name})
data = data.drop('index', axis = 1)
return data
fdi_merge = wb_mr_data(fdi_merge, 'FDI')
debt_merge = wb_mr_data(debt_merge, 'Debt')
gdp_percap_merge = wb_mr_data(gdp_percap_merge, 'GDP per Cap')
gov_exp_merge = wb_mr_data(gov_exp_merge, 'Gov Expend.')
pop_merge = wb_mr_data(pop_merge, 'Population')
def avg_groupby(data, col_cal, cn=False, ig=False, rg=False):
if cn == True:
return data.groupby('Country Name')[col_cal].mean().reset_index()
elif ig == True:
return data.groupby('IncomeGroup')[col_cal].mean().reset_index()
elif rg == True:
return data.groupby('Region')[col_cal].mean().reset_index()
"""
avg_cn_... For country
avg_ig_... Income Group
avg_rg_... Region
"""
avg_cn_fdi = avg_groupby(fdi_merge, 'FDI', cn=True)
avg_ig_fdi = avg_groupby(fdi_merge, 'FDI', ig=True)
avg_rg_fdi = avg_groupby(fdi_merge, 'FDI', rg=True)
avg_cn_debt = avg_groupby(debt_merge, 'Debt', cn=True)
avg_ig_debt = avg_groupby(debt_merge, 'Debt', ig=True)
avg_rg_debt = avg_groupby(debt_merge, 'Debt', rg=True)
avg_cn_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', cn=True)
avg_ig_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', ig=True)
avg_rg_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', rg=True)
avg_cn_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', cn=True)
avg_ig_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', ig=True)
avg_rg_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', rg=True)
avg_cn_pop = avg_groupby(pop_merge, 'Population', cn=True)
avg_ig_pop = avg_groupby(pop_merge, 'Population', ig=True)
avg_rg_pop = avg_groupby(pop_merge, 'Population', rg=True)
In Python, every file is a module. So if you want to re-use your code, you can simple import this module. For example,
# main.py
import clean_data
print(clean_data.avg_cn_fdi)
Maybe you needn't create a class for this
You can import the whole python file like you'd import any other locally created files and have access to the DataFrames in them. Here's an example:
I created a file called temporary.py:
import pandas as pd
data = pd.read_csv("temp.csv")
And then in a separate file I was able to use data like so:
import temporary
print(temporary.data)
Or, you could also do:
from temporary import data
print(data)
All that being said, I don't believe that this would be the best way to handle your data.

ADF delete activity with python- Unable to create logging

I am currently creating adfv2 delete activity with python using below code
My python code looks like below
#Create a delete activity
blob_ls_name = 'AzureBlobLS'
ds_name = 'Dataset_Test'
dataset_name = DatasetReference(reference_name= ds_name)
ds_ls = LinkedServiceReference(reference_name=blob_ls_name)
logsettings = LogStorageSettings(linked_service_name = ds_ls, path = 'mycontainer/path')
act_name = 'CleanUp_Dest_Folder'
act_delete = DeleteActivity(name = act_name , dataset = dataset_name, log_storage_settings = logsettings)
Even after creating logsettings, when the delete activity is created the log settings are not properly applied in the activity and it is selecting nothing.
I can reproduce your issue, you missed the enable_logging = True in the last line, add it like below, then it will work fine.
act_delete = DeleteActivity(name = act_name , dataset = dataset_name, enable_logging = True, log_storage_settings = logsettings )
My test sample:
It creates a pipeline with a delete activity.
from azure.common.credentials import ServicePrincipalCredentials
from azure.mgmt.datafactory import DataFactoryManagementClient
from azure.mgmt.datafactory.models import *
subscription_id = 'xxxxx'
credentials = ServicePrincipalCredentials(client_id='xxxx', secret='xxxx', tenant='xxx')
adf_client = DataFactoryManagementClient(credentials, subscription_id)
blob_ls_name = 'AzureBlobStorage2'
ds_name = 'Binary1'
dataset_name = DatasetReference(reference_name= ds_name)
ds_ls = LinkedServiceReference(reference_name=blob_ls_name)
logsettings = LogStorageSettings(linked_service_name = ds_ls, path = 'test/d2')
act_name = 'CleanUp_Dest_Folder'
act_delete = DeleteActivity(name = act_name , dataset = dataset_name, enable_logging = True, log_storage_settings = logsettings )
rg_name = 'xxxx'
df_name = 'joyfactory'
p_name = 'Pipeline1234'
params_for_pipeline = {}
p_obj = PipelineResource(
activities=[act_delete], parameters=params_for_pipeline)
p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj)
print(p.activities[0].log_storage_settings.linked_service_name)
Check in the portal:

Pass id parameter into imported class in Django

In Django I have a function based view responsible of printing the details (actually only the name) of all the registered users on a pdf file.
def test_pdf(request, id):
# Create the HttpResponse object with the appropriate PDF headers.
response = HttpResponse(content_type='application/pdf')
response['Content-Disposition'] = 'attachment; filename="My Users.pdf"'
buffer = io.BytesIO()
report = MyPrint(buffer, 'Letter', id)
pdf = report.print_users()
response.write(pdf)
return response
This function works because I imported in the views.py file a class I built in another file, responsible of drawing the pdf, MyPrint:
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER
from django.contrib.auth.models import User
class MyPrint:
def __init__(self, buffer, pagesize):
self.buffer = buffer
if pagesize == 'A4':
self.pagesize = A4
elif pagesize == 'Letter':
self.pagesize = letter
self.width, self.height = self.pagesize
def print_users(self):
buffer = self.buffer
doc = SimpleDocTemplate(buffer,
rightMargin=72,
leftMargin=72,
topMargin=72,
bottomMargin=72,
pagesize=self.pagesize)
# Our container for 'Flowable' objects
elements = []
# A large collection of style sheets pre-made for us
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name='centered', alignment=TA_CENTER))
# Draw things on the PDF. Here's where the PDF generation happens.
# See the ReportLab documentation for the full list of functionality.
users = User.objects.all()
elements.append(Paragraph('My User Names', styles['Heading1']))
for i, user in enumerate(users):
elements.append(Paragraph(user.get_full_name(), styles['Normal']))
doc.build(elements)
# Get the value of the BytesIO buffer and write it to the response.
pdf = buffer.getvalue()
buffer.close()
return pdf
Now, How can I make the function and the class specific to a user if I pass in the relative pk into the function? Apart from updating the urlpattern, should I pass the id into the class and / or into the function?
If you want to have the existing function work with one or more users, and continue to work if you don't pass in an id, I think the simplest way of changing it would be as follows:
def print_users(self, id=None):
buffer = self.buffer
doc = SimpleDocTemplate(buffer,
rightMargin=72,
leftMargin=72,
topMargin=72,
bottomMargin=72,
pagesize=self.pagesize)
# Our container for 'Flowable' objects
elements = []
# A large collection of style sheets pre-made for us
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name='centered', alignment=TA_CENTER))
# Draw things on the PDF. Here's where the PDF generation happens.
# See the ReportLab documentation for the full list of functionality.
users = User.objects.all()
if id:
users = users.filter(id__in=id)
elements.append(Paragraph('My User Names', styles['Heading1']))
for i, user in enumerate(users):
elements.append(Paragraph(user.get_full_name(), styles['Normal']))
doc.build(elements)
# Get the value of the BytesIO buffer and write it to the response.
pdf = buffer.getvalue()
buffer.close()
return pdf
Then change how you call it to:
report = MyPrint(buffer, 'Letter')
pdf = report.print_users(id)
or, if you want to print all users, just call it as:
report = MyPrint(buffer, 'Letter')
pdf = report.print_users()

Flask Wtforms FileField object has no attribute read

I'm trying to upload images to an Amazon S3 bucket from a Flask app. Here is my code:
def s3upload(image, acl='public-read'):
key = app.config['S3_KEY']
secret = app.config['S3_SECRET']
bucket = app.config['S3_BUCKET']
conn = S3Connection(key, secret)
mybucket = conn.get_bucket(bucket)
r = redis.StrictRedis(connection_pool = pool)
iid = r.incr('image')
now = time.time()
r.zadd('image:created_on', now, iid)
k = Key(mybucket)
k.key = iid
k.set_contents_from_string(image.read())
return iid
#app.route('/', methods = ['GET', 'POST'])
def index():
form = ImageForm(request.form)
print 'CHECKING REQUEST'
if request.method == 'POST' and form.image:
print 'VALID REQUEST'
image = form.image.read()
upload = s3upload(image)
print upload
else:
image = None
r = redis.StrictRedis(connection_pool = pool)
last_ten = r.zrange('image:created_on', 0, 9)
print last_ten
images = []
key = app.config['S3_KEY']
secret = app.config['S3_SECRET']
bucket = app.config['S3_BUCKET']
conn = S3Connection(key, secret)
mybucket = conn.get_bucket(bucket)
for image in last_ten:
images.append(mybucket.get_key(image, validate = False))
return render_template('index.html', form=form, images=images, image=image)
However I get an error at k.set_contents_from_string(image.read()) saying 'FileField' object has no attribute 'read'. Everything I've ready has indicated this is the proper way to upload an image to S3 and I've found several examples where they call .read() on a FileField object and it works fine. Thanks for your help.
FileField objects have a data attribute:
k.set_contents_from_string(image.data.read())
How about
import os
filestream = form.image.raw_data[0]
filestream.seek(0, os.SEEK_END)
read_data = filestream.tell()
or
read_data = form.image.raw_data[0].read()

Categories

Resources