I am using Django's StreamingHttpResponse to stream a large CSV file on the fly. According to the docs, an iterator is passed to the response's streaming_content parameter:
import csv
from django.http import StreamingHttpResponse
def get_headers():
    """Column names for the CSV, in output order."""
    headers = ['field1', 'field2', 'field3']
    return headers
def get_data(item):
    """Map one queryset item onto the CSV columns as a dict."""
    return {name: getattr(item, name) for name in ('field1', 'field2', 'field3')}
# StreamingHttpResponse requires a file-like object with a 'write' method;
# this pseudo-buffer stores nothing and hands each value straight back.
class Echo(object):
    """File-like stub whose write() returns the value instead of buffering it."""

    def write(self, value):
        return value
def get_response(queryset):
    """Build a streaming CSV download for ``queryset``.

    The original called ``writer.writeheader()`` before constructing the
    response.  ``DictWriter`` writes through the ``Echo`` pseudo-buffer,
    which only *returns* the formatted text, so that header line was
    discarded and never reached the client.  The fix: every line -- header
    included -- is emitted from the generator passed as
    ``streaming_content``.
    """
    writer = csv.DictWriter(Echo(), fieldnames=get_headers())

    def stream():
        # dict(zip(h, h)) mirrors what DictWriter.writeheader() does
        # internally; writeheader() itself returns None before Python 3.8,
        # so its result cannot be yielded directly.
        headers = get_headers()
        yield writer.writerow(dict(zip(headers, headers)))
        for item in queryset:
            yield writer.writerow(get_data(item))

    response = StreamingHttpResponse(
        streaming_content=stream(),
        content_type='text/csv',
    )
    response['Content-Disposition'] = 'attachment;filename=items.csv'
    return response
My question is: how can I manually write a row on the CSV writer? manually calling writer.writerow(data) or writer.writeheader() (which also internally calls writerow()) does not seem to write to the dataset, and instead only the generated / streamed data from streaming_content is written on the output dataset.
The answer is yielding results with a generator function instead of calculating them on the fly (within StreamingHttpResponse's streaming_content argument) and using the pseudo buffer we created (Echo Class) in order to write a row to the response:
import csv
from django.http import StreamingHttpResponse
def get_headers():
    """Return the CSV field names in their output order."""
    return list(('field1', 'field2', 'field3'))
def get_data(item):
    """Build the row dict for one item, keyed by the CSV field names."""
    row = {}
    for field in ('field1', 'field2', 'field3'):
        row[field] = getattr(item, field)
    return row
# StreamingHttpResponse requires a file-like class that has a 'write' method.
class Echo(object):
    """Pseudo-buffer: write() echoes the value back rather than storing it."""

    def write(self, value):
        # csv.writer calls this with each fully formatted line; returning
        # the line lets the caller yield it into the streamed response.
        return value
def iter_items(items, pseudo_buffer):
    """Yield CSV lines for streaming: a header line, then one line per item.

    The original yielded ``pseudo_buffer.write(get_headers())``, which
    streams the raw Python list -- not a CSV-formatted line -- producing a
    malformed/mismatched header.  Formatting the header through
    ``writer.writerow`` (the same trick ``DictWriter.writeheader`` uses
    internally) keeps the header consistent with the data rows.  On
    Python 3.8+ ``yield writer.writeheader()`` would also work, since
    writeheader() started returning the writerow() result there.
    """
    fieldnames = get_headers()
    writer = csv.DictWriter(pseudo_buffer, fieldnames=fieldnames)
    yield writer.writerow(dict(zip(fieldnames, fieldnames)))
    for item in items:
        yield writer.writerow(get_data(item))
def get_response(queryset):
    """Wrap the lazily generated CSV lines in an attachment response."""
    response = StreamingHttpResponse(
        streaming_content=iter_items(queryset, Echo()),
        content_type='text/csv',
    )
    response['Content-Disposition'] = 'attachment;filename=items.csv'
    return response
The proposed solution can actually lead to incorrect/mismatched CSVs (header mismatched with data). You'd want to replace the affected section with something like:
header = dict(zip(fieldnames, fieldnames))
yield writer.writerow(header)
instead. This is from the implementation of writeheader https://github.com/python/cpython/blob/08045391a7aa87d4fbd3e8ef4c852c2fa4e81a8a/Lib/csv.py#L141:L143
The reason is that before Python 3.8, writeheader() returns None, so yielding its result contributes nothing to the streamed content.
Hope this helps someone in the future :)
Also note that no fix is needed if using python 3.8+ because of this PR: https://bugs.python.org/issue27497
You can chain generators using itertools in Python to add a header row before the queryset rows.
here is how you do it:
import itertools
def some_streaming_csv_view(request):
    """A view that streams a large CSV file."""
    # Title row(s) first, then one row per Item -- everything is produced
    # lazily so the full dataset is never held in memory at once.
    title_rows = iter([["title 1", "title 2"]])
    data_rows = (
        ["Row {}".format(item.pk), str(item.pk)]
        for item in Item.objects.all()
    )
    writer = csv.writer(Echo())
    all_rows = itertools.chain(title_rows, data_rows)  # header first, then data
    return StreamingHttpResponse(
        (writer.writerow(row) for row in all_rows),
        content_type="text/csv",
        headers={'Content-Disposition': 'attachment; filename="somefilename.csv"'},
    )
and you will get csv with the title and the queryset:
title 1, title 2
1, 1
2, 2
...
Related
so our DB was designed very badly. There is no foreign key used to link multiple tables
I need to fetch complete information and export it to csv. the challenge is the information need to be queried from multiple tables (say for e.g, usertable only stored sectionid in the table, in order to get section detail, I would have to query from section table and match it with sectionid acquired from usertable).
So i did this using serializer, because the fields are multiples.
So the problem with my current method is that its so slow because it needs to query for each object(queryset) to match with other tables using uuid/userid/anyid.
this is my views
class FileDownloaderSerializer(APIView):
    # Serializes every user row and returns the result as a CSV attachment.
    # NOTE(review): several issues to confirm/fix here --
    #   * ``serializer.data`` is a list of dicts, not a pandas DataFrame,
    #     so ``df.to_csv(...)`` will raise AttributeError as written.
    #   * ``sector`` and ``dbname`` are not defined in this scope.
    #   * The whole queryset is serialized in memory before writing; each
    #     SerializerMethodField issues an extra query per row, which is
    #     why large exports are slow (see UserSerializer).
    def get(self, request, **kwargs):
        filename = "All-users.csv"
        # NOTE(review): file handle is not closed on error -- prefer ``with``.
        f = open(filename, 'w')
        datas = Userstable.objects.using(dbname).all()
        serializer = UserSerializer( datas, context={'sector': sector}, many=True)
        df=serializer.data
        df.to_csv(f, index=False, header=False)
        f.close()
        # Re-open the written file and stream it back as the response body.
        wrapper = FileWrapper(open(filename))
        response = HttpResponse(wrapper, content_type='text/csv')
        response['Content-Length'] = os.path.getsize(filename)
        response['Content-Disposition'] = "attachment; filename=%s" % filename
        return response
so notice that i need one file exported which is .csv.
this is my serializer
class UserSerializer(serializers.ModelSerializer):
    """Serializes Userstable rows plus names resolved from related tables.

    Each SerializerMethodField issues one extra query per row, which is
    why exporting large querysets is slow; consider select_related-style
    joins or annotations instead of per-row lookups.
    """

    section = serializers.SerializerMethodField()
    department = serializers.SerializerMethodField()

    class Meta:
        model = Userstable
        # The original read ``fields = _all_`` (a NameError at import time);
        # DRF expects the literal string '__all__'.
        fields = '__all__'

    def get_section(self, obj):
        # Original had the closing paren misplaced:
        # ``using(dbname.get(pk=...)`` -- fixed to close after using(dbname).
        return section.objects.using(dbname).get(pk=obj.sectionid).sectionname

    def get_department(self, obj):
        return section.objects.using(dbname).get(pk=obj.deptid).deptname
im showing only two tables here, but in my code i have total of 5 different tables
I tried limiting to 100 rows and it was successful; when I tried to fetch 300,000 rows it took 3 hours to download the CSV — certainly not efficient. How can I solve this?
I'm using Django I want to send some data from my database to a document word, I'm using Python-Docx for creating word documents I use the class ExportDocx it can generate a static word file but I want to place some dynamic data (e.g. product id =5, name=""..) basically all the details to the "product" into the document
class ExportDocx(APIView):
    """Build a .docx in memory and stream it back as a file attachment."""

    def get(self, request, *args, **kwargs):
        # NOTE(review): this queryset is never used; pass it (or a single
        # product) into build_document() to render the dynamic data.
        queryset = Products.objects.all()
        # build_document() creates its own Document; the original also
        # instantiated a throwaway ``Document()`` here that was immediately
        # overwritten -- dead code, removed.
        document = self.build_document()
        # Serialize the finished document into an in-memory stream.
        buffer = io.BytesIO()
        document.save(buffer)  # save your memory stream
        buffer.seek(0)  # rewind the stream
        # Stream it back with the docx content type.
        response = StreamingHttpResponse(
            streaming_content=buffer,  # use the stream's content
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )
        response['Content-Disposition'] = 'attachment;filename=Test.docx'
        response["Content-Encoding"] = 'UTF-8'
        return response

    def build_document(self, *args, **kwargs):
        """Compose the static document: margins, a heading, two paragraphs."""
        document = Document()
        # Apply 0.95"/0.79" margins to every section of the document.
        for section in document.sections:
            section.top_margin = Inches(0.95)
            section.bottom_margin = Inches(0.95)
            section.left_margin = Inches(0.79)
            section.right_margin = Inches(0.79)
        # add a header
        document.add_heading("This is a header")
        # add a paragraph
        document.add_paragraph("This is a normal style paragraph")
        # add a paragraph with an italic run followed by a line break
        paragraph = document.add_paragraph()
        run = paragraph.add_run()
        run.italic = True
        run.add_text("text will have italic style")
        run.add_break()
        return document
This is the relevant entry in urls.py:
path('<int:pk>/testt/', ExportDocx.as_view() , name='generate-testt'),
How can I generate it tho I think I need to make the data string so it can work with py-docx.
for the python-docx documentation: http://python-docx.readthedocs.io/
For a product record like: record = {"product_id": 5, "name": "Foobar"}, you can add it to the document in your `build_document()` method like:
document.add_paragraph(
"Product id: %d, Product name: %s"
% (record.product_id, record.name)
)
There are other more modern methods for interpolating strings, although this sprintf style works just fine for most cases. This resource is maybe not a bad place to start.
So I found out that I need to pass the model I was doing it but in another version of code and forgot to add it... Basically, I just had to add these lines of code, hope this helps whoever is reading this.
def get(self, request,pk, *args, **kwargs):
# create an empty document object
document = Document()
product = Product.objects.get(id=pk)
document = self.build_document(product)
And in the build of the document we just need to stringify it simply by using f'{queryset.xxxx}'
def build_document(self,queryset):
document = Document()
document.add_heading(f'{queryset.first_name}')
Is there a way to minimize a json in JsonResponse?
By minimize I mean removing spaces etc.
Thanks to this I can save around 100KB on my server ;).
Example:
I have a json:
{"text1": 1324, "text2": "abc", "text3": "ddd"}
And I want to achieve something like this:
{"text1":1324,"text2":"abc","text3":"ddd"}
Now creating response looks like that:
my_dict = dict()
my_dict['text1'] = 1324
my_dict['text2'] = 'abc'
my_dict['text3'] = 'ddd'
return JsonResponse(my_dict, safe=False)
If you do this in enough places you could create your own JsonResponse like (mostly ripped from django source):
class JsonMinResponse(HttpResponse):
    """JsonResponse variant that emits compact JSON (no separator spaces)."""

    def __init__(self, data, encoder=DjangoJSONEncoder, safe=True, **kwargs):
        if safe and not isinstance(data, dict):
            raise TypeError('In order to allow non-dict objects to be '
                            'serialized set the safe parameter to False')
        kwargs.setdefault('content_type', 'application/json')
        # The original line had mismatched parens (a SyntaxError) and left
        # ``cls=encoder`` outside the call; both are fixed here.
        # separators=(',', ':') drops the spaces json.dumps adds by default.
        data = json.dumps(data, cls=encoder, separators=(',', ':'))
        super(JsonMinResponse, self).__init__(content=data, **kwargs)
HttpResponse allows us to return data in the format we specify by passing separators to json.dumps:
HttpResponse(json.dumps(data, separators = (',', ':')), content_type = 'application/json')
I have the following csv export function to export model information in csv file. But I am trying to figure out how to show outputs from functions in the model. So the csv export function shows all the fields in the model (fields in the table) but not outputs from functions in the model..
So if I have the following:
def avg_tag(self, obj):
bids = Bid.objects.active(user=obj.user.id)
return bids.aggregate(average_price=Avg('list_price'))['average_price']
in the model
It does not get the output from that function.
Here is the csv export function action:
def export_select_fields_csv_action(description="Export selected objects",
                                    fields=None, exclude=None, header=True):
    """Build a Django admin action that exports the selected rows as CSV.

    ``fields`` is a sequence of ``(field_name, label)`` pairs; ``exclude``
    is a sequence of field names to drop.  The original used the Python 2
    ``unicode`` builtin and the long-removed ``HttpResponse(mimetype=...)``
    keyword, both of which fail on Python 3 / modern Django; it also
    ``.encode('utf-8')``-ed each cell, which emits ``b'...'`` reprs on
    Python 3.  All three are fixed here.
    """
    def export_as_csv(modeladmin, request, queryset):
        """
        Generic csv export admin action.
        based on http://djangosnippets.org/snippets/1697/
        """
        opts = modeladmin.model._meta
        field_names = [field.name for field in opts.fields]
        labels = []
        if exclude:
            field_names = [v for v in field_names if v not in exclude]
        elif fields:
            field_names = [k for k, v in fields if k in field_names]
            labels = [v for k, v in fields if k in field_names]
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = ('attachment; filename=%s.csv'
                                           % str(opts).replace('.', '_'))
        writer = csv.writer(response)
        if header:
            # Prefer the human-readable labels when they were provided.
            writer.writerow(labels if labels else field_names)
        for obj in queryset:
            writer.writerow([str(getattr(obj, field))
                             for field in field_names])
        return response
    export_as_csv.short_description = description
    return export_as_csv
How can I change the function above so that the outputs from functions in the model are outputted in the csv? Thanks!
I guess edit the bit where you loop through the queryset and just add it on the end -
for obj in queryset:
    # Concatenate a one-element *list*: the original's ``+ "," + obj.avg_tag()``
    # raises TypeError (list + str), and csv.writer inserts delimiters itself.
    writer.writerow([unicode(getattr(obj, field)).encode('utf-8')
                     for field in field_names] + [obj.avg_tag()])
You can do something similar to add the heading you want -
if header:
    if labels:
        # List concatenation, not string concatenation: the original's
        # ``labels + "," + "Average Tag"`` raises TypeError (list + str).
        writer.writerow(labels + ["Average Tag"])
    else:
        # The original also carried an unbalanced extra ')' on this line.
        writer.writerow(field_names + ["Average Tag"])
UPDATE
It's going to be a bit difficult getting fields and methods because you'll find that there are lots of methods on your model which you didn't define (and you probably don't want in your csv). There's no easy way of differentiating which you want and which you don't.
Have a play with the following to see what's in there -
import inspect

# List every attribute of the model class that is actually a method; useful
# for seeing which callables could be exported alongside the fields.
obj = modeladmin.model()
methods = [a for a in dir(modeladmin.model) if inspect.ismethod(getattr(obj, a))]
for method in methods:
    # The original used the Python 2 print statement (``print str(method)``),
    # which is a SyntaxError on Python 3.
    print(str(method))
I certainly can't see an elegant way of doing this (or in fact anyway) - I think you're going to have to actually specify each method.
I'm a little confused on how I would populate the following csv function with the information in my models.py for a given user. Can anyone point me in the right direction? Do I need to process the information in a separare py file, or can I do it in my views?
My view to download the info
def download(request):
    """Return a CSV attachment of the user's data (placeholder rows for now)."""
    # ``mimetype`` was removed from HttpResponse in Django 1.7; the keyword
    # is ``content_type``.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename=UserData.csv'
    writer = csv.writer(response)
    writer.writerow(['Date', 'HighBGL', 'LowBGL', 'Diet', 'Weight', 'Height', 'Etc'])
    writer.writerow(['Info pertaining to date 1'])
    writer.writerow(['info pertaining to date 2'])
    return response
One of the models who's info i'm interesting in saving
class DailyVital(models.Model):
    # One vitals entry per user per timestamp.
    user = models.ForeignKey(User)
    entered_at = models.DateTimeField()
    # Daily blood-glucose extremes.
    high_BGL = models.IntegerField()
    low_BGL = models.IntegerField()
    # Optional anthropometrics (may be absent on any given day).
    height = models.IntegerField(blank = True, null = True)
    weight = models.IntegerField(blank = True, null = True)
First you need to query your django model, something like: DailyVital.objects.all() or DailyVital.objects.filter(user=request.user)
Then you can either transform the objects manually into tuples, or you can use Django QuerySet's values_list method with a list of field names to return tuples instead of objects. Something like:
def download(request):
    """Stream the current user's DailyVital rows as a CSV attachment."""
    # ``mimetype`` was removed from HttpResponse in Django 1.7; the keyword
    # is ``content_type``.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename=UserData.csv'
    writer = csv.writer(response)
    writer.writerow(['Date', 'HighBGL', 'LowBGL', 'Weight', 'Height'])
    query = DailyVital.objects.filter(user=request.user)
    # values_list returns plain tuples in the requested field order, which
    # csv.writer can consume directly -- no model instances materialized.
    for row in query.values_list('entered_at', 'high_BGL', 'low_BGL', 'weight', 'height'):
        writer.writerow(row)
    return response
If you didn't need it in Django, you might also consider the sqlite3 command line program's -csv option.
An easy way to do this would be to convert your models into a list of lists.
First you need an object to list function:
def object2list(obj, attr_list):
    """Collect each attribute named in attr_list from obj (None when absent)."""
    values = []
    for attr in attr_list:
        values.append(getattr(obj, attr, None))
    return values
Then you just pass that to the csvwriter with a list comprehension (given some list_of_objects that you've queried)
attr_list = ['date', 'high_BGL', 'low_BGL', 'diet', 'weight', 'height']
writer.writerows([object2list(obj, attr_list) for obj in list_of_objects])