Exporting a django template table to xlsx - python

I'm trying to use django template for the first time and I need to export some data. How can I export a table in my django template to a .xlsx file? Is there any method? here is my views.py:
from django.shortcuts import render
import requests
from .utils import most_frequent, get_json_values, get_max_dict
def launches(request):
"""main request. Retrieve the year that had most launches"""
response_launches = requests.get('https://api.spacexdata.com/v3/launches?
filter=launch_year')
launches = response_launches.json()
launch_years = get_json_values('launch_year',launches)
result_launches = most_frequent(launch_years)
"""retrieve the launch site most used for launches """
response_sites = requests.get('https://api.spacexdata.com/v3/launches?
filter=launch_site')
sites = response_sites.json()
launch_sites = get_json_values("launch_site", sites)
result_sites = get_max_dict(launch_sites,'site_id')
"""retrieve the number of launches between 2019 and 2021"""
response_2019_2021 = requests.get('https://api.spacexdata.com/v3/launches?
start=2019&end=2021')
launches_2019_2021 = response_2019_2021.json()
result_2019_2021 = len(launches_2019_2021)
data = {
"year_most_launches": str(result_launches),
"launch_sites":result_sites,
"launches_2019_2021":str(result_2019_2021)
}
return render(request,"main/launches.html", {"data":data})
And that's my table inside my template:
<table class="table">
<thead>
<tr>
<th>Year with most launches</th>
<th>Launch site with most launches</th>
<th>Number of launches between 2019 and 2021</th>
</tr>
</thead>
<tbody>
<tr>
{% for element in data.values %}
<td>{{ element }}</td>
{% endfor %}
</tr>
</tbody>
</table>
I couldn't find a way to do it so any help is really appreciated!

In my case, I create a function in the view that handle all the data and insert it into a Excel file.
Let me give you the code, it may help you.
def export_excel(self, elements):
output = io.BytesIO()
workbook = Workbook(output, {'in_memory': True})
worksheet = workbook.add_worksheet()
# r = Row and c = Column
r = 0
c = 0
# Table header
table_headers = [
# Define your headers name here
''
]
for header in table_headers:
worksheet.write(r, c, header)
c += 1
r += 1
for element in elements:
# write your data in the excel following the same command
worksheet.write(r, 0, element.data_name)
r += 1
workbook.close()
output.seek(0)
content_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
response = HttpResponse(output.read(), content_type=content_type)
file_name = _('file_name')
response['Content-Disposition'] = "attachment; filename=" + file_name + ".xlsx"
return response
Let me know if it was useful

Related

How to print out an function variable at HTML in python?

I am a self-learner and a beginner, searched a lot but maybe have lack of searching. I am scraping some values from two web sites and I want o compare them with an HTML output. Each web pages, I am combinin two class'es and gettin into a list. But when making an output with HTML I don't want all list to print. So I made function to choose any keywords to print. When I want to print out that function, It turns out 'None' at HTML output but it turns what I wanted on console. So how to show that special list?
OS= Windows , Python3.
from bs4 import BeautifulSoup
import requests
import datetime
import os
import webbrowser
carf_meySayf = requests.get('https://www.carrefoursa.com/tr/tr/meyve/c/1015?show=All').text
carf_soup = BeautifulSoup(carf_meySayf, 'lxml')
#spans
carf_name_span = carf_soup.find_all('span', {'class' : 'item-name'})
carf_price_span = carf_soup.find_all('span', {'class' : 'item-price'})
#spans to list
carf_name_list = [span.get_text() for span in carf_name_span]
carf_price_list = [span.get_text() for span in carf_price_span]
#combine lists
carf_mey_all = [carf_name_list +' = ' + carf_price_list for carf_name_list, carf_price_list in zip(carf_name_list, carf_price_list)]
#Function to choose and print special product
def test(namelist,product):
for i in namelist:
if product in i:
print(i)
a = test(carf_mey_all,'Muz')
# Date
date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# HTML part
html_str = """
<html>
<title>Listeler</title>
<h2>Tarih: %s</h2>
<h3>Product & Shop List</h3>
<table style="width:100%%">
<tr>
<th>Carrefour</th>
</tr>
<tr>
%s
</tr>
</html>
"""
whole = html_str %(date,a)
Html_file= open("Meyve.html","w")
Html_file.write(whole)
Html_file.close()
the method test() must have return value, for example
def test(namelist,product):
results = ''
for i in namelist:
if product in i:
print(i)
results += '<td>%s</td>\n' % i
return results
Meyve.html results:
<html>
<title>Listeler</title>
<h2>Tarih: 2018-12-29 07:34:00</h2>
<h3>Product & Shop List</h3>
<table style="width:100%">
<tr>
<th>Carrefour</th>
</tr>
<tr>
<td>Muz = 6,99 TL</td>
<td>İthal Muz = 12,90 TL</td>
<td>Paket Yerli Muz = 9,99 TL</td>
</tr>
</html>
note: to be valid html you need to add <body></body>
The problem is that your test() function isn't explicitly returning anything, so it is implicitly returning None.
To fix this, test() should accumulate the text it wants to return (i.e, by building a list or string) and return a string containing the text you want to insert into html_str.

python parsing beautiful soup data to csv

I have written code in python3 to parse an html/css table. Have a few issues with it:
my csv output file headers are not generated based on html (tag: td, class: t1) by my code (on the first run when the output file is being created)
if the incoming html table has a few additional fields (tag: td, class: t1) my code cannot currently capture them and create additional headers in the csv output file
the data is not written to the output cvs file till ALL the ids (A001,A002,A003...) from my input file are processed. i want to write to the output cvs file when the processing of each id from my input file is completed (i.e. A001 to be written to csv before processing A002).
whenever i rerun the code, the data does not begin from the next line in the output csv
Being a noob, I am sure my code is very rudimentary and there will be a better way to do this and would like to learn to write this better and fix the above as well.
Need advise & guidance, please help. Thank you.
My Code:
import csv
import requests
from bs4 import BeautifulSoup
## SIDs.csv contains ids in col2 based on which the 'url' variable pulls the respective data
SIDFile = open('SIDs.csv')
SIDReader = csv.reader(SIDFile)
SID = list(SIDReader)
SqID_data = []
#create and open output file
with open('output.csv','a', newline='') as csv_h:
fields = \
[
"ID",
"Financial Year",
"Total Income",
"Total Expenses",
"Tax Expense",
"Net Profit"
]
for row in SID:
col1,col2 = row
SID ="%s" % (col2)
url = requests.get("http://.......")
soup = BeautifulSoup(url.text, "lxml")
fy = soup.findAll('td',{'class':'tablehead'})
titles = soup.findAll('td',{'class':'t1'})
values = soup.findAll('td',{'class':'t0'})
if titles:
data = {}
for title in titles:
name = title.find("td", class_ = "t1")
data["ID"] = SID
data["Financial Year"] = fy[0].string.strip()
data["Total Income"] = values[0].string.strip()
data["Total Expenses"] = values[1].string.strip()
data["Tax Expense"] = values[2].string.strip()
data["Net Profit"] = values[3].string.strip()
SqID_data.append(data)
#Prepare CSV writer.
writer = csv.DictWriter\
(
csv_h,
fields,
quoting = csv.QUOTE_ALL,
extrasaction = "ignore",
dialect = "excel",
lineterminator = "\n",
)
writer.writeheader()
writer.writerows(SqID_data)
print("write rows complete")
Excerpt of HTML being processed:
<p>
<TABLE border=0 cellspacing=1 cellpadding=6 align=center class="vTable">
<TR>
<TD class=tablehead>Financial Year</TD>
<TD class=t1>01-Apr-2015 To 31-Mar-2016</TD>
</TR>
</TABLE>
</p>
<p>
<br>
<table cellpadding=3 cellspacing=1 class=vTable>
<TR>
<TD class=t1><b>Total income from operations (net) ( a + b)</b></td>
<TD class=t0 nowrap>675529.00</td>
</tr>
<TR>
<TD class=t1><b>Total expenses</b></td>
<TD class=t0 nowrap>446577.00</td>
</tr>
<TR>
<TD class=t1>Tax expense</td>
<TD class=t0 nowrap>71708.00</td>
</tr>
<TR>
<TD class=t1><b>Net Profit / (Loss)</b></td>
<TD class=t0 nowrap>157621</td>
</tr>
</table>
</p>
SIDs.csv (no header row)
1,A0001
2,A0002
3,A0003
Expected Output: output.csv (create header row)
ID,Financial Year,Total Income,Total Expenses,Tax Expense,Net Profit,OtherFieldsAsAndWhenFound
A001,01-Apr-2015 To 31-Mar-2016,675529.00,446577.00,71708.00,157621.00
A002,....
A003,....
I would recommend looking at pandas.read_html for parsing your web data; on your sample data this gives you:
import pandas as pd
tables=pd.read_html(s, index_col=0)
tables[0]
Out[11]:
1
0
Financial Year 01-Apr-2015 To 31-Mar-2016
tables[1]
1
0
Total income from operations (net) ( a + b) 675529
Total expenses 446577
Tax expense 71708
Net Profit / (Loss) 157621
You can then do what ever data manipulations you need (adding id's etc) using Pandas functions, and then export with DataFrame.to_csv.

converting text file to html file with python

I have a text file that contains :
JavaScript 0
/AA 0
OpenAction 1
AcroForm 0
JBIG2Decode 0
RichMedia 0
Launch 0
Colors>2^24 0
uri 0
I wrote this code to convert the text file to html :
contents = open("C:\\Users\\Suleiman JK\\Desktop\\Static_hash\\test","r")
with open("suleiman.html", "w") as e:
for lines in contents.readlines():
e.write(lines + "<br>\n")
but the problem that I had in html file that in each line there is no space between the two columns:
JavaScript 0
/AA 0
OpenAction 1
AcroForm 0
JBIG2Decode 0
RichMedia 0
Launch 0
Colors>2^24 0
uri 0
what should I do to have the same content and the two columns like in text file
Just change your code to include <pre> and </pre> tags to ensure that your text stays formatted the way you have formatted it in your original text file.
contents = open"C:\\Users\\Suleiman JK\\Desktop\\Static_hash\\test","r")
with open("suleiman.html", "w") as e:
for lines in contents.readlines():
e.write("<pre>" + lines + "</pre> <br>\n")
This is HTML -- use BeautifulSoup
from bs4 import BeautifulSoup
soup = BeautifulSoup()
body = soup.new_tag('body')
soup.insert(0, body)
table = soup.new_tag('table')
body.insert(0, table)
with open('path/to/input/file.txt') as infile:
for line in infile:
row = soup.new_tag('tr')
col1, col2 = line.split()
for coltext in (col2, col1): # important that you reverse order
col = soup.new_tag('td')
col.string = coltext
row.insert(0, col)
table.insert(len(table.contents), row)
with open('path/to/output/file.html', 'w') as outfile:
outfile.write(soup.prettify())
That is because HTML parsers collapse all whitespace. There are two ways you could do it (well probably many more).
One would be to flag it as "preformatted text" by putting it in <pre>...</pre> tags.
The other would be a table (and this is what a table is made for):
<table>
<tr><td>Javascript</td><td>0</td></tr>
...
</table>
Fairly tedious to type out by hand, but easy to generate from your script. Something like this should work:
contents = open("C:\\Users\\Suleiman JK\\Desktop\\Static_hash\\test","r")
with open("suleiman.html", "w") as e:
e.write("<table>\n")
for lines in contents.readlines():
e.write("<tr><td>%s</td><td>%s</td></tr>\n"%lines.split())
e.write("</table>\n")
You can use a standalone template library like mako or jinja. Here is an example with jinja:
from jinja2 import Template
c = '''<!doctype html>
<html>
<head>
<title>My Title</title>
</head>
<body>
<table>
<thead>
<tr><th>Col 1</th><th>Col 2</th></tr>
</thead>
<tbody>
{% for col1, col2 in lines %}
<tr><td>{{ col 1}}</td><td>{{ col2 }}</td></tr>
{% endfor %}
</tbody>
</table>
</body>
</html>'''
t = Template(c)
lines = []
with open('yourfile.txt', 'r') as f:
for line in f:
lines.append(line.split())
with open('results.html', 'w') as f:
f.write(t.render(lines=lines))
If you can't install jinja, then here is an alternative:
header = '<!doctyle html><html><head><title>My Title</title></head><body>'
body = '<table><thead><tr><th>Col 1</th><th>Col 2</th></tr>'
footer = '</table></body></html>'
with open('input.txt', 'r') as input, open('output.html', 'w') as output:
output.writeln(header)
output.writeln(body)
for line in input:
col1, col2 = line.rstrip().split()
output.write('<tr><td>{}</td><td>{}</td></tr>\n'.format(col1, col2))
output.write(footer)
I have added title, looping here line by line and appending each line on < tr > and < td > tags, it is should work as single table without column. No need to use these tags(< tr >< /tr > and < td >< /td >[gave a spaces for readability]) for col1 and col2.
log: snippet:
MUTHU PAGE
2019/08/19 19:59:25 MUTHUKUMAR_TIME_DATE,line: 118 INFO | Logger
object created for: MUTHUKUMAR_APP_USER_SIGNUP_LOG 2019/08/19 19:59:25
MUTHUKUMAR_DB_USER_SIGN_UP,line: 48 INFO | ***** User SIGNUP page
start ***** 2019/08/19 19:59:25 MUTHUKUMAR_DB_USER_SIGN_UP,line: 49
INFO | Enter first name: [Alphabet character only allowed, minimum 3
character to maximum 20 chracter]
html source page:
'''
<?xml version="1.0" encoding="utf-8"?>
<body>
<table>
<p>
MUTHU PAGE
</p>
<tr>
<td>
2019/08/19 19:59:25 MUTHUKUMAR_TIME_DATE,line: 118 INFO | Logger object created for: MUTHUKUMAR_APP_USER_SIGNUP_LOG
</td>
</tr>
<tr>
<td>
2019/08/19 19:59:25 MUTHUKUMAR_DB_USER_SIGN_UP,line: 48 INFO | ***** User SIGNUP page start *****
</td>
</tr>
<tr>
<td>
2019/08/19 19:59:25 MUTHUKUMAR_DB_USER_SIGN_UP,line: 49 INFO | Enter first name: [Alphabet character only allowed, minimum 3 character to maximum 20 chracter]
'''
CODE:
from bs4 import BeautifulSoup
soup = BeautifulSoup(features='xml')
body = soup.new_tag('body')
soup.insert(0, body)
table = soup.new_tag('table')
body.insert(0, table)
with open('C:\\Users\xxxxx\\Documents\\Latest_24_may_2019\\New_27_jun_2019\\DB\\log\\input.txt') as infile:
title_s = soup.new_tag('p')
title_s.string = " MUTHU PAGE "
table.insert(0, title_s)
for line in infile:
row = soup.new_tag('tr')
col1 = list(line.split('\n'))
col1 = [ each for each in col1 if each != '']
for coltext in col1:
col = soup.new_tag('td')
col.string = coltext
row.insert(0, col)
table.insert(len(table.contents), row)
with open('C:\\Users\xxxx\\Documents\\Latest_24_may_2019\\New_27_jun_2019\\DB\\log\\output.html', 'w') as outfile:
outfile.write(soup.prettify())

Pick up texts from a batch of files, and write them into an Excel file

(Environment: Python 2.7.6 Shell IDLE + BeautifulSoup 4.3.2 + )
I want to pick up some texts from a batch of files (about 50 files), and put them nicely into an Excel file, either row by row, or column by column.
The text sample in each file contains below:
<tr>
<td width=25%>
Arnold Ed
</td>
<td width=15%>
18 Feb 1959
</td>
</tr>
<tr>
<td width=15%>
男性
</td>
<td width=15%>
02 March 2002
</td>
</tr>
<tr>
<td width=15%>
Guangxi
</td>
</tr>
What I so far worked out are being shown below. The way is to read the files one by one. The codes run fine until the texts pickup part, but they are not writing into the Excel file.
from bs4 import BeautifulSoup
import xlwt
list_open = open("c:\\file list.txt")
read_list = list_open.read()
line_in_list = read_list.split("\n")
for each_file in line_in_list:
page = open(each_file)
soup = BeautifulSoup(page.read())
all_texts = soup.find_all("td")
for a_t in all_texts:
a = a_t.renderContents()
#"print a" here works ok
book = xlwt.Workbook(encoding='utf-8', style_compression = 0)
sheet = book.add_sheet('namelist', cell_overwrite_ok = True)
sheet.write (0, 0, a)
book.save("C:\\details.xls")
Actually it’s only writing the last piece of texts into the Excel file. So in what way I can have it correctly done?
With laike9m's help, the final version is:
list_open = open("c:\\file list.txt")
read_list = list_open.read()
line_in_list = read_list.split("\n")
book = xlwt.Workbook(encoding='utf-8', style_compression = 0)
sheet = book.add_sheet('namelist', cell_overwrite_ok = True)
for i,each_file in enumerate(line_in_list):
page = open(each_file)
soup = BeautifulSoup(page.read())
all_texts = soup.find_all("td")
for j,a_t in enumerate(all_texts):
a = a_t.renderContents()
sheet.write (i, j, a)
book.save("C:\\details.xls")
You didn't put the last four lines into for loop. I guess that's why it’s only writing the last piece of texts into the Excel file.
EDIT
book = xlwt.Workbook(encoding='utf-8', style_compression = 0)
sheet = book.add_sheet('namelist', cell_overwrite_ok = True)
for i, each_file in enumerate(line_in_list):
page = open(each_file)
soup = BeautifulSoup(page.read())
all_texts = soup.find_all("td")
for j, a_t in enumerate(all_texts):
a = a_t.renderContents()
sheet.write(i, j, a)
book.save("C:\\details.xls")

Loop through Result to Gather Urls

I'm trying to parse an html result , grab a few urls, and then parse the output of visiting those urls.
I'm using django 1.5 /python 2.7:
views.py
#mechanize/beautifulsoup config options here.
beautifulSoupObj = BeautifulSoup(mechanizeBrowser.response().read()) #read the raw response
getFirstPageLinks = beautifulSoupObj.find_all('cite') #get first page of urls
url_data = UrlData(NumberOfUrlsFound, getDomainLinksFromGoogle)
#url_data = UrlData(5, 'myapp.com')
#return HttpResponse(MaxUrlsToGather)
print url_data.url_list()
return render(request, 'myapp/scan/process_scan.html', {
'url_data':url_data,'EnteredDomain':EnteredDomain,'getDomainLinksFromGoogle':getDomainLinksFromGoogle,
'NumberOfUrlsFound':NumberOfUrlsFound,
'getFirstPageLinks' : getFirstPageLinks,
})
urldata.py
class UrlData(object):
def __init__(self, num_of_urls, url_pattern):
self.num_of_urls = num_of_urls
self.url_pattern = url_pattern
def url_list(self):
# Returns a list of strings that represent the urls you want based on num_of_urls
# e.g. asite.com/?search?start=10
urls = []
for i in xrange(self.num_of_urls):
urls.append(self.url_pattern + '&start=' + str((i + 1) * 10) + ',')
return urls
template:
{{ getFirstPageLinks }}
{% if url_data.num_of_urls > 0 %}
{% for url in url_data.url_list %}
{{ url }}
{% endfor %}
{% endif %}
This outputs:
[<cite>www.google.com/webmasters/</cite>, <cite>www.domain.com</cite>, <cite>www.domain.comblog/</cite>, <cite>www.domain.comblog/projects/</cite>, <cite>www.domain.comblog/category/internet/</cite>, <cite>www.domain.comblog/category/goals/</cite>, <cite>www.domain.comblog/category/uncategorized/</cite>, <cite>www.domain.comblog/twit/2013/01/</cite>, <cite>www.domain.comblog/category/dog-2/</cite>, <cite>www.domain.comblog/category/goals/personal/</cite>, <cite>www.domain.comblog/category/internet/tech/</cite>]
which is generated by: getFirstPageLinks
and
https://www.google.com/search?q=site%3Adomain.com&start=10, https://www.google.com/search?q=site%3Adomain.com&start=20,
which is generated by: url_data a template variable
The problem currently is: I need to loop though each url in url_data and get the output like getFirstPageLinks is outputting it.
How can I achieve this?
Thank you.

Categories

Resources