Ok so I have two html files. base.html and search_results.html
base.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
<style>
body {
font-family: sans-serif;
background-image: url('/static/Flat-Color-Gradient-Triangles.png');
width: 430px;
height: 763px;
align-items: center;
}
/* Page heading. */
h1 {
font-family: "Century Gothic", CenturyGothic, AppleGothic, sans-serif;
font-size: 45px;
/* `font-color` is not a CSS property and is ignored by browsers; text colour is set with `color`. */
color: #333333;
font-weight: bold;
margin: 0;
padding: 3px;
text-align: center;
}
h2 {
font-family: "Avant Garde", Avantgarde, "Century Gothic", CenturyGothic, AppleGothic, sans-serif;
font-size: 15px;
font-weight: bold;
margin: 0;
padding: 3px;
border: 1.5px
}
h3 {
font-family: Candara, Calibri, Segoe, "Segoe UI", Optima, Arial, sans-serif;
font-size: 14px;
font-weight: 100;
margin: 0;
border-top: 1px;
border-bottom: 1px;
padding: 3px;
}
.explicit{
color: rgb(200, 0, 0);
}
.rank{
font-weight: bold;
}
table {
border-collapse: collapse;
padding: 4px;
}
table, th, td {
padding: 5px;
border-bottom: 1px solid black;
color: white;
text-align: center;
}
th {
background-color: #121f1f;
border-left: #121f1f;
border-right:#121f1f;
}
td {
background-color: #253f3f;
border-left: #253f3f;
border-right: #253f3f;
}
input[type='text'] {
width: 100%;
padding: 6px 10px;
margin: 6px 0;
box-sizing: border-box;
border: 2px solid #ccc;
font-size: 14px;
border-radius: 10px;
}
input[type='text']:focus {
border: 3px solid red;
}
input[type = 'submit'] {
background-color: #4CAF50;
border: none;
color: white;
padding: 16px 32px;
text-decoration: none;
margin: 4px 2px;
width: 100%;
}
input[value="Up"] {
background-color: #4CAF50;
border: none;
color: white;
padding: 16px 32px;
text-decoration: none;
margin: 4px 2px;
}
input[value="Down"] {
background-color: rgb(200,0,0);
border: none;
color: white;
padding: 16px 32px;
text-decoration: none;
margin: 4px 2px;
}
input[value="Add"] {
border: 3px solid #4CAF50;
color: white;
padding: 16px 32px;
text-decoration: none;
margin: 4px 2px;
}
div.content {
width: 430px;
align-items: center;
}
</style>
</head>
<body>
<div class="content">
<h1>Public Playlist</h1>
<!-- Search form: posts the artist / album / track terms to /search_results. -->
<form action="/search_results" method="POST">
  <!-- <input> is a void element, so it takes no </input> end tag.
       aria-label gives each field an accessible name; a placeholder alone is not a label. -->
  <input type="text" name="artist" placeholder="Artist:" aria-label="Artist">
  <input type="text" name="album" placeholder="Album:" aria-label="Album">
  <input type="text" name="track" placeholder="Track:" aria-label="Track">
  <input type="submit" value="Submit!" id="search">
</form>
<!--
  Playlist table: one row per ranked song with Up / Down vote controls.
  - Each vote <form> lives inside its <td>; a <form> placed directly in a <tr>
    is invalid and the parser detaches it from the controls it should own.
  - The vote controls are type="submit" so a click actually sends the form
    (type="button" does nothing without script). As submit inputs they also
    match the input[type='submit'] rule, including its width: 100%.
  - ids are unique per row (up, up1, ... / down, down1, ...).
  - The rank cell uses the existing .rank class instead of the invalid
    type="bold" attribute.
-->
<table>
  <tr>
    <th><h2>Rank</h2></th>
    <th><h2>Title</h2>
    <h2>Author|Album</h2></th>
    <th><h2>Score</h2></th>
    <th><h2>Vote</h2></th>
  </tr>
  <tr>
    <td><h3 class="rank">1</h3></td>
    <td><h3>{{name}}</h3>
    <h3>{{artist}}:{{album}}</h3></td>
    <td><h3>{{ score }}</h3></td>
    <td>
      <form action="/vote" method="POST">
        <input type="submit" name="vote" value="Up" id="up">
        <input type="submit" name="vote" value="Down" id="down">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">2</h3></td>
    <td><h3>{{name1}}</h3>
    <h3>{{artist1}}:{{album1}}</h3></td>
    <td><h3>{{ score1 }}</h3></td>
    <td>
      <!-- NOTE(review): this row posts to "/" while every other row posts to "/vote"; confirm which is intended. -->
      <form action="/" method="POST">
        <input type="submit" name="vote1" value="Up" id="up1">
        <input type="submit" name="vote1" value="Down" id="down1">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">3</h3></td>
    <td><h3>{{name2}}</h3>
    <h3>{{artist2}}:{{album2}}</h3></td>
    <td><h3>{{ score2 }}</h3></td>
    <td>
      <form action="/vote" method="POST">
        <input type="submit" name="vote2" value="Up" id="up2">
        <input type="submit" name="vote2" value="Down" id="down2">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">4</h3></td>
    <td><h3>{{name3}}</h3>
    <h3>{{artist3}}:{{album3}}</h3></td>
    <td><h3>{{ score3 }}</h3></td>
    <td>
      <form action="/vote" method="POST">
        <input type="submit" name="vote3" value="Up" id="up3">
        <input type="submit" name="vote3" value="Down" id="down3">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">5</h3></td>
    <td><h3>{{name4}}</h3>
    <h3>{{artist4}}:{{album4}}</h3></td>
    <td><h3>{{ score4 }}</h3></td>
    <td>
      <form action="/vote" method="POST">
        <input type="submit" name="vote4" value="Up" id="up4">
        <input type="submit" name="vote4" value="Down" id="down4">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">6</h3></td>
    <td><h3>{{name5}}</h3>
    <h3>{{artist5}}:{{album5}}</h3></td>
    <td><h3>{{ score5 }}</h3></td>
    <td>
      <form action="/vote" method="POST">
        <input type="submit" name="vote5" value="Up" id="up5">
        <input type="submit" name="vote5" value="Down" id="down5">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">7</h3></td>
    <td><h3>{{name6}}</h3>
    <h3>{{artist6}}:{{album6}}</h3></td>
    <td><h3>{{ score6 }}</h3></td>
    <td>
      <form action="/vote" method="POST">
        <input type="submit" name="vote6" value="Up" id="up6">
        <input type="submit" name="vote6" value="Down" id="down6">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">8</h3></td>
    <td><h3>{{name7}}</h3>
    <h3>{{artist7}}:{{album7}}</h3></td>
    <td><h3>{{ score7 }}</h3></td>
    <td>
      <form action="/vote" method="POST">
        <input type="submit" name="vote7" value="Up" id="up7">
        <input type="submit" name="vote7" value="Down" id="down7">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">9</h3></td>
    <td><h3>{{name8}}</h3>
    <h3>{{artist8}}:{{album8}}</h3></td>
    <td><h3>{{ score8 }}</h3></td>
    <td>
      <form action="/vote" method="POST">
        <input type="submit" name="vote8" value="Up" id="up8">
        <input type="submit" name="vote8" value="Down" id="down8">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">10</h3></td>
    <td><h3>{{name9}}</h3>
    <h3>{{artist9}}:{{album9}}</h3></td>
    <td><h3>{{ score9 }}</h3></td>
    <td>
      <form action="/vote" method="POST">
        <input type="submit" name="vote9" value="Up" id="up9">
        <input type="submit" name="vote9" value="Down" id="down9">
      </form>
    </td>
  </tr>
</table>
<script>
setTimeout(function () {location.reload() },60000);
</script>
</div>
</body>
</html>
search_results.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
<style>
body {
font-family: sans-serif;
background-image: url('/static/Flat-Color-Gradient-Triangles.png');
width: 640px;
height: 1136px;
}
/* Page heading. */
h1 {
font-family: "Century Gothic", CenturyGothic, AppleGothic, sans-serif;
font-size: 45px;
/* `font-color` is not a CSS property and is ignored by browsers; text colour is set with `color`. */
color: #333333;
font-weight: bold;
margin: 0;
padding: 3px;
text-align: center;
}
h2 {
font-family: "Avant Garde", Avantgarde, "Century Gothic", CenturyGothic, AppleGothic, sans-serif;
font-size: 15px;
font-weight: bold;
margin: 0;
padding: 3px;
border: 1.5px
}
h3 {
font-family: Candara, Calibri, Segoe, "Segoe UI", Optima, Arial, sans-serif;
font-size: 14px;
font-weight: 100;
margin: 0;
border-top: 1px;
border-bottom: 1px;
padding: 3px;
}
.explicit {
color: rgb(200, 0, 0);
}
.rank {
font-weight: bold;
}
table {
border-collapse: collapse;
padding: 4px;
}
table,
th,
td {
padding: 5px;
border-bottom: 1px solid black;
color: white;
text-align: center;
}
th {
background-color: #121f1f;
border-left: #121f1f;
border-right: #121f1f;
}
td {
background-color: #253f3f;
border-left: #253f3f;
border-right: #253f3f;
}
input[type='text'] {
width: 100%;
padding: 6px 10px;
margin: 6px 0;
box-sizing: border-box;
border: 2px solid #ccc;
font-size: 14px;
border-radius: 10px;
}
input[type='text']:focus {
border: 3px solid red;
}
input[type='submit'] {
background-color: #4CAF50;
border: none;
color: white;
padding: 16px 32px;
text-decoration: none;
margin: 4px 2px;
width: 100%;
}
input[value="Up"] {
background-color: #4CAF50;
border: none;
color: white;
padding: 16px 32px;
text-decoration: none;
margin: 4px 2px;
}
input[value="Down"] {
background-color: rgb(200, 0, 0);
border: none;
color: white;
padding: 16px 32px;
text-decoration: none;
margin: 4px 2px;
}
input[name="Add"] {
border: 3px solid #4CAF50;
color: white;
background-color: #253f3f;
padding: 16px 32px;
text-decoration: none;
font-weight: bold;
margin: 4px 2px;
}
</style>
</head>
<body>
<h1>Public Playlist</h1>
<!--
  Search-results table: one row per result with an "Add" control.
  - Each <form> lives inside its <td>; a <form> wrapping <td> cells inside a
    <tr> is invalid and the parser detaches it from its controls.
  - The Add control is type="submit" so a click actually sends the GET request
    to /add (type="button" does nothing without script). As a submit input it
    also matches the input[type='submit'] rule, including its width: 100%.
  - The row is identified by a hidden field named "btn" carrying Add0 .. Add9,
    the key and values the /add view reads. The visible button keeps
    name="Add" so the input[name="Add"] style rule still applies.
  - The rank cell uses the existing .rank class instead of the invalid
    type="bold" attribute.
-->
<table>
  <tr>
    <th>
      <h2>Rank</h2>
    </th>
    <th>
      <h2>Title</h2>
      <h2>Author|Album</h2>
    </th>
    <th>
      <h2>Score</h2>
    </th>
    <th>
      <h2>Add</h2>
    </th>
  </tr>
  <tr>
    <td><h3 class="rank">1</h3></td>
    <td>
      <h3>{{name}}</h3>
      <h3>{{artist}}:{{album}}</h3>
    </td>
    <td><h3>{{ score }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add0">
        <input type="submit" name="Add" value="Add" id="Add">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">2</h3></td>
    <td>
      <h3>{{name1}}</h3>
      <h3>{{artist1}}:{{album1}}</h3>
    </td>
    <td><h3>{{ score1 }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add1">
        <input type="submit" name="Add" value="Add" id="Add1">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">3</h3></td>
    <td>
      <h3>{{name2}}</h3>
      <h3>{{artist2}}:{{album2}}</h3>
    </td>
    <td><h3>{{ score2 }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add2">
        <input type="submit" name="Add" value="Add" id="Add2">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">4</h3></td>
    <td>
      <h3>{{name3}}</h3>
      <h3>{{artist3}}:{{album3}}</h3>
    </td>
    <td><h3>{{ score3 }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add3">
        <input type="submit" name="Add" value="Add" id="Add3">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">5</h3></td>
    <td>
      <h3>{{name4}}</h3>
      <h3>{{artist4}}:{{album4}}</h3>
    </td>
    <td><h3>{{ score4 }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add4">
        <input type="submit" name="Add" value="Add" id="Add4">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">6</h3></td>
    <td>
      <h3>{{name5}}</h3>
      <h3>{{artist5}}:{{album5}}</h3>
    </td>
    <td><h3>{{ score5 }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add5">
        <input type="submit" name="Add" value="Add" id="Add5">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">7</h3></td>
    <td>
      <h3>{{name6}}</h3>
      <h3>{{artist6}}:{{album6}}</h3>
    </td>
    <td><h3>{{ score6 }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add6">
        <input type="submit" name="Add" value="Add" id="Add6">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">8</h3></td>
    <td>
      <h3>{{name7}}</h3>
      <h3>{{artist7}}:{{album7}}</h3>
    </td>
    <td><h3>{{ score7 }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add7">
        <input type="submit" name="Add" value="Add" id="Add7">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">9</h3></td>
    <td>
      <h3>{{name8}}</h3>
      <h3>{{artist8}}:{{album8}}</h3>
    </td>
    <td><h3>{{ score8 }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add8">
        <input type="submit" name="Add" value="Add" id="Add8">
      </form>
    </td>
  </tr>
  <tr>
    <td><h3 class="rank">10</h3></td>
    <td>
      <h3>{{name9}}</h3>
      <h3>{{artist9}}:{{album9}}</h3>
    </td>
    <td><h3>{{ score9 }}</h3></td>
    <td>
      <form action="/add" method="GET">
        <input type="hidden" name="btn" value="Add9">
        <input type="submit" name="Add" value="Add" id="Add9">
      </form>
    </td>
  </tr>
</table>
</body>
</html>
I then have a python file projectfalcon.py The relevant methods are:
@app.route('/add', methods=['GET'])
def addingSongs():
    """Move the search result chosen by the user into the playlist.

    The chosen result is identified by the ``btn`` query parameter, expected
    in the form ``Add<N>`` (``Add0``, ``Add1``, ...), where ``N`` is the index
    into ``songResults``. The selected song is up-voted, appended to
    ``addedSongs`` and removed from ``songResults``. A missing, malformed or
    out-of-range ``btn`` value is ignored.

    Returns:
        The rendered ``base.html`` template.
    """
    # The route only accepts GET, and GET form fields arrive in the query
    # string (request.args); request.form is filled from a request body.
    btnID = request.args.get('btn', '')
    suffix = btnID[3:]
    if btnID.startswith('Add') and suffix.isdecimal():
        index = int(suffix)
        if index < len(songResults):
            # pop(index) removes by position. list.remove(index) would look
            # for an element *equal to* the integer and raise ValueError.
            song = songResults.pop(index)
            song.upVote()
            addedSongs.append(song)
    return render_template('base.html')
So base.html is the first page that loads. You type in an artist name to search for songs, and it takes you to search_results.html, where the results are displayed. I am trying to let the user click the Add button so that the song is added to a list in the projectfalcon.py file. After the click, it should also take you back to base.html and reload the songs that are in the playlist. I am not sure what I am doing wrong. I have googled multiple times but may not have found the right wording. Any help is appreciated.
The most obvious issue I can spot is the way you're accessing the btn variable. Since your form sends a GET request, it needs to be accessed like this:
btnID = request.args.get('btn', '')
Apart from that, I don't understand how you're storing addedSongs list. Flask context is limited to requests, and you shouldn't use python global namespace. You should either create a context variable (also a global in flask) or the easiest way would be to just store user data in a database.
You should take a look at this flask tutorial which is a complete guide to making a web app from scratch. Perhaps it may be of some use to get the basics cleared up and allow you to 'find the right wordings' to your problems! :)
Cheers.
Related
Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
We don’t allow questions seeking recommendations for books, tools, software libraries, and more. You can edit the question so it can be answered with facts and citations.
Closed 1 year ago.
This post was edited and submitted for review 1 year ago and failed to reopen the post:
Original close reason(s) were not resolved
Improve this question
I have a process in which my Python code needs to generate a PDF.
I have an HTML file as follows:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="./proforma_supply_bill.css">
<link rel="preconnect" href="https://fonts.gstatic.com">
<!-- Google Fonts CSS2 request for Karla (400/600/700) and Rajdhani (700).
     Axis values are introduced with "@" (wght@400); a "#" would start a URL
     fragment and cut the rest of the query off the request. -->
<link href="https://fonts.googleapis.com/css2?family=Karla:wght@400;600;700&amp;family=Rajdhani:wght@700&amp;display=swap"
rel="stylesheet">
<title>Proforma bill of supply</title>
<style>
body {
font-family: "Karla", sans-serif;
margin: 0;
padding: 0;
box-sizing: border-box;
overflow-x: hidden;
}
th,
td {
padding: 0 !important;
text-align: left;
position: relative;
}
.mb-3 {
margin-bottom: 3px;
}
.mb-5 {
margin-bottom: 5px;
}
.pr-10{
padding-right: 10px !important;
}
.text-right {
text-align: right;
}
.border-b_color {
border-bottom: 1px solid #93d150;
}
.border-t {
border-top: 1px solid #dedede;
}
.border-b {
border-bottom: 1px solid #dedede;
}
.card {
background-color: #fff;
padding: 0 20px;
}
.card__header,
.card__total_amount,
.card__amount_section {
display: flex;
align-items: center;
justify-content: space-between;
padding: 17px 0;
}
.card__header_img {
width: 175px;
}
.card__header_title {
font-family: "Rajdhani", sans-serif;
font-size: 24px;
font-weight: bold;
color: #2b9eaa;
text-transform: uppercase;
}
.card__info_flex {
display: flex;
padding: 10px 0;
}
.card__info {
flex: 50%;
}
.card__info_row:not(:last-child) {
margin-bottom: 5px;
}
.card__info_title,
.card__table_data_row__content {
color: #141414;
font-size: 15px;
}
.card__info_text {
color: #141414;
font-size: 15px;
font-weight: bold;
}
.card__table {
width: 100%;
border-collapse: collapse;
}
.card__table_header_row {
background-color: #2b9eaa;
}
.card__table_data_row {
border-top: 1px solid #dedede;
}
.card__table_header_row__content {
color: #fff;
font-size: 15px;
font-weight: bold;
padding: 6px 0;
}
.card__table_data_row__content {
padding: 5px 0;
}
.card__table_data_row__subcontent {
font-size: 13px;
color: #141414;
width: max-content;
margin-left: 10px;
}
.dashed_b-t {
border-top: 1px dashed #dedede;
}
.card__total_amount {
background-color: #F2F2F2;
padding: 4px 10px 5px;
border-top: 1px solid #dedede;
border-bottom: 1px solid #dedede;
}
.card__amount_section {
padding: 6px 10px 5px;
align-items: flex-start;
}
.card__total_amount__title,
.card__total_amount__title_lg {
font-size: 18px;
color: #141414;
font-weight: bold;
}
.card__declaration {
padding: 6px 10px 5px;
background-color: #D4ECEE;
font-size: 15px;
font-weight: bold;
color: #2b2b2b;
}
.card__signature {
margin-top: 30px;
padding: 0 10px;
}
</style>
</head>
<body>
<div class="card">
<div class="card__header border-b_color">
<img src="https://res.cloudinary.com/exportify/image/upload/v1573547246/ExportifyLogo/exportify_logo_166x31_OG_yhqmrg.svg"
alt="Exportify Logo" class="card__header_img">
<div class="card__header_title">PROFORMA Bill of Supply</div>
</div>
<div class="card__info_flex border-b_color">
<div class="card__info">
<div class="card__info_row">
<span class="card__info_title">Proforma Invoice No.: </span>
<span class="card__info_text">{{PROFORMA_INV_NO}}</span>
</div>
<div class="card__info_row">
<span class="card__info_title">Reference No. & Date.: </span>
<span class="card__info_text">{{REF_NO}}</span>
</div>
<div class="card__info_row">
<span class="card__info_title">Buyer's Order No.: </span>
<span class="card__info_text">{{BUYER_ORDER_NO}}</span>
</div>
<div class="card__info_row">
<span class="card__info_title">Vessel/Flight No.: </span>
<span class="card__info_text">{{VESSEL_NAME}}</span>
</div>
<div class="card__info_row">
<span class="card__info_title">City/Port of Loading: </span>
<span class="card__info_text">{{POL}}</span>
</div>
<div class="card__info_row">
<span class="card__info_title">Terms of Delivery: </span>
<span class="card__info_text">{{DELIVERY_TERMS}}</span>
</div>
</div>
<div class="card__info">
<div class="card__info_row">
<span class="card__info_title">Dated: </span>
<span class="card__info_text">{{INV_DATE}}</span>
</div>
<div class="card__info_row">
<span class="card__info_title">SAIL Date: </span>
<span class="card__info_text">{{SAIL_DATE}}</span>
</div>
<div class="card__info_row">
<span class="card__info_title">Place of Receipt by Shipper: </span>
<span class="card__info_text">{{PLACE_OF_RECEIPT}}</span>
</div>
<div class="card__info_row">
<span class="card__info_title">City/Port of Discharge: </span>
<span class="card__info_text">{{POD}}</span>
</div>
</div>
</div>
<div class="card__info_flex" style="padding-bottom: 0;">
<div class="card__info"></div>
<div class="card__info mb-3">
<span class="card__info_title">Buyer (Bill to)</span>
</div>
</div>
<div class="card__info_flex" style="padding-top: 0;">
<div class="card__info">
<div class="card__info_text mb-3">XPORTIFY TECHNOLOGIES PRIVATE LIMITED</div>
<div class="card__info_title mb-5" style="line-height: 20px; width: 85%;">
3rd Floor, 313-314, A/3, BGTA Ganga Premises, Wadala Truck Terminal, Near Wadala RTO, Wadala East,
Mumbai
</div>
<div class="card__info_row">
<span class="card__info_title">GSTIN/UIN: </span>
<span class="card__info_text">27AAACX2283M1ZX</span>
</div>
<div class="card__info_row">
<span class="card__info_title">PAN No: </span>
<span class="card__info_text">AAACX2283M</span>
</div>
<div class="card__info_title mb-5">State Name: Maharashtra, Code: 27</div>
<div class="card__info_row">
<span class="card__info_title">CIN: </span>
<span class="card__info_text">U74999MH2017PTC295494</span>
</div>
</div>
<div class="card__info">
<div class="card__info_text mb-3">{{BUYER_COMPANY_NAME}}</div>
<div class="card__info_title mb-5" style="line-height: 20px;">
{{BUYER_ADDRESS}}
</div>
<div class="card__info_row">
<span class="card__info_title">GSTIN/UIN: </span>
<span class="card__info_text">{{BUYER_GST}}</span>
</div>
<div class="card__info_title mb-5">State Name: {{BUYER_STATE}}</div>
<div class="card__info_row">
<span class="card__info_title">Place of Supply: </span>
<span class="card__info_text">{{BUYER_PLACE_OF_SUPPLY}}</span>
</div>
</div>
</div>
<table class="card__table mb-5">
<thead>
<tr class="card__table_header_row">
<th>
<div class="card__table_header_row__content" style="padding-left: 10px;">Sr. No.</div>
</th>
<th>
<div class="card__table_header_row__content">Description of Services</div>
</th>
<th>
<div class="card__table_header_row__content">HSN/SAC</div>
</th>
<th>
<div class="card__table_header_row__content">Quantity</div>
</th>
<th>
<div class="card__table_header_row__content">Rate</div>
</th>
<th>
<div class="card__table_header_row__content">Per</div>
</th>
<th>
<div class="card__table_header_row__content text-right" style="padding-right: 10px;">
Amount
</div>
</th>
</tr>
</thead>
<tbody>
<tr class="card__table_data_row">
<td>
<div class="card__table_data_row__content" style="padding-left: 10px;">1</div>
</td>
<td>
<div class="card__table_data_row__content">Freight Charges</div>
<div class="card__table_data_row__subcontent">$ 1938/20x1x#74.97</div>
</td>
<td>
<div class="card__table_data_row__content">996521</div>
</td>
<td>
<div class="card__table_data_row__content">1</div>
</td>
<td>
<div class="card__table_data_row__content">7,900.00</div>
</td>
<td>
<div class="card__table_data_row__content">Container</div>
</td>
<td>
<div class="card__table_data_row__content text-right pr-10" style="right: 0;">USD
7,900.00
</div>
</td>
</tr>
</tbody>
</table>
<div class="card__total_amount mb-5">
<div class="card__total_amount__title">Total</div>
<div class="card__total_amount__title_lg">USD 7,900.00</div>
</div>
<div class="card__amount_section border-b mb-3">
<div class="card__amount_section_flex">
<div class="card__info_title mb-3">Amount Chargeable (in words)</div>
<div class="card__info_text">USD Seven Thousand Nine Hundred Only</div>
</div>
<div class="card__amount_section_flex">
<div class="card__info_title">E & O.E</div>
</div>
</div>
<div class="card__amount_section border-t">
<div class="card__amount_section_flex">
<div class="card__info_title">HSN/SAC</div>
</div>
<div class="card__amount_section_flex">
<div class="card__info_title">Taxable Value</div>
</div>
</div>
<div class="card__amount_section border-t mb-3">
<div class="card__amount_section_flex">
<div class="card__info_title">996521</div>
</div>
<div class="card__amount_section_flex">
<div class="card__info_title">INR 1,45,291.86</div>
</div>
</div>
<div class="card__total_amount mb-5">
<div class="card__total_amount__title" style="font-size: 15px;">Total</div>
<div class="card__total_amount__title_lg" style="font-size: 15px;">INR 1,45,291.86</div>
</div>
<div class="card__info_row" style="padding: 10px;">
<span class="card__info_title">Tax Amount (in words): </span>
<span class="card__info_text">NIL</span>
</div>
<div class="card__declaration">
Declaration
</div>
<div class="card__info_title" style="padding: 10px;">
We declare that this invoice shows the actual price of the goods described and that all particulars are true
and correct.
</div>
<div class="card__info_text" style="padding: 0 10px;">
for XPORTIFY TECHNOLOGIES PRIVATE LIMITED
</div>
<div class="card__signature">
<div class="card__info_title">Authorised Signatory</div>
</div>
</div>
</body>
</html>
I want to convert this HTML file into a PDF using Python.
One option is the wkhtmltopdf package, but I would have to run it from the command line every time.
Which is the most optimal way of doing this without hampering the flow of my code?
Install pdfkit package
pip install pdfkit
Install wkhtmltopdf https://wkhtmltopdf.org/downloads.html
Convert the HTML files in the current folder to PDF
import pdfkit
import glob
3.1 Set wkhtmltopdf executable file path
config = pdfkit.configuration(wkhtmltopdf='C:/Program Files/wkhtmltopdf/bin/wkhtmltopdf.exe')
3.2 Convert all html files in the current folder
# Convert every .html file in the current folder to a PDF with the same base name.
for file in glob.glob('./*.html'):
    # Strip the whole '.html' suffix (5 characters). file[:-4] kept the dot
    # and produced names such as './page..pdf'.
    pdfkit.from_file(file, file[:-len('.html')] + '.pdf', configuration=config)
I am writing a data scraping script. Its purpose is to collect data on available broadband deals from BT's website. I cannot figure out why my simple requests code doesn't fill in the form and follow through to the next page.
Please help me figure out how to enter data into this form and save the output html for data scraping.
I have identified the relevant tags in the form that I am interested in. I am trying to populate the UPRN field and continue to the next page
Link to website: https://www.dslchecker.bt.com/#
My python code:
'''python
import requests
url = "https://www.dslchecker.bt.com/#"
payload = {'UPRN':'10033360983'}
r = requests.post(url, data = payload)
print(r.text)
'''
Form from the website:
'''html
<form method="post" action="adsl/ADSLChecker.UPRNoutput"><input type="hidden" name="URL"> <input value="a%20service%20provider" type="hidden" name="SP_NAME">
<span class="subheading">UPRN:</span><br><input maxlength="13" size="14" name="UPRN" autocomplete="off" style="background-image: url(""); background-repeat: no-repeat; background-attachment: scroll; background-size: 16px 18px; background-position: 98% 50%; cursor: auto;"> <input value="56" type="hidden" name="VERSION"> <input value="E" type="hidden" name="MS"> <input value="no" type="hidden" name="CAP"> <input value="Y" type="hidden" name="AEA"> <input class="form_button" value="submit" type="submit"> </form>
'''
Please follow this link :https://www.dslchecker.bt.com/# and enter 10033346575 in the UPRN field to see the desired output
My output when run in a Jupyter notebook:
'''html
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<!-- saved from url=(0063)http://dslcheckerait.vade.bt.com:61065/adsl/adslchecker.welcome -->
<HTML><HEAD>
<STYLE>
.body {FONT-VARIANT: normal; FONT-FAMILY: Verdana, Arial, Helvetica, sans-serif; COLOR: #004d5f; FONT-SIZE: 11px; FONT-WEIGHT: normal; TEXT-DECORATION: none
}
.bodybold {FONT-VARIANT: normal; FONT-FAMILY: Verdana, Arial, Helvetica, sans-serif; COLOR: #333333; FONT-SIZE: 11px; FONT-WEIGHT: bold; TEXT-DECORATION: none
}
.errormessage {FONT-VARIANT: normal; FONT-FAMILY: Verdana, Arial, Helvetica, sans-serif; COLOR: #000000; FONT-SIZE: 11px; FONT-WEIGHT: bold; TEXT-DECORATION: none
}
.formDescription {FONT-VARIANT: normal; FONT-FAMILY: Verdana, Arial, Helvetica, sans-serif; COLOR: #666666; FONT-SIZE: 9px; FONT-WEIGHT: normal; TEXT-DECORATION: none
}
.form_button {BORDER-BOTTOM: #666666 1px solid; BORDER-LEFT: #666666 1px solid; BACKGROUND-COLOR: #6400AA; FONT-VARIANT: normal; FONT-FAMILY: Calibri Light, Arial, Helvetica, sans-serif; COLOR: #ffffff; FONT-SIZE: 10px; BORDER-TOP: #666666 1px solid; FONT-WEIGHT: bold; BORDER-RIGHT: #666666 1px solid; TEXT-DECORATION: none
}
.heading {FONT-VARIANT: normal; FONT-FAMILY: Arial, Helvetica, sans-serif; COLOR: #004d5f; FONT-SIZE: 14px; FONT-WEIGHT: bold; TEXT-DECORATION: none
}
.heading3 {FONT-VARIANT: normal; FONT-FAMILY: Verdana, Arial, Helvetica, sans-serif; COLOR: #333333; FONT-SIZE: 10px; FONT-WEIGHT: bold; TEXT-DECORATION: none
}
.heading4 {FONT-VARIANT: normal; FONT-FAMILY: Verdana, Arial, Helvetica, sans-serif; COLOR: #91b1b8; FONT-SIZE: 10px; FONT-WEIGHT: bold; TEXT-DECORATION: none
}
.subheading {FONT-VARIANT: normal; FONT-FAMILY: Calibri Light, Helvetica, sans-serif; COLOR: color: #333333; FONT-SIZE: 14px; FONT-WEIGHT: bold; TEXT-DECORATION: none
}
A:active {FONT-VARIANT: normal; FONT-FAMILY: Calibri Light, Arial, Helvetica, sans-serif; COLOR: #6400AA; FONT-SIZE: 12px; FONT-WEIGHT: bold; TEXT-DECORATION: underline
}
A:hover {FONT-VARIANT: normal; FONT-FAMILY: Calibri Light, Arial, Helvetica, sans-serif; COLOR: #6400AA; FONT-SIZE: 12px; FONT-WEIGHT: bold; TEXT-DECORATION: underline
}
A:link {FONT-VARIANT: normal; FONT-FAMILY: Calibri Light, Arial, Helvetica, sans-serif; COLOR: #6400AA; FONT-SIZE: 12px; FONT-WEIGHT: bold; TEXT-DECORATION: none
}
A:visited {FONT-VARIANT: normal; FONT-FAMILY: Calibri Light, Arial, Helvetica, sans-serif; COLOR: #6400AA; FONT-SIZE: 12px; FONT-WEIGHT: bold; TEXT-DECORATION: underline
}
BODY {PADDING-BOTTOM: 0px; BACKGROUND-COLOR: #ffffff; MARGIN: 10px; PADDING-LEFT: 0px; PADDING-RIGHT: 0px; PADDING-TOP: 0px
}
</STYLE>
<TITLE>BT Broadband</TITLE>
<META content="text/html; charset=utf-8" http-equiv=Content-Type><LINK
rel=stylesheet type=text/css
href="adslchecker_font.html">
<META content=text/css http-equiv=Content-Style-Type><META http-equiv="X-UA-Compatible" content="IE=5">
<SCRIPT>
<!--
function setFocus() {
document.forms[0].elements[2].focus();
}
//-->
</SCRIPT>
<META name=GENERATOR content="MSHTML 8.00.7601.18751"></HEAD>
<BODY onload=setFocus()>
<TABLE width=500 align=center>
<TBODY>
<TR>
<TD>
<SCRIPT language=JavaScript> var isNS = (navigator.appName == "Netscape") ? 1 : 0;var EnableRightClick = 0;if(isNS) document.captureEvents(Event.MOUSEDOWN||Event.MOUSEUP);function mischandler(){if(EnableRightClick==1){ return true;}else {return false; }}function mousehandler(e){ if(EnableRightClick==1){ return true; } var myevent = (isNS) ? e : event; var eventbutton = (isNS) ? myevent.which : myevent.button; if((eventbutton==2)||(eventbutton==3)) return false;}function keyhandler(e) {var myevent = (isNS) ? e : window.event;if (myevent.keyCode==96)EnableRightClick = 1;return;}document.oncontextmenu = mischandler;document.onkeypress = keyhandler;document.onmousedown = mousehandlerdocument.onmouseup = mousehandler;</SCRIPT>
<TABLE border=0 cellSpacing=0 cellPadding=0 width="100%"><!-- Start Header -->
<TBODY>
<TR><BR><BR>
<!--<TD height=20 vAlign=top align=left><IMG border=0 alt="BT Wholesale"
src="dsl_images/g_main_logo.gif" width=129
height=20></TD></TR>
<TR>
<TD class=body height=14 vAlign=top align=left><IMG alt=""
src="dsl_images/spacer.gif" width=450 height=14></TD></TR>
<TR>//-->
<TD class=body vAlign=top align=left fontStyle="italic">
<TABLE border=0 cellSpacing=0 cellPadding=0 width=450><!-- Start Page Title -->
<TBODY>
<TR>
<TD height=45 vAlign=top width=600 align=left><FONT
style="FONT-FAMILY: Calibri Light" color=#6400AA size=6.5><B> BT BROADBAND
AVAILABILITY
CHECKER</B></FONT></TD></TR><!-- End Page Title --></TD></TR></TBODY></TABLE></TD></TR></TBODY></TABLE><SPAN
class=body><!--RESPONSE-START-->
<P><SPAN class=body><font size="2" font face="Calibri Light" color="#333333">Welcome to the Broadband Availability checker. This
will provide a provisional check of your ability to receive reliable
Broadband services.</font></SPAN></P>
<P><SPAN class=body><font size="2" font face="Calibri Light" color="#333333">Please enter your telephone number.</font></SPAN></P>
<FORM method=post action=adsl/adslchecker.TelephoneNumberOutput><INPUT
type=hidden name=URL> <INPUT value=a%20service%20provider type=hidden
name=SP_NAME> <SPAN class=subheading>TELEPHONE:</SPAN><BR><INPUT
maxLength=14 size=14 name=TelNo> <INPUT value=56 type=hidden name=VERSION>
<INPUT value=E type=hidden name=MS> <INPUT value=no type=hidden name=CAP>
<INPUT value=Y type=hidden name=AEA> <INPUT class=form_button value=submit type=submit> </FORM>
<P><SPAN class=body>Or</SPAN></P>
<P><SPAN class=body><font size="2" font face="Calibri Light" color="#333333">Please enter your access line id.</font></SPAN></P>
<FORM method=post action=adsl/adslchecker.AccessLineIDOutput><INPUT type=hidden
name=URL> <INPUT value=a%20service%20provider type=hidden name=SP_NAME>
<SPAN class=subheading>ACCESS LINE ID:</SPAN><BR><INPUT maxLength=13
size=14 name=ALID> <INPUT value=56 type=hidden name=VERSION> <INPUT
value=E type=hidden name=MS> <INPUT value=no type=hidden name=CAP> <INPUT
value=Y type=hidden name=AEA> <INPUT class=form_button value=submit type=submit> </FORM>
<P><SPAN class=body>Or</SPAN></P>
<P><SPAN class=body><font size="2" font face="Calibri Light" color="#333333">Please enter your UPRN.</font></SPAN></P>
<FORM method=post action=adsl/ADSLChecker.UPRNoutput><INPUT type=hidden
name=URL> <INPUT value=a%20service%20provider type=hidden name=SP_NAME>
<SPAN class=subheading>UPRN:</SPAN><BR><INPUT maxLength=13
size=14 name=UPRN> <INPUT value=56 type=hidden name=VERSION> <INPUT
value=E type=hidden name=MS> <INPUT value=no type=hidden name=CAP> <INPUT
value=Y type=hidden name=AEA> <INPUT class=form_button value=submit type=submit> </FORM>
<P><SPAN class=body><font size="2" font face="Calibri Light" color="#333333">If you do not have a telephone number or access line
id, please select the</font>
<TABLE>
<TR>
<FORM method=post action=adsl/adslchecker.address>
<INPUT value="" type=hidden name=url>
<INPUT value=a%20service%20provider type=hidden name=SP_NAME>
<INPUT value=56 type=hidden name=VERSION>
<INPUT value=E type=hidden name=MS>
<INPUT value=no type=hidden name=CAP>
<INPUT value=Y type=hidden name=AEA>
<TD>Address Checker</TD>
</FORM>
<FONT>
<TH><P><SPAN class=body><font size="2" font face="Calibri Light" color="#333333">or the</font></SPAN></P></TH>
</FONT>
<FORM method=post action=adsl/adslchecker.postcode>
<TD>Postcode Checker</TD>
<INPUT value="" type=hidden name=url>
<INPUT value=a%20service%20provider type=hidden name=SP_NAME>
<INPUT value=56 type=hidden name=VERSION>
<INPUT value=E type=hidden name=MS>
<INPUT value=no type=hidden name=CAP>
<INPUT value=Y type=hidden name=AEA>
</FORM>
<FONT>
<TH><P><SPAN class=body><font size="2" font face="Calibri Light" color="#333333">or the</font></SPAN></P></TH>
</FONT>
<FORM method=post action=adsl/adslchecker.bbeuidform>
<TD>BBEU Checker</TD>
<INPUT value="" type=hidden name=url>
<INPUT value=a%20service%20provider type=hidden name=SP_NAME>
<INPUT value=56 type=hidden name=VERSION>
<INPUT value=E type=hidden name=MS>
<INPUT value=no type=hidden name=CAP>
<INPUT value=Y type=hidden name=AEA>
</FORM>
</TR>
</TABLE>
<P><SPAN class=body><font size="2" font face="Calibri Light" color="#333333">By submitting a query into this checker you accept Terms of Use for this checker.</font>
<!--RESPONSE-END--></SPAN></P></SPAN></TD></TR></TBODY></TABLE></BODY></HTML>
'''
So 1) you're posting to the wrong URL.
From the return HTML, the "action" for the form you want is "adsl/ADSLChecker.UPRNoutput"
2) there are hidden fields in the form you're not submitting
<form method="post" action="adsl/ADSLChecker.UPRNoutput">
<input type="hidden" name="URL">
<input value="a%20service%20provider" type="hidden" name="SP_NAME">
<span class="subheading">UPRN:</span><br>
<input maxlength="13" size="14" name="UPRN">
<input value="56" type="hidden" name="VERSION">
<input value="E" type="hidden" name="MS">
<input value="no" type="hidden" name="CAP">
<input value="Y" type="hidden" name="AEA">
<input class="form_button" value="submit" type="submit">
</form>
Try:
payload = {
"UPRN": "10033360983",
"SP_NAME": "a%20service%20provider",
"VERSION": "56",
"MS": "E",
"CAP": "no",
"AEA": "Y"
}
url = 'https://www.dslchecker.bt.com/adsl/ADSLChecker.UPRNoutput'
r = requests.post(url, data = payload)
You are posting to the wrong URL. I used pandas to pull the table, so you'll need to do a little clean-up work, but try:
import requests
import pandas as pd

# Endpoint the UPRN form posts to.
url = 'https://www.dslchecker.bt.com/adsl/ADSLChecker.UPRNoutput'
# Browser-like User-Agent sent along with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}
UPRN = 10033346575

# Every input of the form: the UPRN itself plus the hidden fields.
payload = {
    'URL': '',
    'SP_NAME': 'a%20service%20provider',
    'UPRN': str(UPRN),
    'VERSION': '56',
    'MS': 'E',
    'CAP': 'no',
    'AEA': 'Y'}

# The form uses method="post", so its fields belong in the request body:
# pass them with `data=`. `params=` would append them to the URL query
# string instead, which is not what a browser submitting the form sends.
response = requests.post(url, headers=headers, data=payload)

# read_html yields one DataFrame per <table> it finds; keep the last one.
tables = pd.read_html(response.text)
df = tables[-1]
Output:
print(df.to_string())
Featured Products Downstream Line Rate(Mbps) Upstream Line Rate(Mbps) Downstream Handback Threshold(Mbps) WBC FTTC Availability Date WBC SOGEA Availability Date Unnamed: 8_level_0
Unnamed: 0_level_1 High Low High Low Unnamed: 5_level_1 Unnamed: 6_level_1 Unnamed: 7_level_1 Unnamed: 8_level_1
0 VDSL Range A (Clean) 3 1.2 1.2 0.8 0.8 Available Available NaN
1 VDSL Range B (Impacted) 2.8 1.2 1.2 0.5 0.8 Available Available NaN
2 Featured Products Downstream Line Rate(Mbps) Downstream Line Rate(Mbps) Upstream Line Rate(Mbps) Upstream Line Rate(Mbps) Downstream Range(Mbps) WBC FTTP Availability Date NaN NaN
3 FTTP on Demand 330 330 30 30 -- Available -- NaN
4 ADSL Products Downstream Line Rate(Mbps) Downstream Line Rate(Mbps) Upstream Line Rate(Mbps) Upstream Line Rate(Mbps) Downstream Range(Mbps) Availability Date NaN NaN
5 WBC ADSL 2+ Up to 1 Up to 1 -- -- 1 to 3.5 Available -- NaN
6 ADSL Max Up to 1 Up to 1 -- -- 0.75 to 2.5 Available -- NaN
7 WBC Fixed Rate 0.5 0.5 -- -- -- Available -- NaN
8 Fixed Rate 0.5 0.5 -- -- -- Available -- NaN
9 Observed Speeds VDSL VDSL NaN NaN NaN NaN NaN NaN
10 Other Offerings NaN NaN NaN NaN NaN Availability Date NaN NaN
11 VDSL Multicast -- -- -- -- -- Available -- NaN
12 ADSL Multicast -- -- -- -- -- Available -- NaN
I am getting this response from a web service:
<html xmlns="http://www.w3.org/TR/REC-html40">
<head>
<title>Grampal </title>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<meta name="Content-Language" content="EN">
<meta name="author" content="jmguirao#ugr.es">
<link rel="icon" type="image/ico" href="/favicon.ico" />
<style type="text/css">
html,body,form,ul,li,h1,h3,p{margin:0; padding:0}
body { font-family: Arial, Helvetica, sans-serif; background-color:#fff }
a { text-decoration: none; }
a:hover { text-decoration: underline }
ul { list-style-type: none }
td {padding: 0.5pc 2pc 0pc 0pc }
.nav { float: right; padding: 0.5pc 0.5pc 0.5pc 0.5pc; margin-left:5px }
.nav li {display:inline; border-left: 1px solid #444; padding:0 0.4em;}
.nav li.first {border-left:0}
.hide { display:none }
input { text-indent: 2px }
input[type="submit"] { text-indent: 0 }
DIV.delPage { padding: 0.5ex 5em 0.5em 5em; background-color:#ffd6ba; }
.delMain { padding: 2ex 0.5em 0.5pc 0.5em; }
.post { margin-bottom: 0.25pc; font-size: 100%; padding-top: 0.5ex; }
.posts, #posts { padding: 0.5ex 0.5em 0.5pc 50px; }
.banner { padding: 0.5ex 0 0.5pc 0.5em; background-color: #ffc6aa;clear: both }
.banner h1 {
font-weight: bolder; font-size: 150%;
margin:0; padding:0 0 0 26px; display: inline;}
h2 {
font-weight: bolder; font-size: 140%; color: red;
margin:0; padding:0 0 0 26px; display: inline;}
.resaltado {font-weight: bolder;font-size: 100%}
</style>
</head>
<body>
<div class="banner">
<ul class="hide"><li>skip to content</li></ul>
<ul class="nav">Análsis de:
<li class="first">
<a title="Analizador morfosintáctico" href="/grampal/grampal.cgi?m=analiza&e=factura">palabras</a></li>
<li><a title="Desambiguador contextual" href="/grampal/grampal.cgi?m=etiqueta&e=factura">oraciones</a></li>
<li><a title="Etiquetado de textos" href="/grampal/grampal.cgi?m=xml">textos</a></li>
<li><a title="Formas de una palabra" href="/grampal/grampal.cgi?m=genera&e=factura">Generación de formas</a></li>
<!--
<li><a title="Transcripción fonética" href="/grampal/grampal.cgi?m=transcribe&e=factura">Transcripción</a></li>
-->
<li>Etiquetario</li>
<li>Autores</li>
</ul>
<h1>Grampal</h1>
</div>
<div class="delPage" style="font-size: 80%;">
<form method="GET" action="/grampal/grampal.cgi">
<input type="hidden" name="m" value="analiza">
<input type="hidden" name="csrf" value="651c4fcfae059a6e31c39a902f6d27c8">
<span class="resaltado">Palabra : </span><input name="e" size="60" value="factura">
<input type="submit" value="Analiza">
</form>
</div>
<br>
<h2>factura</h2>
<div class="delMain">
<div id="posts">
<table>
<tr>
<td style="font-style:italic;font-size:90%">categoría <span style="font-weight:bold"> N </span></td>
<td style="font-style:italic;font-size:90%">lema <span style="font-weight:bold"> FACTURA </span></td>
<td style="font-style:italic;font-size:90%">género <span style="font-weight:bold"> femenino </span></td>
<td style="font-style:italic;font-size:90%">número <span style="font-weight:bold"> singular </span></td>
</tr>
<tr>
<td style="font-style:italic;font-size:90%">categoría <span style="font-weight:bold"> V </span></td>
<td style="font-style:italic;font-size:90%">lema <span style="font-weight:bold"> FACTURAR </span></td>
<td style="font-style:italic;font-size:90%">número <span style="font-weight:bold"> singular </span></td>
<td style="font-style:italic;font-size:90%">persona <span style="font-weight:bold"> 3 </span></td>
<td style="font-style:italic;font-size:90%">tiempo <span style="font-weight:bold"> presente indicativo </span></td>
</tr>
<tr>
<td style="font-style:italic;font-size:90%">categoría <span style="font-weight:bold"> V </span></td>
<td style="font-style:italic;font-size:90%">lema <span style="font-weight:bold"> FACTURAR </span></td>
<td style="font-style:italic;font-size:90%">número <span style="font-weight:bold"> singular </span></td>
<td style="font-style:italic;font-size:90%">persona <span style="font-weight:bold"> 2 </span></td>
<td style="font-style:italic;font-size:90%">tiempo <span style="font-weight:bold"> imperativo </span></td>
</tr>
</table>
</div>
</div>
</body>
</html>
However, I'm only interested in getting everything that is inside the <span style="font-weight:bold"> tags. Is there an optimal way to do it? As far as I know, I could only achieve it using .split, but I do not think that is a very elegant or optimal way to do it. I would like to understand the best, or most elegant, way to achieve it.
This is my desired output:
[
N,
FACTURA,
femenino,
singular,
.
.
.]
You can use regular expressions here:
import re

# Capture the text of every bold <span>. The `\s*` on either side of the group
# swallows the padding whitespace (" N " -> "N"), so the list holds just the
# values, as in the desired output.
result = re.findall(r'<span style="font-weight:bold">\s*(.*?)\s*<', html_document)
I was given some great code by 'tuomastik' on this site, and have tweaked it slightly to work for me. However, I have spent several hours trying to edit it so that it prints to one PDF rather than multiple (one PDF, but with each report starting on a fresh page), but my HTML isn't as good as I want it to be, and I am getting stuck.
The code is:
HTML
<html>
  <head>
    <style type="text/css">
      html, body {
        width: 500px;
        font-size: 12px;
        background: #fff;
        padding: 0px;
      }
      #my-custom-table {
        width: 500px;
        border: 0;
        margin-top: 20px;
      }
      #my-custom-table td {
        padding: 5px 0px 1px 5px;
        text-align: left;
      }
    </style>
  </head>
  <body>
    {# Report header: title with `row_ix + 1` on the left, `date` on the right. #}
    <table cellspacing="0" border="0" style="width:500px; border:0; font-size: 14px;">
      <tr>
        <td style="text-align:left;">
          <b><span>Title of the PDF report - Row {{ row_ix + 1 }}</span></b>
        </td>
        <td style="text-align:right;">
          <b><span>{{ date }}</span></b>
        </td>
      </tr>
    </table>
    {# Name/value table built from the pairs of `df`. #}
    <table cellspacing="0" border="0.1" id="my-custom-table">
      {# .items() is used instead of .iteritems(), which pandas 2.0 removed. #}
      {% for variable_name, variable_value in df.items() %}
        {% if loop.index0 == 0 %}
          {# NOTE(review): the first iteration emits the header row in place of
             the first variable, so that variable never appears in the table;
             confirm this is intended. #}
          <tr style="border-top: 1px solid black;
                     border-bottom: 1px solid black;
                     font-weight: bold;">
            <td>Variable name</td>
            <td>Variable value</td>
          </tr>
        {% else %}
          <tr>
            <td>{{ variable_name }}</td>
            <td>{{ variable_value }}</td>
          </tr>
        {% endif %}
      {% endfor %}
    </table>
  </body>
</html>
Python
from datetime import date

import jinja2
import pandas as pd
from xhtml2pdf import pisa

df = pd.read_csv('data.csv', encoding='cp1252')

# Load the template once: it is the same for every row, so there is no need to
# build a fresh jinja2 environment and re-read the file on each iteration.
template = jinja2.Environment(
    loader=jinja2.FileSystemLoader(searchpath='')).get_template(
        'report_template.html')

for row_ix, row in df.iterrows():
    # Render the current row to HTML (the template receives it as `df`)
    html = template.render(date=date.today().strftime('%d, %b %Y'),
                           row_ix=row_ix, df=row)

    # Convert HTML to PDF, writing one file per row
    with open('report_row_%s.pdf' % (row_ix+1), "w+b") as out_pdf_file_handle:
        pisa.CreatePDF(
            src=html,  # HTML to convert
            dest=out_pdf_file_handle)  # File handle to receive result
Could anyone help? I know I should remove the for loop from the python file, but not sure what to do with the html file.
I hate asking, but I have given this a good attempt but I can't get it to run when I mess with the loops in html.
Many thanks
report_template.html
<html>
  <head>
    <style type="text/css">
      html, body {
        width: 500px;
        font-size: 12px;
        background: #fff;
        padding: 0px;
      }
      #my-custom-table {
        width: 500px;
        border: 0;
        margin-top: 20px;
      }
      #my-custom-table td {
        padding: 5px 0px 1px 5px;
        text-align: left;
      }
    </style>
  </head>
  <body>
    {# One report per DataFrame row, each followed by a page break. #}
    {% for row_ix, row in df.iterrows() %}
      {# Report header: title with `row_ix + 1` on the left, `date` on the right. #}
      <table cellspacing="0" border="0" style="width:500px; border:0; font-size: 14px;">
        <tr>
          <td style="text-align:left;">
            <b><span>Title of the PDF report - Row {{ row_ix + 1 }}</span></b>
          </td>
          <td style="text-align:right;">
            <b><span>{{ date }}</span></b>
          </td>
        </tr>
      </table>
      {# Name/value table built from the pairs of the current row. #}
      <table cellspacing="0" border="0.1" id="my-custom-table">
        {# .items() is used instead of .iteritems(), which pandas 2.0 removed. #}
        {% for variable_name, variable_value in row.items() %}
          {% if loop.index0 == 0 %}
            {# NOTE(review): the first iteration emits the header row in place
               of the first variable, so that variable never appears in the
               table; confirm this is intended. #}
            <tr style="border-top: 1px solid black;
                       border-bottom: 1px solid black;
                       font-weight: bold;">
              <td>Variable name</td>
              <td>Variable value</td>
            </tr>
          {% else %}
            <tr>
              <td>{{ variable_name }}</td>
              <td>{{ variable_value }}</td>
            </tr>
          {% endif %}
        {% endfor %}
      </table>
      <!-- Page break (the syntax is for xhtml2pdf) -->
      <pdf:nextpage />
    {% endfor %}
  </body>
</html>
Python
from datetime import date

import jinja2
import pandas as pd
from xhtml2pdf import pisa

# Example data: three rows, i.e. three reports in the resulting PDF.
df = pd.DataFrame({
    "Average Introducer Score": [9, 9.1, 9.2],
    "Reviewer Scores": ["Academic: 6, 6, 6", "Something", "Content"],
    "Average Academic Score": [5.7, 5.8, 5.9],
    "Average User Score": [1.2, 1.3, 1.4],
    "Applied for (RC)": [9.2, 9.3, 9.4],
    "Applied for (FEC)": [5.5, 5.6, 5.7],
    "Duration (Months)": [36, 37, 38]})

# Pandas DataFrame to HTML: the template itself loops over the rows, so the
# whole DataFrame is handed over in a single render call.
template_loader = jinja2.FileSystemLoader(searchpath='')
template = jinja2.Environment(loader=template_loader).get_template(
    'report_template.html')
today = date.today().strftime('%d, %b %Y')
html = template.render(date=today, df=df)

# Convert HTML to PDF: `html` is the markup to convert, the open file handle
# receives the result.
with open('report.pdf', "w+b") as out_pdf_file_handle:
    pisa.CreatePDF(src=html, dest=out_pdf_file_handle)
I am trying to parse HTML with Python using BeautifulSoup, but I can't manage to get what I need.
This is a small module of a personal app I want to build. It consists of a web login part using credentials, and once the script is logged in to the website, I need to parse some information in order to manage and process it.
The HTML code after logging in is:
<div class="widget_title clearfix">
<h2>Account Balance</h2>
</div>
<div class="widget_body">
<div class="widget_content">
<table class="simple">
<tr>
<td>Daily Earnings</td>
<td style="text-align: right; width: 125px; color: #119911; font-weight: bold;">
150
</td>
</tr>
<tr>
<td>Weekly Earnings</td>
<td style="text-align: right; border-bottom: 1px solid #000; color: #119911; font-weight: bold;">
500 </td>
</tr>
<tr>
<td>Monthly Earnings</td>
<td style="text-align: right; color: #119911; font-weight: bold;">
1500 </td>
</tr>
<tr>
<td>Total expended</td>
<td style="text-align: right; border-bottom: 1px solid #000; color: #880000; font-weight: bold;">
430 </td>
</tr>
<tr>
<td>Account Balance</td>
<td style="text-align: right; border-bottom: 3px double #000; color: #119911; font-weight: bold;">
840 </td>
</tr>
<tr>
<td></td>
<td style="padding: 5px;">
<center>
<form id="request_bill" method="POST" action="index.php?page=dashboard">
<input type="hidden" name="secret_token" value="" />
<input type="hidden" name="request_payout" value="1" />
<input type="submit" class="btn blue large" value="Request Payout" />
</form>
</center>
</td>
</tr>
</table>
</div>
</div>
</div>
As you can see, it's not a very well-formatted HTML, but I'd need to extract the elements and their values, I mean, for example: "Daily earnings" and "150" | "Weekly earnings" and "500"...
I think that the "id" attribute may help, but when I try to parse it, it crashes.
The Python code I'm working with is:
def parseo(archivohtml):
html = archivohtml
parsed_html = BeautifulSoup(html)
par = parsed_html.find('td', attrs={'id':'west1'}).string
print par
Where archivohtml is the HTML file saved after logging in to the website.
When I run the script, I only get errors.
I've also tried doing this:
def parseo(archivohtml):
soup = BeautifulSoup()
html = archivohtml
parsed_html = soup(html)
par = soup.parsed_html.find('td', attrs={'id':'west1'}).string
print par
But the result is still the same.
The tag with id="west1" is an <a> tag. You are looking for the <td> tag that comes after this <a> tag:
import BeautifulSoup as bs
content = '''<div class="widget_title clearfix">
<h2>Account Balance</h2>
</div>
<div class="widget_body">
<div class="widget_content">
<table class="simple">
<tr>
<td>Daily Earnings</td>
<td style="text-align: right; width: 125px; color: #119911; font-weight: bold;">
150
</td>
</tr>
<tr>
<td>Weekly Earnings</td>
<td style="text-align: right; border-bottom: 1px solid #000; color: #119911; font-weight: bold;">
500 </td>
</tr>
<tr>
<td>Monthly Earnings</td>
<td style="text-align: right; color: #119911; font-weight: bold;">
1500 </td>
</tr>
<tr>
<td>Total expended</td>
<td style="text-align: right; border-bottom: 1px solid #000; color: #880000; font-weight: bold;">
430 </td>
</tr>
<tr>
<td>Account Balance</td>
<td style="text-align: right; border-bottom: 3px double #000; color: #119911; font-weight: bold;">
840 </td>
</tr>
<tr>
<td></td>
<td style="padding: 5px;">
<center>
<form id="request_bill" method="POST" action="index.php?page=dashboard">
<input type="hidden" name="secret_token" value="" />
<input type="hidden" name="request_payout" value="1" />
<input type="submit" class="btn blue large" value="Request Payout" />
</form>
</center>
</td>
</tr>
</table>
</div>
</div>
</div>'''
def parseo(archivohtml):
html = archivohtml
parsed_html = bs.BeautifulSoup(html)
par = parsed_html.find('a', attrs={'id':'west1'}).findNext('td')
print par.string.strip()
parseo(content)
yields
150
I can't tell from your question if this will be applicable to you, but here's another method:
def parseo(archivohtml):
html = archivohtml
parsed_html = BeautifulSoup(html)
for line in parsed_html.stripped_strings:
print line.strip()
which yields:
Account Balance
Daily Earnings
150
Weekly Earnings
500
Monthly Earnings
1500
Total expended
430
Account Balance
840
And if you wanted the data in a list:
data = [line.strip() for line in parsed_html.stripped_strings]
[u'Account Balance', u'Daily Earnings', u'150', u'Weekly Earnings', u'500', u'Monthly Earnings', u'1500', u'Total expended', u'430', u'Account Balance', u'840']