Related
I have a table, output_df (see the image in the question).
I need to detect repetitions of the same consecutive value for "PCR POS/Neg" in my "output_df".
If 3 identical consecutive results occur more than 3 times in output_df, I need to show a "WARNING" error message in my index.html.
How can I do it?
views.py
from django.shortcuts import render
from django.core.files.storage import FileSystemStorage
import pandas as pd
import datetime
from datetime import datetime as td
import os
from collections import defaultdict
from django.contrib import messages
import re
import numpy as np
def home(request):
    """Render the upload page and, on POST, analyse the two uploaded files.

    A POST must carry an ``.xls`` listing under ``document`` and a ``.txt``
    file of sample names under ``document2``. Both are saved through
    ``FileSystemStorage`` and handed to ``results1``; its three results are
    rendered with ``results.html``. Any other request, or a POST with
    missing or wrongly typed files, renders ``index.html`` (with a warning
    message queued in the latter case).
    """
    # upload file and save it in media folder
    if request.method == 'POST':
        # .get() so that a POST without the expected file fields falls
        # through to the warning below instead of raising a key error.
        uploaded_file = request.FILES.get('document')
        uploaded_file2 = request.FILES.get('document2')
        files_ok = (
            uploaded_file is not None
            and uploaded_file2 is not None
            and uploaded_file.name.endswith('.xls')
            and uploaded_file2.name.endswith('.txt')
        )
        if files_ok:
            savefile = FileSystemStorage()
            # save files
            name = savefile.save(uploaded_file.name, uploaded_file)
            name2 = savefile.save(uploaded_file2.name, uploaded_file2)
            # Ask the storage where it put the files rather than assuming
            # the media folder sits under the current working directory.
            file_directory = savefile.path(name)
            file_directory2 = savefile.path(name2)
            results, output_df, new = results1(file_directory, file_directory2)
            return render(request, "results.html",
                          {"results": results, "output_df": output_df, "new": new})
        else:
            messages.warning(request, ' File was not uploaded. Please use the correct type of file')
    return render(request, "index.html")
# read file
def readfile(uploaded_file):
    """Load the uploaded Excel file into a DataFrame, using no column as the index."""
    return pd.read_excel(uploaded_file, index_col=None)
def results1(file1, file2):
    """Cross-check the sample file names in *file2* against the listing in *file1*.

    Args:
        file1: Path of the Excel listing, read with ``pd.read_excel``. The
            code reads its ``'Test ID'``, ``'Collecting Date from the
            subject'`` and ``'PCR Pos/Neg'`` columns.
        file2: Path of a text file holding one comma-separated sample file
            name per line.

    Returns:
        A 3-tuple ``(results, output_html, summary_html)``:
        ``results`` is a dict mapping a heading to the list of samples it
        applies to; ``output_html`` is the per-sample table rendered with
        ``DataFrame.to_html``; ``summary_html`` is the count of each PCR
        answer per calendar date, also rendered as HTML.
    """
    # NOTE(review): the pasted source had lost its indentation. The nesting
    # below is a reconstruction: the loops over the matched listing rows keep
    # the last usable value and each input line yields one output row.
    # Confirm against the original file.
    results_list = defaultdict(list)
    listing_file = pd.read_excel(file1, index_col=None)
    headers = ['Vector Name', 'Date and Time', 'Test ID', 'PCR POS/Neg']
    leading_text = re.compile(r'(^[^\d]+)')
    rows = []
    with open(file2, "r") as fp:
        for line in fp:
            line = line.rstrip("\\\n")
            full_name = line.split(',')
            sample_name = full_name[0].split('_mean')
            try:
                # Digits that follow the leading non-digit prefix.
                sample_id = int(leading_text.split(sample_name[0])[2])
            except (IndexError, ValueError):
                # No prefix/number structure: fall back to the raw name.
                sample_id = sample_name[0]
            try:
                matches = listing_file['Test ID'].isin([sample_id])
                if not matches.any():
                    results_list["List of Samples not in the listing file:"].append(sample_name[0])
                    continue
                line_data = listing_file.loc[matches]
                # The date and the time of the taken sample
                d_t = full_name[1].split('us_')[1].split('_')
                date_time = td(int(d_t[0]), int(d_t[1]), int(d_t[2]),
                               int(d_t[3]), int(d_t[4]), int(d_t[5]))
                # Calculating the time frame from the swab to the test.
                # Reset per sample so a value from an earlier line is never
                # reused by accident.
                date_time_obj = None
                for _, value in line_data['Collecting Date from the subject'].items():
                    if isinstance(value, str):
                        # The original compared type(value) with str() (an
                        # empty string), so this branch could never run.
                        date_time_obj = td.strptime(value, '%Y.%m.%d. %H:%M')
                    elif isinstance(value, datetime.datetime) and not pd.isna(value):
                        # Covers pd.Timestamp too (a datetime subclass).
                        date_time_obj = value
                if date_time_obj is None:
                    raise ValueError('no usable collecting date')
                elapsed = date_time - date_time_obj
                frame_time = str(elapsed)
                if elapsed > datetime.timedelta(hours=48):
                    results_list["List of samples with time frame over 48 :"].append(sample_id)
                # The PCR answer as it was written in the listing file
                pcr_values = list(line_data['PCR Pos/Neg'])
                if len(pcr_values) > 1:
                    results_list["List of Samples with more than one attribute in the listing file:"].append(sample_id)
                for value in pcr_values:
                    pcr_ans = value.strip()
                rows.append({'Vector Name': line,
                             'Date and Time': date_time,
                             'Test ID': sample_id,
                             'PCR POS/Neg': pcr_ans,
                             'Time Frame': frame_time
                             })
            except Exception:
                print('The template name isnt good: {}'.format(sample_id))
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    columns = (headers + ['Time Frame']) if rows else headers
    output_df = pd.DataFrame(rows, columns=columns)
    output_df['Date and Time'] = pd.to_datetime(output_df['Date and Time'])
    new = output_df.groupby([output_df['Date and Time'].dt.date, 'PCR POS/Neg']).size().unstack(fill_value=0)
    return dict(results_list), output_df.to_html(), new.to_html()
index.html
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/html">
<head>
{% load static %}
<link rel="stylesheet" type="text/css" href="{% static 'css/style.css' %}"/>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- Bootstrap CSS -->
<!-- jsDelivr addresses npm packages as name@version; a '#' would start a URL fragment -->
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">
<link href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" rel="stylesheet"/>
<link href="https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css" rel="stylesheet"/>
<link href='https://fonts.googleapis.com/css?family=Poppins' rel='stylesheet'>
</head>
<body id="ok" style=" width: 150; height: 100vh; background-size: cover;font-family: 'Poppins'; background-repeat:no-repeat; background-image: url('static/images/o.png'); ">
<br>
<br>
<nav class="navbar navbar-expand-lg navbar-white " style=" border-radius: 25px;box-shadow: inset 0 0 5px grey; margin:2em;background-color:#EDF1F6 ; 350px;">
<img src="static/images/mi2.png" style=" width: 350px; " >
<div class="container-fluid" style="text-align: center; margin: auto;">
<a class="navbar-brand" href="#"></a>
<button class="navbar-toggler" style="color:#0D4171;padding: 1px 1px;" type="button" data-bs-toggle="collapse" data-bs-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation" >
<span class="navbar-toggler-icon"></span>⇩</button>
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav" style="text-align: center; margin: auto;">
<form method="POST" enctype="multipart/form-data">
{% csrf_token %}
<li class="nav-item" style="#DAE2EA" >
<label class="btn btn-outline" style=" color: #0D4171;border-radius: 25px; font-size: 21px; text-align: center;">
<i class="fa fa-cloud-upload" style="font-size: 1.5em;"></i> <br>Listing files (.xls) <input type="file" name="document" id="document" required="required">
</label>
<label class="btn btn-outline" style=" color: #0D4171; font-size: 21px;">
<i class="fa fa-cloud-upload" style="font-size: 1.5em;"></i> <br>File Names (.txt) <input type="file" name="document2" id="document2" required="required">
</label>
<br>
<div style="margin: auto;">
<br>
<button class="btn" style="background-color: #0D4171; border: none; ;color: white; padding: 10px 25px; text-decoration: none;
font-size: 16px;font:Poppins Medium; border-radius: 15px; margin-right:65px;" > Upload </button>
</div>
</li>
</form>
</ul>
{% block messages %}
{# Show every queued Django message; message.tags carries the level name (e.g. "warning") #}
{% if messages %}
{% for message in messages %}
<div class="alert alert-{{ message.tags }}" role="alert">{{ message }}</div>
{% endfor %}
{% endif %}
{% endblock %}
</div>
</div>
</nav>
{%block body%}{% endblock body%}
<!-- jsDelivr addresses npm packages as name@version; a '#' would start a URL fragment -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js" integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM" crossorigin="anonymous"></script>
</body>
</html>
<div class="">
<h1></h1>
<p></p>
<p></p>
</div>
{{variable}}
</body>
</html>
results.html
<!DOCTYPE html>
<html lang="en">
<head>
<link href='https://fonts.googleapis.com/css?family=Poppins' rel='stylesheet'>
<meta charset="UTF-8">
<title> Dashboard Result</title>
<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
<script type="text/javascript">
$("#btnPrint").live("click", function () {
var divContents = $("#dvContainer").html();
var printWindow = window.open('', '', 'height=100,width=200');
printWindow.document.write('<html><head><title> ListingCheckPdf</title>');
printWindow.document.write('</head><body >');
printWindow.document.write(divContents);
printWindow.document.write('</body></html>');
printWindow.document.close();
printWindow.print();
});
</script>
</head>
<body id="ok" style="margin:0.5; padding:0.5em; width: auto;background-size: cover;font-family: 'Poppins'; height:auto; background-size: cover; background-repeat:no-repeat; background-image: url('static/images/o.png'); ">
<nav class="navbar navbar-expand-lg navbar-white " style=" border-radius: 25px; margin:2em;background-color:#EDF1F6 ; ">
<img src="static/images/mi3.png" style=" width: 250px; margin-left:1em;" >
<form id="form1">
<br>
<input type="button" style="background-color: #0D4171; border: none; color: white; padding: 8px 18px; text-align: center; text-decoration: none; display: inline-block; font-size: 15px; margin-left:30px; font-family: 'Poppins'; border-radius: 25px;" value="Download PDF" id="btnPrint" /><br><br><br>
<div class="container-fluid" id="dvContainer" style="width: 900px;height: 900px; border-radius:15px; background-color:white; margin:auto; box-shadow: inset 0 0 5px grey;border-radius: 10px; overflow: scroll; /* showing scrollbars */" >
<style>table, td, th { margin-left: auto; margin-right: auto; border: 1px solid black; width: 600px; text-align:center; align-items: center;} </style> <br>
<div>
{% autoescape off %}{{ new }}{% endautoescape %}
</div><br><br>
<div style="color: hidden; margin: 30px; font: Poppins; font-size: 17px">
{% for key, value in results.items %}<br>
{{ key }}<br>
{% for elem in value %}
<div style="margin-left:50px" >
- {{elem }} <br></div>
{% endfor %}
{% endfor %}</div><br><br><br><br><br>S
<div>{% autoescape off %}{{ output_df }}{% endautoescape %}</div>
</div>
</form>
</body>
</html>
Given the elements of the following lists, which express the columns of a table:
table1 = [('January', '$5'),('February', '$9000'), ('October', '$90'), ('NaN', '$300')]
table2 = [('July', '$890'),('December', 'NaN')]
And this html base template:
<!DOCTYPE html>
<html>
<head>
<style>
table, th, td {
border: 1px solid black;
}
</style>
</head>
<body>
<h1>Table element 1</h1>
<table>
<tr>
<th>Month</th>
<th>Savings</th>
</tr>
<tr>
<td>month</td>
<td>amount</td>
</tr>
</table>
<h2>This is the status X</h2>
<input type="checkbox" id="valn" name="valn" value="val_n"> Validate
</body>
</html>
What is the best way to fill the template's table with the lists' values, and to extend the table structure for more pairs of lists (columns)? For example, for the above lists of tuples, this should be the filled version of the html template:
<!DOCTYPE html>
<html>
<head>
<style>
table, th, td {
border: 1px solid black;
}
</style>
</head>
<body>
<h1>Table element 1</h1>
<table>
<tr>
<th>Month</th>
<th>Savings</th>
</tr>
<tr>
<td>January</td>
<td>$5</td>
</tr>
<tr>
<td>February</td>
<td>$9000</td>
</tr>
<tr>
<td>October</td>
<td>$90</td>
</tr>
<tr>
<td>NaN</td>
<td>$300</td>
</tr>
</table>
<h2>This is the status A</h2>
<form action="/action_page.php">
<input type="checkbox" id="val1" name="val1" value="val_1"> Validate
</form>
<h1>Table element 2</h1>
<table>
<tr>
<th>Month</th>
<th>Savings</th>
</tr>
<tr>
<td>July</td>
<td>$890</td>
</tr>
<tr>
<td>December</td>
<td>NaN</td>
</tr>
</table>
<h2>This is the status B</h2>
<input type="checkbox" id="val2" name="val2" value="val_2"> Validate
</body>
</html>
Although I could handle this with some string manipulation, I think that there's a cleaner way of doing this with jinja. However, I do not know how to control different elements from the template such as the tables and checkboxes. Any idea of how to get the above output, for two and n lists of tuples?
As you requested in comments (basically the same as @nenadp's answer since it was corrected):
from jinja2 import Template
table1 = [('January', '$5'), ('February', '$9000'), ('October', '$90'), ('NaN', '$300')]
table2 = [('July', '$890'), ('December', 'NaN')]
table = table1 + table2 # Concatenate the two lists
# The header row is closed before the loop and every data row opens its own
# <tr>; no extra <tr> is left open in front of the loop.
template = Template("""
<table>
<tr>
<th>Month</th>
<th>Savings</th>
</tr>
{% for row in table %}
<tr>
<td>{{ row[0] }}</td>
<td>{{ row[1] }}</td>
</tr>
{% endfor %}
</table>
""")
print(template.render(table=table))
Change your template file:
{# Emit one <tr> per item of table1; the item's first two elements become the two cells. #}
{% for row in table1 %}
<tr>
<td>{{ row[0] }}</td>
<td>{{ row[1] }}</td>
</tr>
{% endfor %}
You can format it using this piece of code:
from jinja2 import Template
table1 = [('January', '$5'),('February', '$9000'), ('October', '$90'), ('NaN', '$300')]
# Read the template inside a context manager so the file handle is closed
# as soon as its text has been loaded.
with open('template_file.html', 'r') as template_file:
    t = template_file.read()
template = Template(t)
formatted_template = template.render(table1=table1)
print(formatted_template)
You can learn more from Jinja documentation
I'm getting the following error in Python when I try to do some scraping:
Traceback (most recent call last):
File "", line 26, in
signin2.fields["ctl06$txtParam_1"].value = '139210'
File "C:\Users\Alvaro
Pabon\Anaconda3\lib\site-packages\werkzeug\datastructures.py", line
781, in getitem
raise exceptions.BadRequestKeyError(key)
BadRequestKeyError: 400 Bad Request: The browser (or proxy) sent a
request that this server could not understand.
I provide the html and the python code, what am I doing wrong?
HTML:
<form method="post" action="Default.aspx?IdControl=SolicitarReporteUC&TipoProceso=G" id="Form1">
<div class="aspNetHidden">
<input type="hidden" name="__EVENTTARGET" id="__EVENTTARGET" value="" />
<input type="hidden" name="__EVENTARGUMENT" id="__EVENTARGUMENT" value="" />
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKLTE2MjczMjc4MQ9kFgICAw9kFgICBQ9kFgJmD2QWDgIBDxAPFgYeDkRhdGFWYWx1ZUZpZWxkBQpDb2RSZXBvcnRlHg1EYXRhVGV4dEZpZWxkBQdSZXBvcnRlHgtfIURhdGFCb3VuZGdkEBUBI0NlcnRpZmljYWRvIGRlIGhpc3RvcmlhIGxhYm9yYWwgRlBNFQEFMTAwOTUUKwMBZxYBZmQCAw9kFgJmD2QWAgIBD2QWAgIBDw9kFgIeB29uY2xpY2sFdmphdmFzY3JpcHQ6cmV0dXJuIEJ1c2NhckNvblBvc3RCYWNrKCdFbXBsZWFkb19WSVBQJywnQ29kRW1wbGVhZG8nLCdFbXBsZWFkbycsJycsJ2N0bDA2X3R4dFBhcmFtXzEnLCdjdGwwNl90eHREZXNjXzEnKTtkAgcPDxYCHgRUZXh0ZWRkAgkPEA8WAh4HVmlzaWJsZWdkEBUBA1BERhUBA1BERhQrAwFnZGQCCw8PFgIeB0VuYWJsZWRnZGQCDQ8PFgIfBGVkZAIRDzwrAAsBAA8WCB4IRGF0YUtleXMWAB4LXyFJdGVtQ291bnQCAR4JUGFnZUNvdW50AgEeFV8hRGF0YVNvdXJjZUl0ZW1Db3VudAIBZBYCZg9kFgICAg9kFgxmD2QWAgIDDw8WAh4LTmF2aWdhdGVVcmwFOkRlZmF1bHQuYXNweD9JZENvbnRyb2w9UGV0aWNpb25lc1ZlclVDJkNvZFBldGljaW9uPTk4NDI0NjZkZAIBDw8WAh8EBQc5ODQyNDY2ZGQCAg8PFgIfBAUKMDQvMDcvMjAxN2RkAgMPDxYCHwQFLENlcnRpZmljYWRvIGRlIGhpc3RvcmlhIGxhYm9yYWwgRlBNKDEzOTIxMCwpZGQCBA8PFgIfBAUBVGRkAgUPDxYCHwQFCVRlcm1pbmFkb2RkZG9xWba643oqthJTATkgc95Acvr6oJVDDdMGc4QiUOHQ" />
</div>
<script type="text/javascript">
//<![CDATA[
var theForm = document.forms['Form1'];
if (!theForm) {
theForm = document.Form1;
}
function __doPostBack(eventTarget, eventArgument) {
if (!theForm.onsubmit || (theForm.onsubmit() != false)) {
theForm.__EVENTTARGET.value = eventTarget;
theForm.__EVENTARGUMENT.value = eventArgument;
theForm.submit();
}
}
//]]>
</script>
<script src="/peoploEL/WebResource.axd?d=Vo5dwRm0erdgUaaz932BKtVNZGJOgXKXcR91FZwwFfehyhj6Sl2EkKnl2mAONakSWUxeINyfjibWOjKY8z8OLswtutIQ6CR4NPqhOOhW3-c1&t=635195493660000000" type="text/javascript"></script>
<script type="text/javascript">
//<![CDATA[
var __cultureInfo = {"name":"es-CO","numberFormat":{"CurrencyDecimalDigits":2,"CurrencyDecimalSeparator":",","IsReadOnly":true,"CurrencyGroupSizes":[3],"NumberGroupSizes":[3],"PercentGroupSizes":[3],"CurrencyGroupSeparator":".","CurrencySymbol":"$","NaNSymbol":"NeuN","CurrencyNegativePattern":14,"NumberNegativePattern":1,"PercentPositivePattern":0,"PercentNegativePattern":0,"NegativeInfinitySymbol":"-Infinito","NegativeSign":"-","NumberDecimalDigits":2,"NumberDecimalSeparator":",","NumberGroupSeparator":".","CurrencyPositivePattern":2,"PositiveInfinitySymbol":"Infinito","PositiveSign":"+","PercentDecimalDigits":2,"PercentDecimalSeparator":",","PercentGroupSeparator":".","PercentSymbol":"%","PerMilleSymbol":"‰","NativeDigits":["0","1","2","3","4","5","6","7","8","9"],"DigitSubstitution":1},"dateTimeFormat":{"AMDesignator":"a.m.","Calendar":{"MinSupportedDateTime":"\/Date(-62135578800000)\/","MaxSupportedDateTime":"\/Date(253402300799999)\/","AlgorithmType":1,"CalendarType":1,"Eras":[1],"TwoDigitYearMax":2029,"IsReadOnly":true},"DateSeparator":"/","FirstDayOfWeek":0,"CalendarWeekRule":0,"FullDateTimePattern":"dddd, dd\u0027 de \u0027MMMM\u0027 de \u0027yyyy hh:mm:ss tt","LongDatePattern":"dddd, dd\u0027 de \u0027MMMM\u0027 de \u0027yyyy","LongTimePattern":"hh:mm:ss tt","MonthDayPattern":"dd MMMM","PMDesignator":"p.m.","RFC1123Pattern":"ddd, dd MMM yyyy HH\u0027:\u0027mm\u0027:\u0027ss \u0027GMT\u0027","ShortDatePattern":"dd/MM/yyyy","ShortTimePattern":"hh:mm tt","SortableDateTimePattern":"yyyy\u0027-\u0027MM\u0027-\u0027dd\u0027T\u0027HH\u0027:\u0027mm\u0027:\u0027ss","TimeSeparator":":","UniversalSortableDateTimePattern":"yyyy\u0027-\u0027MM\u0027-\u0027dd HH\u0027:\u0027mm\u0027:\u0027ss\u0027Z\u0027","YearMonthPattern":"MMMM\u0027 de 
\u0027yyyy","AbbreviatedDayNames":["dom","lun","mar","mié","jue","vie","sáb"],"ShortestDayNames":["do","lu","ma","mi","ju","vi","sá"],"DayNames":["domingo","lunes","martes","miércoles","jueves","viernes","sábado"],"AbbreviatedMonthNames":["ene","feb","mar","abr","may","jun","jul","ago","sep","oct","nov","dic",""],"MonthNames":["enero","febrero","marzo","abril","mayo","junio","julio","agosto","septiembre","octubre","noviembre","diciembre",""],"IsReadOnly":true,"NativeCalendarName":"calendario gregoriano","AbbreviatedMonthGenitiveNames":["ene","feb","mar","abr","may","jun","jul","ago","sep","oct","nov","dic",""],"MonthGenitiveNames":["enero","febrero","marzo","abril","mayo","junio","julio","agosto","septiembre","octubre","noviembre","diciembre",""]},"eras":[1,"d.C.",null,0]};//]]>
</script>
<script src="/peoploEL/ScriptResource.axd?d=oxaJQOalmF_Pc9FHyAFTk_k6TF1NEbUrjIYsB44pk6WCbYo_nSIw4yk5tC2xEtvEorNRA5gOfFsIU4ZnWzjKxobYxQm7qlMyDI-yMbMSd2l6ZDbJap8N8TY6mfiS7PCqS0ZD_N1nysIMDoEuJENdCQ2&t=23c9c237" type="text/javascript"></script>
<script type="text/javascript">
//<![CDATA[
if (typeof(Sys) === 'undefined') throw new Error('ASP.NET Ajax client-side framework failed to load.');
//]]>
</script>
<div class="aspNetHidden">
<input type="hidden" name="__SCROLLPOSITIONX" id="__SCROLLPOSITIONX" value="0" />
<input type="hidden" name="__SCROLLPOSITIONY" id="__SCROLLPOSITIONY" value="0" />
<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="/wEdAArkW6hVSYy1X/RA+Sj0CGQLGp+bdMCDYaJlV2GIWm9IvBdcfX0kLMsTvDhzcFP+5BCmu+5iWjvwd5K06ry8EbPN8eAu30BFMFNpn4fF9w5RD0sfx0Rt1Zoo22r6RgHWIEvbk+/Q0viP1b4fioHhV6vuLByhWnJD/fsZOTyD54nbDa+qASD48033XmTIh5CNr4axLA/MabVFryGhaiI+QVUeJtZhbNAXh60wJUXNyENePpp0PUjhju74p8tImEJGpMk=" />
</div>
<TABLE id="Table1" border="0" cellSpacing="0" cellPadding="0" width="80%" align="center"
height="72%">
<TR>
<TD height="25" vAlign="top" width="165" align="center"></TD>
<TD height="25" width="10"></TD>
<TD height="25" vAlign="top"></TD>
</TR>
<TR>
<TD vAlign="top" width="165" align="center">
<LINK rel="stylesheet" type="text/css" href="EstilosWeb.css">
<LINK rel="stylesheet" type="text/css" href="EstilosWeb.css">
<TABLE style="WIDTH: 160px; HEIGHT: 64px" id="tMain" class="main" cellPadding="0" width="160">
<TR vAlign="top">
<TD id="NavTd">
<DIV id="Nav">
<H4 align="center">Menu
<table id="PanelIzquierdoUC1_htbCategorias" cellspacing="0" cellpadding="0" style="border-width:0px;width:160px;border-collapse:collapse;">
<tr>
<td><a id="PanelIzquierdoUC1_ConsultarLiquidacion" title="Consulta de Liquidación" href="Default.aspx?IdControl=ConsultaLiquidacionFltUC">Consultar Liquidación</a></td>
</tr><tr>
<td><a id="PanelIzquierdoUC1_Reportes" title="Certificado Ing. y Ret." href="Default.aspx?IdControl=ReportesUC">Certificado Ing. y Ret.</a></td>
</tr><tr>
<td><a id="PanelIzquierdoUC1_CambiarClave" title="Cambio de Clave" href="Default.aspx?IdControl=CambioClaveUC">Cambio de Clave</a></td>
</tr><tr>
<td><a id="PanelIzquierdoUC1_ReportesGeneral" title="Reportes" href="Default.aspx?IdControl=SolicitarReporteUC&TipoProceso=G">Reportes</a></td>
</tr><tr>
<td><a id="PanelIzquierdoUC1_CerrarSesion" title="Cerrar Sesion" href="Default.aspx?IdControl=CerrarSesionUC">Cerrar Sesion</a></td>
</tr>
</table></H4>
</DIV>
</TD>
</TR>
</TABLE>
</TD>
<td width="10"> </td>
<TD vAlign="top">
<div id="pnlCargaUserControl" style="width:100%;">
<LINK href="EstilosWeb.css" type="text/css" rel="stylesheet">
<style type="text/css">
.style1
{
height: 26px;
width: 36px;
}
</style>
<TABLE class="FormaTabla" id="Table1" cellSpacing="1" cellPadding="1" width="300" border="0">
<TR>
<TD class="FormaEncabezado" colSpan="2">Reportes</TD>
</TR>
<TR>
<TD colSpan="2">
<P align="center"> </P>
</TD>
</TR>
<TR>
<TD colSpan="2"><select size="4" name="ctl06$lstReportes" onchange="javascript:setTimeout('__doPostBack(\'ctl06$lstReportes\',\'\')', 0)" id="ctl06_lstReportes" class="FormaInfo" style="height:215px;width:564px;">
<option selected="selected" value="10095">Certificado de historia laboral FPM</option>
</select></TD>
</TR>
<TR>
<TD colSpan="2">Parametros</TD>
</TR>
<TR>
<TD style="HEIGHT: 45px" colSpan="2"><table id="ctl06_tbParametros" rules="all" border="1">
<tr>
<td>Empleado</td><td><input name="ctl06$txtParam_1" type="text" value="139211" readonly="readonly" onchange="javascript:setTimeout('__doPostBack(\'ctl06$txtParam_1\',\'\')', 0)" onkeypress="if (WebForm_TextBoxKeyHandler(event) == false) return false;" id="ctl06_txtParam_1" Tabla="Empleado_VIPP" CodigoCampo="CodEmpleado" DescripcionCampo="Empleado" Condicion="" TipoDato="N" Parametro="Empleado" /><input type="submit" name="ctl06$btnParam_1" value="..." id="ctl06_btnParam_1" disabled="disabled" class="aspNetDisabled" onclick="javascript:return BuscarConPostBack('Empleado_VIPP','CodEmpleado','Empleado','','ctl06_txtParam_1','ctl06_txtDesc_1');" style="width:25px;" /></td><td><input name="ctl06$txtDesc_1" type="text" value="JUAN DE LOS PALOTES" readonly="readonly" id="ctl06_txtDesc_1" style="width:250px;" /></td>
</tr>
</table></TD>
</TR>
<TR>
<TD class="style1">
</TD>
<td>
<P align="center"><select name="ctl06$ddlFormato" id="ctl06_ddlFormato" style="width:104px;">
<option value="PDF">PDF</option>
</select> <input type="submit" name="ctl06$btnAceptar" value="Aceptar" id="ctl06_btnAceptar" />
</P>
</td>
</TR>
<TR>
<TD colSpan="2">
<P align="left"><span id="ctl06_lblMensaje" style="color:Red;font-family:Arial;"></span></P>
</TD>
</TR>
</TABLE>
<P>
<input type="submit" name="ctl06$ButActualizar" value="Actualizar" id="ctl06_ButActualizar" /></P>
<P><table class="FormaGrid" cellspacing="0" rules="all" border="1" id="ctl06_dtgDatos" style="border-collapse:collapse;">
<tr>
<td> </td><td>CodPeticion</td><td>FechaHora</td><td>Peticion</td><td>Estado</td><td>DetalleEstado</td>
</tr><tr>
<td style="white-space:nowrap;">
<a id="ctl06_dtgDatos_ctl03_cmdVer" href="javascript:__doPostBack('ctl06$dtgDatos$ctl03$cmdVer','')">Ver</a>
</td><td>9842466</td><td>04/07/2017</td><td>Certificado(139211,)</td><td>T</td><td>Terminado</td>
</tr><tr>
<td colspan="6"><span>1</span></td>
</tr>
</table></P>
</div>
</TD>
</TR>
</TABLE>
PYTHON:
# Fetch the page's form by id, fill in its fields, then submit it.
form2 = browser.get_form(id='Form1')
form2["ctl06$txtParam_1"].value = '139211'
form2["ctl06$txtDesc_1"].value = 'JUAN DE LOS POTES'
form2["ctl06$ddlFormato"].value = 'PDF'
form2["ctl06$lstReportes"].value = '10095'
# Target of the "Ver" link's __doPostBack call in the page's HTML.
form2["__EVENTTARGET"].value = 'ctl06$dtgDatos$ctl03$cmdVer'
form2["__EVENTARGUMENT"].value = ''
# NOTE(review): `signin2` is not defined in this excerpt, while the form
# filled in above is `form2` — presumably `form2` is what should be
# submitted; confirm against the full script.
browser.submit_form(signin2)
Use the Python requests library for that.
Build a dictionary of the form fields and pass it with the request (see the `header` dict below), and remember the hidden fields such as __EVENTTARGET and
__EVENTARGUMENT: the values of these hidden parameters change every few minutes (depending on the website).
It will be easier if you use the POST method; before sending the request from code, check it once in Postman.
# Form fields for the search postback. `index`, `token`, `url` and
# `requests` come from code that is not part of this excerpt.
# NOTE(review): `token` presumably holds the hidden-field values scraped
# from a fresh GET of the page — confirm.
header = {
"ctl00$ContentPlaceHolder1$txt_tradename": str(index),
"ctl00$ContentPlaceHolder1$txtSearchTin": "",
"ctl00$ContentPlaceHolder1$ddl_dist": 2,
"ctl00$ContentPlaceHolder1$btnDlrSearch": "Search",
"__EVENTVALIDATION": token.get("__EVENTVALIDATION", "")
, "__VIEWSTATEGENERATOR": token.get("__VIEWSTATEGENERATOR"),
"__VIEWSTATE": token.get("__VIEWSTATE")
}
# NOTE(review): the excerpt stops inside this `try:`; its `except`/`finally`
# clause is not shown.
try:
# NOTE(review): the second positional argument of requests.post is `data`,
# so despite its name this dict is presumably sent as the form body, not as
# HTTP headers — confirm.
req = requests.post(url, header)
I was given some great code by 'tuomastik' on this site, and have tweaked it slightly to work for me. However, I have spent several hours trying to edit it so it prints to one PDF rather than multiple (one PDF, but each report starting on a fresh page), but my HTML isn't as good as I want to be, and I am getting stuck.
The code is:
HTML
<html>
<head>
<style type="text/css">
html, body {
width: 500px;
font-size: 12px;
background: #fff;
padding: 0px;
}
#my-custom-table {
width: 500px;
border: 0;
margin-top: 20px;
}
#my-custom-table td {
padding: 5px 0px 1px 5px;
text-align: left;
}
</style>
</head>
<body>
<table cellspacing="0" border="0" style="width:500px; border:0; font-size: 14px;">
<tr>
<td style="text-align:left;">
<b><span>Title of the PDF report - Row {{ row_ix + 1 }}</span></b>
</td>
<td style="text-align:right;">
<b><span>{{ date }}</span></b>
</td>
</tr>
</table>
<table cellspacing="0" border="0.1" id="my-custom-table">
{# items() instead of iteritems(): pandas 2.0 removed Series.iteritems() #}
{% for variable_name, variable_value in df.items() %}
{% if loop.index0 == 0 %}
<tr style="border-top: 1px solid black;
border-bottom: 1px solid black;
font-weight: bold;">
<td>Variable name</td>
<td>Variable value</td>
</tr>
{% else %}
<tr>
<td>{{ variable_name }}</td>
<td>{{ variable_value }}</td>
</tr>
{% endif %}
{% endfor %}
</table>
</body>
</html>
Python
from datetime import date
import jinja2
import pandas as pd
from xhtml2pdf import pisa
df = pd.read_csv('data.csv', encoding='cp1252')

# The template and the formatted date do not depend on the row, so load and
# compute them once instead of on every iteration.
template = jinja2.Environment(
    loader=jinja2.FileSystemLoader(searchpath='')).get_template(
    'report_template.html')
report_date = date.today().strftime('%d, %b %Y')

for row_ix, row in df.iterrows():
    # Pandas row (Series) to HTML
    html = template.render(date=report_date, row_ix=row_ix, df=row)

    # Convert HTML to PDF: one output file per row
    with open('report_row_%s.pdf' % (row_ix+1), "w+b") as out_pdf_file_handle:
        pisa.CreatePDF(
            src=html,  # HTML to convert
            dest=out_pdf_file_handle)  # File handle to receive result
Could anyone help? I know I should remove the for loop from the python file, but not sure what to do with the html file.
I hate asking, but I have given this a good attempt but I can't get it to run when I mess with the loops in html.
Many thanks
report_template.html
<html>
<head>
<style type="text/css">
html, body {
width: 500px;
font-size: 12px;
background: #fff;
padding: 0px;
}
#my-custom-table {
width: 500px;
border: 0;
margin-top: 20px;
}
#my-custom-table td {
padding: 5px 0px 1px 5px;
text-align: left;
}
</style>
</head>
<body>
{% for row_ix, row in df.iterrows() %}
<table cellspacing="0" border="0" style="width:500px; border:0; font-size: 14px;">
<tr>
<td style="text-align:left;">
<b><span>Title of the PDF report - Row {{ row_ix + 1 }}</span></b>
</td>
<td style="text-align:right;">
<b><span>{{ date }}</span></b>
</td>
</tr>
</table>
<table cellspacing="0" border="0.1" id="my-custom-table">
{# items() instead of iteritems(): pandas 2.0 removed Series.iteritems() #}
{% for variable_name, variable_value in row.items() %}
{% if loop.index0 == 0 %}
<tr style="border-top: 1px solid black;
border-bottom: 1px solid black;
font-weight: bold;">
<td>Variable name</td>
<td>Variable value</td>
</tr>
{% else %}
<tr>
<td>{{ variable_name }}</td>
<td>{{ variable_value }}</td>
</tr>
{% endif %}
{% endfor %}
</table>
<!-- Page break (the syntax is for xhtml2pdf) -->
<pdf:nextpage />
{% endfor %}
</body>
</html>
Python
from datetime import date
import jinja2
import pandas as pd
from xhtml2pdf import pisa
# Sample data; the whole frame is handed to the template in one render call.
df = pd.DataFrame({
    "Average Introducer Score": [9, 9.1, 9.2],
    "Reviewer Scores": ["Academic: 6, 6, 6", "Something", "Content"],
    "Average Academic Score": [5.7, 5.8, 5.9],
    "Average User Score": [1.2, 1.3, 1.4],
    "Applied for (RC)": [9.2, 9.3, 9.4],
    "Applied for (FEC)": [5.5, 5.6, 5.7],
    "Duration (Months)": [36, 37, 38]})

# Pandas DataFrame to HTML, spelled out step by step
template_env = jinja2.Environment(loader=jinja2.FileSystemLoader(searchpath=''))
report_template = template_env.get_template('report_template.html')
today_label = date.today().strftime('%d, %b %Y')
html = report_template.render(date=today_label, df=df)

# Convert HTML to PDF: a single output file receives the whole rendering
with open('report.pdf', "w+b") as out_pdf_file_handle:
    pisa.CreatePDF(src=html, dest=out_pdf_file_handle)
I want to extract table from an html file. I have written the following code-snippet to extract the first table:
import urllib2
import os
import time
import traceback
from bs4 import BeautifulSoup
# Python 2 script: parse a saved HTML page and write two <th> cell texts per
# row, tab-separated, to a text file.
#find('table',{'class':'tbl_with_brdr'})
outfile= open('D:/Dropbox/Python/apelec.txt','wb')
rfile = open('D:/Dropbox/PRI/Data/AP/195778.html')
rsoup = BeautifulSoup(rfile)
# NOTE(review): the reported AttributeError ('NoneType' object has no
# attribute 'find') is raised on this line, so one of the chained find()
# calls returned None — presumably the div/table lookup does not match the
# parsed document; confirm against the HTML.
# NOTE(review): find('tr') returns a single element rather than a list of
# rows, so the nodes[1:] slice below is probably not iterating rows —
# find_all('tr') looks like what was intended; confirm.
nodes = rsoup.find('div',{'class':'frmtext'}).find('table').find('tr')
for node in nodes[1:]:
# Text of the <b> inside the first <th> of the row.
x = node.find('th').find('b').get_text().encode("utf-8")
print x
# Text of the <b> inside the next <th> after the first one.
y = node.find('th').findNext('th').find('b').get_text().encode("utf-8")
print y
outfile.write(str(x)+"\t"+str(y)+"\n")
outfile.close()
Here is the error:
9 rfile = open('D:/Dropbox/PRI/Data/AP/195778.html')
10 rsoup = BeautifulSoup(rfile)
---> 11 nodes = rsoup.find('div',{'class':'frmtext'}).find('table').find('tr')
12 for node in nodes[1:]:
13 x = node.find('th').find('b').get_text().encode("utf-8")
AttributeError: 'NoneType' object has no attribute 'find'
And the html file is:
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<link rel="icon" type="image/ico" href="images/favicon.ico"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<link rel="stylesheet" href="themes/panchayat_default.css" type="text/css"/>
<title>consolidated Election Report</title>
</head>
<body>
<!-- To blur the background while processing dwr -->
<div class="faded_div process"></div>
<div class="popup_block_div process" style="display: none;">
<img alt="" src="images/loading_animation.gif" style="margin-left: auto; margin-right: auto;">
</div>
<div id="maincontainer" class="resize">
<div id="headerwrap">
<!-- Header -->
<html>
<head>
<script type='text/javascript' src="/profilerdwr/engine.js"> </script>
<script type='text/javascript' src="/profilerdwr/util.js"> </script>
<script type="text/javascript" src="/profilerdwr/interface/lgdDao.js"></script>
<script type="text/javascript" src="js/common_util_js.js"></script>
<link rel="stylesheet" href="css/common_css.css" type="text/css"></link>
<meta http-equiv='Content-Type' content='text/html; charset=UTF-8' />
</head>
<body >
<div class="clear"></div>
<div id="headerwrap">
<div id="header">
<div id="new_header">
<div id="logoleft">Area Profiler</div>
<div id="logoright"></div>
<div class="clear"></div>
</div>
<div class="clear"></div>
<div id="loginnav" align="right">
<table width="100%" class="tbl_no_brdr">
<tr>
<td class="tblclear" align="left">
<div id="mainnav">Home </div>
</td>
</tr>
</table>
</div>
</div>
<div class="clear"></div>
<div id="topnav">
<table width="100%" class="tbl_no_brdr">
<tr>
<td width="85" class="tblclear">Choose Theme :</td>
<td width="200" class="tblclear">
<form id="themeForm" name="themeForm" method="get" action="welcome.do">
<input type="hidden" name='OWASP_CSRFTOKEN' value='CN72-BGJW-G7FM-K1S3-P5FF-V1EN-IO4T-GHWU' />
<select name="theme" id="themeId" class="combofield" onchange="submitThemeForm()" style="width: 120px;">
<option value="default">Default Theme</option>
<option value="mustard">Mustard Theme</option>
<option value="peach">Peach Theme</option>
<option value="green">Green Theme</option>
<option value="blue">Blue Theme</option>
</select>
</form>
</td>
<td style="padding: 0px">
</td>
<td class="tblclear"> </td>
<td width="14" class="tblclear txticon"><img src="images/btnMinus.jpg" width="16" height="14" border="0" /></div></td>
<td width="14" class="tblclear txticon"><img src="images/btnDefault.jpg" width="16" height="14" border="0" /> </td>
<td width="28" class="tblclear txticon"><img src="images/btnPlus.jpg" width="16" height="14" border="0" /></td>
<script type="text/javascript" >
//documenttextsizer.setup("shared_css_class_of_toggler_controls")
documenttextsizer.setup("texttoggler")
</script>
<td width="100" align="right" class="tblclear">Select Language :</td>
<td width="108" align="right" class="tblclear">
<form id="languageForm" name="languageForm" method="get" action="welcome.do">
<input type="hidden" name='OWASP_CSRFTOKEN' value='CN72-BGJW-G7FM-K1S3-P5FF-V1EN-IO4T-GHWU' />
<select id="languageId" name="language" class="combofield" style="width: 120px;" onchange="submitLanguageForm()" >
<option value=""> Select Language </option>
</select>
</form>
</td>
</tr>
</table>
</div>
<div id="breadcrumbnav"> </div>
</div>
<script type="text/javascript">
function submitThemeForm()
{
var isOK = confirm("This will Refresh Your Page. Any Unsaved data will be Lost. Do You still want to Continue?");
if(isOK)
{
document.getElementById('themeForm').submit();
}
else
{
return;
}
}
function submitLanguageForm()
{
var isOK = confirm("This will Refresh Your Page. Any Unsaved data will be Lost. Do You still want to Continue?");
if(isOK)
{
document.getElementById('languageForm').submit();
}
else
{
return;
}
}
</script>
</body>
</html>
</div>
<div class="clear"></div>
<div id="content">
<div id="leftpnl">
<table width="100%" border="0" cellspacing="0" cellpadding="0">
<tr>
<td width="100%" valign="top" class="tblclear">
<!-- content -->.
<script type="text/javascript" src="js/common_js.js"></script>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<script type="text/javascript">
var pathname;
$(document).ready(function() {pathname = window.location.pathname;});
function onBack(s) {
var position =pathname.indexOf("/", 2);
var newPath = "";
var val = s.indexOf("?", 1);
if(val>0)
{
newPath = s+"&redirect=true";
}
else
{
newPath = s+"?redirect=true";
}
window.location.replace(".."+pathname.substring(0,position)+"/"+newPath);
}
function downloadReport(repformat){
//window.location="downloadConsolidatedElectionReportPDF.do?OWASP_CSRFTOKEN=CN72-BGJW-G7FM-K1S3-P5FF-V1EN-IO4T-GHWU";
//document.forms["electionReportForm"].action="downloadConsolidatedElectionReportPDF.do?repformat="+repformat+"&OWASP_CSRFTOKEN=CN72-BGJW-G7FM-K1S3-P5FF-V1EN-IO4T-GHWU";
document.forms["electionReportForm"].action="downloadConsolidatedElectionReportPDF.do?reportformat="+repformat+"&OWASP_CSRFTOKEN=CN72-BGJW-G7FM-K1S3-P5FF-V1EN-IO4T-GHWU";
document.forms["electionReportForm"].method="POST";
document.getElementById('electionReportForm').target="_blank";
document.forms["electionReportForm"].submit();
}
</script>
<style type="text/css">
.data_link{
color:blue;
display: block;
text-decoration: none;
font-size: 1em;
font-weight: bolder;
}
.disable_link
{
cursor:default;
color:blue;
display: block;
text-decoration: none;
font-size: 1em;
font-weight: bolder;
}
.data_link:VISITED
{
color:blue;
display: block;
text-decoration: none;
font-size: 1em;
font-weight: bolder;
}
.data_link:HOVER{
text-decoration: underline;
}
</style>
</head>
<body>
<div id="frmcontent">
<div class="frmhd">
<table width="100%" class="tbl_no_brdr">
<tr>
<td align="left" width="90%">
Consolidated Election</td>
</tr>
</table>
</div>
<div class="clear"></div>
<div class="frmpnlbrdr">
<div class="frmpnlbg">
<div class="frmtxt">
<table width="100%" style="margin-bottom: 10px;" class="tbl_with_brdr">
<tr class="tblRowTitle tblclear" >
<th align="left" ><b>State Name</b></th>
<th align="left" ><b>Local Body Type</b></th>
<th align="left" ><b>Election Term</b></th>
<th align="left" ><b>Local Body Name</b></th>
</tr>
<tr class="tblRowB" style="color: blue;">
<th align="left" >ANDHRA PRADESH</th>
<th align="left" >Village Panchayat</th>
<th align="left" >
02-Aug-2013 To
01-Aug-2018
</th>
<th align="left" >KODIHALLI</th>
</tr>
</table>
<div class="frmhdtitle">Consolidated Election</div>
<table width="100%" class="tbl_with_brdr">
<thead>
<tr class="tblRowTitle tblclear">
<th align="center" width="5%" ><b>S.No.</b></th>
<th align="left" width="9%"><b>Name</b></th> 0
<th align="left" width="9%"><b>Age</b></th> 1
<th align="left" width="9%"><b>Caste Category</b></th> 2
<th align="left" width="9%"><b>Gender</b></th> 3
<th align="left" width="9%"><b>Qualification</b></th> 4
<th align="left" width="9%"><b>Occupation</b></th> 5
<th align="left" width="9%"><b>Email Address</b></th> 6
<th align="left" width="9%"><b>Ward Name</b></th> 7
<th align="left" width="9%"><b>Reservation</b></th> 8
</tr>
</thead>
<tbody>
<tr class="tblRowB">
<td align="center" >1</td>
<td>Kambanna</td>
<td>36</td>
<td>OBC</td>
<td>Male</td>
<td>Middle or Lower Secondary</td>
<td>N/A</td>
<td>
N/A
</td>
<td>N/A</td>
<td >
Yes (OBC / Others)
</td>
</tr>
<tr class="tblRowA">
<td align="center" >2</td>
<td>Ramesh</td>
<td>39</td>
<td>OBC</td>
<td>Male</td>
<td>Middle or Lower Secondary</td>
<td>Workers not reporting any occupations</td>
<td>
N/A
</td>
<td>Ward no 1</td>
<td >
Yes (OBC / Others)
</td>
</tr>
<tr class="tblRowB">
<td align="center" >3</td>
<td>S.Manjunath</td>
<td>29</td>
<td>OBC</td>
<td>Male</td>
<td>Higher Secondary or Intermediate or Pre University or Senior Secondary</td>
<td>Workers not reporting any occupations</td>
<td>
N/A
</td>
<td>Ward no 2</td>
<td >
No (General / Others)
</td>
</tr>
<tr class="tblRowA">
<td align="center" >4</td>
<td>Obuleshu</td>
<td>48</td>
<td>OBC</td>
<td>Male</td>
<td>Below Primary</td>
<td>Workers not reporting any occupations</td>
<td>
N/A
</td>
<td>Ward no 3</td>
<td >
No (General / Others)
</td>
</tr>
<tr class="tblRowB">
<td align="center" >5</td>
<td>Mamatha</td>
<td>24</td>
<td>OBC</td>
<td>Female</td>
<td>Matriculation or Junior School Certificate or Secondary</td>
<td>N/A</td>
<td>
N/A
</td>
<td>Ward no 4</td>
<td >
Yes (General / Female)
</td>
</tr>
<tr class="tblRowA">
<td align="center" >6</td>
<td>Shivamma</td>
<td>38</td>
<td>OBC</td>
<td>Female</td>
<td>Below Primary</td>
<td>N/A</td>
<td>
N/A
</td>
<td>Ward no 5</td>
<td >
Yes (General / Female)
</td>
</tr>
<tr class="tblRowB">
<td align="center" >7</td>
<td>Hanumantappa</td>
<td>46</td>
<td>SC</td>
<td>Male</td>
<td>Illiterate</td>
<td>N/A</td>
<td>
N/A
</td>
<td>Ward no 6</td>
<td >
No (General / Others)
</td>
</tr>
<tr class="tblRowA">
<td align="center" >8</td>
<td>Malingappa</td>
<td>45</td>
<td>SC</td>
<td>Male</td>
<td>Illiterate</td>
<td>N/A</td>
<td>
N/A
</td>
<td>Ward no 7</td>
<td >
No (General / Others)
</td>
</tr>
<tr class="tblRowB">
<td align="center" >9</td>
<td>Kamalamma</td>
<td>52</td>
<td>OBC</td>
<td>Female</td>
<td>Illiterate</td>
<td>N/A</td>
<td>
N/A
</td>
<td>Ward no 8</td>
<td >
Yes (OBC / Female)
</td>
</tr>
<tr class="tblRowA">
<td align="center" >10</td>
<td>Muddamma</td>
<td>48</td>
<td>OBC</td>
<td>Female</td>
<td>Illiterate</td>
<td>N/A</td>
<td>
N/A
</td>
<td>Ward no 9</td>
<td >
Yes (General / Female)
</td>
</tr>
<tr class="tblRowB">
<td align="center" >11</td>
<td>Patta Tayamma</td>
<td>45</td>
<td>SC</td>
<td>Female</td>
<td>Middle or Lower Secondary</td>
<td>N/A</td>
<td>
N/A
</td>
<td>Ward no 10</td>
<td >
Yes (SC / Female)
</td>
</tr>
<tr class="tblRowA">
<td align="center" >12</td>
<td>Sujatha</td>
<td>35</td>
<td>OBC</td>
<td>Female</td>
<td>Middle or Lower Secondary</td>
<td>N/A</td>
<td>
N/A
</td>
<td>Ward no 11</td>
<td >
Yes (OBC / Female)
</td>
</tr>
<tr class="tblRowB">
<td align="center" >13</td>
<td>Kadurappa</td>
<td>35</td>
<td>SC</td>
<td>Male</td>
<td>Middle or Lower Secondary</td>
<td>N/A</td>
<td>
N/A
</td>
<td>Ward no 12</td>
<td >
Yes (SC / Others)
</td>
</tr>
</tbody>
</table>
<br />
<table width="100%" class="tbl_no_brdr">
<tr>
<td align="center">
<input type="button" class="btn" onclick="onClose('welcome.do?OWASP_CSRFTOKEN=CN72-BGJW-G7FM-K1S3-P5FF-V1EN-IO4T-GHWU')" value=Close />
<input type="button" class="btn" onclick="this.disabled=true; this.value='Please Wait .!';onBack('consolidatedElectionReport.do?OWASP_CSRFTOKEN=CN72-BGJW-G7FM-K1S3-P5FF-V1EN-IO4T-GHWU&electionTermId=35107&stateId=28')" value=Back />
</td>
</tr>
</table>
<form id="electionReportForm" name="electionReportForm" action="#" method="post">
<div align="center"><br/>
<input type="button" class="btn" onclick="downloadReport('pdf');" value="Export to PDF" size="5" />
<input type="button" class="btn" onclick="downloadReport('xls');" value="Export to Excel" size="5" />
</div>
</form>
</div>
<div class="myclass"
style="font-family: Times; text-align: center; font-size: 10.0pt; color: white; font-weight: bold; border: 1px solid gray">
Report generated through Area Profiler (http://areaprofiler.gov.in)Thu Oct 02 22:34:20 IST 2014
</div>
</div>
</div>
</div>
</body>
</html>
</td>
</tr>
</table>
</div>
</div>
<div class="clear"></div>
<div id="footer">
<!-- Footer -->
<html>
<head>
</head>
<body>
<table width="100%" class="tbl_no_brdr">
<tr>
<td colspan="3" class="fotbrdr"></td>
</tr>
<tr>
<td width="161" class="btmlogospace"><a href="http://www.negp.gov.in/" target= "_blank" ><img src="images/e_governance_logo.jpg" width="161" height="38" /></a></td>
<td width="93" class="btmlogospace"><a href="http://www.panchayat.gov.in/" target= "_blank" ><img src="images/panchayatilogo.jpg" width="93" height="38" /></a></td>
<td align="right" class="btmlogospace">Site is designed, hosted
and maintained by National Informatics Centre<br /> Contents on
this website is owned,updated and managed by the Ministry of
Panchayati Raj</td>
</tr>
</table>
</body>
</html>
</div>
</div>
</body>
</html>
Here is an approach; it is not a complete solution, but you can use it as a guide.
You have to traverse the DOM tree and extract the values you want.
I changed the class of the div being searched for from frmtext to frmtxt (the class name that actually appears in the HTML). During the traversal you also have to check whether each lookup found anything before using its result.
import urllib2
import os
import time
import traceback
from bs4 import BeautifulSoup
# Extract header-cell text from the first table inside <div class="frmtxt">
# of the saved page and write one tab-separated "x<TAB>y" line per table row
# to out.txt.

# Read the page as bytes so BeautifulSoup does its own encoding detection.
with open('195778.html', 'rb') as rfile:
    rsoup = BeautifulSoup(rfile)

nodes1 = rsoup.find('div', {'class': 'frmtxt'})
table = nodes1.find('table') if nodes1 is not None else None
if table is None:
    # find() returns None when nothing matches; stop with a clear message
    # instead of failing later with an AttributeError on None.
    raise SystemExit("No <table> inside <div class='frmtxt'> in 195778.html")

with open('out.txt', 'wb') as outfile:
    for node in table.find_all('tr'):
        first_th = node.find('th')

        # x: bold text of the row's first <th>, or None when there is no
        # <th> or no <b> inside it.
        x = None
        if first_th is not None:
            # Search the cell that was found; x itself is still None here.
            x1 = first_th.find('b')
            if x1 is not None:
                x = x1.get_text()
                print(x)

        # y: bold text of the next <th> after the first one.  find_next()
        # looks forward through the rest of the document, not only this row.
        # If that cell or its <b> is missing, y keeps the first <th> tag
        # (or None), so the tag's markup is what gets written for this row.
        y = first_th
        if y is not None:
            print('y %s' % y)
            y2 = y.find_next('th')
            if y2 is not None:
                print('y2 %s' % y2)
                y3 = y2.find('b')
                if y3 is not None:
                    y = y3.get_text()
                    print(y)

        # Build the line as text and encode once at the I/O boundary, since
        # the output file is opened in binary mode.
        outfile.write((u"%s\t%s\n" % (x, y)).encode("utf-8"))