Scraping data from a website using Beautiful Soup and Pandas - python

I have a python script that uses the BeautifulSoup and Pandas packages to scrape data from a list of URLs, convert the data into a dataframe, save it as an Excel file, and send it by email as an attachment.
The problem is that when the script runs and finishes scraping the first item, it crashes and returns the error below:
ValueError: 15 columns passed, passed data had 14 columns
I think this means that there is a missing html tag, right?
The list includes 3 URLs.
code:
import time
from datetime import date
import smtplib
import requests
import pandas as pd
from bs4 import BeautifulSoup
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
from email.utils import formatdate
def scrape_website():
    url_list = ["https://www.bayt.com/en/international/jobs/executive-chef-jobs/",
                "https://www.bayt.com/en/international/jobs/head-chef-jobs/",
                "https://www.bayt.com/en/international/jobs/executive-sous-chef-jobs/"]
    for url in url_list:
        soup = BeautifulSoup(requests.get(url).content, "lxml")
        links = []
        for a in soup.select("h2.m0.t-regular a"):
            if a['href'] not in links:
                links.append("https://www.bayt.com" + a['href'])
        joineddd = []
        for link in links:
            s = BeautifulSoup(requests.get(link).content, "lxml")
            alldd = [dd.text for dd in s.select(
                "div[class='card-content is-spaced'] dd")]
            alldd.insert(0, link)
            joineddd.append(alldd)
        print("Web Crawling is Done for {}".format(url))
        convert_to_dataFrame(joineddd)
        send_email()


def remove_unwanted_cols(dataset, cols):
    for col in cols:
        del dataset[col]
    return dataset


def convert_to_dataFrame(joineddd):
    df = pd.DataFrame(joineddd, columns=[
        "link", "location", "Company_Industry", "Company_Type",
        "Job_Role", "Employment_Type", "Monthly_Salary_Range",
        "Number_of_Vacancies", "Career_Level",
        "Years_of_Experience", "Residence_Location",
        "Gender", "Nationality", "Degree", "Age"])
    df = remove_unwanted_cols(df, ["Company_Industry", "Company_Type", "Job_Role", "Number_of_Vacancies"])
    df_to_excel = df.to_excel(r"F:\\AIenv\web_scrapping\\jobDesc.xlsx", index=False, header=True)
    send_email()


def send_email():
    '''send email '''
    today = date.today()
    file = 'F:\\AIenv\web_scrapping\\jobDesc.xlsx'
    username = 'XXXXXXXXXXX'
    password = 'XXXXXXXXXXXXX'
    send_from = 'XXXXXXXXXXXXX'
    send_to = 'XXXXXXXXXXXXXX'
    Cc = 'recipient'
    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = send_to
    msg['Cc'] = Cc
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = 'Hello, This is a test mail {}'.format(today)
    server = smtplib.SMTP('smtp.gmail.com')
    port = '587'
    fp = open(file, 'rb')
    part = MIMEBase('application', 'vnd.ms-excel')
    part.set_payload(fp.read())
    fp.close()
    encoders.encode_base64(part)
    part.add_header('Content-Disposition', 'attachment', filename='jobs Description--{}'.format(today))
    msg.attach(part)
    smtp = smtplib.SMTP('smtp.gmail.com')
    smtp.ehlo()
    smtp.starttls()
    smtp.login(username, password)
    smtp.sendmail(send_from, send_to.split(',') + msg['Cc'].split(','), msg.as_string())
    smtp.quit()
    print('Mail Sent')


if __name__ == "__main__":
    scrape_website()

Update to func scrape_website(): save alldd as a dictionary instead of a list.
for link in links:
    s = BeautifulSoup(requests.get(link).content, "lxml")
    ### update Start ###
    alldd = dict()
    alldd['link'] = link
    dd_div = [i for i in s.select("div[class='card-content is-spaced'] div")
              if ('<dd>' in str(i)) and ("<dt>" in str(i))]
    for div in dd_div:
        k = div.select_one('dt').get_text(';', True)
        v = div.select_one('dd').get_text(';', True)
        alldd[k] = v
    ### update End ###
    joineddd.append(alldd)

# result
df = pd.DataFrame(joineddd)
alldd sample:
{
    'link': 'https://www.bayt.com/en/qatar/jobs/executive-chef-4298309/',
    'Job Location': 'Doha, Qatar',
    'Company Industry': 'Real Estate; Hospitality & Accomodation; Catering, Food Service, & Restaurant',
    'Company Type': 'Employer (Private Sector)',
    'Job Role': 'Hospitality and Tourism',
    'Employment Type': 'Unspecified',
    'Monthly Salary Range': 'Unspecified',
    'Number of Vacancies': 'Unspecified',
    'Career Level': 'Mid Career',
    'Years of Experience': 'Min: 7',
    'Residence Location': 'Qatar',
    'Degree': "Bachelor's degree / higher diploma"
}

ValueError: 15 columns passed, passed data had 14 columns
What this error means is that you designated the dataframe to have 15 columns, but the data you fed it only has 14 fields. You need to check the scraped data to make sure it actually has what you expect, or adjust your expected columns and their names to match it.
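A minimal, self-contained illustration of that mismatch (made-up values, not data from the site):

import pandas as pd

row = list(range(14))                                # one scraped row with only 14 values
columns = ["col{}".format(i) for i in range(15)]     # but 15 expected column names

try:
    pd.DataFrame([row], columns=columns)
except ValueError as err:
    print(err)  # e.g. "15 columns passed, passed data had 14 columns"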

Let's clean up some of this code.
You don't need to write a function to remove columns; there's already a DataFrame method for that, .drop(). So delete the function remove_unwanted_cols(dataset, cols) and simply change the line:
df = remove_unwanted_cols(df, ["Company_Industry","Company_Type","Job_Role","Number_of_Vacancies"])
to
df = df.drop(["Company_Industry","Company_Type","Job_Role","Number_of_Vacancies"], axis=1)
Do you mean to have it send the email twice? You have it do that in both the scrape_website() function and the convert_to_dataFrame() function.
When pulling data to construct a dataframe, I usually try to avoid lists, for exactly the reason you hit this error: some pages have x fields, but the next scrape has an extra one (or one missing), so the lengths don't match. Dictionaries are a better way to handle that, with the key being the column name and the value being the data. So you'll have a list of dictionaries: each item in the list is a row, and each key/value pair in the dictionary corresponds to a column. Then you could get rid of the convert_to_dataFrame() function, as pandas can do that for you, but we'll leave it in and you can keep it or remove it as you like.
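As a minimal sketch of why the list-of-dictionaries approach is more forgiving (made-up rows, not the scraper itself):

import pandas as pd

rows = [
    {"link": "https://example.com/job/1", "Job Location": "Doha, Qatar", "Career Level": "Mid Career"},
    {"link": "https://example.com/job/2", "Job Location": "Dubai, UAE"},  # this posting has no "Career Level"
]

# Missing keys simply become NaN instead of raising a column-count ValueError.
df = pd.DataFrame(rows)
print(df)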
If you are using r'' raw strings, you don't need to escape the backslashes. Use either r"F:\AIenv\web_scrapping\jobDesc.xlsx" or "F:\\AIenv\\web_scrapping\\jobDesc.xlsx".
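For example, these two spellings of the path used in this thread are the same string:

p1 = r"F:\AIenv\web_scrapping\jobDesc.xlsx"     # raw string, no escaping needed
p2 = "F:\\AIenv\\web_scrapping\\jobDesc.xlsx"   # regular string, backslashes escaped
assert p1 == p2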
Code:
import time
from datetime import date
import smtplib
import requests
import pandas as pd
from bs4 import BeautifulSoup
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
from email.utils import formatdate
def scrape_website(url):
    soup = BeautifulSoup(requests.get(url).content, "lxml")
    subject = url.split('/')
    subject = [x for x in subject if x != ''][-1]
    links = []
    for a in soup.select("h2.m0.t-regular a"):
        if a['href'] not in links:
            links.append("https://www.bayt.com" + a['href'])
    joineddd = []
    for link in links:
        row = {}
        s = BeautifulSoup(requests.get(link).content, "lxml")
        job_description = s.find('h2', text='Job Description').find_next('dl')
        data_titles = job_description.find_all('dt')
        for data_title in data_titles:
            dt = '_'.join(data_title.text.split())
            dd = data_title.find_next('dd').text.strip()
            row.update({dt: dd})
        if s.find('h2', text='Preferred Candidate'):
            preferred_candidate = s.find('h2', text='Preferred Candidate').find_next('dl')
            data_titles = preferred_candidate.find_all('dt')
            for data_title in data_titles:
                dt = '_'.join(data_title.text.split())
                dd = data_title.find_next('dd').text.strip()
                row.update({dt: dd})
        joineddd.append(row)
    print("Web Crawling is Done for {}".format(url))
    convert_to_dataFrame(joineddd, subject)
    #send_email(subject) #<-- did you want to send here?


def convert_to_dataFrame(joineddd, subject):
    df = pd.DataFrame(joineddd)
    df = df.drop(["Company_Industry", "Company_Type", "Job_Role", "Number_of_Vacancies"], axis=1)
    df_to_excel = df.to_excel(r"F:\AIenv\web_scrapping\jobDesc.xlsx", index=False, header=True)
    send_email(subject) #<-- or do you want to send here??


def send_email(subject):
    '''send email '''
    today = date.today()
    file = 'F:\\AIenv\web_scrapping\\jobDesc.xlsx'
    username = 'XXXXXXXXXXX'
    password = 'XXXXXXXXXXXXX'
    send_from = 'XXXXXXXXXXXXX'
    send_to = 'XXXXXXXXXXXXXX'
    Cc = 'recipient'
    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = send_to
    msg['Cc'] = Cc
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = 'Hello, This is a test mail {} - {}'.format(today, subject)
    server = smtplib.SMTP('smtp.gmail.com')
    port = '587'
    fp = open(file, 'rb')
    part = MIMEBase('application', 'vnd.ms-excel')
    part.set_payload(fp.read())
    fp.close()
    encoders.encode_base64(part)
    part.add_header('Content-Disposition', 'attachment', filename='jobs Description--{}'.format(today))
    msg.attach(part)
    smtp = smtplib.SMTP('smtp.gmail.com')
    smtp.ehlo()
    smtp.starttls()
    smtp.login(username, password)
    smtp.sendmail(send_from, send_to.split(',') + msg['Cc'].split(','), msg.as_string())
    smtp.quit()
    print('Mail Sent')


url_list = ["https://www.bayt.com/en/international/jobs/executive-chef-jobs/",
            "https://www.bayt.com/en/international/jobs/head-chef-jobs/",
            "https://www.bayt.com/en/international/jobs/executive-sous-chef-jobs/"]

if __name__ == "__main__":
    for url in url_list:
        scrape_website(url)

Related

Sending a pandas Dataframe using smtplib

I've seen a lot of threads here about this topic, however, none regarding this specific question.
I am sending an email with a pandas dataframe (df) as HTML using pandas' built-in df.to_html() method. The email sends successfully. However, the df is displayed in the email as raw HTML, not in the desired table format. Can anyone offer assistance on how to ensure the df is displayed as a table, not as HTML, in the email? The code is below:
import requests
import pandas as pd
import smtplib

MY_LAT =
MY_LNG =
API_KEY = ""
parameters = {
    "lat": MY_LAT,
    'lon': MY_LNG,
    'exclude': "",
    "appid": API_KEY
}
df = pd.read_csv("OWM.csv")
response = requests.get("https://api.openweathermap.org/data/2.5/onecall", params=parameters)
response.raise_for_status()
data = response.json()
consolidated_weather_12hour = []
for i in range(0, 12):
    consolidated_weather_12hour.append((data['hourly'][i]['weather'][0]['id']))
hour7_forecast = []
for hours in consolidated_weather_12hour:
    weather_id = df[df.weather_id == hours]
    weather_description = weather_id['description']
    for desc in weather_description.iteritems():
        hour7_forecast.append(desc[1])
times = ['7AM', '8AM', '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM', '5PM', '6PM']
col_header = ["Description of Expected Weather"]
weather_df = pd.DataFrame(data=hour7_forecast, index=times, columns=col_header)
my_email = ""
password = ""
html_df = weather_df.to_html()
with smtplib.SMTP("smtp.gmail.com", 587) as connection:
    connection.starttls()  # Makes connection secure
    connection.login(user=my_email, password=password)
    connection.sendmail(from_addr=my_email, to_addrs="",
                        msg=f"Subject: 12 Hour Forecast Sterp"
                            """\
<html>
<head></head>"
<body>
{0}
<body>
</html>
""".format(html_df))
Just use df.to_html() to convert it into an HTML table that you can include in your HTML email.
Then, when you send the mail, you must set the mimetype to html:
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

smtp = smtplib.SMTP("...")
msg = MIMEMultipart('alternative')
msg['Subject'] = subject_line
msg['From'] = from_addr
msg['To'] = ','.join(to_addrs)
# Create the body of the message (a plain-text and an HTML version) and attach both;
# HTML-capable clients will render part2 as a table, others fall back to part1.
part1 = MIMEText(plaintext, 'plain')
part2 = MIMEText(html, 'html')
msg.attach(part1)
msg.attach(part2)

smtp.sendmail(from_addr, to_addrs, msg.as_string())
You can use the html2text library to convert your HTML to markdown for clients that do not support HTML content (not many these days), if you do not feel like writing the plaintext on your own.
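A rough sketch of that idea, assuming the html2text package is installed and weather_df is the dataframe from the question:

import html2text                       # pip install html2text
from email.mime.text import MIMEText

html = weather_df.to_html()
plaintext = html2text.html2text(html)  # markdown-ish plain-text fallback

part1 = MIMEText(plaintext, 'plain')
part2 = MIMEText(html, 'html')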
As an aside... using jinja when you are working with HTML tends to simplify things.
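A minimal sketch of the jinja idea (assuming the jinja2 package is installed; the template text here is made up):

from jinja2 import Template

template = Template("""
<html>
  <body>
    <p>12 hour forecast:</p>
    {{ table }}
  </body>
</html>
""")

html = template.render(table=weather_df.to_html())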

Send Table contents from CSV file in Outlook Mail Body using Python

I programmed a script to send an Outlook mail, which should contain the contents of a CSV file as its body. The mail part is working fine, but the table appears to be distorted.
[Screenshot of the mail body: https://i.stack.imgur.com/y5Gxy.png]
Is there any way to arrange this and make it pretty?
Here is my code:
def sendMailt():
    print("*** SENDING MAIL ****")
    email_user = 'ABC@domain.com'
    email_send = 'DCF@domain.com'
    subject = ''
    msg = MIMEMultipart()
    msg['From'] = email_user
    msg['To'] = email_send
    msg['Subject'] = subject
    body = 'Hi Team , Please Open This attachment for Folderstat Report,'
    msg.attach(MIMEText(body, 'plain'))
    text = """
Hello, Friend.
Here is your data:
{table}
Regards,
Me"""
    with open(filtered_CSV) as input_file:
        reader = csv.reader(input_file)
        data = list(reader)
    text = text.format(table=tabulate(data, headers=['Unnamed: 0','id','path','name','extension','size','FolderSize in GB','LastAccessTime','LastModifiedTime','LastCreationTime','folder','Total Number of files','Absolute File Count','depth','parent','uid','Oldest File Timestamp','Recent File Timestamp','Folder Modified Timestamp','Status','md5]'], tablefmt='orgtbl'))
    server = smtplib.SMTP('domain')
    sender = 'ABC@domain.com'
    reciever = ['DCF@domain.com']
    server.sendmail(sender, reciever, text)
    server.quit()
    print("Mail SEND")
Also, when I receive the mail, I am not able to see the subject or the receiver's ID.
If you use html as the tablefmt, you can get the data in a decent format:
import csv
import smtplib
from tabulate import tabulate
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

text = """
Hello, Friend.
Here is your data:
{table}
Regards,
Me"""

with open(filtered_CSV) as input_file:
    reader = csv.reader(input_file)
    data = list(reader)

html = text.format(table=tabulate(data, headers="firstrow", tablefmt="html"))

# for html design (you can add whatever style you want)
html = html.replace('<table>', """<table border=1 style="
    border-color:grey;
    font-family: Helvetica, Arial;
    font-size: 15px;
    padding: 5px;">""")

server = smtplib.SMTP('domain')
msg = MIMEMultipart("alternative", None, [MIMEText(html, 'html')])
msg['From'] = from_address  # your sender address
msg['To'] = to_address      # your recipient address
msg['Subject'] = "your subject"
server.sendmail(msg['From'], msg['To'], msg.as_string())
server.quit()
print("Mail SEND")

How to schedule a python script to send email using Windows Task Scheduler

I have a python script that scrapes a website, and I want to send the data by email every day at 7:00 AM. When I run the script manually, the email is sent and the function works perfectly, but when I try to use Windows Task Scheduler to automate the script, it doesn't run.
code:
import time
import pandas as pd
from datetime import date
import requests
from bs4 import BeautifulSoup
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
from email.utils import formatdate
def scrap_website():
    soup = BeautifulSoup(
        requests.get("https://www.bayt.com/en/international/jobs/executive-chef-jobs/").content,
        "lxml"
    )
    links = []
    for a in soup.select("h2.m0.t-regular a"):
        if a['href'] not in links:
            links.append("https://www.bayt.com" + a['href'])
    joineddd = []
    for link in links:
        s = BeautifulSoup(requests.get(link).content, "lxml")
        jobdesc = s.select_one("div[class='card-content is-spaced'] p")
        alldt = [dt.text for dt in s.select("div[class='card-content is-spaced'] dt")]
        dt_Job_location = alldt[0]
        dt_Job_Company_Industry = alldt[1]
        dt_Job_Company_Type = alldt[2]
        if len(alldt[3]) > 0:
            dt_Job_Job_Role = alldt[3]
        elif len(dt_Job_Employment_Type) > 0:
            dt_Job_Employment_Type = alldt[4]
        alldt.append("link")
        alldt.append("description")
        alldd = [dd.text for dd in s.select("div[class='card-content is-spaced'] dd")]
        dd_job_location = alldd[0]
        dd_job_Company_Industry = alldd[1]
        dd_job_Company_Type = alldd[2]
        if len(alldd[3]) > 0:
            dd_job_Job_Role = alldd[3]
        elif len(dd_job_Employment_Type) > 0:
            dd_job_Employment_Type = alldd[4]
        alldd.insert(0, link)
        alldd.insert(1, jobdesc)
        joineddd.append(alldd)
    print("-" * 80)
    convert_to_dataFrame(joineddd)
    send_email()


def convert_to_dataFrame(joineddd):
    df = pd.DataFrame(joineddd, columns=["link", "description", "location", "Company_Industry", "Company_Type", "Job_Role", "Employment_Type"])
    df_to_excel = df.to_excel(r"F:\\AIenv\web_scrapping\\jobDesc.xlsx", index=False, header=True)


'''send email '''
def send_email():
    today = date.today()
    file = 'F:\\AIenv\web_scrapping\\jobDesc.xlsx'
    username = 'xxxxxxx'
    password = 'xxxxxxx'
    send_from = 'xxxxxxxxxxx'
    send_to = 'xxxxxxxxxxxxxx'
    Cc = 'recipient'
    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = send_to
    msg['Cc'] = Cc
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = 'Hello, This is a test mail {}'.format(today)
    server = smtplib.SMTP('smtp.gmail.com')
    port = '587'
    fp = open(file, 'rb')
    part = MIMEBase('application', 'vnd.ms-excel')
    part.set_payload(fp.read())
    fp.close()
    encoders.encode_base64(part)
    part.add_header('Content-Disposition', 'attachment', filename='jobs Description--{}'.format(today))
    msg.attach(part)
    smtp = smtplib.SMTP('smtp.gmail.com')
    smtp.ehlo()
    smtp.starttls()
    smtp.login(username, password)
    smtp.sendmail(send_from, send_to.split(',') + msg['Cc'].split(','), msg.as_string())
    smtp.quit()
    print('Mail Sent')


if __name__ == "__main__":
    scrap_website()
In the Windows Task Scheduler I followed the steps and created a trigger in order to run the script.
Task Scheduler XML:
<?xml version="1.0" encoding="UTF-16"?>
<Task version="1.4" xmlns="http://schemas.microsoft.com/windows/2004/02/mit/task">
  <RegistrationInfo>
    <Date>2021-02-02T14:33:03.1578212</Date>
    <Author>DESKTOP-LPD1575\LT GM</Author>
    <URI>\job_desc scheduled email</URI>
  </RegistrationInfo>
  <Triggers>
    <CalendarTrigger>
      <StartBoundary>2021-03-02T07:30:00</StartBoundary>
      <Enabled>true</Enabled>
      <ScheduleByDay>
        <DaysInterval>1</DaysInterval>
      </ScheduleByDay>
    </CalendarTrigger>
  </Triggers>
  <Principals>
    <Principal id="Author">
      <UserId>S-1-5-21-422056822-2861570755-2809137930-1002</UserId>
      <LogonType>InteractiveToken</LogonType>
      <RunLevel>LeastPrivilege</RunLevel>
    </Principal>
  </Principals>
  <Settings>
    <MultipleInstancesPolicy>IgnoreNew</MultipleInstancesPolicy>
    <DisallowStartIfOnBatteries>true</DisallowStartIfOnBatteries>
    <StopIfGoingOnBatteries>true</StopIfGoingOnBatteries>
    <AllowHardTerminate>true</AllowHardTerminate>
    <StartWhenAvailable>true</StartWhenAvailable>
    <RunOnlyIfNetworkAvailable>false</RunOnlyIfNetworkAvailable>
    <IdleSettings>
      <StopOnIdleEnd>true</StopOnIdleEnd>
      <RestartOnIdle>false</RestartOnIdle>
    </IdleSettings>
    <AllowStartOnDemand>true</AllowStartOnDemand>
    <Enabled>true</Enabled>
    <Hidden>false</Hidden>
    <RunOnlyIfIdle>false</RunOnlyIfIdle>
    <DisallowStartOnRemoteAppSession>false</DisallowStartOnRemoteAppSession>
    <UseUnifiedSchedulingEngine>true</UseUnifiedSchedulingEngine>
    <WakeToRun>true</WakeToRun>
    <ExecutionTimeLimit>PT1H</ExecutionTimeLimit>
    <Priority>7</Priority>
  </Settings>
  <Actions Context="Author">
    <Exec>
      <Command>F:\AIenv\web_scrapping\job_desc_email.py</Command>
    </Exec>
  </Actions>
</Task>
I found the problem: in the Exec tag, you must add the full Python path to the Command.
Current code:
<Exec>
  <Command>F:\AIenv\web_scrapping\job_desc_email.py</Command>
</Exec>
Recommended code (assuming C:\python39\python.exe is the path of your Python interpreter):
<Exec>
  <Command>C:\python39\python.exe F:\AIenv\web_scrapping\job_desc_email.py</Command>
</Exec>
This is because on Windows, .py files are not necessarily associated to run with python.exe the way they are on Linux and macOS. So to run a python script you should use the following command format:
{path-to-your-python.exe} {path-to-your-python-script}

Include Excel table in the body of an email in Python

Using Python, I am trying to send an email with an Excel table inside the body. I would like to maintain all the conditional formatting from the Excel file. I can send the Excel file as an attachment easily, but I would also like to put the table inside the body of the email. I will convert it to an HTML table if I need to, but then I need to know how to include the HTML table in the body of the email. The code below attaches the file to an email, but I haven't been able to figure out how to put the table inside the email itself. How can I do this?
msg = MIMEMultipart()
msg['Subject'] = 'Subject goes here'
msg.attach(MIMEText('Text goes here'))
part = MIMEBase('application', "octet-stream")
f = 'file_name.xlsx'
part.set_payload(open(f, "rb").read())
encoders.encode_base64(part)
part.add_header('Content-Disposition', 'attachment; filename="%s"' % f)
msg.attach(part)
Thanks for your help!
The easiest way to do this is to use pandas. Something like:
import pandas as pd
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText


def generate_html():
    read_file = pd.read_csv("example.csv")
    html_file = read_file.to_html()
    sendEmail(html_file)


def sendEmail(html_file):
    msg = MIMEMultipart('alternative')
    msg['Subject'] = "Hello"
    msg['From'] = EmailFrom
    msg['To'] = EmailTo
    part = MIMEText(html_file, 'html')
    msg.attach(part)
    s = smtplib.SMTP("smtp.gmail.com")
    s.sendmail(EmailFrom, EmailTo, msg.as_string())
    s.quit()
You might want to look into openpyxl https://openpyxl.readthedocs.io/en/default/
Something like this might solve your problem:
import openpyxl
from openpyxl import load_workbook
from os.path import basename
from email.mime.application import MIMEApplication
from email.mime.text import MIMEText

workbook = load_workbook(f)
worksheet = workbook.active
html_data = """
<html>
  <head>
    <title>XLSX to HTML demo</title>
  </head>
  <body>
    <h3>XLSX to HTML demo</h3>
    <table>
"""
ws_range = worksheet['A1:H13']
for row in ws_range:
    html_data += "<tr>"
    for cell in row:
        if cell.value is None:
            html_data += "<td>" + " " + "</td>"
        else:
            html_data += "<td>" + str(cell.value) + "</td>"
    html_data += "</tr>"
html_data += "</table></body></html>"
msg.attach(MIMEText(html_data, 'html'))
with open(f, "rb") as fil:
    part = MIMEApplication(
        fil.read(),
        Name=basename(f)
    )
part['Content-Disposition'] = 'attachment; filename="{0}"'.format(basename(f))
msg.attach(part)
Inspired by https://jugad2.blogspot.ch/2013/11/publish-microsoft-excel-xlsx-data-to.html?m=1

How do I retrieve a hyperlink URL from email within python?

I have the following code to get an HTML email as an email.Message object, but I am unsure how to proceed from here to find the URL of the hyperlink given its display name. I have located the URL in the payload(0). In addition, the email has href=3D (quoted-printable encoding), so it doesn't work if I just copy the link and paste it into a browser.
import sys
import imaplib
import getpass
import email
import datetime
import email
M = imaplib.IMAP4_SSL('imap.gmail.com')
M.login('email@email.com','password123')
rv,boxes = M.list()
rv,boxes = M.select('Inbox/Test1')
rv, data = M.search(None, 'ALL')
typ, msg_data = M.fetch('1', '(RFC822)')
msg = email.message_from_string(msg_data[0][1])
url_name = 'Click Here'
html_text = msg.get_payload(0)
This will show all hrefs in the message... one can update the parseLinks class to choose the individual string they are interested in.
import imaplib
import email
import quopri
import HTMLParser


class parseLinks(HTMLParser.HTMLParser):
    def handle_starttag(self, tag, attrs):
        global global_futures_fair_value
        if tag == 'a':
            for name, value in attrs:
                if name == 'href':
                    print name
                    print value


M = imaplib.IMAP4_SSL('imap.gmail.com')
M.login('email@email.com', 'password123')
M.select('Inbox/Test1')
rv, data = M.search(None, 'ALL')
typ, msg_data = M.fetch('1', '(RFC822)')
msg = email.message_from_string(msg_data[0][1])
url_name = 'Click Here'
html_text = msg.get_payload(0)
msg = str(msg.get_payload()[0])
msg = quopri.decodestring(msg)
linkParser = parseLinks()
linkParser.feed(msg)
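The answer above is written for Python 2 (the HTMLParser module and print statements). A rough Python 3 equivalent of the same idea, with the same placeholder credentials, might look like this:

import imaplib
import email
from html.parser import HTMLParser


class LinkParser(HTMLParser):
    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for name, value in attrs:
                if name == 'href':
                    print(name, value)


M = imaplib.IMAP4_SSL('imap.gmail.com')
M.login('email@email.com', 'password123')
M.select('Inbox/Test1')
rv, data = M.search(None, 'ALL')
typ, msg_data = M.fetch('1', '(RFC822)')
msg = email.message_from_bytes(msg_data[0][1])  # fetch returns bytes in Python 3

html_part = msg.get_payload(0)                  # same HTML part as in the question
# get_payload(decode=True) undoes the quoted-printable (href=3D...) encoding
html = html_part.get_payload(decode=True).decode(html_part.get_content_charset() or 'utf-8', 'replace')

LinkParser().feed(html)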
