Gmail content downloader/parser - python

I'm trying to download all my email content from Gmail using a Python script, and the code below is what I found. I tried it, but it only downloads the attachments (as PDF files). Is there a way to modify it to download just the email content? I'm also trying to extract just the URL links from my emails.
import email
import imaplib
import os
# Connection settings and download options used by the functions below.
# NOTE(review): the values look like placeholders — replace them before running.
server = 'imap.gmail.com'
user = '#gmail.com'
password = 'pass'
# Directory the attachments are written into; nothing in this script creates it.
outputdir = 'lolz'
subject = 'Order Completed' #subject line of the emails you want to download attachments from
def connect(server, user, password):
    """Open an SSL IMAP session, log in, and select the default mailbox.

    Returns the authenticated ``imaplib.IMAP4_SSL`` connection.
    """
    connection = imaplib.IMAP4_SSL(server)
    connection.login(user, password)
    # select() without arguments opens the default mailbox (INBOX).
    connection.select()
    return connection
def downloaAttachmentsInEmail(m, emailid, outputdir):
    """Save every named attachment of one message into ``outputdir``.

    Args:
        m: a logged-in IMAP connection with a mailbox selected.
        emailid: message sequence number as returned by ``m.search``.
        outputdir: existing directory the attachments are written to.

    Messages that are not multipart, and parts without a
    ``Content-Disposition`` header or without a filename, are skipped.
    """
    # BODY.PEEK[] fetches the full message without setting the \Seen flag.
    resp, data = m.fetch(emailid, "(BODY.PEEK[])")
    if resp != 'OK' or not data or not isinstance(data[0], tuple):
        return
    mail = email.message_from_bytes(data[0][1])
    if mail.get_content_maintype() != 'multipart':
        return
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            # Inline parts may carry a disposition but no filename.
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        # The filename comes from the sender: keep only its last component so
        # it cannot escape outputdir (e.g. "../../x" or "C:\\x").
        safe_name = os.path.basename(filename.replace('\\', '/'))
        if not safe_name:
            continue
        with open(os.path.join(outputdir, safe_name), 'wb') as attachment_file:
            attachment_file.write(payload)
#download attachments from all emails with a specified subject line
def downloadAttachments(subject):
    """Download attachments from every Inbox message whose subject matches.

    Args:
        subject: text to search for in the Subject header.

    Uses the module-level ``server``, ``user``, ``password`` and ``outputdir``.
    The IMAP session is always logged out, even if a download fails.
    """
    m = connect(server, user, password)
    try:
        m.select("Inbox")
        # Escape backslashes and double quotes so the subject stays one IMAP
        # quoted string and cannot alter the search expression.
        quoted = subject.replace('\\', '\\\\').replace('"', '\\"')
        typ, msgs = m.search(None, '(SUBJECT "' + quoted + '")')
        if typ != 'OK' or not msgs or not msgs[0]:
            return
        for emailid in msgs[0].split():
            downloaAttachmentsInEmail(m, emailid, outputdir)
    finally:
        m.logout()
# Only connect and download when run as a script, not when imported.
if __name__ == "__main__":
    downloadAttachments(subject)

Related

yahoo mail error 550, b'Request failed; Mailbox unavailable'

This code works for Gmail, but when I tried it on Yahoo Mail I got this error: (550, b'Request failed; Mailbox unavailable')
# Excerpt: log in over IMAP, fetch unseen INBOX messages, and send a short reply
# to each one over SMTP. imap_server, smtp_server, port, emaill and pwd are
# defined outside this excerpt, as are the imports it relies on.
mail = imaplib.IMAP4_SSL(imap_server)
mail.login(emaill, pwd)
# select the label to work on
print('selecting inbox folder')
try:
mail.select('INBOX')
# Search for messages that do not have the \Seen flag.
_, data = mail.search(None, '(UNSEEN)')
mail_ids = data[0]
id_list = mail_ids.split()
for mess in id_list:
# Fetch the complete raw message.
_, data = mail.fetch(mess, '(RFC822)')
for response in data:
# Only tuple items carry message bytes; other items are skipped.
if isinstance(response, tuple):
print('preparing email body.........')
msg = email.message_from_string(response[1].decode('ISO-8859-1'), policy=email.policy.default)
# open_links(msg)
body_of_email = 'Hi'
email_from = msg['from']
email_to = msg['to']
subject = msg['subject']
# Build the reply, copying the headers of the received message.
mssg = MIMEText(body_of_email, 'plain')
mssg['Subject'] = subject
# NOTE(review): From is the ORIGINAL sender and To is the original recipient,
# while the message is submitted as `emaill` below. Some SMTP servers reject a
# From header that does not match the authenticated account — possibly the
# cause of the 550 error; TODO confirm (swapping From/To would be the usual reply shape).
mssg['From'] = email_from
mssg['To'] = email_to
# NOTE(review): this reuses the received message's Message-ID for the reply;
# a reply normally gets a fresh ID and references the old one via In-Reply-To — TODO confirm.
mssg['Message-ID'] = msg['Message-ID']
try:
# msg.add_header('reply-to', email_to)
s = smtplib.SMTP_SSL(host=smtp_server, port=port)
# .starttls()
s.login(user=emaill, password=pwd)
# Envelope: sent from the logged-in account to the original sender.
s.sendmail(emaill, msg['From'], mssg.as_string())
# NOTE: the excerpt ends here; the except/finally clauses of both try blocks are not shown.
The above code doesn't work, but if I change the message to, let's say, 'hey you', it works, so I think there is a problem with the MIMEText construction. Any help, please?

count number of emails sent using smtplib python

I have a basic python code that sends out an email to addresses from a list in Google sheet.
I want to count the number of times an email is sent to a particular email address by the python script. I tried researching on it. I didn't find anything related to it. And being a complete beginner hasn't helped me make much progress.
If anyone can point me to a particular direction that would be super helpful. Thanks so much in advance. 
Below is the code
import smtplib
import ssl
from email.mime.text import MIMEText # New line
from email.utils import formataddr # New line
# User configuration
sender_email = 'email ID'
sender_name = 'name'
password = "password"
# RECEIVER_* are placeholders: replace them with real addresses and names.
receiver_emails = [RECEIVER_EMAIL_1, RECEIVER_EMAIL_2, RECEIVER_EMAIL_3]
receiver_names = [RECEIVER_NAME_1, RECEIVER_NAME_2, RECEIVER_NAME_3]

# Email text
email_body = '''
This is a test email sent by Python. Isn't that cool?
'''

# Send one personalised message per recipient, each over its own SMTP session.
for receiver_email, receiver_name in zip(receiver_emails, receiver_names):
    print("Sending the email...")
    # Configurating user's info
    msg = MIMEText(email_body, 'plain')
    msg['To'] = formataddr((receiver_name, receiver_email))
    msg['From'] = formataddr((sender_name, sender_email))
    msg['Subject'] = 'Hello, my friend ' + receiver_name

    # Reset per iteration so `finally` never touches a missing or stale
    # connection when smtplib.SMTP() itself fails.
    server = None
    try:
        # Creating a SMTP session | use 587 with TLS, 465 SSL and 25
        server = smtplib.SMTP('smtp.gmail.com', 587)
        server.ehlo()
        # Encrypts the email
        context = ssl.create_default_context()
        server.starttls(context=context)
        # We log in into our Google account
        server.login(sender_email, password)
        # Sending email from sender, to receiver with the email body
        server.sendmail(sender_email, receiver_email, msg.as_string())
        print('Email sent!')
    except Exception as e:
        print(f'Oh no! Something bad happened!\n {e}')
    finally:
        print('Closing the server...')
        if server is not None:
            try:
                server.quit()
            except smtplib.SMTPServerDisconnected:
                # The connection is already gone; nothing left to close.
                pass
I would suggest you to create a list of successful emails, which will be populated on each iteration and then, use Counter from collections module, which receives an iterable and returns an object with number of occurrences of each element in the iterable.
You can try the following code:
from collections import Counter
import json
counter_file_path = "counter.json"

# Load the per-recipient counts saved by a previous run; start empty otherwise.
try:
    with open(counter_file_path, "r") as f:
        email_stats = json.load(f)
except FileNotFoundError:
    email_stats = {}

for receiver_email, receiver_name in zip(receiver_emails, receiver_names):
    print("Sending the email...")
    # Configurating user's info
    msg = MIMEText(email_body, 'plain')
    msg['To'] = formataddr((receiver_name, receiver_email))
    msg['From'] = formataddr((sender_name, sender_email))
    msg['Subject'] = 'Hello, my friend ' + receiver_name

    # Reset per iteration so `finally` never touches a missing or stale
    # connection when smtplib.SMTP() itself fails.
    server = None
    try:
        # Creating a SMTP session | use 587 with TLS, 465 SSL and 25
        server = smtplib.SMTP('smtp.gmail.com', 587)
        server.ehlo()
        # Encrypts the email
        context = ssl.create_default_context()
        server.starttls(context=context)
        # We log in into our Google account
        server.login(sender_email, password)
        # Sending email from sender, to receiver with the email body
        server.sendmail(sender_email, receiver_email, msg.as_string())
        print('Email sent!')
        # Count only sends that did not raise.
        email_stats[receiver_email] = email_stats.get(receiver_email, 0) + 1
    except Exception as e:
        print(f'Oh no! Something bad happened!\n {e}')
    finally:
        print('Closing the server...')
        if server is not None:
            try:
                server.quit()
            except smtplib.SMTPServerDisconnected:
                # The connection is already gone; nothing left to close.
                pass

print(email_stats) # output - all occurrences for each email

# Persist the updated counts for the next run.
with open(counter_file_path, "w") as f:
    json.dump(email_stats, f)
You can use this code to store and print the count of successfully sent emails in JSON format.
import json
import os
import smtplib
import ssl
from email.mime.text import MIMEText # New line
from email.utils import formataddr # New line
fileName = "sendMail_count.json"


# To store data into json file.
def store_data_to_file(jsonStr):
    """Write ``jsonStr`` as JSON to ``fileName``, replacing its previous content.

    Args:
        jsonStr: JSON-serialisable object (here: the address -> count dict).
    """
    # The context manager flushes and closes the file even if dump() raises.
    with open(fileName, "w") as jsonFile:
        json.dump(jsonStr, jsonFile)
    print("data stored successfully")
# User configuration
sender_email = 'email ID'
sender_name = 'name'
password = "password"
# RECEIVER_* are placeholders: replace them with real addresses and names.
receiver_emails = [RECEIVER_EMAIL_1, RECEIVER_EMAIL_2, RECEIVER_EMAIL_3]
receiver_names = [RECEIVER_NAME_1, RECEIVER_NAME_2, RECEIVER_NAME_3]

# To store the count of successful mail received by receiver with their respective email.
if not os.path.exists(fileName) or os.stat(fileName).st_size == 0:
    print("File is empty or not found")
    print("Creating a JSON file to store the data")
    # Create (or truncate) the file and close the handle straight away.
    with open(fileName, "w+"):
        pass
    print("a JSON file has been created with name: " + str(fileName))
    success_mail_count = {}
else:
    with open(fileName) as jsonFile:
        success_mail_count = json.load(jsonFile)
    print(success_mail_count)

# Email text
email_body = '''
This is a test email sent by Python. Isn't that cool?
'''

for receiver_email, receiver_name in zip(receiver_emails, receiver_names):
    print("Sending the email to..." + receiver_email)
    # Configurating user's info
    msg = MIMEText(email_body, 'plain')
    msg['To'] = formataddr((receiver_name, receiver_email))
    msg['From'] = formataddr((sender_name, sender_email))
    msg['Subject'] = 'Hello, my friend ' + receiver_name

    # Reset per iteration so `finally` never touches a missing or stale
    # connection when smtplib.SMTP() itself fails.
    server = None
    try:
        # Creating a SMTP session | use 587 with TLS, 465 SSL and 25
        server = smtplib.SMTP('smtp.gmail.com', 587)
        server.ehlo()
        # Encrypts the email
        context = ssl.create_default_context()
        server.starttls(context=context)
        # We log in into our Google account
        server.login(sender_email, password)
        # Sending email from sender, to receiver with the email body
        server.sendmail(sender_email, receiver_email, msg.as_string())
        # Add 1 to the receiver's current count (0 if not seen before).
        # Counts stay stored as strings to remain compatible with existing files.
        previous = int(success_mail_count.get(receiver_email, 0))
        success_mail_count[receiver_email] = str(previous + 1)
        print('Email sent!')
    except Exception as e:
        print(f'Oh no! Something bad happened!\n {e}')
    finally:
        print('Closing the server...')
        if server is not None:
            try:
                server.quit()
            except smtplib.SMTPServerDisconnected:
                # The connection is already gone; nothing left to close.
                pass

print(success_mail_count)
store_data_to_file(success_mail_count)
Run this code: on the first run it creates the file and stores the counts in it; on later runs it reads the existing counts back from that file.

Use python to download email attachments only based on Subject

The following code uses IMAP to find emails by subject line; it returns all parts of the email and downloads the attachments. However, I ONLY need it to download the attachments of the email, not the entire body as well. I understand this has to do with the for part in email_message.walk(): loop, which iterates over the entire email. Could someone please help me make this code download only the attachments of the email? I'm sure this is a simple code change, but I'm just not sure how to make it!
import imaplib
import email.header
import os
import sys
import csv
# Your IMAP Settings
host = 'imap.gmail.com'
user = 'User email'
password = 'User password'

# Connect to the server
print('Connecting to ' + host)
mailBox = imaplib.IMAP4_SSL(host)

# Login to our account
mailBox.login(user, password)

boxList = mailBox.list()
# print(boxList)

mailBox.select()
searchQuery = '(SUBJECT "CDR Schedule output from schedule: This is a test to see how it works")'

result, data = mailBox.uid('search', None, searchQuery)
# list of uids (empty when the search failed or matched nothing)
id_list = data[0].split() if result == 'OK' and data and data[0] else []

for latest_email_uid in id_list:
    # fetch the whole message (RFC822) for the given UID
    result, email_data = mailBox.uid('fetch', latest_email_uid, '(RFC822)')
    if result != 'OK' or not email_data or not isinstance(email_data[0], tuple):
        continue

    # Parse the raw bytes directly: decoding the whole message as UTF-8 first
    # fails on messages that contain bytes in another encoding.
    email_message = email.message_from_bytes(email_data[0][1])
    # Read the header through the parser instead of splitting the message text.
    subject = email_message.get('Subject', '')

    # downloading attachments: only parts with a Content-Disposition header
    # and a filename are written, so the message body is never saved.
    for part in email_message.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue

        fileName = part.get_filename()
        if not fileName:
            continue

        # The filename comes from the sender: keep only its last component so
        # it cannot point outside the download directory.
        safeName = os.path.basename(fileName.replace('\\', '/'))
        if not safeName:
            continue
        filePath = os.path.join('C:/install files/', safeName)
        if os.path.isfile(filePath):
            # Never overwrite a file downloaded earlier.
            continue

        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        with open(filePath, 'wb') as fp:
            fp.write(payload)

        print('Downloaded "{file}" from email titled "{subject}" with UID {uid}.'.format(file=fileName, subject=subject, uid=latest_email_uid.decode('utf-8')))

mailBox.close()
mailBox.logout()

How to get unread messages and set message read flags over IMAP using Python?

import imaplib
def read():
    """Print the INBOX unread count, then the text of every message from one sender.

    Credentials and the sender address are hard-coded placeholders.
    Fetching BODY[TEXT] (without PEEK) marks the fetched messages as read.
    """
    import re  # local import: this snippet only imports imaplib at the top

    userName = "xxx#gmail.com"
    password = "xxxx"

    imap_server = imaplib.IMAP4_SSL("imap.gmail.com", 993)
    imap_server.login(userName, password)
    imap_server.select('INBOX')

    # A STATUS reply is bytes such as b'"INBOX" (UNSEEN 3)'; extract the number
    # with a regex instead of relying on token positions.
    status, response = imap_server.status('INBOX', "(UNSEEN)")
    match = re.search(rb'UNSEEN\s+(\d+)', response[0] or b'')
    unreadcount = int(match.group(1)) if match else 0
    print(unreadcount)

    status, found = imap_server.search(None, '(FROM "xxx#gmail.com")')
    da = []
    for e_id in found[0].split():
        _, fetched = imap_server.fetch(e_id, '(UID BODY[TEXT])')
        da.append(fetched[0][1])
    print(da)
# Only log in and read mail when run as a script, not when imported.
if __name__ == "__main__":
    read()
How to organise the code above, to read only unread mails?
Also, once we read them, how to mark the messages as read mail using Python?
import imaplib
def read(username, password, sender_of_interest):
    """Print unread INBOX messages from one sender and mark them as read.

    Args:
        username: Gmail account name.
        password: account password.
        sender_of_interest: address to match against the From header.
    """
    # Login to INBOX
    imap = imaplib.IMAP4_SSL("imap.gmail.com", 993)
    imap.login(username, password)
    imap.select('INBOX')

    # Use search(), not status(). The mailbox is already selected, so the
    # criteria must not name it: 'INBOX' is not a valid search key.
    status, response = imap.search(None, '(UNSEEN)')
    unread_msg_nums = response[0].split()

    # Print the count of all unread messages
    print(len(unread_msg_nums))

    # Print all unread messages from a certain sender of interest
    # (escape backslashes and quotes so the address stays one quoted string).
    sender = str(sender_of_interest).replace('\\', '\\\\').replace('"', '\\"')
    status, response = imap.search(None, '(UNSEEN)', '(FROM "%s")' % sender)
    unread_msg_nums = response[0].split()
    da = []
    for e_id in unread_msg_nums:
        _, fetched = imap.fetch(e_id, '(UID BODY[TEXT])')
        da.append(fetched[0][1])
    print(da)

    # Mark them as seen
    for e_id in unread_msg_nums:
        imap.store(e_id, '+FLAGS', '\\Seen')
def read(username, password, sender_of_interest=None):
    """Fetch unread INBOX messages, optionally restricted to one sender.

    Args:
        username: Gmail account name.
        password: account password.
        sender_of_interest: if given, only messages whose From header
            matches this address are fetched.

    Returns:
        A list of dicts with keys ``mail_to``, ``mail_subject``,
        ``mail_from`` (a ``(name, address)`` tuple) and ``body``.
        The list is also printed.
    """
    # Local imports: this snippet only imports imaplib at the top.
    import email
    import email.utils

    # Login to INBOX
    imap = imaplib.IMAP4_SSL("imap.gmail.com", 993)
    imap.login(username, password)
    imap.select('INBOX')

    # Use search(), not status()
    # Print all unread messages from a certain sender of interest
    if sender_of_interest:
        # Quote the value so addresses or names containing spaces stay one argument.
        sender = str(sender_of_interest).replace('\\', '\\\\').replace('"', '\\"')
        status, response = imap.uid('search', None, 'UNSEEN', 'FROM "{0}"'.format(sender))
    else:
        status, response = imap.uid('search', None, 'UNSEEN')

    if status == 'OK' and response and response[0]:
        unread_msg_nums = response[0].split()
    else:
        unread_msg_nums = []

    data_list = []
    for e_id in unread_msg_nums:
        e_id = e_id.decode('utf-8')
        _, fetched = imap.uid('fetch', e_id, '(RFC822)')
        if not fetched or not isinstance(fetched[0], tuple):
            continue
        # Parse the raw bytes: decoding the whole message as UTF-8 first
        # fails on messages that contain bytes in another encoding.
        email_message = email.message_from_bytes(fetched[0][1])
        data_list.append({
            'mail_to': email_message['To'],
            'mail_subject': email_message['Subject'],
            'mail_from': email.utils.parseaddr(email_message['From']),
            # For multipart messages get_payload() returns a list of parts.
            'body': email_message.get_payload(),
        })
    print(data_list)
    return data_list
You may use my lib - imap_tools:
https://pypi.org/project/imap-tools/
from imap_tools import MailBox, A
# collect the subject of every unseen INBOX email, marking each one as seen
with MailBox('imap.mail.com').login('test#mail.com', 'pwd') as mailbox:
    unseen_messages = mailbox.fetch(A(seen=False), mark_seen=True)
    subjects = [msg.subject for msg in unseen_messages]

How to fetch an email body using imaplib in python?

I'd like to fetch the whole message from IMAP4 server.
In the Python docs I found this bit of code that works:
>>> t, data = M.fetch('1', '(RFC822)')
>>> body = data[0][1]
I'm wondering if I can always trust that data[0][1] returns the body of the message. When I've run 'RFC822.SIZE' I've got just a string instead of a tuple.
I've skimmed through rfc1730 but I wasn't able to figure out the proper response structure for the 'RFC822'. It is also hard to tell the fetch result structure from imaplib documentation.
Here is what I'm getting when fetching RFC822:
('OK', [('1 (RFC822 {858569}', 'body of the message', ')')])
But when I fetch RFC822.SIZE I'm getting:
('OK', ['1 (RFC822.SIZE 847403)'])
How should I properly handle the data[0] list?
Can I trust that when it is a list of tuples, each tuple has exactly 3 parts and the second part is the payload?
Maybe you know any better library for imap4?
No... imaplib is a pretty good library, it's imap that's so unintelligible.
You may wish to check that t == 'OK', but data[0][1] works as expected for as much as I've used it.
Here's a quick example I use to extract signed certificates I've received by email, not bomb-proof, but suits my purposes:
import getpass, os, imaplib, email
from OpenSSL.crypto import load_certificate, FILETYPE_PEM
def getMsgs(servername="myimapserverfqdn"):
    """Yield each unread Inbox message whose subject contains 'Your SSL Certificate'.

    Args:
        servername: host name of the IMAP server (SSL).

    The login name is the current OS user; the password is prompted for.
    After each fetch the ``\\Seen`` flag is removed again, so the messages
    stay unread on the server.
    """
    usernm = getpass.getuser()
    passwd = getpass.getpass()
    subject = 'Your SSL Certificate'
    conn = imaplib.IMAP4_SSL(servername)
    conn.login(usernm, passwd)
    conn.select('Inbox')
    typ, found = conn.search(None, '(UNSEEN SUBJECT "%s")' % subject)
    if typ != 'OK' or not found or not found[0]:
        return
    for num in found[0].split():
        typ, fetched = conn.fetch(num, '(RFC822)')
        if typ != 'OK' or not fetched or not isinstance(fetched[0], tuple):
            continue
        # fetch() returns bytes on Python 3, so parse with message_from_bytes.
        msg = email.message_from_bytes(fetched[0][1])
        # Fetching RFC822 sets \Seen; clear it to leave the message unread.
        conn.store(num, '-FLAGS', '\\Seen')
        yield msg
def getAttachment(msg, check):
    """Return the decoded payload of the first matching attachment.

    Args:
        msg: parsed email message.
        check: callable taking a filename (str) and returning a truthy
            value when that attachment is wanted.

    Returns:
        The payload bytes of the first ``application/octet-stream`` part
        whose filename satisfies ``check``, or ``None`` if there is none.
    """
    for part in msg.walk():
        if part.get_content_type() != 'application/octet-stream':
            continue
        filename = part.get_filename()
        # Parts without a filename cannot match; passing None to `check`
        # would crash predicates such as ``x.endswith('.pem')``.
        if filename and check(filename):
            return part.get_payload(decode=True)
    return None
# Save every '.pem' attachment that parses as a certificate to '<common name>.pem'.
if __name__ == '__main__':
    for msg in getMsgs():
        payload = getAttachment(msg, lambda x: x.endswith('.pem'))
        if not payload:
            continue
        try:
            cert = load_certificate(FILETYPE_PEM, payload)
        except Exception:
            # Not a parseable PEM certificate: skip this attachment.
            cert = None
        if cert:
            cn = cert.get_subject().commonName
            # The common name comes from the emailed certificate: keep only the
            # last path component so the file is created in the current directory.
            filename = os.path.basename("%s.pem" % cn)
            if not os.path.exists(filename):
                # The payload is bytes, so write in binary mode.
                with open(filename, 'wb') as pem_file:
                    pem_file.write(payload)
                print("Writing to %s" % filename)
            else:
                print("%s already exists" % filename)
The IMAPClient package is a fair bit easier to work with. From the description:
Easy-to-use, Pythonic and complete
IMAP client library.
Try my package:
https://pypi.org/project/imap-tools/
example:
from imap_tools import MailBox
# collect the body of every INBOX email, preferring plain text over HTML
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
    bodies = []
    for msg in mailbox.fetch():
        bodies.append(msg.text or msg.html)
Features:
Parsed email message attributes
Query builder for searching emails
Work with emails in folders (copy, delete, flag, move, append)
Work with mailbox folders (list, set, get, create, exists, rename, delete, status)
No dependencies
This was my solution to extract the useful bits of information. It's been reliable so far:
import datetime
import email
import imaplib
import mailbox
# Import the submodules used below explicitly instead of relying on
# `import email` loading them as a side effect.
import email.header
import email.utils

EMAIL_ACCOUNT = "your#gmail.com"
PASSWORD = "your password"


def _decoded_header(value):
    """Return a header as readable text; a missing header becomes ''."""
    if value is None:
        return ''
    return str(email.header.make_header(email.header.decode_header(value)))


mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
mail.list()
mail.select('inbox')
result, data = mail.uid('search', None, "UNSEEN") # (ALL/UNSEEN)
uids = data[0].split() if result == 'OK' and data and data[0] else []

# Write one "email_<n>.txt" per message, holding its headers and plain-text body.
for x, latest_email_uid in enumerate(uids):
    result, email_data = mail.uid('fetch', latest_email_uid, '(RFC822)')
    # result, email_data = conn.store(num,'-FLAGS','\\Seen')
    # this might work to set flag to seen, if it doesn't already
    if result != 'OK' or not email_data or not isinstance(email_data[0], tuple):
        continue
    # Parse the raw bytes: decoding the whole message as UTF-8 first fails on
    # messages that contain bytes in another encoding.
    email_message = email.message_from_bytes(email_data[0][1])

    # Header Details
    # Default to '' so a missing or unparseable Date neither raises NameError
    # nor reuses the date of the previous message.
    local_message_date = ''
    date_header = email_message['Date']
    date_tuple = email.utils.parsedate_tz(date_header) if date_header else None
    if date_tuple:
        local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
        local_message_date = local_date.strftime("%a, %d %b %Y %H:%M:%S")
    email_from = _decoded_header(email_message['From'])
    email_to = _decoded_header(email_message['To'])
    subject = _decoded_header(email_message['Subject'])

    # Body details
    for part in email_message.walk():
        if part.get_content_type() != "text/plain":
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        # Decode with the charset the part declares, falling back to UTF-8.
        charset = part.get_content_charset() or 'utf-8'
        try:
            body = payload.decode(charset, errors='replace')
        except LookupError:
            # The declared charset is unknown to Python.
            body = payload.decode('utf-8', errors='replace')
        file_name = "email_" + str(x) + ".txt"
        # As before, a later text/plain part of the same message overwrites
        # the file written for an earlier one.
        with open(file_name, 'w', encoding='utf-8') as output_file:
            output_file.write("From: %s\nTo: %s\nDate: %s\nSubject: %s\n\nBody: \n\n%s" %(email_from, email_to,local_message_date, subject, body))

Categories

Resources