How to fetch an email body using imaplib in python? - python

I'd like to fetch the whole message from IMAP4 server.
In the Python docs I found this bit of code that works:
>>> t, data = M.fetch('1', '(RFC822)')
>>> body = data[0][1]
I'm wondering if I can always trust that data[0][1] returns the body of the message. When I've run 'RFC822.SIZE' I've got just a string instead of a tuple.
I've skimmed through rfc1730 but I wasn't able to figure out the proper response structure for the 'RFC822'. It is also hard to tell the fetch result structure from imaplib documentation.
Here is what I'm getting when fetching RFC822:
('OK', [('1 (RFC822 {858569}', 'body of the message', ')')])
But when I fetch RFC822.SIZE I'm getting:
('OK', ['1 (RFC822.SIZE 847403)'])
How should I properly handle the data[0] list?
Can I trust that when it is a list of tuples, each tuple has exactly 3 parts and the second part is the payload?
Maybe you know any better library for imap4?

No... imaplib is a pretty good library, it's imap that's so unintelligible.
You may wish to check that t == 'OK', but data[0][1] works as expected for as much as I've used it.
Here's a quick example I use to extract signed certificates I've received by email, not bomb-proof, but suits my purposes:
import getpass, os, imaplib, email
from OpenSSL.crypto import load_certificate, FILETYPE_PEM
def getMsgs(servername="myimapserverfqdn"):
    """Yield unread 'Your SSL Certificate' messages from the Inbox.

    Logs in over IMAP4-SSL as the current OS user (the password is prompted
    for), searches the Inbox for UNSEEN messages with the expected subject,
    and yields each one parsed into an email message object.  The \\Seen flag
    that fetching sets is removed again, so messages stay unread.

    Args:
        servername: Host name of the IMAP server.

    Yields:
        One parsed message per search hit that could be fetched.
    """
    usernm = getpass.getuser()
    passwd = getpass.getpass()
    subject = 'Your SSL Certificate'
    # imaplib hands back bytes on Python 3 and str on Python 2; use the
    # bytes parser when the running interpreter provides one.
    parse = getattr(email, 'message_from_bytes', email.message_from_string)
    conn = imaplib.IMAP4_SSL(servername)
    try:
        conn.login(usernm, passwd)
        conn.select('Inbox')
        typ, data = conn.search(None, '(UNSEEN SUBJECT "%s")' % subject)
        if typ != 'OK':
            return
        for num in data[0].split():
            typ, msg_data = conn.fetch(num, '(RFC822)')
            # A message body arrives as a (header, payload) tuple; anything
            # else (e.g. None for a vanished message) has no body to parse.
            if typ != 'OK' or not isinstance(msg_data[0], tuple):
                continue
            msg = parse(msg_data[0][1])
            conn.store(num, '-FLAGS', '\\Seen')
            yield msg
    finally:
        # Release the server connection even if the consumer stops early
        # or an IMAP call raises.
        conn.logout()
def getAttachment(msg, check):
    """Return the decoded payload of the first matching attachment.

    Walks every MIME part of ``msg`` and looks at parts whose content type is
    ``application/octet-stream``.

    Args:
        msg: Parsed email message to search.
        check: Callable taking the attachment file name and returning a
            truthy value when that attachment is wanted.

    Returns:
        The transfer-decoded payload of the first accepted part, or ``None``
        when no part matches.
    """
    for part in msg.walk():
        if part.get_content_type() != 'application/octet-stream':
            continue
        filename = part.get_filename()
        # Parts without a file name yield None here; skip them rather than
        # passing None to a predicate that expects a string.
        if filename and check(filename):
            return part.get_payload(decode=True)
    return None
if __name__ == '__main__':
    # Save every emailed .pem attachment that parses as a certificate to
    # "<commonName>.pem" in the current directory, never overwriting.
    for msg in getMsgs():
        payload = getAttachment(msg, lambda x: x.endswith('.pem'))
        if not payload:
            continue
        try:
            cert = load_certificate(FILETYPE_PEM, payload)
        except Exception:
            # Not a parseable PEM certificate: skip this message, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if not cert:
            continue
        cn = cert.get_subject().commonName
        # The common name comes from an emailed certificate, so strip any
        # directory components before using it as a file name.
        filename = os.path.basename("%s.pem" % cn)
        if not os.path.exists(filename):
            # The decoded payload is raw bytes; write it unmodified and
            # close the file deterministically.
            with open(filename, 'wb') as out:
                out.write(payload)
            print("Writing to %s" % filename)
        else:
            print("%s already exists" % filename)

The IMAPClient package is a fair bit easier to work with. From the description:
Easy-to-use, Pythonic and complete
IMAP client library.

Try my package:
https://pypi.org/project/imap-tools/
example:
from imap_tools import MailBox

# Get a list of email bodies from the INBOX folder: the plain-text body when
# present, otherwise the HTML body.
with MailBox('imap.mail.com').login('test@mail.com', 'password', 'INBOX') as mailbox:
    bodies = [msg.text or msg.html for msg in mailbox.fetch()]
Features:
Parsed email message attributes
Query builder for searching emails
Work with emails in folders (copy, delete, flag, move, append)
Work with mailbox folders (list, set, get, create, exists, rename, delete, status)
No dependencies

This was my solution to extract the useful bits of information. It's been reliable so far:
import datetime
import email
import imaplib
import mailbox
EMAIL_ACCOUNT = "your@gmail.com"
PASSWORD = "your password"


def _header_text(raw_value):
    """Decode an RFC 2047 encoded header to text; a missing header gives ''."""
    return str(email.header.make_header(email.header.decode_header(raw_value or '')))


mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
mail.list()
mail.select('inbox')

result, data = mail.uid('search', None, "UNSEEN")  # (ALL/UNSEEN)

# data[0] is a space-separated list of UIDs; x numbers the output files.
for x, email_uid in enumerate(data[0].split()):
    result, email_data = mail.uid('fetch', email_uid, '(RFC822)')
    # result, email_data = conn.store(num,'-FLAGS','\\Seen')
    # this might work to set flag to seen, if it doesn't already
    if result != 'OK' or not isinstance(email_data[0], tuple):
        # No (header, payload) tuple means there is no message to parse.
        continue

    # Parse the raw bytes directly: a message is not guaranteed to be valid
    # UTF-8, so decoding the whole thing first can raise.
    email_message = email.message_from_bytes(email_data[0][1])

    # Header details
    local_message_date = ""  # stays empty when the Date header is unusable
    date_tuple = email.utils.parsedate_tz(email_message['Date'])
    if date_tuple:
        local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
        local_message_date = local_date.strftime("%a, %d %b %Y %H:%M:%S")
    email_from = _header_text(email_message['From'])
    email_to = _header_text(email_message['To'])
    subject = _header_text(email_message['Subject'])

    # Body details: write each text/plain part to email_<x>.txt
    for part in email_message.walk():
        if part.get_content_type() != "text/plain":
            continue
        body = part.get_payload(decode=True)
        # Honour the part's declared charset instead of assuming UTF-8.
        body_text = body.decode(part.get_content_charset() or 'utf-8', errors='replace')
        file_name = "email_" + str(x) + ".txt"
        with open(file_name, 'w', encoding='utf-8') as output_file:
            output_file.write("From: %s\nTo: %s\nDate: %s\nSubject: %s\n\nBody: \n\n%s" % (email_from, email_to, local_message_date, subject, body_text))

Related

Use python to download email attachments only based on Subject

The following code uses IMAP to find emails by subject line, returns all parts of each email, and downloads the attachments. However, I ONLY need it to download the attachments, not the entire body as well. I understand this has to do with the `for part in email_message.walk():` loop, which iterates over the entire email. Could someone please help me make this code download only the attachments? I'm sure this is a simple code change, but I'm just not sure how to make it!
import imaplib
import email.header
import os
import sys
import csv
# Your IMAP Settings
host = 'imap.gmail.com'
user = 'User email'
password = 'User password'

# Connect to the server
print('Connecting to ' + host)
mailBox = imaplib.IMAP4_SSL(host)

# Login to our account
mailBox.login(user, password)

boxList = mailBox.list()
# print(boxList)

mailBox.select()
searchQuery = '(SUBJECT "CDR Schedule output from schedule: This is a test to see how it works")'
result, data = mailBox.uid('search', None, searchQuery)

# data[0] is a space-separated list of the matching UIDs
for latest_email_uid in data[0].split():
    # fetch the whole message (RFC822) for the given UID
    result, email_data = mailBox.uid('fetch', latest_email_uid, '(RFC822)')
    if result != 'OK' or not isinstance(email_data[0], tuple):
        # No (header, payload) tuple means there is no message to parse.
        continue

    # Parse the raw bytes directly; decoding the whole message as UTF-8
    # first can raise on messages in another encoding.
    email_message = email.message_from_bytes(email_data[0][1])

    # Read the subject from the parsed headers rather than by splitting the
    # serialized message, which fails when "Subject: " is absent.
    subject = str(email.header.make_header(
        email.header.decode_header(email_message['Subject'] or '')))

    # downloading attachments only: skip containers and anything that has no
    # Content-Disposition header or no file name
    for part in email_message.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        fileName = part.get_filename()
        if not fileName:
            continue
        # The file name is sender-controlled: keep only its last path
        # component so it cannot point outside the target folder.
        fileName = os.path.basename(fileName)
        filePath = os.path.join('C:/install files/', fileName)
        if os.path.isfile(filePath):
            continue
        with open(filePath, 'wb') as fp:
            fp.write(part.get_payload(decode=True))
        print('Downloaded "{file}" from email titled "{subject}" with UID {uid}.'.format(file=fileName, subject=subject, uid=latest_email_uid.decode('utf-8')))

mailBox.close()
mailBox.logout()

Read Latest Hotmail Emails in Python

I've been searching for a way to get the latest emails from my hotmail account (specifically the FROM and MESSAGE) using Python. The solutions mostly seem to be for gmail which isn't working as I would like.
Example 1: Using the Gmail examples - msg spits out a lot of unwanted data and the attempt to get subject, to and from returns blanks for each.
import imaplib
import email
from email.mime.multipart import MIMEMultipart
mail = imaplib.IMAP4_SSL('outlook.office365.com')
mail.login('myemail@hotmail.com', 'password')
mail.list()
mail.select('inbox')
for i in range(1, 5):
    typ, msg_data = mail.fetch(str(i), '(RFC822)')
    for response_part in msg_data:
        # Only the (header, payload) tuples carry message data.
        if not isinstance(response_part, tuple):
            continue
        # print(response_part[1])
        # The payload is bytes: parse it as bytes.  str(bytes) produces the
        # "b'...'" repr, which the parser cannot read headers from, so every
        # header lookup came back empty.
        msg = email.message_from_bytes(response_part[1])
        print(msg)
        for header in ['subject', 'to', 'from']:
            print('%-8s: %s' % (header.upper(), msg[header]))
mail.close()
mail.logout()
Example 2: Gets last outlook email contents but cannot seem to get more (e.g. last 5)
import imaplib
msrvr = imaplib.IMAP4_SSL('outlook.office365.com', 993)
unm = 'myemail@hotmail.com'
pwd = 'password'
msrvr.login(unm, pwd)

# select() returns (status, [message_count]); the list has a single element,
# so it cannot be indexed per message.
stat, cnt = msrvr.select('inbox')
total = int(cnt[0])
print(str(total))

# Sequence numbers run 1..total with the newest last: walk back from the
# end to get the latest five (or fewer if the mailbox is smaller).
for seq in range(total, max(total - 5, 0), -1):
    stat, dta = msrvr.fetch(str(seq), '(BODY[TEXT])')
    print(dta[0][1])

msrvr.close()
msrvr.logout()
Any thoughts how I could get the last 5 emails with FROM and MESSAGE?

Render html in Flask IMAP application

I am creating a Flask application which is supposed to retrieve emails by sending IMAP or POP requests to an email service provider like GMAIL. I am able to retrieve the emails by using the imaplib library. A simple email which contains only text in it is simple enough to retrieve and display. Unfortunately, when an email consists of images, GIFs or special styling it gets more difficult.
Whenever I run the code that retrieves the contents of the emails it seems that I am getting the HTML. But when I try to "render" it to an html file and use render_template('test.html') it seems I would be putting html into html.
What would be the correct way to move what I get from the email service provider to my web application in Flask?
class EmailClient:
imap_host = 'imap.gmail.com'
imap_user = 'test#test.com'
imap_pass = 'password'
def process_mailbox(M):
    """Print a summary of every message and collect the HTML bodies.

    Args:
        M: A logged-in IMAP connection with a mailbox already selected.

    Returns:
        A list with the decoded text of every ``text/html`` part found, in
        mailbox order.  The list is empty when the search fails, and holds
        whatever was collected so far when a fetch fails.
    """
    diction = []
    rv, data = M.search(None, "ALL")
    if rv != 'OK':
        print('No messages found!')
        return diction

    for num in data[0].split():
        rv, msg_data = M.fetch(num, '(RFC822)')
        if rv != 'OK':
            print("ERROR getting message", num)
            return diction

        msg = email.message_from_bytes(msg_data[0][1])
        # A message without a Subject gives None, which decode_header rejects.
        hdr = email.header.make_header(email.header.decode_header(msg['Subject'] or ''))
        subject = str(hdr)
        print('Message %s: %s' % (num, subject))

        date_tuple = email.utils.parsedate_tz(msg['Date'])
        if date_tuple:
            local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
            print('Local Date:', local_date.strftime('%a, %d %b %Y %H:%M:%S'))

        for part in msg.walk():
            if part.get_content_type() != 'text/html':
                continue
            # decode=True undoes base64 / quoted-printable transfer encoding;
            # without it the stored text is the still-encoded payload.
            payload = part.get_payload(decode=True)
            if payload is None:
                continue
            charset = part.get_content_charset() or 'utf-8'
            try:
                html = payload.decode(charset, errors='replace')
            except LookupError:
                # The message declares a charset Python does not know.
                html = payload.decode('utf-8', errors='replace')
            print(html)
            diction.append(html)

    # Hand the collected HTML back so the caller can pass it to a template.
    return diction
# Connect, log in, list the mailboxes, then process the Inbox.
M = imaplib.IMAP4_SSL(imap_host)
try:
    rv, data = M.login(imap_user, imap_pass)
except imaplib.IMAP4.error:
    print("LOGIN FAILED!")
    sys.exit(1)
print(rv, data)

try:
    rv, mailboxes = M.list()
    if rv == 'OK':
        print('Mailboxes:')
        print(mailboxes)

    rv, data = M.select('Inbox')
    if rv == 'OK':
        print('Processing mailbox...\n')
        process_mailbox(M)
        M.close()
    else:
        print('ERROR: Unable to open mailbox', rv)
finally:
    # Always end the session, even when processing a message raises.
    M.logout()
If you want to pass html code as a variable to a jinja template, add |safe. For instance if email contains the email in html format:
{{ email |safe }}

Python: Keep checking new email and alert of further new emails

I have this code that checks the latest email and then goes and does something. Is it possible to write something that keeps checking the inbox folder for new mail? I want it to keep checking for the latest new email. Would it get too complicated to store the fact that it has already made one pass, so that it doesn't alert about the same email twice?
Code:
import imaplib
import email
import Tkinter as tk
# Strings to search for in the email body.
word = ["href=", "href", "<a href="]

# Connect and log in to the mail server.
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('xxxx', 'xxxx')
mail.list()  # Out: list of "folders" aka labels in gmail.
mail.select("Inbox", readonly=True)  # open the inbox without changing flags

# Search by UID; data is a list whose first item is a space-separated
# string of UIDs.
result, data = mail.uid('search', None, "ALL")
ids = data[0]
id_list = ids.split()
latest_email_uid = id_list[-1]  # the last UID listed

# Fetch headers and body (RFC822) of that message.
result, data = mail.uid('fetch', latest_email_uid, '(RFC822)')
# The raw message: headers, HTML and every alternate payload.
raw_email = data[0][1]
.....goes and does other code regarding to email html....
Try to use this approach:
The logic is the same as in @tripleee's comment.
import time
word = ["href=", "href", "<a href="]  # list of strings to search for in email body

# connection to the email server
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('xxxx', 'xxxx')
mail.list()
# Out: list of "folders" aka labels in gmail.

latest_email_uid = None  # UID of the last message already handled
while True:
    # Re-select on every pass so the search reflects the current mailbox.
    mail.select("Inbox", readonly=True)
    result, data = mail.uid('search', None, "ALL")  # search and return uids instead
    id_list = data[0].split()  # data[0] is a space separated string of uids
    # An empty mailbox yields an empty list, which has no last element.
    newest_uid = id_list[-1] if id_list else None

    if newest_uid is not None and newest_uid != latest_email_uid:
        # Fetch the NEW message (not the previously remembered uid), and
        # keep the search result in `data` untouched.
        result, msg_data = mail.uid('fetch', newest_uid, '(RFC822)')
        if result == 'OK' and isinstance(msg_data[0], tuple):
            raw_email = msg_data[0][1]
            # Remember it (assignment, not comparison) so the same message
            # is not reported again on the next pass.
            latest_email_uid = newest_uid
            # ... process raw_email here ...

    time.sleep(120)  # put your value here, be sure that this value is sufficient (see @tripleee comment below)

Error 10053 When Sending Large Attachments using Gmail API

I'm trying to send emails of various sizes using the Gmail API and the functions below.
Generally this works perfectly; however, for attachments over around 10MB (which are rare but will happen) I receive Errno 10053, which I think is because the request times out when sending a message that includes the large attachment.
Is there a way around this by say, specifying size or increasing the timeout limit? There's reference to size in the Gmail API docs, but I'm struggling to understand how to use in Python or whether it would even help.
def CreateMessageWithAttachment(sender, to, cc, subject,
                                message_text, file_dir, filename):
    """Create a message for an email.

    Args:
      sender: Email address of the sender.
      to: Email address of the receiver.
      cc: Email address to copy, or None to leave out the Cc header.
      subject: The subject of the email message.
      message_text: The text of the email message.
      file_dir: The directory containing the file to be attached.
      filename: The name of the file to be attached.

    Returns:
      An object containing a base64url encoded email object.
    """
    message = MIMEMultipart()
    message['to'] = to
    if cc is not None:
        message['cc'] = cc
    message['from'] = sender
    message['subject'] = subject

    message.attach(MIMEText(message_text))

    path = os.path.join(file_dir, filename)
    content_type, encoding = mimetypes.guess_type(path)
    QCoreApplication.processEvents()

    # Unknown or compressed/encoded files are sent as generic binary.
    if content_type is None or encoding is not None:
        content_type = 'application/octet-stream'
    main_type, sub_type = content_type.split('/', 1)

    # Read the file once, closing it even if the read fails; every branch
    # below used the same open/read/close sequence.
    with open(path, 'rb') as fp:
        content = fp.read()

    if main_type == 'text':
        msg = MIMEText(content, _subtype=sub_type)
    elif main_type == 'image':
        msg = MIMEImage(content, _subtype=sub_type)
    elif main_type == 'audio':
        msg = MIMEAudio(content, _subtype=sub_type)
    else:
        msg = MIMEBase(main_type, sub_type)
        msg.set_payload(content)
    QCoreApplication.processEvents()

    msg.add_header('Content-Disposition', 'attachment', filename=filename)
    message.attach(msg)

    # NOTE(review): this passes a str to urlsafe_b64encode, which matches
    # Python 2; on Python 3 it would need message.as_bytes() - confirm the
    # target interpreter.
    return {'raw': base64.urlsafe_b64encode(message.as_string())}
def SendMessage(service, user_id, message, size):
    """Send an email message.

    Args:
      service: Authorized Gmail API service instance.
      user_id: User's email address. The special value "me"
        can be used to indicate the authenticated user.
      message: Message to be sent.
      size: Not read by this function; kept so existing callers still work.

    Returns:
      Sent Message, or None when the API call raised errors.HttpError.
    """
    try:
        message = (service.users().messages().send(userId=user_id, body=message)
                   .execute())
    except errors.HttpError as error:
        # Report the failure instead of silently discarding it; callers
        # still receive None, as they did before.
        print('An error occurred: %s' % error)
        return None
    QCoreApplication.processEvents()
    return message
I succeeded in inserting/sending a message with a large file using the Python code below.
The Google API documentation is not developer-friendly; the "/upload" endpoint in particular is unclear and poorly documented, and it confuses a lot of developers.
The final line does the magic :)
def insert_message(service, message):
    """Insert a message, picking the upload path from its estimated size.

    Messages whose 'sizeEstimate' exceeds 6,000,000 go through
    insert_large_message; all others go through insert_small_message.
    Errors are printed rather than raised.

    Args:
      service: Authorized Gmail API service instance.
      message: Mapping holding at least 'sizeEstimate', plus the keys the
        chosen insert helper reads.
    """
    try:
        if message['sizeEstimate'] > 6000000:
            insert_large_message(service, message)
        else:
            insert_small_message(service, message)
    except Exception:
        # Catch ordinary errors only, so KeyboardInterrupt and SystemExit
        # can still stop the program.
        exc_type, exc_value, exc_tb = sys.exc_info()
        print ('Error: ----type: %s, ----value: %s, ----traceback: %s ************' % (exc_type, exc_value, exc_tb))
def insert_small_message(service, message):
    """Insert a message by sending its 'raw' payload inline in the request.

    Args:
      service: Authorized Gmail API service instance.
      message: Mapping providing 'raw' and 'labelIds'.
    """
    request_body = {
        'raw': message['raw'],
        'labelIds': message['labelIds'],
        'internalDateSource': 'dateHeader',
    }
    messages_api = service.users().messages()
    messages_api.insert(userId='me', body=request_body).execute()
def insert_large_message(service, message):
    """Insert a message by uploading its decoded bytes as a media body.

    The base64url 'raw' payload is decoded into an in-memory buffer and
    sent as a 'message/rfc822' media upload, while the labels travel in the
    regular request body.

    Args:
      service: Authorized Gmail API service instance.
      message: Mapping providing 'raw' and 'labelIds'.
    """
    buffer = io.BytesIO()
    raw_bytes = base64.urlsafe_b64decode(str(message['raw']))
    buffer.write(raw_bytes)

    metadata = {
        'labelIds': message['labelIds'],
        'internalDateSource': 'dateHeader',
    }
    upload = googleapiclient.http.MediaIoBaseUpload(buffer, mimetype='message/rfc822')
    print('load big data!')
    messages_api = service.users().messages()
    messages_api.insert(userId='me', body=metadata, media_body=upload).execute()
'g' is my authorized api context. The call method will invoke execute on the object. The important thing is the Media calls and using both the media_body and the body params. This causes the message to be inserted with the label INBOX, and it will allow at least a 24MB file.
I ended up with two copies because the read timeout was too short:
f fetch 8:9 (flags INTERNALDATE RFC822.SIZE)
* 8 FETCH (RFC822.SIZE 24000720 INTERNALDATE "19-Jul-2007 17:12:26 +0000" FLAGS (\Seen))
* 9 FETCH (RFC822.SIZE 24000720 INTERNALDATE "19-Jul-2007 17:12:26 +0000" FLAGS (\Seen))
Sample code:
import mailbox
import StringIO
import googleapiclient.http
import base64  # used below but missing from this sample's import list

f = 'my-mbox-file.mbox'
params = {}
params['internalDateSource'] = 'dateHeader'

# Insert every message of the mbox file; `g` is the authorized API context
# described in the surrounding text.
for m in mailbox.mbox(f, create=False):
    message_string = m.as_string()
    params['body'] = {'labelIds': ['INBOX']}

    if len(message_string) > 6000000:
        # Large message: send the content as a media upload.
        s = StringIO.StringIO()
        s.write(message_string)
        params['media_body'] = googleapiclient.http.MediaIoBaseUpload(
            s, mimetype='message/rfc822')
    else:
        # Small message: embed it base64url-encoded in the request body.
        params['body']['raw'] = (
            base64.urlsafe_b64encode(message_string))

    g.call(g.auth.users().messages().insert, params)

    # Drop the upload so it is not reused for the next (small) message.
    params.pop('media_body', None)
You need to use the MEDIA /upload option for things that large. Then you can send emails up to the max Gmail allows. Docs for how to use /upload:
https://developers.google.com/gmail/api/v1/reference/users/messages/send
The 10MB limitation is not well documented.

Categories

Resources