Checking email with Python - python

I would like to trigger a certain action upon receiving an email from a specific
address with a specific subject. To do so, I need to monitor my mailbox and
check every incoming mail (in particular, I use Gmail).
What is the easiest way to do that?

Gmail provides the ability to connect over POP, which you can turn on in the gmail settings panel. Python can make connections over POP pretty easily:
import poplib
from email import parser
# Connect over POP3/SSL (POP access has to be enabled in the Gmail settings).
pop_conn = poplib.POP3_SSL('pop.gmail.com')
try:
    pop_conn.user('username')
    pop_conn.pass_('password')
    # Get messages from server: list()[1] has one entry per message and POP3
    # message numbers start at 1.
    message_count = len(pop_conn.list()[1])
    raw_messages = [pop_conn.retr(i) for i in range(1, message_count + 1)]
    # Concat message pieces: retr() returns (response, lines, octets) and the
    # lines are bytes on Python 3.
    raw_messages = [b"\n".join(mssg[1]) for mssg in raw_messages]
    # Parse each message into an email object:
    messages = [parser.BytesParser().parsebytes(mssg) for mssg in raw_messages]
    for message in messages:
        print(message['subject'])
finally:
    # Always end the session, even if login or a download failed.
    pop_conn.quit()
You would just need to run this script as a cron job. Not sure what platform you're on so YMMV as to how that's done.

Gmail provides an atom feed for new email messages. You should be able to monitor this by authenticating with py cURL (or some other net library) and pulling down the feed. Making a GET request for each new message should mark it as read, so you won't have to keep track of which emails you've read.

While not Python-specific, I've always loved procmail wherever I could install it...!
Just use, as some of your action lines for the conditions of your choice, | pathtoyourscript (a vertical bar, AKA pipe, followed by the script you want to execute in those cases) and your mail gets piped, under the conditions of your choice, to the script of your choice, for it to do whatever it wants -- it is hard to think of a more general approach to "trigger actions of your choice upon receipt of mails that meet your specific conditions"! Of course there are no limits to how many conditions you can check, how many action lines a single condition can trigger (just enclose all the action lines you want in { } braces), etc.

People seem to be pumped up about Lamson:
https://github.com/zedshaw/lamson
It's an SMTP server written entirely in Python. I'm sure you could leverage that to do everything you need - just forward the gmail messages to that SMTP server and then do what you will.
However, I think it's probably easiest to do the ATOM feed recommendation above.
EDIT: Lamson has been abandoned

I found a pretty good snippet when I wanted to do this same thing (and the example uses gmail). Also check out the google search results on this.

I recently solved this problem by using procmail and python
Read the documentation for procmail. You can tell it to send all incoming email to a python script like this in a special procmail config file
:0:
| ./scripts/ppm_processor.py
Python has an "email" package available that can do anything you could possibly want to do with email. Read up on the following ones....
# Python 3 module paths: the capitalised modules email.Message, email.MIMEBase
# and email.MIMEText only existed in Python 2.
from email.generator import Generator
from email.message import Message
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

https://developers.google.com/gmail/gmail_inbox_feed
Says you have to have a corporate Gmail, but I have come to find that you can read Gmail free versions without issues. I use this code to get my blood pressure results I email or text to a gmail address.
import email
import imaplib
import os
import traceback
from datetime import datetime
from email.header import decode_header

import pandas as pd
import plotly
import plotly.graph_objs as go
# Print a start-up timestamp so the output of each run can be told apart.
now = datetime.now()
dt_string = now.strftime("%Y.%m.%d %H:%M:%S")
print("date_time:", dt_string)
# IMAP account settings.
# NOTE(review): credentials are hard-coded here; consider loading them from the
# environment or a config file that is kept out of version control.
email_account = '13123#gmail.com'
email_password = '131231231231231231312313F'
email_server = 'imap.gmail.com'
email_port = 993
# Only messages whose sender address is in this set are parsed by get_emails().
accept_emails_from = {'j1231312#gmail.com', '1312312#chase.com', '13131231313131#msg.fi.google.com'}
# When true, the functions below print extra [DEBUG] lines.
verbose = True
def _decode_header_text(raw_value):
    """Decode the first chunk of a (possibly RFC 2047-encoded) header to str.

    Returns an empty string when the header is missing.
    """
    if raw_value is None:
        return ''
    text, charset = decode_header(raw_value)[0]
    if isinstance(text, bytes):
        # decode_header() reports charset None for un-encoded byte chunks.
        text = text.decode(charset or 'utf-8', errors='replace')
    return text.replace('\r\n', '')


def _parse_reading(parse_data):
    """Convert 'date time systolic diastolic pulse weight' text to a CSV row.

    Fields may be separated by commas and/or any whitespace; anything after
    the sixth field is ignored.  The date may use '-' or '.' separators and
    the time may be written as 'HH:MM' or 'HHMM'.

    Raises:
        ValueError: if fewer than six fields are present or the date/time
            cannot be parsed.
    """
    # split() (not split(' ')) so repeated blanks and the trailing CR/LF of a
    # mail body do not produce empty or polluted fields.
    fields = parse_data.replace(',', ' ').split()
    if len(fields) < 6:
        raise ValueError(
            'expected 6 fields (date time systolic diastolic pulse weight), '
            'got %d' % len(fields))
    t_date, t_time, t_systolic, t_diastolic, t_pulse, t_weight = fields[:6]
    t_date = t_date.replace('-', '.')
    if ':' not in t_time:
        # "0730" -> "07:30"
        t_time = t_time[:2] + ':' + t_time[2:4]
    t_eval = t_date + ' ' + t_time
    if verbose:
        print()
        print('--------------------------------------------------------------------------------')
        print('[DEBUG] t_eval:'.ljust(30), t_eval)
    date_stamp = datetime.strptime(t_eval, '%Y.%m.%d %H:%M')
    if verbose:
        print('[DEBUG] date_stamp:'.ljust(30), date_stamp)
        print('[DEBUG] t_systolic:'.ljust(30), t_systolic)
        print('[DEBUG] t_diastolic:'.ljust(30), t_diastolic)
        print('[DEBUG] t_pulse:'.ljust(30), t_pulse)
        print('[DEBUG] t_weight:'.ljust(30), t_weight)
    return ','.join(
        (str(date_stamp), t_systolic, t_diastolic, t_pulse, t_weight)) + '\n'


def get_emails():
    """Read unseen inbox messages and turn accepted ones into CSV rows.

    Connects with the module-level email_* settings, fetches every UNSEEN
    message and, for senders listed in accept_emails_from, parses the
    text/plain body (or the subject for any other content type) into one
    CSV row.

    Returns:
        (status, email_number, local_csv_data): status is False if any
        exception occurred (it is printed, not raised); email_number counts
        the messages fetched; local_csv_data holds the rows built so far.
    """
    from email.utils import parseaddr

    email_number = 0
    local_csv_data = ''
    mail = None
    try:
        mail = imaplib.IMAP4_SSL(email_server, email_port)
        email_code, email_auth_status = mail.login(email_account, email_password)
        if verbose:
            print('[DEBUG] email_code: ', email_code)
            print('[DEBUG] email_auth_status: ', email_auth_status)
        mail.list()
        mail.select('inbox')
        # only get unread emails to process.
        email_code, messages = mail.search(None, '(UNSEEN)')
        for email_id in messages[0].split():
            email_number += 1
            email_code, email_data = mail.fetch(email_id, '(RFC822)')
            for response in email_data:
                # we only want the tuple; the other items are plain bytes.
                if not isinstance(response, tuple):
                    continue
                msg = email.message_from_bytes(response[1])
                # parseaddr() extracts the address even when the display name
                # is RFC 2047-encoded.
                email_from = parseaddr(msg.get('From', ''))[1].lower()
                if email_from not in accept_emails_from:
                    continue
                if msg.get_content_type() == 'text/plain':
                    charset = msg.get_content_charset() or 'utf-8'
                    parse_data = msg.get_payload(decode=True).decode(
                        charset, errors='replace')
                else:
                    parse_data = _decode_header_text(msg['Subject'])
                local_csv_data += _parse_reading(parse_data)
    except Exception as e:
        traceback.print_exc()
        print(str(e))
        return False, email_number, local_csv_data
    finally:
        if mail is not None:
            try:
                mail.logout()
            except (imaplib.IMAP4.error, OSError):
                # Best effort: the connection may already be gone.
                pass
    return True, email_number, local_csv_data
def update_csv(local_data, file_path=None):
    """Append rows to the results CSV, then sort and de-duplicate the file.

    Args:
        local_data: zero or more newline-terminated CSV rows to append.
        file_path: CSV file to maintain.  When omitted, the per-OS default
            location is used (the process exits with status 911 on an
            unsupported OS).

    Returns:
        The number of distinct lines left in the file, header included.
    """
    import sys

    if file_path is None:
        if os.name == 'posix':
            file_path = '/home/blood_pressure_results.txt'
        elif os.name == 'nt':
            file_path = '\\\\uncpath\\blood_pressure_results.txt'
        else:
            print('[ERROR] os not supported:'.ljust(30), os.name)
            sys.exit(911)
    if verbose:
        print('[DEBUG] file_path:'.ljust(30), file_path)
    column_names = ['00DateTime', 'Systolic', 'Diastolic', 'Pulse', 'Weight']
    if not os.path.exists(file_path):
        with open(file_path, 'w') as file:
            # No trailing comma: it would add an empty, unnamed column.
            file.write(','.join(column_names) + '\n')
    # append the new data to file.
    with open(file_path, 'a+') as file:
        file.write(local_data)
    # sort the file.
    df = pd.read_csv(file_path, usecols=column_names)
    df_sorted = df.sort_values(by=["00DateTime"], ascending=True)
    df_sorted.to_csv(file_path, index=False)
    # remove duplicates: compare whole lines through a set, not by substring
    # search in the accumulated text.
    seen_rows = set()
    kept_rows = []
    with open(file_path, 'r') as file:
        for row in file:
            if row in seen_rows:
                print('Duplicate:'.ljust(30), row, end='')
            else:
                seen_rows.add(row)
                kept_rows.append(row)
                print('Adding: '.ljust(30), row, end='')
    with open(file_path, 'w') as file:
        file.writelines(kept_rows)
    return len(kept_rows)
# run the main code to get emails.
status, emails, my_data = get_emails()
print('status:'.ljust(30), status)
print('emails:'.ljust(30), emails)
# Append whatever rows were collected (possibly none), then re-sort and
# de-duplicate the results file; this runs regardless of `status`.
csv_rows = update_csv(my_data)
print('csv_rows:'.ljust(30), csv_rows)
exit(0)

Related

'Latin-1' codec can't encode characters in position 1011-1013: ordinal not in range(256)

I am a newbie in Python, so my method of making code work is to refer to other people's code and modify it until it solves my problem.
I have written code to download the 'pdf' attachment with a particular name from an email. The code worked well on my Windows laptop, but my laptop cannot run 24 hours a day, so I planned to move the code to a Raspberry Pi 4 device.
I had to make some adjustments to the code to make it work on the Raspberry Pi, and it worked for some time. But now, when I try to run the code from the terminal on the Raspberry Pi, it always shows an error: 'latin-1' codec can't encode characters in position 1011-1013: ordinal not in range(256)
What is going on here? Why did the exact same code work last week, but not today?
Below is my code:
import imaplib
import email
from email.header import decode_header
import os
import sys
import webbrowser
# Account settings.
# NOTE(review): the address is built with a yahoo.com suffix while the server
# below is imap.gmail.com — confirm both belong to the same provider.
org_email = "#yahoo.com"
username = "test123" + org_email
password = "xxxxxxx"
# Despite the "smtp_" prefix, smtp_server is the host passed to
# imaplib.IMAP4_SSL; smtp_port is not referenced in the visible script.
smtp_server = "imap.gmail.com"
smtp_port = 993
def create(text):
    """Return the folder name to use for an attachment called ``text``.

    The first keyword found in ``text`` decides the folder; names matching no
    keyword go to "Spam".
    """
    # Order matters: earlier entries win when several keywords match.
    folder_by_keyword = (
        ("CCI Daily", "CCI Daily"),
        ("ICT", "Platts ICT"),
        ("Argus Coal Daily International", "Argus"),
        ("Fenwei Index Price Comparion", "Fenwei Index Price Comparisons"),
    )
    for keyword, foldername in folder_by_keyword:
        if keyword in text:
            return foldername
    return "Spam"
def _decode_mime_header(raw_value):
    """Return a header value as str, decoding every RFC 2047 chunk."""
    if raw_value is None:
        return ""
    chunks = []
    for text, charset in decode_header(raw_value):
        if isinstance(text, bytes):
            try:
                # charset is None for un-encoded byte chunks.
                text = text.decode(charset or "ascii", errors="replace")
            except LookupError:
                # Unknown charset name announced by the sender.
                text = text.decode("ascii", errors="replace")
        chunks.append(text)
    return "".join(chunks)


#Create Connection
mail = imaplib.IMAP4_SSL(smtp_server)
mail.login(username, password)
try:
    #Which Gmail Folder to Select
    mail.select("inbox")
    status, data = mail.search(None, "ALL")
    id_list = data[0].split()
    print("\nThere are", len(id_list), "emails detected")
    # Walk the mailbox from the newest message to the oldest.
    for message_id in reversed(id_list):
        message_num = message_id.decode()
        print("\n%s th email:" % message_num)
        res, fetched = mail.fetch(message_num, "(RFC822)")
        for response in fetched:
            if not isinstance(response, tuple):
                continue
            #parse a bytes email into a message object
            msg = email.message_from_bytes(response[1])
            subject = _decode_mime_header(msg["Subject"])
            From = _decode_mime_header(msg.get("From"))
            print("Subject: ", subject)
            print("===============================================")
            print("From: ", From)
            if not msg.is_multipart():
                continue
            #iterate over email parts
            for part in msg.walk():
                content_type = part.get_content_type()
                content_disposition = str(part.get("Content-Disposition"))
                print(content_type)
                if content_disposition != "None":
                    print(content_disposition)
                if content_type == "text/plain" and "attachment" not in content_disposition:
                    #print text/plain emails and skip attachments
                    payload = part.get_payload(decode=True)
                    if payload is not None:
                        charset = part.get_content_charset() or "utf-8"
                        try:
                            print(payload.decode(charset, errors="replace"))
                        except LookupError:
                            print(payload.decode("utf-8", errors="replace"))
                elif "attachment" in content_disposition:
                    #download attachment
                    filename = part.get_filename()
                    if filename and ("ICT" in filename or "CCI" in filename):
                        #create specific folder for specific filename
                        folder_name = create(filename)
                        print("Foldername:", folder_name)
                        os.makedirs(folder_name, exist_ok=True)
                        # basename(): never let a sender-supplied name escape
                        # the target folder.
                        filepath = os.path.join(folder_name, os.path.basename(filename))
                        with open(filepath, "wb") as attachment_file:
                            attachment_file.write(part.get_payload(decode=True))
                        # As in the original script: stop after the first
                        # (i.e. most recent) matching attachment.
                        sys.exit()
                    else:
                        print("We do not download this attachment")
finally:
    # Runs on normal completion, on errors and on sys.exit().
    mail.logout()
Since you actually have an encoding name, chances are you have malformed messages that, though they specify the "latin1" encoding, contain characters that it can't handle. Pass the extra named argument errors="replace" in your calls to "decode": out-of-range characters will be replaced with a "�", but the app won't stop.
If it's a Unicode text file, don't use open(filepath, "wb"); instead use open(filepath, "w", encoding="utf-8"). You can also use a try/except block, depending on the situation:
try:
    # Binary mode accepts bytes only.
    with open(filepath, "wb") as out_file:
        out_file.write(body)
except (TypeError, UnicodeEncodeError):
    # body is text: writing str to a binary file raises TypeError, so write
    # it as UTF-8 text instead.
    with open(filepath, "w", encoding="utf-8") as out_file:
        out_file.write(body)

Python Gmail API : How to batch download of email attachments and emails?

I have the below code, which downloads a Gmail email and its attachments. It returns its attachments.
def gmailAPIDownloadAttachments(self, messageID, userID="me"):
    """Download the attachments found on a message's top-level parts.

    Args:
        messageID: id of the Gmail message to inspect.
        userID: passed as ``userId`` to the Gmail API calls.

    Returns:
        A list of ``Attachment`` objects (empty when the message has no
        attachments), or ``False`` if the message could not be retrieved or
        any error occurred; errors are logged, not raised.
    """
    try:
        service = self.gmailAPIService
        self.GLogger.info("Attempting to download attachments from messageID (" +str(messageID)+ ")")
        message = self.gmailAPIGetFullMessage(messageID, userID=userID)
        if message is False:
            self.GLogger.error("Failed to extract message (" +str(messageID)+ ") for downloading attachments")
            return False
        attachmentList = []
        for part in message['payload'].get('parts', []):
            filename = part['filename']
            if not filename:
                # Parts without a filename are not attachments.
                continue
            body = part['body']
            if 'data' in body:
                data = body['data']
            else:
                # The content is not inline: fetch it by attachment id.
                att = service.users().messages().attachments().get(userId=userID, messageId=messageID, id=body['attachmentId']).execute()
                data = att['data']
            file_data = base64.urlsafe_b64decode(data.encode('UTF-8'))
            # Split at the first dot, as before; a name without any dot is
            # kept whole (it used to lose its last character).
            partFileName, _, ext = filename.partition('.')
            attachmentList.append(Attachment(filename, partFileName, ext, file_data))
        self.GLogger.info("Successfully downloaded attachments from messageID (" +str(messageID)+ ")")
        return attachmentList
    except Exception:
        self.GLogger.error("Encountered an error while attempting to download email attacments from messageID (" +str(messageID)+ ")")
        tb = traceback.format_exc()
        self.GLogger.exception(tb)
        return False
I understand how to convert fetching messages into batching. For example, this is how one could batch-fetch messages:
from apiclient.http import BatchHttpRequest
import json
def handle_message(request_id, response, exception):
    """Batch callback: called once for every request queued on the batch."""
    if exception is not None:
        # One failed request must not abort the rest of the batch.
        print('Request %s failed: %s' % (request_id, exception))
        return
    # `response` is already the deserialized message resource.
    message = response
    #handle your message here, like a regular email object


# Use the public callback API instead of reading the private
# batch._order / batch._responses attributes.
batch = service.new_batch_http_request(callback=handle_message)
#assume we got messages from Gmail query API
for message in messages:
    batch.add(service.users().messages().get(userId='me', id=message['id'],
                                             format='raw'))
batch.execute()
However, the attachments aspect seem to have logic and other possible fetches such as in this part:
att_id = part['body']['attachmentId']
att = service.users().messages().attachments().get(userId=userID, messageId=messageID, id=att_id).execute()
data = att['data']
How can I effectively batch both fetching the message and its attachments? I would like to be able to quickly fetch many emails at once.

How to print email body from outlook without signature - Python

I'm trying to parse emails from Outlook.
I would like the following printed:
subject
body (excluding sender's signature)
Ignore all previous emails from the conversation (replies & forwards)
Is there any way I can print out the body text before multi-space between lines (usually this is how signature being separated from the main text)?
Any help would be appreciated!
import win32com.client
#other libraries to be used in this script
import os
from datetime import datetime, timedelta
outlook = win32com.client.Dispatch('outlook.application')
mapi = outlook.GetNamespace("MAPI")
for account in mapi.Accounts:
    print(account.DeliveryStore.DisplayName)
inbox = mapi.GetDefaultFolder(6)  # 6 is the Inbox in Outlook's OlDefaultFolders
messages = inbox.Items
messages.Sort('[ReceivedTime]', True)
received_dt = datetime.now() - timedelta(days=1)
# %I (12-hour clock) is the hour format that belongs with %p (AM/PM);
# %H would produce strings such as "15:30 PM".
received_dt = received_dt.strftime('%m/%d/%Y %I:%M %p')
messages = messages.Restrict("[ReceivedTime] >= '" + received_dt + "'")
messages = messages.Restrict("[SenderEmailAddress] = 'firstname.lastname#gmail.com'")
message = messages.GetFirst()
print("Current date/time: " + received_dt)
while message:
    print(message.Subject)
    print(message.body)
    message = messages.GetNext()
You can use a regex to ignore everything after three newlines (there are normally one or two newlines between paragraphs):
import re
# Capture everything before the FIRST run of three line breaks.  The match is
# non-greedy (a greedy ".*" would run up to the last such gap) and accepts
# both "\n" and "\r\n" line endings.
r = re.compile(r"(.*?)(?:\r?\n){3}", re.DOTALL)
# ...
while message:
    # ...
    match = r.match(message.body)
    if match:
        # group(1) is the captured text; groups() would return a tuple.
        body_without_signature = match.group(1)
    else:
        # No signature found
        body_without_signature = message.body
    print(body_without_signature)

How do I download only unread attachments from a specific gmail label?

I have a Python script adapted from Downloading MMS emails sent to Gmail using Python
import email, getpass, imaplib, os
detach_dir = '.'  # directory where to save attachments (default: current)
user = input("Enter your GMail username:")
pwd = getpass.getpass("Enter your password: ")
# connecting to the gmail imap server
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login(user, pwd)
m.select("[Gmail]/All Mail")  # here you a can choose a mail box like INBOX instead
# use m.list() to get all the mailboxes
resp, items = m.search(None, 'FROM', '"Impact Stats Script"')  # you could filter using the IMAP rules here (check http://www.example-code.com/csharp/imap-search-critera.asp)
items = items[0].split()  # getting the mails id
# Numbers the attachments that arrive without a filename.  Kept outside the
# loops so every generated name is distinct.
counter = 1
for emailid in items:
    # "(RFC822)" means "get the whole stuff", but you can ask for headers only, etc
    resp, data = m.fetch(emailid, "(RFC822)")
    email_body = data[0][1]  # getting the mail content (bytes)
    mail = email.message_from_bytes(email_body)  # parsing the mail content to get a mail object
    # Check if any attachments at all
    if mail.get_content_maintype() != 'multipart':
        continue
    print("[%s] :%s" % (mail["From"], mail["Subject"]))
    # walk() yields every part, so nested containers need no recursion here
    for part in mail.walk():
        # multipart are just containers, so we skip them
        if part.get_content_maintype() == 'multipart':
            continue
        # is this part an attachment ?
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        # if there is no filename, we create one with a counter to avoid duplicates
        if not filename:
            filename = 'part-%03d.%s' % (counter, 'bin')
            counter += 1
        # basename(): never let a sender-supplied name escape detach_dir
        att_path = os.path.join(detach_dir, os.path.basename(filename))
        # Check if its already there
        if not os.path.isfile(att_path):
            # finally write the stuff
            with open(att_path, 'wb') as fp:
                fp.write(part.get_payload(decode=True))
I am filtering messages by subject and getting the attachments, but now I need to only get attachments from new emails. Can I modify the m.search() somehow to return only unread emails?
Try modifying this line:
resp, items = m.search(None, 'FROM', '"Impact Stats Script"')
to:
resp, items = m.search(None, 'UNSEEN', 'FROM', '"Impact Stats Script"')
The Python imaplib documentation shows just adding more search criteria, and the IMAP specification defines the UNSEEN search criteria:
UNSEEN
Messages that do not have the \Seen flag set.

python imaplib to get gmail inbox subjects titles and sender name

I'm using pythons imaplib to connect to my gmail account. I want to retrieve the top 15 messages (unread or read, it doesn't matter) and display just the subjects and sender name (or address) but don't know how to display the contents of the inbox.
Here is my code so far (successful connection)
import imaplib
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('mygmail#gmail.com', 'somecrazypassword')
mail.list()
mail.select('inbox')
#need to add some stuff in here
mail.logout()
I believe this should be simple enough; I'm just not familiar enough with the commands of the imaplib library. Any help would be much appreciated...
UPDATE
thanks to Julian I can iterate through each message and retrieve the entire contents with:
typ, data = mail.search(None, 'ALL')
for num in data[0].split():
    # A separate name keeps the search result intact while looping over it.
    typ, msg_data = mail.fetch(num, '(RFC822)')
    # Message ids and raw messages are bytes on Python 3.
    print('Message %s\n%s\n' % (num.decode(), msg_data[0][1].decode(errors='replace')))
mail.close()
but I'm wanting just the subject and the sender. Is there a imaplib command for these items or will I have to parse the entire contents of data[0][1] for the text: Subject, and Sender?
UPDATE
OK, got the subject and sender part working but the iteration (1, 15) is done by desc order apparently showing me the oldest messages first. How can I change this? I tried doing this:
for i in range( len(data[0])-15, len(data[0]) ):
print data
but that just gives me None for all 15 iterations... any ideas? I've also tried mail.sort('REVERSE DATE', 'UTF-8', 'ALL') but gmail doesnt support the .sort() function
UPDATE
Figured out a way to do it:
#....^other code is the same as above except need to import email module
mail.select('inbox')
typ, data = mail.search(None, 'ALL')
id_list = data[0].split()
# Take the 15 most recent ids (fewer if the mailbox is smaller) and walk them
# newest first.
for message_id in reversed(id_list[-15:]):
    typ, msg_data = mail.fetch(message_id, '(RFC822)')
    for response_part in msg_data:
        if isinstance(response_part, tuple):
            msg = email.message_from_bytes(response_part[1])
            # Missing headers come back as None.
            varSubject = msg['subject'] or ''
            varFrom = msg['from'] or ''
            #remove the brackets around the sender email address
            varFrom = varFrom.replace('<', '').replace('>', '')
            #add ellipsis (...) if subject length is greater than 35 characters
            if len(varSubject) > 35:
                varSubject = varSubject[0:32] + '...'
            # The address is the last whitespace-separated token of From.
            from_tokens = varFrom.split()
            sender = from_tokens[-1] if from_tokens else ''
            print('[' + sender + '] ' + varSubject)
This gives me the most recent 15 message subjects and sender addresses in descending order, as requested! Thanks to all who helped!
# Read-only select: fetching does not mark the messages as seen.
c.select('INBOX', readonly=True)
for i in range(1, 30):
    typ, msg_data = c.fetch(str(i), '(RFC822)')
    for response_part in msg_data:
        if isinstance(response_part, tuple):
            # fetch() returns the raw message as bytes on Python 3.
            msg = email.message_from_bytes(response_part[1])
            for header in ['subject', 'to', 'from']:
                print('%-8s: %s' % (header.upper(), msg[header]))
This should give you an idea on how to retrieve the subject and from?
This was my solution to get the useful bits of information from emails:
import datetime
import email
import imaplib
import mailbox
# Import the submodules explicitly; "import email" alone does not guarantee
# that email.header and email.utils are loaded.
from email.header import decode_header, make_header
from email.utils import mktime_tz, parsedate_tz

EMAIL_ACCOUNT = "your#gmail.com"
PASSWORD = "your password"
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
mail.list()
mail.select('inbox')
result, data = mail.uid('search', None, "UNSEEN")  # (ALL/UNSEEN)
for x, email_uid in enumerate(data[0].split()):
    result, email_data = mail.uid('fetch', email_uid, '(RFC822)')
    # result, email_data = conn.store(num,'-FLAGS','\\Seen')
    # this might work to set flag to seen, if it doesn't already
    # Parse the raw bytes directly; the message is not necessarily UTF-8.
    email_message = email.message_from_bytes(email_data[0][1])
    # Header Details
    local_message_date = ''  # stays empty when the Date header is missing/invalid
    date_tuple = parsedate_tz(email_message['Date'])
    if date_tuple:
        local_date = datetime.datetime.fromtimestamp(mktime_tz(date_tuple))
        local_message_date = local_date.strftime("%a, %d %b %Y %H:%M:%S")
    email_from = str(make_header(decode_header(email_message.get('From', ''))))
    email_to = str(make_header(decode_header(email_message.get('To', ''))))
    subject = str(make_header(decode_header(email_message.get('Subject', ''))))
    # Body details
    for part in email_message.walk():
        if part.get_content_type() != "text/plain":
            continue
        body = part.get_payload(decode=True)
        charset = part.get_content_charset() or 'utf-8'
        try:
            body_text = body.decode(charset, errors='replace')
        except LookupError:
            # Unknown charset name announced by the sender.
            body_text = body.decode('utf-8', errors='replace')
        file_name = "email_" + str(x) + ".txt"
        # An explicit encoding keeps the write from failing on platforms whose
        # default encoding cannot represent the text.
        with open(file_name, 'w', encoding='utf-8') as output_file:
            output_file.write("From: %s\nTo: %s\nDate: %s\nSubject: %s\n\nBody: \n\n%s" % (email_from, email_to, local_message_date, subject, body_text))
For those looking for how to check mail and parse the headers, this is what I used:
def parse_header(str_after, checkli_name, mailbox):
    """Print selected headers of every message sent yesterday in ``mailbox``.

    Uses the module-level IMAP connection ``m``.  ``str_after`` and
    ``checkli_name`` are accepted but not used.  Returns None.
    """
    import email
    from email.utils import parseaddr

    #typ, data = m.search(None,'SENTON', str_after)
    print(mailbox)
    m.select(mailbox)
    date = (datetime.date.today() - datetime.timedelta(1)).strftime("%d-%b-%Y")
    #date = (datetime.date.today().strftime("%d-%b-%Y"))
    #date = "23-Jul-2012"
    print(date)
    result, data = m.uid('search', None, '(SENTON %s)' % date)
    print(data)
    for latest_email_uid in data[0].split():
        print(latest_email_uid)
        # A separate name keeps the search result intact while looping.
        result, fetched = m.uid('fetch', latest_email_uid, '(RFC822)')
        raw_email = fetched[0][1]
        # The raw message is bytes on Python 3.
        email_message = email.message_from_bytes(raw_email)
        print(email_message['To'])
        print(email_message['Subject'])
        print(parseaddr(email_message['From'] or ''))
        print(email_message.items())  # print all headers
I was looking for a ready made simple script to list last inbox via IMAP without sorting through all messages. The information here is useful, though DIY and misses some aspects. First, IMAP4.select returns message count. Second, subject header decoding isn't straightforward.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import imaplib
import email
from email.header import decode_header
import html


def decodeHeader(value):
    """Return a mail header as text: RFC 2047 decoded, XML entities unescaped.

    A missing header (None) yields an empty string.
    """
    if value is None:
        return ''
    if value.startswith('"=?'):
        # Quotes around an encoded word stop decode_header() recognising it.
        value = value.replace('"', '')
    chunks = []
    for text, encoding in decode_header(value):
        if isinstance(text, bytes):
            try:
                # encoding is None for un-encoded byte chunks.
                text = text.decode(encoding or 'ascii', errors='replace')
            except LookupError:
                # Unknown charset name announced by the sender.
                text = text.decode('ascii', errors='replace')
        chunks.append(text)
    # html.unescape() replaces the Python 2 HTMLParser().unescape().
    return html.unescape(''.join(chunks))
def listLastInbox(top = 4):
    """Yield subject/from/date of the ``top`` newest INBOX messages.

    Messages are produced newest first, each as a dict with the keys
    'subject', 'from' and 'date'.  The connection is logged out when the
    generator finishes or is closed.
    """
    mailbox = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        mailbox.login('mygmail#gmail.com', 'somecrazypassword')
        selected = mailbox.select('INBOX')
        assert selected[0] == 'OK'
        # select() reports the number of messages in the mailbox.
        messageCount = int(selected[1][0])
        # Message numbers start at 1: never count down past the first one.
        oldest = max(messageCount - top, 0)
        for i in range(messageCount, oldest, -1):
            reponse = mailbox.fetch(str(i), '(RFC822)')[1]
            for part in reponse:
                if isinstance(part, tuple):
                    # The raw message is bytes on Python 3.
                    message = email.message_from_bytes(part[1])
                    yield {h: decodeHeader(message[h] or '') for h in ('subject', 'from', 'date')}
    finally:
        mailbox.logout()
if __name__ == '__main__':
    # Print each message's headers under a separator line.
    for message in listLastInbox():
        print('-' * 40)
        for h, v in message.items():
            print('{0:8s}: {1}'.format(h.upper(), v))
BODY gets almost everything and marks the message as read.
BODY[<parts>] gets just those parts.
BODY.PEEK[<parts>] gets the same parts, but doesn't mark the message read.
<parts> can be HEADER or TEXT or HEADER.FIELDS (<list of fields>) or
HEADER.FIELDS.NOT (<list of fields>)
This is what I use: typ, data = connection.fetch(message_num_s, b'(BODY.PEEK[HEADER.FIELDS (SUBJECT FROM)])')
`
def safe_encode(seq):
    """Yield each item of ``seq`` as bytes.

    ``seq`` may be a list/tuple or a single value.  Numbers and strings are
    encoded, bytes pass through unchanged.

    Raises:
        ValueError: for an item of any other type.
    """
    # Check the TYPE of seq; "seq not in (list, tuple)" compared the value
    # against the type objects and therefore wrapped real lists as well.
    if not isinstance(seq, (list, tuple)):
        seq = [seq]
    for i in seq:
        if isinstance(i, (int, float)):
            yield str(i).encode()
        elif isinstance(i, str):
            yield i.encode()
        elif isinstance(i, bytes):
            yield i
        else:
            raise ValueError
def fetch_fields(connection, message_num, field_s):
    """Fetch just the fields we care about. Parse them into a dict

    Args:
        connection: object whose ``fetch(message_num, spec)`` returns
            ``(typ, data)`` the way ``imaplib.IMAP4.fetch`` does.
        message_num: message number (int, str or bytes).
        field_s: one header name, a space-separated string of names, or a
            list/tuple of names.

    Returns:
        ``(typ, items)``, where ``items`` maps each capitalised header name
        (bytes) to its raw value (bytes, text after the colon as received).
        When ``typ`` is not 'OK' the server's ``data`` is returned unparsed
        in place of ``items``.
    """
    if isinstance(field_s, (list, tuple)):
        # Encode item by item so every entry is validated individually.
        field_s = b' '.join(tuple(safe_encode(f))[0] for f in field_s)
    else:
        field_s = tuple(safe_encode(field_s))[0]
    message_num = tuple(safe_encode(message_num))[0]
    typ, data = connection.fetch(message_num, b'(BODY.PEEK[HEADER.FIELDS (%s)])' % field_s.upper())
    if typ != 'OK':
        return typ, data  #change this to an exception if you'd rather
    items = {}
    if not data or not isinstance(data[0], tuple):
        # Nothing was returned for this message number.
        return typ, items
    lastkey = None
    for line in data[0][1].splitlines():
        if not line.strip():
            # Blank line that ends the header block.
            continue
        if line[:1] in (b' ', b'\t'):
            # A folded header continues on a line starting with whitespace,
            # whether or not that line contains a ':'.
            if lastkey is not None:
                items[lastkey] += line
            continue
        if b':' in line:
            lastkey, value = line.strip().split(b':', 1)
            #not all servers capitalize the same, and some just leave it
            #as however it arrived from some other mail server.
            lastkey = lastkey.capitalize()
            items[lastkey] = value
        elif lastkey is not None:
            items[lastkey] += line
    return typ, items
`
You drop it into your code example by replacing the call to 'mail.fetch()' with fetch_fields(mail, i, 'SUBJECT FROM') or fetch_fields(mail, i, ('SUBJECT', 'FROM'))
Adding to all the above answers.
import imaplib
import base64
import os
import email
if __name__ == '__main__':
    email_user = "email#domain.com"
    email_pass = "********"
    mail = imaplib.IMAP4_SSL("hostname", 993)
    mail.login(email_user, email_pass)
    try:
        # Select once, read-only, so fetching does not mark messages as seen.
        mail.select('INBOX', readonly=True)
        status, data = mail.search(None, 'ALL')
        for message_id in data[0].split():
            typ, msg_data = mail.fetch(message_id, '(RFC822)')
            for response_part in msg_data:
                if isinstance(response_part, tuple):
                    msg = email.message_from_bytes(response_part[1])
                    # %-formatting copes with a missing (None) header.
                    print("%s\t%s" % (msg['from'], msg['subject']))
    finally:
        mail.logout()
This will give you the email's from and subject name.

Categories

Resources