Use python to download email attachments only based on Subject - python

The following code uses imap to find emails by subject line and returns all parts of the email and downloads the attachments. However i am ONLY needing it to download the attachments of the email not the entire body also. I understand this has to do with the for part in email_message.walk(): that is iterating the entire email. Could someone please help me have this code download only the attachment of the email? Im sure this is a simple code change but im just not sure how to make it!
import imaplib
import email.header
import os
import sys
import csv
# Your IMAP Settings
host = 'imap.gmail.com'
user = 'User email'
password = 'User password'
# Connect to the server
print('Connecting to ' + host)
mailBox = imaplib.IMAP4_SSL(host)
# Login to our account
mailBox.login(user, password)
boxList = mailBox.list()
# print(boxList)
mailBox.select()
searchQuery = '(SUBJECT "CDR Schedule output from schedule: This is a test to see how it works")'
result, data = mailBox.uid('search', None, searchQuery)
ids = data[0]
# list of uids
id_list = ids.split()
i = len(id_list)
for x in range(i):
latest_email_uid = id_list[x]
# fetch the email body (RFC822) for the given ID
result, email_data = mailBox.uid('fetch', latest_email_uid, '(RFC822)')
# I think I am fetching a bit too much here...
raw_email = email_data[0][1]
# converts byte literal to string removing b''
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
# downloading attachments
for part in email_message.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
if bool(fileName):
filePath = os.path.join('C:/install files/', fileName)
if not os.path.isfile(filePath) :
fp = open(filePath, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
subject = str(email_message).split("Subject: ", 1)[1].split("\nTo:", 1)[0]
print('Downloaded "{file}" from email titled "{subject}" with UID {uid}.'.format(file=fileName, subject=subject, uid=latest_email_uid.decode('utf-8')))
mailBox.close()
mailBox.logout()

Related

Looping thru file types and attaching them into email

I have a list of text files and html files generated by two distinct functions. Each file is labeled signal1.txt, signal2, etc. and signal1.html, signal2.html, etc. I need to send an email with each file pair (signal1.txt and signal1.html, signal2.txt and signal.2.html, and so forth).
I've tried several different ways, but I keep getting just one file pair attached (the last file number whatever it is) over and over. I have no problem sending one file type, but it gets messy when I try with two different files. I'd like to give you as much info as possible and perhaps enough reproducible code for you to try it out on your end if you wish, so my apologies for the long question.
The data is collected from the server. The final result is sorted using the Counter module:
data = Counter({('A user account was locked out ', 47, 'medium', 25): 1, ('An attempt was made to reset an accounts password ', 73, 'high', 2): 1, ('PowerShell Keylogging Script', 73, 'high', 37): 1, ('PowerShell Suspicious Script with Audio Capture Capabilities', 47, 'medium', 36): 1})
I need the rule name to be used in the email subject, so everything else is junk. For instance, in ('A user account was locked out ', 47, 'medium', 25): 1, I only need A user account was locked out. So the following function takes care of all that:
def create_txt_files():
global regex
global count
count = 0
#Convert dict into string and remove unwanted chars
for signal in dict(event_dict).keys():
indiv_signal = (str(signal).replace(",",'').replace('(','').replace(')','')\
.replace("'",'').replace('[','').replace(']',''))
#Further removal of debris using regex
pattern = '^(\D*)'
regex = ''.join(re.findall(pattern,indiv_signal,re.MULTILINE))
count +=1
with open(f"signal{count}.txt", "w") as fh:
fh.write(str(regex))
create_txt_files()
I also need to create html files that will go in the body of the email as a Dataframe. In this case I need almost all the fields in the data file. The dataframe should look like this:
Alert Score Risk Severity Total
0 A user account was locked out 47 medium 26
The following function takes care of that:
#Create Individual HTML files
def create_indiv_html_files():
global html_file
global count
count = 0
#Turn rows into columns
for items in list(event_dict):
df = pd.DataFrame(items)
new_df = df.transpose()
new_df.columns = ['Alert','Score Risk','Severity','Total']
html_file = new_df.to_html()
print(new_df)
count +=1
with open(f'signal{count}.html','w') as wf:
wf.write(html_file)
create_indiv_html_files()
So, up to this point everything is fine and dandy, albeit not as pretty a code as I'd like. But it works, and that's all I'm worried about now. The problem is that when I send the email, I'm getting only one rule (the last one) sent over and over. It's not iterating over the txt and html files and attaching them as it should.
Here is the email function I'm using. Despite my several different attempts, I still have not been able to figure out what's wrong. Thank you for taking the time to help.
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email import encoders
import smtplib, ssl
import os
dirname = r'C:\Path\To\Files'
ext = ('.txt','html')
for files in os.scandir(dirname):
if files.path.endswith(ext):
def sendmail():
html_body = '''
<html>
<body>
<p style="font-size: 12;"> <strong>Alert</strong><br>{html_file}</p>
</body>
</html>
'''.format(html_file=html_file)
subject = f'Alert: {regex} '
senders_email = 'mail#mail.comt'
receiver_email = 'mail#mail.comt'
# Create a multipart message and set headers
message = MIMEMultipart('alternative')
message['From'] = senders_email
message['To'] = receiver_email
message['Subject'] = subject
#Attach email body
message.attach(MIMEText(html_body, 'html'))
# Name of the file to be attached
filename = f'signal{count}.html'
# Open file in binary mode
with open(filename, 'rb') as attachment:
# Add file as application/octet-stream
part = MIMEBase('application', 'octet-stream')
part.set_payload(attachment.read())
# Encodes file in ASCII characters to send via email
encoders.encode_base64(part)
# Add header as key/value pair to attachment part
part.add_header(
'Content-Disposition',
f"attachment; filename= {filename}",
)
# Add attachment to message and convert message to string
message.attach(part)
text = message.as_string()
# Log into server using secure connection
context = ssl.create_default_context()
with smtplib.SMTP("smtp.mail.com", 25) as server:
# server.starttls(context=context)
# server.login(senders_email, 'password')
server.sendmail(senders_email, receiver_email, text)
print("Email sent!")
sendmail()
I rewrote the code and removed the global variables. Below code should work let me know if you get any errors.
import pathlib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email import encoders
import smtplib, ssl
import os
def create_txt_files(event_dict):
regex = []
count = 0
#Convert dict into string and remove unwanted chars
for signal in dict(event_dict).keys():
indiv_signal = (str(signal).replace(",",'').replace('(','').replace(')','')\
.replace("'",'').replace('[','').replace(']',''))
#Further removal of debris using regex
pattern = '^(\D*)'
regex.append(''.join(re.findall(pattern,indiv_signal,re.MULTILINE)))
count +=1
with open(f"signal{count}.txt", "w") as fh:
fh.write(str(regex[0]))
return regex
def create_indiv_html_files(event_dict):
html_file = []
count = 0
#Turn rows into columns
for items in list(event_dict):
df = pd.DataFrame(items)
new_df = df.transpose()
new_df.columns = ['Alert','Score Risk','Severity','Total']
html_file.append(new_df.to_html())
print(new_df)
count +=1
with open(f'signal{count}.html','w') as wf:
wf.write(html_file[0])
return html_file
def sendmail(html_file, regex, path_html):
html_body = '''
<html>
<body>
<p style="font-size: 12;"> <strong>Alert</strong><br>{html_file}</p>
</body>
</html>
'''.format(html_file=html_file)
subject = f'Alert: {regex} '
senders_email = 'mail#mail.comt'
receiver_email = 'mail#mail.comt'
# Create a multipart message and set headers
message = MIMEMultipart('alternative')
message['From'] = senders_email
message['To'] = receiver_email
message['Subject'] = subject
#Attach email body
message.attach(MIMEText(html_body, 'html'))
# Name of the file to be attached
# filename = f'signal{count}.html'
# Open file in binary mode
with open(path_html, 'rb') as attachment:
# Add file as application/octet-stream
part = MIMEBase('application', 'octet-stream')
part.set_payload(attachment.read())
# Encodes file in ASCII characters to send via email
encoders.encode_base64(part)
# Add header as key/value pair to attachment part
part.add_header(
'Content-Disposition',
f"attachment; filename= {path_html.name}",
)
# Add attachment to message and convert message to string
message.attach(part)
text = message.as_string()
# Log into server using secure connection
context = ssl.create_default_context()
with smtplib.SMTP("smtp.mail.com", 25) as server:
# server.starttls(context=context)
# server.login(senders_email, 'password')
server.sendmail(senders_email, receiver_email, text)
print("Email sent!")
data = Counter({('A user account was locked out ', 47, 'medium', 25): 1, ('An attempt was made to reset an accounts password ', 73, 'high', 2): 1, ('PowerShell Keylogging Script', 73, 'high', 37): 1, ('PowerShell Suspicious Script with Audio Capture Capabilities', 47, 'medium', 36): 1})
regex = create_txt_files(data)
html_file = create_indiv_html_files(data)
signalfiles = sorted(list(pathlib.Path('C:\Path\To\Files').glob('*.txt')))
htmlfiles = sorted(list(pathlib.Path('C:\Path\To\Files').glob('*.html')))
for i, path_html_file in enumerate(htmlfiles):
sendmail(html_file[i], regex[i], path_html_file)
The create_txt_files and create_indiv_html_files takes input the Counter dictionary. Sendmail function will take in regex, html_file string and html_file path.

Gmail content downloader/parser

I'm trying to download all my email content from gmail using a python script but this is what I found. I tried it and it's only downloading the attachments into pdf files. Is there a way to modify it to download just the email content. I'm also trying to parse just the url links in my emails.
import email
import imaplib
import os
server = 'imap.gmail.com'
user = '#gmail.com'
password = 'pass'
outputdir = 'lolz'
subject = 'Order Completed' #subject line of the emails you want to download attachments from
def connect(server, user, password):
m = imaplib.IMAP4_SSL(server)
m.login(user, password)
m.select()
return m
def downloaAttachmentsInEmail(m, emailid, outputdir):
resp, data = m.fetch(emailid, "(BODY.PEEK[])")
email_body = data[0][1]
mail = email.message_from_bytes(email_body)
if mail.get_content_maintype() != 'multipart':
return
for part in mail.walk():
if part.get_content_maintype() != 'multipart' and part.get('Content-Disposition') is not None:
open(outputdir + '/' + part.get_filename(), 'wb').write(part.get_payload(decode=True))
#download attachments from all emails with a specified subject line
def downloadAttachments(subject):
m = connect(server, user, password)
m.select("Inbox")
typ, msgs = m.search(None, '(SUBJECT "' + subject + '")')
msgs = msgs[0].split()
for emailid in msgs:
downloaAttachmentsInEmail(m, emailid, outputdir)
downloadAttachments(subject)

Read Latest Hotmail Emails in Python

I've been searching for a way to get the latest emails from my hotmail account (specifically the FROM and MESSAGE) using Python. The solutions mostly seem to be for gmail which isn't working as I would like.
Example 1: Using the Gmail examples - msg spits out a lot of unwanted data and the attempt to get subject, to and from returns blanks for each.
import imaplib
import email
from email.mime.multipart import MIMEMultipart
mail = imaplib.IMAP4_SSL('outlook.office365.com')
mail.login('myemail#hotmail.com', 'password')
mail.list()
mail.select('inbox')
for i in range(1, 5):
typ, msg_data = mail.fetch(str(i), '(RFC822)')
for response_part in msg_data:
if isinstance(response_part, tuple):
# print(response_part[1])
msg = email.message_from_string(str(response_part[1]))
print(msg)
for header in [ 'subject', 'to', 'from' ]:
print('%-8s: %s' % (header.upper(), msg[header]))
mail.close()
mail.logout()
Example 2: Gets last outlook email contents but cannot seem to get more (e.g. last 5)
import imaplib
msrvr = imaplib.IMAP4_SSL('outlook.office365.com', 993)
unm = 'myemail#hotmail.com'
pwd = 'password'
msrvr.login(unm, pwd)
print(str(len(msrvr.select('inbox'))))
stat,cnt = msrvr.select('inbox')
print(str(len(cnt)))
for i in range(0,5):
stat,dta = msrvr.fetch(cnt[i], '(BODY[TEXT])')
print(dta[0][1])
msrvr.close()
msrvr.logout()
Any thoughts how I could get the last 5 emails with FROM and MESSAGE?

Python: Keep checking new email and alert of further new emails

I have this code that checks the latest email and then goes and does something. Is it possible to write something that keeps checking the inbox folder for new mail? Although I want it to keep checking for the latest new email. Is it getting too complicated if I try and store that it has made one pass? So it doesn't alert about the same email twice about the same email.
Code:
import imaplib
import email
import Tkinter as tk
word = ["href=", "href", "<a href="] #list of strings to search for in email body
#connection to the email server
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('xxxx', 'xxxx')
mail.list()
# Out: list of "folders" aka labels in gmail.
mail.select("Inbox", readonly=True) # connect to inbox.
result, data = mail.uid('search', None, "ALL") # search and return uids instead
ids = data[0] # data is a list.
id_list = ids.split() # ids is a space separated string
latest_email_uid = data[0].split()[-1]
result, data = mail.uid('fetch', latest_email_uid, '(RFC822)') # fetch the email headers and body (RFC822) for the given ID
raw_email = data[0][1] # here's the body, which is raw headers and html and body of the whole email
# including headers and alternate payloads
.....goes and does other code regarding to email html....
Try to use this approach:
Logic is the same as from #tripleee comment.
import time
word = ["href=", "href", "<a href="] #list of strings to search for in email body
#connection to the email server
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('xxxx', 'xxxx')
mail.list()
# Out: list of "folders" aka labels in gmail.
latest_email_uid = ''
while True:
mail.select("Inbox", readonly=True)
result, data = mail.uid('search', None, "ALL") # search and return uids instead
ids = data[0] # data is a list.
id_list = ids.split() # ids is a space separated string
if data[0].split()[-1] == latest_email_uid:
time.sleep(120) # put your value here, be sure that this value is sufficient ( see #tripleee comment below)
else:
result, data = mail.uid('fetch', latest_email_uid, '(RFC822)') # fetch the email headers and body (RFC822) for the given ID
raw_email = data[0][1]
latest_email_uid == data[0].split()[-1]
time.sleep(120) # put your value here, be sure that this value is sufficient ( see #tripleee comment below)

How to fetch an email body using imaplib in python?

I'd like to fetch the whole message from IMAP4 server.
In python docs if found this bit of code that works:
>>> t, data = M.fetch('1', '(RFC822)')
>>> body = data[0][1]
I'm wondering if I can always trust that data[0][1] returns the body of the message. When I've run 'RFC822.SIZE' I've got just a string instead of a tuple.
I've skimmed through rfc1730 but I wasn't able to figure out the proper response structure for the 'RFC822'. It is also hard to tell the fetch result structure from imaplib documentation.
Here is what I'm getting when fetching RFC822:
('OK', [('1 (RFC822 {858569}', 'body of the message', ')')])
But when I fetch RFC822.SIZE I'm getting:
('OK', ['1 (RFC822.SIZE 847403)'])
How should I properly handle the data[0] list?
Can I trust that when it is a list of tuples the tuples has exactly 3 parts and the second part is the payload?
Maybe you know any better library for imap4?
No... imaplib is a pretty good library, it's imap that's so unintelligible.
You may wish to check that t == 'OK', but data[0][1] works as expected for as much as I've used it.
Here's a quick example I use to extract signed certificates I've received by email, not bomb-proof, but suits my purposes:
import getpass, os, imaplib, email
from OpenSSL.crypto import load_certificate, FILETYPE_PEM
def getMsgs(servername="myimapserverfqdn"):
usernm = getpass.getuser()
passwd = getpass.getpass()
subject = 'Your SSL Certificate'
conn = imaplib.IMAP4_SSL(servername)
conn.login(usernm,passwd)
conn.select('Inbox')
typ, data = conn.search(None,'(UNSEEN SUBJECT "%s")' % subject)
for num in data[0].split():
typ, data = conn.fetch(num,'(RFC822)')
msg = email.message_from_string(data[0][1])
typ, data = conn.store(num,'-FLAGS','\\Seen')
yield msg
def getAttachment(msg,check):
for part in msg.walk():
if part.get_content_type() == 'application/octet-stream':
if check(part.get_filename()):
return part.get_payload(decode=1)
if __name__ == '__main__':
for msg in getMsgs():
payload = getAttachment(msg,lambda x: x.endswith('.pem'))
if not payload:
continue
try:
cert = load_certificate(FILETYPE_PEM,payload)
except:
cert = None
if cert:
cn = cert.get_subject().commonName
filename = "%s.pem" % cn
if not os.path.exists(filename):
open(filename,'w').write(payload)
print "Writing to %s" % filename
else:
print "%s already exists" % filename
The IMAPClient package is a fair bit easier to work with. From the description:
Easy-to-use, Pythonic and complete
IMAP client library.
Try my package:
https://pypi.org/project/imap-tools/
example:
from imap_tools import MailBox
# get list of email bodies from INBOX folder
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
bodies = [msg.text or msg.html for msg in mailbox.fetch()]
Features:
Parsed email message attributes
Query builder for searching emails
Work with emails in folders (copy, delete, flag, move, append)
Work with mailbox folders (list, set, get, create, exists, rename, delete, status)
No dependencies
This was my solution to extract the useful bits of information. It's been reliable so far:
import datetime
import email
import imaplib
import mailbox
EMAIL_ACCOUNT = "your#gmail.com"
PASSWORD = "your password"
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
mail.list()
mail.select('inbox')
result, data = mail.uid('search', None, "UNSEEN") # (ALL/UNSEEN)
i = len(data[0].split())
for x in range(i):
latest_email_uid = data[0].split()[x]
result, email_data = mail.uid('fetch', latest_email_uid, '(RFC822)')
# result, email_data = conn.store(num,'-FLAGS','\\Seen')
# this might work to set flag to seen, if it doesn't already
raw_email = email_data[0][1]
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
# Header Details
date_tuple = email.utils.parsedate_tz(email_message['Date'])
if date_tuple:
local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
local_message_date = "%s" %(str(local_date.strftime("%a, %d %b %Y %H:%M:%S")))
email_from = str(email.header.make_header(email.header.decode_header(email_message['From'])))
email_to = str(email.header.make_header(email.header.decode_header(email_message['To'])))
subject = str(email.header.make_header(email.header.decode_header(email_message['Subject'])))
# Body details
for part in email_message.walk():
if part.get_content_type() == "text/plain":
body = part.get_payload(decode=True)
file_name = "email_" + str(x) + ".txt"
output_file = open(file_name, 'w')
output_file.write("From: %s\nTo: %s\nDate: %s\nSubject: %s\n\nBody: \n\n%s" %(email_from, email_to,local_message_date, subject, body.decode('utf-8')))
output_file.close()
else:
continue

Categories

Resources