Python extract a specific attachment from email - python

I am trying to achieve the following:
I have an email object (eml) which can contain multiple attachments like zip/images/txt etc.
I want to download only a specific attachment and not all the attachments from this object.
I have tried the following code:
import email
import mimetypes
import uuid
import os
m = email.message_from_file(open('hello.eml'))
for part in m.walk():
filename = part.get_filename()
print('fileName: ', filename)
if part.get_content_type() == 'multipart/mixed' and not filename:
number_of_attachments = (len(part.get_payload()) - 1)
print('number_of_attachments: ', number_of_attachments)
for attachment in range(number_of_attachments):
tmp_name = str(uuid.uuid4())
fp = open('.' + tmp_name, 'wb')
attachment += 1
fp.write(part.get_payload()[attachment].get_payload(decode=True))
fp.close()
This extracts all the attachments.
Is there a way to filter only specific attachment and process that.
Any help is much appreciated!

So, I found the following way.
import email
import mimetypes
import uuid
import os
m = email.message_from_file(open('hello.eml'))
for part in m.walk():
filename = part.get_filename()
print('fileName: ', filename)
if part.get_content_type() == 'multipart/mixed' and not filename:
number_of_attachments = (len(part.get_payload()) - 1)
print('number_of_attachments: ', number_of_attachments)
for attachment in range(number_of_attachments):
filteredFile = m.get_payload()[attachment]
if filteredFile.get_content_type() == 'text/html':
print("Search successful.")
else:
print("Search unsuccessful.")
Thanks all for your inputs!

Related

Python Email - saving PDF attachments

import imaplib
import email
from PyPDF2 import PdfFileMerger
import os
HOST = *****
USERNAME = ****
PASSWORD = *****
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')
result, data = m.uid('search', None, "ALL")
if result == 'OK':
for num in data[0].split():
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
email_message_raw = email.message_from_bytes(data[0][1])
for part in email_message_raw.walk():
content_type = part.get_content_type()
files = []
if "plain" in content_type:
text = part.get_payload()
f = open('text.html','w')
f.write('<p>'+text+'</p>')
f.close
os.system('wkhtmltopdf text.html text.pdf')
os.system('rm -r text.html')
files.append('text.pdf')
if "pdf" in content_type:
'''
save each pdf
files.append(pdf_filename)
'''
merger = PdfFileMerger()
for pdf in files:
merger.append(pdf)
merger.write(complete_email.pdf)
merger.close
m.close()
m.logout()
I am attempting to save PDF attachments from emails and combine them with a created PDF of the main email message. Struggling with how to download the PDF, see the ‘’’ ‘’’ section. Thanks.
files = ['test.pdf'] #exising pdf from plain text
for part in data.walk():
if part.get_content_type() == "application/pdf":
files.append(part.get_filename(failobj=None))
with open(part.get_filename(failobj=None), 'wb') as f:
f.write(part.get_payload(decode=True))

How to save file for every content_type rather than every uid with imaplib and email

I am successfully saving the content for each email with the following code, as a .txt, .html or .PDF file. However, I would like to save a version of every content_type, for each email (for each uid). Currently it is only saving one file type for every uid.
For example, an email with a PDF attachment is only currently saving the PDF. I would like it to save the PDF attachment along with the plain text content of the email, in 2 separate files.
Thanks for any help.
import imaplib
import email
import os
import mimetypes
mail = imaplib.IMAP4_SSL('imap.secureserver.net',993)
mail.login('[user]', '[pw]')
mail.select('Inbox')
result, data = mail.uid('search', None, 'ALL')
item_list = data[0].split()
for item in item_list:
result2, email_data = mail.uid('fetch',item,'(RFC822)')
raw_email = email_data[0][1].decode("utf-8")
email_message = email.message_from_string(raw_email)
print_dir = False
if print_dir: print(dir(email_message)) #options, e.g. list of from, to etc.
from_ = email_message['From']
date_ = email_message['Date']
for part in email_message.walk():
option = str(item)[2:-1] + ' ' + date_[:-15] + ' ' + from_ + ' '
content_type = part.get_content_type()
print(str(item),' ',content_type)
if content_type == 'text/html':
filename = option + '.html'
elif content_type == 'text/plain':
filename = option + '.txt'
elif content_type == 'application/pdf':
attachment = part.get_filename() #attachment filename
filename = option + str(attachment)
else:
# Guesses the file type
ext = mimetypes.guess_extension(content_type)
if not ext:
ext = '.bin'
filename = option + ext
save_path = os.getcwd() + '/' + filename
with open(save_path, 'wb') as fp:
fp.write(part.get_payload(decode=True))
^ For multitypes I would like to save a file with all the type extensions. Such as for 22382, a PDF and txt
^ Current Output files
I'm not fully sure, but I think your problem is in the for item in item_list: loop.
email_message would only end up being whatever the last item in that loop creates.
Would you need to push nearly everything in that loop 1 tab's worth out?
Also I'd assume you'd want to use part instead of item in this line: option = str(item)[2:-1] + ' ' + date_[:-15] + ' ' + from_ + ' '
Again, not fully sure, but hope this helps!

Unable to download all documents from eml file

I have a .eml file with 3 attachments in it. I was able to download one of the attachment but unable to download all the attachments.
import os
import email
import base64
# Get list of all files
files = [f for f in os.listdir('.') if os.path.isfile(f)]
# Create output directory
if os.path.exists("output"):
pass
else:
os.makedirs("output")
for eml_file in files:
if eml_file.endswith(".eml"):
with open(eml_file) as f:
email = f.read()
ext=".docx"
if ext is not "":
# Extract the base64 encoding part of the eml file
encoding = email.split(ext+'"')[-1]
if encoding:
# Remove all whitespaces
encoding = "".join(encoding.strip().split())
encoding = encoding.split("=", 1)[0]
# Convert base64 to string
if len(encoding) % 4 != 0: #check if multiple of 4
while len(encoding) % 4 != 0:
encoding = encoding + "="
try:
decoded = base64.b64decode(encoding)
except:
print(encoding)
for i in range(100):
print('\n')
# Save it as docx
path = os.path.splitext(eml_file)[0]
if path:
path = os.path.join("output", path + ext)
try:
os.remove(path)
except OSError:
pass
with open(path, "wb") as f:
f.write(decoded)
else:
print("File not done: " + eml_file)
How can I download all the attachments?
edit: I have initialized the eml_file still not downloading all files.
You import the email module. So why do you ignore it and try to write an email parser yourself? In addition:
You can use glob to list all files with a given extension.
Use should have used not operator in the condition: (if not os.path.exists("output"): os.makedirs("output")), but even this is not necessary, because makedirs has exist_ok parameter.
import os
import glob
import email
from email import policy
indir = '.'
outdir = os.path.join(indir, 'output')
os.makedirs(outdir, exist_ok=True)
files = glob.glob(os.path.join(indir, '*.eml'))
for eml_file in files:
# This will not work in Python 2
msg = email.message_from_file(open(eml_file), policy=policy.default)
for att in msg.iter_attachments():
# Tabs may be added for indentation and not stripped automatically
filename = att.get_filename().replace('\t', '')
# Here we suppose for simplicity sake that each attachment has a valid unique filename,
# which, generally speaking, is not true.
with open(os.path.join(outdir, filename), 'wb') as f:
f.write(att.get_content())

How can I get an attached eml file from email message content using Python?

I am using python 3.7 and the email, imap library to read email and extract the content of email and attachments , all the attachment ( like excel, csv, pdf) is downloading as attachment but when i received any .eml file in email , it shows me error, please find the below code to read email content and attachment with error showing in case of eml file is received as attachment.
it is showing error at the time of writing eml file.
at the time of write part.get_payload(decode=True) is coming blank in eml file case.
filename = part.get_filename()
if filename is not None:
dot_position = filename.find('.')
file_prefix = filename[0:dot_position]
file_suffix = filename[dot_position:len(filename)]
# print(dot_position)
# print(file_prefix)
# print(file_suffix)
now = datetime.datetime.now()
timestamp = str(now.strftime("%Y%m%d%H%M%S%f"))
newFileName = file_prefix + "_" + timestamp + file_suffix
sv_path = os.path.join(svdir, newFileName)
# allfiles = allfiles.append([{"oldfilename": filename, "newfilename": newFileName}])
mydict = filename + '$$' + newFileName
mydict1 = mydict1 + ',' + mydict
print(mydict1)
if not os.path.isfile(sv_path):
print("oldpath:---->" + sv_path)
# filename = os.rename(filename, filename + '_Rahul')
# sv_path = os.path.join(svdir, filename)
# print("Newpath:---->" + sv_path)
fp = open(sv_path, 'wb')
# print("Rahul")
print(part.get_payload(decode=True))
# try:
# newFileByteArray = bytearray(fp)
# if part.get_payload(decode=True) is not None:
fp.write(part.get_payload(decode=True))
# except (TypeError, IOError):
# pass
fp.close()
Error is
<class 'TypeError'> ReadEmailUsingIMAP.py 129
a bytes-like object is required, not 'NoneType'
Just to explain why this is happening (it hit me too), quoting the v. 3.5 library doc. (v2 says the same):
If the message is a multipart and the decode flag is True, then None is returned.
If your attachment is an .EML, it's almost always going to be multi-part, thus the None.
Jin Thakur's workaround is appropriate if you're only expecting .EML multipart attachments (not sure if there is any other use cases); it should have been accepted as an answer.
Use eml_parser
https://pypi.org/project/eml-parser/
import datetime
import json
import eml_parser
def json_serial(obj):
if isinstance(obj, datetime.datetime):
serial = obj.isoformat()
return serial
with open('sample.eml', 'rb') as fhdl:
raw_email = fhdl.read()
parsed_eml = eml_parser.eml_parser.decode_email_b(raw_email)
print(json.dumps(parsed_eml, default=json_serial))

How to edit .eml file generated with email.generator in Outlook?

I have a code, that generate me .eml message file which I open in Outlook. But the message file is uneditable (can't edit this file). E.g.: I want to add new sender or recepiant, but I can't do that.
Does anybody know, how fix this problem, or may be there is another way to create message file?
import email
from email import generator
from email.mime.text import MIMEText
from email.encoders import encode_base64
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
import json
import SQL_from_DB
import os
import re
def create_mail(attach_file_name, message_json, mail_template_path, save_path, database, db_username, db_password, driver):
file_name_mail_template = mail_template_path + re.sub("QWE:\[.*?\]\s+","",message_json['message']) + '.txt'
try:
mail_template_file = open(file_name_mail_template.encode('utf-8'), 'r', encoding="utf-8")
except:
print("Ошибка открытия шаблона пиьсма \"" + re.sub("QWE:\[.*?\]\s+","",message_json['message']) + ".txt\"")
sys.exit()
mail_template_text = mail_template_file.read()
for var, value in message_json.items():
mail_template_text = mail_template_text.replace('${' + str(var) + '}', str(value))
# print(mail_template_text)
msg = MIMEMultipart()
msg['Subject'] = message_json['message']
msg['From'] = 'qwe#qwe.qwe'
msg['To'] = SQL_from_DB.SQL_select(database, db_username, db_password, driver, message_json['DOMAINNAME'])
#add attachment
attach_file = open(attach_file_name.encode('utf-8'), 'rb')
attachment = MIMEBase("application", "msword")
#attachment = MIMEBase("application", "pdf")
attachment.set_payload(attach_file.read())
attach_file.close()
encode_base64(attachment)
attachment.add_header('Content-Disposition','attachment',filename='Events' + ".zip")
msg.attach(attachment)
#текст письма
msg.attach(MIMEText(mail_template_text, 'html'))
#save message file
with open(save_path.encode("utf-8"), 'w') as out:
gen = email.generator.Generator(out)
gen.flatten(msg)
return(save_path)
To generate an editable Outlook file, just need generate not .eml but .emltpl (Outlook message template file).
#save message file
with open('/save_path/message.emltpl', 'w') as out:
gen = email.generator.Generator(out)
gen.flatten(msg)
return(save_path)

Categories

Resources