Python Email - saving PDF attachments - python

import imaplib
import email
from PyPDF2 import PdfFileMerger
import os
HOST = *****
USERNAME = ****
PASSWORD = *****
# Fetch every message in INBOX, render its plain-text body to text.pdf with
# wkhtmltopdf and merge the PDFs collected for that message into
# complete_email.pdf.
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')
result, data = m.uid('search', None, "ALL")
if result == 'OK':
    for num in data[0].split():
        # Use a separate name so the search result is not overwritten.
        result, msg_data = m.uid('fetch', num, '(RFC822)')
        if result != 'OK':
            continue
        email_message_raw = email.message_from_bytes(msg_data[0][1])
        # One list per message: creating it inside the part loop would throw
        # away the entries collected for earlier parts.
        files = []
        for part in email_message_raw.walk():
            content_type = part.get_content_type()
            if "plain" in content_type:
                text = part.get_payload()
                # The with-block guarantees the HTML is flushed and closed
                # before wkhtmltopdf reads it.
                with open('text.html', 'w') as f:
                    f.write('<p>' + text + '</p>')
                os.system('wkhtmltopdf text.html text.pdf')
                os.remove('text.html')
                files.append('text.pdf')
            if "pdf" in content_type:
                '''
                save each pdf
                files.append(pdf_filename)
                '''
        # Nothing to merge when the message produced no PDFs.
        if files:
            merger = PdfFileMerger()
            for pdf in files:
                merger.append(pdf)
            # NOTE: every message writes to the same output name, so a later
            # message overwrites the result of an earlier one.
            merger.write('complete_email.pdf')
            merger.close()
m.close()
m.logout()
I am attempting to save PDF attachments from emails and combine them with a PDF generated from the main email message. I am struggling with how to download the PDF attachments; see the placeholder section between the ''' markers. Thanks.

files = ['test.pdf']  # existing PDF generated from the plain-text body
# Walk the parsed message (the raw fetch response has no walk() method) and
# save every application/pdf part to the current directory.
for part in email_message_raw.walk():
    if part.get_content_type() != "application/pdf":
        continue
    filename = part.get_filename(failobj=None)
    if filename:
        # The name comes from the email, so drop any directory components.
        filename = os.path.basename(filename)
    else:
        # Attachments without a usable name still need somewhere to go.
        filename = 'attachment-%d.pdf' % len(files)
    with open(filename, 'wb') as f:
        f.write(part.get_payload(decode=True))
    # Record the file only after it has been written successfully.
    files.append(filename)

Related

Python extract a specific attachment from email

I am trying to achieve the following:
I have an email object (eml) which can contain multiple attachments like zip/images/txt etc.
I want to download only a specific attachment and not all the attachments from this object.
I have tried the following code:
import email
import mimetypes
import uuid
import os
# Parse hello.eml and dump every sub-part after the first one of each
# unnamed multipart/mixed container into a hidden file with a random name.
with open('hello.eml') as eml_file:
    m = email.message_from_file(eml_file)
for part in m.walk():
    filename = part.get_filename()
    print('fileName: ', filename)
    if part.get_content_type() == 'multipart/mixed' and not filename:
        subparts = part.get_payload()
        number_of_attachments = len(subparts) - 1
        print('number_of_attachments: ', number_of_attachments)
        # The first sub-part is skipped (presumably the message body); the
        # remaining ones are treated as attachments.
        for attachment in subparts[1:]:
            content = attachment.get_payload(decode=True)
            if content is None:
                # Nested multipart containers have no decodable payload.
                continue
            tmp_name = str(uuid.uuid4())
            with open('.' + tmp_name, 'wb') as fp:
                fp.write(content)
This extracts all the attachments.
Is there a way to filter only specific attachment and process that.
Any help is much appreciated!
So, I found the following way.
import email
import mimetypes
import uuid
import os
# Parse hello.eml and report, for each sub-part after the first one of an
# unnamed multipart/mixed container, whether it is a text/html part.
with open('hello.eml') as eml_file:
    m = email.message_from_file(eml_file)
for part in m.walk():
    filename = part.get_filename()
    print('fileName: ', filename)
    if part.get_content_type() == 'multipart/mixed' and not filename:
        # Index the container being inspected, not the top-level message.
        subparts = part.get_payload()
        number_of_attachments = len(subparts) - 1
        print('number_of_attachments: ', number_of_attachments)
        # Start after the first sub-part so the last attachment is included
        # and the leading (presumably body) part is not checked.
        for filteredFile in subparts[1:]:
            if filteredFile.get_content_type() == 'text/html':
                print("Search successful.")
            else:
                print("Search unsuccessful.")
Thanks all for your inputs!

IMAP how to save **all** attachments

I am trying to save Excel file attachments from my inbox to a directory. My code appears to execute fine because I am seeing the printouts, but the attachments won't save in the target directory. Is there something I am missing in my code that is preventing the files from being saved?
import email, getpass, imaplib, os, sys
# Save the attachments of every multipart message from sender_email found in
# the selected folder into detach_dir, skipping files that already exist.
detach_dir = r'\directory link'
user = "test"
pwd = "test"
sender_email = "example#example.com"
m = imaplib.IMAP4_SSL("outlook.office365.com")
m.login(user, pwd)
m.select('"INBOX/somestuff"')
print("ok")
resp, items = m.search(None, 'FROM', '"%s"' % sender_email)
items = items[0].split()
print("ok")
for emailid in items:
    resp, data = m.fetch(emailid, "(RFC822)")
    # Parse the raw bytes directly: decoding the whole message as UTF-8
    # first fails for mails that contain other 8-bit encodings.
    mail = email.message_from_bytes(data[0][1])
    print("ok")
    if mail.get_content_maintype() != 'multipart':
        continue
    subject = mail["subject"] or ""
    print("[%s] :%s" % (mail["From"], subject))
    # Per-message counter for unnamed parts; resetting it for every part
    # would give all of them the same fallback name.
    counter = 1
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            filename = 'part-%03d%s' % (counter, '.bin')
            counter += 1
        # The name comes from the email, so keep only its final component.
        att_path = os.path.join(detach_dir, os.path.basename(filename))
        if not os.path.isfile(att_path):
            with open(att_path, 'wb') as fp:
                fp.write(part.get_payload(decode=True))
This code saves just one of the attachments in the subfolder, but I am looking to get all attachments saved to the directory:
# Connects to an Office 365 mailbox, selects a subfolder, fetches each message
# id returned by the search and writes one payload into detach_dir.
# NOTE(review): indentation was lost in this excerpt; the lines from the fetch
# call down to fp.close() presumably form the loop body — confirm.
detach_dir = r'directory link'
m = imaplib.IMAP4_SSL("outlook.office365.com")
m.login('user','pass')
m.select('"INBOX/subfolder"')
resp, items = m.search(None, 'All')
# The first element of the search result is split into individual ids.
items = items[0].split()
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)")
# NOTE(review): `part` is never assigned in this excerpt and `data` is not
# parsed into a message here — the step that produces `part` appears to be
# missing from the snippet.
filename = part.get_filename()
print(filename)
att_path = os.path.join(detach_dir, filename)
# Write the decoded payload of `part` to att_path in binary mode.
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print('check folder')
Question: This code saves just one of the attachments ... but I am looking to get all attachments
Use iter_attachments() to iterate over all attachments of each message:
# Print the filename of every attachment of each unseen message.
import email.policy

resp, items = imap.search(None, "(UNSEEN)")
for num in items[0].split():
    resp, data = imap.fetch(num, '(RFC822)')
    data01 = data[0][1]
    # The fetch result is bytes, and iter_attachments() only exists on
    # EmailMessage objects, which the parser returns when given a modern
    # policy instead of the default compat32 one.
    msg_obj = email.message_from_bytes(data01, policy=email.policy.default)
    for part in msg_obj.iter_attachments():
        filename = part.get_filename()
        print(filename)
iter_attachments()
Return an iterator over all of the immediate sub-parts of the message that are not candidate “body” parts. That is, skip the first occurrence of each of text/plain, text/html, multipart/related, or multipart/alternative (unless they are explicitly marked as attachments via Content-Disposition: attachment), and return all remaining parts.
Used modules and classes:
class imaplib.IMAP4
class email.message.EmailMessage
Here’s an example of how to unpack a MIME message, using email.message.walk(), into a directory of files:

python download google drive attachment from gmail

I'm trying to use python imaplib.IMAP4_SSL to download attachments from gmail. But the problem is if the attachment is larger than 25M, it will be automatically converted to a file in google drive. By using the code below, I cannot find the attachment. Does anyone know how to access and download the file in google drive attached in gmail?
# Logs in to Gmail over IMAP, fetches the last message id returned by the
# FROM search and writes the decoded payload of parts that carry a
# Content-Disposition header to a fixed file name.
# NOTE(review): indentation was lost in this excerpt, and the trailing
# `return flag` implies these lines sit inside a function whose `def` line is
# not shown — confirm against the original.
conn = imaplib.IMAP4_SSL("imap.gmail.com")
conn.login(config.username, config.password)
conn.select("Inbox")
resp, items = conn.search(None, '(FROM "xx#xx.com")')
items = items[0].split()
# Set to True once a payload has been written.
flag = False
# Only the last id of the search result is fetched.
emailid = items[-1]
resp, data = conn.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
# NOTE(review): the same fetched body is parsed a second time here; `msg` is
# not used again in this excerpt.
msg = email.message_from_string(data[0][1])
for part in mail.walk():
# Skip multipart container parts.
if (part.get_content_maintype() == 'multipart'):
continue
# Skip parts that have no Content-Disposition header.
if (part.get('Content-Disposition') is None):
continue
# NOTE(review): attachment_name is read but the output below always goes to
# the fixed name "attachment_trial.xlsx".
attachment_name = part.get_filename()
filename = "attachment_trial.xlsx"
fp = open(filename, 'wb')
fp.write(part.get_payload(decode = True))
fp.close()
flag = True
return flag

To copy the attached file in an email.

I have been able to figure out how to get the name of the attached file in an email, but I am stuck after that. I have tried using os.path.join, which just joins the path of the folder I want to download to with the filename. Please suggest something. Thanks.
# Print the filename of every attachment in the messages whose subject
# matches the search.
# The connection is named `mail` throughout: binding it to `m` while calling
# mail.search() raised a NameError, and `m` is needed for the parsed message.
mail = imaplib.IMAP4_SSL('outlook.office365.com', 993)
mail.login("UN", "PW")
mail.select("Inbox")
typ, msgs = mail.search(None, '(SUBJECT "qwerty")')
msgs = msgs[0].split()
for emailid in msgs:
    resp, data = mail.fetch(emailid, "(RFC822)")
    email_body = data[0][1]
    m = email.message_from_bytes(email_body)
    if m.get_content_maintype() != 'multipart':
        continue
    for part in m.walk():
        # Container parts and parts without a Content-Disposition header are
        # not treated as attachments.
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        print(filename)
Following the sample from this link you can set the path when using the open function. (raw string by prefixing the string with r)
# Save the current attachment `part` under c:\tmp\folder using its filename.
import os

if filename:
    # A raw string cannot end with a backslash, so the directory and the
    # file name are combined with os.path.join instead of concatenation.
    att_path = os.path.join(r'c:\tmp\folder', os.path.basename(filename))
    with open(att_path, 'wb') as fp:
        fp.write(part.get_payload(decode=True))
    print('%s saved!' % filename)

Extract content from a file with mime multipart

I have a file that contains a TIFF image and an XML document in a multipart MIME document.
I would like to extract the image from this file.
How can I do it?
I have this code, but it takes an extremely long time to extract the image from a big file (for example 30 MB), so it is not usable.
# Parse content_file.txt as a MIME message and write the decoded payload of
# each image part to map.out.tiff; exit if the message contains no image.
# The file is opened in binary mode, so it is parsed with the binary-file
# variant of the parser (Python 3).
with open("content_file.txt", "rb") as f:
    msg = email.message_from_binary_file(f)
j = 0  # number of text and image parts seen
image = False
for i in msg.walk():
    if i.is_multipart():
        continue
    if i.get_content_maintype() == 'text':
        j = j + 1
        continue
    if i.get_content_maintype() == 'image':
        image = True
        j = j + 1
        pl = i.get_payload(decode=True)
        # The with-block closes the output file, which was previously left
        # open.
        with open("map.out.tiff", 'wb') as localFile:
            localFile.write(pl)
        continue
if not image:
    sys.exit(0)
Thank you so much.
Solved:
def extract_mime_part_matching(stream, mimetype):
    """Return the first element in a multipart MIME message on stream
    matching mimetype.

    Adapted from the Python 2 ``multifile`` documentation example and
    rewritten with the ``email`` package, because ``mimetools``,
    ``multifile`` and ``StringIO`` no longer exist in Python 3.

    Args:
        stream: A file-like object positioned at the start of the message.
            Both binary and text streams are accepted.
        mimetype: The content type to look for, e.g. ``"image/tiff"``.

    Returns:
        The decoded payload (bytes) of the first immediate sub-part whose
        content type equals ``mimetype``. Empty bytes are returned when the
        message is not multipart or when no sub-part matches.
    """
    import email

    raw = stream.read()
    if isinstance(raw, bytes):
        msg = email.message_from_bytes(raw)
    else:
        msg = email.message_from_string(raw)

    # A multipart content type without a usable boundary leaves the payload
    # as a plain string, so both conditions are checked.
    if msg.get_content_maintype() != "multipart" or not msg.is_multipart():
        return b""

    for submsg in msg.get_payload():
        if submsg.get_content_type() == mimetype:
            # Container parts have no decodable payload and yield None.
            return submsg.get_payload(decode=True) or b""
    # No match: return nothing rather than the last part that was decoded.
    return b""
From:
http://docs.python.org/release/2.6.6/library/multifile.html
Thank you for the support.
It is not quite clear to me why your code hangs. The indentation looks a bit wrong and opened files are not properly closed. You may also be low on memory.
This version works fine for me:
import email
import mimetypes
# Parse email.txt and write every image part to its own file, inventing a
# name from the part index and content type when the part has none.
# The message is read in binary mode so that 8-bit content cannot trigger a
# text decoding error while the file is being read.
with open('email.txt', 'rb') as msg_file:
    message = email.message_from_binary_file(msg_file)
for i, part in enumerate(message.walk()):
    if part.get_content_maintype() == 'image':
        filename = part.get_filename()
        if not filename:
            ext = mimetypes.guess_extension(part.get_content_type())
            filename = 'image-%02d%s' % (i, ext or '.tiff')
        # A separate name keeps the output handle distinct from the input
        # handle used above.
        with open(filename, 'wb') as out_file:
            out_file.write(part.get_payload(decode=True))
(Partly taken from http://docs.python.org/library/email-examples.html#email-examples)

Categories

Resources