I am trying to save excel file attachments from my inbox to a directory. My code is executing just fine because I am seeing the print outs but the attachments wont save in the file directory. Is there something I am missing in my code that is preventing the action of saving?
import email, getpass, imaplib, os, sys
detach_dir = r'\directory link'
user = "test"
pwd = "test"
sender_email = "example#example.com"
m = imaplib.IMAP4_SSL("outlook.office365.com")
m.login(user,pwd)
m.select('"INBOX/somestuff"')
print("ok")
resp, items = m.search(None, 'FROM', '"%s"' % sender_email)
items = items[0].split()
print("ok")
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1].decode('utf-8')
mail = email.message_from_string(email_body)
print("ok")
if mail.get_content_maintype() != 'multipart':
continue
subject = ""
if mail["subject"] is not None:
subject = mail["subject"]
print ("["+mail["From"]+"] :" + subject)
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
counter = 1
if not filename:
filename = 'part-%03d%s' % (counter, 'bin')
counter += 1
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
This code saves just one of the attachments in the subfolder but I am looking to get all attachments save to the directory:
detach_dir = r'directory link'
m = imaplib.IMAP4_SSL("outlook.office365.com")
m.login('user','pass')
m.select('"INBOX/subfolder"')
resp, items = m.search(None, 'All')
items = items[0].split()
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)")
filename = part.get_filename()
print(filename)
att_path = os.path.join(detach_dir, filename)
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print('check folder')
Question: This code saves just one of the attachments ... but I am looking to get all attachments
Implement iter-attachments()
resp, items = imap.search(None, "(UNSEEN)")
for n, num in enumerate(items[0].split(), 1):
resp, data = imap.fetch(num, '(RFC822)')
data01 = data[0][1]
msg_obj = email.message_from_string(data01)
for part in msg_obj.iter_attachments():
filename = part.get_filename()
print(filename)
iter_attachments()
Return an iterator over all of the immediate sub-parts of the message that are not candidate “body” parts. That is, skip the first occurrence of each of text/plain, text/html, multipart/related, or multipart/alternative (unless they are explicitly marked as attachments via Content-Disposition: attachment), and return all remaining parts.
Used modules and classes:
class imaplib.IMAP4
class email.message.EmailMessage
Here’s an example of how to unpack a MIME message, using email.message.walk(), into a directory of files:
Related
import imaplib
import email
from PyPDF2 import PdfFileMerger
import os
HOST = *****
USERNAME = ****
PASSWORD = *****
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')
result, data = m.uid('search', None, "ALL")
if result == 'OK':
for num in data[0].split():
result, data = m.uid('fetch', num, '(RFC822)')
if result == 'OK':
email_message_raw = email.message_from_bytes(data[0][1])
for part in email_message_raw.walk():
content_type = part.get_content_type()
files = []
if "plain" in content_type:
text = part.get_payload()
f = open('text.html','w')
f.write('<p>'+text+'</p>')
f.close
os.system('wkhtmltopdf text.html text.pdf')
os.system('rm -r text.html')
files.append('text.pdf')
if "pdf" in content_type:
'''
save each pdf
files.append(pdf_filename)
'''
merger = PdfFileMerger()
for pdf in files:
merger.append(pdf)
merger.write(complete_email.pdf)
merger.close
m.close()
m.logout()
I am attempting to save PDF attachments from emails and combine them with a created PDF of the main email message. Struggling with how to download the PDF, see the ‘’’ ‘’’ section. Thanks.
files = ['test.pdf'] #exising pdf from plain text
for part in data.walk():
if part.get_content_type() == "application/pdf":
files.append(part.get_filename(failobj=None))
with open(part.get_filename(failobj=None), 'wb') as f:
f.write(part.get_payload(decode=True))
I am successfully saving the content for each email with the following code, as a .txt, .html or .PDF file. However, I would like to save a version of every content_type, for each email (for each uid). Currently it is only saving one file type for every uid.
For example, an email with a PDF attachment is only currently saving the PDF. I would like it to save the PDF attachment along with the plain text content of the email, in 2 separate files.
Thanks for any help.
import imaplib
import email
import os
import mimetypes
mail = imaplib.IMAP4_SSL('imap.secureserver.net',993)
mail.login('[user]', '[pw]')
mail.select('Inbox')
result, data = mail.uid('search', None, 'ALL')
item_list = data[0].split()
for item in item_list:
result2, email_data = mail.uid('fetch',item,'(RFC822)')
raw_email = email_data[0][1].decode("utf-8")
email_message = email.message_from_string(raw_email)
print_dir = False
if print_dir: print(dir(email_message)) #options, e.g. list of from, to etc.
from_ = email_message['From']
date_ = email_message['Date']
for part in email_message.walk():
option = str(item)[2:-1] + ' ' + date_[:-15] + ' ' + from_ + ' '
content_type = part.get_content_type()
print(str(item),' ',content_type)
if content_type == 'text/html':
filename = option + '.html'
elif content_type == 'text/plain':
filename = option + '.txt'
elif content_type == 'application/pdf':
attachment = part.get_filename() #attachment filename
filename = option + str(attachment)
else:
# Guesses the file type
ext = mimetypes.guess_extension(content_type)
if not ext:
ext = '.bin'
filename = option + ext
save_path = os.getcwd() + '/' + filename
with open(save_path, 'wb') as fp:
fp.write(part.get_payload(decode=True))
^ For multitypes I would like to save a file with all the type extensions. Such as for 22382, a PDF and txt
^ Current Output files
I'm not fully sure, but I think your problem is in the for item in item_list: loop.
email_message would only end up being whatever the last item in that loop creates.
Would you need to push nearly everything in that loop 1 tab's worth out?
Also I'd assume you'd want to use part instead of item in this line: option = str(item)[2:-1] + ' ' + date_[:-15] + ' ' + from_ + ' '
Again, not fully sure, but hope this helps!
I'm trying to use python imaplib.IMAP4_SSL to download attachments from gmail. But the problem is if the attachment is larger than 25M, it will be automatically converted to a file in google drive. By using the code below, I cannot find the attachment. Does anyone know how to access and download the file in google drive attached in gmail?
conn = imaplib.IMAP4_SSL("imap.gmail.com")
conn.login(config.username, config.password)
conn.select("Inbox")
resp, items = conn.search(None, '(FROM "xx#xx.com")')
items = items[0].split()
flag = False
emailid = items[-1]
resp, data = conn.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
msg = email.message_from_string(data[0][1])
for part in mail.walk():
if (part.get_content_maintype() == 'multipart'):
continue
if (part.get('Content-Disposition') is None):
continue
attachment_name = part.get_filename()
filename = "attachment_trial.xlsx"
fp = open(filename, 'wb')
fp.write(part.get_payload(decode = True))
fp.close()
flag = True
return flag
I have been able to figure out how to get the name of the attached file in an email. i am just stuck after that. I don't know what to do after that, I have tried using os.path.join which just gives the path i want to download the folder to and joins it with the filename. Please suggest something. Thanks.
m = imaplib.IMAP4_SSL('outlook.office365.com',993)
m.login("UN", "PW")
m.select("Inbox")
typ, msgs = mail.search(None, '(SUBJECT "qwerty")')
msgs = msgs[0].split()
for emailid in msgs:
resp, data = mail.fetch(emailid, "(RFC822)")
email_body = data[0][1]
m = email.message_from_bytes(email_body)
if m.get_content_maintype() != 'multipart':
continue
for part in m.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
print(filename)
Following the sample from this link you can set the path when using the open function. (raw string by prefixing the string with r)
fp = open(r'c:\tmp\folder\' + filename, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print '%s saved!' % filename
I have a script that fetches emails from my account, downloads the attachments, creates some html for an email blast program, and then zips them into a nice little archive. This works well when only one email is present in the inbox, however, the script hangs when multiple emails exist. I feel like this is because the section of the script that zips the files is not looping correctly. What I am trying to accomplish is one zip file for each email. 3 emails in the inbox = 3 seperate zip files. I've done my best to reduce my code for maximum readability while still maintaining the core structure. Could anyone point me in the right direction here? Thanks!
Code:
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)")
email_body = data[0][1]
mail = email.message_from_string(email_body)
for part in mail.walk():
if part.get_content_type() == 'text/plain':
content = part.get_payload()
#do something/define variables from email contents
if mail.get_content_maintype() != 'multipart':
continue
for part in mail.walk():
if part.get_content_maintype() == 'multipart':
continue
if part.get('Content-Disposition') is None:
continue
filename = part.get_filename()
counter = 1
if not filename:
filename = 'part-%03d%s' % (counter, 'bin')
counter += 1
att_path = os.path.join(detach_dir, filename)
if not os.path.isfile(att_path) :
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
path = 'C:\directory'
os.chdir(path)
for file in os.listdir('.'):
#download attachments
htmlFile = str(token)+'.html'
htmlCode = ('<html>HTML goes here</html>')
htmlData = open(os.path.join('C:\directory', htmlFile), 'w+')
htmlData.write(htmlCode)
print htmlFile+' Complete'
htmlData.close()
allFiles = [f for f in os.listdir('.')]
for file in allFiles:
archive = zipfile.ZipFile(token+'.zip', mode='a')
archive.write(file)
archive.close()
os.unlink(file)
UPDATE
Here is alink to the complete code. http://ideone.com/WEXv9P
There seems to be a mistake here:
counter = 1
if not filename:
filename = 'part-%03d%s' % (counter, 'bin')
counter += 1
Counter will always be 1 in this loop, you probably want to define it before the second
for part in mail.walk():
EDIT:
Okay, so I think the problem is at the last part of the code
allFiles = [f for f in os.listdir('.')]
for file in allFiles:
archive = zipfile.ZipFile(token+'.zip', mode='a')
archive.write(file)
archive.close()
os.unlink(file)
this will create a zip file for each part of the email
I think what you want to do is indent this out a level and change it to something more like this:
allFiles = [f for f in os.listdir(detach_dir) if not f.endswith(".zip")]
for file in allFiles:
archive = zipfile.ZipFile(token+'.zip', mode='a')
archive.write(file)
archive.close()
os.unlink(file)
That way it won't recursively zip other zip files or remove them