python download google drive attachment from gmail - python

I'm trying to use Python's imaplib.IMAP4_SSL to download attachments from Gmail. The problem is that if an attachment is larger than 25 MB, it is automatically converted to a file in Google Drive. With the code below, I cannot find the attachment. Does anyone know how to access and download a Google Drive file that is attached to a Gmail message?
# Connect to Gmail over IMAP/SSL and open the inbox.
conn = imaplib.IMAP4_SSL("imap.gmail.com")
conn.login(config.username, config.password)
conn.select("Inbox")

# The search result is a single space-separated blob of message ids.
resp, items = conn.search(None, '(FROM "xx#xx.com")')
items = items[0].split()
flag = False

# Only the newest matching message is inspected. An empty search result simply
# leaves flag False instead of raising IndexError on items[-1].
if items:
    emailid = items[-1]
    resp, data = conn.fetch(emailid, "(RFC822)")
    email_body = data[0][1]

    # imaplib hands back bytes on Python 3 and str on Python 2; pick the
    # matching parser so the message is parsed exactly once either way.
    if isinstance(email_body, bytes) and hasattr(email, "message_from_bytes"):
        mail = email.message_from_bytes(email_body)
    else:
        mail = email.message_from_string(email_body)

    for part in mail.walk():
        # Containers and parts without a Content-Disposition are not attachments.
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            # Nothing decodable in this part (e.g. an embedded message).
            continue
        filename = "attachment_trial.xlsx"
        with open(filename, 'wb') as fp:
            fp.write(payload)
        flag = True
return flag

Related

Python Email - saving PDF attachments

import imaplib
import email
from PyPDF2 import PdfFileMerger
import os
HOST = *****
USERNAME = ****
PASSWORD = *****
# Open an IMAP/SSL session and select the inbox.
m = imaplib.IMAP4_SSL(HOST, 993)
m.login(USERNAME, PASSWORD)
m.select('INBOX')

result, data = m.uid('search', None, "ALL")
if result == 'OK':
    for num in data[0].split():
        fetch_result, msg_data = m.uid('fetch', num, '(RFC822)')
        if fetch_result == 'OK':
            email_message_raw = email.message_from_bytes(msg_data[0][1])
            # One list per message: creating it inside the part loop would
            # throw away every PDF collected from earlier parts.
            files = []
            for part in email_message_raw.walk():
                content_type = part.get_content_type()
                if "plain" in content_type:
                    text = part.get_payload()
                    # The file must be closed (flushed) before wkhtmltopdf reads it.
                    with open('text.html', 'w') as f:
                        f.write('<p>'+text+'</p>')
                    os.system('wkhtmltopdf text.html text.pdf')
                    os.remove('text.html')
                    files.append('text.pdf')
                if "pdf" in content_type:
                    '''
                    save each pdf
                    files.append(pdf_filename)
                    '''
            # Merge everything gathered for this message into one document.
            merger = PdfFileMerger()
            for pdf in files:
                merger.append(pdf)
            merger.write('complete_email.pdf')
            merger.close()
m.close()
m.logout()
I am attempting to save PDF attachments from emails and combine them with a PDF created from the main email message. I am struggling with how to download the PDF; see the ''' ''' section. Thanks.
files = ['test.pdf']  # existing pdf from plain text
# Walk the parsed message (not the raw fetch response, which is a plain list).
for part in email_message_raw.walk():
    if part.get_content_type() == "application/pdf":
        pdf_filename = part.get_filename(failobj=None)
        if pdf_filename is None:
            # No name to save under; open(None) would raise TypeError.
            continue
        with open(pdf_filename, 'wb') as f:
            f.write(part.get_payload(decode=True))
        files.append(pdf_filename)

Extract email attachments and retain modification/creation date?

I'm trying to extract files from emails via IMAP using Python 3.7 (on Windows, fyi) and each of my attempts shows extracted files with Modification & Creation Date = time of extraction (which is incorrect).
As full email applications have the ability to preserve that information, it must be stored somewhere. I also tried working with structs, thinking the information might be stored in binary, but had no luck.
import email
from email.header import decode_header
import imaplib
import os
# Module-wide IMAP connection; stays None until login_mail() assigns it.
SERVER = None
# Directory that parse_attachments() writes extracted files into.
OUT_DIR = '/var/out'
# IMAP host name and the credentials login_mail() authenticates with.
IMP_SRV = 'mail.domain.tld'
IMP_USR = 'user#domain.tld'
IMP_PWD = 'hunter2'
def login_mail():
    """Open an IMAP-over-SSL connection to ``IMP_SRV`` and log in.

    The connection object is published through the module-level ``SERVER``
    so the other helpers can reuse it.
    """
    global SERVER
    SERVER = imaplib.IMAP4_SSL(host=IMP_SRV)
    SERVER.login(user=IMP_USR, password=IMP_PWD)
def get_mail(folder='INBOX'):
    """Fetch and parse every message stored in *folder*.

    Args:
        folder: Name of the mailbox to read; defaults to ``'INBOX'``.

    Returns:
        A list of ``email.message.Message`` objects, one per message UID
        returned by the search.

    The module-level ``SERVER`` connection must already be logged in
    (see ``login_mail``).
    """
    # UID SEARCH/FETCH are only valid once a mailbox is selected, and the
    # folder argument was previously never applied.
    SERVER.select(folder)
    _, data = SERVER.uid('SEARCH', 'ALL')
    mails = []
    for uid in data[0].split():
        _, fetched = SERVER.uid('FETCH', uid, '(RFC822)')
        # fetched[0] is an (envelope, raw message bytes) pair.
        mails.append(email.message_from_bytes(fetched[0][1]))
    return mails
def parse_attachments(mail):
    """Write every ``application/octet-stream`` part of *mail* into ``OUT_DIR``.

    Each file is named by ``get_filename`` and receives the part's decoded
    payload.
    """
    binary_parts = (
        candidate for candidate in mail.walk()
        if candidate.get_content_type() == 'application/octet-stream'
    )
    for part in binary_parts:
        target = os.path.join(OUT_DIR, get_filename(part))
        with open(target, 'wb') as handle:
            handle.write(part.get_payload(decode=True))
def get_filename(part):
    """Return the decoded base name of the attachment carried by *part*.

    Args:
        part: An ``email.message.Message`` (or compatible) part.

    Returns:
        The file name with RFC 2047 encoded-words decoded and any directory
        components stripped, or ``None`` when the part declares no file name.
    """
    filename = part.get_filename()
    if filename is None:
        return None

    # decode_header() may split the name into several chunks with different
    # charsets; decode all of them, not just the first one.
    pieces = []
    for chunk, charset in decode_header(filename):
        if isinstance(chunk, bytes):
            # Unencoded chunks inside a mixed header arrive as bytes with
            # charset None; treat those as ASCII.
            pieces.append(chunk.decode(charset or 'ascii', errors='replace'))
        else:
            pieces.append(chunk)

    # Drop any path the sender put in front of the name.
    return os.path.basename(''.join(pieces))
Can anyone tell me what I'm doing wrong and if it's somehow possible?
After getting said information it could be possible to modify the timestamps utilizing How do I change the file creation date of a Windows file?.
I was able to extract the creation-date and modification-date from the content-disposition header. Setting the file modified date is simple too.
# Read the optional creation-date / modification-date parameters of the
# Content-Disposition header; get_param() returns the supplied default (None)
# when a parameter is absent.
attachment_creation_date = attachment.get_param('creation-date', None, 'content-disposition')
attachment_modification_date = attachment.get_param('modification-date', None, 'content-disposition')
Here's a more complete example that shows how to read these parameters if present:
def process_email_attachments(msg, output_directory):
    """Save each attachment of *msg* into *output_directory*.

    When the Content-Disposition header carries a ``modification-date``
    parameter, the saved file's timestamps are set from it.

    Args:
        msg: A message object providing ``iter_attachments()``.
        output_directory: Directory the attachment files are written to.
    """
    # Imported here because a bare ``import email`` does not guarantee that
    # the ``email.utils`` submodule has been loaded.
    from email.utils import parsedate_to_datetime

    for attachment in msg.iter_attachments():
        try:
            output_filename = attachment.get_filename()
        except AttributeError:
            print("Couldn't get attachment filename. Skipping.")
            continue
        # Parts without a file name are not saved.
        if not output_filename:
            continue

        # The name is chosen by the sender: keep only its last component so
        # it cannot point outside output_directory.
        safe_name = os.path.basename(output_filename.replace('\\', '/'))
        if not safe_name:
            continue

        # Read but not applied here: setting a creation time is platform dependent.
        attachment_creation_date = attachment.get_param('creation-date', None, 'content-disposition')
        attachment_modification_date = attachment.get_param('modification-date', None, 'content-disposition')

        # Fetch the payload before opening the file so a missing payload
        # does not leave an empty file behind.
        payload = attachment.get_payload(decode=True)
        if payload is None:
            print("Couldn't get payload for %s" % output_filename)
            continue

        output_file_full_path = os.path.join(output_directory, safe_name)
        with open(output_file_full_path, "wb") as of:
            of.write(payload)

        # Set the timestamp only after the file is closed; closing may
        # otherwise update the modification time again.
        if attachment_modification_date is not None:
            try:
                attachment_modification_datetime = parsedate_to_datetime(attachment_modification_date)
            except (TypeError, ValueError):
                print("Couldn't parse modification date for %s" % output_filename)
                continue
            set_file_last_modified(output_file_full_path, attachment_modification_datetime)
def set_file_last_modified(file_path, dt):
    """Set both the access and the modification time of *file_path* to *dt*.

    Args:
        file_path: Path of an existing file.
        dt: ``datetime`` whose POSIX timestamp is applied.
    """
    stamp = dt.timestamp()
    os.utime(file_path, times=(stamp, stamp))
The second part of your question is how to set the file created date. This is platform dependent. There is already a separate question with answers demonstrating how to set the creation date on a Windows file: How do I change the file creation date of a Windows file?

How can I get an attached eml file from email message content using Python?

I am using Python 3.7 with the email and imaplib libraries to read emails and extract their content and attachments. All other attachments (such as Excel, CSV and PDF files) download fine, but when an email carries a .eml file as an attachment I get an error. Below is the code that reads the email content and attachments, followed by the error raised for the .eml attachment.
The error occurs when writing the .eml file.
At that point part.get_payload(decode=True) returns None for the .eml attachment.
# Fragment from inside a loop over message parts: saves the current part under
# a timestamped copy of its original file name.
filename = part.get_filename()
if filename is not None:
# Split the name at the first '.' into prefix and suffix.
# NOTE(review): find() returns -1 when the name has no '.', which makes the
# slices below cut off the last character instead - confirm this is acceptable.
dot_position = filename.find('.')
file_prefix = filename[0:dot_position]
file_suffix = filename[dot_position:len(filename)]
# print(dot_position)
# print(file_prefix)
# print(file_suffix)
# Build "<prefix>_<timestamp><suffix>"; %f adds microseconds to the timestamp.
now = datetime.datetime.now()
timestamp = str(now.strftime("%Y%m%d%H%M%S%f"))
newFileName = file_prefix + "_" + timestamp + file_suffix
sv_path = os.path.join(svdir, newFileName)
# allfiles = allfiles.append([{"oldfilename": filename, "newfilename": newFileName}])
# Append an "old$$new" pair to the comma-separated string kept in mydict1.
mydict = filename + '$$' + newFileName
mydict1 = mydict1 + ',' + mydict
print(mydict1)
# Only write when no file with that name exists yet.
if not os.path.isfile(sv_path):
print("oldpath:---->" + sv_path)
# filename = os.rename(filename, filename + '_Rahul')
# sv_path = os.path.join(svdir, filename)
# print("Newpath:---->" + sv_path)
fp = open(sv_path, 'wb')
# print("Rahul")
print(part.get_payload(decode=True))
# try:
# newFileByteArray = bytearray(fp)
# if part.get_payload(decode=True) is not None:
# get_payload(decode=True) returns None for a multipart part, so this write
# is where the reported TypeError is raised.
fp.write(part.get_payload(decode=True))
# except (TypeError, IOError):
# pass
fp.close()
Error is
<class 'TypeError'> ReadEmailUsingIMAP.py 129
a bytes-like object is required, not 'NoneType'
Just to explain why this is happening (it hit me too), quoting the v. 3.5 library doc. (v2 says the same):
If the message is a multipart and the decode flag is True, then None is returned.
If your attachment is an .EML, it's almost always going to be multi-part, thus the None.
Jin Thakur's workaround is appropriate if you're only expecting .EML multipart attachments (not sure if there are any other use cases); it should have been accepted as an answer.
Use eml_parser
https://pypi.org/project/eml-parser/
import datetime
import json
import eml_parser
def json_serial(obj):
    """Serialise ``datetime`` values for ``json.dumps(default=...)``.

    Returns the ISO 8601 string for a ``datetime.datetime`` and ``None`` for
    any other object.
    """
    if not isinstance(obj, datetime.datetime):
        return None
    return obj.isoformat()
# Load the raw message bytes from disk.
with open('sample.eml', 'rb') as eml_file:
    raw_email = eml_file.read()

# Decode the message, then print it as JSON; json_serial converts datetimes.
parsed_eml = eml_parser.eml_parser.decode_email_b(raw_email)
eml_json = json.dumps(parsed_eml, default=json_serial)
print(eml_json)

IMAP how to save **all** attachments

I am trying to save Excel file attachments from my inbox to a directory. My code executes without errors, since I can see the printouts, but the attachments won't save to the directory. Is there something missing in my code that prevents the files from being saved?
import email, getpass, imaplib, os, sys
detach_dir = r'\directory link'
user = "test"
pwd = "test"
sender_email = "example#example.com"

# Connect, log in and open the folder that holds the messages.
m = imaplib.IMAP4_SSL("outlook.office365.com")
m.login(user,pwd)
m.select('"INBOX/somestuff"')
print("ok")

# Find every message sent by sender_email.
resp, items = m.search(None, 'FROM', '"%s"' % sender_email)
items = items[0].split()
print("ok")

for emailid in items:
    resp, data = m.fetch(emailid, "(RFC822)")
    # Parse the raw bytes directly: decoding the whole message as UTF-8 first
    # fails on messages that contain 8-bit data in another charset.
    mail = email.message_from_bytes(data[0][1])
    print("ok")
    # Messages that are not multipart are skipped.
    if mail.get_content_maintype() != 'multipart':
        continue
    subject = ""
    if mail["subject"] is not None:
        subject = mail["subject"]
    print ("["+mail["From"]+"] :" + subject)

    # Numbering for unnamed attachments runs per message, so two unnamed parts
    # no longer end up with the same generated name.
    counter = 1
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            filename = 'part-%03d%s' % (counter, '.bin')
            counter += 1
        att_path = os.path.join(detach_dir, filename)
        # Files that already exist are left untouched.
        if not os.path.isfile(att_path):
            with open(att_path, 'wb') as fp:
                fp.write(part.get_payload(decode=True))
This code saves just one of the attachments in the subfolder but I am looking to get all attachments save to the directory:
# Connect to the mailbox and fetch every message in the subfolder.
detach_dir = r'directory link'
m = imaplib.IMAP4_SSL("outlook.office365.com")
m.login('user','pass')
m.select('"INBOX/subfolder"')
resp, items = m.search(None, 'All')
items = items[0].split()
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)")
# NOTE(review): `part` is not defined anywhere in this snippet; the fetched
# `data` is never parsed into a message and no loop over its parts is shown.
filename = part.get_filename()
print(filename)
# Write the decoded payload of `part` to detach_dir under its file name.
att_path = os.path.join(detach_dir, filename)
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print('check folder')
Question: This code saves just one of the attachments ... but I am looking to get all attachments
Implement iter_attachments()
import email.policy

resp, items = imap.search(None, "(UNSEEN)")
for num in items[0].split():
    resp, data = imap.fetch(num, '(RFC822)')
    data01 = data[0][1]
    # imaplib returns bytes, and iter_attachments() only exists on
    # EmailMessage objects, which the parser produces when given
    # policy=email.policy.default.
    msg_obj = email.message_from_bytes(data01, policy=email.policy.default)
    for part in msg_obj.iter_attachments():
        filename = part.get_filename()
        print(filename)
iter_attachments()
Return an iterator over all of the immediate sub-parts of the message that are not candidate “body” parts. That is, skip the first occurrence of each of text/plain, text/html, multipart/related, or multipart/alternative (unless they are explicitly marked as attachments via Content-Disposition: attachment), and return all remaining parts.
Used modules and classes:
class imaplib.IMAP4
class email.message.EmailMessage
Here’s an example of how to unpack a MIME message, using email.message.walk(), into a directory of files:

To copy the attached file in an email.

I have been able to figure out how to get the name of the file attached to an email, but I am stuck after that. I tried os.path.join, which only joins the directory I want to download to with the file name. Please suggest what to do next. Thanks.
# `mail` is the IMAP connection; `m` is the message currently being inspected.
# Keeping the two apart stops the message from overwriting the connection.
mail = imaplib.IMAP4_SSL('outlook.office365.com',993)
mail.login("UN", "PW")
mail.select("Inbox")
typ, msgs = mail.search(None, '(SUBJECT "qwerty")')
msgs = msgs[0].split()
for emailid in msgs:
    resp, data = mail.fetch(emailid, "(RFC822)")
    email_body = data[0][1]
    m = email.message_from_bytes(email_body)
    # Messages that are not multipart are skipped.
    if m.get_content_maintype() != 'multipart':
        continue
    for part in m.walk():
        # Skip containers and parts that carry no Content-Disposition header.
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        print(filename)
Following the sample from this link you can set the path when using the open function. (raw string by prefixing the string with r)
# A raw string literal cannot end with a backslash, so the separator is added
# as a separate, normally escaped string.
with open(r'c:\tmp\folder' + '\\' + filename, 'wb') as fp:
    fp.write(part.get_payload(decode=True))
print('%s saved!' % filename)

Categories

Resources