I am hitting a road block with exporting outlook messages as .msg files. I'm not sure on how to accomplish this task. I have a program that currently reads the email and exports the attachments and once it completes it moves the message to a processed folder so I can keep track of what has been completed. I need to add a function that exports the entire email itself to a folder on the local machine. Has anyone accomplished this using pywin32.client?
Here is the program as it stands now. (excuse the mess its still in progress)
import os
import win32com.client
import csv
import datetime
from random import randint
ATTACHMENTS_FOLDER = "C:\\EMAILS"
LOG_PATH = 'C:\\EMAILS\\log.csv'
COUNTER = 1
SUBFOLDER_LIST = ["TEST"]
UPLOAD_LIST = 'C:\\EMAILS\\logupload%d.csv' % randint(2,150000)
def ExportMessages (Folder, item):
#No IDEA!
pass
def process_attachment(sender, subject, attachment, messagedate):
"""
:param sender:
:param subject:
:param attachment:
:param messagedate:
"""
global count
count = 0
try:
filepath = os.path.join(ATTACHMENTS_FOLDER, "RAN-%dSEN-%sSUB-%s%s" % (randint(2,100000),str(sender), str(subject), attachment))
count = count +1
print "Trying", filepath
open(filepath, "r")
os.remove(filepath)
except IOError:
pass
try:
attachment.SaveAsFile(filepath)
row = [messagedate, sender, subject, count]
row2 = [messagedate, sender, subject, filepath]
w = csv.writer(csv_file)
w2 = csv.writer(csv_file2)
w.writerow(row)
w2.writerow(row2)
except:
pass
def move_message_fail(message, folder):
"""
:param message:
:param folder:
"""
print "Moving:", message.Subject
proc_folder = folder.Folders("Failed")
message.Move(proc_folder)
def move_message(folder, message):
"""
:param folder:
:param message:
"""
print "Moving:", message.Subject
proc_folder = folder.Folders("Processed")
message.Move(proc_folder)
def process_message(message, folder):
"""
:param message:
:param folder:
"""
global vin_num
vin_num = ''
print "Message:", message.Subject
vin = message.Subject
sender = message.SenderName
if folder == SUBFOLDER_LIST[0]:
for i in vin.split(' '):
if '-' in i:
vin_num = i
if vin_num:
now = datetime.datetime.now()
messagedate = now.strftime("%m-%d-%Y")
attachments = message.Attachments
for n_attachment in range(attachments.Count):
attachment = attachments.Item(n_attachment + 1)
#if attachment.Type == win32com.client.constants.CdoFileData:
process_attachment(sender, vin_num, attachment, messagedate)
#message.Update()
move_message(folder, message, up_folder)
else:
move_message_fail(message, folder)
else:
vin_num = vin.split(' ')[1]
now = datetime.datetime.now()
messagedate = now.strftime("%m-%d-%Y")
attachments = message.Attachments
for n_attachment in range(attachments.Count):
attachment = attachments.Item(n_attachment + 1)
#if attachment.Type == win32com.client.constants.CdoFileData:
process_attachment(sender, vin_num, attachment, messagedate)
#message.Update()
move_message(folder, message, up_folder)
if __name__ == '__main__':
csv_file = open(LOG_PATH, "ab")
csv_file2 = open(UPLOAD_LIST, "wb")
for i in SUBFOLDER_LIST:
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
one_folder = outlook.Folders(1)
two_folder = one_folder.Folders("Inbox")
three_folder = two_folder.Folders(i)
messages = three_folder.Items
message = messages.GetFirst()
while message:
process_message(message, i)
ExportMessages(three_folder, message)
message = messages.GetNext()
Call MailItem.SaveAs and pass olMsg or olMsgUnicode as the second (Format) parameter.
Related
I have the below code, which downloads a Gmail email and its attachments. It returns its attachments.
def gmailAPIDownloadAttachments(self, messageID, userID="me"):
try:
service = self.gmailAPIService
self.GLogger.info("Attempting to download attachments from messageID (" +str(messageID)+ ")")
message = self.gmailAPIGetFullMessage(messageID, userID=userID)
if message is False:
self.GLogger.error("Failed to extract message (" +str(messageID)+ ") for downloading attachments")
return False
attachmentList = list()
payload = message['payload']
if 'parts' in payload:
parts = payload['parts']
for part in parts:
if part['filename']:
if 'data' in part['body']:
data = part['body']['data']
else:
att_id = part['body']['attachmentId']
att = service.users().messages().attachments().get(userId=userID, messageId=messageID, id=att_id).execute()
data = att['data']
file_data = base64.urlsafe_b64decode(data.encode('UTF-8'))
filename = part['filename']
extSearch = filename.find('.')
if extSearch == -1:
ext = ""
partFileName = filename[0:extSearch]
else:
ext = filename[extSearch+1:]
partFileName = filename[0:extSearch]
theAttachment = Attachment(filename,partFileName, ext, file_data)
attachmentList.append(theAttachment)
self.GLogger.info("Successfully downloaded attachments from messageID (" +str(messageID)+ ")")
return(attachmentList)
except:
self.GLogger.error("Encountered an error while attempting to download email attacments from messageID (" +str(messageID)+ ")")
tb = traceback.format_exc()
self.GLogger.exception(tb)
return False
I understand how to convert fetching messages into batching. For example, this is how one could batch-fetch messages:
from apiclient.http import BatchHttpRequest
import json
batch = BatchHttpRequest()
#assume we got messages from Gmail query API
for message in messages:
batch.add(service.users().messages().get(userId='me', id=message['id'],
format='raw'))
batch.execute()
for request_id in batch._order:
resp, content = batch._responses[request_id]
message = json.loads(content)
#handle your message here, like a regular email object
However, the attachments aspect seem to have logic and other possible fetches such as in this part:
att_id = part['body']['attachmentId']
att = service.users().messages().attachments().get(userId=userID, messageId=messageID, id=att_id).execute()
data = att['data']
How can I effectively batch both fetching the message and its attachments? I would like to be able to quickly fetch many emails at once.
i want to downloap PJ from an email so i write this code
import imaplib, email, os
from time import sleep
user='my_email'
password = 'secret'
imap_url = 'Outlook.office365.com'
con = imaplib.IMAP4_SSL(imap_url)
con.login(user,password)
def get_attachments(msg):
for part in msg.walk():
if part.get_content_maintype()=='multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
if bool(fileName):
filePath = os.path.join(attachment_dir, fileName)
with open(filePath,'wb') as f:
f.write(part.get_payload(decode=True))
attachment_dir = "D:/TOUT/DATANALYSE/email_python/attachement/"
never_stop = True
while never_stop:
last_email_id = search('FROM', "kvaccarin#datanalyse.fr", con)[0].split()[-1]
typ, data = con.fetch(last_email_id, '(RFC822)')
msg = email.message_from_bytes(data[0][1])
get_attachments(msg)
time.sleep(12*60*60)
this code works when i run on Jupyter but when i want to run this code with Sublime Texte i have this error:
NameError: name 'search' is not defined
This fonction search() is not the fonction from re but it's the fonction from IMAP
Someone know how to fix it?
It's the beginning of the answers... but it's something!
Thanks to Max, I understand the error on the first Question, So to fix it I define a new function search_data():
def search_data(key,value,con):
result, data = con.search(None,key,'"{}"'.format(value))
return data
and when I run the same code with the use of this function like that:
import imaplib, email, os
from time import sleep
user='my_email'
password = 'secret'
imap_url = 'Outlook.office365.com'
con = imaplib.IMAP4_SSL(imap_url)
con.login(user,password)
def get_attachments(msg):
for part in msg.walk():
if part.get_content_maintype()=='multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
if bool(fileName):
filePath = os.path.join(attachment_dir, fileName)
with open(filePath,'wb') as f:
f.write(part.get_payload(decode=True))
def search_data(key,value,con):
result, data = con.search(None,key,'"{}"'.format(value))
return data
attachment_dir = "D:/TOUT/DATANALYSE/email_python/attachement/"
never_stop = True
while never_stop:
last_email_id = search_data('FROM', "kvaccarin#datanalyse.fr", con)[0].split()[-1]
typ, data = con.fetch(last_email_id, '(RFC822)')
msg = email.message_from_bytes(data[0][1])
get_attachments(msg)
time.sleep(12*60*60)
there still have an error:
error: command SEARCH illegal in state AUTH, only allowed in states SELECTED
I know it's the beginning of the answer but it perhaps helps to have the entire answer!
I have gotten some of the features I want but need help with 2 others.
I would like to flag the message "Mark as Done" (it's one of the Flag statuses). I have not found how to do this.
If I wanted to do this same thing for 4 other emails how would I do it, with 4 other save paths?
import win32com.client
import os
Outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inboxfolder = Outlook.GetDefaultFolder(6) # "6" refers to the index of a folder - in this case the inbox. You can change that number to reference
inbox = inboxfolder.Items
message = inbox.GetFirst()
subject = message.Subject
sender = message.SenderEmailAddress
for m in inbox:
if m.Class == 43: # this is to make sure it is an email item and not something else.
if m.SenderEmailAddress == 'John#email.com' and m.Unread == True:
path = 'C:\\User\\Path\\Data\\John'
print ('Subject as: ' and message)
for attached in message.Attachments:
attached.SaveASFile(os.path.join(path,attached.FileName)) #Saves attachment to current folder
print (attached)
message.Unread = False
print (message.FlagStatus)
message.FlagStatus = 1 # This is to "mark as Done" but it doesn't work
message = inbox.GetNext()
elif m.SenderEmailAddress == 'Jane#email.com' and m.Unread == True:
path = 'C:\\User\\Path\\Data\\Jane'
# ... How would you add 4 more?
message = inbox.GetNext()
else:
message = inbox.GetNext()
You have to save it message.Save(), Example
import win32com.client
Outlook = win32com.client.Dispatch("Outlook.Application")
olNs = Outlook.GetNamespace("MAPI")
Inbox = olNs.GetDefaultFolder(win32com.client.constants.olFolderInbox)
for Item in Inbox.Items:
if Item.Class == 43:
Item.FlagStatus = 1
Item.Save()
For multiple emails & path use dictionary, Example
emails_with_path = {
"email#one.com": "path_one",
"email#two.com": "path_two",
"email#three.com": "path_three"
}
for m in inbox:
if m.Class == 43:
for email, path in emails_with_path.items():
if m.SenderEmailAddress == email and m.UnRead:
print(email)
print(path)
I'm running Flask on Apache with mod_wsgi, every time a file get uploaded, the response time of the server worses, and my memory goes over 100% to swap.
This below is the whole code that handles my upload
#app.route(sApiRoute + '/f/upload', methods=['POST'])
#hasToken
def pbFileUpload(token=None,**kwargs):
def log_time(start_dt=None,start_time=None,doc=None):
end_time = None
if start_time is not None:
end_time = str(time.time()-start_time)
if end_time is not None and end_time > str(0.3):
pblog.warning("Upload with long response time. Start: "+str(start_dt)+ "Duration: "+str(end_time)+" \nUploader: " + token["id_user"] + " \nDoc info: " + str(doc))
import time
start_time = time.time()
start_dt = current_datetime()
# parse path
path = getUploadPath(request.json, None, token)
if not path:
return r404()
# Get the name of the uploaded file
filename = request.json["name"]
filename = filename.replace("$c$","č").replace("$c2$","ć").replace("$z$","ž").replace("$s$","š").replace("$d$","đ").replace("$C$","Č").replace("$C2$","Ć").replace("$Z$","Ž").replace("$S$","Š").replace("$D$","Đ")
rawfile = request.json["data"]
header, data = rawfile.split(',')
# convert data
import base64
recoveredfile = base64.decodebytes(bytes(data, 'UTF-8'))
# write data
import uuid
# leave file name as is
if request.json.get("leaveName"):
generatedfilename = filename
leaveName = True
else:
generatedfilename = str(uuid.uuid1())
leaveName = False
# use of ftp to save files
if 'ftp.' in path.lower():
putFileToFtp(path, filename, generatedfilename, recoveredfile)
# classic directory structure
else:
if not os.path.exists(path):
try:
os.mkdir(path)
except Exception as e:
pblog.warning(str(e),exc_info=1)
return jsonify(data={"status":False,"msg":"Path not found"})
while os.path.exists(os.path.join(path, generatedfilename)) and not leaveName:
generatedfilename = str(uuid.uuid1())
with open(os.path.join(path, generatedfilename), "wb") as out_file:
out_file.write(recoveredfile)
out_file.close()
# send to document system
if request.json.get("dataRec"):
db = DBStore.getDB(token["current_company_id"])
dataRec = request.json.get("dataRec")
dataRec["idext"] = generatedfilename
rtd = DocumentCls.getIdByIdent(dataRec["IdDocumentType"],db)
if rtd and rtd!=[]:
if rtd.get("Id"):
dataRec["IdDocumentType"] = rtd["Id"]
r = DocumentCls.saveDocument(dataRec,request.json.get("username"),db)
log_time(start_dt,start_time,dataRec)
return jsonify(data={"version": r,"generatedfilename":generatedfilename},**kwargs)
else:
log_time(start_dt,start_time,generatedfilename)
return jsonify(data={"filename": filename, "generatedfilename": generatedfilename},**kwargs)
I am interested to trigger a certain action upon receiving an email from specific
address with specific subject. In order to be able to do so I need to implement
monitoring of my mailbox, checking every incoming mail (in particular, i use gmail).
what is the easiest way to do that?
Gmail provides the ability to connect over POP, which you can turn on in the gmail settings panel. Python can make connections over POP pretty easily:
import poplib
from email import parser
pop_conn = poplib.POP3_SSL('pop.gmail.com')
pop_conn.user('username')
pop_conn.pass_('password')
#Get messages from server:
messages = [pop_conn.retr(i) for i in range(1, len(pop_conn.list()[1]) + 1)]
# Concat message pieces:
messages = ["\n".join(mssg[1]) for mssg in messages]
#Parse message intom an email object:
messages = [parser.Parser().parsestr(mssg) for mssg in messages]
for message in messages:
print message['subject']
pop_conn.quit()
You would just need to run this script as a cron job. Not sure what platform you're on so YMMV as to how that's done.
Gmail provides an atom feed for new email messages. You should be able to monitor this by authenticating with py cURL (or some other net library) and pulling down the feed. Making a GET request for each new message should mark it as read, so you won't have to keep track of which emails you've read.
While not Python-specific, I've always loved procmail wherever I could install it...!
Just use as some of your action lines for conditions of your choice | pathtoyourscript (vertical bar AKA pipe followed by the script you want to execute in those cases) and your mail gets piped, under the conditions of your choice, to the script of your choice, for it to do whatever it wants -- hard to think of a more general approach to "trigger actions of your choice upon receipt of mails that meet your specific conditions!! Of course there are no limits to how many conditions you can check, how many action lines a single condition can trigger (just enclose all the action lines you want in { } braces), etc, etc.
People seem to be pumped up about Lamson:
https://github.com/zedshaw/lamson
It's an SMTP server written entirely in Python. I'm sure you could leverage that to do everything you need - just forward the gmail messages to that SMTP server and then do what you will.
However, I think it's probably easiest to do the ATOM feed recommendation above.
EDIT: Lamson has been abandoned
I found a pretty good snippet when I wanted to do this same thing (and the example uses gmail). Also check out the google search results on this.
I recently solved this problem by using procmail and python
Read the documentation for procmail. You can tell it to send all incoming email to a python script like this in a special procmail config file
:0:
| ./scripts/ppm_processor.py
Python has an "email" package available that can do anything you could possibly want to do with email. Read up on the following ones....
from email.generator import Generator
from email import Message
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.mime.multipart import MIMEMultipart
https://developers.google.com/gmail/gmail_inbox_feed
Says you have to have a corporate Gmail, but I have come to find that you can read Gmail free versions without issues. I use this code to get my blood pressure results I email or text to a gmail address.
from email.header import decode_header
from datetime import datetime
import os
import pandas as pd
import plotly.graph_objs as go
import plotly
now = datetime.now()
dt_string = now.strftime("%Y.%m.%d %H:%M:%S")
print("date_time:", dt_string)
email_account = '13123#gmail.com'
email_password = '131231231231231231312313F'
email_server = 'imap.gmail.com'
email_port = 993
accept_emails_from = {'j1231312#gmail.com', '1312312#chase.com', '13131231313131#msg.fi.google.com'}
verbose = True
def get_emails():
email_number = 0
local_csv_data = ''
t_date = None
t_date = None
t_systolic = None
t_diastolic = None
t_pulse = None
t_weight = None
try:
mail = imaplib.IMAP4_SSL(email_server)
email_code, email_auth_status = mail.login(email_account, email_password)
if verbose:
print('[DEBUG] email_code: ', email_code)
print('[DEBUG] email_auth_status: ', email_auth_status)
mail.list()
mail.select('inbox')
# (email_code, messages) = mail.search(None, 'ALL')
(email_code, messages) = mail.search(None, '(UNSEEN)') # only get unread emails to process.
subject = None
email_from = None
for email_id in messages[0].split():
email_number += 1
email_code, email_data = mail.fetch(email_id, '(RFC822)')
for response in email_data:
if isinstance(response, tuple): # we only want the tuple ,the bytes is just b .
msg = email.message_from_bytes(response[1])
content_type = msg.get_content_type()
subject, encoding = decode_header(msg["Subject"])[0]
subject = str(subject.replace("\r\n", ""))
if isinstance(subject, bytes):
subject = subject.decode(encoding)
email_from, encoding = decode_header(msg.get("From"))[0]
if isinstance(email_from, bytes):
email_from = email_from.decode(encoding)
if content_type == "text/plain":
body = msg.get_payload(decode=True).decode()
parse_data = body
else:
parse_data = subject
if '>' in email_from:
email_from = email_from.lower().split('<')[1].split('>')[0]
if email_from in accept_emails_from:
parse_data = parse_data.replace(',', ' ')
key = 0
for value in parse_data.split(' '):
if key == 0:
t_date = value
t_date = t_date.replace('-', '.')
if key == 1:
t_time = value
if ':' not in t_time:
numbers = list(t_time)
t_time = numbers[0] + numbers[1] + ':' + numbers[2] + numbers[3]
if key == 2:
t_systolic = value
if key == 3:
t_diastolic = value
if key == 4:
t_pulse = value
if key == 5:
t_weight = value
key += 1
t_eval = t_date + ' ' + t_time
if verbose:
print()
print('--------------------------------------------------------------------------------')
print('[DEBUG] t_eval:'.ljust(30), t_eval)
date_stamp = datetime.strptime(t_eval, '%Y.%m.%d %H:%M')
if verbose:
print('[DEBUG] date_stamp:'.ljust(30), date_stamp)
print('[DEBUG] t_systolic:'.ljust(30), t_systolic)
print('[DEBUG] t_diastolic:'.ljust(30), t_diastolic)
print('[DEBUG] t_pulse:'.ljust(30), t_pulse)
print('[DEBUG] t_weight:'.ljust(30), t_weight)
new_data = str(date_stamp) + ',' + \
t_systolic + ',' + \
t_diastolic + ',' + \
t_pulse + ',' + \
t_weight + '\n'
local_csv_data += new_data
except Exception as e:
traceback.print_exc()
print(str(e))
return False, email_number, local_csv_data
return True, email_number, local_csv_data
def update_csv(local_data):
""" updates csv and sorts it if there is changes made. """
uniq_rows = 0
if os.name == 'posix':
file_path = '/home/blood_pressure_results.txt'
elif os.name == 'nt':
file_path = '\\\\uncpath\\blood_pressure_results.txt'
else:
print('[ERROR] os not supported:'.ljust(30), os.name)
exit(911)
if verbose:
print('[DEBUG] file_path:'.ljust(30), file_path)
column_names = ['00DateTime', 'Systolic', 'Diastolic', 'Pulse', 'Weight']
if not os.path.exists(file_path):
with open(file_path, 'w') as file:
for col in column_names:
file.write(col + ',')
file.write('\n')
# append the new data to file.
with open(file_path, 'a+') as file:
file.write(local_data)
# sort the file.
df = pd.read_csv(file_path, usecols=column_names)
df_sorted = df.sort_values(by=["00DateTime"], ascending=True)
df_sorted.to_csv(file_path, index=False)
# remove duplicates.
file_contents = ''
with open(file_path, 'r') as file:
for row in file:
if row not in file_contents:
uniq_rows += 1
print('Adding: '.ljust(30), row, end='')
file_contents += row
else:
print('Duplicate:'.ljust(30), row, end='')
with open(file_path, 'w') as file:
file.write(file_contents)
return uniq_rows
# run the main code to get emails.
status, emails, my_data = get_emails()
print('status:'.ljust(30), status)
print('emails:'.ljust(30), emails)
# if the new emails received then sort the files.
csv_rows = update_csv(my_data)
print('csv_rows:'.ljust(30), csv_rows)
exit(0)