How to save email from outlook using python - python

I am trying to save emails from a sub-folder using the Python script below. I want to restrict it with days=1, meaning I only need to save emails that were received within the last day.
from win32com.client import Dispatch
from datetime import date, timedelta
import datetime as dt
# Asker's script as posted (indentation was lost when the page was extracted).
# Destination folder for the .msg files; not referenced again in this snippet.
msg_location = r'C:\Users\rahul\Desktop\msg_files'
outlook = Dispatch("outlook.Application").GetNamespace("MAPI")
# Folder 6 is the default Inbox; 'Email_snapper' is a sub-folder of it.
inbox = outlook.GetDefaultFolder(6).Folders['Email_snapper']
messages = inbox.Items
# Print the body of the first item in the folder.
message = messages.GetFirst()
Body_content = message.Body
print(Body_content)
for msg in messages:
# Cut-off: 24 hours before now, formatted as text for the filter string.
lastWeekDateTime = dt.datetime.now() - dt.timedelta(days=1)
lastWeekDateTime = lastWeekDateTime.strftime('%m/%d/%Y %H:%M %p')
# NOTE(review): `Ryestrict` is not an Items method; the answer below points out
# it should be `Restrict`, which returns a collection rather than one item.
message = messages.Ryestrict("[ReceivedTime] >= '" + lastWeekDateTime + "'")
#name = str(message)
# NOTE(review): `msg` is the loop item, not text, so `msg+".msg"` does not
# produce a file name.
message.SaveAs(msg+".msg")

Try setting your filter like the following
Example
import re
import win32com.client
import datetime as dt
import os
# Save every Inbox item received during the last 24 hours as a .msg file in
# the current working directory.
Outlook = win32com.client.Dispatch("Outlook.Application")
olNs = Outlook.GetNamespace("MAPI")
Inbox = olNs.GetDefaultFolder(6)  # 6 = olFolderInbox

# DASL (#SQL=) date comparisons are evaluated in UTC, so build the cut-off in
# UTC. %I (12-hour clock) is the hour field that belongs with %p (AM/PM).
lastWeekDateTime = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1)
lastWeekDateTime = lastWeekDateTime.strftime('%m/%d/%Y %I:%M %p')
print(lastWeekDateTime)

# chr(34) is a double quote: the property name must be quoted inside the query.
Filter = ("#SQL=" + chr(34) + "urn:schemas:httpmail:datereceived" +
          chr(34) + " >= '" + lastWeekDateTime + "'")

Items = Inbox.Items.Restrict(Filter)
Items.Sort('[ReceivedTime]', False)

# Check the count explicitly. A for/else prints "No Item" after every loop
# that ends without `break`, i.e. even when items were found and saved.
if Items.Count == 0:
    print("No Item")

for Item in Items:
    print(Item.Subject)
    print(Item.ReceivedTime)
    # Keep only letters and digits so the subject is a valid file name.
    save_name = re.sub('[^A-Za-z0-9]+', '', str(Item.Subject)) + '.msg'
    Item.SaveAs(os.path.join(os.getcwd(), save_name))

Firstly, it is Restrict, not Ryestrict.
Secondly, Restrict returns Items collection, not a single item. You need to iterate over the items in that collection. If you only expect a single item, use Find instead of Restrict.

Related

How to print email body from outlook without signature - Python

I'm trying to parse emails from Outlook.
I would like the following printed:
subject
body (excluding sender's signature)
Ignore all previous emails from the conversation (replies & forwards)
Is there any way I can print out the body text before multi-space between lines (usually this is how signature being separated from the main text)?
Any help would be appreciated!
import win32com.client
#other libraries to be used in this script
import os
from datetime import datetime, timedelta
# Print subject and body of every Inbox message received in the last 24 hours
# from one specific sender.
outlook = win32com.client.Dispatch('outlook.application')
mapi = outlook.GetNamespace("MAPI")

# List the display name of each configured account's delivery store.
for account in mapi.Accounts:
    print(account.DeliveryStore.DisplayName)

inbox = mapi.GetDefaultFolder(6)  # 6 = olFolderInbox
messages = inbox.Items
messages.Sort('[ReceivedTime]', True)  # True = descending, newest first

# %I (12-hour clock) is the hour field that belongs with the AM/PM marker %p.
received_dt = datetime.now() - timedelta(days=1)
received_dt = received_dt.strftime('%m/%d/%Y %I:%M %p')

# Narrow the collection in two steps: by received time, then by sender.
messages = messages.Restrict("[ReceivedTime] >= '" + received_dt + "'")
messages = messages.Restrict("[SenderEmailAddress] = 'firstname.lastname@gmail.com'")

message = messages.GetFirst()
print("Current date/time: " + received_dt)
# GetNext() returns None after the last item, which ends the loop.
while message:
    print(message.Subject)
    print(message.body)
    message = messages.GetNext()
You can use a regex to ignore everything after three newlines (there are normally one or two newlines between paragraphs):
import re

# Capture everything before the first run of three line breaks.
#  * non-greedy (.*?) stops at the FIRST gap; a greedy match would run to the
#    last gap and keep the signature and quoted replies;
#  * \r?\n accepts both "\n" and the "\r\n" line endings of Outlook bodies;
#  * DOTALL lets "." span line breaks inside the captured text.
r = re.compile(r"(.*?)(?:\r?\n){3}", re.DOTALL)
# ...
while message:
    # ...
    match = r.match(message.body)
    if match:
        # group(1) is the captured text; groups(0) would return a tuple.
        body_without_signature = match.group(1)
    else:
        # No signature found
        body_without_signature = message.body
    print(body_without_signature)

Python code could not run in task scheduler

I wrote a python code to download email attachments and this code works fine when I start it manually (just double click/open with python).
However, when I tried setting up a task in task scheduler, the job keeps running forever and no output is seen (no attachments in the output folder). What have I done wrong here?
I tried all combination of scheduler settings I could think of (run only when user is logged on/ run whether or not user is logged on; run with highest privileges)
I also tried defining actions in different ways:
-script: C:\Windows\System32\cmd.exe
-argument: "C:\ProgramData\Anaconda3\python.exe "C:\Users\LN\Documents\PythonScripts\outlookdownload.py""
-script: C:\ProgramData\Anaconda3\python.exe
-argument: "C:\Users\LN\Documents\PythonScripts\outlookdownload.py"
-script: C:\ProgramData\Anaconda3\python.exe
-argument: "C:\Users\LN\Documents\PythonScripts\outlookdownload.py"
-start in: C:\Users\LN\Documents\PythonScripts\
-script: C:\ProgramData\Anaconda3\python.exe
-argument: "C:\Users\LN\Documents\PythonScripts\outlookdownload.py"
-start in: C:\Users\LN\Documents\PythonScripts\"
Below is the python code:
# Placeholder from the post: substitute the real output folder path here
# (as written, this line is not valid Python).
outpfolder= [output folder link]
import datetime
import win32com.client
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
# Inbox of the 'Elo Inventory' mailbox.
inbox = outlook.Folders('Elo Inventory').Folders('Inbox')
messages = inbox.Items
# Start at the last item; the loop further down walks backwards from here.
message = messages.GetLast()
sentdate = message.senton.date()
# Cut-off date: the loop stops once a message was sent on or before this day.
datelimit = (datetime.date.today () - datetime.timedelta (days=7))
#define function to save attachment:
def OL_Attsave(outpath, name, filenamefilter):
    """Save the matching attachments of the current module-level ``message``.

    An attachment is saved when its file name contains ``filenamefilter``
    (an empty string matches every name) and does not contain "image".
    Each file is written to
    ``<outpath><sentdate>_<name>_<original file name>``.

    Reads the module-level ``message`` and ``sentdate``.

    Args:
        outpath: Destination prefix. It is concatenated as-is, so it must
            end with a path separator.
        name: Label inserted into every saved file name.
        filenamefilter: Substring the attachment file name must contain.

    Returns:
        str: Summary naming how many files were actually saved.
    """
    attachments = message.Attachments
    saved = 0
    # Outlook collections are 1-based.
    for i in range(1, attachments.Count + 1):
        attachment = attachments.Item(i)
        filename = str(attachment.FileName)
        if filenamefilter in filename and "image" not in filename:
            attachment.SaveAsFile(outpath + str(sentdate) + "_" + name + "_" + filename)
            saved += 1
    # Report the number saved, not the total number of attachments.
    return "saved " + str(saved) + " files for " + name + " to " + outpath
# Walk backwards from the newest message until one is older than the cut-off
# or the beginning of the folder is reached.
while message is not None and sentdate > datelimit:
    try:
        # Company1:
        if message.SenderEmailAddress == "abc@company1.com" and message.subject == "INV ELO":
            outpath = outpfolder + "company1\\"
            name = 'company1'
            OL_Attsave(outpath, name, '')
        # Company2:
        elif "@company2.com" in message.SenderEmailAddress:
            outpath = outpfolder + "company2\\"
            name = 'company2'
            OL_Attsave(outpath, name, 'INV')
    except Exception as exc:
        # Report the failure instead of hiding it; one bad item must not stop
        # the run, but a silent bare `except` makes problems invisible.
        print("skipping message:", exc)

    # Advance exactly once per iteration, whether or not the item failed.
    message = messages.GetPrevious()
    # GetPrevious() returns None before the first item, and some item types
    # may have no send time; keep the previous date in that case.
    sent_on = getattr(message, "senton", None)
    if sent_on is not None:
        sentdate = sent_on.date()

Downloading Email Attachments from Shared Folder - Python

I have the below code to download email attachments based on date sent and email subject criteria:
from datetime import date, timedelta
import os
import win32com.client
# Save the attachments of messages sent today whose subject contains `subject`.
path = os.path.expanduser("C:\\Users\\xxxx\\Documents\\Projects\\VBA Projects\\VLOOKUP Automation\\Vlookup File Location")
today = date.today()
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.Folders("xxx").Folders.Item("Inbox")
messages = inbox.Items
subject = "xxx"
# Exclusive window (yesterday, tomorrow), i.e. messages sent today. The bounds
# were previously swapped (low = tomorrow, high = yesterday), which can never
# match any date.
dateLow = today - timedelta(days=1)
dateHigh = today + timedelta(days=1)
# Upper bound on the number of items inspected. NOTE: this name shadows the
# builtin max(); it is kept because the surrounding text refers to it.
max = 2500
for count, message in enumerate(messages):
    if count > max:
        break
    if subject in message.subject and dateLow < message.senton.date() < dateHigh:
        attachments = message.Attachments
        # Outlook collections are 1-based; Count avoids walking the collection
        # just to measure it.
        for x in range(1, attachments.Count + 1):
            attachment = attachments.Item(x)
            attachment.SaveAsFile(os.path.join(path, attachment.FileName))
Is there any way to specify criteria for only .csv attachments to be downloaded for example?
Additionally, this code was previously being used on a public folder - those folders have now been updated to shared folders. Since the update, I have had to increase the "max" from 500 to 2500 in order to find the specified emails. Is there any way to speed this up?
Thanks
Below is a way to specify which file types you want.
Please enter the file endings in the attachments_of_interest list.
from datetime import date, timedelta
import os
import win32com.client
# Save only attachments whose extension is listed in attachments_of_interest,
# from messages sent today whose subject contains `subject`.
path = os.path.expanduser("C:\\Users\\xxxx\\Documents\\Projects\\VBA Projects\\VLOOKUP Automation\\Vlookup File Location")
today = date.today()
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.Folders("xxx").Folders.Item("Inbox")
messages = inbox.Items
subject = "xxx"
# Exclusive window (yesterday, tomorrow), i.e. messages sent today. The bounds
# were previously swapped, which can never match any date.
dateLow = today - timedelta(days=1)
dateHigh = today + timedelta(days=1)
max_n = 2500
# Extensions to keep, with the leading dot and in lower case; an empty list
# keeps every attachment.
attachments_of_interest = ['.csv']
for count, message in enumerate(messages):
    if count > max_n:
        break
    if subject in message.subject and dateLow < message.senton.date() < dateHigh:
        attachments = message.Attachments
        # Outlook collections are 1-based.
        for x in range(1, attachments.Count + 1):
            attachment = attachments.Item(x)
            attachment_fname = str(attachment.FileName)
            # splitext keeps the dot (".csv"), matching the list entries;
            # split('.')[-1] yields "csv", which never matched ".csv".
            file_ending = os.path.splitext(attachment_fname)[1].lower()
            if not attachments_of_interest or file_ending in attachments_of_interest:
                attachment.SaveAsFile(os.path.join(path, attachment_fname))
As for speeding up, you could use a pool:
from multiprocessing.pool import ThreadPool as Pool
from datetime import date, timedelta
import os
import win32com.client
# Directory the worker writes matching attachments to.
path = os.path.expanduser("C:\\Users\\xxxx\\Documents\\Projects\\VBA Projects\\VLOOKUP Automation\\Vlookup File Location")
# Not referenced again in this snippet; the worker calls date.today() itself.
today = date.today()
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
inbox = outlook.Folders("xxx").Folders.Item("Inbox")
messages = inbox.Items
# Text a message subject must contain to be processed.
subject = "xxx"
# The dispatch loop stops once the item index exceeds this value.
max_n = 2500
# File endings the worker compares attachment names against.
attachments_of_interest = ['.csv']
# Number of threads in the pool (Pool is ThreadPool here).
pool_size = 5
# define worker function before a Pool is instantiated
def worker(message):
    """Save the wanted attachments of one message.

    The message is processed only when its subject contains the module-level
    ``subject`` and it was sent today. Attachments are kept when their
    extension (leading dot included, compared in lower case) appears in
    ``attachments_of_interest``; an empty list keeps everything. Files are
    written into the module-level ``path``.

    NOTE(review): ``message`` is a COM object created on the calling thread.
    Outlook's object model may require per-thread COM initialisation or
    marshalling before it is used from pool threads. Confirm before relying
    on the pool for a speed-up.

    Args:
        message: Outlook item to inspect.
    """
    today = date.today()
    # Exclusive window (yesterday, tomorrow). The bounds were previously
    # swapped, which can never match any date.
    dateLow = today - timedelta(days=1)
    dateHigh = today + timedelta(days=1)
    if not (subject in message.subject and dateLow < message.senton.date() < dateHigh):
        return
    attachments = message.Attachments
    # Outlook collections are 1-based.
    for x in range(1, attachments.Count + 1):
        attachment = attachments.Item(x)
        attachment_fname = str(attachment.FileName)
        # splitext keeps the dot (".csv"), matching the list entries;
        # split('.')[-1] yields "csv", which never matched ".csv".
        file_ending = os.path.splitext(attachment_fname)[1].lower()
        if not attachments_of_interest or file_ending in attachments_of_interest:
            attachment.SaveAsFile(os.path.join(path, attachment_fname))
# Hand each of the first items to the thread pool and wait for completion.
pool = Pool(pool_size)
pending = []
for count, message in enumerate(messages):
    if count > max_n:
        break
    pending.append(pool.apply_async(worker, (message,)))
pool.close()
pool.join()
# apply_async stores a worker's exception in its result object; without a
# get() call the failure is lost silently. Report each one here.
for result in pending:
    try:
        result.get()
    except Exception as exc:
        print("worker failed:", exc)
I think this is part of requirement to download csv only.
The Outlook Items collection has some methods which you can use for this.
Instead of looping over messages = inbox.Items directly, keep one reference to the collection and
try
items = inbox.Items
message = items.GetFirst()
to get the first message, then use
message = items.GetNext()
to advance. Call GetNext() on the same items object, because each access to inbox.Items returns a new collection. This way you only hold one message in memory at a time and can keep looping for longer.
Make sure you reference the Microsoft Outlook 16.0 Object Library (or any version above 10) so that the GetFirst() method exists.
c# code used by me
// NOTE(review): oItems is not declared in this fragment; presumably it is the
// folder's Outlook.Items collection. Confirm against the calling code.
// Fetch the first item; the cast assumes that item is a mail item.
Outlook.MailItem oMsg = (Outlook.MailItem)oItems.GetFirst();
//Output some common properties.
Console.WriteLine(oMsg.Subject);
Console.WriteLine(oMsg.SenderName);
Console.WriteLine(oMsg.ReceivedTime);
Console.WriteLine(oMsg.Body);
//Check for attachments.
int AttachCnt = oMsg.Attachments.Count;
Console.WriteLine("Attachments: " + AttachCnt.ToString());
// Advance to the next item of the same collection.
Outlook.MailItem oMsg1 = (Outlook.MailItem)oItems.GetNext();

Email Python Script Output

I just finished creating my first news web scraping script and I am quite content with it even though the code does not look nice at all. I was wondering how I should go about sending the output of the script to myself via email (gmail address) when I run it. I tried to run smtplib, but it's not working for me.
Here is my current code:
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
from datetime import date
from dateutil import parser
import smtplib
from email.mime.text import MIMEText
my_url1 = "https://www.coindesk.com/category/business-news/legal"
my_url2 = "https://cointelegraph.com/tags/bitcoin-regulation"
my_url3 = "https://techcrunch.com/tag/blockchain/"

# --- CoinDesk: print every article published today ---
# Opening up the website, grabbing the page. The `with` block closes the
# response even when read() raises.
with uReq(my_url1, timeout=5) as uFeedOne:
    page_one = uFeedOne.read()
# html parser
page_soup1 = soup(page_one, "html.parser")
# grabs each publication block
containers_one = page_soup1.findAll("a", {"class": "stream-article"})

# Today's date in the same format the article dates are rendered in below;
# computed once instead of once per article.
today = date.today().strftime("%Y, %m, %d")
for container_one in containers_one:
    # Skip blocks without a <time datetime="..."> tag instead of crashing.
    time_tag = container_one.time
    if time_tag is None or not time_tag.has_attr('datetime'):
        continue
    ## The actual datetime string is in the datetime attribute of the time tag.
    date_time1 = time_tag['datetime']
    ## The dateutil package parses the ISO-formatted date and returns a condensed version.
    date1 = parser.parse(date_time1)
    dt1 = date1.strftime("%Y, %m, %d")
    ## Simple comparison
    if dt1 == today:
        link1 = container_one.attrs['href']
        publication_date1 = "published on " + time_tag.text
        title1 = container_one.h3.text
        description1 = "(CoinDesk)-- " + container_one.p.text
        print("link: " + link1)
        print("publication_date: " + publication_date1)
        print("title: ", title1)
        print("description: " + description1 + " \n")
# --- CoinTelegraph: print every article published today ---
# The `with` block closes the response even when read() raises.
with uReq(my_url2, timeout=5) as uFeedTwo:
    page_two = uFeedTwo.read()
page_soup2 = soup(page_two, "html.parser")
containers_two = page_soup2.findAll("div", {"class": "post-preview-item-inline__content"})

# Today's date in the same format as dt2 below; computed once.
today = date.today().strftime("%Y, %m, %d")
for container_two in containers_two:
    date_time2 = container_two.time['datetime']
    date2 = parser.parse(date_time2)
    dt2 = date2.strftime("%Y, %m, %d")
    title_container2 = container_two.find("span", {"class": "post-preview-item-inline__title"})
    description_container2 = container_two.find("p", {"class": "post-preview-item-inline__text"}).text
    if dt2 == today:
        link2 = container_two.div.a.attrs['href']
        publication_date2 = "published on " + date2.strftime("%b %d, %Y")
        title2 = title_container2.text
        description2 = "(CoinTelegraph)-- " + description_container2
        print("link: " + link2)
        print("publication_date: " + publication_date2)
        # Print this article's own title (previously printed title1, the last
        # CoinDesk title, or failed if no CoinDesk article matched).
        print("title: ", title2)
        print("description: " + description2 + " \n")
# --- TechCrunch: print today's articles that mention "law" or "legal" ---
# The `with` block closes the response even when read() raises.
with uReq(my_url3, timeout=5) as uFeedThree:
    page_three = uFeedThree.read()
# html parser
page_soup3 = soup(page_three, "html.parser")
# grabs each publication block
containers_three = page_soup3.findAll("div", {"class": "post-block post-block--image post-block--unread"})

# Loop invariants, computed once.
today = date.today().strftime("%Y, %m, %d")
keyword1 = "law"
keyword2 = "legal"
for container_three in containers_three:
    date_time3 = container_three.time['datetime']
    date3 = parser.parse(date_time3)
    dt3 = date3.strftime("%Y, %m, %d")
    description_container3 = container_three.find("div", {"class": "post-block__content"}).text.strip()
    # Parenthesise the keyword test: `a and b or c` parses as `(a and b) or c`,
    # which let any article containing "law" through regardless of its date.
    if dt3 == today and (keyword2 in description_container3 or keyword1 in description_container3):
        link3 = container_three.header.h2.a.attrs['href']
        publication_date3 = "published on " + date3.strftime("%b %d, %Y")
        title3 = container_three.header.h2.a.text.strip()
        description3 = "(TechCrunch)-- " + description_container3
        print("link: " + link3)
        print("publication_date: " + publication_date3)
        print("title: ", title3)
        print("description: " + description3 + " \n")
I understand that I am supposed to do a variation of this:
# Read the plain text file (assumed to contain only ASCII characters) and
# wrap its contents in a text/plain message.
with open(textfile) as fp:
    text = fp.read()
msg = MIMEText(text)

# `me` is the sender's email address, `you` is the recipient's.
msg['Subject'] = 'The contents of %s' % textfile
msg['From'] = me
msg['To'] = you

# Deliver through the SMTP server running on this machine, then disconnect.
s = smtplib.SMTP('localhost')
s.send_message(msg)
s.quit()
This is the code snippet to send a mail to anyone using SMTP.
The code below is configured for Gmail's SMTP server. If you use a different provider, it can
be configured accordingly.
import os
import smtplib
# Python 3 module paths (email.MIMEMultipart / email.MIMEText were the
# Python 2 names and no longer exist).
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

msg = MIMEMultipart()
msg['From'] = 'me@gmail.com'
msg['To'] = 'you@gmail.com'
msg['Subject'] = 'Enter subjecy of msg here'
message = 'here is the email'
msg.attach(MIMEText(message))

# GMAIL_SMTP_HOST = 'smtp.gmail.com'
# GMAIL_SMTP_PORT = '587'
# The `with` block sends QUIT and closes the connection even on failure.
with smtplib.SMTP('smtp.gmail.com', 587) as mailserver:
    # secure our email with tls encryption
    mailserver.starttls()
    # Gmail refuses mail from unauthenticated clients, so log in first. The
    # password is read from the environment to keep it out of the source.
    mailserver.login(msg['From'], os.environ['GMAIL_APP_PASSWORD'])
    mailserver.sendmail(msg['From'], msg['To'], msg.as_string())

Checking email with Python

I am interested to trigger a certain action upon receiving an email from specific
address with specific subject. In order to be able to do so I need to implement
monitoring of my mailbox, checking every incoming mail (in particular, i use gmail).
what is the easiest way to do that?
Gmail provides the ability to connect over POP, which you can turn on in the gmail settings panel. Python can make connections over POP pretty easily:
import poplib
from email import parser

# Download every message in the mailbox over POP3/SSL and print its subject.
pop_conn = poplib.POP3_SSL('pop.gmail.com')
pop_conn.user('username')
pop_conn.pass_('password')
try:
    # Get messages from server. list()[1] has one entry per message and
    # retr(i) returns (response, lines, octets) with `lines` as bytes.
    message_count = len(pop_conn.list()[1])
    messages = [pop_conn.retr(i) for i in range(1, message_count + 1)]
    # Concat message pieces; the lines are bytes, so join with a bytes separator.
    messages = [b"\n".join(mssg[1]) for mssg in messages]
    # Parse message into an email object:
    messages = [parser.BytesParser().parsebytes(mssg) for mssg in messages]
    for message in messages:
        print(message['subject'])
finally:
    # Always end the session so the server releases the mailbox lock.
    pop_conn.quit()
You would just need to run this script as a cron job. Not sure what platform you're on so YMMV as to how that's done.
Gmail provides an Atom feed for new email messages. You should be able to monitor this by authenticating with PycURL (or some other net library) and pulling down the feed. Making a GET request for each new message should mark it as read, so you won't have to keep track of which emails you've read.
While not Python-specific, I've always loved procmail wherever I could install it...!
Just use as some of your action lines for conditions of your choice | pathtoyourscript (vertical bar AKA pipe followed by the script you want to execute in those cases) and your mail gets piped, under the conditions of your choice, to the script of your choice, for it to do whatever it wants -- hard to think of a more general approach to "trigger actions of your choice upon receipt of mails that meet your specific conditions!! Of course there are no limits to how many conditions you can check, how many action lines a single condition can trigger (just enclose all the action lines you want in { } braces), etc, etc.
People seem to be pumped up about Lamson:
https://github.com/zedshaw/lamson
It's an SMTP server written entirely in Python. I'm sure you could leverage that to do everything you need - just forward the gmail messages to that SMTP server and then do what you will.
However, I think it's probably easiest to do the ATOM feed recommendation above.
EDIT: Lamson has been abandoned
I found a pretty good snippet when I wanted to do this same thing (and the example uses gmail). Also check out the google search results on this.
I recently solved this problem by using procmail and python
Read the documentation for procmail. You can tell it to send all incoming email to a python script like this in a special procmail config file
# Recipe with no conditions: every incoming message is piped to the script.
# NOTE(review): the trailing ':' requests a local lockfile; confirm procmail
# can derive a lockfile name for a pipe action, or name one explicitly.
:0:
| ./scripts/ppm_processor.py
Python has an "email" package available that can do anything you could possibly want to do with email. Read up on the following ones....
# Python 3 module paths. The Python 2 names (email.Message, email.MIMEBase,
# email.MIMEText) no longer exist.
from email.generator import Generator
from email.message import Message
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
https://developers.google.com/gmail/gmail_inbox_feed
Says you have to have a corporate Gmail, but I have come to find that you can read Gmail free versions without issues. I use this code to get my blood pressure results I email or text to a gmail address.
import email
import imaplib
import os
import traceback
from datetime import datetime
from email.header import decode_header

import pandas as pd
import plotly
import plotly.graph_objs as go
# Print the start time of this run.
now = datetime.now()
dt_string = now.strftime("%Y.%m.%d %H:%M:%S")
print("date_time:", dt_string)
# Mailbox to poll.
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a secrets store instead.
email_account = '13123#gmail.com'
email_password = '131231231231231231312313F'
email_server = 'imap.gmail.com'
# NOTE(review): get_emails() as posted connects with IMAP4_SSL(email_server)
# and does not pass this value.
email_port = 993
# Only messages whose sender address is in this set are parsed.
accept_emails_from = {'j1231312#gmail.com', '1312312#chase.com', '13131231313131#msg.fi.google.com'}
# When True, the functions below print [DEBUG] lines.
verbose = True
def _decode_header_text(raw_header):
    """Return the first decoded fragment of a raw header value as ``str``."""
    if raw_header is None:
        return ''
    value, charset = decode_header(raw_header)[0]
    if isinstance(value, bytes):
        # decode_header reports charset None for undeclared bytes, and
        # bytes.decode(None) raises TypeError, so fall back to UTF-8.
        value = value.decode(charset or 'utf-8', errors='replace')
    return value


def _reading_to_csv_row(parse_data):
    """Convert one 'date time systolic diastolic pulse weight' text to a CSV line.

    Fields may be separated by commas and/or any whitespace; extra trailing
    fields are ignored. A time without ':' (e.g. "0830") is read as HHMM.

    Raises:
        ValueError: if fewer than six fields are present or the date/time
            does not match '%Y.%m.%d %H:%M'.
    """
    # split() without an argument collapses repeated blanks and drops line
    # breaks; split(' ') produced empty or newline-tainted fields.
    fields = parse_data.replace(',', ' ').split()
    if len(fields) < 6:
        raise ValueError(
            'expected 6 fields (date time systolic diastolic pulse weight), '
            'got %d' % len(fields))
    t_date, t_time, t_systolic, t_diastolic, t_pulse, t_weight = fields[:6]
    t_date = t_date.replace('-', '.')
    if ':' not in t_time:
        t_time = t_time[:2] + ':' + t_time[2:4]
    t_eval = t_date + ' ' + t_time
    if verbose:
        print()
        print('--------------------------------------------------------------------------------')
        print('[DEBUG] t_eval:'.ljust(30), t_eval)
    date_stamp = datetime.strptime(t_eval, '%Y.%m.%d %H:%M')
    if verbose:
        print('[DEBUG] date_stamp:'.ljust(30), date_stamp)
        print('[DEBUG] t_systolic:'.ljust(30), t_systolic)
        print('[DEBUG] t_diastolic:'.ljust(30), t_diastolic)
        print('[DEBUG] t_pulse:'.ljust(30), t_pulse)
        print('[DEBUG] t_weight:'.ljust(30), t_weight)
    return ','.join([str(date_stamp), t_systolic, t_diastolic, t_pulse, t_weight]) + '\n'


def get_emails():
    """Fetch unread inbox emails and turn accepted readings into CSV rows.

    Logs in to ``email_server`` over IMAP/SSL with the module-level account
    settings and searches the inbox for UNSEEN messages. For every message
    whose sender address is in ``accept_emails_from`` one reading is parsed:
    from the body when the message's content type is text/plain, otherwise
    from the subject line.

    Returns:
        tuple: ``(ok, email_number, csv_data)``. ``ok`` is False when an
        exception interrupted processing (the traceback is printed),
        ``email_number`` is the number of unread messages fetched so far and
        ``csv_data`` holds one CSV line per reading parsed so far.
    """
    # Local import keeps this block self-contained.
    from email.utils import parseaddr

    email_number = 0
    local_csv_data = ''
    mail = None
    try:
        mail = imaplib.IMAP4_SSL(email_server, email_port)
        email_code, email_auth_status = mail.login(email_account, email_password)
        if verbose:
            print('[DEBUG] email_code: ', email_code)
            print('[DEBUG] email_auth_status: ', email_auth_status)
        mail.select('inbox')
        # (email_code, messages) = mail.search(None, 'ALL')
        # only get unread emails to process.
        email_code, messages = mail.search(None, '(UNSEEN)')
        for email_id in messages[0].split():
            email_number += 1
            email_code, email_data = mail.fetch(email_id, '(RFC822)')
            for response in email_data:
                # Only the tuple parts carry message data; the bare bytes
                # entries are protocol filler.
                if not isinstance(response, tuple):
                    continue
                msg = email.message_from_bytes(response[1])
                # Decode to str first, then strip folded line breaks; calling
                # str.replace on undecoded bytes raised TypeError.
                subject = _decode_header_text(msg["Subject"]).replace("\r\n", "")
                # parseaddr extracts the address even when the display name
                # is encoded or absent.
                email_from = parseaddr(str(msg.get("From", "")))[1].lower()
                if email_from not in accept_emails_from:
                    continue
                if msg.get_content_type() == "text/plain":
                    parse_data = msg.get_payload(decode=True).decode()
                else:
                    parse_data = subject
                local_csv_data += _reading_to_csv_row(parse_data)
    except Exception as e:
        traceback.print_exc()
        print(str(e))
        return False, email_number, local_csv_data
    finally:
        # Always end the IMAP session; a failed logout must not mask the
        # result computed above.
        if mail is not None:
            try:
                mail.logout()
            except (imaplib.IMAP4.error, OSError):
                pass
    return True, email_number, local_csv_data
def update_csv(local_data):
    """Append new readings to the results file, then sort and de-duplicate it.

    The file location depends on ``os.name``. The file is created with a
    header line when it does not exist yet, ``local_data`` is appended, the
    rows are sorted by the '00DateTime' column and exact duplicate lines are
    removed.

    Args:
        local_data: CSV text (zero or more newline-terminated rows) to append.

    Returns:
        int: Number of distinct lines left in the file, header line included.

    Raises:
        SystemExit: with code 911 when ``os.name`` is neither 'posix' nor 'nt'.
    """
    if os.name == 'posix':
        file_path = '/home/blood_pressure_results.txt'
    elif os.name == 'nt':
        file_path = '\\\\uncpath\\blood_pressure_results.txt'
    else:
        print('[ERROR] os not supported:'.ljust(30), os.name)
        # Same effect as exit(911) without depending on the site module.
        raise SystemExit(911)
    if verbose:
        print('[DEBUG] file_path:'.ljust(30), file_path)

    column_names = ['00DateTime', 'Systolic', 'Diastolic', 'Pulse', 'Weight']
    if not os.path.exists(file_path):
        with open(file_path, 'w') as file:
            # join() avoids the trailing comma that added an empty sixth column.
            file.write(','.join(column_names) + '\n')

    # append the new data to file.
    with open(file_path, 'a+') as file:
        file.write(local_data)

    # sort the file.
    df = pd.read_csv(file_path, usecols=column_names)
    df_sorted = df.sort_values(by=["00DateTime"], ascending=True)
    df_sorted.to_csv(file_path, index=False)

    # remove duplicates. Whole lines are compared through a set: the earlier
    # substring test against the accumulated text could drop a row that was
    # merely the tail of another line, and it was quadratic in file size.
    seen = set()
    unique_rows = []
    with open(file_path, 'r') as file:
        for row in file:
            if row in seen:
                print('Duplicate:'.ljust(30), row, end='')
            else:
                seen.add(row)
                unique_rows.append(row)
                print('Adding: '.ljust(30), row, end='')
    with open(file_path, 'w') as file:
        file.writelines(unique_rows)
    return len(unique_rows)
# Fetch unread readings from the mailbox and report the outcome.
status, emails, my_data = get_emails()
for label, value in (('status:', status), ('emails:', emails)):
    print(label.ljust(30), value)

# Merge whatever was parsed into the CSV file (also re-sorts and
# de-duplicates it), then report the resulting line count.
csv_rows = update_csv(my_data)
print('csv_rows:'.ljust(30), csv_rows)
exit(0)

Categories

Resources