How to get pure text from python email using imaplib - python

I am wondering how to get plain text from an email in Python using imaplib.
What I have so far:
from datetime import datetime
import imaplib ,email
IMAP_SERVER = 'imap.gmail.com'
EMAIL_ACCOUNT = "example#gmail.com"
PASSWORD = "password"
# NOTE(review): M is not defined in this snippet and the bare `return`
# statements imply this is the body of a function whose `def` line is not shown.
# Ask the server for every message; data[0] is split below into message numbers.
rv, data = M.search(None, "ALL")
if rv != 'OK':
    print("No messages found!")
    return
if data != ['']: # if not empty list means messages exist
    for num in data[0].split():
        rv, data = M.fetch(num, '(RFC822)') #(BODY[HEADER.FIELDS (SUBJECT FROM)])
        if rv != 'OK':
            print("ERROR getting message", num)
            return
        # data[0][1] is handed to the parser as the raw message bytes.
        message = email.message_from_bytes(data[0][1])
        text = ""
        if message.is_multipart():
            # Every iteration overwrites `text`, so only the LAST sub-part's
            # payload survives the loop. NOTE(review): for a typical
            # multipart/alternative message that is presumably the text/html
            # part, which would explain HTML showing up in the output.
            for payload in message.get_payload():
                text = payload.get_payload()
        else:
            text = message.get_payload()
        # Summary of the message: parsed sender, local timestamp, recipient,
        # first decoded chunk of the subject, and the body picked above.
        res = {
            'From': email.utils.parseaddr(message['From'])[1],
            'From name': email.utils.parseaddr(message['From'])[0],
            'Time': datetime.fromtimestamp(email.utils.mktime_tz(email.utils.parsedate_tz(message['Date']))),
            'To': message['To'],
            'Subject': email.header.decode_header(message["Subject"])[0][0],
            'Text': text
        }
        print(res['Text'])
else:
    print("Nothing to work with.")
If I do it this way, the code works, but I get
<div dir="ltr">test 3 body</div>
as the output.
Is there any way to get only the plain text "test 3 body"?

Look for the plain text part of the email message.
# Walk every MIME part of `message` and print only the plain-text ones.
for payload in message.walk():
    if payload.get_content_type().lower() == 'text/plain':
        # decode=True undoes the Content-Transfer-Encoding (base64 /
        # quoted-printable) and returns bytes; without it an encoded body
        # would be printed in its encoded form.
        raw_body = payload.get_payload(decode=True) or b''
        # Honour the charset declared by the part, defaulting to UTF-8.
        charset = payload.get_content_charset() or 'utf-8'
        print(raw_body.decode(charset, errors='replace'))

If you are only stuck on removing HTML tags from a string, you can use a regular expression like this:
import re

# Sample HTML fragment to clean up.
s = '<div dir="ltr">test 3 body</div>'
# A tag is '<', then the shortest run of non-'<' characters, then '>'.
tag_pattern = re.compile('<[^<]+?>')
# Replace every tag with nothing, leaving only the text between tags.
print(tag_pattern.sub('', s))
Output: test 3 body
In your code, s would be res['Text'].

Related

How to extract data from an email that comes in JSON format and then use that data to execute code? I keep getting errors when loading the email data

I keep getting errors when loading the email data: either no email IDs are found or there are no search results. I'm using imaplib to log in to an IMAP server at GMX.com, looking for emails that are UNSEEN and whose subject contains "Alert:", and then trying to use the extracted data to execute trades through the Bybit API. Should I switch to a different email library, or modify the code somehow?
` from flask import Flask, jsonify
import bybit
import re
import ssl
import certifi
import imaplib
class IMAPError(imaplib.IMAP4.error):
    # Subclass of imaplib.IMAP4.error with no added behaviour. Nothing in this
    # snippet raises it. NOTE(review): `except IMAPError` only matches this
    # subclass, not the base imaplib.IMAP4.error - confirm that is intended.
    pass
app = Flask(__name__)
app.debug = True
@app.route('/e')
def execute_trade():
    """Flask view: read the first unseen "Alert:" e-mail and place its order.

    The INBOX is opened read-only and searched for UNSEEN messages whose
    subject contains "Alert:". The first match is fetched, its text body is
    scanned for the ``Account side``, ``Price``, ``account_symbol`` and
    ``account_volume`` fields, and a limit order is submitted through bybit.

    Returns:
        A JSON response holding the extracted fields on success, or a
        ``(JSON response, 500)`` pair with an ``error`` key when the mailbox
        cannot be read or a required field is missing.
    """
    import email  # local import: needed to parse the fetched RFC822 message

    response = {}
    HOST = "imap.gmx.com"
    USERNAME = "example@gmx.com"
    PASSWORD = "example"

    try:
        # The default context verifies the server certificate and hostname
        # against the certifi CA bundle.
        ssl_context = ssl.create_default_context(cafile=certifi.where())
        with imaplib.IMAP4_SSL(HOST, 993, ssl_context=ssl_context) as imap:
            imap.login(USERNAME, PASSWORD)
            imap.select('INBOX', readonly=True)

            # search() returns (status, [b'1 2 3']): the IDs live in the
            # second element, not the first.
            status, search_data = imap.search(None, "UNSEEN", 'SUBJECT "Alert:"')
            mail_ids = []
            if status == 'OK' and search_data and search_data[0]:
                mail_ids = search_data[0].split()
            print(mail_ids)
            if not mail_ids:
                response['error'] = "No valid email IDs found in the search results"
                return jsonify(response), 500

            # fetch() returns (status, [(envelope, raw_bytes), b')']).
            status, fetch_data = imap.fetch(mail_ids[0], "(RFC822)")
            if status != 'OK' or not fetch_data or not isinstance(fetch_data[0], tuple):
                response['error'] = "Error accessing the email server: could not fetch the message"
                return jsonify(response), 500
            raw_message = fetch_data[0][1]
        # Leaving the `with` block logs out; no explicit close()/logout() needed.
    except (imaplib.IMAP4.error, OSError) as e:
        # imaplib raises IMAP4.error itself; ssl.SSLError and socket failures
        # are OSError subclasses.
        response['error'] = f"Error accessing the email server: {e}"
        return jsonify(response), 500

    # Prefer the decoded text/plain part so transfer-encoded bodies can be
    # matched; fall back to the raw text when there is no such part.
    message = email.message_from_bytes(raw_message)
    email_body = raw_message.decode('utf-8', errors='replace')
    for part in message.walk():
        if part.get_content_type() == 'text/plain':
            payload = part.get_payload(decode=True) or b''
            email_body = payload.decode(part.get_content_charset() or 'utf-8', errors='replace')
            break

    # (response key, pattern, error message when the field is absent)
    required_fields = (
        ('account_side', r'Account side: (.*)', "No 'Account side' found in the email body"),
        ('order_price', r'Price: (.*)', "No 'Price' found in the email body"),
        ('account_symbol', r'account_symbol: (.*)', "No 'Account symbol' found in the email body"),
        ('account_volume', r'account_volume: (.*)', "No 'Account volume' found in the email body"),
    )
    for key, pattern, error_message in required_fields:
        match = re.search(pattern, email_body)
        if not match:
            response['error'] = error_message
            return jsonify(response), 500
        # strip() drops the trailing '\r' that '(.*)' captures on CRLF lines.
        response[key] = match.group(1).strip()

    account_side = response['account_side']
    order_price = response['order_price']
    account_symbol = response['account_symbol']
    account_volume = response['account_volume']

    # NOTE(review): API credentials are hard-coded in source; move them to
    # configuration or environment variables and rotate these keys.
    client = bybit.bybit(test=False, api_key="76QjbsflcrBdSZiSrU",api_secret="SGAAHPUfQnTm1d1EX12ffVaizjbFA4iT32ps")
    if account_side and account_symbol and account_volume:
        print(client.Order.Order_new(side=account_side, symbol=account_symbol, order_type="limit",qty=account_volume, price=order_price,time_in_force="gtc").result())
        # Query Active Orders(real-time)
        print(client.Order.Order_query(symbol=account_symbol).result())
        # My Position
        print(client.Positions.Positions_myPosition(
            symbol=account_symbol).result())
    return jsonify(response)
if __name__ == '__main__':
app.run(port=5000)
``` `
I've tried switching the library to IMAPClient, and other approaches such as reading the email data differently, but I always get the same errors.
I tried adding
``` ` json_data = json.loads(email_body)
result = json_data.get('result')
if result:
response['result'] = result
else:
response['error'] = "No result information found in the email"
return response, 500
after the email body line (and importing json), but I still get the same error.

Use python to download email attachments only based on Subject

The following code uses IMAP to find emails by subject line; it returns all parts of the email and downloads the attachments. However, I ONLY need it to download the attachments of the email, not the entire body as well. I understand this has to do with the loop for part in email_message.walk():, which iterates over the entire email. Could someone please help me make this code download only the attachments? I'm sure this is a simple code change, but I'm just not sure how to make it!
import imaplib
import email.header
import os
import sys
import csv
# Your IMAP Settings
host = 'imap.gmail.com'
user = 'User email'
password = 'User password'
# Folder that receives the downloaded attachments
download_dir = 'C:/install files/'

# Connect to the server
print('Connecting to ' + host)
mailBox = imaplib.IMAP4_SSL(host)
# Login to our account
mailBox.login(user, password)
try:
    mailBox.select()
    try:
        searchQuery = '(SUBJECT "CDR Schedule output from schedule: This is a test to see how it works")'
        result, data = mailBox.uid('search', None, searchQuery)
        # data[0] is a space-separated list of UIDs; an unsuccessful or empty
        # search simply yields nothing to download.
        id_list = data[0].split() if result == 'OK' and data and data[0] else []

        for email_uid in id_list:
            # fetch the whole message (RFC822) for the given UID
            result, email_data = mailBox.uid('fetch', email_uid, '(RFC822)')
            if result != 'OK' or not email_data or not isinstance(email_data[0], tuple):
                continue
            # Parse the raw bytes directly: decoding them as UTF-8 first would
            # fail on messages that use another charset.
            email_message = email.message_from_bytes(email_data[0][1])
            subject = str(email.header.make_header(
                email.header.decode_header(email_message.get('Subject', ''))))

            # Only parts that carry a Content-Disposition and a filename are
            # attachments; container parts and the inline body are skipped.
            for part in email_message.walk():
                if part.get_content_maintype() == 'multipart':
                    continue
                if part.get('Content-Disposition') is None:
                    continue
                fileName = part.get_filename()
                if not fileName:
                    continue
                # The filename comes from the e-mail, so keep only its last
                # component to stay inside download_dir.
                fileName = os.path.basename(fileName.replace('\\', '/'))
                content = part.get_payload(decode=True)
                if not fileName or content is None:
                    continue
                filePath = os.path.join(download_dir, fileName)
                if os.path.isfile(filePath):
                    continue  # never overwrite an existing download
                with open(filePath, 'wb') as fp:
                    fp.write(content)
                print('Downloaded "{file}" from email titled "{subject}" with UID {uid}.'.format(file=fileName, subject=subject, uid=email_uid.decode('utf-8')))
    finally:
        mailBox.close()
finally:
    mailBox.logout()

Render html in Flask IMAP application

I am creating a Flask application that is supposed to retrieve emails by sending IMAP or POP requests to an email service provider such as Gmail. I am able to retrieve the emails using the imaplib library. An email that contains only text is simple enough to retrieve and display. Unfortunately, when an email contains images, GIFs or special styling, it gets more difficult.
Whenever I run the code that retrieves the contents of the emails, I get HTML back. But when I try to write it to an HTML file and call render_template('test.html'), it seems I would be putting HTML inside HTML.
What is the correct way to pass what I get from the email service provider to my Flask web application?
class EmailClient:
imap_host = 'imap.gmail.com'
imap_user = 'test#test.com'
imap_pass = 'password'
def process_mailbox(M):
    """Print a summary of every message and collect their HTML bodies.

    Args:
        M: an IMAP connection object with a mailbox already selected; only
            its ``search`` and ``fetch`` methods are used.

    Returns:
        list of str: the decoded text/html parts, in the order encountered.
        The list is empty when the search fails and partial when a fetch
        fails part-way through.
    """
    diction = []
    rv, data = M.search(None, "ALL")
    if rv != 'OK':
        print('No messages found!')
        return diction
    for num in data[0].split():
        rv, data = M.fetch(num, '(RFC822)')
        if rv != 'OK':
            print("ERROR getting message", num)
            return diction
        msg = email.message_from_bytes(data[0][1])
        # A message without a Subject header is treated as having an empty one.
        hdr = email.header.make_header(email.header.decode_header(msg.get('Subject', '')))
        subject = str(hdr)
        print('Message %s: %s' % (num, subject))
        date_tuple = email.utils.parsedate_tz(msg['Date'])
        if date_tuple:
            local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
            print('Local Date:', local_date.strftime('%a, %d %b %Y %H:%M:%S'))
        for part in msg.walk():
            if part.get_content_type() == 'text/html':
                # decode=True reverses base64 / quoted-printable; the bytes
                # are then decoded with the part's declared charset.
                payload = part.get_payload(decode=True) or b''
                html = payload.decode(part.get_content_charset() or 'utf-8', errors='replace')
                print(html)
                diction.append(html)
    return diction
# Open an SSL connection to the IMAP server at imap.gmail.com.
M = imaplib.IMAP4_SSL('imap.gmail.com')
try:
    # NOTE(review): imap_user / imap_pass appear earlier only as EmailClient
    # class attributes; confirm how these bare names are in scope here.
    rv, data = M.login(imap_user, imap_pass)
except imaplib.IMAP4.error:
    print("LOGIN FAILED!")
    sys.exit(1)
print(rv, data)
# List the available mailboxes and show them when the command succeeds.
rv, mailboxes = M.list()
if rv == 'OK':
    print('Mailboxes:')
    print(mailboxes)
# Select the Inbox and process it; close it again once processing is done.
rv, data = M.select('Inbox')
if rv == 'OK':
    print('Processing mailbox...\n')
    process_mailbox(M)
    M.close()
else:
    print('ERROR: Unable to open mailbox', rv)
M.logout()
If you want to pass HTML code as a variable to a Jinja template, add the |safe filter so that the markup is not escaped. For instance, if the variable email contains the message in HTML format:
{{ email |safe }}

Uploading attachments to Salesforce with Beatbox - coming up blank

I'm trying to upload a folder full of emails to Salesforce, one email per case. The script does everything fine, but the emails it uploads are totally blank and are 0 bytes in size.
I'm pretty sure it has something to do with the base64 encoding required to upload attachments, but I've tried everything to make it work.
I read "Uploading Attachments to Salesforce API via Beatbox, Python", which was helpful, but I still cannot solve the problem.
Any help will be greatly appreciated.
Here is my code:
import wx
import email.parser
import os
import re
#import time
import beatbox
import base64
def browse(parent = None, message = 'Browse for the dealsheet folder'):
    """Show a directory chooser and return the selected path.

    Args:
        parent: accepted for interface compatibility; the dialog is created
            without a parent window.
        message: accepted for interface compatibility; the dialog caption is
            the fixed text "Choose a directory:".

    Returns:
        The chosen directory path, or None when the dialog is dismissed
        without confirming.
    """
    # Keep a reference to the application object while the dialog is shown.
    app = wx.App()
    dialog = wx.DirDialog(None, "Choose a directory:",style=wx.DD_DEFAULT_STYLE | wx.DD_NEW_DIR_BUTTON)
    try:
        if dialog.ShowModal() == wx.ID_OK:
            return dialog.GetPath()
        return None
    finally:
        # Runs on every path, including the early return above.
        dialog.Destroy()
# Log in to Salesforce, then create one case per e-mail file in the chosen
# folder and attach the original file to it. (Python 2 script.)
service = beatbox.PythonClient()
service.serverUrl = 'https://login.salesforce.com/services/Soap/u/20.0'
loginresponse = service.login('USERNAME', 'PASSWORD')
filespath = browse()
files = os.listdir(filespath)
for f in files:
    # Read the file exactly once. Parsing straight from the file object
    # consumes it, so a later read() returns nothing and the attachment
    # would be uploaded empty.
    with open(os.path.join(filespath, f), 'rb') as emailfile:
        raw_email = emailfile.read()
    message = email.message_from_string(raw_email)
    uploadfile = base64.b64encode(raw_email)
    subject = str(message.get('subject'))
    toAdd = str(message.get('to'))
    addresses = re.findall(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}', toAdd)
    if not addresses:
        # No recipient address to derive a name from; skip this file.
        print('skipping %s: no recipient address found' % f)
        continue
    toAdd = addresses[0]
    toAdd = toAdd[:-10]  # cut the email bits
    toAdd = toAdd.replace('.', ' ')
    toAdd = toAdd.title()
    print(toAdd)
    bodytext = u''
    for part in message.walk():
        if part.get_content_type() == 'text/plain':
            bodytext = unicode(str(part.get_payload()), errors = 'replace')
    # print bodytext
    # LOG CASE
    # NOTE(review): bodytext is computed above but Description is sent empty;
    # confirm whether the body was meant to go here.
    details = {'type': 'Case', 'Reason': 'SFDC Admin', 'Origin': 'Email', 'Status': 'Awaiting Analysis', 'Description': ''}
    result = service.create(details)
    print('log result =  %s' % (result[0]['success'],))
    if not result[0]['success']:
        # Without a created case there is nothing to triage or attach to.
        continue
    # TRIAGE CASE
    details_triage = {'type': 'Case', 'id': result[0]['id'], 'Case_Triaged__c': 'True'}
    triage_result = service.update(details_triage)
    print('triage result =  %s' % (triage_result[0]['success'],))
    url = 'https://naX.salesforce.com/{}'.format(triage_result[0]['id'])
    print(url)
    # Body carries the base64-encoded bytes of the original file.
    attachement_dict = {'type': 'Attachment', 'ParentId': triage_result[0]['id'], 'name': f, 'Body': uploadfile }
    res = service.create(attachement_dict)
    print(res)

How to fetch an email body using imaplib in python?

I'd like to fetch the whole message from an IMAP4 server.
In the Python docs I found this bit of code, which works:
>>> t, data = M.fetch('1', '(RFC822)')
>>> body = data[0][1]
I'm wondering whether I can always trust that data[0][1] returns the body of the message. When I fetched 'RFC822.SIZE' I got just a string instead of a tuple.
I've skimmed through RFC 1730, but I wasn't able to figure out the proper response structure for 'RFC822'. It is also hard to tell the structure of the fetch result from the imaplib documentation.
Here is what I'm getting when fetching RFC822:
('OK', [('1 (RFC822 {858569}', 'body of the message', ')')])
But when I fetch RFC822.SIZE I'm getting:
('OK', ['1 (RFC822.SIZE 847403)'])
How should I properly handle the data[0] list?
Can I trust that, when it is a list of tuples, each tuple has exactly 3 parts and the second part is the payload?
Or maybe you know of a better library for IMAP4?
No... imaplib is a pretty good library; it's IMAP itself that's so unintelligible.
You may wish to check that t == 'OK', but data[0][1] has worked as expected for as long as I've used it.
Here's a quick example I use to extract signed certificates I've received by email. It's not bomb-proof, but it suits my purposes:
import getpass, os, imaplib, email
from OpenSSL.crypto import load_certificate, FILETYPE_PEM
def getMsgs(servername="myimapserverfqdn"):
    """Yield every unseen Inbox message whose subject matches the filter.

    Prompts for a password, logs in as the current OS user, and searches the
    Inbox for UNSEEN messages with the subject 'Your SSL Certificate'.

    Args:
        servername: host name of the IMAP server to connect to over SSL.

    Yields:
        A parsed e-mail message object for each matching message.
    """
    usernm = getpass.getuser()
    passwd = getpass.getpass()
    subject = 'Your SSL Certificate'
    # fetch() hands back bytes on Python 3 and str on Python 2; pick the
    # parser that accepts what this interpreter delivers.
    parse = getattr(email, 'message_from_bytes', email.message_from_string)
    conn = imaplib.IMAP4_SSL(servername)
    try:
        conn.login(usernm,passwd)
        conn.select('Inbox')
        typ, data = conn.search(None,'(UNSEEN SUBJECT "%s")' % subject)
        if typ != 'OK':
            return
        for num in data[0].split():
            typ, fetched = conn.fetch(num,'(RFC822)')
            if typ != 'OK':
                continue
            msg = parse(fetched[0][1])
            # Remove the \Seen flag again so the message stays unread.
            conn.store(num,'-FLAGS','\\Seen')
            yield msg
    finally:
        # Runs when the generator is exhausted or closed, so the connection
        # is not left open.
        conn.logout()
def getAttachment(msg,check):
    """Return the decoded payload of the first accepted attachment.

    Args:
        msg: a parsed e-mail message; all of its parts are walked.
        check: callable that receives an attachment filename and returns a
            truthy value to accept it.

    Returns:
        The transfer-decoded payload of the first application/octet-stream
        part whose filename passes ``check``, or None when no part qualifies.
    """
    for part in msg.walk():
        if part.get_content_type() != 'application/octet-stream':
            continue
        filename = part.get_filename()
        # Parts without a filename are skipped rather than passed to check(),
        # which expects a string.
        if filename and check(filename):
            return part.get_payload(decode=True)
    return None
if __name__ == '__main__':
    # Save every '.pem' attachment that parses as a certificate under the
    # name '<commonName>.pem', without overwriting existing files.
    for msg in getMsgs():
        payload = getAttachment(msg,lambda x: x.endswith('.pem'))
        if not payload:
            continue
        try:
            cert = load_certificate(FILETYPE_PEM,payload)
        except Exception:
            # Best effort: an attachment that does not load as a PEM
            # certificate is skipped.
            continue
        if not cert:
            continue
        cn = cert.get_subject().commonName
        # The name comes from the certificate itself, so keep only the final
        # path component before using it as a file name.
        filename = os.path.basename("%s.pem" % cn)
        if os.path.exists(filename):
            print("%s already exists" % filename)
            continue
        # Binary mode writes the decoded payload unchanged.
        with open(filename,'wb') as out:
            out.write(payload)
        print("Writing to %s" % filename)
The IMAPClient package is a fair bit easier to work with. From the description:
Easy-to-use, Pythonic and complete
IMAP client library.
Try my package:
https://pypi.org/project/imap-tools/
example:
from imap_tools import MailBox
# get list of email bodies from INBOX folder
# The login call is given the account, the password and the 'INBOX' folder name.
with MailBox('imap.mail.com').login('test#mail.com', 'password', 'INBOX') as mailbox:
    # Each entry is the message's text, or its html when text is empty.
    bodies = [msg.text or msg.html for msg in mailbox.fetch()]
Features:
Parsed email message attributes
Query builder for searching emails
Work with emails in folders (copy, delete, flag, move, append)
Work with mailbox folders (list, set, get, create, exists, rename, delete, status)
No dependencies
This was my solution to extract the useful bits of information. It's been reliable so far:
import datetime
import email
import imaplib
import mailbox
EMAIL_ACCOUNT = "your@gmail.com"
PASSWORD = "your password"


def _header_text(message, name):
    """Return header *name* of *message* as decoded text ('' when absent)."""
    return str(email.header.make_header(email.header.decode_header(message.get(name, ''))))


# Write each unseen message to email_<n>.txt with its main headers and its
# plain-text body.
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
try:
    mail.select('inbox')
    result, data = mail.uid('search', None, "UNSEEN") # (ALL/UNSEEN)
    # data[0] is a space-separated list of UIDs; split it once up front.
    uids = data[0].split() if result == 'OK' and data and data[0] else []
    for x, email_uid in enumerate(uids):
        result, email_data = mail.uid('fetch', email_uid, '(RFC822)')
        if result != 'OK' or not email_data or not isinstance(email_data[0], tuple):
            continue
        # Parse the raw bytes directly so messages that are not UTF-8 still load.
        email_message = email.message_from_bytes(email_data[0][1])

        # Header Details
        local_message_date = ""  # stays empty when Date is missing or unparseable
        date_tuple = email.utils.parsedate_tz(email_message['Date'])
        if date_tuple:
            local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
            local_message_date = local_date.strftime("%a, %d %b %Y %H:%M:%S")
        email_from = _header_text(email_message, 'From')
        email_to = _header_text(email_message, 'To')
        subject = _header_text(email_message, 'Subject')

        # Body details
        for part in email_message.walk():
            if part.get_content_type() != "text/plain":
                continue
            body = part.get_payload(decode=True) or b''
            # Decode with the charset the part declares, defaulting to UTF-8.
            body_text = body.decode(part.get_content_charset() or 'utf-8', errors='replace')
            file_name = "email_" + str(x) + ".txt"
            with open(file_name, 'w', encoding='utf-8') as output_file:
                output_file.write("From: %s\nTo: %s\nDate: %s\nSubject: %s\n\nBody: \n\n%s" %(email_from, email_to,local_message_date, subject, body_text))
finally:
    mail.logout()

Categories

Resources