Python IMAP scraper hangs indefinitely - python

I'm trying to scrape data from a specific folder in a Gmail account I have access to.
I recently tried running this code using Python 2.7 on Windows 7 while logged into the Gmail account of interest. For some reason though it seems to run for a long time (I left it for as long as 40 minutes) without completing or providing an error.
As it stands right now the folder I'm targeting in the Gmail account only has about 50 simple text emails with no attachments, pictures, or anything that might suggest the process should take as long as it does. Has anyone come across an issue like this before doing something similar with IMAP?
Code for completeness:
#!/usr/bin/env python
#
# Very simple Python script to dump all emails in an IMAP folder to files.
# This code is released into the public domain.
#
# RKI Nov 2013
#
import sys
import imaplib
import getpass
IMAP_SERVER = 'imap.gmail.com'
EMAIL_ACCOUNT = "notatallawhistleblowerIswear#gmail.com"
EMAIL_FOLDER = "Top Secret/PRISM Documents"
OUTPUT_DIRECTORY = 'C:/src/tmp'
PASSWORD = getpass.getpass()
def process_mailbox(M):
    """
    Dump all emails in the selected folder to files in the output directory.

    M is an imaplib connection on which a mailbox has already been selected.
    Each message is fetched as RFC822 and written to
    OUTPUT_DIRECTORY/<message number>.eml. Processing stops at the first
    message that cannot be fetched. Returns None.
    """
    rv, data = M.search(None, "ALL")
    if rv != 'OK':
        print("No messages found!")
        return
    for num in data[0].split():
        # Use a separate name so the search result being iterated is not rebound.
        rv, msg_data = M.fetch(num, '(RFC822)')
        if rv != 'OK':
            print("ERROR getting message %s" % num)
            return
        print("Writing message  %s" % num)
        # The with-block closes the file even if the write raises.
        with open('%s/%s.eml' % (OUTPUT_DIRECTORY, num), 'wb') as f:
            f.write(msg_data[0][1])
def main():
    """
    Log in to IMAP_SERVER, dump EMAIL_FOLDER to disk, and always log out.
    """
    M = imaplib.IMAP4_SSL(IMAP_SERVER)
    try:
        M.login(EMAIL_ACCOUNT, PASSWORD)
        rv, _ = M.select(EMAIL_FOLDER)
        if rv == 'OK':
            print("Processing mailbox:  %s" % EMAIL_FOLDER)
            process_mailbox(M)
            M.close()
        else:
            print("ERROR: Unable to open mailbox  %s" % rv)
    finally:
        # Log out even when login or the dump raises, so the session is released.
        M.logout()
if __name__ == "__main__":
main()

The code works fine for me. Below, I have added some debug prints to your code (using pprint) to view the attributes of the IMAP4_SSL object M. My Gmail account uses two-factor authentication, so I needed to set up a Gmail app password.
from pprint import pprint
# ....
M = imaplib.IMAP4_SSL(IMAP_SERVER)
print('---- Attributes of the IMAP4_SSL connection before login ----')
pprint(vars(M))
M.login(EMAIL_ACCOUNT, PASSWORD)
print('\n \n')
print('---- Attributes of the IMAP4_SSL connection after login ----')
pprint(vars(M))
# open specific folder
rv, data = M.select(EMAIL_FOLDER)
print('\n \n')
print('---- Data returned from select of folder = {}'.format(data))
Check the first pprint(vars(M)) for:
'welcome': '\* OK Gimap ready for requests from ...
'port': 993,
Check the second pprint(vars(M)) for:
_cmd_log for a successful login: 6: ('< PJIL1 OK **#gmail.com authenticated (Success)
data returned from M.select(EMAIL_FOLDER) should be the number of emails available to download.

Related

How to delete email from gmail using python IMAP?

I am trying to read an OTP from an email, and after that I want to delete that email from Gmail. I have no problem reading the email, but I am not able to delete it. I tried some code from Stack Overflow. Below is my code.
def getOtpMail(vEmail, vPaasword):
    """
    Poll the INBOX for an unseen OTP email and return the OTP it contains.

    Logs in with vEmail/vPaasword, then repeatedly fetches the newest unseen
    message until one matches SENDER_NAME and KEYWORD. Returns the extracted
    OTP, or None if the login fails.

    NOTE(review): the page lost the original indentation; the nesting below
    is a reconstruction and should be confirmed against the author's file.
    """
    connection = imaplib.IMAP4_SSL(IMAP_URL) # stablish connection with IMAP server
    try:
        connection.login(vEmail, vPaasword) # Login with userid password
    except Exception as e:
        print(e)
        return
    loopLock = True
    while loopLock:
        # fetch
        # NOTE(review): the mailbox is opened read-only here, yet the code
        # further down tries to store flags and expunge on it.
        connection.select('"INBOX"', readonly=True)
        retCode, messages = connection.search(None, '(UNSEEN)')
        print(messages[0])
        # Last entry of the search result is taken as the newest message.
        latest = int(messages[0].split()[-1])
        res, msg = connection.fetch(str(latest), "(RFC822)")
        for response in msg:
            if isinstance(response, tuple):
                print('\n------------email--------------\n')
                msg = email.message_from_bytes(response[1])
                if SENDER_NAME in msg['From'] and KEYWORD in msg['Subject']:
                    loopLock = False
                    # fetch required information
                    for part in msg.walk():
                        body = part.get_payload()
                        word_list = body.split()
                        # The OTP is taken as the third word after 'verification'.
                        index = word_list.index('verification')
                        otp = word_list[index + 3].strip('.')
                        #delete mail - below two line not working
                        connection.store(str(latest), '+FLAGS', '"[Gmail]/Trash"')
                        print(connection.expunge())
                        return otp
                else:
                    continue
I read the documentation and printed the result of connection.expunge(), which gave the response ('NO', [b'EXPUNGE attempt on READ-ONLY folder (Failure)']). I think the issue is that I have to establish the connection in write mode, but I am not sure about it.
The issue was that I opened the mailbox in read-only mode, so my program was not able to write or store anything on the IMAP server.
I changed
connection.select('"INBOX"', readonly=True)
to
connection.select('"INBOX"', readonly=False)
also I changed command type and flag type in store method -
connection.store(str(latest), '+FLAGS', '"[Gmail]/Trash"')
to
connection.store(str(latest), '+FLAGS', '\\Deleted')
.

How to parse credential args to a function handling imaplib protocols

I have a list of emails (mine) that I want to test against a list of passwords (some valid and some invalid, of course) using the imaplib library. Whenever I test the program directly, as in the code below, it works perfectly with no errors.
import sys
import imaplib
# connect to host using SSL
imap_host = 'imap.server.com'
imap_port = '993'
imap_user = 'username#email'
imap_pass = 'RightPassword'
imap = imaplib.IMAP4_SSL(imap_host, imap_port)
## login to server
# Attempt one login with the hard-coded credentials and print either the
# server's reply or the imaplib error.
try:
    login = imap.login(imap_user, imap_pass)
    if login:
        print login
except imaplib.IMAP4.error as error:
    print error
#
But whenever I run the code so that the credentials are passed through a function that handles the authentication, as in the code below, I get an error saying
"LOGIN command error: BAD ['Missing \'"\'']".
I have tried all sorts of things I could find using Google, and none seem to handle it properly.
"""
E-mail Tester
NB: This is for educational purpose only.
"""
import sys
import imaplib
EMAILS_FILE = open('email_list.txt', 'r')
PASSWORD_FILE = open('pass_list.txt', 'r')
SUCCESS_FILE = open('success.txt', 'a')
EMAILS_FILE_LIST = []
def set_check(_emails):
    """
    Try every password in PASSWORD_FILE against one e-mail address.

    _emails is one line read from the e-mail list. A new IMAP4_SSL connection
    is opened for each password; login replies and errors are printed.
    """
    # NOTE(review): lines read from a file keep their trailing newline, and
    # nothing here strips it before the value is sent to the server.
    email = str(_emails)
    # Rewind so the password list is re-read for every e-mail address.
    PASSWORD_FILE.seek(0)
    for passwords in PASSWORD_FILE:
        password = str(passwords)
        # connect to host using SSL
        imap_host = 'imap.server.com'
        imap_port = '993'
        imap = imaplib.IMAP4_SSL(imap_host, imap_port)
        ## login to server
        try:
            # print "%s%s" % (email,password)
            # print "I got here so far"
            # sys.exit()
            print "Testing <--> E-mail: %s - Password: %s" % (email, password)
            # NOTE(review): the % operator binds only to the second "%s"
            # literal, so the first argument is the literal string "%s".
            login = imap.login("%s","%s" % (email, password))
            if login:
                print login
                print "OK <---> E-mail: %s\nPassword: %s" % (email, password)
        except imaplib.IMAP4.error as error:
            print error
# Load every line of the e-mail file into a list, then run the password
# check once per entry.
for emails in EMAILS_FILE:
    EMAILS_FILE_LIST.append(emails)
for email_count in range(0, len(EMAILS_FILE_LIST)):
    set_check(EMAILS_FILE_LIST[email_count])
I have tried all kinds of suggestions I could find on the internet, but none has worked thus far.
I expect imap.login to handle the authentication without the mysterious error output
"LOGIN command error: BAD ['Missing \'"\'']"
login = imap.login("%s","%s" % (email, password))
does not work. It throws an error in Python: TypeError: not all arguments converted during string formatting, because you're providing two strings to one %s.
Why don't you just use imap.login(email, password)? It has the same effect as what you're trying to do.
And what does your password file look like? What is it actually sending? Please provide the log line before it crashes. (anonymizing if necessary, but leaving any punctuation in for help diagnosing)
Okay, so I actually got this fixed by stripping the trailing newlines from my strings.
email = str(_emails).rstrip()
PASSWORD_FILE.seek(0)
for passwords in PASSWORD_FILE:
password = str(passwords).rstrip()
The error was caused by the trailing newlines in the strings.

Rally python REST: Query all tasks from chosen iteration

I'm trying to query all tasks from a specific iteration using the python toolkit for the rally REST API. The iteration will be chosen at run-time.
However, I have been unable to set up the right query. I feel like I'm missing something small but important here.
This is the code:
# Ask Rally for every Task whose iteration name matches, then print each
# task name alongside its iteration name.
# NOTE(review): the criterion spells the name "2014 november" in lower case;
# confirm it matches the iteration's actual name.
query_criteria = 'Iteration.Name = "2014 november"'
response = rally.get('Task', fetch=True, query=query_criteria)
if response.errors:
    sys.stdout.write("\n".join(response.errors))
    sys.exit(1)
for Task in response:
    # Only print tasks that have an iteration attached.
    if getattr(Task,"Iteration"):
        print "%s %s" % (Task.Name,Task.Iteration.Name)
It will receive 0 rows in response.
If I remove `, query=query_criteria` and fetch all tasks, then I can see that there are tasks where the Task.Iteration.Name value is 2014 November.
The query does not give an error so I assume that the values of related objects (task->Iteration) are able to be included in the query. Yet I receive 0 rows in response.
Could the reason be that some tasks do not seem to be attached to an iteration?
One solution would be to fetch all tasks and then filter them afterwards. But that seems dirty.
If you query directly in the WS API in the browser do you get results?
https://rally1.rallydev.com/slm/webservice/v2.0/task?workspace=https://rally1.rallydev.com/slm/webservice/v2.0/workspace/12352608129&query=(Iteration.Name%20%3D%20%22my%20iteration%22)&pagesize=200
I verified that this code works with pyral 1.1.0, Python 2.7.0 and requests-2.3.0 - it returns all tasks of workproducts(e.g. user stories and defects) assigned to an iteration. I tested 3 queries: by state, by iteration reference and by iteration name (the first two are commented out in the code).
#!/usr/bin/env python
#################################################################################################
#
# showitems -- show artifacts in a workspace/project conforming to some common criterion
#
#################################################################################################
import sys, os
from pyral import Rally, rallyWorkset, RallyRESTAPIError
#################################################################################################
errout = sys.stderr.write
#################################################################################################
def main(args):
    """
    Print every Task matching the query criterion, then the match count.

    args is the command-line argument list; entries starting with '--' are
    handed to rallyWorkset() to resolve the server, credentials, workspace
    and project.
    """
    options = [item for item in args if item.startswith('--')]
    args = [item for item in args if item not in options]
    server, username, password, apikey, workspace, project = rallyWorkset(options)

    # Prefer API-key authentication when a key is configured.
    if apikey:
        credentials = {'apikey': apikey}
    else:
        credentials = {'user': username, 'password': password}
    rally = Rally(server, workspace=workspace, project=project, **credentials)
    rally.enableLogging("rally.history.showitems")

    fields = "FormattedID,State,Name"
    #criterion = 'State != Closed'
    #criterion = 'iteration = /iteration/20502967321'
    criterion = 'iteration.Name = "iteration 5"'

    response = rally.get(
        'Task',
        fetch=fields,
        query=criterion,
        order="FormattedID",
        pagesize=200,
        limit=400,
    )
    for task in response:
        print("%s %s %s" % (task.FormattedID, task.Name, task.State))
    print("-----------------------------------------------------------------")
    print("%s qualifying tasks" % response.resultCount)
#################################################################################################
#################################################################################################
if __name__ == '__main__':
main(sys.argv[1:])
sys.exit(0)

Script to move messages from one IMAP server to another

Our office uses 2 IMAP servers for e-mail, one is the incoming server and holds the recent e-mails and the other is an archive server. We mainly use Outlook 2010 and our current process is to periodically drag sent messages from the incoming server to the archive.
Today I was asked to look into writing a script that would periodically (probably using crontab) grab all sent messages and move them to the archive.
I've looked into some example of SSL or telnet to access the server and poke around. However I don't know the best way to script this or how to move files cross server within the IMAP environment.
What's the best way to accomplish this? I'd prefer to use Python just from comfort level, but if there is already an existing solution in another language, I could deal with it.
Update:
OK, here's some code. Currently it copies the messages just fine; however, it will duplicate existing messages on the archive server.
import imaplib
import sys
#copy from
f_server = 'some.secret.ip.address'
f_username = 'j#example.com'
f_password = 'password'
f_box_name = 'Sent Messages'
#copy to
t_server = 'archive.server.i.p'
t_username = 'username'
t_password = 'password'
t_box_name = 'test'
# Connect to the archive (destination) server.
To = imaplib.IMAP4(t_server)
To.login(t_username, t_password)
print('Logged into archive')
# Connect to the live (source) server.
From = imaplib.IMAP4(f_server)
From.login(f_username, f_password)
print('Logged into mail server')
try:
    From.select(f_box_name) #open box which will have its contents copied
    print('Fetching messages...')
    typ, data = From.search(None, 'ALL') #get all messages in the box
    msgs = data[0].split()
    sys.stdout.write(" ".join(['Copying', str(len(msgs)), 'messages']))
    for num in msgs: #iterate over each messages id number
        typ, data = From.fetch(num, '(RFC822)')
        sys.stdout.write('.')
        To.append(t_box_name, None, None, data[0][1]) #add a copy of the message to the archive box specified above
    sys.stdout.write('\n')
    # Only the source connection selected a mailbox, so only it is closed;
    # CLOSE is not valid on a connection with no selected mailbox.
    From.close()
finally:
    # Log out of both servers even if the copy fails part-way through.
    try:
        From.logout()
    finally:
        To.logout()
Some sources:
Doug Hellman's Blog: imaplib - IMAP4 Client Library
Tyler Lesmann's Blog: Copying IMAP Mailboxes with Python and imaplib
I still need to:
delete/expunge messages on the live server
not copy duplicates (actually this would be fixed by deleting originals after copying, but...)
error trapping
Update 2:
Anyone have any ideas on how to not create duplicates when copying? (excluding the option of deleting originals, for now) I thought about searching text, but realized nested replies could throw that off.
Here's what I ended up using. I don't claim that it's perfect, the way it uses msg_num and not id is a little risky. But this is fairly low volume moves, maybe a couple an hour (on cron).
import imaplib
#copy from
from_server = {'server': '1.1.1.1',
'username': 'j#example.com',
'password': 'pass',
'box_names': ['Sent', 'Sent Messages']}
#copy to
to_server = {'server': '2.2.2.2',
'username': 'archive',
'password': 'password',
'box_name': 'Sent'}
def connect_server(server):
    """
    Open an IMAP4 connection and log in.

    server is a dict with 'server', 'username' and 'password' keys.
    Returns the logged-in imaplib.IMAP4 connection.
    """
    host = server['server']
    connection = imaplib.IMAP4(host)
    connection.login(server['username'], server['password'])
    print('Logged into mail server # %s' % host)
    return connection
def disconnect_server(server_conn):
    """Log out of the given IMAP connection; the server's reply is discarded."""
    server_conn.logout()
if __name__ == '__main__':
    # Move every message from the source folders to the archive folder:
    # copy it (with flags and internal date), mark the original deleted,
    # then expunge once per folder.
    From = connect_server(from_server)
    To = connect_server(to_server)
    try:
        for box in from_server['box_names']:
            # Open read-write so messages can be flagged \Deleted and expunged.
            status, _ = From.select(box, readonly=False)
            if status != 'OK':
                # A folder that cannot be selected would make SEARCH fail; skip it.
                print('Skipping \'%s\': unable to select (%s)' % (box, status))
                continue
            print('Fetching messages from \'%s\'...' % box)
            resp, items = From.search(None, 'ALL') #get all messages in the box
            msg_nums = items[0].split()
            print('%s messages to archive' % len(msg_nums))
            for msg_num in msg_nums:
                # BODY.PEEK[] fetches the full message without setting \Seen.
                resp, data = From.fetch(msg_num, "(FLAGS INTERNALDATE BODY.PEEK[])") # get email
                message = data[0][1]
                flags = imaplib.ParseFlags(data[0][0]) # get flags
                flag_str = " ".join(flags)
                date = imaplib.Time2Internaldate(imaplib.Internaldate2tuple(data[0][0])) #get date
                copy_result = To.append(to_server['box_name'], flag_str, date, message) # copy to archive
                if copy_result[0] == 'OK':
                    # Only mark the original once the archive copy succeeded.
                    From.store(msg_num, '+FLAGS', '\\Deleted') # mark for deletion
            # Expunge after the loop so message numbers stay stable while iterating.
            ex = From.expunge() # delete marked
            print('expunge status: %s' % ex[0])
            if not ex[1][0]: # result can be ['OK', [None]] if no messages need to be deleted
                print('expunge count: 0')
            else:
                print('expunge count: %s' % len(ex[1]))
    finally:
        # Release both connections even if a copy fails part-way through.
        try:
            disconnect_server(From)
        finally:
            disconnect_server(To)
I'm not sure what volume of messages you're dealing with, but you could extract the Message-ID from each one and use that to find out if it's a duplicate. Either generate a list of IDs already on the target server each time you prepare to move messages, or add them to a simple database as they are archived.
You could narrow things down by an additional message property like Date if the lookups are too expensive, then drop the older lists when you no longer need them.
Presumably too late to be helpful to the OP, but hopefully useful for anyone following along after now.
This looks like a generic requirement. You probably shouldn't be custom coding anything.
You would probably be better off using an MTA configured to send copies of everything to an archive as well as sending stuff to your IMAP server. If this is hard for you to set up, consider using a third party service, who would manage your archives, and forward mail on to your existing mail server.
If you really do want to do this by copying from IMAP, I'd suggest looking at offlineimap.
If you really do want to do it yourself, the way to track the messages you've already seen is by using the Message-ID header.

How can I retrieve a Google Talk users Status Message

I'd like to be able to retrieve a users Google Talk Status Message with Python, it's really hard to find documentation on how to use some of the libraries out there.
I don't have anything to hand with xmpp installed, but here's some old code I had lying around that might help you. You'll want to update the USERNAME/PASSWORD to your own values for test purposes.
Things to note: users logged in to Google Talk get a random presence string on their userid. That doesn't matter if you are trying to get the status of some other user, but if you want to write some code to communicate with yourself, you need to distinguish the user logged in from GMail or a GTalk client from the test program. Hence the code searches through the userids.
Also, if you read the status immediately after logging in you probably won't get anything. There's a delay in the code because it takes a little while for the status to become available.
"""Send a single GTalk message to myself"""
import xmpp
import time
_SERVER = 'talk.google.com', 5223
USERNAME = 'someuser#gmail.com'
PASSWORD = 'whatever'
def sendMessage(tojid, text, username=USERNAME, password=PASSWORD):
    """
    Send one chat message and print the recipient's presence details.

    tojid    -- recipient JID; if it contains '/', the part after the last
                '/' is treated as a resource prefix and the roster is
                searched for a resource starting with it.
    text     -- message body.
    username -- JID to log in as.
    password -- password for that account.

    Returns None. Connection and authentication failures are printed.
    """
    jid = xmpp.protocol.JID(username)
    client = xmpp.Client(jid.getDomain(), debug=[])
    if not client:
        print('Connection failed!')
        return
    con = client.connect(server=_SERVER)
    if not con:
        # connect() reports failure with an empty result; stop before auth.
        print('Connection failed!')
        return
    print('connected with %s' % con)
    auth = client.auth(jid.getNode(), password, 'botty')
    if not auth:
        print('Authentication failed!')
        return
    client.RegisterHandler('message', message_cb)
    roster = client.getRoster()
    client.sendInitPresence()
    if '/' in tojid:
        tail = tojid.split('/')[-1]
        # Give the roster up to about a second to report a matching resource.
        t = time.time() + 1
        while time.time() < t:
            client.Process(1)
            time.sleep(0.1)
            if any(res.startswith(tail) for res in roster.getResources(tojid)):
                break
        # Replace the resource prefix with a full resource name from the roster.
        for res in roster.getResources(tojid):
            if res.startswith(tail):
                tojid = tojid.split('/', 1)[0] + '/' + res
    print('sending to %s' % tojid)
    client.send(xmpp.protocol.Message(tojid, text))
    # Keep processing for about a second so presence data can arrive
    # before it is read.
    t = time.time() + 1
    while time.time() < t:
        client.Process(1)
        time.sleep(0.1)
    print('status %s' % (roster.getStatus(tojid),))
    print('show %s' % (roster.getShow(tojid),))
    print('resources %s' % (roster.getResources(tojid),))
    client.disconnect()
def message_cb(session, message):
    """Handler for incoming messages: echo each one to stdout after '> '."""
    print("> %s" % (message,))
sendMessage(USERNAME + '/Talk', "This is an automatically generated gtalk message: did you get it?")

Categories

Resources