Google API Multi-Processing - python

I'm trying to grab specific information from emails under my Gmail account (Subject, From, Date, Message Body) and was able to do so succesfully using the Google API and relevant libraries, however, I've noticed the more emails you have the longer it takes to parse, so much so that parsing 34 emails takes nearly 15 seconds, which is bad if you tried to scale that to parsing 1000 emails. My aim was to utilise concurrency/multi-processing on the parse_messages() function, however, I've had no luck and keep returning an empty list. The aim is to process all the emails, then append them all to a the combined list.
Apologies for the sloppyness, it's yet to be cleaned up, there's less than 100 lines in total.
from __future__ import print_function
import os.path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from concurrent.futures import ProcessPoolExecutor
import base64
import re
combined = []
def authenticate():
# If modifying these scopes, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
creds = None
if os.path.exists('token.json'):
creds = Credentials.from_authorized_user_file('token.json', SCOPES)
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
'creds.json', SCOPES)
creds = flow.run_local_server(port=0)
with open('token.json', 'w') as token:
token.write(creds.to_json())
return creds
def get_messages(creds):
# Get the messages
days = 31
service = build('gmail', 'v1', credentials=creds)
results = service.users().messages().list(userId='me', q=f'newer_than:{days}d, in:inbox').execute()
messages = results.get('messages', [])
message_count = len(messages)
print(f"You've received {message_count} email(s) in the last {days} days")
if not messages:
print(f'No Emails found in the last {days} days.')
return messages
def parse_message(msg):
# Call the Gmail API
service = build('gmail', 'v1', credentials=creds)
txt = service.users().messages().get(userId='me', id=msg['id']).execute()
payload = txt['payload']
headers = payload['headers']
#Grab the Subject Line, From and Date from the Email
for d in headers:
if d['name'] == 'Subject':
subject = d['value']
if d['name'] == 'From':
sender = d['value']
try:
match = re.search(r'<(.*)>', sender).group(1)
except:
match = sender
if d['name'] == "Date":
date_received = d['value']
def get_body(payload):
if 'body' in payload and 'data' in payload['body']:
return payload['body']['data']
elif 'parts' in payload:
for part in payload['parts']:
data = get_body(part)
if data:
return data
else:
return None
data = get_body(payload)
data = data.replace("-","+").replace("_","/")
decoded_data = base64.b64decode(data).decode("UTF-8")
decoded_data = (decoded_data.encode('ascii', 'ignore')).decode("UTF-8")
decoded_data = decoded_data.replace('\n','').replace('\r','').replace('\t', '')
# Append parsed message to shared list
return combined.append([date_received, subject, match, decoded_data])
if __name__ == '__main__':
creds = authenticate()
messages = get_messages(creds)
# Create a process pool with 4 worker processes
with ProcessPoolExecutor(max_workers=4) as executor:
# Submit the parse_message function for each message in the messages variable
executor.map(parse_message, messages)
print(f"Combined: {combined}")
When running the script, my output is normally.
You've received 34 email(s) in the last 31 days
combined: []

Thanks to the help of simpleApp, I made their changes along with a few others to get this working.
# Append parsed message to shared list
return [date_received, subject, match, decoded_data]
if __name__ == '__main__':
creds = authenticate()
messages, service = get_messages(creds)
# Create a process pool with default worker processes
with ProcessPoolExecutor() as executor:
combined = []
# Submit the parse_message function for each message in the messages variable
all_pools = executor.map(parse_message, messages, [service]*len(messages))
for e_p in all_pools:
combined.append(e_p)

Related

Script blocking in the Listener event

I'm trying to get the last email receirved using exchangelib listener;
the probleme here is that the code not printing the seconde print(account.inbox.all().count()) ,
and the first print(account.inbox.all().count()) printinig it fine,
see the result below the code
creds = Credentials(
username="domaine\\user",
password="password"
)
def main():
print("started !")
config = Configuration(server='server', credentials=creds)
account = Account(
primary_smtp_address="mail",
autodiscover=False,
config=config,
access_type=DELEGATE,
default_timezone=UTC
)
listener = Listener(account)
print(account.inbox.all().count())
def new_messaged_received():
print("---------------------------------new mail arrived----------------------------------------------");
for item in account.inbox.all().only('subject').order_by('-datetime_received')[:1]:
print(item.subject)
listener.streaming_event_received += new_messaged_received
listener.listen(NewMailEvent)
the result after receirving a new email :
7503
---------------------------------new mail arrived----------------------------------------------

Trying to create a Gmail API compatible message just does nothing and gives no errors

Gmail decided that SMTP was too simple so they had to block it and setup their own API with all the weird requirements around it. This script which I was trying to use has now become outdated and broken. In an attempt to use it anyway I tried to rewrite it:
"""
Checks stock on specified items at Microcenter store locations,
and sends email notifications when changes are detected.
Applicably, it helps the user obtain rare items during shortages.
"""
from aiohttp import ClientSession
from async_timeout import timeout
from getpass import getpass
from re import search
from smtplib import SMTP
import asyncio
import base64
from email.mime.audio import MIMEAudio
from email.mime.base import MIMEBase
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import mimetypes
import os
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
class Item:
"""
Class for containing state of individual items; methods update state
by awaiting update().
Item does not need to be directly instantiated; Store will create one
per provided url.
"""
def __init__(self, storeNum, url):
self.storeNum, self.url = storeNum, url
self.sku = self.price = self.stock = None
self.stockChanged = self.priceChanged = False
self.loop = asyncio.get_event_loop()
def __str__(self):
stock = 'in' if self.stock else 'out of'
return f'SKU {self.sku} is {stock} stock for {self.price} at Microcenter {self.storeNum}\n{self.url}\n'
async def pull(self):
async with ClientSession() as session:
async with timeout(10):
async with session.get(self.url, params={'storeSelected': self.storeNum}) as response:
return await response.text()
#staticmethod
def parse_lines(page):
for var in ['SKU', 'inStock', 'productPrice']:
reply = search(f"(?<='{var}':').*?(?=',)", page)
if reply:
yield reply.group()
#staticmethod
def compare(new, old):
return (new != old and old is not None)
async def update(self):
data = tuple(self.parse_lines(await self.pull()))
if not data or any(data) is None:
raise ValueError('Data missing from request or store number invalid')
self.sku, stock, price = int(data[0]), data[1] is 'True', float(data[2])
self.stockChanged, self.priceChanged = self.compare(stock, self.stock), self.compare(price, self.price)
self.stock, self.price = stock, price
class Store:
"""
Periodically checks a given list of urls for stock changes
A store number is required to get accurate stock numbers.
The default store number is set to the North Dallas/Richardson, TX location.
Also required is valid email account information for notifications.
If a recipient address is not provided, the user will be prompted for one.
If the prompt is empty, notifications are sent from the sender
address to itself. Providing an empty string for recipient is a valid
argument to enable loopback operation, as only a value of None
will trigger a prompt.
The default time between checks is 15 minutes. This value should
be at least a few minutes, to avoid being blacklisted by the
server, though this class enforces no such limit. To change the
time period, provide a value in minutes to self.run(minutes).
Setting debug to True enables false positives for testing
"""
def __init__(
self, storeNum=131, sender=None,
recipient=None, debug=True, service=None
):
self.storeNum = storeNum
self.items, self.newInStock, self.totalInStock = set(), 0, 0
self.debug = debug
if not sender:
self.sender = input('Enter sender email address: ').lstrip().rstrip()
else:
self.sender = sender
if recipient is None:
prompted = input('Enter recipient email address (leave blank for loopback): ').lstrip().rstrip()
if not prompted:
self.recipient = self.sender
else:
self.recipient = prompted
else:
self.recipient = self.sender
#Google API BULLSHIT
SCOPES = ['https://www.googleapis.com/auth/gmail.compose','https://www.googleapis.com/auth/gmail.readonly']
creds = None
# The file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.pickle'):
with open('token.pickle', 'rb') as token:
creds = pickle.load(token)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
'credentials.json', SCOPES)
creds = flow.run_local_server(port=0)
# Save the credentials for the next run
with open('token.pickle', 'wb') as token:
pickle.dump(creds, token)
self.service = build('gmail', 'v1', credentials=creds)
# Call the Gmail API
results = self.service.users().labels().list(userId='me').execute()
labels = results.get('labels', [])
if not labels:
print('No labels found.')
else:
print('Labels:')
for label in labels:
print((label['name']))
self.loop = asyncio.get_event_loop()
def __str__(self):
return '\n'.join(item.__str__() for item in self.items)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.loop.close()
#property
def storeNum(self):
return self._storeNum
#storeNum.setter
def storeNum(self, val):
"""
Check to see if value is formatted properly
storeNum must be sent as a string, but should contain an integer.
"""
assert isinstance(val, (int, str)), 'Store number must be an integer or string of integer'
try:
num = int(val)
except:
raise
else:
self._storeNum = str(num)
#property
def sender(self):
return self._sender
#sender.setter
def sender(self, val):
assert val is not None, 'Sender address cannot be empty'
assert isinstance(val, str), 'Must be str'
self._sender = val
def run(self, minutes=5):
run = asyncio.ensure_future(self.check(minutes))
self.loop.run_forever()
async def check(self, minutes=5):
assert isinstance(minutes, (int, float)), 'Minutes must be an integer or float'
seconds = minutes * 60
while True:
print('Checking stock...')
await self.update()
if self.newInStock:
print('New items available')
msg = email_message()
print("message created")
self.send_email(msg)
print("email send attempted")
#if sent:
#print('Recipient notified of stock changes')
else:
print('Stock unchanged')
await asyncio.sleep(seconds)
def add_interactive(self):
entry = True
while entry:
entry = eval(input('Add one or more URLs separated by spaces, or leave blank to complete: '))
try:
urls = entry.split()
except:
if entry and 'http' in entry:
self.add(entry.lstrip().rstrip())
else:
self.add(*urls)
def add(self, *urls):
for url in urls:
assert isinstance(url, str), 'URL must be a string'
if url not in (item.url for item in self.items):
new = Item(self.storeNum, url)
self.loop.run_until_complete(new.update())
self.items.add(new)
def remove(self, *urls):
for url in urls:
assert isinstance(url, str), 'URL must be a string'
self.items = set([item for item in self.items if item.url not in urls])
def email_message(self):
if self.debug:
new = self.items
else:
new = tuple([item for item in self.items if item.stockChanged])
message_text = '\n'.join(item.__str__() for item in new)
print(message_text)
#Create message container
message = MIMEMultipart('alternative') # needed for both plain & HTML (the MIME type is multipart/alternative)
message['Subject'] = self.email_subject()
print("set Subject")
message['From'] = self.sender
print("set sender")
message['To'] = self.recipient
print("set recipient")
#Create the body of the message (a plain-text and an HTML version)
message.attach(MIMEText(message_text, 'plain'))
print("attached plaintext")
message.attach(MIMEText(message_text, 'html'))
print("attached html")
raw_message_no_attachment = base64.urlsafe_b64encode(message.as_bytes())
print("encoded b64")
raw_message_no_attachment = raw_message_no_attachment.decode()
print("decoded raw")
body = {'raw': raw_message_no_attachment}
print("set body")
return body
def email_subject(self):
return f'({self.newInStock} new, {self.totalInStock} total) items in stock at Microcenter {self.storeNum}'
def send_email(self, msgOBJ):
message = msgOBJ
print("message encoded")
try:
message_sent = (self.service.users().messages().send(userId='me', body=message).execute())
message_id = message_sent['id']
# print(attached_file)
print (f'Message sent (without attachment) \n\n Message Id: {message_id}\n\n Message:\n\n {message_text_plain}')
# return body
return True
except errors.HttpError as error:
print (f'An error occurred: {error}')
return False
async def update(self):
for item in self.items:
await item.update()
if self.debug:
self.newInStock = self.totalInStock = len(self.items)
else:
self.newInStock = sum(item.stockChanged for item in self.items)
self.totalInStock = sum(item.stock for item in self.items)
class Clerk(Store):
"""
Further abstraction and automation of Store
Instantiate Clerk with a list of urls as arguments
and an optional store number as a keyword argument.
Clerk exists to be able to start and run a Store in one line.
The user will be prompted for email account information.
"""
def __init__(self, *urls, storeNum=131):
super().__init__(storeNum=storeNum)
if urls:
super().add(*urls)
else:
super().add_interactive()
super().run()
Clerk("https://www.microcenter.com/product/616858/amd-ryzen-9-3950x-35ghz-16-core-am4-boxed-processor", storeNum=155)
I wrote this in a way that is Python 3.6 compatible and Gmail API friendly so it'll actually work. However, upon calling the Store.email_message method (which is supposed to create and return the necessary b64 encoded message object) nothing happens, not one of the prints spaced throughout it is called and no error is returned either. It just stops there.
I initially tried the code from the examples in the Gmail API Documentation, but that didn't work, so then i went searching through the web until I decided to stop with the code I got here (code stolen from their send_Message_without_attachment and create_message_without_attachment functions) and ask for help.
Edit
I followed the advice of the answer I got and changed the email_message function to
def email_message(self):
if self.debug:
new = self.items
else:
new = tuple([item for item in self.items if item.stockChanged])
message_text = '\n'.join(item.__str__() for item in new)
print(message_text)
#Create message container
message = MIMEMultipart('alternative') # needed for both plain & HTML (the MIME type is multipart/alternative)
message['Subject'] = self.email_subject()
message['From'] = self.sender
message['To'] = self.recipient
#Create the body of the message (a plain-text and an HTML version)
message.attach(MIMEText(message_text, 'plain'))
message.attach(MIMEText(message_text, 'html'))
raw_message_no_attachment = urlsafe_b64encode(bytes(message))
raw_message_no_attachment = raw_message_no_attachment.decode()
body = {'raw': raw_message_no_attachment}
return body
That said it still gives no error and doesn't even get to print the message text when it gets to the point where it's called, so I'm still pretty lost.
For the encoding you have to change your import and use like this:
Import:
from base64 import urlsafe_b64encode
Use:
encode = urlsafe_b64encode(bytes(message))
For the scopes using this one is more than enough:
SCOPES = ['https://mail.google.com/']
Remember to delete and renew the token.pickle every time you change the scopes.
Be sure that the API credentials are Ok.

Get more details from exception

Trying to run Google spreadsheets sample application.
Got exception while uploading credentials:
try:
creds = store.get()
except Exception as e:
print(e)
print("exception end")
Strange is fact that only line _module was printed while executing Exception code block.
How to understand what exactly was wrong?
What does _module means?
Whole code:
from __future__ import print_function
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
# If modifying these scopes, delete the file token.json.
SCOPES = 'https://www.googleapis.com/auth/spreadsheets.readonly'
# The ID and range of a sample spreadsheet.
SAMPLE_SPREADSHEET_ID = '1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms'
SAMPLE_RANGE_NAME = 'Class Data!A2:E'
def main():
print("starting")
"""Shows basic usage of the Sheets API.
Prints values from a sample spreadsheet.
"""
#store = file.Storage('token.json')
store = file.Storage('D:\pyth_nonsens\workspace_python\PyhonTutorial\google\credentials.json')
#store = file.Storage('My Project aaa-8102e33b9fef.json')
try:
creds = store.get()
except Exception as e:
print(e)
print("exception end")
if not creds or creds.invalid:
flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
creds = tools.run_flow(flow, store)
service = build('sheets', 'v4', http=creds.authorize(Http()))
# Call the Sheets API
SPREADSHEET_ID = '1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms'
RANGE_NAME = 'Class Data!A2:E'
result = service.spreadsheets().values().get(spreadsheetId=SPREADSHEET_ID,
range=RANGE_NAME).execute()
values = result.get('values', [])
if not values:
print('No data found.')
else:
print('Name, Major:')
for row in values:
# Print columns A and E, which correspond to indices 0 and 4.
print('%s, %s' % (row[0], row[4]))
print(__name__)
if __name__ == 'test_spread':
print("true")
main()

Test a cell on a Google Sheet and write to that row

I've got a Google Sheet with students names (columns A and B) and some value in the column next to it (column C). I'd like to test each value in turn and write to that row's (column D) if the test resolves to True.
My code so far:
from __future__ import print_function
import httplib2
import os
from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
# Don't know what this is for
try:
import argparse
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
flags = None
# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/sheets.googleapis.com-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/spreadsheets.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Google Sheets API'
# just copied code from Google's Dev guide
def get_credentials():
"""Gets valid user credentials from storage.
If nothing has been stored, or if the stored credentials are invalid,
the OAuth2 flow is completed to obtain the new credentials.
Returns:
Credentials, the obtained credential.
"""
home_dir = os.path.expanduser('~')
credential_dir = os.path.join(home_dir, '.credentials')
if not os.path.exists(credential_dir):
os.makedirs(credential_dir)
credential_path = os.path.join(credential_dir,
'sheets.googleapis.com-python-quickstart.json')
store = Storage(credential_path)
credentials = store.get()
if not credentials or credentials.invalid:
flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
flow.user_agent = APPLICATION_NAME
if flags:
credentials = tools.run_flow(flow, store, flags)
else: # Needed only for compatibility with Python 2.6
credentials = tools.run(flow, store)
print('Storing credentials to ' + credential_path)
return credentials
# PROBLEM IS IN THIS METHOD
def main():
# Hope Google doesn't change all this API stuff and make us redo:
credentials = get_credentials()
http = credentials.authorize(httplib2.Http())
discoveryUrl = ('https://sheets.googleapis.com/$discovery/rest?'
'version=v4')
service = discovery.build('sheets', 'v4', http=http,
discoveryServiceUrl=discoveryUrl)
# Fake URI of our sheet
spreadsheetId = '5lksdfusdlfkjkj886kJUNKssdff'
# retrieve data works
rangeName = 'Sheet1!A2:D70'
result = service.spreadsheets().values().get(
spreadsheetId=spreadsheetId, range=rangeName).execute()
values = result.get('values', [])
if not values:
print('No data found.')
else:
for row in values:
print('Checking on: %s %s' % (row[0], row[1]))
# PROBLEM... can't seem
if row[2] == "Some arbitrary condition":
# Even trying to hardcode a range doesn't work
# I'd like to reference the coordinates of this row
result = service.spreadsheets().values().update(
spreadsheetId=spreadsheetId, range='Sheet1!D64',
body="can you see me?").execute()
print(result)
if __name__ == '__main__':
main()

google calendar acces from Python

I newbe on this communauty
till nov 17, I used google calendar V2 to turn on or off my swiming pool pump
I'm trying to upgrade my python script to Google API V3 but I'm not sure to understand everything:
I'm using (no, trying to use) following code get on internet :
# Inspired from 'Raspberry Pi as a Google Calender Alarm Clock'
# http://www.esologic.com/?p=634
from datetime import datetime
import logging, os, platform, re, time
from apiclient.discovery import build
import httplib2
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import run
from config import *
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class Alarm():
system = platform.system().lower()
flow = flow_from_clientsecrets(CLIENT_SECRET_FILE,
scope='https://www.googleapis.com/auth/calendar',
redirect_uri='http://localhost:8080/')
storage = Storage('calendar.dat')
credentials = storage.get()
if credentials is None or credentials.invalid == True:
credentials = run(flow, storage)
# Google Calendar service connection
http = httplib2.Http()
http = credentials.authorize(http)
service = build(serviceName='calendar', version='v3', http=http, developerKey=API_KEY)
def check_credentials(self):
if self.credentials is None or self.credentials.invalid == True:
credentials = run(self.flow, self.storage)
def calendar_event_query(self):
self.check_credentials()
today = datetime.today()
events = self.service.events().list(singleEvents=True, calendarId=CALENDAR_ID).execute()
for i, event in enumerate(events['items']):
name = event['summary'].lower()
start = event['start']['dateTime'][:-9]
description = event.get('description', '')
repeat = True if description.lower() == 'repeat' else False
now = today.strftime('%Y-%m-%dT%H:%M')
if start >= now:
logger.debug('Event #%s, Name: %s, Start: %s', i, name, start)
if start == now:
if name.startswith('say'):
name = re.sub(r'[^a-zA-Z0-9\s\']', '', name)
command = '{0} "{1}"'.format('say' if system == 'darwin' else 'espeak - ven+m2', name[4:])
logger.info('Event starting. Announcing \'%s\'...', name[4:])
else:
mp3_files = os.listdir(MP3_FOLDER)
mp3_name = name.replace(' ', '_') + '.mp3'
mp3_name = mp3_name if mp3_name in mp3_files else 'default.mp3'
command = 'mpg123 \'{}/{}\''.format(MP3_FOLDER, mp3_name)
logger.info('Event %s starting. Playing mp3 file %s...', name, mp3_name)
os.system(command)
if repeat == False:
time.sleep(60)
def poll(self):
logger.info('Polling calendar for events...')
self.calendar_event_query()
while True:
a = Alarm()
a.poll()
time.sleep(FREQUENCY_CHECK)
Of course, I created client ID and key ID on google consele
But, when I run my script, It's not working and I get a webpage on my raspberry with an error 400 : rederict_URI mismatch
An idea?

Categories

Resources