How to automatize the process of getting access to Google spreadsheets?
Right now we use gspread and oauth2client.service_account to get access to Google spreadsheets. It works fine, but using OAuth2 service-account credentials means we have to manually share every single spreadsheet with the "client_email" from the credentials JSON file.
import gspread
from oauth2client.service_account import ServiceAccountCredentials
scope = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive'
]

credentials = ServiceAccountCredentials.from_json_keyfile_name('path.json', scope)
gs = gspread.authorize(credentials)
That works, but how can we modify it to avoid the manual sharing step?
So the desired outcome is: somebody shares a spreadsheet with me and I can start working with it immediately in Python. Is that possible? Maybe we could use triggers based on the incoming emails with the sharing information, or something similar?
You can try this script. It has a few sections we can differentiate:
Requesting access to Drive and Gmail. As you can see, we use the full drive scope instead of drive.file. This is because there is an existing bug that causes drive.file to crash(1), so in the meantime we have to use the broader scope.
from __future__ import print_function
import pickle
import sys
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive',
          'https://www.googleapis.com/auth/gmail.modify']

creds = None
# The file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.pickle'):
    with open('token.pickle', 'rb') as token:
        creds = pickle.load(token)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            'credentials.json', SCOPES)
        creds = flow.run_local_server()
    # Save the credentials for the next run
    with open('token.pickle', 'wb') as token:
        pickle.dump(creds, token)

mail_service = build('gmail', 'v1', credentials=creds)
drive_service = build('drive', 'v3', credentials=creds)
Declaring some variables. There is no issue here, just declaring the variables where we will keep the mail IDs, the file names, and the file names formatted for our needs.
mail_ids = []
file_name = []
name_string = []
Get the emails. We will only take the unread emails from drive-shares-noreply. After this we will mark them as "read" so we won't take them the next time we execute the script.
def get_emails(mail_ids):
    user_id = 'me'  # Or your email
    query = 'from:drive-shares-noreply@google.com, is:UNREAD'  # Will search mails from Drive shares that are unread
    response = mail_service.users().messages().list(userId=user_id, q=query).execute()
    items = response.get('messages', [])
    if not items:
        print('No unread mails found')
        sys.exit()
    else:
        for item in items:
            mail_ids.append(item['id'])
        for mail_id in mail_ids:
            # Marks the mails as read
            mail_service.users().messages().modify(userId=user_id, id=mail_id, body={"removeLabelIds": ["UNREAD"]}).execute()
Get the file names of the emails. The syntax of the Subject of the sharing sheets email is “Filename - Invitation to edit”, so we will take the subject of each email, and we will format the string later.
def get_filename(mail_ids, file_name):
    user_id = 'me'
    headers = []
    for mail_id in mail_ids:
        response = mail_service.users().messages().get(userId=user_id, id=mail_id, format="metadata", metadataHeaders="Subject").execute()
        items = response.get('payload', [])
        headers.append(items['headers'])
    length = len(headers)
    for i in range(length):
        file_name.append(headers[i][0]['value'])

def process_name(file_name, name_string):
    for name in file_name:
        name_string.append(str(name).replace(" - Invitation to edit", ""))
Give permissions to the client_email
def give_permissions(name_string):
    for name in name_string:
        body = "'{}'".format(name)
        results = drive_service.files().list(q="name = " + body).execute()
        items = results.get('files', [])
        if not items:
            print('No files found.')
            sys.exit()
        else:
            print('Files:')
            for item in items:
                print(u'{0} ({1})'.format(item['name'], item['id']))
                file_id = item['id']
                user_permission = {
                    'type': 'user',
                    'role': 'writer',
                    'emailAddress': 'your_client_email'
                }
                drive_service.permissions().create(body=user_permission, fileId=file_id).execute()
And then we just have to call the functions
get_emails(mail_ids)
get_filename(mail_ids, file_name)
process_name(file_name, name_string)
give_permissions(name_string)
There is no way to trigger this script automatically for each new email received, but you can run it on a timer (for example a simple polling loop or a cron job, as sketched below) and it will search for new emails.
(1) The drive.file scope only works with certain files, according to the last update of the documentation
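As a minimal illustration of that timer idea (not part of the original answer), the function calls above can be wrapped in a polling loop; the five-minute interval and the handling of sys.exit() are assumptions:

import time

POLL_INTERVAL_SECONDS = 300  # assumed interval: check for new share emails every 5 minutes

while True:
    # Fresh working lists on every pass so earlier results are not reprocessed
    mail_ids, file_name, name_string = [], [], []
    try:
        get_emails(mail_ids)  # note: this calls sys.exit() when no unread mails are found
        get_filename(mail_ids, file_name)
        process_name(file_name, name_string)
        give_permissions(name_string)
    except SystemExit:
        pass  # nothing new this round; wait and poll again
    time.sleep(POLL_INTERVAL_SECONDS)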
Related
I have a Python script that interacts with the Gmail API and searches for unread mails with a certain description and from specific email addresses. But I want it so that when it reads a mail it removes the UNREAD label, so it doesn't detect it again when I run the script the next time.
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def main():
    """Shows basic usage of the Gmail API.
    Lists the user's Gmail labels.
    """
    email_address1 = "mensajeria@bancoestado.cl"
    email_address2 = "noreply@correo.bancoestado.cl"
    creds = None
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('gmail', 'v1', credentials=creds)

    # Call the Gmail API
    results = service.users().labels().list(userId='me').execute()
    labels = results.get('labels', [])

    # Get messages
    results = service.users().messages().list(userId='me', labelIds=['INBOX'], q=f'from:{email_address1} OR from:{email_address2} is:unread').execute()
    messages = results.get('messages', [])

    # Filter messages
    message_count = 100
    for message in messages[:message_count]:
        msg = service.users().messages().get(userId='me', id=message['id']).execute()
        email = (msg['snippet'])
        if "que recibiste Monto $" in email:
            print(f'{email}\n')
            service.users().messages().modify(userId='me', id=message['id'], body={'removeLabelIds': ['UNREAD']}).execute()

if __name__ == '__main__':
    main()
The script works just fine until it reaches my attempt at removing the UNREAD label so the message doesn't get detected again. Here:
service.users().messages().modify(userId='me', id=message['id'], body={'removeLabelIds': ['UNREAD']}).execute()
I know it can be done, because in the labels section of the Gmail API documentation it says that the UNREAD label can be manually changed.
So if someone could give me a hand I would appreciate it.
I've already found a solution. The problem was that the scope I was using didn't have the permissions needed to remove a label from a message, so I changed the scope to the one below instead and it worked just fine.
SCOPES = ['https://mail.google.com/']
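As a side note not taken from the original answer: the narrower gmail.modify scope should also be sufficient for messages().modify if you prefer not to grant full mailbox access; remember to delete token.pickle after changing scopes so consent is requested again.

# Narrower alternative scope (assumed sufficient for removing the UNREAD label)
SCOPES = ['https://www.googleapis.com/auth/gmail.modify']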
So my colleague and I have an application whereby we need to capture the OAuth redirect from Google's OAuth server response; the reason being that we need to send a payload to renew our pickle token, and we need to do it without human intervention. The code looks like this:
#!/usr/bin/env python3
import pickle
import os.path
import pandas as pd
import requests
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import base64
from datetime import datetime, timedelta
from urllib.parse import unquote
from bs4 import BeautifulSoup

# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


def search_message(service, user_id, search_string):
    """
    Search the inbox for emails using standard gmail search parameters
    and return a list of email IDs for each result
    PARAMS:
        service: the google api service object already instantiated
        user_id: user id for google api service ('me' works here if
            already authenticated)
        search_string: search operators you can use with Gmail
            (see https://support.google.com/mail/answer/7190?hl=en for a list)
    RETURNS:
        List containing email IDs of search query
    """
    try:
        # initiate the list for returning
        list_ids = []
        # get the id of all messages that are in the search string
        search_ids = service.users().messages().list(userId=user_id, q=search_string).execute()
        # if there were no results, print warning and return empty string
        try:
            ids = search_ids['messages']
        except KeyError:
            print("WARNING: the search query returned 0 results")
            print("returning an empty string")
            return ""
        if len(ids) > 1:
            for msg_id in ids:
                list_ids.append(msg_id['id'])
            return list_ids
        else:
            list_ids.append(ids[0]['id'])
            return list_ids
    except Exception as error:
        print("An error occurred: %s" % error)


def get_message(service, user_id, msg_id):
    """
    Search the inbox for specific message by ID and return it back as a
    clean string. String may contain Python escape characters for newline
    and return line.
    PARAMS
        service: the google api service object already instantiated
        user_id: user id for google api service ('me' works here if
            already authenticated)
        msg_id: the unique id of the email you need
    RETURNS
        A string of encoded text containing the message body
    """
    try:
        final_list = []
        message = service.users().messages().get(userId=user_id, id=msg_id).execute()  # fetch the message using API
        payld = message['payload']  # get payload of the message
        report_link = ""
        mssg_parts = payld['parts']  # fetching the message parts
        part_one = mssg_parts[1]  # fetching first element of the part
        #part_onee = part_one['parts'][1]
        #print(part_one)
        part_body = part_one['body']  # fetching body of the message
        part_data = part_body['data']  # fetching data from the body
        clean_one = part_data.replace("-", "+")  # decoding from Base64 to UTF-8
        clean_one = clean_one.replace("_", "/")  # decoding from Base64 to UTF-8
        clean_one = clean_one.replace("amp;", "")  # cleaned amp; in links
        clean_two = base64.b64decode(clean_one)  # decoding from Base64 to UTF-8
        soup = BeautifulSoup(clean_two, features="html.parser")
        for link in soup.findAll('a'):
            if "talentReportRedirect?export" in link.get('href'):
                report_link = link.get('href')
                break
        final_list.append(report_link)  # This will create a dictionary item in the final list
    except Exception as error:
        print("An error occurred: %s" % error)
    return final_list


def get_service():
    """
    Authenticate the google api client and return the service object
    to make further calls
    PARAMS
        None
    RETURNS
        service api object from gmail for making calls
    """
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    auth_link = build('gmail', 'v1', credentials=creds)
    parsed_url = unquote(auth_link).split('redirect')[-1]
    return parsed_url


def get_report(link_array):
    for link in link_array:
        df = requests.get(link[0], allow_redirects=True)
        # df.encoding
        # dt = pd.DataFrame(data=df)
        print(link)
        # upload_to_database(df) -- Richard Barret please update this function
        print(df.text)
        ## dt.to_csv(r'C:\Users\user\Desktop\api_gmail.csv', sep='\t', header=True)


if __name__ == "__main__":
    link_list = []
    monday = datetime(2022, 12, 5)    # datetime.now() - timedelta(days=datetime.now().weekday())
    thursday = datetime(2022, 12, 8)  # datetime.now() - timedelta(days=datetime.now().weekday() - 3)
    query = 'from:messages-noreply@linkedin.com ' + 'after:' + monday.strftime('%Y/%m/%d') + ' before:' + thursday.strftime('%Y/%m/%d')
    service = get_service()
    mssg_list = search_message(service, user_id='me', search_string=query)
    for msg in mssg_list:
        link_list.append(get_message(service, user_id='me', msg_id=msg))
    get_report(link_list)
It is assumed that you have a directory structure like this:
├── credentials.json
├── gmail_api_linkedin.py
└── requirements.txt
Obviously, you won't have the credentials.json file, but in essence the code works and redirects us to a login page to retrieve the new pickle.
The main thing is we can't interact with that in an autonomous fashion. As such, how can we capture the URL that the server prints out, which is different every single time?
Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=212663976989-96o952s9ujadjgfdp6fm0p462p37opml.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A58605%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.readonly&state=ztJir0haFQlvTP79BRthhmEHlSsqIj&access_type=offline
More succinctly, how can we capture the URL in a pythonic manner to send POST and PUT requests to that redirect?
renew our pickle token
I still do not understand why you feel the need to renew your token pickle.
How it all works.
The following example will spawn the consent screen directly on the machine it's running on. It then stores the token within the token.json file.
token.json
This file contains all the information the script needs to run. The client library can automatically request a new access token whenever it needs one.
{
    "token": "[REDACTED]",
    "refresh_token": "[REDACTED]",
    "token_uri": "https://oauth2.googleapis.com/token",
    "client_id": "[REDACTED]",
    "client_secret": "[REDACTED]",
    "scopes": [
        "https://mail.google.com/"
    ],
    "expiry": "2023-01-03T19:06:13.959468Z"
}
gmail quickstart.
# To install the Google client library for Python, run the following command:
# pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib
from __future__ import print_function

import os.path

import google.auth.exceptions
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# If modifying these scopes, delete the file token.json.
SCOPES = ['https://mail.google.com/']


def main():
    """Shows basic usage of the Gmail v1 API.
    Prints a list of user messages.
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.json'):
        try:
            creds = Credentials.from_authorized_user_file('token.json', SCOPES)
            creds.refresh(Request())
        except google.auth.exceptions.RefreshError as error:
            # if refresh token fails, reset creds to none.
            creds = None
            print(f'An error occurred: {error}')
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'C:\YouTube\dev\credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    try:
        service = build('gmail', 'v1', credentials=creds)
        # Call the Gmail v1 API
        results = service.users().messages().list(userId='me').execute()
        messages = results.get('messages', [])
        if not messages:
            print('No messages found.')
            return
        print('Messages:')
        for message in messages:
            print(u'{0} ({1})'.format(message['id'], message['threadId']))
    except HttpError as error:
        # TODO(developer) - Handle errors from gmail API.
        print(f'An error occurred: {error}')


if __name__ == '__main__':
    main()
expired refresh token.
If your issue is in fact that your refresh tokens are expiring, this is because your app is currently in the testing phase. If you set your app to production, then your refresh tokens will stop expiring.
I'm looking for a Google API to get the Drive size of the users associated with our university email/directory API, but I can't find anything. The code to delete a user using the Google API is provided below. Similarly, I need to know the size of each user's Drive. Could someone please assist me? Is there a way to get the Drive's size via an API? Thanks.
from __future__ import print_function
import os.path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# If modifying these scopes, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/admin.directory.group',
          'https://www.googleapis.com/auth/admin.directory.user']


def main():
    """Shows basic usage of the Admin SDK Directory API.
    Prints the emails and names of the first 10 users in the domain.
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    service = build('admin', 'directory_v1', credentials=creds)
    return service


def test():
    # user = service.users().get(userKey="user1@matador.csun.edu").execute()
    # members = service.groups().list(domain='my.csun.edu', userKey=user['primaryEmail'], pageToken=None, maxResults=500).execute()
    # print(user)
    # Call the Admin SDK Directory API
    print('Getting the first 10 users in the domain')
    results = service.users().list(customer='my_customer', maxResults=10,
                                   orderBy='email').execute()
    print(results)
    users = results.get('users', [])
    if not users:
        print('No users in the domain.')
    else:
        print('Users:')
        for user in users:
            print(user)
            # print(dir(user))
            # print(u'{0} ({1})'.format(user['primaryEmail'],
            #                           user['name']['fullName']))


def del_user(user):
    try:
        service.users().delete(userKey=user).execute()
        print("Deleted!")
    except:
        print("User doesn't exist!")


if __name__ == '__main__':
    service = main()
    nameExt = '23'
    # with open('NewGmailInProd/gmailUser' + nameExt + '.txt') as fileToRead:
    # with open('NewGmailInProd/test.txt') as fileToRead:
    #     emails = fileToRead.readlines()
    emails = ['user1@matador.csun.edu']
    for email in emails:
        del_user(email.strip())
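One possible approach (not from the original thread): the Drive API's about.get method exposes a storageQuota object with the account's usage. To read it for each university user you would typically use a service account with domain-wide delegation and impersonate that user; the service-account setup below is an assumption, a minimal sketch rather than a tested solution.

from google.oauth2 import service_account
from googleapiclient.discovery import build

# Assumed service-account key with domain-wide delegation over the Drive scope
SA_KEY_FILE = 'service_account.json'
DRIVE_SCOPES = ['https://www.googleapis.com/auth/drive.readonly']


def get_drive_usage(user_email):
    # Impersonate the target user so about() reports that user's quota
    creds = service_account.Credentials.from_service_account_file(
        SA_KEY_FILE, scopes=DRIVE_SCOPES).with_subject(user_email)
    drive = build('drive', 'v3', credentials=creds)
    about = drive.about().get(fields='storageQuota').execute()
    return about['storageQuota']  # e.g. {'limit': ..., 'usage': ..., 'usageInDrive': ...}


# print(get_drive_usage('user1@matador.csun.edu'))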
This is my code to download the PDF attachments using the Gmail API in Python. It works fine in the sense that I am able to download the PDF attachments, but they don't open. I get the following error: "There was an error opening this document. The file is damaged and could not be repaired".
from __future__ import print_function
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
import base64
from apiclient import errors

# If modifying these scopes, delete the file token.json.
SCOPES = ['https://mail.google.com/']


def main():
    """Shows basic usage of the Gmail API.
    Lists the user's Gmail labels.
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    # To manage a different gmail account, delete the existing token.json file from the folder.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    service = build('gmail', 'v1', credentials=creds)

    # Call the Gmail API
    results = service.users().labels().list(userId='me').execute()
    labels = results.get('labels', [])

    # Call the Gmail API to fetch INBOX
    results = service.users().messages().list(userId='me', labelIds=['Label_9213971794059785832']).execute()
    messages = results.get('messages', [])
    print(messages[1])
    if not messages:
        print("No messages found.")
    else:
        print("Message snippets:")
        for message in messages:
            GetAttachments(service, 'me', message['id'])
            # msg = service.users().messages().get(userId='me', id=message['id']).execute()
            # print(msg)
            # break


def GetAttachments(service, user_id, msg_id, store_dir="attachments/"):
    """Get and store attachment from Message with given id.
    Args:
        service: Authorized Gmail API service instance.
        user_id: User's email address. The special value "me"
            can be used to indicate the authenticated user.
        msg_id: ID of Message containing attachment.
        store_dir: The directory used to store attachments.
    """
    try:
        message = service.users().messages().get(userId=user_id, id=msg_id).execute()
        parts = [message['payload']]
        while parts:
            part = parts.pop()
            if part.get('parts'):
                parts.extend(part['parts'])
            if part.get('filename'):
                if 'data' in part['body']:
                    file_data = base64.urlsafe_b64decode(part['body']['data'].encode('UTF-8'))
                    #self.stdout.write('FileData for %s, %s found! size: %s' % (message['id'], part['filename'], part['size']))
                elif 'attachmentId' in part['body']:
                    attachment = service.users().messages().attachments().get(
                        userId=user_id, messageId=message['id'], id=part['body']['attachmentId']
                    ).execute()
                    file_data = base64.urlsafe_b64decode(attachment['data'].encode('UTF-8'))
                    #self.stdout.write('FileData for %s, %s found! size: %s' % (message['id'], part['filename'], attachment['size']))
                else:
                    file_data = None
                if file_data:
                    # do some stuff, e.g.
                    path = ''.join([store_dir, part['filename']])
                    with open(path, 'w') as f:
                        f.write(str(file_data))
    except errors.HttpError as error:
        print('An error occurred: %s' % error)


if __name__ == '__main__':
    main()
please help!
Found the mistake! I needed to write the bytes, not a string.
I modified two lines of the code as follows:
with open(path, 'wb') as f:
    f.write(file_data)
I would like to list all message IDs from a Gmail account using the Gmail API. So far I've been able to list the first and second pages of message IDs. I know I have to use the pageToken to get to the next page of results, but I can't figure out how to restructure my code so I'm not using separate 1, 2, 3, etc. variables to call each page. Source code is below.
get_email_ids.py:
from __future__ import print_function
import os.path
from collections import Counter
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials

# If modifying these scopes, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


def main():
    """Shows basic usage of the Gmail API.
    """
    creds = None
    user_id = "me"
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    service = build('gmail', 'v1', credentials=creds)

    ### Call the Gmail API
    ### Show messages
    token = ''
    messages = service.users().messages().list(userId=user_id, pageToken=token).execute().get('messages', [])
    token = service.users().messages().list(userId=user_id, pageToken=token).execute().get('nextPageToken', [])
    print(messages, token)

    messages2 = service.users().messages().list(userId=user_id, pageToken=token).execute().get('messages', [])
    token2 = service.users().messages().list(userId=user_id, pageToken=token).execute().get('nextPageToken', [])
    print(messages2, token2)


if __name__ == '__main__':
    main()
Results of get_email_ids.py (shortened):
[{'id': '179ed5ae720de1f6', 'threadId': '179ed5ae720de1f6'}, ... {'id': '179ba226644a079a', 'threadId': '17972318184138fa'}] 09573475999783117733
[{'id': '179b9f8852d3b09d', 'threadId': '179b9f8852d3b09d'}, ... {'id': '1797fa390caa3454', 'threadId': '1797fa390caa3454'}] 07601624978802434502
I can't test it, but I would use the same variables messages and token without the 1, 2, 3 suffixes, add the results to one list holding all the messages, and run it all in a loop.
Something like this
all_messages = []
token = ''

while True:
    messages = service.users().messages().list(userId=user_id, pageToken=token).execute().get('messages', [])
    token = service.users().messages().list(userId=user_id, pageToken=token).execute().get('nextPageToken', [])
    print(messages, token)
    if not messages:
        break
    #all_messages.extend(messages)  # `extend` or `+=`, not `append`
    all_messages += messages  # `extend` or `+=`, not `append`
I just don't know how the API signals that there are no more messages - maybe it returns an empty list, maybe it gives an empty token, or maybe it raises an error.
EDIT:
Information for other users: as @emmalynnh mentioned in a comment:
When there are no more messages it gives an empty token
and the API will return a 400 error if you try to request.
An improved version of @furas' answer can be made.
all_messages = []
token = ''

while True:
    service_messages = service.users().messages()
    messages = service_messages.list(userId=user_id, pageToken=token).execute().get('messages', [])
    token = service_messages.list(userId=user_id, pageToken=token).execute().get('nextPageToken', [])
    if not messages:
        break
    all_messages += messages

print(all_messages)
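A further option, not from the original answers: the Python client library can build the next page request itself via list_next, which returns None once there are no more pages, so you never have to guess how the API signals the end, and each page needs only one list() call. A minimal sketch, assuming service and user_id are set up as in the question:

all_messages = []
request = service.users().messages().list(userId=user_id)

while request is not None:
    response = request.execute()
    all_messages += response.get('messages', [])
    # list_next() builds the follow-up request from nextPageToken, or returns None on the last page
    request = service.users().messages().list_next(previous_request=request, previous_response=response)

print(len(all_messages))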