New programmer here; I have been coding scripts to automate work responsibilities.
Scope of Problem:
I get bi-monthly Excel reports from an outside vendor sent via email. This vendor uses ZixMail for encryption, which my company does not use. As a result, I have to access these emails through a Secure Mail Center website, logging on with my username and password. I am trying to establish a connection to this server and download the attachment files.
What I have tried:
Tried an IMAP connection to the "server" (I am not sure if the website is a mail server).
Struck out many times, as I could never get a connection (if there are suggestions to try, please share).
Accessing the site via HTTP using sessions.
I am able to connect to the site, but when I go to .get and .write the file, my Excel file comes back blank and corrupted.
On the Mail Center website, when I click the link/URL it automatically downloads the file. I am not sure why this has to be so challenging.
The source code from the website where you download the file looks like:

<a rel="external" href="/s/attachment?name=Random Letters and Numbers=emdeon" title="File Title.xlsx">
The href looks nothing like a normal URL and does not end in .xlsx or any other file extension, unlike most of the examples I have seen.
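For what it's worth, a relative href like that is normal: the browser joins it with the site's domain, and the download behaviour comes from the response's Content-Disposition header rather than from the URL ending in .xlsx. Here is a hedged sketch of how such an href could be fetched with requests; the base URL, login path, and form fields are placeholders, not the real site's:

import requests
from urllib.parse import urljoin

# Placeholder values; the real base URL, login path and form fields will differ.
base = "https://securemail.example.com"
href = "/s/attachment?name=..."

with requests.Session() as s:
    s.post(urljoin(base, "/login"), data={"em": "Username", "passphrase": "Password"})
    r = s.get(urljoin(base, href))  # the Session re-sends the login cookies automatically
    # A real file download is signalled by this header, not by the URL's extension:
    print(r.headers.get("Content-Disposition"))  # e.g. attachment; filename="File Title.xlsx"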
I guess I am just really looking for any ideas, thoughts, or solutions.
Here is my HTTP connection code
import requests
import os

# Fill in your details here to be posted to the login form.
payload = {
    'em': 'Username',
    'passphrase': 'Password',
    'validationKey': 'Key'
}

# This reads your URL and returns whether the file is downloadable
def is_downloadable(URL_D):
    h = requests.head(URL_D, allow_redirects=True)
    content_type = h.headers.get('content-type')
    if 'text' in content_type.lower():
        return False
    if 'html' in content_type.lower():
        return False
    return True

def download_file(URL_D):
    # Note: requests.get here does not use the logged-in session `s` from Main(),
    # so the login cookies are never sent with the download request.
    with requests.get(URL_D, stream=True) as r:
        r.raise_for_status()
        with open(FileName, 'wb') as f:
            for chunk in r.iter_content(chunk_size=None):
                if chunk:
                    f.write(chunk)
    return FileName

def Main():
    with requests.Session() as s:
        p = s.post(URL, data=payload, allow_redirects=True)
        print(is_downloadable(URL_D))
        download_file(URL_D)

if __name__ == '__main__':
    Path = "<path>"
    FileName = os.path.join(Path, "Testing File.xlsx")
    URL = 'login URL'
    URL_D = 'Attachment URL'
    Main()
is_downloadable(URL_D) returns False, and the Excel file is empty and corrupted.
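When a "downloaded" Excel file comes back blank and corrupted like this, it is often the login page's HTML saved under an .xlsx name. A quick hedged check, reusing the placeholders from the script above:

r = requests.get(URL_D)
print(r.status_code)
print(r.headers.get('content-type'))  # 'text/html' means a web page came back, not the file
print(r.content[:100])                # a real .xlsx starts with the ZIP magic bytes b'PK'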
Here is my code for the IMAP attempt:
import email
import imaplib
import os

class FetchEmail():

    connection = None
    error = None

    def __init__(self, mail_server, username, password):
        self.connection = imaplib.IMAP4_SSL(mail_server, port=993)
        self.connection.login(username, password)
        self.connection.select('inbox', readonly=False)  # so we can mark mails as read

    def close_connection(self):
        """Close the connection to the IMAP server."""
        self.connection.close()

    def save_attachment(self, msg, download_folder):
        att_path = "No attachment found."
        for part in msg.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            if part.get('Content-Disposition') is None:
                continue
            filename = part.get_filename()
            att_path = os.path.join(download_folder, filename)
            if not os.path.isfile(att_path):
                with open(att_path, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))
        return att_path

    def fetch_messages(self):
        emails = []
        (result, messages) = self.connection.search(None, "(ON 20-Nov-2020)")
        if result == "OK":
            for message in messages[0].split():  # search returns bytes, so split on whitespace
                try:
                    ret, data = self.connection.fetch(message, '(RFC822)')
                except Exception:
                    print("No emails to read for date.")
                    self.close_connection()
                    exit()
                msg = email.message_from_bytes(data[0][1])
                if not isinstance(msg, str):
                    emails.append(msg)
                response, data = self.connection.store(message, '+FLAGS', '\\Seen')
            return emails
        self.error = "Failed to retrieve emails."
        return emails

def Main():
    p = FetchEmail(mail_server, username, password)
    msgs = p.fetch_messages()
    for msg in msgs:  # save_attachment expects a single message
        p.save_attachment(msg, download_folder)
    p.close_connection()

if __name__ == "__main__":
    mail_server = "Server"
    username = "username"
    password = "password"
    download_folder = "<path>"
    Main()
Error Message: TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
Even if I wrote the IMAP script wrong, I also tried an IMAP connection via the command prompt and got the same result.
To recap, all I am looking for is some pointers and ideas to solve this problem. Thank you!
For anyone who stumbled upon this because of a similar issue (probably not, since I have a really weird habit of making everything simple, complicated):
I was able to solve the problem by using the Selenium webdriver to log in to the website and navigate through it using the "click" mechanism. This was the only way I was able to successfully download the reports.
import time
import re
import datetime
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

today = datetime.date.today()
first = today.replace(day=1)
year = today.strftime('%Y')
month = today.strftime('%B')
lastMonth = (first - datetime.timedelta(days=1)).strftime('%b')

def Main():
    chrome_options = Options()
    chrome_options.add_experimental_option("detach", True)
    s = Chrome(executable_path='<path to chromedriver>', options=chrome_options)
    s.get("Website login page")
    s.find_element_by_id("loginname").send_keys('username')
    s.find_element_by_id("password").send_keys('password')
    s.find_element_by_class_name("button").click()
    for i in range(50):
        s.get("landing page post login")
        n = str(i)
        subject = "mailsubject" + n
        sent = "mailsent" + n
        title = s.find_element_by_id(subject).text
        date = s.find_element_by_id(sent).text
        regex = "Bi Monthly"
        regex_pr = "PR"
        match = re.search(regex, title)
        match_pr = re.search(regex_pr, title)
        if match and not match_pr:
            match_m = re.search(r"(\D{3})", date)
            match_d = re.search(r"(\d{1,2})", date)
            day = int(match_d.group())
            m = match_m.group(1)
            if (day <= 15) and (m == lastMonth):
                print("All up to date files have been downloaded")
                break
            else:
                name = "messageItem" + n
                s.find_element_by_id(name).click()
                s.find_element_by_partial_link_text("xlsx").click()
        else:
            continue
        time.sleep(45)

if __name__ == "__main__":
    Main()
This is my first question, please bear with me. I am working with an API that authenticates using an access token that expires in 15 minutes; there is no refresh token to use in lieu of a re-login. So far I have been able to get the access token and insert it into the requests.get call, but I cannot seem to get it to renew and am at a loss as to how.
All of the work with this API, and in general, is done with Python, so I am hoping to keep everything in Python and in the same file.
I get a 401 status code once the 15 minutes are up, and 200 if successful. So far my only ideas are: put the login on a timer for renewal (but I cannot make heads or tails of the Stack Overflow posts or the documentation on doing that); have the login running in a separate script that this script calls for the current header variable (but that still requires a timer); or have it redo the login function once it hits a response.status_code != 200.
Example script for getting the access token
import requests, csv

def login(url, payload):
    # this will log into the API and get an access token
    auth = requests.post(url, data=payload).json()
    sessionToken = auth["token"]
    sessionTimer = auth["validFor"]
    headers = {'Access-Token': sessionToken}
    return headers

# calling the function to generate the token
if __name__ == '__main__':
    url = "url inserted here"
    u = input("Enter your username: ")
    p = input("Enter your password: ")
    t = input("Enter your tenancy name: ")
    payload = {'username': u, 'password': p, 'tenant': t}
    print("Logging in")
    headers = login(url, payload)

    # the actual work as pulled from a csv file
    valuables = input("CSV file with filepath: ")
    file = open(valuables, 'r', encoding='utf-8')
    csvin = csv.reader(file)
    for row in csvin:
        try:
            uuidUrl = row[0]
            output_file = row[1]
            response = requests.get(uuidUrl, headers=headers)
            print(response.status_code)
            with open(output_file, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=128):
                    fd.write(chunk)
        except requests.exceptions.RequestException:
            print(output_file, "may have failed")
            login(url, payload)  # note: the fresh headers are discarded here
            continue
I couldn't get it to successfully recognize if response.status_code != 200: as a way to call back to login(). I also couldn't seem to get it to exit a while True: loop.
I apologize that I cannot give more details on accessing the API for other people to try out; it is non-public.
Eventually I was able to figure out the answer to my own question. Posting this for later users; the updated snippet is below.
Short version of the story: response.status_code was sending back an integer, but I made the faulty assumption that it would be a string, thus my internal comparison was no good.
for row in csvin:
    try:
        uuidUrl = row[0]
        xip_file = row[1]
        response = requests.get(uuidUrl, headers=headers)
        status = response.status_code
        print(status)
        if status == 401:
            print(xip_file, "may have failed, logging back in")
            headers = login(url, payload)
            response = requests.get(uuidUrl, headers=headers)
            with open(xip_file, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=128):
                    fd.write(chunk)
        else:
            with open(xip_file, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=128):
                    fd.write(chunk)
    except requests.exceptions.RequestException:
        print(xip_file, "may have failed")
        headers = login(url, payload)
        continue
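A slightly tidier variant of the same idea, sketched against the login() helper above: retry the request once with fresh headers whenever a 401 comes back, so the file-writing block only has to be written once.

def get_with_relogin(uuidUrl, headers, url, payload):
    # Retry once with a fresh token if the old one has expired (HTTP 401).
    response = requests.get(uuidUrl, headers=headers)
    if response.status_code == 401:
        headers = login(url, payload)
        response = requests.get(uuidUrl, headers=headers)
    return response, headers

Calling response, headers = get_with_relogin(uuidUrl, headers, url, payload) inside the csv loop keeps the download logic in one place.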
I am currently using the script below to get regular files from Google Drive. It works fine, and it is basically the code from user @user115202, so kudos there.
Now I need to get it to work for WhatsApp backups, which are stored under "Backup" in Google Drive and not as regular files.
The tool WhatsApp Google Drive Extractor (which uses the Google Drive API) doesn't seem to work anymore.
Does anyone know an alternative?
import requests

def download_file_from_google_drive(id, destination):

    def get_confirm_token(response):
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        CHUNK_SIZE = 32768
        with open(destination, "wb") as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)

    URL = "https://docs.google.com/uc?export=download"
    session = requests.Session()
    response = session.get(URL, params={'id': id}, stream=True)
    token = get_confirm_token(response)
    if token:
        params = {'id': id, 'confirm': token}
        response = session.get(URL, params=params, stream=True)
    save_response_content(response, destination)

if __name__ == "__main__":
    import sys
    if len(sys.argv) != 3:
        print("Usage: python google_drive.py drive_file_id destination_file_path")
    else:
        # TAKE ID FROM SHAREABLE LINK
        file_id = sys.argv[1]
        # DESTINATION FILE ON YOUR DISK
        destination = sys.argv[2]
        download_file_from_google_drive(file_id, destination)
I managed to tackle the API and made some changes to the code, and it works now. The code is available at https://github.com/EliteAndroidApps/WhatsApp-GD-Extractor
I'm trying to write a tiny piece of software that logs into mintos.com and saves the account overview page (which is displayed after a successful login) in an HTML file. I tried several different approaches, and this is my current version.
import requests
import sys
import codecs
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
username = 'abc'
password = '123'
loginUrl = 'https://www.mintos.com/en/login'
resp = requests.get(loginUrl, auth=(username, password))
file = codecs.open("mint.html", "w", "UTF-8")
file.write(resp.text)
file.close()
When I run the code, I only save the original login page, not the one I should get after logging in. I guess I'm messing up the login (I mean... there's not much else to mess up). I've spent an embarrassing amount of time on this problem already.
Edit:
I also tried something along the lines of:
import requests
import sys
import codecs

sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())

loginUrl = "https://www.mintos.com/en/login"
username = "abc"
password = "123"
payload = {"username": username, "password": password}

with requests.Session() as s:
    resp = s.post(loginUrl, data=payload)
    file = codecs.open("mint.html", "w", "UTF-8")
    file.write(resp.text)
    file.close()
Edit 2: Another non-working version, this time with _csrf_token:
with requests.Session() as s:
    resp = s.get(loginUrl)
    toFind = '_csrf_token" value="'
    splited = resp.text.split(toFind)[1]
    _csrf_token = splited.split('"', 1)[0]
    payload = {"_username": username, "_password": password, "_csrf_token": _csrf_token}
    final = s.post(loginUrl, data=payload)
    file = codecs.open("mint.html", "w", "UTF-8")
    file.write(final.text)
    file.close()
But I still get the same result. The downloaded page has the same token as the one I extract, though.
Final Edit: I made it work, and I feel stupid now. I needed to use 'https://www.mintos.com/en/login/check' as my loginUrl.
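For later readers, a minimal sketch of the flow that ended up working, combining the CSRF token extraction with the /en/login/check endpoint. The form field names and the token marker are specific to how the site was built at the time, so treat them as assumptions:

import requests

loginUrl = "https://www.mintos.com/en/login"
checkUrl = "https://www.mintos.com/en/login/check"  # the URL the login form actually posts to
username = "abc"
password = "123"

with requests.Session() as s:
    # Fetch the login page and pull the CSRF token out of the form markup.
    resp = s.get(loginUrl)
    token = resp.text.split('_csrf_token" value="')[1].split('"', 1)[0]
    payload = {"_username": username, "_password": password, "_csrf_token": token}
    # Post the credentials to the check URL, not to the login page itself.
    final = s.post(checkUrl, data=payload)
    with open("mint.html", "w", encoding="UTF-8") as f:
        f.write(final.text)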
The auth parameter is just a shorthand for HTTPBasicAuth, which is not what most websites use. Most of them use cookies or session data to store your login info on your computer, so they can check who you are while you're browsing the pages.
If you want to be able to log in on the website, you'll have to make a POST request to the login form and then store (and send back with every request) the cookies they give you. This also assumes they don't have any kind of "anti-bot filter" (which would make you unable to log in without a real browser, or at least not easily).
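In requests terms, that means posting the form fields once and letting a Session carry the cookies for every later request. A minimal sketch, with made-up field names and URLs (inspect the real login form for yours):

import requests

payload = {"username": "abc", "password": "123"}  # hypothetical field names

with requests.Session() as s:
    # The Session stores whatever cookies the server sets on login
    # and sends them back automatically on every subsequent request.
    s.post("https://example.com/login", data=payload)
    overview = s.get("https://example.com/account/overview")
    print(overview.status_code)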
I've been writing automated tests for a web application, and it involves sending emails not just for account creation and password resets; as the premise of the actual product, it sends emails with virtual documents.
As part of my tests I obviously need to check that these emails contain certain elements, e.g. a link to sign up, a link to documents, etc.
I have written some Python code (for the Gmail atom feed) that finds and prints the title of each email and, if there is a link, prints that too, but it cannot find the link.
import urllib2
import untangle

FEED_URL = 'https://mail.google.com/mail/feed/atom'

def get_unread_msgs(user, passwd):
    auth_handler = urllib2.HTTPBasicAuthHandler()
    auth_handler.add_password(
        realm='New mail feed',
        uri='https://mail.google.com',
        user='{user}@gmail.com'.format(user=user),
        passwd=passwd
    )
    opener = urllib2.build_opener(auth_handler)
    urllib2.install_opener(opener)
    feed = urllib2.urlopen(FEED_URL)
    return feed.read()

if __name__ == "__main__":
    import getpass
    user = raw_input('Username: ')
    passwd = getpass.getpass('Password: ')

    xml = get_unread_msgs(user, passwd)
    o = untangle.parse(xml)
    try:
        for item in o.feed.entry:
            title = item.title.cdata
            print title
            link = item.link.cdata
            if link:
                print "Link"
                print ' ', link
    except IndexError:
        pass  # no new mail
Edit: I've just realised that the atom feed doesn't actually give the message data.
Could anyone please suggest an alternative method of achieving my goal?
You could access the messages via imaplib instead:
import imaplib

def get_unread_msgs(user, passwd):
    M = imaplib.IMAP4_SSL('imap.gmail.com')
    M.login(user, passwd)
    try:
        M.select()
        try:
            typ, data = M.search(None, '(UNSEEN)')
            for num in data[0].split():
                yield M.fetch(num, '(RFC822)')
        finally:
            M.close()
    finally:
        M.logout()
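A hedged usage sketch (written for Python 3) for the original goal of checking links: each yielded item is the raw fetch result, which the email module can parse, and links can then be pulled out of any HTML part.

import email
import re

for typ, data in get_unread_msgs(user, passwd):
    msg = email.message_from_bytes(data[0][1])  # data[0][1] is the raw RFC822 message
    print(msg['Subject'])
    for part in msg.walk():
        if part.get_content_type() == 'text/html':
            body = part.get_payload(decode=True)
            for link in re.findall(rb'href="([^"]+)"', body):
                print('  ', link.decode())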
You will need to enable IMAP in your gmail settings if you haven't already:
Get started with IMAP and POP3
If you are looking for a (gmail specific) solution without polling the server for updates, you can look into the Gmail Notifications API.
Is there a way to upload a file to a SharePoint site using a python script? I tried installing haufe.sharepoint, but it seems it failed to fetch ntlm during installation, and I can't even use the connector module without having ntlm installed.
I've also tried just saving the Excel file to the server location (saving it to a directory like \\server\sharepointsite\files instead of connecting via the URL) using openpyxl, but it looks like the file remains checked out after it is saved.
I would appreciate any help. Thanks!!
I'll start by saying this example is adapted from the example for Office365-REST-Python-Client. It works with SharePoint Online using the REST API.
https://github.com/vgrem/Office365-REST-Python-Client/blob/master/examples/sharepoint/files/upload_file.py
An example URL you might want to upload to, decomposed as [baseurl][site][folder][file]:
https://your_company.sharepoint.com/path/to/site/Shared Documents/file.txt
import os

from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext

baseurl = 'https://your_company.sharepoint.com'
basesite = '/path/to/site'  # every SharePoint has a home.
siteurl = baseurl + basesite

localpath = './file.txt'
remotepath = 'Shared Documents/file.txt'  # existing folder path under the SharePoint site.

username = '<username>'
password = '<password>'

ctx_auth = AuthenticationContext(siteurl)  # should also be the siteurl
ctx_auth.acquire_token_for_user(username, password)
ctx = ClientContext(siteurl, ctx_auth)  # make sure you auth to the siteurl.

with open(localpath, 'rb') as content_file:
    file_content = content_file.read()
dir, name = os.path.split(remotepath)
file = ctx.web.get_folder_by_server_relative_url(dir).upload_file(name, file_content).execute_query()
haufe.sharepoint only works for SharePoint lists, but you probably need access to document libraries.
You should use Python Requests with the help of SharePoint's REST API.
If your SharePoint site doesn't support BasicAuth, I recommend the requests_ntlm package.
It didn't work for me due to other reasons, but maybe it helps you out a bit.
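For concreteness, a minimal sketch of that combination. The site URL, library, and file names are placeholders, and on-premises SharePoint will usually also want an X-RequestDigest header for writes, which this sketch omits:

import requests
from requests_ntlm import HttpNtlmAuth

site = "https://sharepoint.example.com/sites/yoursite"  # placeholder
auth = HttpNtlmAuth('DOMAIN\\username', 'password')     # placeholder credentials

with open('file.txt', 'rb') as f:
    r = requests.post(
        site + "/_api/web/GetFolderByServerRelativeUrl('Shared Documents')"
               "/Files/add(url='file.txt',overwrite=true)",
        data=f.read(),
        auth=auth,
        headers={'accept': 'application/json;odata=verbose'})
print(r.status_code)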
You can upload files with SharePlum.
Install SharePlum (pip install SharePlum) and try the code below:
import requests
from shareplum import Office365

# Set login info
username = '<username>'
password = '<password>'
site_name = '<site_name>'
base_path = 'https://<domain_name>.sharepoint.com'
doc_library = 'Shared%20Documents'
nested_folder = 'Shared%20Documents/<folder1>/<folder2>'  # if you want to upload into nested folders; else nested_folder = doc_library
file_name = "my_file.zip"  # when your file is in the same directory

# Obtain auth cookie
authcookie = Office365(base_path, username=username, password=password).GetCookies()
session = requests.Session()
session.cookies = authcookie
session.headers.update({'user-agent': 'python_bite/v1'})
session.headers.update({'accept': 'application/json;odata=verbose'})

# Workaround: the first call fails, but its response headers carry a valid X-RequestDigest
session.headers.update({'X-RequestDigest': 'FormDigestValue'})
response = session.post(
    url=base_path + "/sites/" + site_name + "/_api/web/GetFolderByServerRelativeUrl('" + doc_library + "')/Files/add(url='a.txt',overwrite=true)",
    data="")
session.headers.update({'X-RequestDigest': response.headers['X-RequestDigest']})

# Upload file
with open(file_name, 'rb') as file_input:
    try:
        response = session.post(
            url=base_path + "/sites/" + site_name + "/_api/web/GetFolderByServerRelativeUrl('" + nested_folder + "')/Files/add(url='"
                + file_name + "',overwrite=true)",
            data=file_input)
        print("response: ", response.status_code)  # it returns 200
        if response.status_code == 200:
            print("File uploaded successfully")
    except Exception as err:
        print("Something went wrong: " + str(err))
I think I might be a bit late in answering this question.
The following solution worked for me:
In the SharePoint webpage, go to Library Tools >> Library >> Open with Explorer command (it's the tiny icon in the bottom right, beside the Connect to Office command).
The address bar gives you the address you need to upload the file to. Remember to remove "http:" or "https:" from the address. This address is your destination for the upload.
Subsequently, you can use the shutil package to copy the file there.
import shutil as sl
sl.copy(source,destination)
This should help you upload files to SharePoint.
Disclaimer: this works quite well in Python 3.6.
The answers above didn't work for me.
I found a simple and nice way: just map a network drive to the SharePoint folder and then copy to that drive.
import subprocess
import shutil
subprocess.call(r'net use Y: http://sharepoint/link/to/your/folder', shell=True)
shutil.copy("link_to_local_file","Y:\\")
Instead of copy, you can also delete files or do anything you would in a normal folder.
I have created a file in a SharePoint site in Python via REST API calls. Please find my code below.
import json

import requests
from requests_ntlm import HttpNtlmAuth

# base_url, root_url, headers, myvars, username, password and logger
# are defined elsewhere in the full script.

def CreateHomePage():
    server_relative_url = base_url + '/_api/web/webinfos'
    r1 = requests.get(server_relative_url, auth=HttpNtlmAuth(username, password), headers=headers, verify=True)
    value = json.loads(r1.text)
    for row in value['d']['results']:
        if row['Title'] == myvars['Site Name'].strip(' \t\n\r'):
            Id = row['ServerRelativeUrl']
    # Add template and create file simultaneously
    title = myvars['Site Name'].strip(' \t\n\r')
    post_url = root_url + 'GetFolderByServerRelativeUrl(\'/' + Id + '/Pages\')/Files/add(url=\'Home.aspx\',overwrite=true)'
    r2 = requests.post(post_url, auth=HttpNtlmAuth(username, password), headers=headers, verify=True)
    logger.debug("Creation of home page %d", r2.status_code)
I have created a script to upload an attachment into a SharePoint list.
Let me know if it works:
import requests
from shareplum import Office365

# Obtain auth cookie
authcookie = Office365('https://YOUR-NAME.sharepoint.com', username='YOUR-USERNAME', password='YOUR-PASSWORD').GetCookies()
session = requests.Session()
session.cookies = authcookie
session.headers.update({'user-agent': 'python_bite/v1'})
session.headers.update({'accept': 'application/json;odata=verbose'})

# dirty workaround... I'm getting the X-RequestDigest from the first failed call
session.headers.update({'X-RequestDigest': 'FormDigestValue'})
response = session.post(url="https://YOUR-NAME.sharepoint.com/sites/YOU-SITE/_api/web/GetFolderByServerRelativeUrl('YOUR-FOLDER')/Files/add(url='a.txt',overwrite=true)", data="")
session.headers.update({'X-RequestDigest': response.headers['X-RequestDigest']})

# perform the upload
fileName = 'picture.png'   # name the attachment will have in the SharePoint list
file_name = 'images.png'   # local file to read
with open(file_name, 'rb') as file_input:
    response = session.post(
        url="https://YOUR-NAME.sharepoint.com/sites/YOUR-SITE/_api/web/lists/getbytitle('ID-ROW-INTO-SHAREPOINT')/items(4)/AttachmentFiles/add(FileName='" + fileName + "')",
        data=file_input)
print(response.text)