Download xlsx with request w/pem cert - python

Im having abit of issue downloading an xlsx file over https.
Here my code to scrape the sight to get the download url, but its seems to redirect me to a new site. but when i put the link in my browswer, it downloads the file straight away.
Is there something im doing wrong?
here is the code i used for scraping the site:
import contextlib
import OpenSSL.crypto
import os
import requests
import ssl
import tempfile
import http.client
import shutil
from OpenSSL import crypto
import pem
import html2text
url = "https://signonssl.site.com"
base_url = "basedownloadurl"
p12_cert = "cert_path"
password = "password"
#contextlib.contextmanager
def pfx_to_pem(p12_path, pfx_password):
''' Decrypts the .p12 file to be used with requests. '''
with tempfile.NamedTemporaryFile(suffix='.pem') as t_pem:
f_pem = open(t_pem.name, 'wb')
pfx = open(p12_path, 'rb').read()
p12 = OpenSSL.crypto.load_pkcs12(pfx, pfx_password)
f_pem.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, p12.get_privatekey()))
f_pem.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, p12.get_certificate()))
ca = p12.get_ca_certificates()
f_pem.close()
yield t_pem.name
with pfx_to_pem(p12_cert, password) as cert:
html_response = requests.get(url, cert=cert).content.decode("utf-8")
htmlconv = html2text.html2text(html_response).split("name")[1]
dl_link = htmlconv.split(")")[0].split("(")[1]
dl = requests.get(dl_link, cert=cert, stream=True, allow_redirects=False)
output = open('test.xlsx', 'wb')
output.write(dl.content)
output.close()
Any guidance is much appreciated.
Thanks!
Pon

Related

No results on fetching file extension using urllib3 and requests

I have a script for fetching/checking the specific file extension hosted on the web server (which is .txt). I am wondering why i am not getting a result:
#!/usr/bin/python3
import requests
import urllib3
requests.packages.urllib3.disable_warnings()
from urllib3 import disable_warnings
from urllib3.exceptions import InsecureRequestWarning
disable_warnings(InsecureRequestWarning)
PYTHONWARNINGS="ignore:Unverified HTTPS request"
url = "https://test.site/"`
pref = "testdir"
extension=[".asp", ".aspx", ".bat", ".sql", ".txt", ".xml"]
for i in extension:
new_url = url + pref + i
res = requests.get(new_url, verify=False)
if res.status_code == 200:
print("[+] %s existing!" % new_url)
Am I missing an import, package or undeclared line?

How to download a file from Sharepoint with python

I need to download a file from sharepoint using user credentials (rather that client credentials).
I've tried this:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
root_url = "https://company-my.sharepoint.com"
full_url = "https://company-my.sharepoint.com/personal/Documents/AB.csv"
ctx = ClientContext(root_url)
ctx.with_user_credentials(<my_email>,
<my_password)
response = File.open_binary(ctx, full_url)
print(response.content)
The response I am getting is
*b'{"error":{"code":"-2147024809, System.ArgumentException","message":{"lang":"en-US","value":"serverRelativePath\\r\\nParameter
name: Specified value is not supported for the serverRelativePath
parameter."}}}'*

Upload excel file to SharePoint Online using Python

I am trying to upload my excel spreadsheet to a document library on my SharePoint Online site. The Sharepoint URL and the folder location on the SharePoint site are listed in the excel Spreadsheet.
Here is the code that I have right now:
import numpy as np
import pandas as pd
import xlwings as xw
from xlwings.constants import Direction
import sys
import requests
from requests_ntlm import HttpNtlmAuth
pd.options.mode.chained_assignment = None
def Upload():
wb = xw.Book.caller()
ws = wb.sheets['Sheet1']
#Read filename from excel
fileName = sys.argv[1]
#Enter SharePoint ONline site and target library
SP_URL = ws.range('C7').value
folder_URL = ws.range('C8').value
#Set up the url for requesting file upload
request_URL = SP_URL + '/_api/web/getfolderbyserverrelativeurl(\'' +
folder_URL + '\')/Files/asdd(url=\'' + fileName + '\',overwrite=true)'
#read in the file that we are going to upload
file = open(fileName, 'rb')
headers = {'Content-Type': 'application/json; odata=verbose', 'accept':
'application/json;odata=verbose'}
r = requests.post(SP_URL +
"/_api/contextinfo",auth=HttpNtlmAuth('Domain\\username','password'),
headers=headers)
formDigestValue = r.json()['d']['GetContextWebInformation']
['FormDigestValue']
headers = {'Content-Type': 'application/json; odata=verbose', 'accept':
'application/json;odata=verbose', 'x-requestdigest' : formDigestValue}
uploadResult =
requests.post(request_URL,auth=HttpNtlmAuth('Domain\\username','password'),
headers=headers, data=file.read())
I am receiving the following error:
formDigestValue = r.json()['d']['GetContextWebInformation']['FormDigestValue']
KeyError: 'd'
requests_ntlm package
allows for HTTP NTLM authentication using the requests library
but NTLM is not supported for SharePoint Online.
Instead of requests_ntlm i would suggest to utilize Office365-REST-Python-Client (it supports to specify user credentials and consumes SharePoint REST API) package to upload file into SharePoint Online, for example:
ctx_auth = AuthenticationContext(url=settings['url'])
if ctx_auth.acquire_token_for_user(username=settings['user_credentials']['username'],
password=settings['user_credentials']['password']):
ctx = ClientContext(settings['url'], ctx_auth)
target_list = ctx.web.lists.get_by_title("Documents")
info = FileCreationInformation()
file_name = "Book.xlsx"
path = "{0}/data/{1}".format(os.path.dirname(__file__), file_name)
with open(path, 'rb') as content_file:
info.content = content = content_file.read()
info.url = file_name
info.overwrite = True
upload_file = target_list.root_folder.files.add(info)
ctx.execute_query()
formDigestValue = r.json()['d']['GetContextWebInformation']['FormDigestValue']
KeyError: 'd'
All this means is that the response content doesn't have 'd' as a key. Try looking at the json code
print(r.content) or something, there could be an error message indicating what is wrong with your post request

Download a compressed zip file from URL

I have a problem statement where I have to login to a website and then download a zip file. I have written the below code so far to login to website(able to print authentication successful message) and create a session. How can I download the zip file now ?
import requests
import urllib
import urllib.request
import zipfile
import io
import shutil
post_login_url = 'https://www.ims-dm.com/mvc/page/customer-sign-in/cgi/cookie.php'
request_url = 'http://www.ims-dm.com/cgi/securedownload.php?p=WPNFTPD#prodtype=wpn/WPN-FULL-20180306.TXT.zip'
payload = {
'sendusername':'xxxxxxxxxx',
'password':'xxxxxx'
}
with requests.Session() as session:
post = session.post(post_login_url,data=payload)
if post.status_code == 200:
print("Authentication sucessful !!")
url = session.get(request_url)

Why is that I can visit https webpage containing public files but I can't download them using Python script?

What should be my username and password?
import requests
import shutil
url = "https://www.sec.gov/Archives/edgar/daily-index/2017/QTR1/company.20170111.idx.txt"
#Note: It's https
r = requests.get(url, auth=('', ''), verify=False,stream=True)
r.raw.decode_content = True
with open("company.20170111.idx.txt", 'wb') as f:
shutil.copyfileobj(r.raw, f)
The URL you're trying to load should be:
https://www.sec.gov/Archives/edgar/daily-index/2017/QTR1/company.20170103.idx
You're also missing import requests and the server-side didn't like the auth parameter.
import shutil
import requests
url = "https://www.sec.gov/Archives/edgar/daily-index/2017/QTR1/company.20170111.idx"
r = requests.get(url, verify=False, stream=True)
r.raw.decode_content = True
with open("company.20170111.idx.txt", 'wb') as f:
shutil.copyfileobj(r.raw, f)
This worked fine. I dont know why :
import urllib2
url = "https://www.sec.gov/Archives/edgar/daily-index/2017/QTR1/company.20170111.idx"
r = urllib2.urlopen(url)
for l in r:
print l

Categories

Resources