I'm having a bit of an issue downloading an xlsx file over HTTPS.
Here is my code to scrape the site to get the download URL, but it seems to redirect me to a new site. However, when I put the link in my browser, it downloads the file straight away.
Is there something I'm doing wrong?
Here is the code I used for scraping the site:
import contextlib
import OpenSSL.crypto
import os
import requests
import ssl
import tempfile
import http.client
import shutil
from OpenSSL import crypto
import pem
import html2text
# Page that is fetched and scraped for the download link.
url = "https://signonssl.site.com"
# Placeholder base download URL; not referenced by the code shown below.
base_url = "basedownloadurl"
# Placeholder path to the .p12 client certificate handed to pfx_to_pem().
p12_cert = "cert_path"
# Placeholder passphrase handed to pfx_to_pem() to decrypt the .p12 file.
password = "password"
@contextlib.contextmanager
def pfx_to_pem(p12_path, pfx_password):
    """Decrypt a .p12 file into a temporary PEM file usable with requests.

    Use as ``with pfx_to_pem(path, password) as cert:``; ``cert`` is the
    path of a temporary ``.pem`` file holding the private key followed by
    the certificate. The file is removed when the ``with`` block exits.

    Args:
        p12_path: Path of the PKCS#12 (.p12/.pfx) file to read.
        pfx_password: Passphrase, handed unchanged to
            ``OpenSSL.crypto.load_pkcs12``.

    Yields:
        The file name of the temporary PEM file.
    """
    # Read the container first and close the handle straight away.
    with open(p12_path, 'rb') as pfx_file:
        pfx = pfx_file.read()
    # NOTE(review): load_pkcs12 is deprecated in recent pyOpenSSL releases;
    # confirm the installed version still provides it.
    p12 = OpenSSL.crypto.load_pkcs12(pfx, pfx_password)

    with tempfile.NamedTemporaryFile(suffix='.pem') as t_pem:
        # Write through the temp file's own handle instead of re-opening it
        # by name, then flush so readers of the path see the full content.
        t_pem.write(OpenSSL.crypto.dump_privatekey(
            OpenSSL.crypto.FILETYPE_PEM, p12.get_privatekey()))
        t_pem.write(OpenSSL.crypto.dump_certificate(
            OpenSSL.crypto.FILETYPE_PEM, p12.get_certificate()))
        t_pem.flush()
        yield t_pem.name
# Fetch the landing page with the client certificate, pull the download link
# out of its markdown rendering, and save the linked file as test.xlsx.
with pfx_to_pem(p12_cert, password) as cert:
    html_response = requests.get(url, cert=cert).content.decode("utf-8")
    # html2text renders links as "[text](target)"; take the first "(...)"
    # that follows the first occurrence of "name".
    htmlconv = html2text.html2text(html_response).split("name")[1]
    dl_link = htmlconv.split(")")[0].split("(")[1]
    # NOTE(review): allow_redirects=False means a 3xx reply is saved as-is
    # instead of being followed -- confirm this is intended.
    dl = requests.get(dl_link, cert=cert, stream=True, allow_redirects=False)
    # The context manager closes the file even if the transfer fails.
    with open('test.xlsx', 'wb') as output:
        for chunk in dl.iter_content(chunk_size=8192):
            output.write(chunk)
Any guidance is much appreciated.
Thanks!
Pon
Related
I have a script for fetching/checking a specific file extension hosted on the web server (which is .txt). I am wondering why I am not getting a result:
#!/usr/bin/python3
import requests
import urllib3
requests.packages.urllib3.disable_warnings()
from urllib3 import disable_warnings
from urllib3.exceptions import InsecureRequestWarning
disable_warnings(InsecureRequestWarning)
# NOTE: this only binds a Python variable; it does not set the
# PYTHONWARNINGS environment variable of the running process.
PYTHONWARNINGS = "ignore:Unverified HTTPS request"
url = "https://test.site/"
pref = "testdir"
extension = [".asp", ".aspx", ".bat", ".sql", ".txt", ".xml"]
# Request <url><pref><ext> for every candidate extension and report the
# ones the server answers with HTTP 200. Other status codes print nothing.
for i in extension:
    new_url = url + pref + i
    res = requests.get(new_url, verify=False)
    if res.status_code == 200:
        print("[+] %s existing!" % new_url)
Am I missing an import, package or undeclared line?
I need to download a file from SharePoint using user credentials (rather than client credentials).
I've tried this:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
root_url = "https://company-my.sharepoint.com"
full_url = "https://company-my.sharepoint.com/personal/Documents/AB.csv"
ctx = ClientContext(root_url)
ctx.with_user_credentials(<my_email>,
<my_password)
response = File.open_binary(ctx, full_url)
print(response.content)
The response I am getting is
*b'{"error":{"code":"-2147024809, System.ArgumentException","message":{"lang":"en-US","value":"serverRelativePath\\r\\nParameter
name: Specified value is not supported for the serverRelativePath
parameter."}}}'*
I am trying to upload my Excel spreadsheet to a document library on my SharePoint Online site. The SharePoint URL and the folder location on the SharePoint site are listed in the Excel spreadsheet.
Here is the code that I have right now:
import numpy as np
import pandas as pd
import xlwings as xw
from xlwings.constants import Direction
import sys
import requests
from requests_ntlm import HttpNtlmAuth
# Turn off pandas' chained-assignment check (None disables the warning).
pd.options.mode.chained_assignment = None
def Upload():
    """Upload a local file to the SharePoint folder named in the workbook.

    The site URL is read from cell C7 and the target folder from cell C8 of
    'Sheet1' in the calling workbook; the file to upload is ``sys.argv[1]``.
    A form digest is requested from ``/_api/contextinfo`` and sent along
    with the file bytes to the folder's ``Files/add`` endpoint.

    Raises:
        KeyError: If the contextinfo reply lacks the expected
            ``d.GetContextWebInformation.FormDigestValue`` entry; the
            message carries the HTTP status and the start of the body.
    """
    wb = xw.Book.caller()
    ws = wb.sheets['Sheet1']

    # The file to upload is named by the first command-line argument.
    fileName = sys.argv[1]

    # SharePoint site and target library come from the sheet.
    SP_URL = ws.range('C7').value
    folder_URL = ws.range('C8').value

    # REST endpoint that adds (or overwrites) the file in the folder.
    request_URL = (
        SP_URL
        + "/_api/web/getfolderbyserverrelativeurl('" + folder_URL
        + "')/Files/add(url='" + fileName + "',overwrite=true)"
    )

    # Read the payload up front and close the handle immediately.
    with open(fileName, 'rb') as source:
        payload = source.read()

    headers = {
        'Content-Type': 'application/json; odata=verbose',
        'accept': 'application/json;odata=verbose',
    }
    r = requests.post(
        SP_URL + "/_api/contextinfo",
        auth=HttpNtlmAuth('Domain\\username', 'password'),
        headers=headers,
    )
    try:
        formDigestValue = (
            r.json()['d']['GetContextWebInformation']['FormDigestValue']
        )
    except KeyError as err:
        # Surface what the server actually said instead of a bare key name.
        raise KeyError(
            "unexpected contextinfo response (HTTP {0}): {1}".format(
                r.status_code, r.text[:200])
        ) from err

    headers = {
        'Content-Type': 'application/json; odata=verbose',
        'accept': 'application/json;odata=verbose',
        'x-requestdigest': formDigestValue,
    }
    requests.post(
        request_URL,
        auth=HttpNtlmAuth('Domain\\username', 'password'),
        headers=headers,
        data=payload,
    )
I am receiving the following error:
formDigestValue = r.json()['d']['GetContextWebInformation']['FormDigestValue']
KeyError: 'd'
The requests_ntlm package allows for HTTP NTLM authentication using the requests library, but NTLM is not supported for SharePoint Online.
Instead of requests_ntlm, I would suggest using the Office365-REST-Python-Client package (it supports specifying user credentials and consumes the SharePoint REST API) to upload a file into SharePoint Online, for example:
# Authenticate with user credentials, then upload data/Book.xlsx (located
# next to this script) into the root folder of the "Documents" library.
ctx_auth = AuthenticationContext(url=settings['url'])
if ctx_auth.acquire_token_for_user(username=settings['user_credentials']['username'],
                                   password=settings['user_credentials']['password']):
    ctx = ClientContext(settings['url'], ctx_auth)
    target_list = ctx.web.lists.get_by_title("Documents")
    info = FileCreationInformation()
    file_name = "Book.xlsx"
    # os.path.join keeps the path relative when dirname(__file__) is empty;
    # plain "{0}/data/{1}" formatting would then point at "/data/...".
    path = os.path.join(os.path.dirname(__file__), "data", file_name)
    with open(path, 'rb') as content_file:
        info.content = content = content_file.read()
    info.url = file_name
    info.overwrite = True
    upload_file = target_list.root_folder.files.add(info)
    # The add() call is only queued; execute_query() sends it to the server.
    ctx.execute_query()
formDigestValue = r.json()['d']['GetContextWebInformation']['FormDigestValue']
KeyError: 'd'
All this means is that the response content doesn't have 'd' as a key. Try looking at the JSON response,
e.g. with print(r.content); there could be an error message indicating what is wrong with your POST request.
I have a problem statement where I have to log in to a website and then download a zip file. I have written the code below so far to log in to the website (I am able to print the authentication-successful message) and create a session. How can I download the zip file now?
import requests
import urllib
import urllib.request
import zipfile
import io
import shutil
post_login_url = 'https://www.ims-dm.com/mvc/page/customer-sign-in/cgi/cookie.php'
# NOTE(review): everything after '#' is a URL fragment and is not sent to
# the server -- confirm the download endpoint really needs it there.
request_url = 'http://www.ims-dm.com/cgi/securedownload.php?p=WPNFTPD#prodtype=wpn/WPN-FULL-20180306.TXT.zip'
payload = {
    'sendusername': 'xxxxxxxxxx',
    'password': 'xxxxxx',
}
# One Session is used for both calls so cookies set by the sign-in POST are
# sent again with the follow-up GET.
with requests.Session() as session:
    post = session.post(post_login_url, data=payload)
    if post.status_code == 200:
        print("Authentication sucessful !!")
    url = session.get(request_url)
What should be my username and password?
import requests
import shutil
url = "https://www.sec.gov/Archives/edgar/daily-index/2017/QTR1/company.20170111.idx.txt"
# Note: It's https
# Using the response as a context manager releases the connection once the
# body has been copied to disk.
with requests.get(url, auth=('', ''), verify=False, stream=True) as r:
    # Have urllib3 undo any gzip/deflate transfer encoding while reading.
    r.raw.decode_content = True
    with open("company.20170111.idx.txt", 'wb') as f:
        shutil.copyfileobj(r.raw, f)
The URL you're trying to load should be:
https://www.sec.gov/Archives/edgar/daily-index/2017/QTR1/company.20170103.idx
You're also missing import requests and the server-side didn't like the auth parameter.
import shutil
import requests
url = "https://www.sec.gov/Archives/edgar/daily-index/2017/QTR1/company.20170111.idx"
# Stream the index file straight to disk; the response is closed on exit.
with requests.get(url, verify=False, stream=True) as r:
    # Have urllib3 undo any gzip/deflate transfer encoding while reading.
    r.raw.decode_content = True
    with open("company.20170111.idx.txt", 'wb') as f:
        shutil.copyfileobj(r.raw, f)
This worked fine; I don't know why:
import urllib2
url = "https://www.sec.gov/Archives/edgar/daily-index/2017/QTR1/company.20170111.idx"
r = urllib2.urlopen(url)
for l in r:
print l