IndexError using python-ntlm

I'm trying to use urllib2 and python-ntlm to connect to an NT authenticated server, but I'm getting an error. Here's the code I'm using, from the python-ntlm site:
user = 'DOMAIN\user.name'
password = 'Password123'
url = 'http://corporate.domain.com/page.aspx?id=foobar'
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, user, password)
# create the NTLM authentication handler
auth_NTLM = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(passman)
# create and install the opener
opener = urllib2.build_opener(auth_NTLM)
urllib2.install_opener(opener)
# retrieve the result
response = urllib2.urlopen(url)
return response.read()
And here's the error I get:
Traceback (most recent call last):
File "C:\Python27\test.py", line 112, in get_ntlm_data
response = urllib2.urlopen(url)
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 398, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 511, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 430, in error
result = self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 370, in _call_chain
result = func(*args)
File "C:\Python27\lib\site-packages\python_ntlm-1.0.1-py2.7.egg\ntlm\HTTPNtlmAuthHandler.py", line 99, in http_error_401
return self.http_error_authentication_required('www-authenticate', req, fp, headers)
File "C:\Python27\lib\site-packages\python_ntlm-1.0.1-py2.7.egg\ntlm\HTTPNtlmAuthHandler.py", line 35, in http_error_authentication_required
return self.retry_using_http_NTLM_auth(req, auth_header_field, None, headers)
File "C:\Python27\lib\site-packages\python_ntlm-1.0.1-py2.7.egg\ntlm\HTTPNtlmAuthHandler.py", line 72, in retry_using_http_NTLM_auth
UserName = user_parts[1]
IndexError: list index out of range
Any idea what I'm doing wrong?

Try a raw string, so Python doesn't interpret the backslash as the start of an escape sequence and the DOMAIN\user separator survives for python-ntlm to split on:
user = r'DOMAIN\user.name'
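For reference, here is the snippet from the question with that one-line fix applied and the imports filled in (a sketch; the credentials and URL are the question's placeholders):
import urllib2
from ntlm import HTTPNtlmAuthHandler

user = r'DOMAIN\user.name'  # raw string keeps the backslash literal
password = 'Password123'
url = 'http://corporate.domain.com/page.aspx?id=foobar'

# register the credentials and install an opener with the NTLM handler
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, user, password)
opener = urllib2.build_opener(HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(passman))
urllib2.install_opener(opener)

response = urllib2.urlopen(url)
print response.read()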

python urllib2.urlopen SSL: CERTIFICATE_VERIFY_FAILED

My operating environment: Python 2.7, Django 1.9.
My original code was:
req = urllib2.Request(url, obj, headers)
opener = urllib2.urlopen(req)
But there was an error: [SSL: CERTIFICATE_VERIFY_FAILED]. I found a solution on the Internet:
First:
import ssl
import urllib2
context = ssl._create_unverified_context()
print urllib2.urlopen("https://imaojia.com/", context=context).read()
Second:
import ssl
import urllib2
ssl._create_default_https_context = ssl._create_unverified_context
print urllib2.urlopen("https://imaojia.com/").read()
After I applied them, the code became:
req = urllib2.Request(url, obj, headers)
import ssl
opener = urllib2.urlopen(req, context=ssl._create_unverified_context())
Now there are new errors:
HTTP Error 503: Service Unavailable
Internal Server Error: /deploy/key_list_import/
Traceback (most recent call last):
File "/usr/lib/python2.7/site-packages/django/core/handlers/base.py", line 149, in get_response
response = self.process_exception_by_middleware(e, request)
File "/usr/lib/python2.7/site-packages/django/core/handlers/base.py", line 147, in get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/usr/lib/python2.7/site-packages/django/contrib/auth/decorators.py", line 23, in _wrapped_view
return view_func(request, *args, **kwargs)
File "/project/soms/deploy/views.py", line 398, in salt_key_import
minions,minions_pre = sapi.list_all_key()
File "/project/soms/deploy/saltapi.py", line 54, in list_all_key
self.token_id()
File "/project/soms/deploy/saltapi.py", line 30, in token_id
content = self.postRequest(obj,prefix='/login')
File "/project/soms/deploy/saltapi.py", line 42, in postRequest
opener = urllib2.urlopen(req, context=ssl._create_unverified_context())
File "/usr/lib64/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib64/python2.7/urllib2.py", line 437, in open
response = meth(req, response)
File "/usr/lib64/python2.7/urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib64/python2.7/urllib2.py", line 475, in error
return self._call_chain(*args)
File "/usr/lib64/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/usr/lib64/python2.7/urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 503: Service Unavailable
Who can give me some advice? Thank you!
Thanks to Chiheb Nexus for the answer that helped me solve this; to close the question, I'm answering and accepting it myself.
The answer: it was my request itself that caused the HTTP Error 503, not the SSL workaround.
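For anyone debugging a similar 503: the HTTPError object is itself a file-like response, so you can read the body the server sent with the error, which often states why the request was rejected. A minimal sketch, reusing the request from above:
import ssl
import urllib2

try:
    req = urllib2.Request(url, obj, headers)
    response = urllib2.urlopen(req, context=ssl._create_unverified_context())
except urllib2.HTTPError as e:
    # the error body often carries the server's explanation for the 503
    print e.code
    print e.read()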

Python3: HTTP Error 302 while using urllib

I want to read the value of different stocks from websites. Therefore I wrote this tiny script, which reads the page source and then parses out the value:
stock_reader.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from re import search
from urllib import request

def main():
    links = [
        [
            'CSG',
            'UBS',
        ],
        [
            'http://www.tradegate.de/orderbuch.php?isin=CH0012138530',
            'http://www.tradegate.de/orderbuch.php?isin=CH0244767585',
        ],
    ]
    for i in range(len(links[0])):
        url = links[1][i]
        htmltext = request.urlopen(url).read().decode('utf-8')
        source = htmltext.splitlines()
        for line in source:
            if 'id="bid"' in line:
                m = search(r'\d+\.\d+', line)
                print('{}'.format(m.string[m.start():m.end()]))

if __name__ == '__main__':
    main()
Sometimes it works, but sometimes this error gets raised:
error message
Traceback (most recent call last):
File "./aktien_reader.py", line 39, in <module>
main()
File "./aktien_reader.py", line 30, in main
htmltext = request.urlopen(url).read().decode('utf-8')
File "/usr/lib/python3.3/urllib/request.py", line 160, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 686, in http_error_302
self.inf_msg + msg, headers, fp)
urllib.error.HTTPError: HTTP Error 302: The HTTP server returned a redirect error that would lead to an infinite loop.
The last 30x error message was:
Found
My question is: why is this happening, and how can I avoid it?
This probably happens because the destination site uses cookies and redirects you when you don't send any.
You can use something like this:
import urllib.request
from http.cookiejar import CookieJar

url = "http://www.tradegate.de/orderbuch.php?isin=CH0012138530"
req = urllib.request.Request(url, None, {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
})
cj = CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
response = opener.open(req)
response.read()
This way, you support cookies and the website will allow you to get the page :-)
Another way would be to use the requests package, which is really simple to use. In your case, it would lead to:
import requests
url = "http://www.tradegate.de/orderbuch.php?isin=CH0012138530"
r = requests.get(url, headers={'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}, timeout=15)
print(r.content)
This answer is a simplification of the one by Cédric J. You don't really need to import CookieJar or set the various Accept headers if you don't want to. You should, however, generally set a timeout. It is tested with Python 3.7. Remember to use a new opener for each URL you want a separate cookie session for.
from urllib.request import build_opener, HTTPCookieProcessor, Request
url = 'https://www.cell.com/cell-metabolism/fulltext/S1550-4131(18)30630-2'
opener = build_opener(HTTPCookieProcessor())
Without a Request object:
response = opener.open(url, timeout=30)
content = response.read()
With a Request object:
request = Request(url)
response = opener.open(request, timeout=30)
content = response.read()
HTTP status code 302 is a kind of redirect: the response carries a Location header with a new URL to access (not necessarily a working URL):
Location: http://www.example.com/x/y/
This is quite often used to block bots that make too many requests in too short a time frame, so it's not a coding problem.
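If you want to see what the server is doing without chasing the loop, here is a minimal sketch (the URL and the delay are illustrative) that stops urllib from following redirects, prints the Location target instead, and paces requests so the site is less likely to treat the script as a bot:
import time
import urllib.error
import urllib.request

class NoRedirect(urllib.request.HTTPRedirectHandler):
    # returning None makes the 3xx bubble up as an HTTPError
    # instead of being followed automatically
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        return None

opener = urllib.request.build_opener(NoRedirect)
urls = ['http://www.tradegate.de/orderbuch.php?isin=CH0012138530']
for url in urls:
    try:
        response = opener.open(url, timeout=15)
        print(url, response.status)
    except urllib.error.HTTPError as e:
        # for a 302 this shows where the server wanted to send us
        print(url, e.code, e.headers.get('Location'))
    time.sleep(2)  # pause between requests to stay polite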

Python: urllib2.HTTPError: HTTP Error 401: authenticationrequired

I was trying to get a web page, but got into this problem. I've looked up some references, and this is what I've done so far:
import sys
import urllib2
from bs4 import BeautifulSoup
user = 'myuserID'
password = "mypassword"
ip = sys.argv[1]
url = "http://www.websites.com/" + ip
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, user, password)
handler = urllib2.HTTPBasicAuthHandler(passman)
opener = urllib2.build_opener(handler)
urllib2.install_opener(opener)
header = {
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate'
}
html = urllib2.urlopen(urllib2.Request(url, None, header))
soup = BeautifulSoup(html, 'html.parser')
# some if else function afterwards #
When I try to run the script, it shows this kind of error:
python checker.py 8.8.8.8
Traceback (most recent call last):
File "checker.py", line 34, in <module>
html = urllib2.urlopen(urllib2.Request(url, None, header))
File "C:\Python27\lib\urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 437, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 469, in error
result = self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 656, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "C:\Python27\lib\urllib2.py", line 437, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 475, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 401: authenticationrequired
But if I open the page (or any other web page) in a browser and manually enter my credentials, the script works fine afterwards. Am I missing something?
Just to add: my network uses a McAfee web gateway device, so sometimes we need to enter our credentials to proceed with browsing. Our user/pass is integrated with Active Directory. Could that be causing the issue?
This seems to work really well (taken from another thread):
import urllib2
import base64
import sys
user = 'myuserID'
password = "mypassword"
ip = sys.argv[1]
url = "http://www.websites.com/" + ip
request = urllib2.Request(url)
base64string = base64.encodestring('%s:%s' % (user, password)).replace('\n', '')
request.add_header("Authorization", "Basic %s" % base64string)
result = urllib2.urlopen(request)
Or you may use requests:
import sys
import requests
from requests.auth import HTTPBasicAuth

user = 'myuserID'
password = "mypassword"
ip = sys.argv[1]
url = "http://www.websites.com/" + ip
res = requests.get(url, auth=HTTPBasicAuth(user, password))
print res.text

Getting json data from imgur.com

I was trying to get JSON data from imgur.com.
To get it, one has to hit this link:
http://imgur.com/user/{Username}/index/newest/page/{pagecount}/hit.json?scrolling
where Username and pagecount may change. So I did something like this:
import urllib2, json
Username="Tighe"
count = 0
url = "http://imgur.com/user/"+arg+"/index/newest/page/"+str(count)+"/hit.json?scrolling"
print("URL " +url)
response = urllib2.urlopen(url)
data = response.read()
I get the data, but then to convert it to JSON format I did something like this:
jsonData = json.loads(data)
Now it gives this error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "imgur_battle.py", line 8, in battle
response = urllib2.urlopen(url)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 404, in open
response = self._open(req, data)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 422, in _open
'_open', req)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1214, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1187, in do_open
r = h.getresponse(buffering=True)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1045, in getresponse
response.begin()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 409, in begin
version, status, reason = self._read_status()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 373, in _read_status
raise BadStatusLine(line)
httplib.BadStatusLine: ''
import urllib2, json
username = "Tighe"
count = 0
url = "http://imgur.com/user/"+username+"/index/newest/page/"+str(count)+"/hit.json?scrolling"
response = urllib2.urlopen(url)
data = response.read()
jsonData = json.loads(data)
print jsonData
This works without any problem.
The only issue seems to be that you are using the arg variable instead of Username when you build the URL. I got a NameError; if you didn't, presumably you have arg set to some extraneous value.
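As for the httplib.BadStatusLine in the traceback: it usually means the server closed the connection or sent an empty reply. If it only happens intermittently, a small retry sketch (the retry count and delay here are arbitrary choices) can work around it:
import time
import urllib2
from httplib import BadStatusLine

def fetch(url, retries=3, delay=2):
    # retry on empty/malformed status lines, re-raising after the last attempt
    for attempt in range(retries):
        try:
            return urllib2.urlopen(url).read()
        except BadStatusLine:
            if attempt == retries - 1:
                raise
            time.sleep(delay)

data = fetch('http://imgur.com/user/Tighe/index/newest/page/0/hit.json?scrolling')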

Cannot fetch url using neither urllib2 nor requests

I'm trying to do this on remote Ubuntu Server:
>>> import urllib2, requests
>>> url = 'http://python.org/'
>>> urllib2.urlopen(url)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 406, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 519, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 444, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 527, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 404: Not Found
>>> requests.get(url)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/api.py", line 55, in get
return request('get', url, **kwargs)
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/api.py", line 44, in request
return session.request(method=method, url=url, **kwargs)
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/sessions.py", line 382, in request
resp = self.send(prep, **send_kwargs)
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/sessions.py", line 505, in send
history = [resp for resp in gen] if allow_redirects else []
File "/home/django/zyq2/venv/local/lib/python2.7/site-packages/requests/sessions.py", line 99, in resolve_redir ts
raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects)
requests.exceptions.TooManyRedirects: Exceeded 30 redirects.
But it works fine on local Windows machine:
>>> urllib2.urlopen(url)
<addinfourl at 57470168 whose fp = <socket._fileobject object at 0x036CB630>>
>>> requests.get(url)
<Response [200]>
I have absolutely no idea what's going on and would appreciate any suggestions.
Update
I tried S.M. Al Mamun's suggestion and got an exception with long traceback:
>>> req = urllib2.Request(url, headers={ 'User-Agent': 'Mozilla/5.0' })
>>> urllib2.urlopen(req).read()
...
long traceback (more than one page)
...
urllib2.HTTPError: HTTP Error 303: The HTTP server returned a redirect error that would lead to an infinite loop.
The last 30x error message was:
See Other
Infinite loop again (I mean TooManyRedirects exception).
Try using a user-agent:
req = urllib2.Request(url, headers={ 'User-Agent': 'Mozilla/5.0' })
urllib2.urlopen(req).read()
If it still doesn't work, your Ubuntu server might be offline!
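If urllib2 still loops, the same idea with requests is worth a try, since it also records the redirect chain in r.history (a sketch; the User-Agent value is an arbitrary browser string):
import requests

url = 'http://python.org/'
r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=15)
print r.status_code
print [resp.url for resp in r.history]  # redirects that were followed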
