Can't get custom DNS server working in Python

I'm having real trouble getting Python to use a custom DNS server.
I have followed this: Tell urllib2 to use custom DNS
If I don't override self.host and self.port, the request goes through (without any blocking).
Here is the code:
import urllib2
import httplib
import socket

class MyHTTPConnection(httplib.HTTPConnection):
    def connect(self):
        if self.host == 'www.porn.com':
            self.host = '208.67.222.123'  # OpenDNS FamilyShield
            self.port = 53
        self.sock = socket.create_connection((self.host, self.port))

class MyHTTPHandler(urllib2.HTTPHandler):
    def http_open(self, req):
        return self.do_open(MyHTTPConnection, req)

opener = urllib2.build_opener(MyHTTPHandler)
urllib2.install_opener(opener)

f = urllib2.urlopen('http://www.porn.com/videos/anime-toon.html')
data = f.read()
print data
I keep getting a "raise BadStatusLine(line)" error
Error log:
Traceback (most recent call last):
File "K:\Desktop\rte\dns2.py", line 16, in <module>
f = urllib2.urlopen ('http://www.porn.com/videos/anime-toon.html')
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 394, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 412, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 372, in _call_chain
result = func(*args)
File "K:\Desktop\rte\dns2.py", line 12, in http_open
return self.do_open (MyHTTPConnection, req)
File "C:\Python27\lib\urllib2.py", line 1170, in do_open
r = h.getresponse(buffering=True)
File "C:\Python27\lib\httplib.py", line 1027, in getresponse
response.begin()
File "C:\Python27\lib\httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "C:\Python27\lib\httplib.py", line 371, in _read_status
raise BadStatusLine(line)
BadStatusLine: ''
EDIT: Going on isedev's response that I was going about it the wrong way, I tried the following.
The change of nameservers doesn't seem to register with urllib2:
import dns.resolver
import urllib2
resolver = dns.resolver.Resolver()
resolver.nameservers = ['208.67.222.123']
answer = resolver.query('www.porn.com','A')
web_url = 'http://www.porn.com/videos/anime-toon.html'
req1 = urllib2.Request(web_url)
req1.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
response1 = urllib2.urlopen(req1)
html=response1.read()
print html
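A likely reason this has no effect is that the resolver's answer is never used: urllib2 still resolves the hostname through the operating system's resolver when it opens the connection. A minimal sketch of one way to feed the custom resolution back into the request (the open_via_nameserver helper name is made up for illustration; the answer below takes the different approach of overriding HTTPConnection):
import dns.resolver
import urllib2

def open_via_nameserver(url, hostname, nameserver):
    # resolve the hostname against the chosen DNS server...
    resolver = dns.resolver.Resolver()
    resolver.nameservers = [nameserver]
    ip = resolver.query(hostname, 'A')[0].address
    # ...then connect to that IP while still sending the original Host header
    req = urllib2.Request(url.replace(hostname, ip, 1))
    req.add_header('Host', hostname)
    return urllib2.urlopen(req)

response = open_via_nameserver('http://www.porn.com/videos/anime-toon.html',
                               'www.porn.com', '208.67.222.123')
print response.read()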

I think you've misunderstood what's being done in the "Custom DNS" answer you refer to. The example given in that solution is not in fact setting up a custom DNS server - the MyResolver class is given as an example only and performs a hard-coded name-to-IP mapping for 'news.bbc.co.uk'.
So what your code is actually doing is redirecting an HTTP request to 'www.porn.com' (port 80) to the OpenDNS FamilyShield DNS server (on port 53)... which will obviously lead to the error you're getting.
So what you need to do is replace:
if self.host == 'www.porn.com':
    self.host = '208.67.222.123'  # OpenDNS FamilyShield
    self.port = 53
with code that actually resolves 'www.porn.com' against the chosen DNS server directly (using dnspython for instance).
Assuming you've got the dnspython package installed, you could do something like:
import urllib2
import httplib
import socket
import dns.resolver

class MyHTTPConnection(httplib.HTTPConnection):
    def connect(self):
        if self.host == 'www.porn.com':
            resolver = dns.resolver.Resolver()
            resolver.nameservers = ['208.67.222.123']
            answer = resolver.query(self.host, 'A')
            self.host = answer.rrset.items[0].address
        self.sock = socket.create_connection((self.host, self.port))

class MyHTTPHandler(urllib2.HTTPHandler):
    def http_open(self, req):
        return self.do_open(MyHTTPConnection, req)

opener = urllib2.build_opener(MyHTTPHandler)
urllib2.install_opener(opener)

f = urllib2.urlopen('http://www.porn.com/videos/anime-toon.html')
data = f.read()
print data
This code returns '404 - not found' and network trace shows HTTP request to 'hit-adult.opendns.com', which is what 'www.porn.com' resolves to when using the '208.67.222.123' nameserver:
dig @208.67.222.123 www.porn.com A
;; ANSWER SECTION:
www.porn.com. 0 IN A 67.215.65.130
nslookup 67.215.65.130
130.65.215.67.in-addr.arpa name = hit-adult.opendns.com.
The above is an example only. Real code would require error checking, etc...
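For instance, a slightly more defensive connect() might look like the sketch below; the exception handling (failing loudly instead of silently bypassing the filtering resolver) is an assumption added on top of the original answer, not part of it:
import socket
import httplib
import dns.resolver
import dns.exception

class MyHTTPConnection(httplib.HTTPConnection):
    def connect(self):
        host = self.host
        if host == 'www.porn.com':
            try:
                resolver = dns.resolver.Resolver()
                resolver.nameservers = ['208.67.222.123']
                answer = resolver.query(host, 'A')
                host = answer[0].address  # first A record returned
            except dns.exception.DNSException as e:
                # fail loudly rather than silently falling back to the system resolver
                raise socket.error("DNS lookup failed for %s: %s" % (host, e))
        self.sock = socket.create_connection((host, self.port))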

Related

Python download images with alternating variables

I was trying to download images with URLs that change, but got an error.
url_image="http://www.joblo.com/timthumb.php?src=/posters/images/full/"+str(title_2)+"-poster1.jpg&h=333&w=225"
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
headers = {'User-Agent': user_agent}
req = urllib.request.Request(url_image, None, headers)
print(url_image)
#image, h = urllib.request.urlretrieve(url_image)
with urllib.request.urlopen(req) as response:
the_page = response.read()
#print (the_page)
with open('poster.jpg', 'wb') as f:
f.write(the_page)
Traceback (most recent call last):
File "C:\Users\luke\Desktop\scraper\imager finder.py", line 97, in
with urllib.request.urlopen(req) as response:
File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 465, in open
response = self._open(req, data)
File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 483, in _open
'_open', req)
File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 443, in _call_chain
result = func(*args)
File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1268, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1243, in do_open
r = h.getresponse()
File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1174, in getresponse
response.begin()
File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 282, in begin
version, status, reason = self._read_status()
File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 264, in _read_status
raise BadStatusLine(line)
http.client.BadStatusLine:
My advice is to use urllib2. In addition, I've written a nice function (I think) that will also allow gzip encoding (to reduce bandwidth) if the server supports it. I use this for downloading social media files, but it should work for anything.
I would try to debug your code, but since it's just a snippet (and the error messages are formatted badly), it's hard to know exactly where your error is occurring (it's certainly not line 97 of your code snippet).
This isn't as short as it could be, but it's clear and reusable. This is Python 2.7; it looks like you're using 3, in which case google some other questions that address how to use urllib2 in Python 3.
import urllib2
import gzip
from StringIO import StringIO

_ERRORS = {1: "Download failed"}  # minimal stand-in; the original snippet references an error table defined elsewhere

def download(url):
    """
    Download and return the file specified in the URL; attempt to use
    gzip encoding if possible.
    """
    request = urllib2.Request(url)
    request.add_header('Accept-Encoding', 'gzip')
    try:
        response = urllib2.urlopen(request)
    except Exception, e:
        raise IOError("%s(%s) %s" % (_ERRORS[1], url, e))
    payload = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        buf = StringIO(payload)
        f = gzip.GzipFile(fileobj=buf)
        payload = f.read()
    return payload

def save_media(filename, media):
    file_handle = open(filename, "wb")
    file_handle.write(media)
    file_handle.close()

title_2 = "10-cloverfield-lane"
media = download("http://www.joblo.com/timthumb.php?src=/posters/images/full/{}-poster1.jpg&h=333&w=225".format(title_2))
save_media("poster.jpg", media)

Python Requests Timeout Value error

Good Evening,
I cannot get my HTTPS request to go through. I have to use SSLv3, so I'm specifying the protocol with:
import requests
from requests.adapters import HTTPAdapter
from urllib3.poolmanager import PoolManager
import ssl

class MyAdapter(HTTPAdapter):
    def init_poolmanager(self, connections, maxsize, block=False):
        self.poolmanager = PoolManager(num_pools=connections,
                                       maxsize=maxsize,
                                       block=block,
                                       ssl_version=ssl.PROTOCOL_SSLv3)

username = 'username'
password = 'password'
email = 'email@example.com'
url = 'https://api.example.com/'
headers = {'Accept': 'application/json', 'content-type': 'application/json'}
params = {'emailaddress': email}
auth = (username, password)

s = requests.Session()
s.mount(url, MyAdapter())
r = s.get(url+'customer.svc/search', params=params, auth=auth, headers=headers)
When I run my get request I get the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/ubuntu/workspace/app/venv/lib/python2.7/site-packages/requests/sessions.py", line 473, in get
return self.request('GET', url, **kwargs)
File "/home/ubuntu/workspace/app/venv/lib/python2.7/site-packages/requests/sessions.py", line 461, in request
resp = self.send(prep, **send_kwargs)
File "/home/ubuntu/workspace/app/venv/lib/python2.7/site-packages/requests/sessions.py", line 573, in send
r = adapter.send(request, **kwargs)
File "/home/ubuntu/workspace/app/venv/lib/python2.7/site-packages/requests/adapters.py", line 370, in send
timeout=timeout
File "/home/ubuntu/workspace/app/venv/lib/python2.7/site-packages/urllib3/connectionpool.py", line 517, in urlopen
timeout_obj = self._get_timeout(timeout)
File "/home/ubuntu/workspace/app/venv/lib/python2.7/site-packages/urllib3/connectionpool.py", line 283, in _get_timeout
return Timeout.from_float(timeout)
File "/home/ubuntu/workspace/app/venv/lib/python2.7/site-packages/urllib3/util/timeout.py", line 152, in from_float
return Timeout(read=timeout, connect=timeout)
File "/home/ubuntu/workspace/app/venv/lib/python2.7/site-packages/urllib3/util/timeout.py", line 95, in __init__
self._connect = self._validate_timeout(connect, 'connect')
File "/home/ubuntu/workspace/app/venv/lib/python2.7/site-packages/urllib3/util/timeout.py", line 125, in _validate_timeout
"int or float." % (name, value))
ValueError: Timeout value connect was Timeout(connect=None, read=None, total=None), but it must be an int or float.
Any ideas? I can't figure it out.
Additional context: I'm on an Amazon EC2 Ubuntu instance, running requests 2.5.1 and Python 2.7.6.
It looks like requests doesn't always play nicely when you bring in an outside urllib3 distribution. What I ended up doing was using requests' bundled urllib3 instead:
from requests.packages.urllib3.poolmanager import PoolManager
instead of:
from urllib3.poolmanager import PoolManager
No issues since I started doing this.
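Putting that together, the adapter from the question might look something like this when it uses requests' bundled urllib3 (a sketch only; the SSLv3Adapter name is just for illustration, and the exact init_poolmanager signature can vary between requests versions):
import ssl
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.poolmanager import PoolManager  # requests' bundled urllib3

class SSLv3Adapter(HTTPAdapter):
    """Transport adapter that forces SSLv3, as in the question."""
    def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
        self.poolmanager = PoolManager(num_pools=connections,
                                       maxsize=maxsize,
                                       block=block,
                                       ssl_version=ssl.PROTOCOL_SSLv3,
                                       **pool_kwargs)

# mirror the question's session setup
s = requests.Session()
s.mount('https://api.example.com/', SSLv3Adapter())
r = s.get('https://api.example.com/customer.svc/search')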

How to send POST data from Python to PHP scripts with basic HTTP authentication?

I'm trying to send POST data from a Python program to a PHP file that uses basic HTTP authentication. I run this code:
import urllib.parse
from urllib.request import urlopen
path="https://username:password#url_to_my_file.php"
path=path.encode('utf8')
data=urllib.parse.urlencode({"Hello":"There"})
data=data.encode('utf8')
req=urlopen(path,mydata)
req.add_header("Content-type","application/x-www-form-urlencoded")
page=urllib.urlopen(req).read()
I got this error:
req.data=data
AttributeError: 'bytes' object has no attribute 'data'
How can I fix this bug?
UPDATE:
Following the solution below, I changed my code this way:
from urllib.request import HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, build_opener, Request
import urllib
url="https://www.my_website.com/file.php"
path="http://my_username:my_password#https://www.my_website.com/file.php"
mydata=urllib.parse.urlencode({"Hello":"Test"})
pwmgr = HTTPPasswordMgrWithDefaultRealm()
pwmgr.add_password(None, url, 'my_username', 'my_password')
authhandler = HTTPBasicAuthHandler(pwmgr)
opener = build_opener(authhandler)
req = Request(path, mydata)
req.add_header("Content-type","application/x-www-form-urlencoded")
page = opener.open(req).read()
I got these errors:
Traceback (most recent call last):
File "/usr/local/python3.1.3/lib/python3.1/http/client.py", line 673, in _set_hostport
port = int(host[i+1:])
ValueError: invalid literal for int() with base 10: ''
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "a1.py", line 17, in <module>
page = opener.open(req).read()
File "/usr/local/python3.1.3/lib/python3.1/urllib/request.py", line 350, in open
response = self._open(req, data)
File "/usr/local/python3.1.3/lib/python3.1/urllib/request.py", line 368, in _open
'_open', req)
File "/usr/local/python3.1.3/lib/python3.1/urllib/request.py", line 328, in _call_chain
result = func(*args)
File "/usr/local/python3.1.3/lib/python3.1/urllib/request.py", line 1112, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/usr/local/python3.1.3/lib/python3.1/urllib/request.py", line 1065, in do_open
h = http_class(host, timeout=req.timeout) # will parse host:port
File "/usr/local/python3.1.3/lib/python3.1/http/client.py", line 655, in __init__
self._set_hostport(host, port)
File "/usr/local/python3.1.3/lib/python3.1/http/client.py", line 675, in _set_hostport
raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
http.client.InvalidURL: nonnumeric port: ''
You opened the URL twice. First with:
req=urlopen(path,mydata)
Then again with:
page=urllib.urlopen(req).read()
If you wanted to create a separate Request object, do so:
from urllib.request import urlopen, Request
req = Request(path, mydata)
req.add_header("Content-type","application/x-www-form-urlencoded")
page = urlopen(req).read()
Note that you should not encode the URL; it should be a str value.
urllib.request will also not parse authentication information from the URL; you'll need to provide that separately by using a password manager:
from urllib.request import HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, build_opener, Request

url = "https://url_to_my_file.php"

pwmgr = HTTPPasswordMgrWithDefaultRealm()
pwmgr.add_password(None, url, 'username', 'password')
authhandler = HTTPBasicAuthHandler(pwmgr)
opener = build_opener(authhandler)

req = Request(url, mydata)
req.add_header("Content-type", "application/x-www-form-urlencoded")
page = opener.open(req).read()
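One more detail, assuming Python 3's urllib.request (the traceback in the question shows Python 3.1): the POST body passed to Request must be bytes, so the urlencoded data needs an explicit encode, for example:
import urllib.parse

mydata = urllib.parse.urlencode({"Hello": "There"}).encode('utf8')  # POST bodies must be bytes in Python 3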

Using multiple proxies to open a link in urllib2

What I am trying to do is read a line (an IP address) from a file, open a website through that address as a proxy, and then repeat for all the addresses in the file. Instead, I get an error. I am new to Python, so maybe it's a simple mistake. Thanks in advance!
CODE:
>>> f = open("proxy.txt","r"); #file containing list of ip addresses
>>> address = (f.readline()).strip(); # to remove \n at end of line
>>>
>>> while line:
proxy = urllib2.ProxyHandler({'http': address })
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)
urllib2.urlopen('http://www.google.com')
address = (f.readline()).strip();
ERROR:
Traceback (most recent call last):
File "<pyshell#15>", line 5, in <module>
urllib2.urlopen('http://www.google.com')
File "D:\Programming\Python\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "D:\Programming\Python\lib\urllib2.py", line 394, in open
response = self._open(req, data)
File "D:\Programming\Python\lib\urllib2.py", line 412, in _open
'_open', req)
File "D:\Programming\Python\lib\urllib2.py", line 372, in _call_chain
result = func(*args)
File "D:\Programming\Python\lib\urllib2.py", line 1199, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "D:\Programming\Python\lib\urllib2.py", line 1174, in do_open
raise URLError(err)
URLError: <urlopen error [Errno 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond>
It means that the proxy is unavailable.
Here's a proxy checker that checks several proxies simultaneously:
#!/usr/bin/env python
import fileinput  # accept proxies from files or stdin

try:
    from gevent.pool import Pool  # $ pip install gevent
    import gevent.monkey; gevent.monkey.patch_all()  # patch stdlib
except ImportError:  # fallback on using threads
    from multiprocessing.dummy import Pool

try:
    from urllib2 import ProxyHandler, build_opener
except ImportError:  # Python 3
    from urllib.request import ProxyHandler, build_opener

def is_proxy_alive(proxy, timeout=5):
    opener = build_opener(ProxyHandler({'http': proxy}))  # test redir. and such
    try:  # send request, read response headers, close connection
        opener.open("http://example.com", timeout=timeout).close()
    except EnvironmentError:
        return None
    else:
        return proxy

candidate_proxies = (line.strip() for line in fileinput.input())
pool = Pool(20)  # use 20 concurrent connections
for proxy in pool.imap_unordered(is_proxy_alive, candidate_proxies):
    if proxy is not None:
        print(proxy)
Usage:
$ python alive-proxies.py proxy.txt
$ echo user:password@ip:port | python alive-proxies.py
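If you would rather keep the original read-the-file loop, here is a minimal sketch of it with unavailable proxies skipped instead of aborting (the try/except around the request is an addition, not part of the original question or answer):
import urllib2

with open("proxy.txt") as f:
    for address in f:
        address = address.strip()
        if not address:
            continue
        opener = urllib2.build_opener(urllib2.ProxyHandler({'http': address}))
        try:
            opener.open('http://www.google.com', timeout=5).close()
        except EnvironmentError as e:
            print "proxy %s failed: %s" % (address, e)  # skip unavailable proxies
        else:
            print "proxy %s works" % address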

using mwclient under proxy server

I use the net under a proxy server.
self.wp = site if site else mwclient.Site(self.url)
When the above line is executed, the following errors are shown:
File "C:\Python27\lib\site-packages\mwclient-0.6.5-py2.7.egg\mwclient\client.py", line 92, in __init__
self.site_init()
File "C:\Python27\lib\site-packages\mwclient-0.6.5-py2.7.egg\mwclient\client.py", line 100, in site_init
siprop = 'general|namespaces', uiprop = 'groups|rights')
File "C:\Python27\lib\site-packages\mwclient-0.6.5-py2.7.egg\mwclient\client.py", line 165, in api
info = self.raw_api(action, **kwargs)
File "C:\Python27\lib\site-packages\mwclient-0.6.5-py2.7.egg\mwclient\client.py", line 248, in raw_api
json_data = self.raw_call('api', data).read()
File "C:\Python27\lib\site-packages\mwclient-0.6.5-py2.7.egg\mwclient\client.py", line 223, in raw_call
url, data = data, headers = headers)
File "C:\Python27\lib\site-packages\mwclient-0.6.5-py2.7.egg\mwclient\http.py", line 225, in post
return self.find_connection(host).post(host,
File "C:\Python27\lib\site-packages\mwclient-0.6.5-py2.7.egg\mwclient\http.py", line 218, in find_connection
conn = cls(host, self)
File "C:\Python27\lib\site-packages\mwclient-0.6.5-py2.7.egg\mwclient\http.py", line 62, in __init__
self._conn.connect()
File "C:\Python27\lib\httplib.py", line 757, in connect
self.timeout, self.source_address)
File "C:\Python27\lib\socket.py", line 571, in create_connection
raise err
error: [Errno 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
I tried setting the proxy using urllib2 with the following steps, but it didn't help:
>>> import urllib2
>>> auth = 'http://xxxxx:xxxx@10.1.9.30:8080'
>>> handler = urllib2.ProxyHandler({'http': auth})
>>> opener = urllib2.build_opener(handler)
>>> urllib2.install_opener(opener)
This is a bit old, but I faced the same problem yesterday and I'm posting the solution here, as it may help other people.
I managed to sort it out by changing the file mwclient/http.py. Basically, I check whether the environment variable http_proxy exists and, if so, connect through the proxy rather than directly.
In the class HTTPPersistentConnection(object) I added a variable usesProxy = False. Around line 61 I replaced self._conn = self.http_class(host) with:
http_proxy_env = os.environ.get('http_proxy')
if http_proxy_env is not None:
    try:
        # proxy
        http_proxy_url = urlparse.urlparse(http_proxy_env)
        http_proxy_host, http_proxy_port = http_proxy_url.netloc.split(':')
        self._conn = self.http_class(http_proxy_host, int(http_proxy_port))
        self.usesProxy = True
    except:
        self._conn = self.http_class(host)
else:
    self._conn = self.http_class(host)
Then I substituted the next two occurrences of self._conn.request(method, path, ...).
At line 94:
if self.usesProxy is False:
    self._conn.request(method, path, headers = headers)
else:
    self._conn.request(method, "http://" + host + path, headers = headers)
and at line 107:
if self.usesProxy is False:
    self._conn.request(method, path, data, headers)
else:
    self._conn.request(method, "http://" + host + path, data, headers)
It should do the job!
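To see what the patch boils down to in isolation, here is a standalone sketch of talking to an HTTP proxy with httplib: connect to the proxy host and request the absolute URL instead of just the path. The proxy address is taken from the question, and the MediaWiki API URL is only an example; an authenticated proxy would additionally need a Proxy-Authorization header.
import httplib

proxy_host, proxy_port = '10.1.9.30', 8080  # proxy address from the question

conn = httplib.HTTPConnection(proxy_host, proxy_port)
# ask the proxy for the full URL rather than just the path
conn.request('GET', 'http://en.wikipedia.org/w/api.php?action=query&meta=siteinfo&format=json')
resp = conn.getresponse()
print resp.status, resp.reason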
