http.client.IncompleteRead & multiprocessing.pool.MaybeEncodingError: Error sending result: - python

My program is written to scan through a large list of websites for SQLi vulnerabilities by adding a simple string query (') to the end of URLs and looking for errors in the page source.
My program keeps getting stuck on the same website. Here's the error I keep receiving:
[-] http://www.pluralsight.com/guides/microsoft-net/getting-started-with-asp-net-mvc-core-1-0-from-zero-to-hero?status=in-review'
[-] Page not found.
[-] http://lfg.go2dental.com/member/dental_search/searchprov.cgi?P=LFGDentalConnect&Network=L'
[-] http://www.parlimen.gov.my/index.php?lang=en'
[-] http://www.otakunews.com/category.php?CatID=23'
[-] http://plaine-d-aunis.bibli.fr/opac/index.php?lvl=cmspage&pageid=6&id_rubrique=100'
[-] Page not found.
[-] http://www.rvparkhunter.com/state.asp?state=britishcolumbia'
[-] http://ensec.org/index.php?option=com_content&view=article&id=547:lord-howell-british-fracking-policy--a-change-of-direction-needed&catid=143:issue-content&Itemid=433'
[-] URL Timed Out
[-] http://www.videohelp.com/tools.php?listall=1'
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\pool.py", line 44, in mapstar
    return list(map(*args))
  File "C:\Users\Brice\Desktop\My Site Hunter\sitehunter.py", line 81, in mp_worker
    mainMethod(URLS)
  File "C:\Users\Brice\Desktop\My Site Hunter\sitehunter.py", line 77, in mainMethod
    tryMethod(req, URL)
  File "C:\Users\Brice\Desktop\My Site Hunter\sitehunter.py", line 48, in tryMethod
    checkforMySQLError(req, URL)
  File "C:\Users\Brice\Desktop\My Site Hunter\sitehunter.py", line 23, in checkforMySQLError
    response = urllib.request.urlopen(req, context=gcontext, timeout=2)
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 223, in urlopen
    return opener.open(url, data, timeout)
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 532, in open
    response = meth(req, response)
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 642, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 564, in error
    result = self._call_chain(*args)
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 504, in _call_chain
    result = func(*args)
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 753, in http_error_302
    fp.read()
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\http\client.py", line 462, in read
    s = self._safe_read(self.length)
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\http\client.py", line 614, in _safe_read
    raise IncompleteRead(b''.join(s), amt)
http.client.IncompleteRead: IncompleteRead(4659 bytes read, 15043 more expected)
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "sitehunter.py", line 91, in <module>
    mp_handler(URLList)
  File "sitehunter.py", line 86, in mp_handler
    p.map(mp_worker, URLList)
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\pool.py", line 260, in map
    return self._map_async(func, iterable, mapstar, chunksize).get()
  File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\pool.py", line 608, in get
    raise self._value
http.client.IncompleteRead: IncompleteRead(4659 bytes read, 15043 more expected)
C:\Users\Brice\Desktop\My Site Hunter>
Here's my full source code. I narrow it down for you in the next section.
# Start off with imports
import urllib.request
import urllib.error
import socket
import threading
import multiprocessing
import time
import ssl

# Fake a header to get less errors
headers = {'User-agent': 'Mozilla/5.0'}

# Make a class to pass to upon exception errors
class MyException(Exception):
    pass

# Checks for mySQL error responses after putting a string (') query on the end of a URL
def checkforMySQLError(req, URL):
    # gcontext is to bypass a no SSL error from shutting down my program
    gcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    response = urllib.request.urlopen(req, context=gcontext, timeout=2)
    page_source = response.read()
    page_source_string = page_source.decode(encoding='cp866', errors='ignore')
    # The if statements behind the whole thing. Checks page source for these errors,
    # and returns any that come up positive.
    # I'd like to do my outputting here, if possible.
    if "You have an error in your SQL syntax" in page_source_string:
        print("\t [+] " + URL)
    elif "mysql_fetch" in page_source_string:
        print("\t [+] " + URL)
    elif "mysql_num_rows" in page_source_string:
        print("\t [+] " + URL)
    elif "MySQL Error" in page_source_string:
        print("\t [+] " + URL)
    elif "MySQL_connect()" in page_source_string:
        print("\t [+] " + URL)
    elif "UNION SELECT" in page_source_string:
        print("\t [+] " + URL)
    else:
        print("\t [-] " + URL)

# Attempts to connect to the URL, and passes an error on if it fails.
def tryMethod(req, URL):
    try:
        checkforMySQLError(req, URL)
    except urllib.error.HTTPError as e:
        if e.code == 404:
            print("\t [-] Page not found.")
        if e.code == 400:
            print("\t [+] " + URL)
    except urllib.error.URLError as e:
        print("\t [-] URL Timed Out")
    except socket.timeout as e:
        print("\t [-] URL Timed Out")
    except socket.error as e:
        print("\t [-] Error in URL")

# This is where the magic begins.
def mainMethod(URLList):
    ##### THIS IS THE WORK-AROUND I USED TO FIX THIS ERROR ####
    # URL = urllib.request.urlopen(URLList, timeout=2)
    # Replace any newlines or we get an invalid URL request.
    URL = URLList.replace("\n", "")
    # URLLib doesn't like https, not sure why.
    URL = URL.replace("https://", "http://")
    # Python likes to truncate urls after spaces, so I add a typical %20.
    URL = URL.replace("\s", "%20")
    # The blind sql query that makes the errors occur.
    URL = URL + "'"
    # Requests to connect to the URL and sends it to the tryMethod.
    req = urllib.request.Request(URL)
    tryMethod(req, URL)

# Multi-processing worker
def mp_worker(URLS):
    mainMethod(URLS)

# Multi-processing handler
def mp_handler(URLList):
    p = multiprocessing.Pool(25)
    p.map(mp_worker, URLList)

# The beginning of it all
if __name__ == '__main__':
    URLList = open('sites.txt', 'r')
    mp_handler(URLList)
Here are the important parts of the code, specifically the parts where I read from URLs using urllib:
def mainMethod(URLList):
    ##### THIS IS THE WORK-AROUND I USED TO FIX THIS ERROR ####
    # URL = urllib.request.urlopen(URLList, timeout=2)
    # Replace any newlines or we get an invalid URL request.
    URL = URLList.replace("\n", "")
    # URLLib doesn't like https, not sure why.
    URL = URL.replace("https://", "http://")
    # Python likes to truncate urls after spaces, so I add a typical %20.
    URL = URL.replace("\s", "%20")
    # The blind sql query that makes the errors occur.
    URL = URL + "'"
    # Requests to connect to the URL and sends it to the tryMethod.
    req = urllib.request.Request(URL)
    tryMethod(req, URL)

# Checks for mySQL error responses after putting a string (') query on the end of a URL
def checkforMySQLError(req, URL):
    # gcontext is to bypass a no SSL error from shutting down my program
    gcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    response = urllib.request.urlopen(req, context=gcontext, timeout=2)
    page_source = response.read()
    page_source_string = page_source.decode(encoding='cp866', errors='ignore')
I got past this error by making a request to read from URLList before making any changes to it (the commented-out work-around line in mainMethod above). In the code shown I've left that line commented out, because enabling it only trades this error for another one that looks worse and harder to fix, which is why I included this first error even though I had found a way around it.
Here's the new error when I remove the comment from that line of code:
[-] http://www.davis.k12.ut.us/site/Default.aspx?PageType=1&SiteID=6497&ChannelID=6507&DirectoryType=6'
[-] http://www.surreyschools.ca/NewsEvents/Posts/Lists/Posts/ViewPost.aspx?ID=507'
[-] http://plaine-d-aunis.bibli.fr/opac/index.php?lvl=cmspage&pageid=6&id_rubrique=100'
[-] http://www.parlimen.gov.my/index.php?lang=en'
[-] http://www.rvparkhunter.com/state.asp?state=britishcolumbia'
[-] URL Timed Out
[-] http://www.videohelp.com/tools.php?listall=1'
Traceback (most recent call last):
File "sitehunter.py", line 91, in <module>
mp_handler(URLList)
File "sitehunter.py", line 86, in mp_handler
p.map(mp_worker, URLList)
File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Users\Brice\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\pool.py", line 608, in get
raise self._value
multiprocessing.pool.MaybeEncodingError: Error sending result: '<multiprocessing.pool.ExceptionWithTraceback object at 0x0381C790>'. Reason: 'TypeError("cannot serialize '_io.BufferedReader' object",)'
C:\Users\Brice\Desktop\My Site Hunter>
The new error seems worse than the old one, to be honest. That's why I included both. Any information on how to fix this would be greatly appreciated, as I've been stuck trying to fix it for the past few hours.
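One thing that may be worth trying (a sketch against the script above, not a tested fix): catch http.client.IncompleteRead inside tryMethod, next to the other network errors, so a truncated response is reported as just another bad URL instead of escaping the worker. Anything that escapes a worker has to be pickled and re-raised by pool.map in the parent, and that also appears to be what produces the MaybeEncodingError when the escaped exception drags an unpicklable _io.BufferedReader along with it.

import http.client

# Drop-in replacement for tryMethod from the script above (sketch only).
def tryMethod(req, URL):
    try:
        checkforMySQLError(req, URL)
    except urllib.error.HTTPError as e:
        if e.code == 404:
            print("\t [-] Page not found.")
        if e.code == 400:
            print("\t [+] " + URL)
    except http.client.IncompleteRead:
        # The server closed the connection before sending everything its
        # Content-Length header promised; treat it like any other bad URL.
        print("\t [-] Incomplete read, skipping URL")
    except (urllib.error.URLError, socket.timeout):
        print("\t [-] URL Timed Out")
    except socket.error:
        print("\t [-] Error in URL")

With every exception handled inside the worker, nothing unpicklable should need to cross the process boundary, and the pool can keep going after a bad site.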

Related

reddit API praw error HTTP request or JSON object

I use the reddit API libraries praw and psraw to extract comments from a subreddit; however, I got two errors today after running a few loops:
1. A JSON object decode error (or an empty object) -> ValueError; even though I catch the exception in my code, it still doesn't work.
2. An HTTP request timeout.
Example:
Traceback (most recent call last):
  File "C:/Users/.../subreddit psraw.py", line 20, in <module>
    for comment in submission.comments:
  File "C:\Python27\lib\site-packages\praw\models\reddit\base.py", line 31, in __getattr__
    self._fetch()
  File "C:\Python27\lib\site-packages\praw\models\reddit\submission.py", line 142, in _fetch
    'sort': self.comment_sort})
  File "C:\Python27\lib\site-packages\praw\reddit.py", line 367, in get
    data = self.request('GET', path, params=params)
  File "C:\Python27\lib\site-packages\praw\reddit.py", line 451, in request
    params=params)
  File "C:\Python27\lib\site-packages\prawcore\sessions.py", line 174, in request
    params=params, url=url)
  File "C:\Python27\lib\site-packages\prawcore\sessions.py", line 108, in _request_with_retries
    data, files, json, method, params, retries, url)
  File "C:\Python27\lib\site-packages\prawcore\sessions.py", line 93, in _make_request
    params=params)
  File "C:\Python27\lib\site-packages\prawcore\rate_limit.py", line 33, in call
    response = request_function(*args, **kwargs)
  File "C:\Python27\lib\site-packages\prawcore\requestor.py", line 49, in request
    raise RequestException(exc, args, kwargs)
prawcore.exceptions.RequestException: error with request
HTTPSConnectionPool(host='oauth.reddit.com', port=443): Read timed out. (read timeout=16.0)
Since a subreddit contains 10k+ comments, is there a way to solve this issue? Or is it because the reddit website is having some problems today?
My code:
import praw, datetime, os, psraw

reddit = praw.Reddit('bot1')
subreddit = reddit.subreddit('example')
for submission in psraw.submission_search(reddit, subreddit='example', limit=1000000):
    try:
        # get comments
        for comment in submission.comments:
            subid = submission.id
            comid = comment.id
            com_body = comment.body.encode('utf-8').replace("\n", " ")
            com_date = datetime.datetime.utcfromtimestamp(comment.created_utc)
            string_com = '"{0}", "{1}", "{2}"\n'
            formatted_string_com = string_com.format(comid, com_body, com_date)
            indexFile_comment = open('path' + subid + '.txt', 'a+')
            indexFile_comment.write(formatted_string_com)
    except ValueError:
        print ("error")
        continue
    except AttributeError:
        print ("error")
        continue
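One pattern that may help with a long-running loop like this (a sketch, not from the original code; the helper name is mine): wrap the comment fetch in a small retry loop, so a single read timeout is retried after a pause instead of aborting the whole run.

import time
from prawcore.exceptions import RequestException

def fetch_comments_with_retry(submission, retries=3, wait=16):
    # Accessing submission.comments is what triggers the HTTP request,
    # so a timeout surfaces here; retry a few times before giving up.
    for attempt in range(retries):
        try:
            return list(submission.comments)
        except RequestException:
            if attempt == retries - 1:
                raise
            time.sleep(wait)

Inside the existing loop you would then iterate over fetch_comments_with_retry(submission) instead of submission.comments.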

urllib2 does not handle http NO_CONTENT (204) as a successful HTTPS response

I am using an embedded Python (2.4.3) in a Digi ConnectPort X4. Here is the code to post to an Azure IoT Hub using HTTPS:
import urllib, sys, datetime, time
import urllib2

iot_device_id = 'HTTP_Device'
iot_endpoint = 'https://winiothub.azure-devices.net/devices/' + iot_device_id + '/messages/events?api-version=2016-02-03'
sas_header = {'Authorization': 'SharedAccessSignature sr=winiothub.azure-devices.net%2Fdevices%2FHTTP_Device&sig=o7dndsA%2FJOnkzYRUhqAwMrQXVhOTpIJqJqILyGDdQAc%3D&se=1522414643'}

while True:
    #try:
    body_data = {'gateway_serial': '123', 'readingvalue': '66.00', 'date': str(datetime.datetime.now())}
    iot_request = urllib2.Request(iot_endpoint, str(body_data), sas_header)
    resp = urllib2.urlopen(iot_request)
    contents = resp.read()
    resp.close()
    time.sleep(1)
    #except:
    #    print 'error'
    #    time.sleep(1)
The code actually posts the message to the hub, but it throws the following error:
Traceback (most recent call last):
  File "C:\Users\JeffreyBiesecker\documents\visual studio 2017\Projects\NewGateAzure\NewGateAzure\NewGateAzure2.py", line 14, in ?
    urllib2.urlopen(iot_request)
  File "C:\Python24\lib\urllib2.py", line 130, in urlopen
    return _opener.open(url, data)
  File "C:\Python24\lib\urllib2.py", line 364, in open
    response = meth(req, response)
  File "C:\Python24\lib\urllib2.py", line 471, in http_response
    response = self.parent.error(
  File "C:\Python24\lib\urllib2.py", line 402, in error
    return self._call_chain(*args)
  File "C:\Python24\lib\urllib2.py", line 337, in _call_chain
    result = func(*args)
  File "C:\Python24\lib\urllib2.py", line 480, in http_error_default
    raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 204: No Content
Press any key to continue . . .
I get the error whether I run the code in the embedded Digi gateway or in Visual Studio with Python 2.4.3.
urllib2.HTTPError includes the response code that was received. Because of this, you can catch the exception, test for 204, and continue safely if that's the case. Otherwise, you can handle (or re-raise) the exception.
try:
    resp = urllib2.urlopen(iot_request)
except urllib2.HTTPError as e:
    if e.code == 204: pass
    else: raise
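Applied to the loop in the question it would look roughly like the sketch below. Note that Python 2.4 does not support the except ... as e syntax (that arrived in 2.6), so on the Digi gateway the older comma form is needed:

while True:
    body_data = {'gateway_serial': '123', 'readingvalue': '66.00',
                 'date': str(datetime.datetime.now())}
    iot_request = urllib2.Request(iot_endpoint, str(body_data), sas_header)
    try:
        resp = urllib2.urlopen(iot_request)
        resp.read()
        resp.close()
    except urllib2.HTTPError, e:
        # 204 No Content just means the hub accepted the message without
        # sending a body back; anything else is a real error.
        if e.code != 204:
            raise
    time.sleep(1)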

Try block not catching - Am I making inadvertent internet access?

I accidentally disconnected my internet connection and received the error below. However, why did this line trigger the error?
self.content += tuple(subreddit_posts)
Or perhaps I should ask, why did the following line not lead to a sys.exit? It seems it should catch all errors:
try:
    subreddit_posts = self.r.get_content(url, limit=10)
except:
    print '*** Could not connect to Reddit.'
    sys.exit()
Does this mean I am inadvertently hitting reddit's network twice?
FYI, praw is a reddit API client, and get_content() fetches a subreddit's posts/submissions as a generator object.
The error message:
Traceback (most recent call last):
  File "beam.py", line 49, in <module>
    main()
  File "beam.py", line 44, in main
    scan.scanNSFW()
  File "beam.py", line 37, in scanNSFW
    map(self.getSub, self.nsfw)
  File "beam.py", line 26, in getSub
    self.content += tuple(subreddit_posts)
  File "/Library/Python/2.7/site-packages/praw/__init__.py", line 504, in get_co
    page_data = self.request_json(url, params=params)
  File "/Library/Python/2.7/site-packages/praw/decorators.py", line 163, in wrap
    return_value = function(reddit_session, *args, **kwargs)
  File "/Library/Python/2.7/site-packages/praw/__init__.py", line 557, in reques
    retry_on_error=retry_on_error)
  File "/Library/Python/2.7/site-packages/praw/__init__.py", line 399, in _reque
    _raise_response_exceptions(response)
  File "/Library/Python/2.7/site-packages/praw/internal.py", line 178, in _raise
    response.raise_for_status()
  File "/Library/Python/2.7/site-packages/requests/models.py", line 831, in rais
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 503 Server Error: Service Unavailable
The script (it's short):
import sys, os, pprint, praw

class Scanner(object):
    ''' A scanner object. '''
    def __init__(self):
        self.user_agent = 'debian.22990.myapp'
        self.r = praw.Reddit(user_agent=self.user_agent)
        self.nsfw = ('funny', 'nsfw')
        self.nsfw_posters = set()
        self.content = ()

    def getSub(self, subreddit):
        ''' Accepts a subreddit. Connects to subreddit and retrieves content.
        Unpacks generator object containing content into tuple. '''
        url = 'http://www.reddit.com/r/{sub}/'.format(sub=subreddit)
        print 'Scanning:', subreddit
        try:
            subreddit_posts = self.r.get_content(url, limit=10)
        except:
            print '*** Could not connect to Reddit.'
            sys.exit()
        print 'Constructing list.',
        self.content += tuple(subreddit_posts)
        print 'Done.'

    def addNSFWPoster(self, post):
        print 'Parsing author and adding to posters.'
        self.nsfw_posters.add(str(post.author))

    def scanNSFW(self):
        ''' Scans all NSFW subreddits. Makes list of posters.'''
        # Get content from all nsfw subreddits
        print 'Executing map function.'
        map(self.getSub, self.nsfw)
        # Scan content and get authors
        print 'Executing list comprehension.'
        [self.addNSFWPoster(post) for post in self.content]

def main():
    scan = Scanner()
    scan.scanNSFW()
    for i in scan.nsfw_posters:
        print i
    print len(scan.content)

main()
It looks like praw fetches objects lazily, so the request isn't actually made until you use subreddit_posts, which explains why it's blowing up on that line.
See: https://praw.readthedocs.org/en/v2.1.20/pages/lazy-loading.html
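If lazy loading is indeed the cause, one way to make the try block actually catch the network error (a sketch, not part of the answer above) is to force the generator to run while still inside it. This would be a drop-in replacement for getSub in the Scanner class:

def getSub(self, subreddit):
    url = 'http://www.reddit.com/r/{sub}/'.format(sub=subreddit)
    print 'Scanning:', subreddit
    try:
        # tuple() forces the lazy generator to issue its HTTP request here,
        # inside the try, so a 503/connection error is caught as intended.
        subreddit_posts = tuple(self.r.get_content(url, limit=10))
    except:
        print '*** Could not connect to Reddit.'
        sys.exit()
    print 'Constructing list.',
    self.content += subreddit_posts
    print 'Done.'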

Rest API to output in .json

I am creating a script that calls a REST API in Python and spits out the results in JSON format. I am getting a few traceback errors in my code. How can I go about fixing this issue?
'import sitecustomize' failed; use -v for traceback
Traceback (most recent call last):
  File "/home/Desktop/Sync.py", line 12, in <module>
    url = urllib2.Request(request)
  File "/usr/lib/python2.7/urllib2.py", line 202, in __init__
    self.__original = unwrap(url)
  File "/usr/lib/python2.7/urllib.py", line 1057, in unwrap
    url = url.strip()
  File "/usr/lib/python2.7/urllib2.py", line 229, in __getattr__
    raise AttributeError, attr
AttributeError: strip
Here's the code:
import urllib2
import json

url = "http://google.com"
request = urllib2.Request(url)
request.add_header("Authorization", "Basic xxxxxxxxxxxxxxxxxx")
socket = urllib2.urlopen(request)
data = json.dumps(socket)
hdrs = socket.headers
source = socket.read()
socket.close()
print "---- Headers -----"
print data
print "---- Source HTML -----"
print source
print "---- END -----"
value = 0
for line in source.splitlines():
    if not line.strip(): continue
    if line.startswith("value="):
        try:
            value = line.split("=")
        except IndexError:
            pass
    if value > 0:
        break
open("some.json", "w").write("value is: %d" % value)
You seem to have an issue here:
request=urllib2.Request( "http.google.com")
request.add_header("Authorization","Basic xxxxxxxxxxxxxxxxxxxxxxxx=")
url = urllib2.Request(request)
socket = urllib2.urlopen(url)
You are trying to create a Request object named "url" by passing a Request object into the constructor.
See http://docs.python.org/2/library/urllib2.html#urllib2.Request
Try this:
request=urllib2.Request( "http.google.com")
request.add_header("Authorization","Basic xxxxxxxxxxxxxxxxxxxxxxxx=")
socket = urllib2.urlopen(request)
From the documentation of the Request class:
url should be a string containing a valid URL.
You are currently passing another Request object to its constructor, so that's the reason for the error you're seeing. The correct way to do this:
request=urllib2.Request( "http.google.com")
request.add_header("Authorization","Basic xxxxxxxxxxxxxxxxxxxxxxxx=")
socket = urllib2.urlopen(request)
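Separately, since the goal is JSON output: json.dumps(socket) serializes the Python object itself (and will fail on a response object); it does not parse the body the server sent back. A sketch of reading and decoding the response, assuming the API really returns JSON (the endpoint here is a placeholder):

import json
import urllib2

request = urllib2.Request("http://example.com/api")  # placeholder endpoint
request.add_header("Authorization", "Basic xxxxxxxxxxxxxxxxxx")
response = urllib2.urlopen(request)
body = response.read()
response.close()

data = json.loads(body)            # parse the JSON the server returned
with open("some.json", "w") as f:
    json.dump(data, f)             # write it back out as JSON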

Having Trouble Raising Exception for urllib2.urlopen()

I'm using the following code and I can't figure out why it's not raising an exception when the urlopen() is failing.
In my particular case, I know why it's failing: my URLs don't have http:// in front of them. But I want to catch those cases so the script can continue to run rather than exiting.
req = urllib2.Request(link)
try:
    url = urllib2.urlopen(req)
except urllib2.URLError, e:
    print e.code
    print e.read()
    return False
and I'm getting:
Traceback (most recent call last):
  File "./getURLs.py", line 141, in <module>
    main()
  File "./getURLs.py", line 82, in main
    Process(args).get_children()
  File "./getURLs.py", line 65, in get_children
    self.get_links(link)
  File "./getURLs.py", line 46, in get_links
    data = urllib2.urlopen(req)
  File "/usr/local/lib/python2.7/urllib2.py", line 126, in urlopen
    return _opener.open(url, data, timeout)
  File "/usr/local/lib/python2.7/urllib2.py", line 383, in open
    protocol = req.get_type()
  File "/usr/local/lib/python2.7/urllib2.py", line 244, in get_type
    raise ValueError, "unknown url type: %s" % self.__original
ValueError: unknown url type: /
Solution
For anyone else interested in my particular solution, I'm using the following to catch both exceptions:
req = urllib2.Request(link)
try:
    url = urllib2.urlopen(req)
except (ValueError, urllib2.URLError) as e:
    print e
    return False
From what you've pasted, it looks like you're catching the wrong type of exception. The code should say
try:
    url = urllib2.urlopen(req)
except ValueError:
    # etc etc etc.
If it's critical that the entirety of your code run, you can also have a plain except: with an unspecified exception type, or even a finally. See: http://docs.python.org/tutorial/errors.html
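Another option, outside the answers above (a sketch; page_url here stands for whatever page the link was scraped from): resolve each link against its page before calling urlopen, so relative links like "/" never reach it in the first place.

import urllib2
import urlparse

def open_link(link, page_url):
    # Turn relative links ("/", "about.html", "//cdn.example.com/x")
    # into absolute URLs before handing them to urlopen.
    absolute = urlparse.urljoin(page_url, link)
    return urllib2.urlopen(urllib2.Request(absolute))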
