I'm using Python 3.3.0 on Windows 7.
I have two files: dork.txt and fuzz.py
dork.txt contains the following:
/about.php?id=1
/en/company/news/full.php?Id=232
/music.php?title=11
fuzz.py contains the following:
srcurl = "ANY-WEBSITE"
drkfuz = open("dorks.txt", "r").readlines()
print("\n[+] Number of dork names to be fuzzed:", len(drkfuz))
for dorks in drkfuz:
    dorks = dorks.rstrip("\n")
    srcurl = "http://" + srcurl + dorks
    requrl = urllib.request.Request(srcurl)
    #httpreq = urllib.request.urlopen(requrl)
    # Starting the request
    try:
        httpreq = urllib.request.urlopen(requrl)
    except urllib.error.HTTPError as e:
        print("[!] Error code: ", e.code)
        print("")
        #sys.exit(1)
    except urllib.error.URLError as e:
        print("[!] Reason: ", e.reason)
        print("")
        #sys.exit(1)
    #if e.code != 404:
    if httpreq.getcode() == 200:
        print("\n*****srcurl********\n", srcurl)
        return srcurl
So, when I enter the correct website name which has /about.php?id=1, it works fine.
But when I provide a website which has /en/company/news/full.php?Id=232, it first prints Error code: 404 and then gives me one of the following errors: UnboundLocalError: local variable 'e' referenced before assignment or UnboundLocalError: local variable 'httpreq' referenced before assignment.
I can understand that if the website doesn't have a page matching /about.php?id=1, it gives Error code: 404, but why isn't it going back into the for loop to check the remaining dorks in the text file? Why does it stop there and throw an error?
I want to make a script that finds valid pages starting from just a website address like www.xyz.com.
When the urllib.request.urlopen(requrl) expression throws an exception, the variable httpreq is never set. You could set it to None before the try statement and then test whether it is still None afterwards:
httpreq = None
try:
    httpreq = urllib.request.urlopen(requrl)
# ...

if httpreq is not None and httpreq.getcode() == 200:
srcurl = "ANY-WEBSITE"
drkfuz = open("dorks.txt", "r").readlines()
print("\n[+] Number of dork names to be fuzzed:", len(drkfuz))
for dorks in drkfuz:
    dorks = dorks.rstrip("\n")
    srcurl = "http://" + srcurl + dorks
    try:
        requrl = urllib.request.Request(srcurl)
        if requrl is not None:
            try:
                httpreq = urllib.request.urlopen(requrl)
                if httpreq.getcode() == 200:
                    print("\n*****srcurl********\n", srcurl)
                    return srcurl
            except:
                # Handle exception
                pass
    except:
        # Handle your exception
        print("Exception")
Untested code, but it will work logically.
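Note also that the code above still reassigns srcurl inside the loop, so "http://" gets prepended again on every iteration. Here is a minimal, self-contained sketch that keeps the base URL intact and simply continues to the next dork on errors (fuzz_dorks is a hypothetical name, not from the original script):

import urllib.request
import urllib.error

def fuzz_dorks(srcurl, dork_file="dorks.txt"):
    with open(dork_file, "r") as f:
        drkfuz = [line.rstrip("\n") for line in f]
    print("\n[+] Number of dork names to be fuzzed:", len(drkfuz))
    for dork in drkfuz:
        fullurl = "http://" + srcurl + dork   # build the URL without touching srcurl
        try:
            httpreq = urllib.request.urlopen(fullurl)
        except urllib.error.HTTPError as e:
            print("[!] Error code:", e.code)
            continue                          # move on to the next dork
        except urllib.error.URLError as e:
            print("[!] Reason:", e.reason)
            continue
        if httpreq.getcode() == 200:
            print("\n*****srcurl********\n", fullurl)
            return fullurl
    return None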
Related
When I use the variable server in my print statement I get this error:
File "/Users/c0deninja/projects/gsecurity/modules/fetch_requests.py", line 40, in get_headers
    print(f"{Fore.MAGENTA}[+] {Fore.CYAN}-{Fore.WHITE} SERVER: {Fore.GREEN}{server}")
UnboundLocalError: local variable 'server' referenced before assignment
Also, the error only appears on certain websites; maybe it starts getting buggy when there's no Server header?
def get_headers(url: str) -> str:
    sessions = requests.Session()
    try:
        res = sessions.get(url, verify=False, headers=header)
        if res.status_code == 200:
            for value, desc in res.headers.items():
                if value == "Server":
                    server = desc
                if value == "Via":
                    via = desc
            print(f"{Fore.MAGENTA}[+] {Fore.CYAN}-{Fore.WHITE} SERVER: {Fore.GREEN}{server}")
            print(f"{Fore.MAGENTA}[+] {Fore.CYAN}-{Fore.WHITE} VIA: {Fore.GREEN}{via}")
    except requests.exceptions.InvalidSchema:
        print("Please use https://www.target.com")
    except requests.exceptions.ConnectionError:
        print("Check the target URL and try again\n")
OK guys, I fixed it: I created an empty list outside of the loop, appended the output there, and then printed it.
def get_headers(url: str) -> str:
    sessions = requests.Session()
    server_output = []
    via_output = []
    try:
        res = sessions.get(url, verify=False, headers=header)
        if res.status_code == 200:
            for value, desc in res.headers.items():
                if value == "Server":
                    server_output.append(desc)
                else:
                    pass
                if value == "Via":
                    via_output.append(desc)
                else:
                    pass
            print(f"{Fore.MAGENTA}[+] {Fore.CYAN}-{Fore.WHITE} SERVER: {Fore.GREEN}{', '.join(map(str, server_output))}")
            print(f"{Fore.MAGENTA}[+] {Fore.CYAN}-{Fore.WHITE} VIA: {Fore.GREEN}{', '.join(map(str, via_output))}")
    except requests.exceptions.InvalidSchema:
        print("Please use https://www.target.com")
    except requests.exceptions.ConnectionError:
        print("Check the target URL and try again\n")
The for loop is only iterating over one object from the list provided by the function. Below are the code and terminal logs.
Note: I want to delete both of the URLs that are part of the list below.
The delete_index_url() function output looks like this:
['https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.13', 'https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.16']
def clean_index():
    delete_urls = delete_index_url()  # above function output assigned to variable
    for i in delete_urls:
        print(i)  # <-- this only prints "https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.13"
        try:
            req = requests.delete(i)
        except requests.exceptions.ConnectionError as e:
            print('ERROR: Not able to connect to URL')
            return 0
        except requests.exceptions.Timeout as e:
            print('ERROR: ElasticSearch time out')
            return 0
        except requests.exceptions.HTTPError as e:
            print('ERROR: HTTP error')
            return 0
        else:
            print('INFO: ElasticSearch response status code was %s' % req.status_code)
            if req.status_code != 200:
                return 0
            else:
                return 1

print(clean_index())
Log output from the Python script:
INFO: Sorting indexes
['https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.13', 'https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.16']
INFO: Getting a list of indexes
INFO: ElasticSearch response status code was 200
INFO: Found 200 indexes
INFO: Sorting indexes
https://vpc.xxx.es.amazonaws.com/staging-logs-2019.09.13 # only 2019.09.13, not 2019.09.16 logs URLs
Instead of returning 0 or 1 and ending the function right away, you can create a list, store the responses in it, and return it:
def clean_index():
    responses = []
    delete_urls = delete_index_url()  # above function output assigned to variable
    for i in delete_urls:
        print(i)
        try:
            req = requests.delete(i)
        except requests.exceptions.ConnectionError as e:
            print('ERROR: Not able to connect to URL')
            responses.append(0)
        except requests.exceptions.Timeout as e:
            print('ERROR: ElasticSearch time out')
            responses.append(0)
        except requests.exceptions.HTTPError as e:
            print('ERROR: HTTP error')
            responses.append(0)
        else:
            print('INFO: ElasticSearch response status code was %s' % req.status_code)
            if req.status_code != 200:
                responses.append(0)
            else:
                responses.append(1)
    return responses

print(clean_index())
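If a single overall result is still needed, the returned list can be collapsed afterwards; a small usage example:

results = clean_index()
print(results)                        # e.g. [1, 1] when both deletes succeed
print(all(r == 1 for r in results))   # True only if every delete returned status 200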
I need a script that works like a cPanel checker, with more than one URL, where the URLs are stored in a txt file.
Usage: python script.py list.txt
Format in file list.txt: https://demo.cpanel.net:2083|democom|DemoCoA5620
This is my code, but it doesn't work. Can someone help me?
Thanks.
import requests, sys
from multiprocessing.dummy import Pool as ThreadPool
from timeit import default_timer as timer

try:
    with open(sys.argv[1], 'r') as f:
        list_data = [line.strip() for line in f if line.strip()]
except IOError:
    pass

def cpanel(url):
    try:
        data = {'user': 'democom', 'pass': 'DemoCoA5620'}
        r = requests.post(url, data=data)
        if r.status_code == 200:
            print "login success"
        else:
            print "login failed"
    except:
        pass

def chekers(url):
    try:
        cpanel(url)
    except:
        pass

def Main():
    try:
        start = timer()
        pp = ThreadPool(25)
        pr = pp.map(chekers, list_data)
        print('Time: ' + str(timer() - start) + ' seconds')
    except:
        pass

if __name__ == '__main__':
    Main()
I fixed your code so that it returns an actual array of booleans indicating, for each URL, whether the cpanel function succeeded.
from __future__ import print_function
import requests
from multiprocessing.pool import ThreadPool

try:
    list_data = ["https://demo.cpanel.net:2083|democom|DemoCoA5620",
                 "https://demo.cpanel.net:2083|UserDoesNotExist|WRONGPASSWORD",
                 ]
except IOError:
    pass

def cpanel(url):
    try:
        # try to split that url to get username / password
        try:
            url, username, password = url.split('|')
        except Exception as e:
            print("Url {} seems to have wrong format. Concrete error: {}".format(url, e))
            return False
        # build the correct url
        url += '/login/?login_only=1'
        # build post parameters
        params = {'user': username,
                  'pass': password}
        # make request
        r = requests.post(url, params)
        if r.status_code == 200:
            print("login for user {} success".format(username))
            return True
        else:
            print("login for user {} failed due to Status Code {} and message \"{}\"".format(username, r.status_code, r.reason))
            return False
    except Exception as e:
        print("Error occurred for url {} ".format(e))
        return False

def chekers(url):
    return cpanel(url)

def Main():
    try:
        # start = timer()
        pp = ThreadPool(1)
        pr = pp.map(chekers, list_data)
        print(pr)
        # print('Time: ' + str(timer() - start) + ' seconds')
    except:
        pass

if __name__ == '__main__':
    Main()
Output:
login for user democom success
login for user UserDoesNotExist failed due to Status Code 401 and message "Access Denied"
[True, False]
Be aware that I replaced your file read operation with some fixed URLs.
Since you use requests.post, I guess you actually want to POST something to those URLs. Your code does not do that. If you just want to send a request, use the requests.get method.
See the official documentation for the requests package for more details: https://2.python-requests.org/en/master/user/quickstart/#make-a-request
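For illustration, the difference looks roughly like this (reusing the demo URL and credentials from the question):

import requests

# POST sends the credentials in the request body, which is what a login endpoint expects
r = requests.post("https://demo.cpanel.net:2083/login/?login_only=1",
                  data={"user": "democom", "pass": "DemoCoA5620"})
print(r.status_code)

# GET only fetches the page; no form data is submitted
r = requests.get("https://demo.cpanel.net:2083")
print(r.status_code)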
Also note that
"but it doesn't work"
is NOT a question.
The code below produces this error:
'Response' object has no attribute 'read'
Please help me understand what I did wrong. I made sure un and pwd are correct.
user = "un"
password = 'pwd'
datanodes = ["https://server040:25000/"]

for i, datanode in enumerate(datanodes):
    print("Checking {0}: {1}".format(i, datanode))
    try:
        print "trying"
        response = requests.get(datanode + "queries?json", auth=(user, password), verify='certs.pem')
        print response
        data = json.loads(response.read())
        print data
        if data["num_waiting_queries"] > 0:
            print(data["num_waiting_queries"])
            for in_flight_query in data["in_flight_queries"]:
                if in_flight_query["waiting"] is True and in_flight_query['state'] == "FINISHED":
                    cancel_url = datanode + "cancel_query?query_id={}".format(in_flight_query['query_id'])
                    print(cancel_url)
    except IOError as ioe:
        print ioe
    except Exception as e:
        print(e)
I have tried both json.load(response) and json.loads(response.read()).
I was able to get around the issues by adding HTTPDigestAuth and changing data = json.loads(response.read()) to data = response.json()
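A minimal sketch of what that change looks like, reusing the names from the question (the URL, credentials and certs.pem are the question's placeholders):

import requests
from requests.auth import HTTPDigestAuth

user = "un"
password = "pwd"
datanode = "https://server040:25000/"

response = requests.get(datanode + "queries?json",
                        auth=HTTPDigestAuth(user, password),
                        verify="certs.pem")
# a requests Response has no .read(); .json() parses the JSON body directly
data = response.json()
print(data)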
I'm trying to get some data from a web page. To speed up this process (they allow me to make 1000 requests per minute), I use ThreadPool.
Since there is a huge amount of data, the process is quite vulnerable to connection failures etc., so I try to log everything I can in order to detect each mistake I made in the code.
The problem is that the program sometimes just stops without any exception (it acts like it is running but with no effect; I use PyCharm). I log caught exceptions everywhere I can, but I can't see any exception in any log.
I assume that if a timeout were reached, an exception would be raised and logged.
I've found out where the problem could be. Here is the code:
As a pool, I use: from multiprocessing.pool import ThreadPool as Pool
And lock: from threading import Lock
The download_category function is used in a loop.
def download_category(url):
    # some code
    #
    # ...
    try:
        log('Create pool...')
        _pool = Pool(_workers_number)
        with open('database/temp_produkty.txt') as f:
            log('Spracovavanie produktov... vytvaranie vlakien...')  # "Processing products... creating threads..." - I see this in the log
            for url_product in f:
                x = _pool.apply_async(process_product, args=(url_product.strip('\n'), url))
        _pool.close()
        _pool.join()
        log('Presuvanie produktov z temp export do export.csv...')  # "Moving products from temp export to export.csv..." - I can't see this in the log
        temp_export_to_export_csv()
        set_spracovanie_kategorie(url)
    except Exception as e:
        logging.exception('Got exception on download_one_category: {}'.format(url))
And the process_product function:
def process_product(url, cat):
    try:
        data = get_product_data(url)
    except:
        log('{}: {} exception while getting product data... #')  # I don't see this in log
        return
    try:
        print_to_temp_export(data, cat)  # I don't see this in log
    except:
        log('{}: {} exception while printing to csv... #')  # I don't see this in log
        raise
LOG function:
def log(text):
    now = datetime.now().strftime('%d.%m.%Y %H:%M:%S')
    _lock.acquire()
    mLib.printToFile('logging/log.log', '{} -> {}'.format(now, text))
    _lock.release()
I use the logging module too. In that log, I can see that the request was apparently sent about 8 times (the number of workers) but no answer was received.
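To illustrate the timeout point: requests.get blocks indefinitely unless a timeout argument is passed (my load_url below currently passes none), so a stalled connection can hold a worker thread forever without raising anything. Something like this would raise requests.exceptions.Timeout instead of hanging (the numbers are just examples):

import requests

url = "http://example.com"  # placeholder

# (connect timeout, read timeout) in seconds; without this, requests waits forever
r = requests.get(url, allow_redirects=False, timeout=(5, 30))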
EDIT1:
def get_product_data(url):
    data = defaultdict(lambda: '-')
    root = load_root(url)
    try:
        nazov = root.xpath('//h1[@itemprop="name"]/text()')[0]
    except:
        nazov = root.xpath('//h1/text()')[0]
    under_block = root.xpath('//h2[@id="lowest-cost"]')
    if len(under_block) < 1:
        under_block = root.xpath('//h2[contains(text(),"Naj")]')
    if len(under_block) < 1:
        return False
    data['nazov'] = nazov
    data['url'] = url
    blocks = under_block[0].xpath('./following-sibling::div[@class="shp"]/div[contains(@class,"shp")]')
    i = 0
    for block in blocks:
        i += 1
        data['dat{}_men'.format(i)] = block.xpath('.//a[@class="link"]/text()')[0]
    del root
    return data
LOAD ROOT:
class RedirectException(Exception):
    pass

def load_url(url):
    r = requests.get(url, allow_redirects=False)
    if r.status_code == 301:
        raise RedirectException
    if r.status_code == 404:
        if '-q-' in url:
            url = url.replace('-q-', '-')
            mLib.printToFileWOEncoding('logging/neexistujuce.txt', 'Skusanie {} kategorie...'.format(url))  # "Trying {} category..."
            return load_url(url)  # THIS IS NOT LOOPING
        else:
            mLib.printToFileWOEncoding('logging/neexistujuce.txt', '{}'.format(url))
    html = r.text
    return html

def load_root(url):
    try:
        html = load_url(url)
    except Exception as e:
        logging.exception('load_root_exception')
        raise
    return etree.fromstring(html, etree.HTMLParser())