Python Repeated IP-Scan with threads - python

After repeated ip scans the program crashes. The threads are not killed and I don't know how to to this. Any idea?
import time
import datetime
import subprocess,os,threading
from queue import Queue
ipbase = "192.168.101.{0}"
startadd = 20
stoppadd = 100
def ipscan(): #looking for available IP adresses
lock=threading.Lock()
_start=time.time()
def check(n):
with open(os.devnull, "wb") as limbo:
ip=ipbase.format(n)
result=subprocess.Popen(["ping", "-n", "2", "-w", "300", ip],stdout=limbo, stderr=limbo).wait(timeout=10)
with lock:
if not result:
print (ip, "active")
else:
pass
def threader():
while True:
worker=q.get()
check(worker)
q.task_done()
print("Scan IP...")
print("Address scan from " + ipbase + str(startadd) + " until " + ipbase + str(stoppadd))
q=Queue()
for _ in range(startadd,stoppadd):
t=threading.Thread(target=threader)
t.daemon=True
t.start()
for worker in range(startadd,stoppadd):
q.put(worker)
q.join()
if __name__ == "__main__":
starttime = datetime.datetime.now()
print (starttime)
print("first check of available ip adresses..")
ipscan() #looking for available IP adresses
cyclebegin = datetime.datetime.now()
acttime = datetime.datetime.now()
sampletime = 3
while (1):
if ((acttime - cyclebegin) > datetime.timedelta(seconds=sampletime)):
dtime = acttime - cyclebegin
print ("delta-seconds: ",dtime.seconds)
cyclebegin = datetime.datetime.now()
ipscan()
acttime = datetime.datetime.now()
After ipscan is finished with q.join() the tasks should be finished but not killed, as I understood. With the repeated call of ipscan the limit of the threads is exeeded. What do I have to modify to prevent this?

Related

python multiprocessing doesn't work(doesn't go back to parent process) using Queue

I have a question about Queue in python multiprocessing.
I tried to perform multiprocessing operation on DataFrame using Queue and Process.
However, only sub-processes are executed, and subsequent operations are not executed.
In other words, only the completion messages of sub-processes are printed, and subsequent operations are not executed.
The code I ran is below.
import pandas as pd
from multiprocessing import Process, Queue
import os
import numpy as np
from pandasql import sqldf
import time
count_of_core = 8
def add_rownum_col(queue, df_of_split) :
sub_process_id = os.getpid()
print("!!! {} subprocess started !!!".format(sub_process_id), "\n")
df_of_split_with_rownum_col = sqldf("SELECT *, ROW_NUMBER() OVER() AS ROWNUM_ FROM df_of_split")
print("* making df_of_split_with_rownum_col finished", "\n")
print(df_of_split_with_rownum_col, "\n")
queue.put(df_of_split_with_rownum_col)
print("!!! {} subprocess finished !!!".format(sub_process_id), "\n")
return
if __name__ == '__main__':
parent_process_id = os.getpid()
print("* parent_process_id :", parent_process_id, "\n")
df_of_example = pd.read_csv('../data/example_for_multiprocessing.csv', engine='python', encoding='utf-8')
print("* df_of_example :", "\n")
print(df_of_example, "\n")
print("* length of df_of_example :", len(df_of_example), "\n")
df_of_split = np.array_split(df_of_example, count_of_core)
print("count of batch :", len(df_of_split), "\n")
print("!!! multiprocessing started !!!", "\n")
df_of_result = pd.DataFrame()
queue = Queue()
start_time_of_multiprocessing = time.time()
p_0 = Process(target = add_rownum_col, args = (queue, df_of_split[0]))
p_0.start()
p_1 = Process(target = add_rownum_col, args = (queue, df_of_split[1]))
p_1.start()
p_2 = Process(target = add_rownum_col, args = (queue, df_of_split[2]))
p_2.start()
p_3 = Process(target = add_rownum_col, args = (queue, df_of_split[3]))
p_3.start()
p_4 = Process(target = add_rownum_col, args = (queue, df_of_split[4]))
p_4.start()
p_5 = Process(target = add_rownum_col, args = (queue, df_of_split[5]))
p_5.start()
p_6 = Process(target = add_rownum_col, args = (queue, df_of_split[6]))
p_6.start()
p_7 = Process(target = add_rownum_col, args = (queue, df_of_split[7]))
p_7.start()
p_0.join()
p_1.join()
p_2.join()
p_3.join()
p_4.join()
p_5.join()
p_6.join()
p_7.join()
print("!!! multiprocessing finished !!!")
print("!!! back to parent process({}) !!!".format(parent_process_id))
print("* time for multiprocessing :", time.time() - start_time_of_multiprocessing, "\n")
print("!!! concatenating all batches in queue started !!!", "\n")
queue.put('exit')
while True :
df_batch = queue.get()
if df_batch == 'exit':
break
else:
df_of_result = pd.concat([df_of_result, df_batch])
print("!!! concatenating all batches in queue finished !!!", "\n")
print("* df_of_result :", "\n")
print(df_of_result, "\n")
print("* length of df_of_result :", len(df_of_result), "\n")
It seems that it cannot return to the parent-process after the operation of sub-processes is finished.
I would like to get a detailed answer as to why this problem occurred.
Please provide detailed answer.
(I'm using python 3.7)

Queue.put inside a worker thread failing

Inside a worker thread I am generating a data frame . Trying to put this into the queue passed to the worker thread is failing. In fact trying to put any values into the queue is failing.
The part of the code that is failing inside the worker thread task1() is given below:
df = pd.DataFrame([[1,2,3,4],[3,4,5,6]])
qmdlvalues.put(df)
mdltiming = time.time() - start
qmdlparams.put(paramval)
qtiming.put(mdltiming)
Complete code
import threading
import queue
from sklearn.manifold import TSNE
import os
import time
def write_tsne_op(opdata,fname,header):
with open(fname, 'w') as outfile:
outfile.write(header)
for data_slice in opdata:
np.savetxt(outfile, data_slice,delimiter=",")
def task1(qmdlvalues,qmdlparams,qtiming,paramval):
start = time.time()
#tmpmdl1 = TSNE(perplexity=100,early_exaggeration=1, n_components=2,random_state=0,verbose=1)
#qmdlvalues.put(tmpmdl1.fit_transform(dense_mx))
df = pd.DataFrame([[1,2,3,4],[3,4,5,6]])
qmdlvalues.put(df)
mdltiming = time.time() - start
qmdlparams.put(paramval)
qtiming.put(mdltiming)
print(df)
print(str(mdltiming))
print(paramval)
def task2(qmdlvalues,qmdlparams,qtiming,paramval):
start = time.time()
#tmpmdl2 = TSNE(perplexity=100,early_exaggeration=10, n_components=2,random_state=0,verbose=1)
#qmdlvalues.put(tmpmdl2.fit_transform(dense_mx2))
qmdlvalues.put(pd.DataFrame([[1,2,3,4],[3,4,5,6]]))
qmdlparams.put(paramval)
mdltiming = time.time() - start
qtiming.put(mdltiming)
if __name__ == "__main__":
dense_mx2 = dense_mx
dense_mx3 = dense_mx
qmdlvl = queue.Queue()
qmdlch = queue.Queue()
qtme = queue.Queue()
mdlvalues = pd.DataFrame()
t1 = threading.Thread(target=task1,args=(qmdlvl,qmdlch,qtme,"#perplex: 100 early exag: 1 timing:$_plex100_exag1.csv"), name='t1')
t2 = threading.Thread(target=task2,args=(qmdlvl,qmdlch,qtme,"#perplex: 100 early exag: 10 timing:$_plex100_exag10.cv"), name='t2')
# starting threads
t1.start()
t2.start()
while True:
if qmdlvl.empty():
print("Queue closed. Exiting thread.")
break
try:
item = qmdlvl.get(timeout=.5)
except:
continue
print("Got item:", item)
# wait until all threads finish
t1.join()
t2.join()
Below is the actual output I am getting from the code in the main
while True:
if qmdlvl.empty():
print("Queue closed. Exiting thread.")
break
try:
item = qmdlvl.get(timeout=.5)
except:
continue
print("Got item:", item)
ID of process running main program: 6456
Main thread name: MainThread
Queue closed. Exiting thread.
I want to able to put the data frame into a queue inside the worker thread and access the same data frame in the main thread.
There are parameter mis-matches in my earlier code those have been corrected an a full working code presented below.
I stored the output of t-SNE directly into the queue and retrieved the same in the main thread. The next progression would be convert this to thread pool and sub-classing.
import threading
import queue
from sklearn.manifold import TSNE
import os
import time
def write_tsne_op(opdata,fname,header):
with open(fname, 'w') as outfile:
outfile.write(header)
for data_slice in opdata:
np.savetxt(outfile, data_slice,delimiter=",")
def task1(ip_matrix,qmdlvalues,qmdlparam,plex,exag,qmdltime,qmdlhrfn,hderfname):
string=""
start=0
end=0
mdltiming=0
start = time.time()
tmpmdl1 = TSNE(perplexity=plex,early_exaggeration=exag, n_components=2,random_state=0,verbose=1)
qmdlvalues.put(tmpmdl1.fit_transform(ip_matrix))
string = str(plex)+ "$" + str(exag)
qmdlparam.put(string)
qmdlhrfn.put(hderfname)
end = time.time()
mdltimig = end - start
print(str(mdltiming)+"time")
qmdltime.put(mdltiming)
def task2(ip_matrix,qmdlvalues,qmdlparam,plex,exag,qmdltime,qmdlhrfn,hderfname):
string=""
start=0
end=0
mdltiming=0
start = time.time()
tmpmdl2 = TSNE(perplexity=plex,early_exaggeration=exag, n_components=2,random_state=0,verbose=1)
qmdlvalues.put(tmpmdl2.fit_transform(ip_matrix))
string = str(plex)+ "$" + str(exag)
qmdlparam.put(string)
qmdlhrfn.put(hderfname)
end = time.time()
mdltimig = end - start
qmdltime.put(mdltiming)
def task3(ip_matrix,qmdlvalues,qmdlparam,plex,exag,qmdltime,qmdlhrfn,hderfname):
string=""
start=0
end=0
mdltiming=0
start = time.time()
tmpmdl3 = TSNE(perplexity=plex,early_exaggeration=exag, n_components=2,random_state=0,verbose=1)
qmdlvalues.put(tmpmdl3.fit_transform(ip_matrix))
string = str(plex)+ "$" + str(exag)
qmdlparam.put(string)
qmdlhrfn.put(hderfname)
end = time.time()
mdltimig = end - start
qmdltime.put(mdltiming)
def task4(ip_matrix,qmdlvalues,qmdlparam,plex,exag,qmdltime,qmdlhrfn,hderfname):
string=""
start=0
end=0
mdltiming=0
start = time.time()
tmpmdl4 = TSNE(perplexity=plex,early_exaggeration=exag, n_components=2,random_state=0,verbose=1)
qmdlvalues.put(tmpmdl4.fit_transform(ip_matrix))
string = str(plex)+ "$" + str(exag)
qmdlparam.put(string)
qmdlhrfn.put(hderfname)
end = time.time()
mdltimig = end - start
qmdltime.put(mdltiming)
if __name__ == "__main__":
# print ID of current process
print("ID of process running main program: {}".format(os.getpid()))
# print name of main thread
print("Main thread name: {}".format(threading.main_thread().name))
dense_mx2 = dense_mx
dense_mx3 = dense_mx
dense_mx4 = dense_mx
qmdlvl = queue.Queue()
qmdlch = queue.Queue()
qmdltme = queue.Queue()
qmdlhdrfname = queue.Queue()
perplex = 200
# creating threads
exag=10
t1 = threading.Thread(target=task1,args=(dense_mx,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 10 timing:$_plex200_exag10.csv"), name='t1')
exag=30
t2 = threading.Thread(target=task2,args=(dense_mx2,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 30 timing:$_plex200_exag30.cv"), name='t2')
exag=50
t3 = threading.Thread(target=task3,args=(dense_mx3,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 50 timing:$_plex200_exag50.csv"), name='t3')
exag=100
t4 = threading.Thread(target=task4,args=(dense_mx4,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 100 timing:$_plex200_exag100.cv"), name='t4')
# starting threads
t1.start()
t2.start()
t3.start()
t4.start()
# wait until all threads finish
t1.join()
t2.join()
t3.join()
t4.join()
while True:
if qmdlvl.empty():
print("Queue closed. Exiting thread.")
break
try:
item1 = qmdlvl.get(timeout=.5)
item2 = qmdlch.get(timeout=.5)
item3 = qmdltme.get(timeout=.5)
header,fname = qmdlhdrfname.get(timeout=.5).split('$')
except:
continue
write_tsne_op(item1,fname,header)

Only 1 Thread started in for loop

So Im trying to code a really simple Internet Download Manager Spoof with Python 2.7
It is supposed to query a files HTTP header, get the byte range and spread the download among a no.of threads(I hard-coded 2 for simplicity) according to the byte range and later join the file parts together again.
The problem is my console log tells me that only 1 thread is started.
[EDIT] The problem has been solved. Find the working code below.
Here is my source:
from __future__ import print_function
import threading
import urllib
import urllib2
import time
threads = []
# url to open
url = "http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4"
u = urllib.urlopen(url)
# define file
file_name = "test.mp4"
f = open(file_name, 'wb')
# open url and get header info
def get_file_size(url):
stream_size = u.info()['Content-Length']
end = stream_size
return end
start = 0
#get stream size
end = get_file_size(url)
# specify block size
block_sz = 512
#algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread1():
full_stream_size = end
first_thread = {'start':0, 'end':(int(full_stream_size)/2)}
print(first_thread)
return first_thread
#algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread2():
full_stream_size = end
second_thread= {'start':int(full_stream_size)/2,'end': int(full_stream_size)}
print(second_thread)
return second_thread
# download function
def download_thread(url ,id,start,end):
current_size = int(float(start)/1024)
total_size = int(float(end)/1024)
print ("Start at_"+str(current_size) + "Ends at_" + str(total_size))
# specify request range and init stream
req = urllib2.Request(url)
req.headers['Range'] = 'bytes=%s-%s' % (start, end)
data = urllib2.urlopen(req)
while True:
buffer = u.read(block_sz)
if not buffer:
break
start += len(buffer)
f.write(buffer)
thread_id = id
#percentage = (current_size * 100 / total_size)
status = str(thread_id) + "_" + str(current_size) + "_" +str(total_size)
print (status)
#starts 2 threads
def start_threads():
for i in range(2):
#if first loop, start thread 1
if(i==1):
start = calculate_no_of_bytes_for_thread1().get('start')
end = calculate_no_of_bytes_for_thread1().get('end')
print("Thread 1 started")
t = threading.Thread(target=download_thread, args=(url,i,start,end))
t.start()
threads.append( t)
#if second loop, start thread 1
if(i==2):
start = calculate_no_of_bytes_for_thread2().get('start')
end = calculate_no_of_bytes_for_thread2().get('end')
print("Thread 2 started")
t = threading.Thread(target=download_thread, args=(url,i,start,end))
t.start()
threads.append( t)
# Join threads back (order doesn't matter, you just want them all)
for i in threads:
i.join()
#start benchmarking
start_time = time.clock()
start_threads()
print ("Finito!")
end_time = time.clock()
benchmark = str(end_time - start_time)
print ("Download took_" +benchmark)
f.close()
And the output:
{'start': 0, 'end': 527868}
{'start': 0, 'end': 527868}
Thread 1 started
Start at_0Ends at_515
1_0_515
1_0_515
Finito!
Download took_6.97844422658
Working code:
from __future__ import print_function
import threading
import urllib
import urllib2
import time
threads = []
parts = {}
# url to open
url = "http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3"
u = urllib.urlopen(url)
# define file
file_name = "test.mp3"
f = open(file_name, 'wb')
# open url and get header info
def get_file_size(url):
stream_size = u.info()['Content-Length']
file_size = stream_size
return file_size
start = 0
#get stream size
end = get_file_size(url)
# specify block size
block_sz = 512
#algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread1():
full_stream_size = end
first_thread = {'start':0, 'end':(int(full_stream_size)/2)}
print(first_thread)
return first_thread
#algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread2():
full_stream_size = end
second_thread= {'start':int(full_stream_size)/2,'end': int(full_stream_size)}
print(second_thread)
return second_thread
# download function
def download_thread(url ,id,start,end):
current_size = int(float(start)/1024)
total_size = int(float(end)/1024)
print ("Start at_"+str(current_size) + "Ends at_" + str(total_size))
# specify request range and init stream
req = urllib2.Request(url)
req.headers['Range'] = 'bytes=%s-%s' % (start, end)
while True:
buffer = u.read(block_sz)
if not buffer:
break
start += len(buffer)
f.write(buffer)
thread_id = id
status = "Thread ID_" +str(thread_id) + "Downloaded_" + str(int(start/1024)) + "Total_" +str(total_size)
print (status)
#starts 2 threads
def start_threads():
for i in range(2):
#if first loop, start thread 1
if(i==0):
start = calculate_no_of_bytes_for_thread1().get('start')
end = calculate_no_of_bytes_for_thread1().get('end')
print("Thread 1 started")
t = threading.Thread(target=download_thread, args=(url,i,start,end))
t.start()
threads.append( t)
#if second loop, start thread 2
if(i==1):
start = calculate_no_of_bytes_for_thread2().get('start')
end = calculate_no_of_bytes_for_thread2().get('end')
print("Thread 2 started")
t = threading.Thread(target=download_thread, args=(url,i,start,end))
t.start()
threads.append( t)
# Join threads back (order doesn't matter, you just want them all)
for i in threads:
i.join()
# Sort parts and you're done
# result = ''
# for i in range(2):
# result += parts[i*block_sz]
#start benchmarking
start_time = time.clock()
start_threads()
print ("Finito!")
end_time = time.clock()
benchmark = str(end_time - start_time)
print ("Download took_" +benchmark)
f.close()
You have:
for i in range(2):
if(i==1):
...
if(i==2):
...
But range(2) iterates over [0,1] not [1,2].
Save some trouble and just remove those 3 lines. The code to start the two threads can just run serially.

Real-time-ability python multiprocessing (Queue and Pipe)

I am a little bit confused testing the multiprocessing module.
Let's simulate a digital timer. The code would look like:
start=datetime.now()
while True:
now=datetime.now()
delta=now-start
s = delta.seconds + delta.microseconds/1E6
print s
time.sleep(1)
Which returns correctly:
8e-06
1.001072
2.00221
3.003353
4.004416
...
Now I want to read the clock from my virtual external digital clock device using a pipe:
def ask_timer(conn):
start=datetime.now()
while True:
now=datetime.now()
delta=now-start
s = delta.seconds + delta.microseconds/1E6
conn.send(s)
parent_conn, child_conn = Pipe()
p = Process(target=ask_timer, args=(child_conn,))
p.start()
while True:
print parent_conn.recv()
time.sleep(1)
It returns:
2.9e-05
6.7e-05
7.7e-05
8.3e-05
8.9e-05
9.4e-05
0.0001
...
Here the timer doesn't seem to run permanently in the background..The implementation of "Queue" looks like:
def ask_timer(q):
while True:
now=datetime.now()
delta=now-start
s = delta.seconds + delta.microseconds/1E6
q.put(s)
#conn.close()
q = Queue()
p = Process(target=ask_timer, args=(q,))
p.start()
while True:
print q.get()
time.sleep(1)
which does the same like pipe. Is this just my misconception of multiprocessing of python? How could I ask a value realtime from a running parallel-thread?
Everything is working correctly. The child process is executing ask_timer() function completely independently from you main process. You don't have any time.sleep() in this function, so it just prints or puts in the queue deltas in the infinite loop with interval of like 10ms.
Once a second your main process asks child process for data and get's it. Data is one of those small intervals.
The problem there is that you're putting much more data into pipe/queue, than taking from it. So you're getting old data, when you ask. To test that you can print queue size in the loop (won't work on OS X):
def ask_timer(q):
start = datetime.now()
while True:
now = datetime.now()
delta = now - start
s = delta.seconds + delta.microseconds / 1E6
q.put(s)
q = Queue()
p = Process(target=ask_timer, args=(q,))
p.start()
while True:
print q.get()
print q.qsize()
time.sleep(1)
The queue size will grow really fast.
Apparently you can use shared memory to read current value from the child process.
from multiprocessing import Process, Value
from datetime import datetime
import time
from ctypes import c_double
def ask_timer(v):
start = datetime.now()
while True:
now = datetime.now()
delta = now - start
s = delta.seconds + delta.microseconds / 1E6
v.value = s
val = Value(c_double, 0.0)
p = Process(target=ask_timer, args=(val,))
p.start()
while True:
print(val.value)
time.sleep(1)

Sending and receiving async over multiprocessing.Pipe() in Python

I'm having some issues getting the Pipe.send to work in this code. What I would ultimately like to do is send and receive messages to and from the foreign process while its running in a fork. This is eventually going to be integrated into a pexpect loop for talking to interpreter processes.
from multiprocessing import Process, Pipe
from pexpect import spawn
class CockProc(Process):
def start(self):
self.process = spawn('coqtop', ['-emacs-U'])
def run(self, conn):
while True:
if not conn.poll():
cmd = conn.recv()
self.process.send(cmd)
self.process.expect('\<\/prompt\>')
result = self.process.before + self.process.after + " "
conn.send(result)
q, p = Pipe()
proc = CockProc()
proc.start()
proc.run(p)
res = q.recv()
command = raw_input(res + " ")
q.send(command)
res = q.recv()
parent_conn.send('OHHAI')
p.join()
`
This works, but might need some more work. Not sure how many of these i can create and loop over.
from multiprocessing import Process, Pipe
from pexpect import spawn
class CockProc(Process):
def start(self):
self.process = spawn('coqtop', ['-emacs-U'])
def run(self, conn):
if conn.poll():
cmd = conn.recv()
self.process.send(cmd + "\n")
print "sent comm"
self.process.expect('\<\/prompt\>')
result = self.process.before + self.process.after + " "
conn.send(result)
here, there = Pipe(duplex=True)
proc = CockProc()
proc.start()
proc.run(there)
while True:
if here.poll():
res = here.recv()
command = raw_input(res + " ")
here.send(command)
proc.run(there)

Categories

Resources