Strange output for threaded subprocesses - python

Thanks for taking the time to look at this; any help would be most appreciated!
I am trying to get some network stats by pinging a list of IP addresses called ips. The problem I'm having, however, is that my output is a list containing several Nones. Before implementing threads to run the subprocess commands, the output shown below was a series of numbers. If anybody can take a look at my source code and shed some light on the issue, I'd be very grateful!
Thank you in advance!
import subprocess
import re
import threading
from multiprocessing import Pool, Lock
from multiprocessing.dummy import Pool as ThreadPool


def get_ips():
    # Fill empty list with IP address
    ips = []
    with open('C:\Python26\ARPips.prn', 'r') as f:
        for line in f:
            line = line[:-1]
            if line != "end":
                ips.append(line)
    return ips


def ping(pingArgs):
    lock = Lock()
    lock.acquire()

    # Ping with "pingArgs" as the arguments
    ping = subprocess.Popen(pingArgs,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=True)

    # Get and parse output
    out = ping.communicate()
    out = ''.join((out))
    lost = re.findall(r"Lost = (\d+)", out)
    minimum = re.findall(r"Minimum = (\d+)", out)
    maximum = re.findall(r"Maximum = (\d+)", out)
    avg = re.findall(r"Average = (\d+)", out)
    no = re.findall(r"Sent = (\d+)", out)

    # Change output to integers
    lost = [int(x) for x in lost]
    minimum = [int(x) for x in minimum]
    maximum = [int(x) for x in maximum]
    avg = [int(x) for x in avg]
    no = [int(x) for x in no]
    print "%s \t \t %s \t \t%s \t \t %s \t \t%s" % (no, lost, maximum, minimum, avg)
    lock.release()


def main():
    # grab IP address list
    ips = get_ips()

    # Declare global variables
    global position, newIP, pingArgs
    position = 0
    newIP = ips[position]
    position += 1
    pingArgs = ["ping", "-n", "1", "-l", "1", "-w", "100", newIP]

    # Header for output
    print "Packets \t loss(%) \t Max(ms) \t Min(ms) \t Average(ms)"

    # Instantiate Pool objects, and set size of pool
    pool = Pool(processes=12)

    # Ping ips in own threads and return the results
    result = pool.map(ping, ips)

    # Close the pool and wait for work to finish
    pool.close()
    pool.join()

    # print the results
    print result


if __name__ == '__main__':
    main()
    print get_ips()
The output is shown below:
Packets loss(%) Max(ms) Min(ms) Average(ms)
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[] [] [] [] []
[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
['10.10.10.1', '10.10.10.41', '10.10.10.42', '10.10.10.43', '10.10.10.49', '10.10.10.51', '10.10.10.61', '10.10.10.71', '10.10.10.91', '10.10.10.92', '10.10.10.201', '10.10.10.205', '10.10.10.208', '10.10.10.209', '10.10.10.213', '10.10.10.214']
Process finished with exit code 0

The problem i'm having however is that my output is a list containing several 'None's'
The Nones are produced by print result, where result = pool.map(ping, ips), because the ping() function doesn't return anything explicitly (which means it returns None in Python).
Before implementing threads to run the sub-process commands, the output shown below was a series of numbers.
You are passing IP addresses to Popen() instead of the full ping command to run.
You are defining a global pingArgs that is not used anywhere (the local pingArgs parameter in ping() shadows it). You could replace pool.map with the built-in map call, which creates the result list in the same process and the same thread, to see that the result is the same (the regexes do not match) and that the error is not related to threads/processes.
The local lock = Lock() is useless: every call to ping() creates its own lock, so nothing is actually synchronized.
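To make the built-in map check above concrete, here is a minimal sketch (Python 2, reusing get_ips() and ping() from the question, nothing else assumed):

# Sketch only: run the same worker serially with the built-in map.
# If the columns are still empty lists, the parsing is broken, not the pool.
results = map(ping, get_ips())   # Python 2's map returns a list
print results                    # all None, because ping() has no return statement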
Here's the code where the above issues are fixed:
#!/usr/bin/env python
import re
from multiprocessing.dummy import Pool  # use threads
from subprocess import Popen, PIPE


def get_ips(filename):
    ips = []
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if line and line != "end":
                ips.append(line)
    return ips


def ping(ip):
    cmd = ["ping", "-n", "1", "-l", "1", "-w", "100", ip]
    ping = Popen(cmd, stdout=PIPE, stderr=PIPE)

    # Get and parse output
    output, err = ping.communicate()
    out = ''.join([output, err])
    lost = re.findall(r"Lost = (\d+)", out)
    minimum = re.findall(r"Minimum = (\d+)", out)
    maximum = re.findall(r"Maximum = (\d+)", out)
    avg = re.findall(r"Average = (\d+)", out)
    no = re.findall(r"Sent = (\d+)", out)

    # Change output to integers
    lost = [int(x) for x in lost]
    minimum = [int(x) for x in minimum]
    maximum = [int(x) for x in maximum]
    avg = [int(x) for x in avg]
    no = [int(x) for x in no]
    return "%s \t \t %s \t \t%s \t \t %s \t \t%s" % (
        no, lost, maximum, minimum, avg)


def main():
    # grab IP address list
    ips = get_ips(r'C:\Python26\ARPips.prn')

    # Header for output
    print "Packets \t loss(%) \t Max(ms) \t Min(ms) \t Average(ms)"

    # Instantiate Pool object, and set size of pool
    pool = Pool(processes=12)

    # Ping ips in their own threads and return the results
    results = pool.map(ping, ips)

    # Close the pool and wait for work to finish
    pool.close()
    pool.join()

    # print the results
    print "\n".join(results)


if __name__ == '__main__':
    main()

This is what got it working for me on a (Unix) system:
import subprocess
import re
from multiprocessing import Pool, Lock

lock = Lock()
pingArgs = "ping -n -l 1 -w 100 "


def get_ips():
    # Fill empty list with IP addresses
    ips = []
    with open('C:\\Python26\\ARPips.prn', 'r') as f:
        for line in f:
            line = line[:-1]
            if line != "end":
                ips.append(line)
    return ips


def ping(ip):
    global lock
    pingCommand = pingArgs + ip
    lock.acquire()

    # Ping with "pingCommand" as the command string
    ping = subprocess.Popen(pingCommand,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=True)

    # Get and parse output
    out = ping.communicate()
    out = ''.join((out))
    lost = re.findall(r"Lost = (\d+)", out)
    minimum = re.findall(r"Minimum = (\d+)", out)
    maximum = re.findall(r"Maximum = (\d+)", out)
    avg = re.findall(r"Average = (\d+)", out)
    no = re.findall(r"Sent = (\d+)", out)

    # Change output to integers
    lost = [int(x) for x in lost]
    minimum = [int(x) for x in minimum]
    maximum = [int(x) for x in maximum]
    avg = [int(x) for x in avg]
    no = [int(x) for x in no]
    lock.release()
    return "%s \t \t %s \t \t%s \t \t %s \t \t%s" % (no, lost, maximum, minimum, avg)


def main():
    # grab IP address list
    ips = get_ips()

    # Header for output
    print("Packets \t loss(%) \t Max(ms) \t Min(ms) \t Average(ms)")

    # Instantiate Pool object, and set size of pool
    pool = Pool(processes=12)

    # Ping ips in their own processes and return the results
    results = pool.map(ping, ips)

    # Close the pool and wait for work to finish
    pool.close()
    pool.join()

    # print the results
    for result in results:
        print(result)


if __name__ == '__main__':
    main()
    print(get_ips())
The issues I was experiencing with the code were: incorrect ping arguments (-n doesn't take an additional parameter), the lock needs to be global, with shell=True Popen takes a string rather than a list, and ping(pingArgs) could overwrite the global argument, whereas you want the pool to send only the IP to the worker and have the worker append it as the final argument.
Also, I noticed you didn't escape the path to the file on disk. I didn't use/test that part, but I included it properly escaped for reference.
Edit: I also changed it so each call simply returns the value to be printed, and everything is printed once the work is complete, instead of the values being printed inside the function together with the results of the functions without returns (the list of Nones).
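To illustrate the shell=True point above, a minimal sketch (reusing the command string from this answer and one of the addresses from the question's output):

# With shell=True the command should be a single string handed to the shell;
# without shell=True (the default) it should be a list of arguments.
from subprocess import Popen, PIPE
Popen("ping -n -l 1 -w 100 10.10.10.1", shell=True, stdout=PIPE, stderr=PIPE)    # string, as in the answer above
Popen("ping -n -l 1 -w 100 10.10.10.1".split(), stdout=PIPE, stderr=PIPE)        # equivalent argument list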

Related

How to write output without newline sequence in Python

I have the ip_file.txt with the following input fields.
8.8.8.8,8.8.4.4
www.google.com,www.yahoo.com
And I am getting the output as below:
8.8.8.8,UP,20,2022-08-11 22:58:16
8.8.4.4,UP,17,2022-08-11 22:58:16
www.google.com,UP,17,2022-08-11 22:58:16
www.yahoo.com,UP,364,2022-08-11 22:58:16
Here is my code in Python.
import subprocess
import threading
import time
import re

timestr = time.strftime("%Y-%m-%d %H%M%S")
timesec = time.strftime("%Y-%m-%d %H:%M:%S")
raw_list = []


def ping(host):
    results_file = open("results_bng_" + str(timestr) + ".txt", "a")
    p = subprocess.Popen(["ping", host, "-n", "5"], shell=True, universal_newlines=True, stdout=subprocess.PIPE)
    response = p.communicate()[0]
    for i in response.split("\n"):
        para = i.split("=")
        # print(para)
        try:
            if para[0].strip() == "Minimum":
                latency = para[3].strip()
                print(latency)
                latfin = re.findall('\d+', latency)
                latfin1 = latfin[0]
        except:
            print("time run")
    if "Received = 1" and "Approximate" in response:
        print(f"UP {host} Ping Successful")
        results_file.write(f"{host},UP,{latfin1},{timesec}" + "\n")
    else:
        print(f"Down {host} Ping Unsuccessful")
        results_file.write(f"{host},Down,0,{timesec}" + "\n")
    results_file.close()


with open(r'ip_file.txt', "r") as server_list_file:
    hosts = server_list_file.read()
    hosts_list = hosts.replace('\n', ',').split(',')
print(hosts_list)

num_threads = 1
number = 0
while number < len(hosts_list):
    # print(number)
    for i in range(num_threads):
        t = threading.Thread(target=ping, args=(hosts_list[number + i],))
        t.start()
        t.join()
    number = number + 1
Is there any chance I can get the output in the format noted below:
8.8.8.8,UP,20,2022-08-11 22:58:16,8.8.4.4,UP,17,2022-08-11 22:58:16
www.google.com,UP,17,2022-08-11 22:58:16,www.yahoo.com,UP,364,2022-08-11 22:58:16
I tried multiple ways, but the input file is read as a single array and I am not able to write the output as shown above. Can anybody help with this? Thank you for your valuable time.
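One possible approach, not from the original thread, is to keep each host's result line in a dictionary and, once all threads have finished, write one output line per line of ip_file.txt. A rough sketch, where ping_host() is a hypothetical helper standing in for the ping/parse logic above:

import threading

results = {}                      # host -> "host,UP,latency,timestamp"
results_lock = threading.Lock()

def record(host):
    line = ping_host(host)        # hypothetical helper returning e.g. "8.8.8.8,UP,20,2022-08-11 22:58:16"
    with results_lock:
        results[host] = line

with open('ip_file.txt') as f:
    rows = [r.strip().split(',') for r in f if r.strip()]

threads = [threading.Thread(target=record, args=(h,)) for row in rows for h in row]
for t in threads:
    t.start()
for t in threads:
    t.join()

with open('results_joined.txt', 'w') as out:
    for row in rows:
        # join the results of all hosts that appeared on the same input line
        out.write(','.join(results[h] for h in row) + '\n')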

How can I extract the Average from each ping result and store it in a file

Here I can ping each IP but am not able to extract the Average of the result. Can anyone help, please?
import subprocess
import threading

ip_list = []


def ping(host):
    ip_list.append(host + ' ' + str((subprocess.run('ping ' + host + ' -n 1').returncode)))


with open(r'input.txt', "r") as input_file:
    hosts = input_file.read()
    hosts_list = hosts.split('\n')

num_threads = 1
number = 0
while number < len(hosts_list):
    for i in range(num_threads):
        t = threading.Thread(target=ping, args=(hosts_list[number + i],))
        t.start()
        t.join()
    number = number + 1
After doing some research, I found out that when you use subprocess.run and then read the returncode, you don't get the output, only the return code (usually 0 for a successful run with no errors).
If you want the output of the process, you have to use subprocess.Popen and then communicate().
Then, if you only want the average, you have to do some string manipulation on the output to get just the number after "Average".
Here's an example:
def ping(host):
    output = subprocess.Popen(["ping", host, "-n", "1"], stdout=subprocess.PIPE).communicate()[0]
    words = str(output).split(sep=" ")
    average = words[words.index("Average")+2].split("ms")[0]
    ip_list.append(host + ' ' + average)
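For reference, a regex-based variant of the same idea, a sketch assuming the English-language Windows ping output parsed elsewhere in this thread:

import re
import subprocess

def ping_avg(host):
    # Parse "Average = NNms" from the ping output instead of splitting on spaces
    output = subprocess.Popen(["ping", host, "-n", "1"],
                              stdout=subprocess.PIPE,
                              universal_newlines=True).communicate()[0]
    match = re.search(r"Average = (\d+)ms", output)
    return int(match.group(1)) if match else None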

Remove duplicate lines from threading

I have a program that reads lines randomly from a file using threading. The problem is that it sometimes reads the same line more than once. For instance, let's say I use 5 threads and my file looks like this:
line1
line2
line3
line4
line5
The program uses threading to read the lines randomly, but sometimes it reads line4, line3, line5, line2, line5 (again). So my question is: how do I stop line5 from being read a second time?
Code:
def get_token():
    tokens = []
    with open('pokens.txt', 'r', encoding='UTF-8') as file:
        lines = file.readlines()
        for line in lines:
            tokens.append(line.replace('\n', ''))
    return tokens


def get_proxy():
    proxies = []
    with open('proxies.txt', 'r', encoding='UTF-8') as file:
        lines = file.readlines()
        for line in lines:
            proxies.append(line.replace('\n', ''))
    return proxies


class Gen:
    def __init__(self, token, proxy=None):
        options = webdriver.ChromeOptions()
        options.add_experimental_option("excludeSwitches", ["enable-logging"])
        proxy_ip_port = proxy
        proxy2 = Proxy()
        proxy2.proxy_type = ProxyType.MANUAL
        proxy2.http_proxy = proxy_ip_port
        proxy2.ssl_proxy = proxy_ip_port
        capabilities = webdriver.DesiredCapabilities.CHROME
        proxy2.add_to_capabilities(capabilities)
        self.browser = webdriver.Chrome("chromedriver.exe")
        self.token = token
        self.proxy = proxy
        self.password = 'passwordhere'

    def register(self):
        print('hi')
        # Code continues with no duplicates


def worker(proxy=None):
    token_list = get_token()
    token = random.choice(token_list)
    d = Gen(token, proxy=proxy)
    d.register()


def main():
    threads = []
    num_thread = input('Number of Threads: ')
    num_thread = int(num_thread)
    proxies = get_proxy()
    for i in range(num_thread):
        t = threading.Thread(target=worker, args=(random.choice(proxies), ))
        threads.append(t)
        t.start()


if __name__ == '__main__':
    main()
Below is a simplified "toy version" of your program that I updated to do the following:
Read the tokens file from the main thread, into a list
Randomly shuffle the order of the list
Give each worker a roughly equally sized subset of the tokens list to choose from
Each worker merely prints out the data it was given by the main thread (actually doing anything with the data is omitted, for clarity)
This approach avoids duplicates because any given token appears in the list only once, and each thread is given a different subset of the list to choose tokens from.
import threading
import random


def read_tokens_list():
    tokens = []
    with open('pokens.txt', 'r', encoding='UTF-8') as file:
        lines = file.readlines()
        for line in lines:
            tokens.append(line.replace('\n', ''))
    return tokens


def read_proxies_list():
    proxies = []
    with open('proxies.txt', 'r', encoding='UTF-8') as file:
        lines = file.readlines()
        for line in lines:
            proxies.append(line.replace('\n', ''))
    return proxies


def worker(proxy, token_list):
    token = random.choice(token_list)
    print("Worker: my proxy is [%s], my token list is %s, I've chosen [%s] as my token" % (proxy, token_list, token))


def main():
    threads = []
    num_thread = input('Number of Threads: ')
    num_thread = int(num_thread)
    proxies = read_proxies_list()

    token_list = read_tokens_list()  # read in the pokens.txt file
    random.shuffle(token_list)       # shuffle the list into random order

    tokens_per_worker = len(token_list) // num_thread  # how many tokens from the list each worker will get (roughly)

    for i in range(num_thread):
        if ((i+1) < num_thread):
            num_tokens_for_this_worker = tokens_per_worker  # give each worker an even share of the list
        else:
            num_tokens_for_this_worker = len(token_list)    # except the last worker gets whatever is left

        # we'll give the first (num_tokens_for_this_worker) tokens in the list to this worker
        tokens_for_this_worker = token_list[0:num_tokens_for_this_worker]

        # and remove those tokens from the list so that they won't get used by anyone else
        token_list = token_list[num_tokens_for_this_worker:]

        t = threading.Thread(target=worker, args=(random.choice(proxies), tokens_for_this_worker, ))
        threads.append(t)
        t.start()

    for t in threads:
        t.join()


if __name__ == '__main__':
    main()

python for loop in parallel

I am trying to read data from an input file and, for each line, perform a task in a while loop. The problem is that when I create the first process, its loop keeps executing and never returns control to the outer for loop, so there is no parallelism. What am I doing wrong?
Here is the relevant code:
from multiprocessing import Process


def work_line(list1Line, jobId):
    while True:
        print list1Line
        tenant = list1Line[0]
        module = list1Line[1]
        endTime = int(time.time())
        startTime = endTime - startTimeDelta
        generate(jobId, startTime, endTime, tenantServiceAddress, tenant, module)
        print ("tenant {} will sleep for {} seconds").format(tenant, sleepBetweenLoops)
        time.sleep(sleepBetweenLoops)


def openFiles():
    file = open(CLOUD_INPUT_FILE, 'r')
    lines = file.readlines()
    file.close()
    linesLen = len(lines)
    processes = []
    for linesIndex in range(0, linesLen):
        jobId = GenerateRandomID()
        line = lines[linesIndex]
        list1Line = line.split()
        p = Process(target=work_line(list1Line, jobId))
        p.start()
        processes.append(p)
    print processes
    for p in processes:
        p.join()


if __name__ == '__main__':
    CLOUD_INPUT_FILE = r'C:\CF\input_file.txt'
    tenantServiceAddress = 'address.address'
    startTimeDelta = 300
    sleepBetweenLoops = 1800
    print multiprocessing.cpu_count()
    openFiles()
You are actually calling the function when you construct the Process, rather than passing it as the target. Change it to:
p = Process(target=work_line, args=(list1Line, jobId))
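A minimal illustration of the difference, with a hypothetical do_work function (not from the question):

from multiprocessing import Process

def do_work(name):
    print "working on " + name

if __name__ == '__main__':
    # Wrong: do_work("a") runs here, in the parent, while building the Process;
    # its return value (None) becomes the target, so the child does nothing.
    p_wrong = Process(target=do_work("a"))

    # Right: pass the callable and its arguments separately; the child calls it.
    p_right = Process(target=do_work, args=("b",))
    p_right.start()
    p_right.join()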

Python Multiprocessing arcgis shapefiles with PP or async stalling on large files

I am new to this and am trying to implement either Parallel Python (PP) or async to multiprocess arcgis shapefile clipping. I have been successful with both pool_async and PP; however, they stall forever on big files (and yes, I tried making Python large-address aware). Here is my code using PP; please offer any solutions, and sorry for any glaring errors :-)
def ClipDo(F,M,O,OW = ""):
#for F in F:
print "\n"+"PID:%s"%(os.getpid())
arcpy.env.overwriteOutput = False
if OW == "":
pass
else:
arcpy.env.overwriteOutput = True
FPath = os.path.dirname(F)
F = os.path.basename(F)
ClipList = []
pattern = '*.shp'
for filename in M:
ClipList.append(filename)
clipN = str(os.path.splitext(os.path.basename(filename))[0])
if not os.path.isdir(O+"/"+clipN+"/"):
os.makedirs(O+"/"+clipN+"/")
#Counts files in clip directory
count = len(ClipList)
for num in range(0,count):
clip = ClipList[num]
clipN = str(os.path.splitext(os.path.basename(clip))[0])
OutShp = clipN +"_"+ F
try:
print "Clipping, Base File: %s Clip File: %s Output: %s" % (F,clip,O+"\\"+OutShp)
arcpy.Clip_analysis(os.path.join(FPath,F),os.path.join(M,clip), os.path.join(os.path.join(O+"\\",clipN),OutShp))
print "Clipping SUCCESS "
except:
print "Clipping FAILED " +F
def PP(F,M,O,OW):
print F
#~ # tuple of all parallel python servers to connect with
ncpus = 6
ncpus = ncpus
ppservers = ("localhost",)
#~ #ppservers = ("10.0.0.1",)
if len(sys.argv) > 1:
ncpus = int(sys.argv[1])
# Creates jobserver with ncpus workers
job_server = pp.Server(ncpus, ppservers=ppservers)
else:
#~ # Creates jobserver with automatically detected number of workers
job_server = pp.Server(ncpus,ppservers=ppservers)
print "Starting pp with", job_server.get_ncpus(), "workers"
jobs = []
start_time = time.time()
for f in F:
job = job_server.submit(ClipDo, (f,M,O,OW),(), ("arcpy","NullGeomFilter"))
jobs.append(job)
for job in jobs:
result = job()
print result
if result:
break
job_server.destroy()
print "\n"+"PID:%s"%(os.getpid())
print "Time elapsed: ", time.time() - start_time, "s"
Could it be that your big chunks are just too big for arcpy, and that the parallelization is not the problem?
As a test, it might be good to run one of the arg lists through your function with the big data interactively/locally, to see whether it works at all. If it does, you can move on to logging and debugging the parallel version.
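For example, a serial smoke test along these lines (a sketch reusing ClipDo from the question; the paths are placeholders, not real data):

# Run the worker once, in-process, with one representative "big" input,
# before involving pp at all.
big_shapefile = r"C:\data\big_input.shp"        # placeholder
clip_shapefiles = [r"C:\data\clips\area1.shp"]  # placeholder
out_dir = r"C:\data\out"

ClipDo(big_shapefile, clip_shapefiles, out_dir, OW="")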
