I have this code:
configurationsFile = "file.csv"
configurations = []


def loadConfigurations():
    """Read semicolon-delimited rows from configurationsFile and store each
    row's first column as a {"url": ...} entry in the global configurations
    list, printing how many urls were loaded."""
    with open(configurationsFile) as csv_file:
        reader = csv.reader(csv_file, delimiter=';')
        count = 0
        for record in reader:
            configurations.append({"url": record[0]})
            count += 1
        print(f'{count} urls loaded.')
# Populate the global configurations list from file.csv before processing.
loadConfigurations()

# Configurations whose processing raised; reported at the end of the run.
failedConfigs = []

# NOTE(review): requires `import requests` at the top of the file — not
# shown in this snippet.
session_requests = requests.session()

for config in configurations:
    try:
        # Placeholder statement: the real per-url work goes here.
        "Do something with the url loaded fron file.csv"
    except Exception as e:
        # Best-effort: log the error and remember the failing config.
        print(e)
        failedConfigs.append(config)

if len(failedConfigs) > 0:
    print("These errored out:")
    for theConfig in failedConfigs:
        print("ERROR: {}".format(theConfig['url']))
It reads urls from a csv file, and then runs a code for each of the urls that's listed in the csv file.
The only "problem" is that if the csv file contains a lot of urls, it takes a long time to run through them all. So I'm looking for a way to run more than one url at a time.
I'm not that good with python so I don't even know if it's possible.
But the question is, is there some way to tell the code to run, say 5 urls at once instead of just 1?
You can use the threading.Thread class. Here is an example:
from threading import Thread


def read(file, start, end):
    """Print the lines of *file* whose 0-based index lies in [start, end)."""
    with open(file, 'r') as r:
        for i, v in enumerate(r):
            if start <= i < end:
                print(v)


file = "file.txt"
t1 = Thread(target=read, args=(file, 0, 100))
t2 = Thread(target=read, args=(file, 100, 200))
t3 = Thread(target=read, args=(file, 200, 300))
t4 = Thread(target=read, args=(file, 300, 400))
t5 = Thread(target=read, args=(file, 400, 500))
t1.start()
t2.start()
t3.start()
t4.start()  # BUG FIX: the original called t3.start() twice and never
            # started t4, so t4.join() below raised RuntimeError.
t5.start()
t1.join()
t2.join()
t3.join()
t4.join()
t5.join()
Or use a loop:
from threading import Thread


def read(file, start, end):
    """Print every line of *file* whose 0-based index is in [start, end)."""
    with open(file, 'r') as r:
        for idx, line in enumerate(r):
            if start <= idx < end:
                print(line)


file = "file.txt"
# One thread per 100-line segment: thread n handles [n*100, (n+1)*100).
threads = [
    Thread(target=read, args=(file, n * 100, (n + 1) * 100))
    for n in range(5)
]
for worker in threads:
    worker.start()
for worker in threads:
    worker.join()
Basically, the read() function defined above reads in a file from line start to line end. Split the reading tasks into 5 segments so that 5 threads can simultaneously read the file.
UPDATE UPON REQUEST
For your code, the
for config in configurations:
try:
"Do something with the url loaded fron file.csv"
except Exception as e:
print(e)
failedConfigs.append(config)
Can be converted to a function which allows you to specify from which index to which index of the configurations you want to process:
def process(start, end):
    """Handle configurations[start:end); any entry whose processing raises
    is logged and appended to the module-level failedConfigs list."""
    for idx in range(start, end):
        entry = configurations[idx]
        try:
            "Do something with the url loaded fron file.csv"
        except Exception as err:
            print(err)
            failedConfigs.append(entry)
Which you can then add
# Fan the work out over 5 threads, each taking a 100-item slice.
threads = []
for n in range(5):
    threads.append(Thread(target=process, args=(n * 100, (n + 1) * 100)))
for worker in threads:
    worker.start()
for worker in threads:
    worker.join()
So you might end up with something like:
import csv
from threading import Thread

# NOTE(review): this script also needs `import requests` (third-party),
# which the original snippet omitted.

configurationsFile = "file.csv"
configurations = []


def loadConfigurations():
    """Load one url per row (first ;-separated column) into configurations."""
    with open(configurationsFile) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=';')
        line_count = 0
        for row in csv_reader:
            configurations.append({"url": row[0]})
            line_count += 1
        print(f'{line_count} urls loaded.')


loadConfigurations()

failedConfigs = []
session_requests = requests.session()


def process(start, end):
    """Process configurations[start:end); record failures in failedConfigs.

    BUG FIX: the end index is clamped to the list length — the original
    hard-coded 100-item chunks and raised IndexError when the csv had
    fewer than 500 urls (and silently skipped urls when it had more).
    CPython's list.append is atomic, so the threads may share failedConfigs.
    """
    for i in range(start, min(end, len(configurations))):
        config = configurations[i]
        try:
            # Do something with the url loaded from file.csv
            pass
        except Exception as e:
            print(e)
            failedConfigs.append(config)


# Size the chunks from the actual url count instead of assuming 100/thread.
num_threads = 5
chunk = -(-len(configurations) // num_threads) if configurations else 0
threads = []
for i in range(num_threads):
    threads.append(Thread(target=process, args=(i * chunk, (i + 1) * chunk)))
for t in threads:
    t.start()
for t in threads:
    t.join()

if len(failedConfigs) > 0:
    print("These errored out:")
    for theConfig in failedConfigs:
        print("ERROR: {}".format(theConfig['url']))
Inside a worker thread I am generating a data frame . Trying to put this into the queue passed to the worker thread is failing. In fact trying to put any values into the queue is failing.
The part of the code that is failing inside the worker thread task1() is given below:
# Fragment from inside task1(): build a small demo DataFrame and push the
# results onto the queues shared with the main thread.
df = pd.DataFrame([[1,2,3,4],[3,4,5,6]])
qmdlvalues.put(df)
# Elapsed time since the `start = time.time()` taken earlier in task1.
mdltiming = time.time() - start
qmdlparams.put(paramval)
qtiming.put(mdltiming)
Complete code
import threading
import queue
from sklearn.manifold import TSNE
import os
import time
def write_tsne_op(opdata, fname, header):
    """Dump t-SNE output to *fname*: the literal *header* string first,
    then every slice of *opdata* serialized as comma-separated rows."""
    with open(fname, 'w') as sink:
        sink.write(header)
        for block in opdata:
            np.savetxt(sink, block, delimiter=",")
def task1(qmdlvalues, qmdlparams, qtiming, paramval):
    """Worker: publish a demo DataFrame, the parameter string and the
    elapsed seconds on the three queues, then echo all three to stdout."""
    start = time.time()
    #tmpmdl1 = TSNE(perplexity=100,early_exaggeration=1, n_components=2,random_state=0,verbose=1)
    #qmdlvalues.put(tmpmdl1.fit_transform(dense_mx))
    frame = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6]])
    qmdlvalues.put(frame)
    elapsed = time.time() - start
    qmdlparams.put(paramval)
    qtiming.put(elapsed)
    print(frame)
    print(str(elapsed))
    print(paramval)
def task2(qmdlvalues, qmdlparams, qtiming, paramval):
    """Worker: like task1 but silent — queue a demo DataFrame, the
    parameter string, and the elapsed seconds, without printing."""
    begin = time.time()
    #tmpmdl2 = TSNE(perplexity=100,early_exaggeration=10, n_components=2,random_state=0,verbose=1)
    #qmdlvalues.put(tmpmdl2.fit_transform(dense_mx2))
    qmdlvalues.put(pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6]]))
    qmdlparams.put(paramval)
    qtiming.put(time.time() - begin)
if __name__ == "__main__":
    # NOTE(review): dense_mx is not defined anywhere in this snippet; these
    # aliases only work if it is created earlier — TODO confirm.
    dense_mx2 = dense_mx
    dense_mx3 = dense_mx

    qmdlvl = queue.Queue()
    qmdlch = queue.Queue()
    qtme = queue.Queue()
    mdlvalues = pd.DataFrame()

    t1 = threading.Thread(target=task1,args=(qmdlvl,qmdlch,qtme,"#perplex: 100 early exag: 1 timing:$_plex100_exag1.csv"), name='t1')
    t2 = threading.Thread(target=task2,args=(qmdlvl,qmdlch,qtme,"#perplex: 100 early exag: 10 timing:$_plex100_exag10.cv"), name='t2')

    # starting threads
    t1.start()
    t2.start()

    # BUG FIX: the original polled qmdlvl.empty() immediately after
    # start(), so the main thread usually saw an empty queue and printed
    # "Queue closed. Exiting thread." before the workers had put anything.
    # Join the workers first, then drain whatever they produced.
    t1.join()
    t2.join()

    while not qmdlvl.empty():
        print("Got item:", qmdlvl.get())
    print("Queue closed. Exiting thread.")
Below is the actual output I am getting from the code in the main
while True:
if qmdlvl.empty():
print("Queue closed. Exiting thread.")
break
try:
item = qmdlvl.get(timeout=.5)
except:
continue
print("Got item:", item)
ID of process running main program: 6456
Main thread name: MainThread
Queue closed. Exiting thread.
I want to be able to put the data frame into a queue inside the worker thread and access the same data frame in the main thread.
There were parameter mismatches in my earlier code; those have been corrected, and a full working code is presented below.
I stored the output of t-SNE directly into the queue and retrieved the same in the main thread. The next progression would be convert this to thread pool and sub-classing.
import threading
import queue
from sklearn.manifold import TSNE
import os
import time
def write_tsne_op(opdata,fname,header):
    """Write *header* verbatim, then each array slice in *opdata* as
    comma-separated rows.

    NOTE(review): relies on numpy being imported as np at module level,
    which is not shown in this snippet.
    """
    with open(fname, 'w') as outfile:
        outfile.write(header)
        for data_slice in opdata:
            np.savetxt(outfile, data_slice,delimiter=",")
def task1(ip_matrix,qmdlvalues,qmdlparam,plex,exag,qmdltime,qmdlhrfn,hderfname):
    """Fit t-SNE on ip_matrix and publish the embedding, the
    "perplexity$exaggeration" tag, the header/filename string and the
    elapsed seconds on their respective queues."""
    start = time.time()
    tmpmdl1 = TSNE(perplexity=plex,early_exaggeration=exag, n_components=2,random_state=0,verbose=1)
    qmdlvalues.put(tmpmdl1.fit_transform(ip_matrix))
    qmdlparam.put(str(plex) + "$" + str(exag))
    qmdlhrfn.put(hderfname)
    # BUG FIX: the original assigned the elapsed time to a misspelled
    # variable (mdltimig), so qmdltime always received the initial 0 and
    # the print below ran before the timing existed.
    mdltiming = time.time() - start
    print(str(mdltiming)+"time")
    qmdltime.put(mdltiming)
def task2(ip_matrix,qmdlvalues,qmdlparam,plex,exag,qmdltime,qmdlhrfn,hderfname):
    """Same contract as task1 (separate copy so each thread has its own
    model variable); no timing print."""
    start = time.time()
    tmpmdl2 = TSNE(perplexity=plex,early_exaggeration=exag, n_components=2,random_state=0,verbose=1)
    qmdlvalues.put(tmpmdl2.fit_transform(ip_matrix))
    qmdlparam.put(str(plex) + "$" + str(exag))
    qmdlhrfn.put(hderfname)
    # BUG FIX: was `mdltimig = end - start` (typo), so qmdltime got 0.
    mdltiming = time.time() - start
    qmdltime.put(mdltiming)
def task3(ip_matrix,qmdlvalues,qmdlparam,plex,exag,qmdltime,qmdlhrfn,hderfname):
    """Same contract as task1 (separate copy per thread); no timing print."""
    start = time.time()
    tmpmdl3 = TSNE(perplexity=plex,early_exaggeration=exag, n_components=2,random_state=0,verbose=1)
    qmdlvalues.put(tmpmdl3.fit_transform(ip_matrix))
    qmdlparam.put(str(plex) + "$" + str(exag))
    qmdlhrfn.put(hderfname)
    # BUG FIX: was `mdltimig = end - start` (typo), so qmdltime got 0.
    mdltiming = time.time() - start
    qmdltime.put(mdltiming)
def task4(ip_matrix,qmdlvalues,qmdlparam,plex,exag,qmdltime,qmdlhrfn,hderfname):
    """Same contract as task1 (separate copy per thread); no timing print."""
    start = time.time()
    tmpmdl4 = TSNE(perplexity=plex,early_exaggeration=exag, n_components=2,random_state=0,verbose=1)
    qmdlvalues.put(tmpmdl4.fit_transform(ip_matrix))
    qmdlparam.put(str(plex) + "$" + str(exag))
    qmdlhrfn.put(hderfname)
    # BUG FIX: was `mdltimig = end - start` (typo), so qmdltime got 0.
    mdltiming = time.time() - start
    qmdltime.put(mdltiming)
if __name__ == "__main__":
    # print ID of current process
    print("ID of process running main program: {}".format(os.getpid()))
    # print name of main thread
    print("Main thread name: {}".format(threading.main_thread().name))

    # NOTE(review): dense_mx must already be defined (the matrix being
    # embedded); it is not created anywhere in this snippet.
    dense_mx2 = dense_mx
    dense_mx3 = dense_mx
    dense_mx4 = dense_mx

    qmdlvl = queue.Queue()        # t-SNE embeddings
    qmdlch = queue.Queue()        # "perplexity$exaggeration" tags
    qmdltme = queue.Queue()       # per-model timings
    qmdlhdrfname = queue.Queue()  # "header$filename" strings

    perplex = 200
    # creating threads
    exag=10
    t1 = threading.Thread(target=task1,args=(dense_mx,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 10 timing:$_plex200_exag10.csv"), name='t1')
    exag=30
    # BUG FIX: the t2 and t4 filenames ended in ".cv" (typo for ".csv").
    t2 = threading.Thread(target=task2,args=(dense_mx2,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 30 timing:$_plex200_exag30.csv"), name='t2')
    exag=50
    t3 = threading.Thread(target=task3,args=(dense_mx3,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 50 timing:$_plex200_exag50.csv"), name='t3')
    exag=100
    t4 = threading.Thread(target=task4,args=(dense_mx4,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 100 timing:$_plex200_exag100.csv"), name='t4')
    # starting threads
    t1.start()
    t2.start()
    t3.start()
    t4.start()
    # wait until all threads finish
    t1.join()
    t2.join()
    t3.join()
    t4.join()

    # NOTE(review): results are matched by arrival order across four
    # independent queues; since each worker pushes to the value queue first,
    # interleaving could pair an embedding with another thread's header —
    # consider queuing one (embedding, tag, timing, hdrfname) tuple instead.
    while True:
        if qmdlvl.empty():
            print("Queue closed. Exiting thread.")
            break
        try:
            item1 = qmdlvl.get(timeout=.5)
            item2 = qmdlch.get(timeout=.5)
            item3 = qmdltme.get(timeout=.5)  # timing; currently unused
            header,fname = qmdlhdrfname.get(timeout=.5).split('$')
        except:
            continue
        write_tsne_op(item1,fname,header)
I wrote this simple code... I need to save the code's output to a text file on my PC. How can I do that?
import threading
import time

# Worker: once per second, print hey+1 (a number).  Python 2 syntax.
def qan(hey):
    while True:
        d = hey + 1
        print d
        time.sleep(1)

# Worker: once per second, print the given string followed by a blank line.
def printd(printme):
    while True:
        print printme + "\n"
        time.sleep(1)

t1 = threading.Thread(target=qan, args=(1,))
t2 = threading.Thread(target=printd, args=("hey",))
# NOTE: both threads write to stdout concurrently, which is why the
# observed output interleaves ("2 2 hey", "2hey", ...).
t2.start()
t1.start()
and this is my code output
hey
2 2 hey
2hey
2
Use some buffer with data:
import threading
import time

# Shared accumulator the two workers append their output lines to.
buffer = []


def qan(hey):
    """Append hey+1 (as a text line) to the shared buffer once per second."""
    while True:
        # BUG FIX: store a string — ''.join() below raised TypeError on the
        # int values the original appended.
        buffer.append(str(hey + 1) + "\n")
        time.sleep(1)


def printd(printme):
    """Append the message (plus newline) to the shared buffer once per second."""
    while True:
        buffer.append(printme + "\n")
        time.sleep(1)


t1 = threading.Thread(target=qan, args=(1,))
t2 = threading.Thread(target=printd, args=("hey",))
# BUG FIX: daemon threads — the infinite loops otherwise keep the process
# alive forever, so the script never exits after writing the file.
t1.daemon = True
t2.daemon = True
t2.start()
t1.start()

# Give the workers a moment to produce entries; the original wrote the file
# immediately after start(), while the buffer was still empty.
time.sleep(0.5)
# BUG FIX: the original opened output.txt in the default read mode ('r'),
# which makes f.write() raise io.UnsupportedOperation.
with open('output.txt', 'w') as f:
    f.write(''.join(buffer))
so I have this code where I need these two threads to be run one after the other CONSTANTLY.
So, once thread 1 finishes, thread 2 goes and once thread 2 finishes, then thread 1 goes then thread 2 etc...like constantly as if it's an infinite loop.
# Python 2 script: httplib/urllib are the py2 stdlib HTTP modules and
# `serial` is the third-party pyserial package.
import httplib, urllib
import time, sys
import serial
from threading import Thread
#from multiprocessing import Process

key = 'MY API KEY' #API Key required for ThingSpeak.
rfWaterLevelVal = 0 #Global variable that holds the final water level value.
# Serial link to the rf receiver at 9600 baud.
ser = serial.Serial('/dev/ttyUSB0',9600)
#Gathers one rf frame and separates it to obtain the water level data.
def rfWaterLevel():
    """Read one line from the serial port and update rfWaterLevelVal.

    Assumes a 5-field space-separated frame whose last field is the water
    level in cm — TODO confirm against the transmitter's frame format.
    """
    global rfWaterLevelVal
    rfDataArray = ser.readline().strip().split()
    print 'incoming: %s' %rfDataArray
    if len(rfDataArray) == 5:
        rfWaterLevelVal = float(rfDataArray[4])
        print 'RFWater Level1: %.3f cm' % (rfWaterLevelVal)
#Wrapper loop created purely to make the multithreading easier.
def rfWaterLevelFinal():
    """Poll the rf sensor forever; Ctrl-C exits the script."""
    while True:
        try:
            rfWaterLevel()
        except KeyboardInterrupt:
            print "caught keyboard interrupt"
            sys.exit()
#Sends the sensor data over to ThingSpeak.
def sendData():
    """POST the current rfWaterLevelVal to the ThingSpeak /update endpoint
    and print the HTTP status of the response."""
    global rfWaterLevelVal
    params = urllib.urlencode({'field1':rfWaterLevelVal, 'key':key})
    headers = {"Content-type" : "application/x-www-form-urlencoded","Accept": "text/plain"}
    # 5-second timeout so a stalled ThingSpeak call cannot hang the loop.
    conn = httplib.HTTPConnection("api.thingspeak.com:80", timeout = 5)
    conn.request("POST", "/update", params, headers)
    response = conn.getresponse()
    print response.status, response.reason
    data = response.read()
    conn.close()
#Created purely to make multithreading easier.
def sendDataFinal():
while True:
try:
sendDataFinal()
except KeyboardInterrupt:
print "caught keyboard interrupt"
sys.exit()
#start thread 1 for rf water level data.
# BUG FIX: the original wrote Thread(target = rfWaterLevelFinal()), which
# CALLS the function in the main thread (blocking there forever) and hands
# its return value to Thread.  Pass the function object — no parentheses.
t1 = Thread(target = rfWaterLevelFinal)
t1.start()
#start thread 2 for sending the data.
t2 = Thread(target = sendDataFinal)
t2.start()
#wait for both threads to finish
t1.join()
t2.join()
So essentially I need thread 1 to start and finish, then thread 2 to start and finish, and for that sequence to repeat constantly (as if in an infinite loop).
I have looked at using a threadpool for this in python but i have no clue how to apply it.
Any ideas on what I could do to get the results that I want?
Cheers
Thanks in Advance!
This is what you wanted,
# Run the two stages strictly in alternation, forever.
while True:
    # BUG FIX: the original passed target = rfWaterLevelFinal(), which runs
    # the function to completion in the main thread and gives Thread its
    # return value (None) as the target.  Pass the function itself.
    t1 = Thread(target = rfWaterLevelFinal)
    t1.start()
    t1.join()
    t2 = Thread(target = sendDataFinal)
    t2.start()
    t2.join()
But no need to run like that with threads, you can just call the methods.
# Equivalent sequential version: since each thread above is joined before
# the next starts, plain function calls give the same ordering with no
# thread overhead.
while True:
    rfWaterLevelFinal()
    sendDataFinal()
I have a Python script with threads, and I need the following: if after, for example, 1 hour the threads have not finished, terminate all of them and end the script; if the hour is not yet up, wait for all my threads to finish.
I tried a daemon thread that sleeps for the hour and then calls sys.exit(), but it did not work for me: the script always waits for the sleeping thread and then for the worker threads to finish, so the sys.exit() has no effect.
import socket, threading, time, sys
from sys import argv
import os

acc_time=0
transactions_ps=5  # msisdns handled per worker thread

# Read the msisdn list (one per line) from the file named on the command line.
ins = open(sys.argv[1],'r')
msisdn_list = []
for line in ins:
    msisdn_list.append (line.strip('\n'))
    # print line
ins.close()
def worker(msisdn_list):
semaphore.acquire()
global transactions_ps
print " ***** ", threading.currentThread().getName(), "Lanzado"
count=1
acc_time=0
print "len: ",len(msisdn_list)
for i in msisdn_list:
try:
init=time.time()
time.sleep(2)
print "sleeping...",i
time.sleep(4)
final=time.time()
acc_time = acc_time+final-init
print acc_time
except IOError:
print "Connection failed",sys.exc_info()[0]
print "Deteniendo ",threading.currentThread().getName()
semaphore.release()
def kill_process(secs_to_die):
    """Watchdog: hard-kill the whole process after secs_to_die seconds.

    BUG FIX: sys.exit() only raises SystemExit in the CALLING thread, so
    the original watchdog ended itself and left the script running — the
    exact failure the asker describes.  os._exit() terminates the entire
    process immediately (note: it skips atexit/cleanup handlers).
    """
    time.sleep(secs_to_die)
    os._exit(1)
seconds_to_die=3600
# Start the watchdog that bounds the whole run to one hour.
thread_kill = threading.Thread(target = kill_process, args=(seconds_to_die,))
thread_kill.start()

max_con=5
semaphore = threading.BoundedSemaphore(max_con)
for i in range(0,28,transactions_ps):
    # BUG FIX: slices are already end-exclusive, so the original
    # msisdn_list[i:i+transactions_ps-1] silently dropped one msisdn
    # from every batch.
    w = threading.Thread(target=worker, args=(msisdn_list[i:i+transactions_ps],))
    w.setDaemon(True)
    w.start()
How can I do it?
A minimal change to your code that would fix the issue is threading.Barrier:
# Sketch (not runnable as-is): a threading.Barrier lets all worker threads
# plus the main thread rendezvous, and the shared timeout aborts the wait
# after an hour.
barrier = Barrier(number_of_threads, timeout=3600)
# create (number_of_threads - 1) threads, pass them barrier
# each thread calls barrier.wait() on exit
barrier.wait() # after number_of_threads .wait() calls or on timeout it returns
A simpler alternative is to use multiprocessing.dummy.Pool that creates daemon threads:
from multiprocessing.dummy import Pool # use threads

# Sketch: poll the clock between completed work items and stop the script
# once the deadline has passed.  (pool, work, args and timer are assumed
# to be defined elsewhere — see the complete example below this snippet
# in the original answer.)
start = timer()
endtime = start + 3600
for result in pool.imap_unordered(work, args):
    if timer() > endtime:
        exit("timeout")
The code doesn't timeout until a work item is done i.e., it expects that processing a single item from the list doesn't take long.
Complete example:
#!/usr/bin/env python3
import logging
import multiprocessing as mp
from multiprocessing.dummy import Pool
from time import monotonic as timer, sleep
# Shortcut to the multiprocessing logger's info method.
info = mp.get_logger().info

def work(i):
    # Simulate one second of work for item i, logging entry and exit.
    info("start %d", i)
    sleep(1)
    info("end %d", i)
seconds_to_die = 3600
max_con = 5
mp.log_to_stderr().setLevel(logging.INFO) # enable logging
pool = Pool(max_con) # no more than max_con at a time
start = timer()
endtime = start + seconds_to_die
# Drain results as they complete; between items, check the wall-clock
# deadline and abort the whole script once it has passed.  The pool's
# threads are daemonic, so exit() does not hang on them.
for _ in pool.imap_unordered(work, range(10000)):
    if timer() > endtime:
        exit("timeout")
You may refer to this implementation of KThread:
http://python.todaysummary.com/q_python_45717.html