Writing to the 16x2 LCD display on the Raspberry Pi can take some time to finish, especially with the module I wrote that automatically scrolls text that exceeds the length of the display.
I need to use multithreading, or something similar, to send output to the display and continue with the rest of the program. I've tried a couple of things with multithreading, but haven't quite got it working.
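Roughly what I'm after is something like this (just a sketch; I don't know yet whether ProcessFrameBuffer can safely run in a background thread while the rest of the code continues):

import threading
# fire-and-forget the LCD update and keep going
t = threading.Thread(target=TextToLCD.ProcessFrameBuffer,
                     args=(["Scanning:", "................."], 'INFO'))
t.start()
# ...carry on with PiPhoto.GetPhoto() etc. while the text scrolls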
This is the working code without any multithreading. The method I want to be multithreaded is "TextToLCD.ProcessFrameBuffer".
piBell.py
#!/usr/bin/env python3
import time
import rekognition
import TextToLCD
import PiPhoto
import json
import logging
import re
import threading
from queue import Queue

logFormatter = logging.Formatter("%(asctime)s [%(name)-8.8s]/[%(funcName)-12.12s] [%(levelname)-5.5s] %(message)s")
rootLogger = logging.getLogger('piBell')

fileHandler = logging.FileHandler("{0}/{1}.log".format("./", "piBell"), 'a')
fileHandler.setFormatter(logFormatter)
rootLogger.addHandler(fileHandler)

consoleHandler = logging.StreamHandler()
consoleHandler.setFormatter(logFormatter)
rootLogger.addHandler(consoleHandler)

reFace = re.compile('face|head|selfie|portrait|person', re.IGNORECASE)

def main(debugMode='INFO'):
    TextToLCD.Clear()
    rootLogger.setLevel(debugMode)
    imgRotation = 270
    imgPath = './'
    imgName = 'image.jpg'

    TextToLCD.ProcessFrameBuffer(["Scanning:", "................."], debugMode)
    PiPhoto.GetPhoto(imgPath + imgName, imgRotation, "INFO")
    rootLogger.info("Sending image to rekognition.")
    TextToLCD.ProcessFrameBuffer(["Processing", "................."], debugMode)
    jsonLabels = rekognition.get_labels(imgPath + imgName)
    rootLogger.info("Obtained JSON payload from rekognition.")
    rootLogger.debug(json.dumps(jsonLabels))

    if len(json.dumps(jsonLabels)) > 0:
        if IsFace(jsonLabels):
            if TestFace(imgPath + imgName):
                TextToLCD.ProcessFrameBuffer(['Hello', ' :)'], debugMode)
                celeb = IsCelebrity(imgPath + imgName)
                if celeb:
                    TextToLCD.ProcessFrameBuffer(["You look like:", celeb], debugMode)
            else:
                rootLogger.info("No face detected.")
                TextToLCD.ProcessFrameBuffer(['No face detected', ' :('], debugMode)
        else:
            rootLogger.info("No face detected.")
            TextToLCD.ProcessFrameBuffer(['No face detected', ' :('], debugMode)
    else:
        rootLogger.error("JSON payload from rekognition was empty.")

def IsFace(jsonPayload):
    for value in jsonPayload:
        rootLogger.info("Label: " + value['Name'] + ", Confidence: " + str(round(value['Confidence'])))
        rootLogger.debug(json.dumps(jsonPayload))
        if reFace.match(value['Name']) and round(value['Confidence']) > 75:
            rootLogger.info("Possible face match.")
            return True
    return False

def TestFace(img):
    jsonFaces = rekognition.get_faces(img)
    rootLogger.debug(json.dumps(jsonFaces))
    if len(json.dumps(jsonFaces)) > 2:
        for item in jsonFaces:
            if item['Confidence']:
                if item['Confidence'] > 75:
                    rootLogger.info("Face detected. Confidence: " + str(round(item['Confidence'])))
                    return True
    else:
        rootLogger.info("No facial data obtained.")
    return False

def IsCelebrity(img):
    celebMatchAccuracy = 25
    jsonCelbFaces = rekognition.get_celebrities(img)
    rootLogger.debug(json.dumps(jsonCelbFaces))
    if len(json.dumps(jsonCelbFaces)) > 2:
        for item in jsonCelbFaces:
            if item['MatchConfidence']:
                if item['MatchConfidence'] > celebMatchAccuracy and item['Name']:
                    rootLogger.info("Celebrity match detected: " + item['Name'] + ", Confidence: " + str(round(item['MatchConfidence'])))
                    return item['Name']
    else:
        rootLogger.info("No celebrity match found.")
    return False

if __name__ == "__main__":
    main('INFO')
First of all, it would be nice to see your LCD functions.
You are using hardware, and hardware is a limited, shared resource. Access to it therefore needs some kind of control, which can be implemented with a Lock object or an Event object.
You have two choices when another thread wants to use the display:
Run the current writing to the end
Interrupt the current writing
import threading
import time

def parallelWithLock(lock: threading.Lock, name: str):
    with lock:
        for i in range(5):
            print(f"{name}: {i}")
            time.sleep(0.5)
            # doWhatYouWant(...)

def parallelWithInterrupt(event: threading.Event, lock: threading.Lock, name: str):
    event.set()  # ask the current writer to stop
    i = 0
    with lock:
        event.clear()
        while True:  # or: while still writing
            if event.is_set():
                print(f"{name} Interrupted!")
                break
            print(f"{name}: {i}")
            time.sleep(1)
            i += 1
            # doWhatYouWant(...)

if __name__ == '__main__':
    lock = threading.Lock()
    t1 = threading.Thread(target=parallelWithLock, args=(lock, "Thread_1"))
    t2 = threading.Thread(target=parallelWithLock, args=(lock, "Thread_2"))
    t1.start()
    t2.start()
    t1.join()
    t2.join()

    event = threading.Event()
    lock = threading.Lock()
    t3 = threading.Thread(target=parallelWithInterrupt, args=(event, lock, "Thread_3"))
    t4 = threading.Thread(target=parallelWithInterrupt, args=(event, lock, "Thread_4"))
    t5 = threading.Thread(target=parallelWithInterrupt, args=(event, lock, "Thread_5"))
    t3.start()
    time.sleep(5)
    t4.start()
    time.sleep(3)
    t5.start()
    t3.join()
    t4.join()
    time.sleep(2)
    event.set()
    t5.join()
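Applied to the question's code, a minimal sketch of the lock variant (assuming TextToLCD.ProcessFrameBuffer from the question touches nothing but the display):

import threading

lcd_lock = threading.Lock()

def lcd_write(lines, debugMode):
    # only one thread may drive the LCD at a time; later calls wait their turn
    with lcd_lock:
        TextToLCD.ProcessFrameBuffer(lines, debugMode)

# fire and forget; main() continues while the text scrolls
threading.Thread(target=lcd_write,
                 args=(["Scanning:", "................."], 'INFO'),
                 daemon=True).start()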
Inside a worker thread I am generating a data frame. Trying to put this data frame into the queue passed to the worker thread is failing; in fact, trying to put any value into the queue is failing.
The part of the code that is failing inside the worker thread task1() is given below:
df = pd.DataFrame([[1,2,3,4],[3,4,5,6]])
qmdlvalues.put(df)
mdltiming = time.time() - start
qmdlparams.put(paramval)
qtiming.put(mdltiming)
Complete code
import threading
import queue
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
import os
import time

def write_tsne_op(opdata, fname, header):
    with open(fname, 'w') as outfile:
        outfile.write(header)
        for data_slice in opdata:
            np.savetxt(outfile, data_slice, delimiter=",")

def task1(qmdlvalues, qmdlparams, qtiming, paramval):
    start = time.time()
    #tmpmdl1 = TSNE(perplexity=100,early_exaggeration=1, n_components=2,random_state=0,verbose=1)
    #qmdlvalues.put(tmpmdl1.fit_transform(dense_mx))
    df = pd.DataFrame([[1,2,3,4],[3,4,5,6]])
    qmdlvalues.put(df)
    mdltiming = time.time() - start
    qmdlparams.put(paramval)
    qtiming.put(mdltiming)
    print(df)
    print(str(mdltiming))
    print(paramval)

def task2(qmdlvalues, qmdlparams, qtiming, paramval):
    start = time.time()
    #tmpmdl2 = TSNE(perplexity=100,early_exaggeration=10, n_components=2,random_state=0,verbose=1)
    #qmdlvalues.put(tmpmdl2.fit_transform(dense_mx2))
    qmdlvalues.put(pd.DataFrame([[1,2,3,4],[3,4,5,6]]))
    qmdlparams.put(paramval)
    mdltiming = time.time() - start
    qtiming.put(mdltiming)

if __name__ == "__main__":
    dense_mx2 = dense_mx
    dense_mx3 = dense_mx

    qmdlvl = queue.Queue()
    qmdlch = queue.Queue()
    qtme = queue.Queue()

    mdlvalues = pd.DataFrame()

    t1 = threading.Thread(target=task1, args=(qmdlvl,qmdlch,qtme,"#perplex: 100 early exag: 1 timing:$_plex100_exag1.csv"), name='t1')
    t2 = threading.Thread(target=task2, args=(qmdlvl,qmdlch,qtme,"#perplex: 100 early exag: 10 timing:$_plex100_exag10.cv"), name='t2')

    # starting threads
    t1.start()
    t2.start()

    while True:
        if qmdlvl.empty():
            print("Queue closed. Exiting thread.")
            break
        try:
            item = qmdlvl.get(timeout=.5)
        except:
            continue
        print("Got item:", item)

    # wait until all threads finish
    t1.join()
    t2.join()
Below is the actual output I am getting from the while loop in main:
ID of process running main program: 6456
Main thread name: MainThread
Queue closed. Exiting thread.
I want to be able to put the data frame into a queue inside the worker thread and access the same data frame in the main thread.
There were parameter mismatches in my earlier code; those have been corrected, and full working code is presented below.
I stored the output of t-SNE directly into the queue and retrieved the same in the main thread. The next progression would be to convert this to a thread pool and to use sub-classing.
import threading
import queue
import numpy as np
from sklearn.manifold import TSNE
import os
import time

def write_tsne_op(opdata, fname, header):
    with open(fname, 'w') as outfile:
        outfile.write(header)
        for data_slice in opdata:
            np.savetxt(outfile, data_slice, delimiter=",")

def task1(ip_matrix, qmdlvalues, qmdlparam, plex, exag, qmdltime, qmdlhrfn, hderfname):
    string = ""
    start = 0
    end = 0
    mdltiming = 0
    start = time.time()
    tmpmdl1 = TSNE(perplexity=plex, early_exaggeration=exag, n_components=2, random_state=0, verbose=1)
    qmdlvalues.put(tmpmdl1.fit_transform(ip_matrix))
    string = str(plex) + "$" + str(exag)
    qmdlparam.put(string)
    qmdlhrfn.put(hderfname)
    end = time.time()
    mdltiming = end - start
    print(str(mdltiming) + "time")
    qmdltime.put(mdltiming)

def task2(ip_matrix, qmdlvalues, qmdlparam, plex, exag, qmdltime, qmdlhrfn, hderfname):
    string = ""
    start = 0
    end = 0
    mdltiming = 0
    start = time.time()
    tmpmdl2 = TSNE(perplexity=plex, early_exaggeration=exag, n_components=2, random_state=0, verbose=1)
    qmdlvalues.put(tmpmdl2.fit_transform(ip_matrix))
    string = str(plex) + "$" + str(exag)
    qmdlparam.put(string)
    qmdlhrfn.put(hderfname)
    end = time.time()
    mdltiming = end - start
    qmdltime.put(mdltiming)

def task3(ip_matrix, qmdlvalues, qmdlparam, plex, exag, qmdltime, qmdlhrfn, hderfname):
    string = ""
    start = 0
    end = 0
    mdltiming = 0
    start = time.time()
    tmpmdl3 = TSNE(perplexity=plex, early_exaggeration=exag, n_components=2, random_state=0, verbose=1)
    qmdlvalues.put(tmpmdl3.fit_transform(ip_matrix))
    string = str(plex) + "$" + str(exag)
    qmdlparam.put(string)
    qmdlhrfn.put(hderfname)
    end = time.time()
    mdltiming = end - start
    qmdltime.put(mdltiming)

def task4(ip_matrix, qmdlvalues, qmdlparam, plex, exag, qmdltime, qmdlhrfn, hderfname):
    string = ""
    start = 0
    end = 0
    mdltiming = 0
    start = time.time()
    tmpmdl4 = TSNE(perplexity=plex, early_exaggeration=exag, n_components=2, random_state=0, verbose=1)
    qmdlvalues.put(tmpmdl4.fit_transform(ip_matrix))
    string = str(plex) + "$" + str(exag)
    qmdlparam.put(string)
    qmdlhrfn.put(hderfname)
    end = time.time()
    mdltiming = end - start
    qmdltime.put(mdltiming)

if __name__ == "__main__":
    # print ID of current process
    print("ID of process running main program: {}".format(os.getpid()))

    # print name of main thread
    print("Main thread name: {}".format(threading.main_thread().name))

    # dense_mx (the input matrix) is assumed to be defined earlier
    dense_mx2 = dense_mx
    dense_mx3 = dense_mx
    dense_mx4 = dense_mx

    qmdlvl = queue.Queue()
    qmdlch = queue.Queue()
    qmdltme = queue.Queue()
    qmdlhdrfname = queue.Queue()

    perplex = 200

    # creating threads
    exag = 10
    t1 = threading.Thread(target=task1, args=(dense_mx,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 10 timing:$_plex200_exag10.csv"), name='t1')
    exag = 30
    t2 = threading.Thread(target=task2, args=(dense_mx2,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 30 timing:$_plex200_exag30.cv"), name='t2')
    exag = 50
    t3 = threading.Thread(target=task3, args=(dense_mx3,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 50 timing:$_plex200_exag50.csv"), name='t3')
    exag = 100
    t4 = threading.Thread(target=task4, args=(dense_mx4,qmdlvl,qmdlch,perplex,exag,qmdltme,qmdlhdrfname,"#perplex: 200 early exag: 100 timing:$_plex200_exag100.cv"), name='t4')

    # starting threads
    t1.start()
    t2.start()
    t3.start()
    t4.start()

    # wait until all threads finish
    t1.join()
    t2.join()
    t3.join()
    t4.join()

    while True:
        if qmdlvl.empty():
            print("Queue closed. Exiting thread.")
            break
        try:
            item1 = qmdlvl.get(timeout=.5)
            item2 = qmdlch.get(timeout=.5)
            item3 = qmdltme.get(timeout=.5)
            header, fname = qmdlhdrfname.get(timeout=.5).split('$')
        except:
            continue
        write_tsne_op(item1, fname, header)
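For what it's worth, a sketch of the "thread pool" progression mentioned above, using concurrent.futures and mirroring the parameters of the code (dense_mx is again assumed to be the input matrix defined elsewhere):

from concurrent.futures import ThreadPoolExecutor
from sklearn.manifold import TSNE

def run_tsne(ip_matrix, plex, exag):
    mdl = TSNE(perplexity=plex, early_exaggeration=exag, n_components=2, random_state=0, verbose=1)
    return mdl.fit_transform(ip_matrix)

# one future per early_exaggeration value, replacing task1..task4
with ThreadPoolExecutor(max_workers=4) as pool:
    futures = {exag: pool.submit(run_tsne, dense_mx, 200, exag) for exag in (10, 30, 50, 100)}
    for exag, fut in futures.items():
        embedding = fut.result()  # blocks until that model finishes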
For my program, I have a file that writes random integers to a .CSV file.
from __future__ import absolute_import, division, print_function
from numpy.random import randint as randrange
import os, argparse, time
from tqdm import tqdm

def write_to_csv(filename, *args, newline = True):
    write_string = ''
    for arg in args:
        if type(arg) == list:
            for i in arg:
                write_string += str(i) + ','
        else:
            write_string += str(arg) + ','
    if newline:
        write_string = write_string.rstrip(',') + '\n'
    else:
        write_string = write_string.rstrip(',')
    with open(filename+'.csv', 'a') as file:
        file.write(write_string)

def move_dir(dirname, parent = False):
    if not parent:
        dirname = str(dirname)
        exists = os.path.isfile(dirname)
        try:
            os.mkdir(dirname)
            os.chdir(dirname)
        except FileExistsError:
            os.chdir(dirname)
    else:
        os.chdir("..")

def calculate_probability(odds, exitmode = False, low_cpu = 0):
    try:
        file_count = 0
        move_dir('Probability')
        move_dir(str(odds))
        d = {}
        writelist = []
        percentlist = []
        for i in tqdm(range(odds)):
            d[str(i)] = 0
            writelist.append(f'Times {i}')
            percentlist.append(f'Percent {i}')
        while True:
            if os.path.isfile(str(file_count)+'.csv'):
                file_count += 1
            else:
                break
        filename = str(file_count)
        write_to_csv(filename, 'Number', 'Value')
        rep = 500 * odds
        if rep > 10000:
            rep = 10000
        for i in tqdm(range(rep)):
            ran = randrange(odds)
            ran = int(ran)
            d[str(ran)] += 1
            if i == 999:
                write_to_csv(filename, i, ran+1, newline = False)
            else:
                write_to_csv(filename, i, ran+1)
            if low_cpu:
                time.sleep(0.01*float(low_cpu))
        writelist2 = []
        percentlist2 = []
        for i in tqdm(range(odds)):
            val = d[str(i)]
            writelist2.append(val)
            percentlist2.append(round(((val/rep)*100), 2))
        if os.path.isfile('runs.csv'):
            write_to_csv('runs', file_count, writelist2, percentlist2)
        else:
            write_to_csv('runs', 'Run #', writelist, percentlist)
            write_to_csv('runs', file_count, writelist2, percentlist2)
        if exitmode:
            exit()
    except(KeyboardInterrupt, SystemExit):
        if exitmode:
            os.remove(str(file_count)+'.csv')
            exit()
        else:
            try:
                os.system('cls')
                print('User/program interrupted, launching shutdown mode...')
                os.remove(str(file_count)+'.csv')
                print('Finalizing current trial...')
                os.chdir("..")
                os.chdir("..")
            except FileNotFoundError:
                exit()
            calculate_probability(odds, exitmode = True)
I also have a repetition system to do this multiple times.
def run_tests(times, odds, low_cpu = 0, shutdown = False):
    for i in tqdm(range(times)):
        calculate_probability(odds, low_cpu = low_cpu)
        os.chdir("..")
        os.chdir("..")
    if shutdown:
        os.system('shutdown /S /F /T 0 /hybrid')
However, if I were to run, say, 30 trials, it would take forever. So I decided to use the multiprocessing module to speed up the process. Because each run needs to write to the same file at the end, I had to collect the data and write it after the processes ended.
from multiprocessing import Process, Manager  # needed by the code below

def calculate_probability(odds, low_cpu = 0):
    try:
        file_count = 0
        move_dir('Probability')
        move_dir(str(odds))
        d = {}
        writelist = []
        percentlist = []
        for i in tqdm(range(odds)):
            d[str(i)] = 0
            writelist.append(f'Times {i}')
            percentlist.append(f'Percent {i}')
        while True:
            if os.path.isfile(str(file_count)+'.csv'):
                file_count += 1
            else:
                break
        filename = str(file_count)
        write_to_csv(filename, 'Number', 'Value')
        rep = 500 * odds
        if rep > 10000:
            rep = 10000
        for i in range(rep):
            ran = randrange(odds)
            ran = int(ran)
            d[str(ran)] += 1
            if i == 999:
                write_to_csv(filename, i, ran+1, newline = False)
            else:
                write_to_csv(filename, i, ran+1)
            if low_cpu:
                time.sleep(0.01*float(low_cpu))
        writelist2 = []
        percentlist2 = []
        for i in range(odds):
            val = d[str(i)]
            writelist2.append(val)
            percentlist2.append(round(((val/rep)*100), 2))
        return (writelist, percentlist, writelist2, percentlist2)
    except(KeyboardInterrupt, SystemExit):
        try:
            os.remove(str(file_count)+'.csv')
        finally:
            exit()

def worker(odds, returndict, num, low_cpu = 0):
    returndict[f'write{num}'] = calculate_probability(odds, low_cpu = low_cpu)
    os.chdir("..")
    os.chdir("..")
    os.system('cls')

def run_tests(times, odds, low_cpu = 0, shutdown = False):
    print('Starting...')
    manager = Manager()
    return_dict = manager.dict()
    job_list = []
    for i in range(times):
        p = Process(target=worker, args=(odds,return_dict,i), kwargs = {'low_cpu' : low_cpu})
        job_list.append(p)
        p.start()
    try:
        for proc in job_list:
            proc.join()
    except KeyboardInterrupt:
        print('User quit program...')
        time.sleep(5)
        for proc in job_list:
            proc.join()
        exit()
    else:
        move_dir('Probability')
        move_dir(str(odds))
        if not os.path.isfile('runs.csv'):
            write_to_csv('runs', return_dict.values()[0][0], return_dict.values()[0][1])
        for value in return_dict.values():
            write_to_csv('runs', value[2], value[3])
        print('Done!')
    finally:
        if shutdown:
            os.system('shutdown /S /F /T 0 /hybrid')
However, when I run this new code, there is only one progress bar, and each process overwrites it, so the bar flashes with random numbers, making it useless. I want a stack of bars, one for each process, that each update without interrupting the others. The bars do not need to be ordered; I just need an idea of how fast each process is doing its task.
STDOUT is just a stream, and all of your processes are attached to the same one, so there's no direct way to tell it to print the output from different processes on different lines.
Probably the simplest way to achieve this would be to have a separate process that is responsible for aggregating the status of all the other processes and reporting the results. You can use a multiprocessing.Queue to pass data from the worker processes to the status process, which can then print the status to stdout. If you want a stack of progress bars, you'll have to get a little creative with the formatting: essentially, update all the progress bars at the same time and print them in the same order, so they appear to stack up.
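A minimal sketch of that approach (all names are illustrative, TOTAL is an invented work count, and the cursor movement uses an ANSI escape, so a terminal that supports one is assumed):

import random
import time
from multiprocessing import Process, Queue

TOTAL = 50  # work units per worker (illustrative)

def worker(progress_queue, worker_id):
    # report progress instead of printing; the status process owns stdout
    for done in range(1, TOTAL + 1):
        time.sleep(random.uniform(0.01, 0.05))  # simulate work
        progress_queue.put((worker_id, done))
    progress_queue.put((worker_id, None))  # sentinel: this worker finished

def status_printer(progress_queue, n_workers):
    progress = {i: 0 for i in range(n_workers)}
    finished = 0
    drawn = False
    while finished < n_workers:
        worker_id, done = progress_queue.get()
        if done is None:
            finished += 1
        else:
            progress[worker_id] = done
        if drawn:
            print("\x1b[%dF" % n_workers, end="")  # move the cursor up n lines
        for wid in sorted(progress):  # fixed order, so the bars appear stacked
            pct = 100 * progress[wid] // TOTAL
            print("worker %d [%-20s] %3d%%" % (wid, "#" * (pct // 5), pct))
        drawn = True

if __name__ == "__main__":
    q = Queue()
    n = 4
    printer = Process(target=status_printer, args=(q, n))
    printer.start()
    workers = [Process(target=worker, args=(q, i)) for i in range(n)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    printer.join()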
I started learning Python two months ago and I have written code for a Raspberry Pi project. My problem is that the program gets stuck after some hours of operation. I think that, in all cases, it stopped after some WiFi connection drops. But I don't understand why the whole program stops if there is something wrong with the WiFi connection: it stops uploading values and renewing the messages it prints on the LCD screen. (I removed this and other stuff from the code to make it easier to read.)
The code runs at startup (sudo python /home/pi/test.py &) and contains two threads:
The "Control" thread reads temperature and humidity over the I2C bus from an AM2315 sensor and, according to a temperature threshold, controls a relay through GPIO.
The "Thingspeak" thread reads the temperature threshold from a Thingspeak channel and then uploads the measurements from the previous thread to Thingspeak.
I really don't know what to do and how to search for any solutions.
Any help will be much appreciated.
#! /usr/bin/env python
from time import sleep
import datetime
import urllib2
import RPi.GPIO as GPIO
import threading
import smbus
from tentacle_pi.AM2315 import AM2315
import smtplib
import contextlib

sleep(120)

# Lock
tLock = threading.Lock()

# Global variables
tem_global = 0; hum_global = 0
tem_hi = 35; relay = 21

# GPIO setup
GPIO.setmode(GPIO.BCM)
GPIO.setup(relay, GPIO.OUT)
GPIO.output(relay, False)
sleep(1)

def Control():
    global temg, humg, tem_hi, relay
    # AM2315 setup
    am = AM2315(0x5c, "/dev/i2c-1")
    I2C_address = 0x70; I2C_bus_number = 1; i2c_channel_setup = 1
    bus = smbus.SMBus(I2C_bus_number)
    bus.write_byte(I2C_address, i2c_channel_setup)
    sleep(1)
    while True:
        try:
            tem_local, hum_local = am2315meas()
        except:
            tem_local = -1; hum_local = -1
        tLock.acquire()
        tem_global = tem_local; hum_global = hum_local
        if tem_local < tem_hi:
            GPIO.output(relay, True)
        else:
            GPIO.output(relay, False)
        tLock.release()
        sleep(150)

def Thingspeak():
    global tem_global, hum_global, tem_hi
    myAPI = "..."
    channelID = "..."
    baseURL = 'https://api.thingspeak.com/update?api_key=%s' % myAPI
    while True:
        sleep(30)
        try:
            # Reading value from thingspeak
            tLock.acquire()
            with contextlib.closing(urllib2.urlopen("https://api.thingspeak.com/channels/%s/fields/1/last?" % channelID)) as f_read:
                tem_hi = float(fread.read())
            t.Lock.release()
            sleep(30)
            # Uploading values to thingspeak
            tLock.acquire()
            with contextlib.closing(urllib2.urlopen(baseURL + "&field1=%s" % tem_global + "&field2=%s" % hum_global)) as f_upload:
                pass
            tLock.release()
        except:
            with open('/home/pi/errors.txt', mode='a') as file:
                file.write('Network error recorded at %s.\n' % datetime.datetime.now())
                file.close()
            sleep(60)
            continue

def Main():
    t1 = threading.Thread(target=Thingspeak)
    t2 = threading.Thread(target=Control)
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    GPIO.cleanup()

if __name__ == '__main__':
    Main()
Problem solved. As James K Polk indicated, there was an error after the tLock.acquire() every time the internet connection went down, causing the program to deadlock. Below is the corrected part of the code for anyone who may be interested.
def Control():
    global tem_global, hum_global, tem_hi, relay
    # AM2315 setup
    am = AM2315(0x5c, "/dev/i2c-1")
    I2C_address = 0x70; I2C_bus_number = 1; i2c_channel_setup = 1
    bus = smbus.SMBus(I2C_bus_number)
    bus.write_byte(I2C_address, i2c_channel_setup)
    sleep(1)
    while True:
        try:
            tem_local, hum_local = am2315meas()
        except:
            tem_local = -1; hum_local = -1
        with tLock:
            tem_global = tem_local; hum_global = hum_local
            if tem_local < tem_hi:
                GPIO.output(relay, True)
            else:
                GPIO.output(relay, False)
        sleep(150)

def Thingspeak():
    global tem_global, hum_global, tem_hi
    myAPI = "..."
    channelID = "..."
    baseURL = 'https://api.thingspeak.com/update?api_key=%s' % myAPI
    while True:
        sleep(30)
        try:
            # Reading value from thingspeak
            with tLock:
                with contextlib.closing(urllib2.urlopen("https://api.thingspeak.com/channels/%s/fields/1/last?" % channelID)) as f_read:
                    tem_hi = float(f_read.read())
            sleep(30)
            # Uploading values to thingspeak
            with tLock:
                with contextlib.closing(urllib2.urlopen(baseURL + "&field1=%s" % tem_global + "&field2=%s" % hum_global)) as f_upload:
                    pass
        except:
            with open('/home/pi/errors.txt', mode='a') as file:
                file.write('Network error recorded at %s.\n' % datetime.datetime.now())
            sleep(60)
            continue
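For anyone puzzled by why the original hangs forever, here is the failure mode in miniature (fetch() is a stand-in for the urlopen call, not from the original code):

import threading

tLock = threading.Lock()

def fetch():
    # stand-in for the network call; pretend the WiFi just dropped
    raise IOError("network down")

def risky():
    tLock.acquire()
    value = fetch()   # raises here...
    tLock.release()   # ...so this never runs and the lock stays held forever

def safe():
    with tLock:       # the lock is released even when fetch() raises
        value = fetch()

Once risky() leaks the lock, every later tLock.acquire() in either thread blocks forever, which is exactly the "whole program stops" symptom described above.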
I'm trying to implement basic multiprocessing and I've run into an issue. The python script is attached below.
import time, sys, random, threading
from multiprocessing import Process
from Queue import Queue
from FrequencyAnalysis import FrequencyStore, AnalyzeFrequency

append_queue = Queue(10)
database = FrequencyStore()

def add_to_append_queue(_list):
    append_queue.put(_list)

def process_append_queue():
    while True:
        item = append_queue.get()
        database.append(item)
        print("Appended to database in %.4f seconds" % database.append_time)
        append_queue.task_done()
    return

def main():
    database.load_db()
    print("Database loaded in %.4f seconds" % database.load_time)
    append_queue_process = Process(target=process_append_queue)
    append_queue_process.daemon = True
    append_queue_process.start()
    #t = threading.Thread(target=process_append_queue)
    #t.daemon = True
    #t.start()

    while True:
        path = raw_input("file: ")
        if path == "exit":
            break
        a = AnalyzeFrequency(path)
        a.analyze()
        print("Analyzed file in %.4f seconds" % a._time)
        add_to_append_queue(a.get_results())

    append_queue.join()
    #append_queue_process.join()
    database.save_db()
    print("Database saved in %.4f seconds" % database.save_time)
    sys.exit(0)

if __name__=="__main__":
    main()
AnalyzeFrequency analyzes the frequencies of words in a file, and get_results() returns a sorted list of said words and frequencies. The list is very large, perhaps 10000 items.
This list is then passed to the add_to_append_queue method, which adds it to a queue. process_append_queue takes the items one by one and adds the frequencies to a "database". This operation takes a bit longer than the actual analysis in main(), so I am trying to use a separate process for this method. When I try to do this with the threading module, everything works perfectly fine, no errors. When I try to use Process, the script hangs at item = append_queue.get().
Could someone please explain what is happening here, and perhaps direct me toward a fix?
All answers appreciated!
UPDATE
The pickle error was my fault, it was just a typo. Now I am using the Queue class within multiprocessing but the append_queue.get() method still hangs.
NEW CODE
import time, sys, random
from multiprocessing import Process, Queue
from FrequencyAnalysis import FrequencyStore, AnalyzeFrequency

append_queue = Queue()
database = FrequencyStore()

def add_to_append_queue(_list):
    append_queue.put(_list)

def process_append_queue():
    while True:
        database.append(append_queue.get())
        print("Appended to database in %.4f seconds" % database.append_time)
    return

def main():
    database.load_db()
    print("Database loaded in %.4f seconds" % database.load_time)
    append_queue_process = Process(target=process_append_queue)
    append_queue_process.daemon = True
    append_queue_process.start()
    #t = threading.Thread(target=process_append_queue)
    #t.daemon = True
    #t.start()

    while True:
        path = raw_input("file: ")
        if path == "exit":
            break
        a = AnalyzeFrequency(path)
        a.analyze()
        print("Analyzed file in %.4f seconds" % a._time)
        add_to_append_queue(a.get_results())

    #append_queue.join()
    #append_queue_process.join()
    print str(append_queue.qsize())
    database.save_db()
    print("Database saved in %.4f seconds" % database.save_time)
    sys.exit(0)

if __name__=="__main__":
    main()
UPDATE 2
This is the database code:
class FrequencyStore:

    def __init__(self):
        self.sorter = Sorter()
        self.db = {}
        self.load_time = -1
        self.save_time = -1
        self.append_time = -1
        self.sort_time = -1

    def load_db(self):
        start_time = time.time()
        try:
            file = open("results.txt", 'r')
        except:
            raise IOError
        self.db = {}
        for line in file:
            word, count = line.strip("\n").split("=")
            self.db[word] = int(count)
        file.close()
        self.load_time = time.time() - start_time

    def save_db(self):
        start_time = time.time()
        _db = []
        for key in self.db:
            _db.append([key, self.db[key]])
        _db = self.sort(_db)
        try:
            file = open("results.txt", 'w')
        except:
            raise IOError
        file.truncate(0)
        for x in _db:
            file.write(x[0] + "=" + str(x[1]) + "\n")
        file.close()
        self.save_time = time.time() - start_time

    def create_sorted_db(self):
        _temp_db = []
        for key in self.db:
            _temp_db.append([key, self.db[key]])
        _temp_db = self.sort(_temp_db)
        _temp_db.reverse()
        return _temp_db

    def get_db(self):
        return self.db

    def sort(self, _list):
        start_time = time.time()
        _list = self.sorter.mergesort(_list)
        _list.reverse()
        self.sort_time = time.time() - start_time
        return _list

    def append(self, _list):
        start_time = time.time()
        for x in _list:
            if x[0] not in self.db:
                self.db[x[0]] = x[1]
            else:
                self.db[x[0]] += x[1]
        self.append_time = time.time() - start_time
Comments suggest you're trying to run this on Windows. As I said in a comment,
If you're running this on Windows, it can't work - Windows doesn't have fork(), so each process gets its own Queue and they have nothing to do with each other. The entire module is imported "from scratch" by each process on Windows. You'll need to create the Queue in main(), and pass it as an argument to the worker function.
Here's a fleshed-out version of what you need to do to make it portable, although I removed all the database stuff because it's irrelevant to the problems you've described so far. I also removed the daemon fiddling, because that's usually just a lazy way to avoid shutting things down cleanly, and as often as not will come back to bite you later:
def process_append_queue(append_queue):
    while True:
        x = append_queue.get()
        if x is None:
            break
        print("processed %d" % x)
    print("worker done")

def main():
    import multiprocessing as mp
    append_queue = mp.Queue(10)
    append_queue_process = mp.Process(target=process_append_queue, args=(append_queue,))
    append_queue_process.start()

    for i in range(100):
        append_queue.put(i)
    append_queue.put(None)  # tell worker we're done
    append_queue_process.join()

if __name__=="__main__":
    main()
The output is the "obvious" stuff:
processed 0
processed 1
processed 2
processed 3
processed 4
...
processed 96
processed 97
processed 98
processed 99
worker done
Note: because Windows doesn't (can't) fork(), it's impossible for worker processes to inherit any Python object on Windows. Each process runs the entire program from its start. That's why your original program couldn't work: each process created its own Queue, wholly unrelated to the Queue in the other process. In the approach shown above, only the main process creates a Queue, and the main process passes it (as an argument) to the worker process.
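The rule of thumb from that note, in miniature (a generic sketch, not specific to the question's code): create shared objects in the main process and pass them explicitly, which works the same under fork() and under Windows' spawn.

import multiprocessing as mp

def worker(q):  # the queue arrives as an argument, not via a module global
    q.put("hello from the child")

if __name__ == "__main__":
    q = mp.Queue()  # created once, in the parent
    p = mp.Process(target=worker, args=(q,))
    p.start()
    print(q.get())  # -> hello from the child
    p.join()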
queue.Queue is thread-safe, but doesn't work across processes. This is quite easy to fix, though. Instead of:
from multiprocessing import Process
from Queue import Queue
You want:
from multiprocessing import Process, Queue
So I'm trying to code a really simple Internet Download Manager spoof with Python 2.7.
It is supposed to query a file's HTTP headers, get the byte range, spread the download among a number of threads (I hard-coded 2 for simplicity) according to the byte range, and later join the file parts together again.
The problem is that my console log tells me that only 1 thread is started.
[EDIT] The problem has been solved. Find the working code below.
Here is my source:
from __future__ import print_function
import threading
import urllib
import urllib2
import time

threads = []

# url to open
url = "http://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4"
u = urllib.urlopen(url)

# define file
file_name = "test.mp4"
f = open(file_name, 'wb')

# open url and get header info
def get_file_size(url):
    stream_size = u.info()['Content-Length']
    end = stream_size
    return end

start = 0

#get stream size
end = get_file_size(url)

# specify block size
block_sz = 512

#algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread1():
    full_stream_size = end
    first_thread = {'start':0, 'end':(int(full_stream_size)/2)}
    print(first_thread)
    return first_thread

#algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread2():
    full_stream_size = end
    second_thread = {'start':int(full_stream_size)/2, 'end':int(full_stream_size)}
    print(second_thread)
    return second_thread

# download function
def download_thread(url, id, start, end):
    current_size = int(float(start)/1024)
    total_size = int(float(end)/1024)
    print("Start at_" + str(current_size) + "Ends at_" + str(total_size))

    # specify request range and init stream
    req = urllib2.Request(url)
    req.headers['Range'] = 'bytes=%s-%s' % (start, end)
    data = urllib2.urlopen(req)
    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break
        start += len(buffer)
        f.write(buffer)
        thread_id = id
        #percentage = (current_size * 100 / total_size)
        status = str(thread_id) + "_" + str(current_size) + "_" + str(total_size)
        print(status)

#starts 2 threads
def start_threads():
    for i in range(2):
        #if first loop, start thread 1
        if(i==1):
            start = calculate_no_of_bytes_for_thread1().get('start')
            end = calculate_no_of_bytes_for_thread1().get('end')
            print("Thread 1 started")
            t = threading.Thread(target=download_thread, args=(url,i,start,end))
            t.start()
            threads.append(t)
        #if second loop, start thread 1
        if(i==2):
            start = calculate_no_of_bytes_for_thread2().get('start')
            end = calculate_no_of_bytes_for_thread2().get('end')
            print("Thread 2 started")
            t = threading.Thread(target=download_thread, args=(url,i,start,end))
            t.start()
            threads.append(t)

    # Join threads back (order doesn't matter, you just want them all)
    for i in threads:
        i.join()

#start benchmarking
start_time = time.clock()
start_threads()
print("Finito!")

end_time = time.clock()
benchmark = str(end_time - start_time)
print("Download took_" + benchmark)
f.close()
And the output:
{'start': 0, 'end': 527868}
{'start': 0, 'end': 527868}
Thread 1 started
Start at_0Ends at_515
1_0_515
1_0_515
Finito!
Download took_6.97844422658
Working code:
from __future__ import print_function
import threading
import urllib
import urllib2
import time

threads = []
parts = {}

# url to open
url = "http://www.sample-videos.com/audio/mp3/india-national-anthem.mp3"
u = urllib.urlopen(url)

# define file
file_name = "test.mp3"
f = open(file_name, 'wb')

# open url and get header info
def get_file_size(url):
    stream_size = u.info()['Content-Length']
    file_size = stream_size
    return file_size

start = 0

#get stream size
end = get_file_size(url)

# specify block size
block_sz = 512

#algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread1():
    full_stream_size = end
    first_thread = {'start':0, 'end':(int(full_stream_size)/2)}
    print(first_thread)
    return first_thread

#algo to divide work among 2 threads
def calculate_no_of_bytes_for_thread2():
    full_stream_size = end
    second_thread = {'start':int(full_stream_size)/2, 'end':int(full_stream_size)}
    print(second_thread)
    return second_thread

# download function
def download_thread(url, id, start, end):
    current_size = int(float(start)/1024)
    total_size = int(float(end)/1024)
    print("Start at_" + str(current_size) + "Ends at_" + str(total_size))

    # specify request range and init stream
    req = urllib2.Request(url)
    req.headers['Range'] = 'bytes=%s-%s' % (start, end)
    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break
        start += len(buffer)
        f.write(buffer)
        thread_id = id
        status = "Thread ID_" + str(thread_id) + "Downloaded_" + str(int(start/1024)) + "Total_" + str(total_size)
        print(status)

#starts 2 threads
def start_threads():
    for i in range(2):
        #if first loop, start thread 1
        if(i==0):
            start = calculate_no_of_bytes_for_thread1().get('start')
            end = calculate_no_of_bytes_for_thread1().get('end')
            print("Thread 1 started")
            t = threading.Thread(target=download_thread, args=(url,i,start,end))
            t.start()
            threads.append(t)
        #if second loop, start thread 2
        if(i==1):
            start = calculate_no_of_bytes_for_thread2().get('start')
            end = calculate_no_of_bytes_for_thread2().get('end')
            print("Thread 2 started")
            t = threading.Thread(target=download_thread, args=(url,i,start,end))
            t.start()
            threads.append(t)

    # Join threads back (order doesn't matter, you just want them all)
    for i in threads:
        i.join()

    # Sort parts and you're done
    # result = ''
    # for i in range(2):
    #     result += parts[i*block_sz]

#start benchmarking
start_time = time.clock()
start_threads()
print("Finito!")

end_time = time.clock()
benchmark = str(end_time - start_time)
print("Download took_" + benchmark)
f.close()
You have:
for i in range(2):
    if(i==1):
        ...
    if(i==2):
        ...
But range(2) iterates over [0, 1], not [1, 2].
Save some trouble and just remove those 3 lines. The code to start the two threads can just run serially.
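To illustrate, a sketch of start_threads() with the index checks removed (same helpers and globals as in the question's code):

def start_threads():
    ranges = [calculate_no_of_bytes_for_thread1(),
              calculate_no_of_bytes_for_thread2()]
    for i, byte_range in enumerate(ranges):
        print("Thread %d started" % (i + 1))
        t = threading.Thread(target=download_thread,
                             args=(url, i, byte_range['start'], byte_range['end']))
        t.start()
        threads.append(t)
    # join them all once both are running
    for t in threads:
        t.join()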