I am trying to process several folders that contain many rasters; in each folder, there are rasters with different dates on the same area. In order to save some time, I want to use the multiprocessing (or multithreading?) module to work in parallel.
Basically, my script does this: for one pixel, it makes some calculation on the first pixel and loads it to a numpy array if the number is higher than the previous one that was in the numpy array; then it continues with another pixel. The result should be several numpy arrays (one per folder).
It works fine without multiprocessing; when I try to multiprocess it, it gets very slow and doesn't take advantage of all 10 cores:
Here is my code:
import os, sys, math, time, datetime
import numpy as np
from numpy import *
from osgeo import gdal,gdal_array,osr
from itertools import islice
from multiprocessing import Pool, Process
import multiprocessing
#prints full size numpy array instead of extract
np.set_printoptions(threshold=sys.maxsize)
#define tresholds for dNBR, NBR and NDVI difference (ratio NDVIafter/NDVIbefore)
dNBRthreshold=0.15
RdNBRthreshold=0.4
NDVIdiffThreshold=0.1
def proc (path):
"""Process one folder of date-stamped two-band rasters (band 1 = NDVI,
band 2 = NBR) and save a stacked per-folder results array of
(dNBR, RdNBR, fire date) triples.

NOTE(review): this is the slow version from the question. Two things
stand out: (1) full bands are re-read from disk with ReadAsArray()
inside the per-pixel loops, so every raster is decoded once per pixel;
(2) the author later identified GDAL calls such as GetNoDataValue()/
GetMinimum()/GetMaximum() as the multiprocessing bottleneck.
Indentation was lost when this snippet was pasted; kept as-is.
"""
#print information to a log file
log = open(path+"\\myprog.log", "a")
# Redirect every print() in this worker process into the folder's log file.
sys.stdout = log
#create a list of all files in the current directory
ListImages=[]
for file in os.listdir(path):
if file.endswith(".tif"):
ListImages.append(os.path.join(path, file))
#sort the list alphabetically
ListImages.sort()
print ("Image list: ", ListImages)
#create empty numpy array the same size as the first image and with number of bands defined by user
firstImage=gdal.Open(ListImages[0])
band0 = firstImage.GetRasterBand(1)
arrayOfFirstImage = band0.ReadAsArray()
listEmpty=[]
#create numpy array with same size as first image but dimension defined by user in "range"
for x in range(30):
name="emptyArray_" + str(x)
#create raster with same size as first image
# NOTE(review): `name` is immediately rebound below, so the string above is unused.
name=np.full_like(arrayOfFirstImage, np.nan, dtype=np.double)
listEmpty.append(name)
arrayStack=np.stack(listEmpty)
num_dim, num_rows,num_cols = arrayStack.shape
# NOTE(review): listRows is never used after this point.
listRows = list(range(num_rows))
#creates loop over all pixels in raster
for row in range(num_rows):
print("row number: ", row)
for col in range(num_cols):
#reset counter for band as script is working with a new pixel; cntrForBand is used to change arrayStack bands that will be written on
cntrForBand=0
print("col number: ", col)
#loop for all images in list ListImages to get image 1
#use ITER to be able to jump 7 or 22 loops
iterListImages = iter(ListImages)
for image in iterListImages:
#get number of image in the List of Images
# NOTE(review): list.index() is an O(n) scan repeated every iteration;
# enumerate(ListImages) would give the index for free.
indexImage1 = ListImages.index(image)
#get its full path
img1Path=os.path.abspath(image)
print ("path image 1: " + img1Path)
print ("index Image 1: ",indexImage1)
#open geotiff with gdal
img = gdal.Open(image)
#get first band data of image 1: NDVI value
band1Image1=img.GetRasterBand(1)
#get second band data of image 1: NBR value
band2Image1 = img.GetRasterBand(2)
## compute statistics of band 1
if band1Image1.GetMinimum() is None or band1Image1.GetMaximum()is None:
band1Image1.ComputeStatistics(0)
print("Statistics computed.")
## compute statistics of band 2
if band2Image1.GetMinimum() is None or band2Image1.GetMaximum()is None:
band2Image1.ComputeStatistics(0)
print("Statistics computed.")
#converts gdal array (raster or band) into a numpy array:
# NOTE(review): this reads the ENTIRE band for a single pixel lookup and is
# executed inside the row/col loops — the dominant cost of this function.
band1Image1asArray = band1Image1.ReadAsArray()
#print ("NDVI array= ",band1Image1asArray)
band2Image1asArray = band2Image1.ReadAsArray()
#Get NDVI value of pixel of interest
itemNDVIimage1=band1Image1asArray[row][col]
print("itemNDVIimage1: ",itemNDVIimage1)
#Get NBR value of pixel of interest
itemImage1=band2Image1asArray[row][col]
print("itemImage1: ",itemImage1)
#if pixel has no value, don't do anything
if itemImage1== band2Image1.GetNoDataValue() or itemImage1==-32768:
print("row number: ", row)
print("col number: ", col)
print ("image 1 pixel with no data value; initiating with another image")
#if pixel has a value, proceed
else:
#reset switch to False (switch is used to skip images)
switch1=False
#list of numbers for image 2: from index of image + 1 to index of image 1 + 8
listImg2=[indexImage1+1,indexImage1+2,indexImage1+3,indexImage1+4,indexImage1+5,indexImage1+6,indexImage1+7,indexImage1+8]
for indexImg2 in listImg2:
print("length list image: ", len(ListImages))
print ("Current indexImg2: ", indexImg2)
print("row number: ", row)
print("col number: ", col)
#if number of image 2 is above number of images in list, stop (all images have been processed)
if indexImg2>=len(ListImages):
break
#if not, proceed
else:
#open next image in the list (next date)
image2=gdal.Open(ListImages[indexImg2])
img2Path=os.path.abspath(ListImages[indexImg2])
print ("path image 2: " + img2Path)
#get image 2 NDVI value for this pixel
band1Image2 = image2.GetRasterBand(1)
band1Image2AsArray = band1Image2.ReadAsArray()
itemNDVIimage2=band1Image2AsArray[row][col]
print("item image 2, Band 1 (NDVI): ", itemNDVIimage2)
#get image 2 NBR value for this pixel
band2Image2 = image2.GetRasterBand(2)
band2Image2AsArray = band2Image2.ReadAsArray()
#print ("Image 2, Band 2:",band2Image2AsArray)
itemImage2=band2Image2AsArray[row][col]
print("item image 2: ", itemImage2)
#if image 2 has no value for NBR band, stop and continue with next image 2
if itemImage2== band2Image2.GetNoDataValue() or itemImage2==-32768:
print ("image 2 pixel with no data value; initiating with another image")
else:
#calculate dNBR, NBR and NDVI difference between the two images
dNBR=itemImage1-itemImage2
RdNBR=dNBR/(math.sqrt(abs(itemImage1)))
NDVIdiff=1-itemNDVIimage2/itemNDVIimage1
print ("dNBR: ",dNBR)
print ("RdNBR: ", RdNBR)
print ("NDVI difference: ", NDVIdiff)
#if dNBR equals exactly 0, it means that image 1 and image 2 were the same; stop and continue with next image
if dNBR==0:
print("same image for image 1 and image2; initiating with another image for image 2")
#if dNBR, NBR or NDVI difference values are under thresholds, stop and continue with next image
elif dNBR<dNBRthreshold or RdNBR<RdNBRthreshold or NDVIdiff<NDVIdiffThreshold :
print("dNBR or RdNBR or NDVIdiff under threshold; continue with next image for image 2")
else:
#open empty image and set new dNBR and RdNBR and date values in first, second and third band respectively. in ArrayStack, first number is number of band (first is zero) then row then column.
#if dNBR or RdNBR values is above value already saved in the array or if current value is empty (nan), overwrite it; else, don't overwrite it
print ("current dNBR value for this cell in arrayStack: ",arrayStack[cntrForBand][row][col])
if (dNBR>arrayStack[cntrForBand][row][col] and RdNBR>arrayStack[cntrForBand+1][row][col]) or (math.isnan(arrayStack[cntrForBand][row][col])):
#keep dNBR, RdNBR and date value in first, second and third of the three bands (hence cntrForBand for dNBR, cntrForBand+1 for RdNBR and cntrForBand+2 for Date)
arrayStack[cntrForBand][row][col]= dNBR
arrayStack[cntrForBand+1][row][col]= RdNBR
#arrayStack[0,0,0]=dNBR
#date value put in second band
# assumes the filename encodes the date at characters [-15:-8] — TODO confirm
date=int(img2Path[-15:-8])
arrayStack[cntrForBand+2][row][col]= date
print ("arrayStack updated: ",arrayStack)
#turn switch on to skip 22 images (forest and therefore fire won't come back soon...)
switch1= True
else:
#print(arrayStack)
print ("dNBR value lower than value already in arrayStack; not changing value")
#if one value of dNBR and RdNBR is above threshold during loops with image 1 and 2, then skip 6 months and continue with image 1 + 22
#else, continue with image 1 + 7
if switch1==True:
next(islice(iterListImages, 44, 44), None) # idiom: drains 44 items from the iterator
print("a value has been found for this set of 8 images; continuing with image 1 + 44")
#cntr for band increments with 3 so that next round three other bands of arrayStack get the dNBR, NBR and Date values
cntrForBand=cntrForBand+3
print ("cntrForBand=",cntrForBand)
else:
#if no high value found, go to image+7 in list
next(islice(iterListImages, 7, 7), None)
print("No value found for this set of 8 images; continuing with next image (+1)")
print ("done!!!!")
print (arrayStack)
# NOTE(review): np.save appends ".npy" — the file actually written is
# "FINAL.csv.npy" (binary NumPy format), despite the name and the message below.
np.save(path+"\\FINAL.csv", arrayStack)
print("file FINAL.csv saved")
if __name__ == '__main__':
    # One folder per worker process; each call to proc() handles a whole folder.
    listFolders = [f.path for f in os.scandir("C:\\incendios\\Temp3") if f.is_dir()]
    print(listFolders, type(listFolders))
    cpuCount = os.cpu_count()
    print("number of core: ", cpuCount)
    # FIX: the original created Pool(10) (ignoring the cpuCount it had just
    # computed) and never closed/joined the pool.  Size the pool to the
    # machine (never more workers than folders) and let the context manager
    # close and join it.
    with Pool(min(cpuCount or 1, len(listFolders) or 1)) as p:
        print(p.map(proc, listFolders))
If a run a very simple code that uses NumPy, it works perfectly fine and uses 100% CPU and all 10 cores:
import multiprocessing
import time
import numpy as np
# Benchmark harness: launch 50 independent processes, each multiplying a
# 3000x3000 random matrix.  (Indentation lost in the paste; kept as-is.)
start = time.time()
print("hello")
# NOTE(review): on Windows, multiprocessing uses "spawn", so each child
# re-imports this module and builds its OWN random matrix at this line;
# the parent's array is not shared with the children.
array=np.random.rand(3000,3000)
def worker():
"""worker function"""
mult=np.dot(array, array)
print (mult)
return mult
if __name__ == '__main__':
jobs = []
for i in range(50):
p = multiprocessing.Process(target=worker)
jobs.append(p)
p.start()
# NOTE(review): no job is ever join()ed, so this timestamp is taken while
# the children are still running — it measures process launch time only.
end = time.time()
print(end - start)
I know that NumPy can cause some issues with multiprocessing, but this doesn't seem to be the issue I have here.
So I guess there is something wrong with my code that makes it difficult to process with multiple cores. Is there anything I can do to improve it?
PS: I'm using Windows 10 64 bits and python 3.5.0 and the script works fine without multiprocessing...
EDIT:
to answer Mark Stechell´s question: Actually I have 10 folders; each folder has around 900 rasters that cover one area per folder, with one raster every 8 days from 2000 to 2020. These rasters are satellite images that I have already processed; the first band is a Vegetation Index (called NDVI) and the second one is a Burn Area Index (NBR, a basic index used to identify forestry fires); in this script, I use those data to calculate other indexes (dNBR and RdNBR; the last one is a relative index, it means that I compare NBR indexes of two different dates to detect a significant change). If those indexes are high enough (threshold are defined at the beginning of the script) which means that a forestry fire was detected, I keep the NDVI and RdNBR values in a numpy array with the date. But I only do this comparison with 8 following dates; if no significant value has been found, the script goes on with another image in the list and its 7 following images (chronologically); If a significant value has been found, the script jumps 22 images in the list because another forestry fire won´t happen again in this area before a long time..
Following mkrieger1´s advice, I am trying to simplify this as much as a can to see where the problem is. I am also going to try to use Pool in the very simple code I mentioned to see if this works
So, following mkrieger1's advice (many thanks, now I know...), I tried to run my script line by line to see where the problem was. It is clearly related to the GDAL library: the GetNoDataValue(), GetMinimum() and GetMaximum() functions are the problem here for multiprocessing. I have changed the code to use functions from another library instead (for instance, `if itemImage1 == GetNoDataValue()` has been replaced with `if math.isnan(x)`).
Now it is working perfectly...
I hope it will help others with the same issue.
Many thanks!
Related
I am a beginner writing a Python code, where the computer generates a random number between 1 and 10, 1 and 100, 1 and 1000, 1 and 10000, 1 and 100000 and so on. The computer itself will guess the random number a number of times (a user input number), and every time there is a count of how many times the computer took to guess the random number. A mean of the count over the number of times will be calculated and put in an array, where matplotlib will generate a graph of x=log10(the upper bounds of the random number range, i.e. 10, 100, 1000,...)
At the moment, I print the log10 of each bound as it is processed, and that has been acting as my progress tracker. But I am thinking of adding my progress bar, and I don't know where to put it so that I can see how much of the overall program has run.
I have added tqdm.tqdm in all sorts of different places to no avail. I am expecting a progress bar increasing as the program runs.
My program is as shown.
# Importing the modules needed
import random
import time
import timeit
import numpy as np
import matplotlib.pyplot as plt
import tqdm
# Function for making the computer guess the number it itself has generated and seeing how many times it takes for it to guess the number
def computer_guess(x):
    """Have the computer guess its own secret number drawn from [0, x].

    The search narrows [lo, hi] around the secret after every wrong probe
    and returns how many probes were needed (1 if the first probe hits).
    """
    lo, hi = 0, x
    # The secret the computer must rediscover.
    secret = random.randint(lo, hi)
    attempts = 1
    while True:
        # Once the interval collapses to one value, the answer is forced.
        probe = random.randint(lo, hi) if lo != hi else lo
        if probe == secret:
            return attempts
        # Shrink the interval toward the secret and count the miss.
        if probe > secret:
            hi = probe - 1
        else:
            lo = probe + 1
        attempts += 1
# Global array of upper bounds for the random-number ranges: 50 points
# spanning 10^1 .. 10^50.
# FIX: np.logspace's 4th positional parameter is `endpoint`, not `base`;
# the original passed 10 there, which only behaved correctly by accident
# (10 is truthy, i.e. endpoint=True, and base already defaults to 10).
# Pass base explicitly by keyword instead.
upper_bounds = np.logspace(1, 50, num=50, base=10.0)
def guess_avg(no_of_guesses):
    """For each bound in the global `upper_bounds`, pick one random target
    and play the guessing game `no_of_guesses` times against it, recording
    the mean guess count per bound.

    Prints the log10 of each bound as a progress indicator, then the list
    of per-bound means and their overall mean.  Returns the list of means.
    """
    averages = []
    for bound in upper_bounds:
        # One fixed target per bound; every repetition guesses this target.
        target = random.randint(0, bound)
        counts = np.array([])
        for _ in np.arange(no_of_guesses):
            counts = np.append(counts, computer_guess(target))
        # Progress indicator: order of magnitude of the current bound.
        print(int(np.log10(bound)))
        averages.append(np.mean(counts))
    overall = np.mean(averages)
    print(f"Your list of averages: {averages}")
    print(f"Average of averages: {overall}")
    return averages
# Repeat the "guess_avg" function as long as the program is running.
# NOTE(review): this is an infinite interactive loop — it blocks on input()
# every pass, and plt.show() blocks until the plot window is closed.
while True:
# Ask user to input a number for how many guesses they want the computer to make for each order of magnitude, and use that number for calling the function "guess_avg()"
resultant_average_numbers = guess_avg(
int(input("Input the number of guesses you want the computer to make: ")))
# Plot a graph with log10 of the order of magnitudes on the horizontal and the returned number of average of guesses
plt.plot(np.log10(upper_bounds), resultant_average_numbers)
# Show plot
plt.show()
I apologise if this is badly explained, it's my first time using Stackoverflow.
You can define the following progress_bar function, which you will call from wherever you want to monitor the advancement in you code:
import colorama
def progress_bar(progress, total, color=colorama.Fore.YELLOW):
    """Render a 100-character in-place terminal progress bar.

    Repaints on the same line via carriage returns; once progress reaches
    total, the finished bar is redrawn in green.
    """
    percent = 100 * (progress / float(total))
    filled = int(percent)
    bar = '█' * filled + '-' * (100 - filled)
    line = f'\r|{bar}| {percent:.2f}%'
    print(color + line, end='\r')
    if progress == total:
        # Final repaint in green to signal completion.
        print(colorama.Fore.GREEN + line, end='\r')
Hope this helps
You can also call tqdm by hand and then update it manually.
# Create a manual tqdm bar with a known total (requires the third-party
# tqdm package); the bar is drawn immediately.
progress_bar = tqdm.tqdm(total=100)
# Each update() call advances the bar by one unit (of the 100 above).
progress_bar.update()
When you are finished, you can call progress_bar.clear() to start again.
You probably want two progress bars in the guess_avg() function. One to track the ranges and another to track the guesses.
In this example I've used the Enlighten progress bar library, but you can accomplish similar behavior with other libraries that support nested progress bars. One advantage Enlighten is going to have over others is you can print whatever you want while the progress bar is running, good for debugging.
You can make this simpler by using context managers and auto-updating counters, but I didn't do that here to make it clearer what's happening. You can also customize the template used for the progress bar.
import enlighten
def guess_avg(no_of_guesses):
"""Variant of guess_avg instrumented with two nested Enlighten progress
bars: an outer bar over the bounds and an inner bar over the guesses.
Relies on the surrounding script for `upper_bounds`, `computer_guess`,
`random`, `np` and the third-party `enlighten` package.
(Indentation lost in the paste; kept as-is.)
"""
# Empty array for all averages
list_of_averages = []
# For every value in the "upper_bounds" array,
# Create a progress bar manager
manager = enlighten.get_manager(leave=False)
# Create main progress bar for ranges
pbar_bounds = manager.counter(total=len(upper_bounds), desc='Bound ranges', unit='ranges')
for bound in upper_bounds:
# choose random number, "ranx", between 0 and the bound in the array
ranx = random.randint(0, bound)
# make an empty Numpy array, "guess_array", to insert the guesses into
guess_array = np.array([])
# For every value in whatever the no_of_guesses is when function called,
# Create nested progress bar for guesses, leave removes progress bar on close
pbar_guesses = manager.counter(total=no_of_guesses, desc='Guessing', unit='guesses', leave=False)
for i in np.arange(no_of_guesses):
# assign value, "guess", as calling function with value "ranx"
guess = computer_guess(ranx)
# stuff each resultant guess into the "guess_array" array
guess_array = np.append(guess_array, guess)
pbar_guesses.update() # Update nested progress bar
pbar_guesses.close() # Close nested progress bar
# Print log10 of each value in "upper_bound"
print(int(np.log10(bound))) # You can remove this now if you want
pbar_bounds.update() # Update main progress bar
# Finding the mean of each value of the array of all guesses for the order of magnitude
average_of_guesses = np.mean(guess_array)
# Stuff the averages of guesses into the array the empty array made before
list_of_averages.append(average_of_guesses)
manager.stop() # Stop the progress bar manager
# Save the average of all averages in the list of averages into a single variable
average_of_averages = np.mean(list_of_averages)
# Print the list of averages
print(f"Your list of averages: {list_of_averages}")
# Print the average of averages
print(f"Average of averages: {average_of_averages}")
return list_of_averages
The code I am running so far is as follows
import os
import math
import statistics
def main ():
"""The question's broken code, kept verbatim (indentation lost in the
paste).  It reads the whole file as one string and then tries to convert
'elements' of that string to int with call syntax."""
infile = open('USPopulation.txt', 'r')
values = infile.read()
infile.close()
index = 0
while index < len(values):
# BUG (the error the question reports): parentheses make `values(index)` a
# function CALL, and Python cannot assign to a call expression ->
# "SyntaxError: can't assign to function call".  Indexing needs square
# brackets, and even then `values` is one long string here — it must be
# split into lines (e.g. values.split('\n')) before int() conversion.
values(index) = int(values(index))
index += 1
print(values)
main()
The text file contains 41 rows of numbers each entered on a single line like so:
151868
153982
156393
158956
161884
165069
168088
etc.
My task is to create a program which shows the average change in population during the time period, the year with the greatest increase in population during the time period, and the year with the smallest increase in population (from the previous year) during the time period.
The code will print each of the text files entries on a single line, but upon trying to convert to int for use with the statistics package I am getting the following error:
values(index) = int(values(index))
SyntaxError: can't assign to function call
The values(index) = int(values(index)) line was taken from reading as well as resources on stack overflow.
You can change values = infile.read() to values = list(infile.read())
and it will have it ouput as a list instead of a string.
One of the things that tends to happen whenever reading a file like this is, at the end of every line there is an invisible '\n' that declares a new line within the text file, so an easy way to split it by lines and turn them into integers would be, instead of using values = list(infile.read()) you could use values = values.split('\n') which splits the based off of lines, as long as values was previously declared.
and the while loop that you have can be easily replace with a for loop, where you would use len(values) as the end.
the values(index) = int(values(index)) part is a decent way to do it in a while loop, but whenever in a for loop, you can use values[i] = int(values[i]) to turn them into integers, and then values becomes a list of integers.
How I would personally set it up would be :
import os
import math
import statistics
def main():
    """Read yearly population counts from USPopulation.txt and report the
    largest and smallest year-over-year change, with the population value
    at which each occurred.

    Fixes over the posted answer: uses a context manager for the file,
    tolerates a trailing newline / blank lines (int('') raised ValueError
    with split('\n')), and computes max/min once instead of re-scanning.
    """
    with open('USPopulation.txt', 'r') as infile:
        # splitlines() drops the trailing newline; skip any blank lines.
        values = [int(line) for line in infile.read().splitlines() if line.strip()]
    # Year-over-year differences; len(changes) == len(values) - 1.
    changes = [values[i + 1] - values[i] for i in range(len(values) - 1)]
    biggest = max(changes)
    smallest = min(changes)
    print('The max change :', biggest, 'The minimal change :', smallest)
    # Map each extreme change back to the population it started from.
    print('A change of :', biggest, 'Happened at', values[changes.index(biggest)])
    print('A change of :', smallest, 'Happened at', values[changes.index(smallest)])
main()
If you need any clarification on anything I did in the code, just ask.
I personally would use numpy for reading a text file.
in your case I would do it like this:
import numpy as np
def main():
    """Load the population series with NumPy and print its extremes.

    BUG FIX: the posted answer printed np.argmax/np.argmin, which are the
    *indices* of the extreme entries, while the message claims they are the
    populations themselves.  Print the actual extreme values instead.
    """
    populations = np.loadtxt('USPopulation.txt')
    maxpop = populations.max()
    minpop = populations.min()
    print(f'maximum population = {maxpop} and minimum population = {minpop}')
main()
Here is some code I am using which should succeed in returning the first frame of a video. The relevant bits here I think are lines 2:5 and the second to last one where count=2.
def define_bounds(video_file, threshold_value, index_value):
"""Read one frame from video_file, threshold a grayscale slice of it, and
derive four row offsets from the first mostly-dark row found scanning
upward from row 139.  (Indentation lost in the paste; kept as-is.)

NOTE(review): the while-loop body executes exactly once (it ends in
`return`), so this processes the first frame vidcap.read() decodes.
"""
vidcap = cv2.VideoCapture(video_file)
count=0
while count <1:
# NOTE(review): `success` is never checked — if the read fails,
# color_img is None and the next line raises.
success,color_img = vidcap.read()
blkwht_img = np.asarray(Image.fromarray(color_img).convert('L'))[index_value]
# NOTE(review): cv2.ADAPTIVE_THRESH_GAUSSIAN_C is an adaptiveMethod flag
# for cv2.adaptiveThreshold, not a THRESH_* type for cv2.threshold; it is
# numerically equal to THRESH_BINARY_INV, so this actually performs an
# INVERTED binary threshold — likely the cause of the unexpected output;
# confirm against the intended threshold type.
retval, final_img = cv2.threshold(blkwht_img, threshold_value, 200, cv2.ADAPTIVE_THRESH_GAUSSIAN_C)
# Scan rows bottom-up (139 .. 0) for the first row whose median is dark.
for pixel_row in range(139,-1,-1):
if np.median(final_img[pixel_row,:]) <10:
silenced = pixel_row - 2
upmotion = pixel_row - 16
destination = pixel_row - 44
downmotion = pixel_row - 31
break
# NOTE(review): count=2 is redundant — the function returns on the next
# line.  Also, if no row matched above, the return raises
# UnboundLocalError because silenced/upmotion/... were never assigned.
count=2
return silenced, upmotion, destination, downmotion
I do know for a fact that vidcap.read() succeeds in reading frames from the first to the last (I have other code I use to determine this, where it just exports all frames from first to last).
But this code above fails in reading the first frame... it reads SOME frame but I don't know which. It's not the first, second or last as the output of the function does not match when I manually input the first, second and last frames.
Is there something stupidly wrong in my code? Also: am I using break correctly? I'm still new to this. Thanks!
In the code below i am drawing 3 images from a list (targetset) then displaying them to the screen. Next I am displaying another 3 images (pics) to a different part of the screen. For this second part; on 50% of occasions I want the 'pics' images to be the same as those displayed initially (target set). I am ok with this part, I am setting pics = targetset when x == 0 (basically flipping a coin).
My problem is: on the other 50% of occasions I want one of the 'pics' set to be the same as one of the originally shown set (targetset) — any one of the three — and I want the remaining 2 to be randomly chosen from the pics list. As it stands, according to my logic, I can only get the 'pics' images to be either all the same as, or all different from, the originally shown images from 'targetset'.
Adding to the problem: When x==0 (making pics=targetset) they all display ok, but when x==1 (meaning make no change) i get the following error message:
pics[i].pos = location[i]
UnboundLocalError: local variable 'pics' referenced before assignment
Here is my code:
# PsychoPy stimulus script from the question (Python 2, indentation lost in
# the paste; kept as-is).  Shows 3 "targetset" images, then 3 "pics" images.
#create initial target set
imgList1 = glob.glob(os.path.join('stim','*.png'))
random.shuffle(imgList1)
targetset = [visual.ImageStim(window, img) for img in imgList1[:3]]
#initial target set location
setlocation = [(-2,0),(0,0),(2,0)]
random.shuffle(setlocation)
#create target list
# NOTE(review): imgList is drawn from the same directory as imgList1 and
# shuffled independently, so pics may accidentally overlap targetset.
imgList = glob.glob(os.path.join('stim', '*.png'))
random.shuffle(imgList)
pics = [visual.ImageStim(window, img) for img in imgList[:3]]
#set target locations
location = [(1,2),(3,3),(5,5)]
random.shuffle(location)
'''define sequential presentation function'''
def seq():
x = random.randint(0,1)
if x == 0:
# BUG (the error the question reports): assigning to `pics` anywhere in
# seq() makes `pics` local to the WHOLE function.  When x == 1 this
# branch is skipped, the local is never bound, and the `pics[i]` read
# below raises "UnboundLocalError: local variable 'pics' referenced
# before assignment" — the module-level pics is shadowed, not used.
pics = targetset
print x
#display initial target set
for i in range(3):
targetset[i].pos = setlocation[i]
targetset[i].draw()
window.flip()
core.wait(3)
#display targets
for i in range(3):
pics[i].pos = location[i]
pics[i].draw()
window.flip()
core.wait(1)
seq()
core.wait(3)
window.close()
quit()
I hope someone can help,
Cheers
S
I see a few possible pitfalls with your code. One is that you are creating two separate lists imgList1 and imgList from the same set of images, then you are randomizing each of these lists separately and pulling the first three elements from each list into targetset and pics respectively. It is possible that these sub-sets will have some overlap in images. From the description of your task, I don't think that this is intentional. I might suggest creating only 1 imgList and then using the imgList.pop() function to pull images from the list. Pop returns the last element from the list and then deletes that element from the list. I think of it like drawing a card from a deck, you cannot draw the card again because it is no longer in the deck. What you're currently doing is like drawing cards from two different decks. You might get the same card twice.
The second problem I see here is that you define a condition for what to do when your coin-flip for x is 0, but you do not define one for when your coin-flip is 1. Currently, it will just display the first 3 images from imgList because that's how you've defined pics. I might suggest defining pics within an if/else block for your x coin-flip. If it's 0, then go with targetset, if it's 1, then randomly choose an element from target list, then copy that element to pics and pop two more images from imgList. As with all coding, there is more than one way to solve this problem, but I'll offer a solution here.
# Answer fragment (indentation lost in the paste; kept as-is).  Uses one
# shared image "deck" so a picture can never be drawn twice by accident.
#Outside the trial loop
imgList = glob.glob(os.path.join('stim', '*.png'))
random.shuffle(imgList)
#Inside the trial loop
targetset = []
for i in range(0,3):
# pop() removes the drawn image from the deck (no repeats possible).
targetset.append(imgList.pop())
#Do your display stuff with targetset
# Coin flip: 0 -> reuse the whole target set, 1 -> keep one target + two new.
if random.randint(0,1) == 0:
pics = targetset
else:
pics = []
# One randomly chosen image repeated from the target set...
pics.append(targetset[random.randint(0,2)])
# ...plus two fresh images drawn from the deck.
pics.append(imgList.pop())
pics.append(imgList.pop())
# Shuffle so the repeated target is not always in the same position.
random.shuffle(pics)
#Do your display stuff with pics
I am writing an opencv program where I track position of an object by use of a usb camera. To make sure I get as high frame rate as possible with the camera I am using I made a threaded process that read the images from the camera. The image processing is done in another loop which also writes the object position to the file.
Now I want a way to avoid processing the same frame multiple times. So I thought I could compare the image just processed with that available from the the video stream thread.
First I tried to use if frame1 == frame2, but got error message that "the truth value of an array with more than one element is ambiguous. Use a.any() or a.all()."
After some googling I found cv2.compare and the flag CMP_EQ. Made a sample code, and made it work in some way. However, my question is. How could this be done in an easier or better way?
# Frame-equality demo from the question (Python 2 print statements;
# indentation lost in the paste; kept as-is).
import cv2
cv2.namedWindow('image1', cv2.WINDOW_NORMAL)
cv2.namedWindow('image2', cv2.WINDOW_NORMAL)
frame1 = cv2.imread("sample1.png")
# frame2 is the SAME array object as frame1, so compare1 is all-equal by construction.
frame2 = frame1
frame3 = cv2.imread("sample2.png")
# Flag 0 is cv2.CMP_EQ: output is 255 where pixels are equal, 0 where not.
compare1 = cv2.compare(frame1,frame2,0)
compare2 = cv2.compare(frame1,frame3,0)
cv2.imshow('image1', compare1)
cv2.imshow('image2', compare2)
# .all() is True only if every element is nonzero, i.e. every pixel matched.
if compare1.all():
print "equal"
else:
print "not equal"
if compare2.all():
print "equal"
else:
print "not equal"
cv2.waitKey(0)
cv2.destroyAllWindows()
open("image1.jpg","rb").read() == open("image2.jpg","rb").read()
should tell you if they are exactly the same ...
I was doing something close to what you are doing; I was trying to get the difference. I used the subtract function. It may help you.
UPDATE:
# Difference-based comparison (Python 2 print statements; indentation lost
# in the paste; kept as-is).
import cv2
import numpy as np
a = cv2.imread("sample1.png")
b = cv2.imread("sample2.png")
# NOTE(review): cv2.subtract saturates at 0 for uint8 images, so pixels
# where b > a read as 0 — a zero difference here only proves b <= a
# everywhere, not equality; cv2.absdiff would be a symmetric check.  It
# also requires a and b to have the same shape.
difference = cv2.subtract(a, b)
# `not np.any(...)` yields a plain Python bool: True iff every pixel is 0.
result = not np.any(difference)
# NOTE(review): `result is True` works only because `not` returns a real
# bool; the idiomatic form is simply `if result:`.
if result is True:
print "Pictures are the same"
else:
cv2.imwrite("ed.jpg", difference )
print "Pictures are different, the difference is stored as ed.jpg"
How about giving your Images an index?
Pseudocode:
// Pseudocode: wrap each captured frame with a monotonically increasing
// index so the processing loop can skip frames it has already handled.
class Frame
{
// the captured image
cvImage img;
// sequence number assigned by the capture thread
uint idx;
}
// NOTE(review): real C++ would require a ';' after the closing brace.
Then simply check whether the current index is greater than the last one you processed.
It is simple and definitely faster than any image processing based approach.
You can compare the size of two image files as the first level of check for reduced computational complexity. With compression, it is highly unlikely for two different image files to have the same size to the accuracy of the number of bytes. With equal file size, you can then compare the image contents.
You should try something like this.
import cv2
import numpy as np

# Compare two images: same shape first, then per-channel pixel equality.
original = cv2.imread("rpi1.jpg")
duplicate = cv2.imread("rpi2.jpg")

if original.shape == duplicate.shape:
    print("The images have same size and channels")
    difference = cv2.subtract(original, duplicate)
    b, g, r = cv2.split(difference)
    # FIX: the posted answer split this condition across two lines without a
    # line continuation, which is a SyntaxError; parenthesize so it can wrap.
    # NOTE(review): cv2.subtract saturates negative results to 0 for uint8,
    # so strictly this only proves duplicate <= original per pixel;
    # cv2.absdiff would give a symmetric equality check.
    if (cv2.countNonZero(b) == 0 and cv2.countNonZero(g) == 0
            and cv2.countNonZero(r) == 0):
        print("The images are completely Equal")
    else:
        print("images are different")