How to implement Binary Symmetric Channel in Python correctly? - python

Basically I'd like to implement BSC. A photo is changed into bits, some of them are changed and a new image is created. The issue I encountered is that I get the same image back. I've put some print() statements to see if the error() works and it looks like it does. Here's my code:
import numpy as np
import random as rand
# Seed
rand.seed(4)
# Filenames
in_name = 'in_img.png'
out_name = 'out_img.png'
# Into bits
in_bytes = np.fromfile(in_name, dtype="uint8")
in_bits = np.unpackbits(in_bytes)
data = list(in_bits)
# BSC
def error(x):
p = 0.1
is_wrong = rand.random() < p
if is_wrong:
if x == 1:
return 0
else:
return 1
else:
return x
for i in data:
i = error(i)
# To PNG
out_bits = np.array(data)
out_bytes = np.packbits(out_bits)
out_bytes.tofile(out_name)

While the problem in your code seems to be a duplicate as kazemakase points out in a comment, your code should not use such a loop and a Python list in the first place. With numpy one usually tries to push as many loops as possible into the numpy data types.
import numpy as np
def main():
np.random.seed(4)
in_name = 'in_img.png'
out_name = 'out_img.png'
bits = np.unpackbits(np.fromfile(in_name, np.uint8))
bits ^= np.random.random(bits.shape) < 0.1
np.packbits(bits).tofile(out_name)
if __name__ == '__main__':
main()

Related

how can I solve this: Too many pixels in sample; must be <= 262144. Got 700494 (Earth Engine)

I'm trying to convert an ee.Image to a Numpy array. Somehow small areas are computed correctly, yet big amounts of pixels present an error related to the max amount of pixels using the function sample.Rectangle.
Here's the code:
import numpy as np
import ee
naipCollection = (
ee.ImageCollection("USDA/NAIP/DOQQ")
.filterBounds(a_file)
.filterDate("2017-01-01", "2020-12-15")
)
naip = naipCollection.mosaic()
naipNDVI = naip.normalizedDifference(["R", "N"]).rename("NDVI")
demClip = naipNDVI.clip(a_file)
vegetacion = demClip.gt(0).lt(0.4).selfMask()
naip_b = naipNDVI.reproject("EPSG:32613")
array_or = naip_b.sampleRectangle(a_file, defaultValue=0)
arr_or_or = array_or.get("NDVI")
np_arr = np.array(arr_or_or.getInfo())
nonzero_or = np.count_nonzero(np_arr)

Random walk plotting in python

I have been working through the book "A student's guide to Python for Physical Modeling" by Jesse M. Kinder & Philip Nelson and there is an exercise where I'm instructed to build a Brownian motion simulator/ random walk simulator and plot it. I don't know why my code is not working and I was hoping I could get some help from you:
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import random as rng
def Brownian_motion(steps):
"""
this is a random walk function
define the number of steps to be taken as a integer value
"""
#these are the random numbers
steps_x = rng(steps)
steps_y = rng(steps)
#Here I change the random numbers to 1 or -1
pace_x = (steps_x < 0.5)
for i in pace_x:
if i == False:
pace_x[i] = -1
else:
pace_x[i] = 1
return pace_x
pace_y = (steps_y < 0.5)
for i in pace_y:
if i == False:
pace_y[i] = -1
else:
pace_x[i] = 1
return pace_y
plt.plot(np.cumsum(pace_x), np.cumsum(pace_y))
plt.show()
Brownian_motion(500)
It does not throw and error but I can't get it to plot
EDIT:
This is similar to what I'm expecting to see:
http://people.sc.fsu.edu/~jburkardt/m_src/random_walk_2d_simulation/walks_1_steps_1000_plot.png
With numpy you can create boolean slices which are more efficient. Note that this does not work with Python Lists/Tuples.
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import random as rng
def Brownian_motion(steps):
"""
this is a random walk function
define the number of steps to be taken as a integer value
"""
#these are the random numbers
steps_x = rng(steps)
steps_y = rng(steps)
pace_x = np.ones_like(steps_x)
idx = steps_x < 0.5
pace_x[idx] = -1
idy = steps_y < 0.5
pace_y = np.ones_like(steps_y)
pace_y[idy] = -1
plt.plot(np.cumsum(pace_x), np.cumsum(pace_y))
# plt.axis("equal")
# I would also add this. This way your plot won't be
# distorted.
plt.show()
a = Brownian_motion(500)
You have unnecessary return statements at the end of your loops, so your code never gets to the plot. Remove those and the Brownian_motion function should have a chance to complete execution.
Try to remove the return from your function, and cast your booleans to integers
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import random as rng
def Brownian_motion(steps):
"""
this is a random walk function
define the number of steps to be taken as a integer value
"""
#these are the random numbers
steps_x = rng(steps)
steps_y = rng(steps)
#Here I change the random numbers to 1 or -1
pace_x = (steps_x < 0.5)*1
for i in pace_x:
if i == False:
pace_x[i] = -1
else:
pace_x[i] = 1
#return pace_x
pace_y = (steps_y < 0.5)*1
for i in pace_y:
if i == False:
pace_y[i] = -1
else:
pace_x[i] = 1
#return pace_y
plt.plot(np.cumsum(pace_x), np.cumsum(pace_y))
plt.show()
Brownian_motion(500)
I do not know what a Brownian motion simulator/ random walk simulator is but the problem in your code is that in your function you have a return statement (actually 2) that makes your function stop without executing the the plot.
Commenting it seems to work and it plots something (I do not know it it is what you are expecting).
The code:
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import random as rng
def Brownian_motion(steps):
"""
this is a random walk function
define the number of steps to be taken as a integer value
"""
#these are the random numbers
steps_x = rng(steps)
steps_y = rng(steps)
#Here I change the random numbers to 1 or -1
pace_x = (steps_x < 0.5)
#print(pace_x)
x_list = list()
y_list = list()
for i in pace_x:
if i == False:
#pace_x[i] = -1
x_list.append(-1)
else:
#pace_x[i] = 1
x_list.append(1)
#return pace_x
print("Hello there")
pace_y = (steps_y < 0.5)
for i in pace_y:
if i == False:
#pace_y[i] = -1
y_list.append(-1)
else:
#pace_x[i] = 1
y_list.append(1)
#return pace_y
plt.plot(x_list, y_list)
plt.show()
Brownian_motion(500)
Piece of advice: when something is wrong in Python try to put print function calls in your code to check what you are expecting is correct. For example I put the line print("Hello there") after the return and have seen that it was never executed (now it's commented).

Python MNIST Digit Data Testing Failure

Hello I am using Python to try to read the digit data provided by MNIST into a data structure I can use to train a neural network. I am testing to ensure the data was read properly by creating an image using PIL. The image that is being created is horribly wrong, and I am not sure if it is because I am using PIL incorrectly or my data structures and methods are not right.
The format of the two data files is described here:
http://yann.lecun.com/exdb/mnist/
Here are the applicable functions:
read_image_data reads the pixel data organizing it into a list of 2D array numpy arrays
def read_image_data():
fd = open("train-images.idx3-ubyte", "rb")
images_bin_string = fd.read()
num_images = struct.unpack(">i", images_bin_string[4:8])[0]
image_data_bank = []
uint32_num_bytes = 4
current_index = 8
num_rows = struct.unpack(">I", \
images_bin_string[current_index: current_index + uint32_num_bytes])[0]
num_cols = struct.unpack(">I", \
images_bin_string[current_index + uint32_num_bytes: \
current_index + uint32_num_bytes * 2])[0]
current_index += 8
i = 0
while i < num_images:
image_data = np.zeros([num_rows, num_cols])
for j in range(num_rows - 1):
for k in range(num_cols - 1):
image_data[j][k] = images_bin_string[current_index + j * k]
current_index += num_rows * num_cols
i += 1
image_data_bank.append(image_data)
return image_data_bank
read_label_data reads the corresponding labels into a list
def read_label_data():
fd = open("train-labels.idx1-ubyte", "rb")
images_bin_string = fd.read()
num_images = struct.unpack(">i", images_bin_string[4:8])[0]
image_data_bank = []
current_index = 8
i = 0
while i < num_images:
image_data_bank.append(images_bin_string[current_index])
current_index += 1
i += 1
return image_data_bank
collect_data zips the structures together
def collect_data():
print("Reading image data...")
image_data = read_image_data()
print("Reading label data...")
label_data = read_label_data()
print("Zipping data sets...")
all_data = np.array(list(zip(image_data, label_data)))
return all_data
lastly run_test uses PIL to print the pixels from the first 28x28 np structure created by read_image_data
def run_test(data):
example = data[0]
pixel_data = example[0]
number = example[1]
print(number)
im = Image.fromarray(pixel_data)
im.show()
When I run the script:
Collecting data... Reading image data... Reading label data... Zipping
data sets... 5
I must be messing something up with the PIL library, but I do not know what.
That is a really weird looking 5. I am guessing that I went wrong somewhere in my organization of the data. The directions did say "Pixels are organized row-wise.", but I think I covered that by having my outer loop as the row loop then the inner as the column loop
UPDATE
I reversed the order of the row and column index in the np.arrays in read_image_data and it is making no difference.
image_data[k][j] = images_bin_string[current_index + j * k]
UPDATE
Ran quick test with matplotlib
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
imgplot = plt.imshow(pixel_data)
plt.show()
Here is what I got from matplotlib
That means it is definitely a problem with my code and not the library. The question is if it is the way I am passing the pixels to the imaging libraries or how I structured the data. If anyone can find the mistake, I would greatly appreciate.

Manipulating a large binary image array with numpy and cv2

My code is the following:
import cv2; import numpy as np
class MyClass:
def __init__(self,imagefile):
self.image = cv2.imread(imagefile)
#image details
self.h,self.w = self.image.shape[:2]
#self.bPoints, self.wPoints = np.array([[0,0]]),np.array([[0,0]])
self.bPoints, self.wPoints = [],[]
#CAUTION! Points are of the form (y,x)
# Point filtering
for i in xrange(self.h):
for j in xrange(self.w):
if self.th2.item(i,j) == 0:
#self.bPoints = np.append([[i,j]], self.bPoints, axis=0)
self.bPoints.append((i,j))
else:
self.wPoints.append((i,j))
#self.wPoints = np.append([[i,j]], self.wPoints, axis=0)
#self.bPoints = self.bPoints[:len(self.bPoints) - 1]
#self.wPoints = self.wPoints[:len(self.wPoints) - 1]
self.bPoints, self.wPoints = np.array(self.bPoints), np.array(self.wPoints)
I want to find and separate the white from the black points. I have commented the lines that show a possible (but very-very slow) solution via numpy. Can you recommend me a better and faster solution? I will appreciate it if you do so!
Thanks
I'm assuming self.th2 is a numpy array. This might take some adjustment if that is not the case. Basically, this uses the np.where function to determine all the indices which are 0 or 255.
import cv2; import numpy as np
class MyClass:
def __init__(self,imagefile):
self.image = cv2.imread(imagefile)
#image details
self.h,self.w = self.image.shape[:2]
#self.bPoints, self.wPoints = np.array([[0,0]]),np.array([[0,0]])
self.bPoints, self.wPoints = [],[]
#CAUTION! Points are of the form (y,x)
# use the np.where method instead of a double loop.
# make sure self.th2 is a numpy array
indx = np.where(self.th2==0)
for i,j in zip(indx[0], indx[1]):
self.bPoints.append((i,j))
indx = np.where(self.th2==255)
for i,j in zip(indx[0], indx[1]):
self.wPoints.append((i,j))
# Point filtering
#for i in xrange(self.h):
# for j in xrange(self.w):
# if self.th2.item(i,j) == 0:
# #self.bPoints = np.append([[i,j]], self.bPoints, axis=0)
# self.bPoints.append((i,j))
# else:
# self.wPoints.append((i,j))
# #self.wPoints = np.append([[i,j]], self.wPoints, axis=0)
#self.bPoints = self.bPoints[:len(self.bPoints) - 1]
#self.wPoints = self.wPoints[:len(self.wPoints) - 1]
self.bPoints, self.wPoints = np.array(self.bPoints), np.array(self.wPoints)

Averaging over multipage TIFF pages in Python

What would be the fastest/memory efficient way to get average over many frames of 16-bit TIFF image as numpy array?
What I came up so far is the code below. To my surprise, method2 was faster than method1.
But, for profiling never assume, test it! So, I want to test more.
Worth trying Wand? I did not include here because after imstalling ImageMagick-6.8.9-Q16 and MAGICK_HOME env var it still does not import... Any other library for multipage tiff in Python? GDAL maybe little too much for this.
(edit) I included libtiff. Still method2 fastest and quite memory efficient.
from time import time
#import cv2 ## no multi page tiff support
import numpy as np
from PIL import Image
#from scipy.misc import imread ## no multi page tiff support
import tifffile # http://www.lfd.uci.edu/~gohlke/code/tifffile.py.html
from libtiff import TIFF # https://code.google.com/p/pylibtiff/
fp = r"path/2/1000frames-timelapse-image.tif"
def method1(fp):
'''
using tifffile.py by Christoph (Version: 2014.02.05)
(http://www.lfd.uci.edu/~gohlke/code/tifffile.py.html)
'''
with tifffile.TIFFfile(fp) as imfile:
return imfile.asarray().mean(axis=0)
def method2(fp):
'primitive peak memory friendly way with tifffile.py'
with tifffile.TIFFfile(fp) as imfile:
nframe, h, w = imfile.series[0]['shape']
temp = np.zeros( (h,w), dtype=np.float64 )
for n in range(nframe):
curframe = imfile.asarray(n)
temp += curframe
return (temp / nframe)
def method3(fp):
' like method2 but using pillow 2.3.0 '
im = Image.open(fp)
w, h = im.size
temp = np.zeros( (h,w), dtype=np.float64 )
n = 0
while True:
curframe = np.array(im.getdata()).reshape(h,w)
temp += curframe
n += 1
try:
im.seek(n)
except:
break
return (temp / n)
def method4(fp):
'''
https://code.google.com/p/pylibtiff/
documentaion seems out dated.
'''
tif = TIFF.open(fp)
header = tif.info()
meta = dict() # extracting meta
for l in header.splitlines():
if l:
if l.find(':')>0:
parts = l.split(':')
key = parts[0]
value = ':'.join(parts[1:])
elif l.find('=')>0:
key, value =l.split('=')
meta[key] = value
nframes = int(meta['frames'])
h = int(meta['ImageLength'])
w = int(meta['ImageWidth'])
temp = np.zeros( (h,w), dtype=np.float64 )
for frame in tif.iter_images():
temp += frame
return (temp / nframes)
t0 = time()
avgimg1 = method1(fp)
print time() - t0
# 1.17-1.33 s
t0 = time()
avgimg2 = method2(fp)
print time() - t0
# 0.90-1.53 s usually faster than method1 by 20%
t0 = time()
avgimg3 = method3(fp)
print time() - t0
# 21 s
t0 = time()
avgimg4 = method4(fp)
print time() - t0
# 1.96 - 2.21 s # may not be accurate. I got warning for every frame with the tiff file I tested.
np.testing.assert_allclose(avgimg1, avgimg2)
np.testing.assert_allclose(avgimg1, avgimg3)
np.testing.assert_allclose(avgimg1, avgimg4)
Simple logic would make me bet my money on method 1 or 3, since method 2 and 4 have for-loops in them. For-loops Always make your code go slower if you have more input.
I would definitely go for method 1: neat, clear to read...
To be really sure, just test them I would say. If you don't feel like testing, I would go for method one.
Kind regards,

Categories

Resources