Convert depth map (base64) of Google Street View to image - Python

I wonder if someone knows how to convert the depth map of Google Street View (encoded as base64) into an image or a 2D matrix. I'm using Python and was able to get the base64 string, decode it, and save it to a .png file. However, the exported PNG file cannot be opened by any graphics viewer, so I guess the base64 string wasn't decoded correctly.
Here is a post that mentioned the base64 string in GSV.
Here is my code:
import requests
import base64
import zlib

# URL of the JSON file of a GSV depth map
url_depthmap = 'http://maps.google.com/cbk?output=json&cb_client=maps_sv&v=4&dm=1&pm=1&ph=1&hl=en&panoid=lcptgwtxfJ6DccSzyWp0zA'
# get the JSON file
r = requests.get(url_depthmap)
# parse it
depth_json = r.json()
# get the base64 string of the depth map
data = depth_json['model']['depth_map']
# fix the 'incorrect padding' error: the length of the string needs to be divisible by 4
data += "=" * ((4 - len(data) % 4) % 4)
# convert the URL-safe format to the regular format
data = data.replace('-', '+').replace('_', '/')
data = base64.b64decode(data)  # decode the string (base64.decodestring is removed in Python 3.9+)
data = zlib.decompress(data)   # decompress the data
# write it to a png file
with open('downloads/deer_decode.png', 'wb') as image_result:
    image_result.write(data)
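As it turns out, the decompressed bytes are not a PNG (or any image container) at all, which is why viewers reject the file. Judging from the GSVPanoDepth.js reference used in the answer below, the buffer starts with a small little-endian header (header size, number of planes, map width and height, and the offset of the per-pixel plane indices), followed by one plane index per pixel and 16 bytes (a normal vector plus a distance) per plane. A quick sanity check, assuming that layout and mirroring the answer's parseHeader():

import struct

# a minimal sketch: peek at the decompressed buffer's header fields (little-endian)
header_size, num_planes, width, height, offset = struct.unpack_from("<BHHHH", data, 0)
print(header_size, num_planes, width, height, offset)  # e.g. 8, N, 512, 256, 8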

Looking at https://github.com/proog128/GSVPanoDepth.js/blob/master/src/GSVPanoDepth.js and starting from your piece of code, here is my overall processing:
import base64
import zlib
import numpy as np
import struct
import matplotlib.pyplot as plt

def parse(b64_string):
    # fix the 'incorrect padding' error: the length of the string needs to be divisible by 4
    b64_string += "=" * ((4 - len(b64_string) % 4) % 4)
    # convert the URL-safe format to the regular format
    data = b64_string.replace("-", "+").replace("_", "/")
    data = base64.b64decode(data)  # decode the string
    data = zlib.decompress(data)  # decompress the data
    return np.array([d for d in data])

def parseHeader(depthMap):
    return {
        "headerSize": depthMap[0],
        "numberOfPlanes": getUInt16(depthMap, 1),
        "width": getUInt16(depthMap, 3),
        "height": getUInt16(depthMap, 5),
        "offset": getUInt16(depthMap, 7),
    }

def get_bin(a):
    ba = bin(a)[2:]
    return "0" * (8 - len(ba)) + ba

def getUInt16(arr, ind):
    a = arr[ind]
    b = arr[ind + 1]
    return int(get_bin(b) + get_bin(a), 2)

def getFloat32(arr, ind):
    return bin_to_float("".join(get_bin(i) for i in arr[ind : ind + 4][::-1]))

def bin_to_float(binary):
    return struct.unpack("!f", struct.pack("!I", int(binary, 2)))[0]

def parsePlanes(header, depthMap):
    indices = []
    planes = []
    for i in range(header["width"] * header["height"]):
        indices.append(depthMap[header["offset"] + i])
    for i in range(header["numberOfPlanes"]):
        byteOffset = header["offset"] + header["width"] * header["height"] + i * 4 * 4
        n = [0, 0, 0]
        n[0] = getFloat32(depthMap, byteOffset)
        n[1] = getFloat32(depthMap, byteOffset + 4)
        n[2] = getFloat32(depthMap, byteOffset + 8)
        d = getFloat32(depthMap, byteOffset + 12)
        planes.append({"n": n, "d": d})
    return {"planes": planes, "indices": indices}

def computeDepthMap(header, indices, planes):
    v = [0, 0, 0]
    w = header["width"]
    h = header["height"]
    depthMap = np.empty(w * h)
    sin_theta = np.empty(h)
    cos_theta = np.empty(h)
    sin_phi = np.empty(w)
    cos_phi = np.empty(w)
    for y in range(h):
        theta = (h - y - 0.5) / h * np.pi
        sin_theta[y] = np.sin(theta)
        cos_theta[y] = np.cos(theta)
    for x in range(w):
        phi = (w - x - 0.5) / w * 2 * np.pi + np.pi / 2
        sin_phi[x] = np.sin(phi)
        cos_phi[x] = np.cos(phi)
    for y in range(h):
        for x in range(w):
            planeIdx = indices[y * w + x]
            v[0] = sin_theta[y] * cos_phi[x]
            v[1] = sin_theta[y] * sin_phi[x]
            v[2] = cos_theta[y]
            if planeIdx > 0:
                plane = planes[planeIdx]
                t = np.abs(
                    plane["d"]
                    / (
                        v[0] * plane["n"][0]
                        + v[1] * plane["n"][1]
                        + v[2] * plane["n"][2]
                    )
                )
                depthMap[y * w + (w - x - 1)] = t
            else:
                depthMap[y * w + (w - x - 1)] = 9999999999999999999.0
    return {"width": w, "height": h, "depthMap": depthMap}
# see https://stackoverflow.com/questions/56242758/python-equivalent-for-javascripts-dataview
# for bytes-parsing reference
# see https://github.com/proog128/GSVPanoDepth.js/blob/master/src/GSVPanoDepth.js
# for overall processing reference
# Base64 string from request to:
# https://maps.google.com/cbk?output=xml&ll=45.508457,-73.532738&dm=1
# edit long and lat
s = "eJzt2gt0FNUdx_FsQtgEFJIQiAlvQglBILzktTOZ2YitloKKD6qAFjy2KD5ABTlqMxUsqICcVqqC1VoUqS34TkMhydjaorYCStXSKkeEihQoICpUFOzuJruZO4_dmdmZe2fY3_cc4RCS-L_z-c_sguaNy8rKzgrkZSGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBCbgsFgB9YzIFYFo_wdsAAZWbC5DliATCwYVPhjATKsYNC0f5s2baiNhagUDFrzb47aeMjVgkF7_liC06JgWv7YAX-n1rfjjx3wbVp-m_5YAV_mpH-bPGpjI4dy1h8L4Lcc9scC-Cyn_bEA_sq6vzF_zB8b4Ktc8McC-Cg3_LEAvkmH3wF_LIBfcskfC-CTHH37r_DHAvgj1_yxAL7IPX8sgB9y0R8L4IPc9McCeD9X_bEAns9dfyyA13PZHwvg8Rz11_LnFVI7SVZWO2UU_71-7jTybwd_67nuT28B4G8j-Gd2LvsXFtJbAPjbyLq_MT_8_RcFf1oLAH8bwd9fFRQUOPr9aPhTWoDT378glqPfEv4-qSCRo98W_v6owM_-dBYA_jaCvz_ytz-VBYC_jeDvsfprP1SeRd0_yVfA39W0_uV0_R393z_gbzX42w3-1rP-8g9_d9P4D4C_yeBvOfhTr7Q06W_D33Z-8Y-l-ABBrvYf4B9_LX_Cf6iTQxsFf8vBn3qp_FULAH_T-cq_dQHgr1v37t0tfgX8LUfJf6hp_-7KLB4G_paj5W96IL_5d9Yp-VeY9j839qPX_I35k_zxT9d_2LDmn3v06NH6wdPBP_kCkP4jR44s7T8y1pCWRkeL_2JI1L9Xr14K_16tpT0-e_8ePUj_jq2d3v6juUhRxNL-vZIV8Q-Hwwr_0YO02R2fhf-w-M8a_45kp4l_GVncv8xs5eWRHxT-0Y_1S6T8TOvjwz_NdP1LkhXDN_zdLjqVR_5R-Ot9ijZz48PffmdF0_XvZjtDTl3_Ir3OUhcZtahrJL0z0PIfWl1dPSqWrv-YWB70P6O1Tt9qqahoYLwuXXT9E5e-p8V0QWMp_I2cI3VqLqv5M7oqa9tWcZb48QJG_n2jFTdX0ru1qip7_pWVY6OFolVWVtacE68y5h_5UKUgCGr_wZr68Txf0jpQ_BwjonOS_n365ChqTxbIVpZzpiLlZYoVCASIX8eucGIXdP31bmA9LnVdOnXS0YwX91d9WCT2RGUejTwLmZF_RUVFbl9FxfFK7PkrX-IqiWL-Le9lVP6xz-5NVBVpRLRiorMj5eYS_sb47SPnJv1ziA04k7xmigun3YNOhLvunTtQ8bRIUSfDRN1vrSVvqyiZeyr_6GWraC43Wusm2PJXvcnppyjm3wxbpfKvimsXG9TMHq1Pczk66ehr_VUroLmAqquoswjKWl8p3MjI2wK7Kf9EFcpNsOOv8z63Kn5Hx_xbPFX-KdUT8HruSfR1_bUPgRRLYGIR9NfC9F4kdTZUN8Ge0j83d7je1YwsgQ1_o9tXk7E_YZ7ihk_ysp9I199gA1IugY1V0C0lcnr3ujX_WDpLYN1f9Q7CvD8pbsU9hb6hf47ey4ClLdDZBbPLQIncir_OElj11zFsKXFvJz6i8rfJnlo_mb_xQ8DyFljcCPe1tRn4a7ESS2DRn3jvoLsAirtc7W8d3QC_vfrcyfxzkj4E0toCz2Xi9lctgSX_FErq76_ytwOvp69z7hT-KR8CBlvguzWw5h_Ngn9Kp-bv2PprZ_xT65vwzzHzEPD_08DAP3JR0vW3I-eEvxl9c_5mHwJ-fh4Y-xttgCl-W3IO-JvDD5j1t7MCvtqE_Pwk_vorkNrfJn6stPxN61vwz7H0OmB2GbyyDfn5mg0g_HU2IIW_7l8bWci2vwX8gDV_-w8BK1HxVhf1z49cAWN_zQYk9Vd-4nCbq2DL35q-VX8aK0BBW1vcP1bCP1tdUn-jN4o6mVsJy_5W8QM2_F1fAbepdSP8m5dAq0-sgFXyVCXbADv6Jg9ux9_VFXCV2TCtv3EOoqfyzzHtbws_YNvfvRVwzzhZVvzdWQGjy2zG38ZzP559f_UKOLQDbgknz6K_GyuQZAGs6Fs8eFr-mh3IIH_HV8D4EifzTws_4IC_egXS3QHnbc1ky9_ZFbBx4dO0j-aEv2YHMsffwRVIx94mfsA5f8d2wEFUC6Xh79QK2MZP6-BO-mt2wMYSOORptfT8HdkBW_bp4Qec99cuQYb4p70D1vGdOLgr_tolML0FTpzJRs74p7cDVuwdO7h7_va2wLGDWcs5f_s7QJs-ltv-eluQbA2cPZ3pnPW3twS06WPR8TfcBM0quHBEM7ngb3kJktC7d3Da_qly76RJc8vfyha0XgQK7vHIQdm5x3P_xLq5ym9yD3JoeGuCfzQ6_im2gcnJ4R-Nhb8mJicnJmCtD3_qEROw1oc_9YgJWOvDn3rEBKz14U89YgLW-vCnHjEBa334U4-YgLU-_KlHTMBaH_7UIyZgrc_K392__jcbk6MTE7DWhz_1iAlY68OfesQErPXhTz1iAtb68KceMQFrffhTj5iAtT78qUdMwFof_tQjJmCtD3_qEROw1oc_9YgJWOvDn3rEBKz14U89YgLW-vCnHjEBa334U48cgTU_Q3825srYnJ0YgTU__KlHjMCaH_7UI0ZgzQ9_6hEjsOaHP_WIEVjzw596xAis-eFPPWIE1vxs_L3xx3_4w59BxAis-eFPPWIE1vzwpx4xAmt--FOPGIE1P_ypR4zAmh_-1CNGYM0Pf-oRI7Dmhz_1yBky1p8NORGTs3vsAcDkCsA_EfzZxeTs8Ie_okzkh39rmejvkbf_8Ic_i4gZ4M8uJoeHP_wVwZ9dTA4P_0z399ICMDm_R_jhD38mEUPAn1lMDh_IeH-vvPzDH_5MIoaAP7NYHD4aMQT8mcXi8NHIKTLSnw24KhaHj0VMAX9WsTh8LGKKTPP3zOMf_vBnEzEF_FnF4PDNkWNkoD8bb3UMDt8SMQb8GcXg8C0RY2SWv3ce__CHP6PIOTLOnw23JvqHT0TMAX820T98ImKOTPL30OPfM_4MF4D6wT3Ez9LfKw8A2sf20u0Pf_izi5gko_wZaWujfXgiYpJM8ffU7c_W3xsLQPfI3uJn7O-JBaB7Ym_xw5-yv8duf9b-XlgAmsf1Gj9zfw8sAMXDeo6fvT_7BaB3VO_xe8Cf-QZQO6f39L3hz3gBKB3Sgzd_tkf8mW4AnQN6U98z_gz_ayCNw3kUP9tD_gFWK-D6sbyLn-0t_2jkdL7397R9NFdPbztiRH_ye_UFX5Vbx3coYlY_-PuEvTWHz-9q5OQe4fcbuKr0L4CnUp0uTX6f25qKCos3Y33pvRNrCfqxvuIeKUvVBQ0jQy8dv5C7_YQk140qE4-83eepF-Z-wT24W5KXc2XigpsnbdxVvzm0-ZQkDy_sJRZ1v1n4a93vmy5ecTs3ZG
_X8M79k0PrF3UNzftGkis79BVnnXw-9P6Yu7l_H5Pk54RuYtuCvsLaBePkg8d3b1pQf1CcW1Ar5F57lN8-OJc__itZvGPKkL6XjNvCn6qrlee8Wyqurb5OXpW_vemSPuWNt71WUBN8f0moYO_chuD_JPny1eXi2ZMWhW6pq-BrX5fknPMGidftnh5aOvylhkOHJXluTkfxH5-WC7vHjxM6S8P5rhUrxQtH1cqzO-1r_OxnbRqrpn5P3DbuDuHXt11affmCjvzkSavDnz8_T5i0eHH1-Y915jeWXCRW1LXhpq96gbvlkCQLh7qIQnZJaOs5O7hv9knyd0b1jcw_ivt0wmBueGSeV7qVicOO3yEf2XF-de23X2zo0P7l8DfTa7i3nsjj978nyVPaDxWv--JYqG7UWu6uI5J84vVssfc7S7gfHdjDf_fqWrlnzzXiwH43yBc98Jdq4doZDbMW7ArnPXkF94fupfz1b0ry2qfGiOUFU7mTg5bxA6-S5Jo3ponbP-rIHdh4Fv_xNknu-dYocdn6WfKkvZub_jOxd2P9_iPhpaHb5SfvW9j0xtbixp-ce1icemxE6Muc5dyrX0jyhyfyxPlPZ_GLd57kBr0iySsmni3-MHsPd2LgyYZ7XpPkZy6bL55x_ZpN0z7N4St2SnL_2lzxq9_dKry54ZdN78jLuKpFi8Rrjrbnp67b1rhpca28_IKnxaG_HTj2uaPjG_ZH9mdhzUcCt6eSG_O3eu6rA5HzF58SpoXncdMmzGl8dZEk3_f0LeJD22bLby1ZX716xIKGrEVbw48-_CB39d7_chv-KcmPbn1PuC1vjsBNXNc0uHAh9_gnPcWmLl0aTn18H79wpiSPLXpC-NOPV4cOX5zVUHVSklcW7xBKtrfj31-6heeX18qXzd0nHp5ZyAtlsxpXXC_JM2pfFIuXLOS4wjHcVZ9J8rEzy8Qb7_6aO1BXyq9_UZLnyJeLj7w0R9jz0Nrq-9_syn__vZlC0Q2z5WObn21a1K174_3zDof3DPhNw_qartWTv75LLlp-hfBo2XNjz7t3PP_1s5K8ZcXjQocr53NjpD3c0A8j86z5l9D5gUF8Xtk6vtfOWnnSlQPCo_94KdfuyM9Db38pyatu3iB-_uS73Auz2_HhP0vyzG4jxKcmb-Rq9q0MTYrsW1Hdw-K-D-7hz19X3zj-xlp5SoUY3lA_ketdsJM7GLkfHzv3A-Guv-fzB7aM5JetjJxv-kLx6ISXN065cwI_a70kZxVdKVx7sIo_OKy0cfUvJPnWxmvCcu-e_FUzahse3yzJ9bPXiHN2_GTTK5_Mb_xBxOPolEeqZxQIPBd4g__kzlo5f9op8YwdS7mbJr7LleyS5HvXfSnsWjVHvrXgmeqbLvxpw6G9H4n_Bz8_xLw"
# decode string + decompress zip
depthMapData = parse(s)
# parse first bytes to describe data
header = parseHeader(depthMapData)
# parse bytes into planes of float values
data = parsePlanes(header, depthMapData)
# compute position and values of pixels
depthMap = computeDepthMap(header, data["indices"], data["planes"])
# process float 1D array into int 2D array with 255 values
im = depthMap["depthMap"]
im[np.where(im == max(im))[0]] = 255
if min(im) < 0:
    im[np.where(im < 0)[0]] = 0
im = im.reshape((depthMap["height"], depthMap["width"])).astype(int)
# display image
plt.imshow(im)
plt.show()
(and save with plt.imsave)
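For example, to get the PNG the question originally asked for (a one-liner sketch; the cmap choice is arbitrary):

# save the 2D depth array as a grayscale PNG
plt.imsave("downloads/deer_decode.png", im, cmap="gray")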

Related

Small unpacking buffer for wave.struct

I am building an SSTV program that can convert a 180x136 picture into sound using its RGB components. But when I try to read data from the microphone, it gives me
struct.error: unpack requires a buffer of 400 bytes
when I use only 200 bytes.
I expected the output to be the decoded microphone input (in short: microphone -> decode signal -> image).
I tried using chunk*2, but it gave me the same error:
struct.error: unpack requires a buffer of 800 bytes
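The byte arithmetic in the error message suggests the unpack format is asking for twice as many bytes as each chunk contains: "%dh" % (len(data)/swidth) requests len(data)/swidth two-byte 'h' values, which only matches the buffer when swidth is 2. If your wav/microphone stream uses 1-byte samples, a 200-byte chunk becomes a request for 400 bytes, and doubling the chunk doubles the request to 800, exactly as observed. A hedged sketch that derives the struct code from the sample width instead of hard-coding 'h':

import struct
import numpy as np

# struct codes per sample width; 8-bit wav data is unsigned, wider widths are signed
FMT_BY_WIDTH = {1: "B", 2: "h", 4: "i"}

def unpack_chunk(data, sampwidth):
    """Unpack raw PCM bytes with a format sized from len(data), so the
    requested buffer size always matches what was actually read."""
    code = FMT_BY_WIDTH[sampwidth]
    count = len(data) // sampwidth
    samples = np.array(struct.unpack("%d%s" % (count, code), data), dtype=float)
    if sampwidth == 1:
        samples -= 128.0  # center 8-bit unsigned samples around zero
    return samples

# usage in the decoder loop (the Blackman window must be sized to match, too):
# samples = unpack_chunk(data, swidth)
# indata = samples * np.blackman(len(samples))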
Here is the code for decoder:
import numpy as np
import pyaudio
import wave
from tkinter import *

root = Tk()
c = Canvas(root, width=90*8, height=68*8, bg='white')
c.pack()

def pix(x, y, rr, rg, rb):  # rr/rg/rb are raw rgb because we are decoding frequency
    if rr < 0:
        r = 0
    if rg < 0:
        g = 0
    if rb < 0:
        b = 0
    try:
        r = min(255, round(rr * (255 / 1124)))
    except:
        r = 0
    try:
        g = min(255, round(rg * (255 / 1124)))
    except:
        g = 0
    try:
        b = min(255, round(rb * (255 / 1124)))
    except:
        b = 0
    if r < 0:
        r = 0
    if g < 0:
        g = 0
    if b < 0:
        b = 0
    print(r, g, b)
    n = hex(r).replace("0x", "")
    rh = (2 - len(n)) * "0" + n
    n = hex(g).replace("0x", "")
    gh = (2 - len(n)) * "0" + n
    n = hex(b).replace("0x", "")
    bh = (2 - len(n)) * "0" + n
    c.create_line(x, y, x + 4, y, fill=f"#{rh}{gh}{bh}")
    c.create_line(x, y+1, x + 4, y+1, fill=f"#{rh}{gh}{bh}")
    c.create_line(x, y+2, x + 4, y+2, fill=f"#{rh}{gh}{bh}")
    c.create_line(x, y+3, x + 4, y+3, fill=f"#{rh}{gh}{bh}")

chunk = 100
# open up a wave
wf = wave.open('input.wav', 'rb')
swidth = wf.getsampwidth()
RATE = wf.getframerate()
# use a Blackman window
window = np.blackman(chunk)
# open streams
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=RATE,
                output=True)
microphone = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=RATE,
                    input=True)
# read some data
data = wf.readframes(chunk)
pc = 0
x = 0
e = [255, 252]
totaly = 0
rrgb = [255, 255, 255]  # raw rgb
# play stream and find the frequency of each chunk
while True:
    data = microphone.read(chunk)
    # write data out to the audio stream
    stream.write(data)
    indata = np.array(wave.struct.unpack("%dh" % (len(data) / swidth), data)) * window
    # take the FFT and square each value
    fftData = abs(np.fft.rfft(indata)) ** 2
    # find the maximum
    which = fftData[1:].argmax() + 1
    # use quadratic interpolation around the max
    if which != len(fftData) - 1:
        y0, y1, y2 = np.log(fftData[which-1:which+2:])
        x1 = (y2 - y0) * .5 / (2 * y1 - y2 - y0)
        # find the frequency and output it
        thefreq = (which + x1) * RATE / chunk
        if thefreq > 3030 or totaly == 540:
            if pc == 0:
                totaly = x
            pc += 4
            x = 0
            root.update()
        else:
            if x//4 % 3 == 0:
                pix(x / 3, pc, rrgb[0], rrgb[1], rrgb[2])
                rrgb[0] = thefreq  # R
            elif x//4 % 3 == 1:
                rrgb[1] = thefreq  # G
            elif x//4 % 3 == 2:
                rrgb[2] = thefreq  # B
            x += 4
    else:
        thefreq = which * RATE / chunk
    # read some more data
    data = microphone.read(chunk)
    if data:
        stream.write(data)

stream.close()
p.terminate()
root.mainloop()
Here is the code for encoder:
import numpy as np
# importing Image from the PIL package
from PIL import Image
import wavio

# create an image object
im = Image.open(r"original.bmp")
px = im.load()
arx = []
art = []
pix = ()
pixels = list(im.getdata())
width, height = im.size
pixels = [pixels[i * width:(i + 1) * width] for i in range(height)]
print(pixels)
for ypos in range(0, 136):
    for xpos in range(0, 180):
        pix = pixels[ypos][xpos]
        rgb = (pix[0], pix[1], pix[2])
        arx.append(rgb)
    arx.append(406)  # end-of-line marker
art.append(arx)

rate = 44100  # samples per second
T = 3  # sample duration (seconds)
slowdown = int(input("Speed in Hz:"))
f = 100.0 / slowdown  # sound frequency (Hz)
print("speed multiplier is", slowdown)
encoding = "ansi"
total = []
xpos2 = 0
c = 0
for ypos in art:
    for xpos in ypos:
        t = np.linspace(0, 0.01, slowdown, endpoint=False)
        if xpos == 406:
            sig = np.sin(2 * np.pi * f * 500 * t)
            total.extend(sig)
            sig = np.sin(2 * np.pi * f * 700 * t)
            total.extend(sig)
            continue
        sig = np.sin(2 * np.pi * f * xpos[0] * t)
        total.extend(sig)
        sig = np.sin(2 * np.pi * f * xpos[1] * t)
        total.extend(sig)
        sig = np.sin(2 * np.pi * f * xpos[2] * t)
        total.extend(sig)
        print(xpos)
    sig = np.sin(2 * np.pi * f * 300 * t)
    break

wavio.write("input.wav", total, rate, sampwidth=2)
input("done, saved as 'input.wav'")

Function that takes 2 colors as arguments and returns the average color

I'm new to coding and I am trying to complete this assignment, but I can't get it to work:
def colorMean(colorA, colorB):
    rgb1 = int(colorA, 16)
    rgb2 = int(colorB, 16)
    aveColor = 0
    for i in range(3):
        comp1 = int(color1 >> (8 * i)) & 0xff
        comp2 = int(color2 >> (8 * i)) & 0xff
        value = int((comp1 + comp2) / 2) << 8 * i
        avgColor = int((comp1 + comp2) / 2) << 8 * i
    return hex(aveColo)

colorMean("0000ff", "ff0000")
Maybe this is what you're looking for. If there are restrictions and sample outputs to the assignment, you should post those too.
NOTE: I got the first two functions, hex_to_rgb and rgb_to_hex, from here.
def hex_to_rgb(value):
    value = value.lstrip('#')
    lv = len(value)
    return tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3))

def rgb_to_hex(rgb):
    return '%02x%02x%02x' % rgb

def mean_color(color1, color2):
    rgb1 = hex_to_rgb(color1)
    rgb2 = hex_to_rgb(color2)
    avg = lambda x, y: round((x + y) / 2)
    new_rgb = ()
    for i in range(len(rgb1)):
        new_rgb += (avg(rgb1[i], rgb2[i]),)
    return rgb_to_hex(new_rgb)

avg_color = mean_color('ff0000', '0000ff')
print(avg_color)  # 800080
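For completeness, the original bitwise approach also works once three small bugs are fixed: the loop reads color1/color2 but the parameters are named colorA/colorB, the averaged channel is computed but never accumulated into aveColor, and the return references a misspelled aveColo. A minimal fix of that version:

def colorMean(colorA, colorB):
    rgb1 = int(colorA, 16)
    rgb2 = int(colorB, 16)
    aveColor = 0
    for i in range(3):
        comp1 = (rgb1 >> (8 * i)) & 0xff   # pull one channel out of each color
        comp2 = (rgb2 >> (8 * i)) & 0xff
        aveColor |= ((comp1 + comp2) // 2) << (8 * i)  # put the averaged channel back
    return hex(aveColor)

print(colorMean("0000ff", "ff0000"))  # 0x7f007f (integer division gives 0x7f, not 0x80)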

The problem with rewriting code from Matlab to Python

I am new to Python and I am trying to convert and modify the following code from Matlab to Python:
function A = alg_a(RGBimage, n)
    [row, column, d] = size(RGBimage);
    if (d == 3)
        HSVimage = rgb2hsv(RGBimage);
        V = HSVimage(:,:,3);
    else
        V = double(RGBimage)/255;
    end
    V = V(:);
    [Vsorted, ix] = sort(V);
    s = (row*column)/n;
    i = 0;
    h = [];
    while (i < n)
        i = i + 1;
        z = Vsorted(((floor(s*(i-1))+1)):floor(s*i));
        Vstart = (s*(i-1))/(row*column);
        Vstop = (s*i)/(row*column);
        r = z - z(1);
        f = (1/n)/(r(size(r,1)));
        g = r*f;
        if (isnan(g(1)))
            g = r + Vstop;
        else
            g = g + Vstart;
        end
        h = vertcat(h, g);
    end
    m(ix) = h;
    m = m(:);
    if (d == 3)
        HSVimage(:,:,3) = reshape(m, row, column);
        A = hsv2rgb(HSVimage);
    else
        A = reshape(m, row, column);
    end
    return;
end
This function implements the SMQT image preprocessing algorithm, and my problem is that I don't really understand how it works, which is why I'm stuck here:
import cv2
import numpy as np
import math

img = cv2.imread("16.bmp")
n = 8
(row, column, dim) = img.shape
if dim == 3:
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    V = hsv_img[:, :, 2]
else:
    V = img / 255
V = V.flatten(order='F')
Ix = V.argsort()[::-1]
V_sorted = V[Ix]
s = int(row*column/n)
i = 0
h = []
while i < n:
    i += 1
    z = V_sorted[math.floor(s*(i-1))+1:math.floor(s*i)]
    z = np.array(z)
    V_start = (s*(i-1))/(row*column)
    V_stop = (s * i) / (row * column)
    r = z - z[0]
    f = (1 / n) / r[len(r)-1]
    g = r * f
    if math.isnan(g[0]):
        g = r + V_stop
    else:
        g = g + V_start
    if len(h) == 0:
        h = g
    else:
        h = np.vstack((h, g))
M = np.array([])
M[Ix] = h
M = M.flatten(order='F')
if dim == 3:
    hsv_img = np.reshape(M, row, column)
    img_res = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
else:
    img_res = np.reshape(M, row, column)
cv2.imwrite('16_smqt', img_res)
It seems that I wrote code that is similar in functionality, but it does not work, and I have no idea why. I took the Matlab code from the article, so I have no doubt that it works. Please help me find the inaccuracies in my translation from Matlab to Python.
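A few concrete inaccuracies stand out. Matlab's sort is ascending, but argsort()[::-1] is descending; Matlab's 1-based inclusive range (floor(s*(i-1))+1):floor(s*i) maps to the 0-based half-open slice [floor(s*(i-1)):floor(s*i)], so the extra +1 drops an element; vertcat on column vectors corresponds to concatenation, not np.vstack; fancy-assigning into M = np.array([]) fails because the array has size 0; Matlab is column-major, so the reshape needs order='F'; and the blended V channel is never written back into hsv_img before converting to BGR (OpenCV's V is also 0-255, not 0-1). An untested sketch folding those fixes in:

import cv2
import numpy as np

img = cv2.imread("16.bmp")
n = 8
row, column, dim = img.shape
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
V = hsv_img[:, :, 2].astype(np.float64) / 255.0  # Matlab's V channel is 0..1
V = V.flatten(order='F')                         # column-major, like V(:)
ix = np.argsort(V)                               # ascending, like Matlab's sort
V_sorted = V[ix]
s = row * column / n                             # keep the fraction, as in Matlab
h = np.empty(0)
for i in range(1, n + 1):
    z = V_sorted[int(np.floor(s * (i - 1))):int(np.floor(s * i))]  # 0-based slice
    V_start = (s * (i - 1)) / (row * column)
    V_stop = (s * i) / (row * column)
    r = z - z[0]
    with np.errstate(divide='ignore', invalid='ignore'):
        g = r * ((1 / n) / r[-1])
    if np.isnan(g[0]):
        g = r + V_stop
    else:
        g = g + V_start
    h = np.concatenate((h, g))                   # vertcat on column vectors
m = np.empty_like(V)
m[ix] = h                                        # undo the sort permutation
hsv_img[:, :, 2] = (m.reshape((row, column), order='F') * 255).astype(np.uint8)
img_res = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)
cv2.imwrite('16_smqt.bmp', img_res)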

Handwriting neural network weights don't change

from struct import unpack
import gzip
import numpy
from numpy import *
import matplotlib.pyplot as plt

learningRate = 0.1

def get_labeled_data(imagefile, labelfile):
    """Read input-vector (image) and target class (label, 0-9) and return
    it as a list of tuples.
    """
    # Open the images with gzip in read binary mode
    images = gzip.open(imagefile, 'rb')
    labels = gzip.open(labelfile, 'rb')
    # Read the binary data
    # We have to get big-endian unsigned ints, so we need '>I'
    # Get metadata for images
    images.read(4)  # skip the magic_number
    number_of_images = images.read(4)
    number_of_images = unpack('>I', number_of_images)[0]
    rows = images.read(4)
    rows = unpack('>I', rows)[0]
    cols = images.read(4)
    cols = unpack('>I', cols)[0]
    # Get metadata for labels
    labels.read(4)  # skip the magic_number
    N = labels.read(4)
    N = unpack('>I', N)[0]
    if number_of_images != N:
        raise Exception('number of labels did not match the number of images')
    # Get the data
    x = zeros((N, rows, cols), dtype="float32")  # Initialize numpy array
    y = zeros((N, 1), dtype="uint8")  # Initialize numpy array
    for i in range(N):
        if i % 1000 == 0:
            print("i: %i" % i)
        for row in range(rows):
            for col in range(cols):
                tmp_pixel = images.read(1)  # Just a single byte
                tmp_pixel = unpack('>B', tmp_pixel)[0]
                x[i][row][col] = tmp_pixel
        tmp_label = labels.read(1)
        y[i] = unpack('>B', tmp_label)[0]
    return (x, y)

ld = get_labeled_data("C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-images-idx3-ubyte.gz",
                      "C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-labels-idx1-ubyte.gz")

def sigmoid(x):
    return 1/(1+numpy.exp(-x))

def sigmoid_P(x):
    return sigmoid(x) * (1 - sigmoid(x))

def cost(i, t):
    return (i - t) ** 2

def cost_P(i, t):
    return 2 * (i - t)

# 10x28x28 - number x row x column
weights = numpy.random.random((10, 28, 28))
biases = numpy.random.random((10, 28, 28))
dr = 0
da = 0
for loopi in range(10000):
    r = numpy.random.randint(0, len(ld[0][0]))
    targets = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    image = ld[0][r]
    label = ld[1][r][0]
    # weighted 3D matrix of [number][row][column]
    predictions = []
    wPredictions = []
    # average of predictions for each number
    avgPred = []
    avgPred2 = []
    img = list(image)
    for i in range(10):
        x = []
        y = []
        for b, w in zip(biases[i], weights[i]):
            x.append(sigmoid(numpy.dot(w, image) + b))
            y.append(numpy.dot(w, image) + b)
        predictions.append(x)
        avgPred.append(numpy.average(list(x)))
        avgPred2.append(numpy.average(list(y)))
    for i in range(10):
        sqError = cost(avgPred[i], targets[i])
        # derivative of the cost with respect to each of the weights and biases
        dc_dp = cost_P(avgPred[i], targets[i])
        dp_dz = sigmoid_P(avgPred2[i])
        for imgRow in range(28):
            for imgCol in range(28):
                dz_dw = image[imgRow][imgCol]
                dz_db = 1
                print("dc_dp: " + str(dc_dp) + "\ndp_dz: " + str(dp_dz) + "\ndz_dw: " + str(dz_dw))
                dc_dw = dc_dp * dp_dz * dz_dw
                dc_db = dc_dp * dp_dz * dz_db
                dr = dc_dw
                weights[i][imgRow][imgCol] -= learningRate * dc_dw
                da = weights[i][imgRow][imgCol]
                biases[i][imgRow][imgCol] -= learningRate * dc_db

while True:
    big = 0
    intid = int(input())
    imag = ld[0][intid]
    for l in range(10):
        papa = []
        for b, w in zip(biases[i], weights[i]):
            papa.append(sigmoid(numpy.dot(w, imag) + b))
        lol = numpy.average(papa)
        if lol > big:
            big = l
    print(str(dr) + " " + str(da))
    print(big)
The weights aren't changing because dp_dz is always 0, and I'm not sure what's causing that. I don't mean that they're changing by only a very small amount; they're literally not changing at all. I believe it has to do with my approach in general, but I'm not sure how else I could approach this problem. I'm very new to neural networks. Any help would be greatly appreciated!
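Not a full diagnosis, but the numbers make saturation very likely: the inputs are raw pixel values in [0, 255] and the weights are drawn from [0, 1), so each pre-activation z = w·x + b averages in the tens of thousands. At that magnitude sigmoid(z) is exactly 1.0 in float arithmetic, so sigmoid_P(z) = sigmoid(z)*(1-sigmoid(z)) is exactly 0 and every gradient vanishes. A small sketch demonstrating the effect and the usual remedies (scale pixels to [0, 1], start with small zero-centered weights):

import numpy as np

x_raw = np.random.random(784) * 255            # stand-in for one 28x28 image
w = np.random.random(784)                      # weights drawn from [0, 1)
z = w @ x_raw
print(z)                                       # on the order of 50000
print(1 / (1 + np.exp(-z)))                    # 1.0, so sigmoid_P(z) == 0

x = x_raw / 255.0                              # scale inputs to [0, 1]
w_small = (np.random.random(784) - 0.5) * 0.1  # small, zero-centered weights
z = w_small @ x
s = 1 / (1 + np.exp(-z))
print(s * (1 - s))                             # non-zero gradient signal near z = 0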

How do I manually blur an image in Python?

In the code below I am trying to blur an image in python by replacing each pixel with the average of its surrounding pixels as long as the original pixel is not at the border of the image. However, whenever I run my code I am simply receiving a black image. Can anyone tell me what I'm doing wrong?
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import imread, imsave, imresize

imgOld = imread('house.png')  # read image into a numpy array
imgNew = imgOld
(imgH, imgW, imgC) = imgOld.shape  # imgC = 3 or 4 (RGB or RGBA)
plt.imshow(imgOld, origin='lower')

# blur
for y in range(imgH):
    for x in range(imgW):
        xLast = 0
        yLast = 0
        if x != 0 and y != 0:
            xLast = (x-1) % imgW
            yLast = (y-1) % imgH
        else:
            xLast = 0
            yLast = 0
        xNext = (x+1) % imgW
        yNext = (y+1) % imgH
        rgb = imgNew[y,x]
        r = (imgOld[yLast,xLast,2]
             + imgOld[yLast,x,2]
             + imgOld[yLast,xNext,2]
             + imgOld[y,xLast,2]
             + imgOld[y,x,2]
             + imgOld[y,xNext,2]
             + imgOld[yNext,xLast,2]
             + imgOld[yNext,x,2]
             + imgOld[yNext,xNext,2]) / 9
        g = (imgOld[yLast,xLast,1]
             + imgOld[yLast,x,1]
             + imgOld[yLast,xNext,1]
             + imgOld[y,xLast,1]
             + imgOld[y,x,1]
             + imgOld[y,xNext,1]
             + imgOld[yNext,xLast,1]
             + imgOld[yNext,x,1]
             + imgOld[yNext,xNext,1]) / 9
        b = (imgOld[yLast,xLast,0]
             + imgOld[yLast,x,0]
             + imgOld[yLast,xNext,0]
             + imgOld[y,xLast,0]
             + imgOld[y,x,0]
             + imgOld[y,xNext,0]
             + imgOld[yNext,xLast,0]
             + imgOld[yNext,x,0]
             + imgOld[yNext,xNext,0]) / 9
        imgNew[y,x] = [b,g,r]

plt.imshow(imgNew, origin='lower')
Edit: I have updated my code to average based on the individual color values of each pixel; however, I am still receiving a black image as a result.
I am adding my perfectly working Java version of this code below. I do not understand where I am going wrong.
Java version:
protected void proc_17() {
    info = "Blur";
    for (int y = 0; y < imgH; y++) {
        for (int x = 0; x < imgW; x++) {
            int xLast = 0;
            int yLast = 0;
            if (x != 0 && y != 0) {
                xLast = (x-1) % imgW;
                yLast = (y-1) % imgH;
            } else {
                xLast = 0;
                yLast = 0;
            }
            int xNext = (x+1) % imgW;
            int yNext = (y+1) % imgH;
            float r = (imgOld.getR(xLast, yLast)
                       + imgOld.getR(x, yLast)
                       + imgOld.getR(xNext, yLast)
                       + imgOld.getR(xLast, y)
                       + imgOld.getR(x, y)
                       + imgOld.getR(xNext, y)
                       + imgOld.getR(xLast, yNext)
                       + imgOld.getR(x, yNext)
                       + imgOld.getR(xNext, yNext)) / 9;
            float g = (imgOld.getG(xLast, yLast)
                       + imgOld.getG(x, yLast)
                       + imgOld.getG(xNext, yLast)
                       + imgOld.getG(xLast, y)
                       + imgOld.getG(x, y)
                       + imgOld.getG(xNext, y)
                       + imgOld.getG(xLast, yNext)
                       + imgOld.getG(x, yNext)
                       + imgOld.getG(xNext, yNext)) / 9;
            float b = (imgOld.getB(xLast, yLast)
                       + imgOld.getB(x, yLast)
                       + imgOld.getB(xNext, yLast)
                       + imgOld.getB(xLast, y)
                       + imgOld.getB(x, y)
                       + imgOld.getB(xNext, y)
                       + imgOld.getB(xLast, yNext)
                       + imgOld.getB(x, yNext)
                       + imgOld.getB(xNext, yNext)) / 9;
            imgNew.setR(x, y, r);
            imgNew.setG(x, y, g);
            imgNew.setB(x, y, b);
        }
    }
}
The problem with this code is that it is not adding up the values in the variables r, g, and b.
This may be due to indentation, so I collected the values into tuples, summed them, and divided by 9.
Follow the script below:
imgOld = imread('house.png')  # read image into a numpy array
imgNew = imgOld
(imgH, imgW, imgC) = imgOld.shape  # imgC = 3 or 4 (RGB or RGBA)
plt.imshow(imgOld, vmin=0, vmax=255)
plt.show()

# blur
for y in range(imgH):
    for x in range(imgW):
        xLast = 0
        yLast = 0
        if x != 0 and y != 0:
            xLast = (x-1) % imgW
            yLast = (y-1) % imgH
        else:
            xLast = 0
            yLast = 0
        xNext = (x+1) % imgW
        yNext = (y+1) % imgH
        rgb = imgNew[y,x]
        aux_r = (imgOld[yLast,xLast,2],
                 imgOld[yLast,x,2],
                 imgOld[yLast,xNext,2],
                 imgOld[y,xLast,2],
                 imgOld[y,x,2],
                 imgOld[y,xNext,2],
                 imgOld[yNext,xLast,2],
                 imgOld[yNext,x,2],
                 imgOld[yNext,xNext,2])
        r = sum(aux_r) / 9
        aux_g = (imgOld[yLast,xLast,1],
                 imgOld[yLast,x,1],
                 imgOld[yLast,xNext,1],
                 imgOld[y,xLast,1],
                 imgOld[y,x,1],
                 imgOld[y,xNext,1],
                 imgOld[yNext,xLast,1],
                 imgOld[yNext,x,1],
                 imgOld[yNext,xNext,1])
        g = sum(aux_g) / 9
        aux_b = (imgOld[yLast,xLast,0],
                 imgOld[yLast,x,0],
                 imgOld[yLast,xNext,0],
                 imgOld[y,xLast,0],
                 imgOld[y,x,0],
                 imgOld[y,xNext,0],
                 imgOld[yNext,xLast,0],
                 imgOld[yNext,x,0],
                 imgOld[yNext,xNext,0])
        b = sum(aux_b) / 9
        imgNew[y,x] = [b, g, r]

plt.imshow(imgNew, vmin=0, vmax=255)
plt.show()
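Two more things are worth checking, independent of the indentation. imgNew = imgOld does not copy the array; it just creates a second name for the same memory, so each blurred pixel is computed partly from already-blurred neighbours. And with a uint8 image, the nine-pixel sums wrap around at 256, which can darken everything. A hedged sketch of the same blur with a real copy and float arithmetic (imageio.imread is one substitute for the deprecated scipy.misc.imread):

import numpy as np
import matplotlib.pyplot as plt
from imageio import imread

imgOld = imread('house.png').astype(np.float64)  # float math: no uint8 wrap-around
imgNew = imgOld.copy()                           # a real copy, not a second reference

imgH, imgW = imgOld.shape[:2]
for y in range(1, imgH - 1):        # leave the border pixels untouched
    for x in range(1, imgW - 1):
        # mean of the 3x3 neighbourhood, all channels at once
        imgNew[y, x] = imgOld[y-1:y+2, x-1:x+2].mean(axis=(0, 1))

plt.imshow(imgNew.astype(np.uint8))
plt.show()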
