Convert depth map (base64) of google street view to image - python
I wonder if someone know how to convert the depth map of google street view (encoded as base64) into an image or a 2D matrix. I'm using Python and was able to get the base64 string, decode it and save it to a .png file. However, the exported png file cannot be open by any graphic viewer... I guess the base64 code wasn't decoded correctly...
Here is a post that mentioned the base64 string in GSV.
Here is my code:
#URL of the json file of a GSV depth map
url_depthmap='http://maps.google.com/cbk?output=json&cb_client=maps_sv&v=4&dm=1&pm=1&ph=1&hl=en&panoid=lcptgwtxfJ6DccSzyWp0zA'
# getting the json file
r = requests.get(url_depthmap)
# open it
depth_json= r.json()
# get the base64 string of the depth map
data=depth_json['model']['depth_map']
# fix the 'inccorrect padding' error. The length of the string needs to be divisible by 4.
data += "=" * ((4 - len(data) % 4) % 4)
# convert the URL safe format to regular format.
data=data.replace('-','+').replace('_','/')
data = base64.decodestring(data) # decode the string
data=zlib.decompress(data) #decompress the data
# write it to a png file
image_result = open('downloads/deer_decode.png', 'wb')
image_result.write(data)
looking at https://github.com/proog128/GSVPanoDepth.js/blob/master/src/GSVPanoDepth.js and starting with your piece of code, here is my overall processing:
import base64
import zlib
import numpy as np
import struct
import matplotlib.pyplot as plt
def parse(b64_string):
# fix the 'inccorrect padding' error. The length of the string needs to be divisible by 4.
b64_string += "=" * ((4 - len(b64_string) % 4) % 4)
# convert the URL safe format to regular format.
data = b64_string.replace("-", "+").replace("_", "/")
data = base64.b64decode(data) # decode the string
data = zlib.decompress(data) # decompress the data
return np.array([d for d in data])
def parseHeader(depthMap):
return {
"headerSize": depthMap[0],
"numberOfPlanes": getUInt16(depthMap, 1),
"width": getUInt16(depthMap, 3),
"height": getUInt16(depthMap, 5),
"offset": getUInt16(depthMap, 7),
}
def get_bin(a):
ba = bin(a)[2:]
return "0" * (8 - len(ba)) + ba
def getUInt16(arr, ind):
a = arr[ind]
b = arr[ind + 1]
return int(get_bin(b) + get_bin(a), 2)
def getFloat32(arr, ind):
return bin_to_float("".join(get_bin(i) for i in arr[ind : ind + 4][::-1]))
def bin_to_float(binary):
return struct.unpack("!f", struct.pack("!I", int(binary, 2)))[0]
def parsePlanes(header, depthMap):
indices = []
planes = []
n = [0, 0, 0]
for i in range(header["width"] * header["height"]):
indices.append(depthMap[header["offset"] + i])
for i in range(header["numberOfPlanes"]):
byteOffset = header["offset"] + header["width"] * header["height"] + i * 4 * 4
n = [0, 0, 0]
n[0] = getFloat32(depthMap, byteOffset)
n[1] = getFloat32(depthMap, byteOffset + 4)
n[2] = getFloat32(depthMap, byteOffset + 8)
d = getFloat32(depthMap, byteOffset + 12)
planes.append({"n": n, "d": d})
return {"planes": planes, "indices": indices}
def computeDepthMap(header, indices, planes):
v = [0, 0, 0]
w = header["width"]
h = header["height"]
depthMap = np.empty(w * h)
sin_theta = np.empty(h)
cos_theta = np.empty(h)
sin_phi = np.empty(w)
cos_phi = np.empty(w)
for y in range(h):
theta = (h - y - 0.5) / h * np.pi
sin_theta[y] = np.sin(theta)
cos_theta[y] = np.cos(theta)
for x in range(w):
phi = (w - x - 0.5) / w * 2 * np.pi + np.pi / 2
sin_phi[x] = np.sin(phi)
cos_phi[x] = np.cos(phi)
for y in range(h):
for x in range(w):
planeIdx = indices[y * w + x]
v[0] = sin_theta[y] * cos_phi[x]
v[1] = sin_theta[y] * sin_phi[x]
v[2] = cos_theta[y]
if planeIdx > 0:
plane = planes[planeIdx]
t = np.abs(
plane["d"]
/ (
v[0] * plane["n"][0]
+ v[1] * plane["n"][1]
+ v[2] * plane["n"][2]
)
)
depthMap[y * w + (w - x - 1)] = t
else:
depthMap[y * w + (w - x - 1)] = 9999999999999999999.0
return {"width": w, "height": h, "depthMap": depthMap}
# see https://stackoverflow.com/questions/56242758/python-equivalent-for-javascripts-dataview
# for bytes-parsing reference
# see https://github.com/proog128/GSVPanoDepth.js/blob/master/src/GSVPanoDepth.js
# for overall processing reference
# Base64 string from request to:
# https://maps.google.com/cbk?output=xml&ll=45.508457,-73.532738&dm=1
# edit long and lat
s = "eJzt2gt0FNUdx_FsQtgEFJIQiAlvQglBILzktTOZ2YitloKKD6qAFjy2KD5ABTlqMxUsqICcVqqC1VoUqS34TkMhydjaorYCStXSKkeEihQoICpUFOzuJruZO4_dmdmZe2fY3_cc4RCS-L_z-c_sguaNy8rKzgrkZSGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBCbgsFgB9YzIFYFo_wdsAAZWbC5DliATCwYVPhjATKsYNC0f5s2baiNhagUDFrzb47aeMjVgkF7_liC06JgWv7YAX-n1rfjjx3wbVp-m_5YAV_mpH-bPGpjI4dy1h8L4Lcc9scC-Cyn_bEA_sq6vzF_zB8b4Ktc8McC-Cg3_LEAvkmH3wF_LIBfcskfC-CTHH37r_DHAvgj1_yxAL7IPX8sgB9y0R8L4IPc9McCeD9X_bEAns9dfyyA13PZHwvg8Rz11_LnFVI7SVZWO2UU_71-7jTybwd_67nuT28B4G8j-Gd2LvsXFtJbAPjbyLq_MT_8_RcFf1oLAH8bwd9fFRQUOPr9aPhTWoDT378glqPfEv4-qSCRo98W_v6owM_-dBYA_jaCvz_ytz-VBYC_jeDvsfprP1SeRd0_yVfA39W0_uV0_R393z_gbzX42w3-1rP-8g9_d9P4D4C_yeBvOfhTr7Q06W_D33Z-8Y-l-ABBrvYf4B9_LX_Cf6iTQxsFf8vBn3qp_FULAH_T-cq_dQHgr1v37t0tfgX8LUfJf6hp_-7KLB4G_paj5W96IL_5d9Yp-VeY9j839qPX_I35k_zxT9d_2LDmn3v06NH6wdPBP_kCkP4jR44s7T8y1pCWRkeL_2JI1L9Xr14K_16tpT0-e_8ePUj_jq2d3v6juUhRxNL-vZIV8Q-Hwwr_0YO02R2fhf-w-M8a_45kp4l_GVncv8xs5eWRHxT-0Y_1S6T8TOvjwz_NdP1LkhXDN_zdLjqVR_5R-Ot9ijZz48PffmdF0_XvZjtDTl3_Ir3OUhcZtahrJL0z0PIfWl1dPSqWrv-YWB70P6O1Tt9qqahoYLwuXXT9E5e-p8V0QWMp_I2cI3VqLqv5M7oqa9tWcZb48QJG_n2jFTdX0ru1qip7_pWVY6OFolVWVtacE68y5h_5UKUgCGr_wZr68Txf0jpQ_BwjonOS_n365ChqTxbIVpZzpiLlZYoVCASIX8eucGIXdP31bmA9LnVdOnXS0YwX91d9WCT2RGUejTwLmZF_RUVFbl9FxfFK7PkrX-IqiWL-Le9lVP6xz-5NVBVpRLRiorMj5eYS_sb47SPnJv1ziA04k7xmigun3YNOhLvunTtQ8bRIUSfDRN1vrSVvqyiZeyr_6GWraC43Wusm2PJXvcnppyjm3wxbpfKvimsXG9TMHq1Pczk66ehr_VUroLmAqquoswjKWl8p3MjI2wK7Kf9EFcpNsOOv8z63Kn5Hx_xbPFX-KdUT8HruSfR1_bUPgRRLYGIR9NfC9F4kdTZUN8Ge0j83d7je1YwsgQ1_o9tXk7E_YZ7ihk_ysp9I199gA1IugY1V0C0lcnr3ujX_WDpLYN1f9Q7CvD8pbsU9hb6hf47ey4ClLdDZBbPLQIncir_OElj11zFsKXFvJz6i8rfJnlo_mb_xQ8DyFljcCPe1tRn4a7ESS2DRn3jvoLsAirtc7W8d3QC_vfrcyfxzkj4E0toCz2Xi9lctgSX_FErq76_ytwOvp69z7hT-KR8CBlvguzWw5h_Ngn9Kp-bv2PprZ_xT65vwzzHzEPD_08DAP3JR0vW3I-eEvxl9c_5mHwJ-fh4Y-xttgCl-W3IO-JvDD5j1t7MCvtqE_Pwk_vorkNrfJn6stPxN61vwz7H0OmB2GbyyDfn5mg0g_HU2IIW_7l8bWci2vwX8gDV_-w8BK1HxVhf1z49cAWN_zQYk9Vd-4nCbq2DL35q-VX8aK0BBW1vcP1bCP1tdUn-jN4o6mVsJy_5W8QM2_F1fAbepdSP8m5dAq0-sgFXyVCXbADv6Jg9ux9_VFXCV2TCtv3EOoqfyzzHtbws_YNvfvRVwzzhZVvzdWQGjy2zG38ZzP559f_UKOLQDbgknz6K_GyuQZAGs6Fs8eFr-mh3IIH_HV8D4EifzTws_4IC_egXS3QHnbc1ky9_ZFbBx4dO0j-aEv2YHMsffwRVIx94mfsA5f8d2wEFUC6Xh79QK2MZP6-BO-mt2wMYSOORptfT8HdkBW_bp4Qec99cuQYb4p70D1vGdOLgr_tolML0FTpzJRs74p7cDVuwdO7h7_va2wLGDWcs5f_s7QJs-ltv-eluQbA2cPZ3pnPW3twS06WPR8TfcBM0quHBEM7ngb3kJktC7d3Da_qly76RJc8vfyha0XgQK7vHIQdm5x3P_xLq5ym9yD3JoeGuCfzQ6_im2gcnJ4R-Nhb8mJicnJmCtD3_qEROw1oc_9YgJWOvDn3rEBKz14U89YgLW-vCnHjEBa334U4-YgLU-_KlHTMBaH_7UIyZgrc_K392__jcbk6MTE7DWhz_1iAlY68OfesQErPXhTz1iAtb68KceMQFrffhTj5iAtT78qUdMwFof_tQjJmCtD3_qEROw1oc_9YgJWOvDn3rEBKz14U89YgLW-vCnHjEBa334U48cgTU_Q3825srYnJ0YgTU__KlHjMCaH_7UI0ZgzQ9_6hEjsOaHP_WIEVjzw596xAis-eFPPWIE1vxs_L3xx3_4w59BxAis-eFPPWIE1vzwpx4xAmt--FOPGIE1P_ypR4zAmh_-1CNGYM0Pf-oRI7Dmhz_1yBky1p8NORGTs3vsAcDkCsA_EfzZxeTs8Ie_okzkh39rmejvkbf_8Ic_i4gZ4M8uJoeHP_wVwZ9dTA4P_0z399ICMDm_R_jhD38mEUPAn1lMDh_IeH-vvPzDH_5MIoaAP7NYHD4aMQT8mcXi8NHIKTLSnw24KhaHj0VMAX9WsTh8LGKKTPP3zOMf_vBnEzEF_FnF4PDNkWNkoD8bb3UMDt8SMQb8GcXg8C0RY2SWv3ce__CHP6PIOTLOnw23JvqHT0TMAX820T98ImKOTPL30OPfM_4MF4D6wT3Ez9LfKw8A2sf20u0Pf_izi5gko_wZaWujfXgiYpJM8ffU7c_W3xsLQPfI3uJn7O-JBaB7Ym_xw5-yv8duf9b-XlgAmsf1Gj9zfw8sAMXDeo6fvT_7BaB3VO_xe8Cf-QZQO6f39L3hz3gBKB3Sgzd_tkf8mW4AnQN6U98z_gz_ayCNw3kUP9tD_gFWK-D6sbyLn-0t_2jkdL7397R9NFdPbztiRH_ye_UFX5Vbx3coYlY_-PuEvTWHz-9q5OQe4fcbuKr0L4CnUp0uTX6f25qKCos3Y33pvRNrCfqxvuIeKUvVBQ0jQy8dv5C7_YQk140qE4-83eepF-Z-wT24W5KXc2XigpsnbdxVvzm0-ZQkDy_sJRZ1v1n4a93vmy5ecTs3ZG_X8M79k0PrF3UNzftGkis79BVnnXw-9P6Yu7l_H5Pk54RuYtuCvsLaBePkg8d3b1pQf1CcW1Ar5F57lN8-OJc__itZvGPKkL6XjNvCn6qrlee8Wyqurb5OXpW_vemSPuWNt71WUBN8f0moYO_chuD_JPny1eXi2ZMWhW6pq-BrX5fknPMGidftnh5aOvylhkOHJXluTkfxH5-WC7vHjxM6S8P5rhUrxQtH1cqzO-1r_OxnbRqrpn5P3DbuDuHXt11affmCjvzkSavDnz8_T5i0eHH1-Y915jeWXCRW1LXhpq96gbvlkCQLh7qIQnZJaOs5O7hv9knyd0b1jcw_ivt0wmBueGSeV7qVicOO3yEf2XF-de23X2zo0P7l8DfTa7i3nsjj978nyVPaDxWv--JYqG7UWu6uI5J84vVssfc7S7gfHdjDf_fqWrlnzzXiwH43yBc98Jdq4doZDbMW7ArnPXkF94fupfz1b0ry2qfGiOUFU7mTg5bxA6-S5Jo3ponbP-rIHdh4Fv_xNknu-dYocdn6WfKkvZub_jOxd2P9_iPhpaHb5SfvW9j0xtbixp-ce1icemxE6Muc5dyrX0jyhyfyxPlPZ_GLd57kBr0iySsmni3-MHsPd2LgyYZ7XpPkZy6bL55x_ZpN0z7N4St2SnL_2lzxq9_dKry54ZdN78jLuKpFi8Rrjrbnp67b1rhpca28_IKnxaG_HTj2uaPjG_ZH9mdhzUcCt6eSG_O3eu6rA5HzF58SpoXncdMmzGl8dZEk3_f0LeJD22bLby1ZX716xIKGrEVbw48-_CB39d7_chv-KcmPbn1PuC1vjsBNXNc0uHAh9_gnPcWmLl0aTn18H79wpiSPLXpC-NOPV4cOX5zVUHVSklcW7xBKtrfj31-6heeX18qXzd0nHp5ZyAtlsxpXXC_JM2pfFIuXLOS4wjHcVZ9J8rEzy8Qb7_6aO1BXyq9_UZLnyJeLj7w0R9jz0Nrq-9_syn__vZlC0Q2z5WObn21a1K174_3zDof3DPhNw_qartWTv75LLlp-hfBo2XNjz7t3PP_1s5K8ZcXjQocr53NjpD3c0A8j86z5l9D5gUF8Xtk6vtfOWnnSlQPCo_94KdfuyM9Db38pyatu3iB-_uS73Auz2_HhP0vyzG4jxKcmb-Rq9q0MTYrsW1Hdw-K-D-7hz19X3zj-xlp5SoUY3lA_ketdsJM7GLkfHzv3A-Guv-fzB7aM5JetjJxv-kLx6ISXN065cwI_a70kZxVdKVx7sIo_OKy0cfUvJPnWxmvCcu-e_FUzahse3yzJ9bPXiHN2_GTTK5_Mb_xBxOPolEeqZxQIPBd4g__kzlo5f9op8YwdS7mbJr7LleyS5HvXfSnsWjVHvrXgmeqbLvxpw6G9H4n_Bz8_xLw"
# decode string + decompress zip
depthMapData = parse(s)
# parse first bytes to describe data
header = parseHeader(depthMapData)
# parse bytes into planes of float values
data = parsePlanes(header, depthMapData)
# compute position and values of pixels
depthMap = computeDepthMap(header, data["indices"], data["planes"])
# process float 1D array into int 2D array with 255 values
im = depthMap["depthMap"]
im[np.where(im == max(im))[0]] = 255
if min(im) < 0:
im[np.where(im < 0)[0]] = 0
im = im.reshape((depthMap["height"], depthMap["width"])).astype(int)
# display image
plt.imshow(im)
plt.show()
(and save with plt.imsave)
Related
Small unpacking buffer for wave.struct
I am building a SSTV programm, that can convert 180x136 picture into the sound with the RGB component. But when I try to read data from microphone, it gives me struct.error: unpack requires a buffer of 400 bytes when I use only 200 bytes. I expected the output as decoded microphone out (in short: microphone -> decode signal -> image). I tried to use chunk*2 but it gave me same error: struct.error: unpack requires a buffer of 800 bytes Here is the code for decoder: import numpy as np import pyaudio import wave from tkinter import * root = Tk() c = Canvas(root, width=90*8, height=68*8, bg='white') c.pack() def pix(x, y, rr, rg, rb): # r + (r/g/b) is raw rgb because we are decoding frequency if rr < 0: r = 0 if rg < 0: g = 0 if rb < 0: b = 0 try: r = min(255,round(rr * (255 / 1124))) except: r = 0 try: g = min(255,round(rg * (255 / 1124))) except: g = 0 try: b = min(255,round(rb * (255 / 1124))) except: b = 0 if r < 0: r = 0 if g < 0: g = 0 if b < 0: b = 0 print(r,g,b) n = hex(r).replace("0x","") rh = (2 - len(n)) * "0" + n n = hex(g).replace("0x", "") gh = (2 - len(n)) * "0" + n n = hex(b).replace("0x", "") bh = (2 - len(n)) * "0" + n c.create_line(x, y, x + 4, y, fill=f"#{rh}{gh}{bh}") c.create_line(x, y+1, x + 4, y+1, fill=f"#{rh}{gh}{bh}") c.create_line(x, y+2, x + 4, y+2, fill=f"#{rh}{gh}{bh}") c.create_line(x, y+3, x + 4, y+3, fill=f"#{rh}{gh}{bh}") chunk = 100 # open up a wave wf = wave.open('input.wav', 'rb') swidth = wf.getsampwidth() RATE = wf.getframerate() # use a Blackman window window = np.blackman(chunk) # open stream p = pyaudio.PyAudio() stream = p.open(format = p.get_format_from_width(wf.getsampwidth()), channels = wf.getnchannels(), rate = RATE, output = True) microphone = p.open(format = p.get_format_from_width(wf.getsampwidth()), channels = wf.getnchannels(), rate = RATE, input = True) # read some data data = wf.readframes(chunk) #print(len(data)) #print(chunk*swidth) pc = 0 x = 0 e = [255,252] totaly = 0 rrgb = [255, 255, 255] # raw rgb # play stream and find the frequency of each chunk while True: data = microphone.read(chunk) # write data out to the audio stream stream.write(data) indata = np.array(wave.struct.unpack("%dh"%((len(data))/swidth),data))*window#"%dh"%((len(data)-1)/swidth) # Take the fft and square each value fftData=abs(np.fft.rfft(indata))**2 # find the maximum which = fftData[1:].argmax() + 1 # use quadratic interpolation around the max if which != len(fftData)-1: y0,y1,y2 = np.log(fftData[which-1:which+2:]) x1 = (y2 - y0) * .5 / (2 * y1 - y2 - y0) # find the frequency and output it thefreq = (which+x1)*RATE/chunk if thefreq>3030 or totaly==540: #print("PING!",f"The freq is %f Hz. {pc}" % (thefreq)) if pc==0: totaly = x pc+=4 x=0 root.update() else: #print(f"The freq is %f Hz. min {min(e)} max {max(e)}, {x//4}" % (thefreq), end=" ") if x//4 % 3 == 0: pix(x / 3, pc, rrgb[0], rrgb[1], rrgb[2]) # /0.77039274924 rrgb[0] = thefreq #print("R") elif x//4 % 3 == 1: rrgb[1] = thefreq #print("G") elif x//4 % 3 == 2: rrgb[2] = thefreq #print("B") #e.append(thefreq) #pix() x+=4#print("The freq is %f Hz." % (thefreq)) else: thefreq = which*RATE/chunk #print("The freq is %f Hz." % (thefreq)) # read some more data data = microphone.read(chunk) if data: stream.write(data) stream.close() p.terminate() root.mainloop() Here is the code for encoder: import numpy as np # Importing Image from PIL package from PIL import Image # creating a image object im = Image.open(r"original.bmp") px = im.load() arx = [] art = [] pix = () pixels = list(im.getdata()) width, height = im.size pixels = [pixels[i * width:(i + 1) * width] for i in range(height)] print(pixels) for ypos in range(0,136): for xpos in range(0,180): pix = pixels[ypos][xpos] #gray = ((pix[0])+(pix[1])+(pix[2]))/3 rgb = (pix[0],pix[1],pix[2]) arx.append(rgb) arx.append(406) art.append(arx) rate = 44100 # samples per second T = 3 # sample duration (seconds) slowdown = int(input("Speed in Hz:")) f = 100.0/slowdown # sound frequency (Hz) print("speed multiplier is",slowdown) encoding = "ansi" total = [] xpos2 = 0 c = 0 for ypos in art: for xpos in ypos: #print(xpos) t = np.linspace(0, 0.01, (slowdown), endpoint=False) if xpos==406: sig = np.sin(2 * np.pi * f * 500 * t) total.extend(sig) sig = np.sin(2 * np.pi * f * 700 * t) total.extend(sig) continue sig = np.sin(2 * np.pi * f * xpos[0] * t) total.extend(sig) sig = np.sin(2 * np.pi * f * xpos[1] * t) total.extend(sig) sig = np.sin(2 * np.pi * f * xpos[2] * t) total.extend(sig) print(xpos) sig = np.sin(2 * np.pi * f * 300 * t) break import wavio wavio.write("input.wav", total, rate, sampwidth=2) input("done, saved as \'input.wav\'")
function that takes 2 colors as arguments and returns the average color
I'm new to coding and I am trying to complete this assignment, but I can't get it to work: def colorMean(colorA, colorB): rgb1 = int(colorA, 16) rgb2 = int(colorB, 16) aveColor = 0 for i in range (3): comp1 = int(color1 >> (8 * i)) & 0xff comp2 = int(color2 >> (8 * i)) & 0xff value = int((comp1 + comp2) / 2) << 8 * i avgColor = int((comp1 + comp2) / 2) << 8 * i return hex(aveColo) colorMean("0000ff", "ff0000")
Maybe this is what you're looking for. If there are restrictions and sample outputs to the assignment, you should post those too. NOTE: I got the first two function hex_to_rgb and rgb_to_hex from here def hex_to_rgb(value): value = value.lstrip('#') lv = len(value) return tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3)) def rgb_to_hex(rgb): return '%02x%02x%02x' % rgb def mean_color(color1, color2): rgb1 = hex_to_rgb(color1) rgb2 = hex_to_rgb(color2) avg = lambda x, y: round((x+y) / 2) new_rgb = () for i in range(len(rgb1)): new_rgb += (avg(rgb1[i], rgb2[i]),) return rgb_to_hex(new_rgb) avg_color = mean_color('ff0000', '0000ff') print(avg_color) # 800080
The problem with rewriting the code from Matlab to python
I am new to Python and I am trying to convert and modify the following code from Matlab to python: function A = alg_a(RGBimage, n) [row, column, d] = size(RGBimage); if (d==3) HSVimage = rgb2hsv(RGBimage); V = HSVimage(:,:,3); else V =double(RGBimage)/255; end V=V(:); [Vsorted, ix] = sort(V); s = (row*column)/n; i=0; h=[]; while (i < n) i=i+1; z = Vsorted(((floor(s*(i-1))+1)):floor(s*i)); Vstart = (s*(i-1))/(row*column); Vstop = (s*i)/(row*column); r=z-z(1); f = (1/n)/(r(size(r,1))); g = r*f; if(isnan(g(1))) g = r + Vstop; else g = g + Vstart; end h=vertcat(h,g); end m(ix)=h; m=m(:); if(d==3) HSVimage(:,:,3) = reshape(m,row,column); A=hsv2rgb(HSVimage); else A=reshape(m,row,column); end return; end This function implements the SMQT image preprocessing algorithm, and my problem is that I don't really understand how it works, which is why I'm stuck here: import cv2 import numpy as np import math img = cv2.imread("16.bmp") n = 8 (row, column, dim) = img.shape if dim == 3: hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) V = hsv_img[:, :, 2] else: V = img / 255 V = V.flatten(order='F') Ix = V.argsort()[::-1] V_sorted = V[Ix] s = int(row*column/n) i = 0 h = [] while i < n: i += 1 z = V_sorted[math.floor(s*(i-1))+1:math.floor(s*i)] z = np.array(z) V_start = (s*(i-1))/(row*column) V_stop = (s * i) / (row * column) r = z - z[0] f = (1 / n) / r[len(r)-1] g = r * f if math.isnan(g[0]): g = r + V_stop else: g = g + V_start if len(h) == 0: h = g else: h = np.vstack((h, g)) M = np.array([]) M[Ix] = h M = M.flatten(order='F') if dim == 3: hsv_img = np.reshape(M, row, column) img_res = cv2.cvtColor(img, cv2.COLOR_HSV2BGR) else: img_res = np.reshape(M, row, column) cv2.imwrite('16_smqt', img_res) It seems that I wrote a code similar in functionality, but it does not work, and I haven't any idea why. The Matlab code was taken by me from the article, so I have no doubt that it works. Please help me find inaccuracies in my translation from Matlab to python.
Handwriting neural network weights don't change
from struct import unpack import gzip import numpy from numpy import * import matplotlib.pyplot as plt learningRate = 0.1 def get_labeled_data(imagefile, labelfile): """Read input-vector (image) and target class (label, 0-9) and return it as list of tuples. """ # Open the images with gzip in read binary mode images = gzip.open(imagefile, 'rb') labels = gzip.open(labelfile, 'rb') # Read the binary data # We have to get big endian unsigned int. So we need '>I' # Get metadata for images images.read(4) # skip the magic_number number_of_images = images.read(4) number_of_images = unpack('>I', number_of_images)[0] rows = images.read(4) rows = unpack('>I', rows)[0] cols = images.read(4) cols = unpack('>I', cols)[0] # Get metadata for labels labels.read(4) # skip the magic_number N = labels.read(4) N = unpack('>I', N)[0] if number_of_images != N: raise Exception('number of labels did not match the number of images') # Get the data x = zeros((N, rows, cols), dtype="float32") # Initialize numpy array y = zeros((N, 1), dtype="uint8") # Initialize numpy array for i in range(N): if i % 1000 == 0: print("i: %i" % i) for row in range(rows): for col in range(cols): tmp_pixel = images.read(1) # Just a single byte tmp_pixel = unpack('>B', tmp_pixel)[0] x[i][row][col] = tmp_pixel tmp_label = labels.read(1) y[i] = unpack('>B', tmp_label)[0] return (x, y) ld = get_labeled_data("C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-images-idx3-ubyte.gz", "C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-labels-idx1-ubyte.gz") def sigmoid(x): return 1/(1+numpy.exp(-x)) def sigmoid_P(x): return sigmoid(x) * (1 - sigmoid(x)) def cost(i, t): return (i - t) ** 2 def cost_P(i, t): return 2 * (i - t) # 10x28x28 - number x row x column weights = numpy.random.random((10, 28, 28)) biases = numpy.random.random((10, 28, 28)) dr = 0 da = 0 for loopi in range(10000): r = numpy.random.randint(0, len(ld[0][0])) targets = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] image = ld[0][r] label = ld[1][r][0] # weighted 3D Matrix of [number][row][column] predictions = [] wPredictions = [] # average of predictions for each number avgPred = [] avgPred2 = [] img = list(image) for i in range(10): x = [] y = [] for b, w in zip(biases[i], weights[i]): x.append(sigmoid(numpy.dot(w, image) + b)) y.append(numpy.dot(w, image) + b) predictions.append(x) avgPred.append(numpy.average(list(x))) avgPred2.append(numpy.average(list(y))) for i in range(10): sqError = cost(avgPred[i], targets[i]) # derivative of the cost with respect to each of the weights and biases dc_dp = cost_P(avgPred[i], targets[i]) dp_dz = sigmoid_P(avgPred2[i]) #for b, w in zip(biases[i], weights[i]): for imgRow in range(28): for imgCol in range(28): dz_dw = image[imgRow][imgCol] dz_db = 1 print("dc_dp: " + str(dc_dp) + "\ndp_dz: "+ str(dp_dz) + "\ndz_dw: " + str(dz_dw)) dc_dw = dc_dp * dp_dz * dz_dw dc_db = dc_dp * dp_dz * dz_db dr = dc_dw weights[i][imgRow][imgCol] -= learningRate * dc_dw da = weights[i][imgRow][imgCol] biases[i][imgRow][imgCol] -= learningRate * dc_db while True: big = 0 intid = int(input()) imag = ld[0][intid] for l in range(10): papa = [] for b, w in zip(biases[i], weights[i]): papa.append(sigmoid(numpy.dot(w, imag) + b)) lol = numpy.average(papa) if(lol > big): big = l print(str(dr) + " " + str(da)) print(big) The weights aren't changing because dp_dz is always 0, I'm not sure what's causing that. I don't mean that they're changing but only a very small change, they're literally NOT changing at all. I believe it has to do with my approach in general, but I'm not sure how else I could approach this problem, I'm very new to neural networks. Any help would be greatly appreciated!
How do I manually blur an image in python?
In the code below I am trying to blur an image in python by replacing each pixel with the average of its surrounding pixels as long as the original pixel is not at the border of the image. However, whenever I run my code I am simply receiving a black image. Can anyone tell me what I'm doing wrong? import numpy as np import matplotlib.pyplot as plt from scipy.misc import imread, imsave, imresize imgOld = imread('house.png') # read image into a numpy array imgNew = imgOld (imgH, imgW, imgC) = imgOld.shape # imgC = 2 or 4 (RGB or RGBA) plt.imshow(imgOld, origin='lower') # blur for y in range(imgH): for x in range(imgW): xLast = 0 yLast = 0 if x != 0 and y != 0: xLast = (x-1) % imgW yLast = (y-1) % imgH else: xLast = 0 yLast = 0 xNext = (x+1) % imgW yNext = (y+1) % imgH rgb = imgNew[y,x] r = (imgOld[yLast,xLast,2] + imgOld[yLast,x,2] + imgOld[yLast,xNext,2] + imgOld[y,xLast,2] + imgOld[y,x,2] + imgOld[y,xNext,2] + imgOld[yNext,xLast,2] + imgOld[yNext,x,2] + imgOld[yNext,xNext,2]) / 9 g = (imgOld[yLast,xLast,1] + imgOld[yLast,x,1] + imgOld[yLast,xNext,1] + imgOld[y,xLast,1] + imgOld[y,x,1] + imgOld[y,xNext,1] + imgOld[yNext,xLast,1] + imgOld[yNext,x,1] + imgOld[yNext,xNext,1]) / 9 b = (imgOld[yLast,xLast,0] + imgOld[yLast,x,0] + imgOld[yLast,xNext,0] + imgOld[y,xLast,0] + imgOld[y,x,0] + imgOld[y,xNext,0] + imgOld[yNext,xLast,0] + imgOld[yNext,x,0] + imgOld[yNext,xNext,0]) / 9 imgNew[y,x] = [b,g,r] plt.imshow(imgNew, origin='lower') Edit: I have updated my code to average based on the individual color value of each pixel however I am still receiving a black image as a result. I am adding my perfectly working java version of this code. I do not understand where I am going wrong Java version: protected void proc_17() { info = "Blur"; for (int y = 0; y < imgH; y++) { for (int x = 0; x < imgW; x++) { int xLast = 0; int yLast = 0; if (x != 0 && y != 0) { xLast = (x-1) % imgW; yLast = (y-1) % imgH; } else { xLast = 0; yLast = 0; } int xNext = (x+1) % imgW; int yNext = (y+1) % imgH; float r = (imgOld.getR(xLast, yLast) + imgOld.getR(x, yLast) + imgOld.getR(xNext, yLast) + imgOld.getR(xLast, y) + imgOld.getR(x, y) + imgOld.getR(xNext, y) + imgOld.getR(xLast, yNext) + imgOld.getR(x, yNext) + imgOld.getR(xNext, yNext)) / 9; float g = (imgOld.getG(xLast, yLast) + imgOld.getG(x, yLast) + imgOld.getG(xNext, yLast) + imgOld.getG(xLast, y) + imgOld.getG(x, y) + imgOld.getG(xNext, y) + imgOld.getG(xLast, yNext) + imgOld.getG(x, yNext) + imgOld.getG(xNext, yNext)) / 9; float b = (imgOld.getB(xLast, yLast) + imgOld.getB(x, yLast) + imgOld.getB(xNext, yLast) + imgOld.getB(xLast, y) + imgOld.getB(x, y) + imgOld.getB(xNext, y) + imgOld.getB(xLast, yNext) + imgOld.getB(x, yNext) + imgOld.getB(xNext, yNext)) / 9; imgNew.setR(x, y, r); imgNew.setG(x, y, g); imgNew.setB(x, y, b); } } }
The problem with this code is that it is not adding up the values ​ in the variables r, g, and b. This may be for reasons of indentation, so I put these values ​​as a list, made the sum, and divided it by 9. Follow the script below: imgOld = imread('house.png') # read image into a numpy array imgNew = imgOld (imgH, imgW, imgC) = imgOld.shape # imgC = 2 or 4 (RGB or RGBA) plt.imshow(imgOld, vmin=0, vmax=255) plt.show() # blur for y in range(imgH): for x in range(imgW): xLast = 0 yLast = 0 if x != 0 and y != 0: xLast = (x-1) % imgW yLast = (y-1) % imgH else: xLast = 0 yLast = 0 xNext = (x+1) % imgW yNext = (y+1) % imgH rgb = imgNew[y,x] aux_r = (imgOld[yLast,xLast,2], imgOld[yLast,x,2], imgOld[yLast,xNext,2], imgOld[y,xLast,2], imgOld[y,x,2], imgOld[y,xNext,2], imgOld[yNext,xLast,2], imgOld[yNext,x,2], imgOld[yNext,xNext,2]) r = sum(aux_r) r = r/9 aux_g = (imgOld[yLast,xLast,1], imgOld[yLast,x,1], imgOld[yLast,xNext,1], imgOld[y,xLast,1], imgOld[y,x,1], imgOld[y,xNext,1], imgOld[yNext,xLast,1], imgOld[yNext,x,1], imgOld[yNext,xNext,1]) g = sum(aux_g) g = g/9 aux_b = (imgOld[yLast,xLast,0], imgOld[yLast,x,0], imgOld[yLast,xNext,0], imgOld[y,xLast,0], imgOld[y,x,0], imgOld[y,xNext,0], imgOld[yNext,xLast,0], imgOld[yNext,x,0], imgOld[yNext,xNext,0]) b = sum(aux_b) b = b/9 imgNew[y,x] = [b,g,r] plt.imshow(imgNew, vmin=0, vmx=255) plt.show()