i = 2 #int
cv2.imwrite([r'C:\Users\Desktop\result (' + str(i) + ').png'], result) #result is 16bit image
I want to save the image under the name 'result (2).png', because i comes from a for loop.
However, the code above raises an error.
Please help me.
Edit: here is the full code.
## Flat Field Correction (FFC) ##
import numpy as np
import cv2
import matplotlib.pyplot as plt
import numba as nb
import multiprocessing as multi
import parmap
import time
start = time.time()
B = cv2.imread(r'D:\remedi\Exercise\Xray\Offset.png', -1) # offset image
for i in range(2,3):
    org_I = cv2.imread(r'D:\remedi\Exercise\Xray\objects\object (' + str(i) + ').png', -1) # original image
    w = cv2.imread(r'D:\remedi\Exercise\Xray\white\white (' + str(i) + ').png', -1) # white image

    ## dead & bad pixel correction
    corrected_w = w.copy()
    corrected_org_I = org_I.copy()

    c = np.mean(corrected_w)
    p = np.abs(corrected_w - c)
    sens = 0.7
    [num_y, num_x] = np.where((p < c*sens) | (p > c*sens))
    #[num_y, num_x] = np.where((corrected_w < c*0.97) | (corrected_w > c*1.03))

    ar = np.zeros((3,3))
    ar2 = np.zeros((3,3))
    #pool = multi.Pool(processes=6)
    iter = num_y.shape[0]

    for n in range(iter):
        #parmap.map(bad_pixel_correction, [n, num_y, num_x, ar, ar2, corrected_w, corrected_org_I], pm_pbar=True, pm_processes=6)
        for j in range(-1,2):
            for k in range(-1,2):
                if num_y[n]+j == -1 or num_x[n]+k == -1 or num_y[n]+j == 576 or num_x[n]+k == 576:
                    ar[j+1][k+1] = 0
                    ar2[j+1][k+1] = 0
                else:
                    ar[j+1][k+1] = corrected_w[num_y[n]+j][num_x[n]+k]
                    ar2[j+1][k+1] = corrected_org_I[num_y[n]+j][num_x[n]+k]
        ar[1][1] = 0
        ar2[1][1] = 0
        corrected_w[num_y[n]][num_x[n]] = np.sum(ar)/np.count_nonzero(ar)
        corrected_org_I[num_y[n]][num_x[n]] = np.sum(ar2)/np.count_nonzero(ar2)

    c = np.mean(corrected_w) # constant

    ## flat field correction
    FFC = np.uint16(np.divide(c*(corrected_org_I-B), (corrected_w-B)))

    F = np.fft.fft2(FFC)
    Fshift = np.fft.fftshift(F)
    magnitude_spectrum3 = 20*np.log(np.abs(Fshift))

    [row, col] = org_I.shape
    [row2, col2] = np.array([row, col], dtype=np.int) // 2
    row2_range = 1
    col2_range = 2

    Fshift[:row2-row2_range-1, col2-col2_range-1:col2+col2_range] = 0
    Fshift[row2+row2_range:, col2-col2_range-1:col2+col2_range] = 0

    fishift = np.fft.ifftshift(Fshift)
    result = np.fft.ifft2(fishift)

    print("time :", time.time() - start)

    cv2.imwrite(r'C:\Users\jhjoo\Desktop\corrected_org_I (' + str(i) + ').png', result)
    cv2.imwrite(r'C:\Users\jhjoo\Desktop\corrected_org_I (' + str(i) + ').png', corrected_org_I)
    cv2.imwrite(r'C:\Users\jhjoo\Desktop\corrected_w (' + str(i) + ').png', corrected_w)
cv2.imwrite takes its first argument as a string (the file path), not a list. You should fix your code as follows:
cv2.imwrite(r'C:\Users\Desktop\result (' + str(i) + ').png', result) #result is 16bit image
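As a side note, the output of np.fft.ifft2 is complex, so you may also want to bring result back to a real 16-bit array before writing. A minimal sketch of the save step under that assumption (the path is a placeholder, and result/i come from the loop above):
import numpy as np
import cv2

out = np.uint16(np.clip(np.abs(result), 0, 65535))  # magnitude of the inverse FFT, clipped to the 16-bit range
cv2.imwrite(r'C:\Users\Desktop\result (' + str(i) + ').png', out)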
I wonder if someone knows how to convert the depth map of Google Street View (encoded as base64) into an image or a 2D matrix. I'm using Python and was able to get the base64 string, decode it, and save it to a .png file. However, the exported png file cannot be opened by any graphic viewer... I guess the base64 string wasn't decoded correctly...
Here is a post that mentioned the base64 string in GSV.
Here is my code:
import requests
import base64
import zlib

#URL of the json file of a GSV depth map
url_depthmap='http://maps.google.com/cbk?output=json&cb_client=maps_sv&v=4&dm=1&pm=1&ph=1&hl=en&panoid=lcptgwtxfJ6DccSzyWp0zA'
# getting the json file
r = requests.get(url_depthmap)
# open it
depth_json= r.json()
# get the base64 string of the depth map
data=depth_json['model']['depth_map']
# fix the 'incorrect padding' error. The length of the string needs to be divisible by 4.
data += "=" * ((4 - len(data) % 4) % 4)
# convert the URL safe format to regular format.
data=data.replace('-','+').replace('_','/')
data = base64.decodestring(data) # decode the string
data=zlib.decompress(data) #decompress the data
# write it to a png file
image_result = open('downloads/deer_decode.png', 'wb')
image_result.write(data)
image_result.close()
Looking at https://github.com/proog128/GSVPanoDepth.js/blob/master/src/GSVPanoDepth.js and starting from your piece of code, here is my overall processing:
import base64
import zlib
import numpy as np
import struct
import matplotlib.pyplot as plt
def parse(b64_string):
    # fix the 'incorrect padding' error. The length of the string needs to be divisible by 4.
    b64_string += "=" * ((4 - len(b64_string) % 4) % 4)
    # convert the URL safe format to regular format.
    data = b64_string.replace("-", "+").replace("_", "/")
    data = base64.b64decode(data)  # decode the string
    data = zlib.decompress(data)  # decompress the data
    return np.array([d for d in data])
def parseHeader(depthMap):
    return {
        "headerSize": depthMap[0],
        "numberOfPlanes": getUInt16(depthMap, 1),
        "width": getUInt16(depthMap, 3),
        "height": getUInt16(depthMap, 5),
        "offset": getUInt16(depthMap, 7),
    }
def get_bin(a):
    ba = bin(a)[2:]
    return "0" * (8 - len(ba)) + ba

def getUInt16(arr, ind):
    a = arr[ind]
    b = arr[ind + 1]
    return int(get_bin(b) + get_bin(a), 2)

def getFloat32(arr, ind):
    return bin_to_float("".join(get_bin(i) for i in arr[ind : ind + 4][::-1]))

def bin_to_float(binary):
    return struct.unpack("!f", struct.pack("!I", int(binary, 2)))[0]
def parsePlanes(header, depthMap):
    indices = []
    planes = []
    n = [0, 0, 0]

    for i in range(header["width"] * header["height"]):
        indices.append(depthMap[header["offset"] + i])

    for i in range(header["numberOfPlanes"]):
        byteOffset = header["offset"] + header["width"] * header["height"] + i * 4 * 4
        n = [0, 0, 0]
        n[0] = getFloat32(depthMap, byteOffset)
        n[1] = getFloat32(depthMap, byteOffset + 4)
        n[2] = getFloat32(depthMap, byteOffset + 8)
        d = getFloat32(depthMap, byteOffset + 12)
        planes.append({"n": n, "d": d})

    return {"planes": planes, "indices": indices}
def computeDepthMap(header, indices, planes):
    v = [0, 0, 0]
    w = header["width"]
    h = header["height"]
    depthMap = np.empty(w * h)

    sin_theta = np.empty(h)
    cos_theta = np.empty(h)
    sin_phi = np.empty(w)
    cos_phi = np.empty(w)

    for y in range(h):
        theta = (h - y - 0.5) / h * np.pi
        sin_theta[y] = np.sin(theta)
        cos_theta[y] = np.cos(theta)

    for x in range(w):
        phi = (w - x - 0.5) / w * 2 * np.pi + np.pi / 2
        sin_phi[x] = np.sin(phi)
        cos_phi[x] = np.cos(phi)

    for y in range(h):
        for x in range(w):
            planeIdx = indices[y * w + x]

            v[0] = sin_theta[y] * cos_phi[x]
            v[1] = sin_theta[y] * sin_phi[x]
            v[2] = cos_theta[y]

            if planeIdx > 0:
                plane = planes[planeIdx]
                t = np.abs(
                    plane["d"]
                    / (
                        v[0] * plane["n"][0]
                        + v[1] * plane["n"][1]
                        + v[2] * plane["n"][2]
                    )
                )
                depthMap[y * w + (w - x - 1)] = t
            else:
                depthMap[y * w + (w - x - 1)] = 9999999999999999999.0

    return {"width": w, "height": h, "depthMap": depthMap}
# see https://stackoverflow.com/questions/56242758/python-equivalent-for-javascripts-dataview
# for bytes-parsing reference
# see https://github.com/proog128/GSVPanoDepth.js/blob/master/src/GSVPanoDepth.js
# for overall processing reference
# Base64 string from request to:
# https://maps.google.com/cbk?output=xml&ll=45.508457,-73.532738&dm=1
# edit long and lat
s = "eJzt2gt0FNUdx_FsQtgEFJIQiAlvQglBILzktTOZ2YitloKKD6qAFjy2KD5ABTlqMxUsqICcVqqC1VoUqS34TkMhydjaorYCStXSKkeEihQoICpUFOzuJruZO4_dmdmZe2fY3_cc4RCS-L_z-c_sguaNy8rKzgrkZSGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBCbgsFgB9YzIFYFo_wdsAAZWbC5DliATCwYVPhjATKsYNC0f5s2baiNhagUDFrzb47aeMjVgkF7_liC06JgWv7YAX-n1rfjjx3wbVp-m_5YAV_mpH-bPGpjI4dy1h8L4Lcc9scC-Cyn_bEA_sq6vzF_zB8b4Ktc8McC-Cg3_LEAvkmH3wF_LIBfcskfC-CTHH37r_DHAvgj1_yxAL7IPX8sgB9y0R8L4IPc9McCeD9X_bEAns9dfyyA13PZHwvg8Rz11_LnFVI7SVZWO2UU_71-7jTybwd_67nuT28B4G8j-Gd2LvsXFtJbAPjbyLq_MT_8_RcFf1oLAH8bwd9fFRQUOPr9aPhTWoDT378glqPfEv4-qSCRo98W_v6owM_-dBYA_jaCvz_ytz-VBYC_jeDvsfprP1SeRd0_yVfA39W0_uV0_R393z_gbzX42w3-1rP-8g9_d9P4D4C_yeBvOfhTr7Q06W_D33Z-8Y-l-ABBrvYf4B9_LX_Cf6iTQxsFf8vBn3qp_FULAH_T-cq_dQHgr1v37t0tfgX8LUfJf6hp_-7KLB4G_paj5W96IL_5d9Yp-VeY9j839qPX_I35k_zxT9d_2LDmn3v06NH6wdPBP_kCkP4jR44s7T8y1pCWRkeL_2JI1L9Xr14K_16tpT0-e_8ePUj_jq2d3v6juUhRxNL-vZIV8Q-Hwwr_0YO02R2fhf-w-M8a_45kp4l_GVncv8xs5eWRHxT-0Y_1S6T8TOvjwz_NdP1LkhXDN_zdLjqVR_5R-Ot9ijZz48PffmdF0_XvZjtDTl3_Ir3OUhcZtahrJL0z0PIfWl1dPSqWrv-YWB70P6O1Tt9qqahoYLwuXXT9E5e-p8V0QWMp_I2cI3VqLqv5M7oqa9tWcZb48QJG_n2jFTdX0ru1qip7_pWVY6OFolVWVtacE68y5h_5UKUgCGr_wZr68Txf0jpQ_BwjonOS_n365ChqTxbIVpZzpiLlZYoVCASIX8eucGIXdP31bmA9LnVdOnXS0YwX91d9WCT2RGUejTwLmZF_RUVFbl9FxfFK7PkrX-IqiWL-Le9lVP6xz-5NVBVpRLRiorMj5eYS_sb47SPnJv1ziA04k7xmigun3YNOhLvunTtQ8bRIUSfDRN1vrSVvqyiZeyr_6GWraC43Wusm2PJXvcnppyjm3wxbpfKvimsXG9TMHq1Pczk66ehr_VUroLmAqquoswjKWl8p3MjI2wK7Kf9EFcpNsOOv8z63Kn5Hx_xbPFX-KdUT8HruSfR1_bUPgRRLYGIR9NfC9F4kdTZUN8Ge0j83d7je1YwsgQ1_o9tXk7E_YZ7ihk_ysp9I199gA1IugY1V0C0lcnr3ujX_WDpLYN1f9Q7CvD8pbsU9hb6hf47ey4ClLdDZBbPLQIncir_OElj11zFsKXFvJz6i8rfJnlo_mb_xQ8DyFljcCPe1tRn4a7ESS2DRn3jvoLsAirtc7W8d3QC_vfrcyfxzkj4E0toCz2Xi9lctgSX_FErq76_ytwOvp69z7hT-KR8CBlvguzWw5h_Ngn9Kp-bv2PprZ_xT65vwzzHzEPD_08DAP3JR0vW3I-eEvxl9c_5mHwJ-fh4Y-xttgCl-W3IO-JvDD5j1t7MCvtqE_Pwk_vorkNrfJn6stPxN61vwz7H0OmB2GbyyDfn5mg0g_HU2IIW_7l8bWci2vwX8gDV_-w8BK1HxVhf1z49cAWN_zQYk9Vd-4nCbq2DL35q-VX8aK0BBW1vcP1bCP1tdUn-jN4o6mVsJy_5W8QM2_F1fAbepdSP8m5dAq0-sgFXyVCXbADv6Jg9ux9_VFXCV2TCtv3EOoqfyzzHtbws_YNvfvRVwzzhZVvzdWQGjy2zG38ZzP559f_UKOLQDbgknz6K_GyuQZAGs6Fs8eFr-mh3IIH_HV8D4EifzTws_4IC_egXS3QHnbc1ky9_ZFbBx4dO0j-aEv2YHMsffwRVIx94mfsA5f8d2wEFUC6Xh79QK2MZP6-BO-mt2wMYSOORptfT8HdkBW_bp4Qec99cuQYb4p70D1vGdOLgr_tolML0FTpzJRs74p7cDVuwdO7h7_va2wLGDWcs5f_s7QJs-ltv-eluQbA2cPZ3pnPW3twS06WPR8TfcBM0quHBEM7ngb3kJktC7d3Da_qly76RJc8vfyha0XgQK7vHIQdm5x3P_xLq5ym9yD3JoeGuCfzQ6_im2gcnJ4R-Nhb8mJicnJmCtD3_qEROw1oc_9YgJWOvDn3rEBKz14U89YgLW-vCnHjEBa334U4-YgLU-_KlHTMBaH_7UIyZgrc_K392__jcbk6MTE7DWhz_1iAlY68OfesQErPXhTz1iAtb68KceMQFrffhTj5iAtT78qUdMwFof_tQjJmCtD3_qEROw1oc_9YgJWOvDn3rEBKz14U89YgLW-vCnHjEBa334U48cgTU_Q3825srYnJ0YgTU__KlHjMCaH_7UI0ZgzQ9_6hEjsOaHP_WIEVjzw596xAis-eFPPWIE1vxs_L3xx3_4w59BxAis-eFPPWIE1vzwpx4xAmt--FOPGIE1P_ypR4zAmh_-1CNGYM0Pf-oRI7Dmhz_1yBky1p8NORGTs3vsAcDkCsA_EfzZxeTs8Ie_okzkh39rmejvkbf_8Ic_i4gZ4M8uJoeHP_wVwZ9dTA4P_0z399ICMDm_R_jhD38mEUPAn1lMDh_IeH-vvPzDH_5MIoaAP7NYHD4aMQT8mcXi8NHIKTLSnw24KhaHj0VMAX9WsTh8LGKKTPP3zOMf_vBnEzEF_FnF4PDNkWNkoD8bb3UMDt8SMQb8GcXg8C0RY2SWv3ce__CHP6PIOTLOnw23JvqHT0TMAX820T98ImKOTPL30OPfM_4MF4D6wT3Ez9LfKw8A2sf20u0Pf_izi5gko_wZaWujfXgiYpJM8ffU7c_W3xsLQPfI3uJn7O-JBaB7Ym_xw5-yv8duf9b-XlgAmsf1Gj9zfw8sAMXDeo6fvT_7BaB3VO_xe8Cf-QZQO6f39L3hz3gBKB3Sgzd_tkf8mW4AnQN6U98z_gz_ayCNw3kUP9tD_gFWK-D6sbyLn-0t_2jkdL7397R9NFdPbztiRH_ye_UFX5Vbx3coYlY_-PuEvTWHz-9q5OQe4fcbuKr0L4CnUp0uTX6f25qKCos3Y33pvRNrCfqxvuIeKUvVBQ0jQy8dv5C7_YQk140qE4-83eepF-Z-wT24W5KXc2XigpsnbdxVvzm0-ZQkDy_sJRZ1v1n4a93vmy5ecTs3ZG
_X8M79k0PrF3UNzftGkis79BVnnXw-9P6Yu7l_H5Pk54RuYtuCvsLaBePkg8d3b1pQf1CcW1Ar5F57lN8-OJc__itZvGPKkL6XjNvCn6qrlee8Wyqurb5OXpW_vemSPuWNt71WUBN8f0moYO_chuD_JPny1eXi2ZMWhW6pq-BrX5fknPMGidftnh5aOvylhkOHJXluTkfxH5-WC7vHjxM6S8P5rhUrxQtH1cqzO-1r_OxnbRqrpn5P3DbuDuHXt11affmCjvzkSavDnz8_T5i0eHH1-Y915jeWXCRW1LXhpq96gbvlkCQLh7qIQnZJaOs5O7hv9knyd0b1jcw_ivt0wmBueGSeV7qVicOO3yEf2XF-de23X2zo0P7l8DfTa7i3nsjj978nyVPaDxWv--JYqG7UWu6uI5J84vVssfc7S7gfHdjDf_fqWrlnzzXiwH43yBc98Jdq4doZDbMW7ArnPXkF94fupfz1b0ry2qfGiOUFU7mTg5bxA6-S5Jo3ponbP-rIHdh4Fv_xNknu-dYocdn6WfKkvZub_jOxd2P9_iPhpaHb5SfvW9j0xtbixp-ce1icemxE6Muc5dyrX0jyhyfyxPlPZ_GLd57kBr0iySsmni3-MHsPd2LgyYZ7XpPkZy6bL55x_ZpN0z7N4St2SnL_2lzxq9_dKry54ZdN78jLuKpFi8Rrjrbnp67b1rhpca28_IKnxaG_HTj2uaPjG_ZH9mdhzUcCt6eSG_O3eu6rA5HzF58SpoXncdMmzGl8dZEk3_f0LeJD22bLby1ZX716xIKGrEVbw48-_CB39d7_chv-KcmPbn1PuC1vjsBNXNc0uHAh9_gnPcWmLl0aTn18H79wpiSPLXpC-NOPV4cOX5zVUHVSklcW7xBKtrfj31-6heeX18qXzd0nHp5ZyAtlsxpXXC_JM2pfFIuXLOS4wjHcVZ9J8rEzy8Qb7_6aO1BXyq9_UZLnyJeLj7w0R9jz0Nrq-9_syn__vZlC0Q2z5WObn21a1K174_3zDof3DPhNw_qartWTv75LLlp-hfBo2XNjz7t3PP_1s5K8ZcXjQocr53NjpD3c0A8j86z5l9D5gUF8Xtk6vtfOWnnSlQPCo_94KdfuyM9Db38pyatu3iB-_uS73Auz2_HhP0vyzG4jxKcmb-Rq9q0MTYrsW1Hdw-K-D-7hz19X3zj-xlp5SoUY3lA_ketdsJM7GLkfHzv3A-Guv-fzB7aM5JetjJxv-kLx6ISXN065cwI_a70kZxVdKVx7sIo_OKy0cfUvJPnWxmvCcu-e_FUzahse3yzJ9bPXiHN2_GTTK5_Mb_xBxOPolEeqZxQIPBd4g__kzlo5f9op8YwdS7mbJr7LleyS5HvXfSnsWjVHvrXgmeqbLvxpw6G9H4n_Bz8_xLw"
# decode string + decompress zip
depthMapData = parse(s)
# parse first bytes to describe data
header = parseHeader(depthMapData)
# parse bytes into planes of float values
data = parsePlanes(header, depthMapData)
# compute position and values of pixels
depthMap = computeDepthMap(header, data["indices"], data["planes"])
# process float 1D array into int 2D array with 255 values
im = depthMap["depthMap"]
im[np.where(im == max(im))[0]] = 255
if min(im) < 0:
    im[np.where(im < 0)[0]] = 0
im = im.reshape((depthMap["height"], depthMap["width"])).astype(int)
# display image
plt.imshow(im)
plt.show()
(and save with plt.imsave)
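For example (a one-line sketch; the filename is arbitrary):
plt.imsave('depth_map.png', im)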
I have the following code in C++ that uses the Eigen library, and I need help translating it to Python (NumPy).
Initialization
double b = 20.0;
Eigen::Vector3d C(1.0/10.2, 1.0/10.2, 1/30);
Eigen::MatrixXd U(5200, 3);
int i = 0;
for (double x = 10.2/2.0; x < 200; x += 10) {
    for (double y = 10.2/2.0; y < 200; y += 10) {
        for (double t = 0; t <= 360; t += 30) {
            U(i, 0) = x;
            U(i, 1) = y;
            U(i, 2) = psi;
            i += 1;
        }
    }
}
Function:
Eigen::VectorXd operator()(const Eigen::VectorXd& s) {
    Eigen::VectorXd p(length());
    p(0) = s[0];
    p(1) = s[1];
    p(2) = s[2];
    p(3) = s[3];

    for (int i = 0; i < U.rows(); i++) {
        p(i + 4) = b*exp(-0.5*(s.tail(U.cols()) - U.row(i).transpose()).dot(C*(s.tail(U.cols())
                   - U.row(i).transpose())));
        if (p(i + 4) < 0.1) {
            p(i + 4) = 0;
        }
    }
    return p;
}
Python version
Initialization:
import numpy

my_x = 10.2/2.0
my_y = 10.2/2.0
my_p = 0

xx = []
while my_x < 200:
    xx.append(my_x)
    my_x += 10

yy = []
while my_y < 200:
    yy.append(my_y)
    my_y += 10

pps = []
while my_p <= 360:
    pps.append(my_p)
    my_p += 30

U = []
for x in xx:
    for y in yy:
        for p in pps:
            U.append([x, y, p])

U = numpy.matrix(U)
C = numpy.array([1.0/10.2, 1.0/10.2, 1.0/30.0])
b = 20.0
The Function
Instead of operator(), I will call the function doSomething():
def doSomething(s): # Where s is a numpy array (1-d vector)
    p[0:4] = s[0:4]
    for i in range(U.shape[0]):
        s_dash = -0.5*(s - U[i].T)
        s_ddash = C*s
        s_dddash = s_dash.dot(s_ddash) - U[i].T
        p[i+4] = b * numpy.exp(s_dddash)
        if p[i+4] < 0.1: p[i+4] = 0
What I am confused about:
In the C++ implementation, I think p[i+4] is supposed to be a single value
In my python version, I get a p[i+4] as square matrix
Each p[i+4] is a zero matrix.
I am unable to decipher my mistake. Please help!
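One likely cause of the square-matrix result: numpy.matrix keeps every slice two-dimensional, so U[i].T and the intermediate products stay matrices instead of collapsing to scalars. A minimal sketch of the translation with plain 1-D ndarrays, assuming C is meant to act element-wise as in the C++ expression (names are illustrative, not your exact code):
import numpy as np

def do_something(s, U, C, b):
    # p holds the 4 pass-through entries followed by one value per row of U
    p = np.zeros(4 + U.shape[0])
    p[0:4] = s[0:4]
    tail = s[-U.shape[1]:]                           # Eigen's s.tail(U.cols())
    for i in range(U.shape[0]):
        diff = tail - U[i]                           # 1-D difference vector
        val = b * np.exp(-0.5 * diff.dot(C * diff))  # C applied element-wise; dot gives a scalar
        p[i + 4] = 0.0 if val < 0.1 else val
    return p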
I'm working on a solver for a differential equation for a particle simulation using PyOpenCL.
To solve this equation, each particle must access its neighbors' information.
The arrays I'm using are numpy complex64 arrays, each with 7 elements.
When accessing the neighbors, the program returns the error:
clWaitForEvents failed: out of resources
My OpenCL code is the following. I guess most of it isn't related to this error, but I'll post it anyway because it might help somehow:
#define complex_ctr(x, y) (float2)(x, y)
#define complex_add(a, b) complex_ctr((a).x + (b).x, (a).y + (b).y)
#define complex_mul(a, b) complex_ctr(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y))
#define complex_mul_scalar(a, b) complex_ctr((a).x * (b), (a).y * (b))
#define complex_div_scalar(a, b) complex_ctr((a).x / (b), (a).y / (b))
#define conj(a) complex_ctr((a).x, -(a).y)
#define conj_transp(a) complex_ctr(-(a).y, (a).x)
#define conj_transp_and_mul(a, b) complex_ctr(-(a).y * (b), (a).x * (b))
#define complex_real(a) a.x
#define complex_imag(a) a.y
#define complex_unit (float2)(0, 1)
constant int M=10;
constant float L=1e-09;
constant float p0=1.0;
constant float delta=1.0;
constant float gama=1.0;
constant float omc=1.0;
constant float k_p=1.0;
constant float om_p=1.0;
constant float v = 0.001;
constant float b = 1.0;
constant float dt=0.01;
void f(__global float2 *X,
       __global float2 *K,
       int id,
       uint W,
       float t){
    float exp_arg;
    float2 p11, p22, p33, p21, p31, p32, op, aux, ar, al, p;

    p11 = X[id*W];
    p22 = X[id*W+1];
    p33 = X[id*W+2];
    p21 = X[id*W+3];
    p31 = X[id*W+4];
    p32 = X[id*W+5];
    al = X[(id-1)*W+6];
    ar = X[(id+1)*W+6];

    op = p0 * complex_mul(X[id*W+6], complex_unit);

    aux = p22 * gama/2 + complex_mul(op, p22) + conj(p22) * gama/2 + complex_mul(op, conj(p22));
    K[id*W] = aux;

    aux = (-p22*gama - complex_mul(op, p21) + complex_mul(p32, complex_unit)*omc
           - conj(p22)*gama - complex_mul(op, conj(p21)) + complex_mul(conj(p32), complex_unit)*omc);
    K[id*W+1] = aux;

    aux = p22*gama/2 - complex_mul(p32, complex_unit)*omc + conj(p22)*gama/2 - complex_mul(conj(p32), complex_unit)*omc;
    K[id*W+2] = aux;

    aux = complex_mul(op, p11) - complex_mul(op, p22) - p21*gama + complex_mul(p21, complex_unit)*delta + complex_mul(p31, complex_unit)*omc;
    K[id*W+3] = aux;

    aux = complex_mul(p21, complex_unit)*omc + complex_mul(p31, complex_unit)*delta - complex_mul(op, p32);
    K[id*W+4] = aux;

    aux = (complex_mul(p22, complex_unit)*omc - complex_mul(p33, complex_unit)*omc - complex_mul(op, p31) - p32*gama);
    K[id*W+5] = aux;

    exp_arg = k_p * L * id - om_p * t;
    p = complex_mul(b*p0*p21*complex_ctr(cos(exp_arg), sin(exp_arg)), complex_unit);

    aux = (X[(id-1)*W+6] + X[(id+1)*W+6]);
    aux = aux + p;
    K[id*W+6] = aux;
}
__kernel void RK4Step(__global float2 *X,
                      __global float2 *K,
                      __global float2 *Xs,
                      __global float2 *Xm,
                      uint W,
                      float t){
    const int gid_x = get_global_id(0);
    int idx = 0;

    //computation of k1
    f(X, K, gid_x, W, t);
    for(int i=0; i<W; i++)
    {
        idx = gid_x*W+i;
        Xs[idx] = X[idx] + dt*K[idx]/6;
        Xm[idx] = X[idx] + dt*K[idx]/2;
    }

    //computation of k2
    f(Xm, K, gid_x, W, t);
    for(int i=0; i<W; i++)
    {
        idx = gid_x*W+i;
        Xs[idx] = Xs[idx] + dt*K[idx]/3;
        Xm[idx] = X[idx] + dt*K[idx]/2;
    }

    //computation of k3
    f(Xm, K, gid_x, W, t);
    for(int i=0; i<W; i++)
    {
        idx = gid_x*W+i;
        Xs[idx] = Xs[idx] + dt*K[idx]/3;
        Xm[idx] = X[idx] + dt*K[idx];
    }

    //computation of k4
    f(Xm, K, gid_x, W, t);
    for(int i=0; i<W; i++)
    {
        idx = gid_x*W+i;
        Xs[idx] = Xs[idx] + dt*K[idx]/6;
    }

    //update photon
    for(int i=0; i<W; i++)
    {
        idx = gid_x*W+i;
        X[idx] = Xs[idx];
    }
}
If I comment out this line:
aux = (X[(id-1)*W+6] + X[(id+1)*W+6]);
the code runs through with no errors, but if I uncomment it, I get the error I described.
The Python code that calls this kernel is the following:
import pyopencl as cl
import numpy as np
from pylab import *
import matplotlib.pyplot as plt
import time
"""
Solve the problem
Xi' = M1*Xi + M2*Xi~
M1 and M2 are 6*6 matrices whose elements are complex numbers
Xi in the form [P11i, P22i, P33i, P21i, P31i, P32i, Ai] where Pxyi is a complex number
"""
########################################################
# #
#'_h' buffers are host buffers. '_d' are device buffers#
# #
########################################################
#Initialization of the device and workspace
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
MF = cl.mem_flags
# Constants
M = 2000 # Number of atoms
L = np.float32(0.000000001) # Atom Spacing
N = 1000 # Number of time intervals
dt = np.float32(0.1) # Time interval
Timeline = np.arange(0.0, N, dt).astype(np.float32)
p0 = np.float32(1.0) # constant P0 [OMP = P0*Ai]
delta = np.float32(1.0) # constant DELTA
gama = np.float32(1.0) # constant GAMA
omc = np.float32(1.0) # constant OMC
# Writing the source code with the constants declared by the user
text = ""
##text = "__constant int M=" + str(M) + "; \n"
##text += "__constant float L=" + str(L) + "; \n"
##text += "__constant float dt=" + str(dt) + "; \n"
##text += "__constant float p0=" + str(p0) + "; \n"
##text += "__constant float delta=" + str(delta) + "; \n"
##text += "__constant float gama=" + str(gama) + "; \n"
##text += "__constant float omc=" + str(omc) + "; \n"
f1 = open("precode.cl", "r")
f2 = open("kernel.cl", "r")
f3 = open("source.cl",'w+')
precode = f1.read()
kernel = f2.read()
f3.write(precode + text + kernel)
f1.close()
f2.close()
f3.close()
#Initial Conditions
A_h = (np.arange(M) + 1j*np.zeros(M)).astype(np.complex64)
A_h = np.exp(-((A_h-M/2.0)/(0.05 * M))**2)*np.exp(1j * 200.0 * A_h /M)
P11_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P22_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P33_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P21_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P31_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
P32_h = (np.random.randn(M) + 1j*np.random.randn(M)).astype(np.complex64)
W = np.uint32(7) # The row width to compute the index inside the kernel
X_h = []
for i in range(M):
    X_h.append( np.array([P11_h[i], P22_h[i], P33_h[i], P21_h[i], P31_h[i], P32_h[i], A_h[i]]).astype(np.complex64) )
X_h = np.array(X_h).astype(np.complex64)
K_h = np.empty_like(X_h)
Xs_h = np.empty_like(X_h)
Xm_h = np.empty_like(X_h)
A_h = X_h[:,6]
figure(1)
plt.plot(np.real(A_h))
plt.plot(np.abs(A_h))
# Allocation of required buffers on the device
X_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=X_h)
K_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=K_h)
Xs_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=Xs_h)
Xm_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=Xm_h)
f = open("source.cl", "r")
source = f.read()
f.close()
prg = cl.Program(ctx, source).build()
print "Begin Calculation"
start_time = time.time()
for t in Timeline:
    completeevent = prg.RK4Step(queue, (M,), None, X_d, K_d, Xs_d, Xm_d, W, t)
    completeevent.wait()
cl.enqueue_copy(queue, X_h, X_d)
end_time = time.time()
print "All done"
print "Calculation took " + str(end_time - start_time) + " seconds"
A_h = X_h[:,6]
figure(2)
plt.plot(np.real(A_h))
plt.plot(np.abs(A_h))
##plt.show()
Some code is commented out because I'm still working on it, but it's minor stuff just to keep things a bit cleaner.
I can't understand why this happens. I've tried something similar with much simpler code, and there the neighbor access works just as I intended.
For example, when I run this module:
import pyopencl as cl
import numpy as np
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
MF = cl.mem_flags
M = 3
zero = np.complex64(0.0)
X1_h = np.array([1 + 1j*2, 2 + 1j*3, 3 + 1j*4]).astype(np.complex64)
X2_h = np.array([1 + 1j*2, 2 + 1j*3, 3 + 1j*4]).astype(np.complex64)
X3_h = np.array([1 + 1j*2, 2 + 1j*3, 3 + 1j*4]).astype(np.complex64)
Y1_h = np.array([4 + 1j*5, 5 + 1j*6, 6 + 1j*7]).astype(np.complex64)
Y2_h = np.array([4 + 1j*5, 5 + 1j*6, 6 + 1j*7]).astype(np.complex64)
Y3_h = np.array([4 + 1j*5, 5 + 1j*6, 6 + 1j*7]).astype(np.complex64)
aux_h = np.complex64(1 + 1j*1)
RES_h = np.empty_like(X1_h)
dados_h = []
for i in range(3):
    dados_h.append(np.array([X1_h[i], X2_h[i], X3_h[i], Y1_h[i], Y2_h[i], Y3_h[i]]).astype(np.complex64))
dados_h = np.array(dados_h).astype(np.complex64)
print dados_h
aux_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=aux_h)
dados_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf=dados_h)
RES_d = cl.Buffer(ctx, MF.READ_WRITE | MF.COPY_HOST_PTR, hostbuf = RES_h)
Source = """
__kernel void soma( __global float2 *dados, __global float2 *res, int rowWidth){
const int gid_x = get_global_id(0);
res[gid_x] = dados[(gid_x-1)*rowWidth] + dados[(gid_x+1)*rowWidth];
}
"""
prg = cl.Program(ctx, Source).build()
completeEvent = prg.soma(queue, (M,), None, dados_d, RES_d, np.int32(6))
completeEvent.wait()
cl.enqueue_copy(queue, RES_h, RES_d)
print "GPU RES"
print RES_h
the result I get is:
[[ 1.+2.j 1.+2.j 1.+2.j 4.+5.j 4.+5.j 4.+5.j]
[ 2.+3.j 2.+3.j 2.+3.j 5.+6.j 5.+6.j 5.+6.j]
[ 3.+4.j 3.+4.j 3.+4.j 6.+7.j 6.+7.j 6.+7.j]]
GPU RES
[ 2.+3.j 4.+6.j 2.+3.j]
which is exactly what I expected.
Can anyone give me some help on what is happening here? It's probably something simple, but I can't find what's wrong.
One additional piece of information: this happens only when I run the code on my GTX 970. I have a laptop with an old ATI card that handles the code above just fine with no errors, which made me even more confused about this whole thing.
PS: Sorry for the long post.
If id=0 and W=7, then you use a negative index to access an element of the 'X' array:
aux = (X[(id-1)*W+6] + X[(id+1)*W+6]);
which for id=0 and W=7 is:
aux = (X[-1] + X[13]);
A typical fix is to guard the boundary work items, for example by skipping or zeroing the neighbour read when id == 0 or id == get_global_size(0) - 1.
In my experience, code with an error like this may still produce good results on some GPUs, which is why I always test my OpenCL code on GPUs from different vendors. OpenCL validation on a CPU device in particular seems to be very sensitive to such errors.
I've been working on a computational physics project (plotting related rates of chemical reactants with respect to each other to show oscillatory behavior) with a fair amount of success. However, one of my simulations involves more than two active oscillating agents (five, in fact), which would obviously be unsuitable for any single visual plot...
My scheme was therefore to have the user select which two reactants they wanted plotted on the x-axis and y-axis respectively. I tried (foolishly) to convert string input values into the respective variable names, but I guess I need a radically different approach, if one exists?
If it helps clarify any, here is part of my code:
def coupledBrusselator(A, B, t_trial, display_x, display_y):
    t = 0
    t_step = .01

    X = 0
    Y = 0
    E = 0
    U = 0
    V = 0

    dX = (A) - (B+1)*(X) + (X**2)*(Y)
    dY = (B)*(X) - (X**2)*(Y)
    dE = -(E)*(U) - (X)
    dU = (U**2)*(V) - (E+1)*(U) - (B)*(X)
    dV = (E)*(U) - (U**2)*(V)

    array_t = [0]
    array_X = [0]
    array_Y = [0]
    array_E = [0]
    array_U = [0]
    array_V = [0]

    while t <= t_trial:
        X_1 = X + (dX)*(t_step/2)
        Y_1 = Y + (dY)*(t_step/2)
        E_1 = E + (dE)*(t_step/2)
        U_1 = U + (dU)*(t_step/2)
        V_1 = V + (dV)*(t_step/2)

        dX_1 = (A) - (B+1)*(X_1) + (X_1**2)*(Y_1)
        dY_1 = (B)*(X_1) - (X_1**2)*(Y_1)
        dE_1 = -(E_1)*(U_1) - (X_1)
        dU_1 = (U_1**2)*(V_1) - (E_1+1)*(U_1) - (B)*(X_1)
        dV_1 = (E_1)*(U_1) - (U_1**2)*(V_1)

        X_2 = X + (dX_1)*(t_step/2)
        Y_2 = Y + (dY_1)*(t_step/2)
        E_2 = E + (dE_1)*(t_step/2)
        U_2 = U + (dU_1)*(t_step/2)
        V_2 = V + (dV_1)*(t_step/2)

        dX_2 = (A) - (B+1)*(X_2) + (X_2**2)*(Y_2)
        dY_2 = (B)*(X_2) - (X_2**2)*(Y_2)
        dE_2 = -(E_2)*(U_2) - (X_2)
        dU_2 = (U_2**2)*(V_2) - (E_2+1)*(U_2) - (B)*(X_2)
        dV_2 = (E_2)*(U_2) - (U_2**2)*(V_2)

        X_3 = X + (dX_2)*(t_step)
        Y_3 = Y + (dY_2)*(t_step)
        E_3 = E + (dE_2)*(t_step)
        U_3 = U + (dU_2)*(t_step)
        V_3 = V + (dV_2)*(t_step)

        dX_3 = (A) - (B+1)*(X_3) + (X_3**2)*(Y_3)
        dY_3 = (B)*(X_3) - (X_3**2)*(Y_3)
        dE_3 = -(E_3)*(U_3) - (X_3)
        dU_3 = (U_3**2)*(V_3) - (E_3+1)*(U_3) - (B)*(X_3)
        dV_3 = (E_3)*(U_3) - (U_3**2)*(V_3)

        X = X + ((dX + 2*dX_1 + 2*dX_2 + dX_3)/6) * t_step
        Y = Y + ((dY + 2*dY_1 + 2*dY_2 + dY_3)/6) * t_step
        E = E + ((dE + 2*dE_1 + 2*dE_2 + dE_3)/6) * t_step
        U = U + ((dU + 2*dU_1 + 2*dU_2 + dU_3)/6) * t_step
        V = V + ((dV + 2*dV_1 + 2*dV_2 + dV_3)/6) * t_step

        dX = (A) - (B+1)*(X) + (X**2)*(Y)
        dY = (B)*(X) - (X**2)*(Y)

        t_step = .01 / (1 + dX**2 + dY**2) ** .5
        t = t + t_step

        array_X.append(X)
        array_Y.append(Y)
        array_E.append(E)
        array_U.append(U)
        array_V.append(V)
        array_t.append(t)
where previously
display_x = raw_input("Choose catalyst you wish to analyze in the phase/field diagrams (X, Y, E, U, or V) ")
display_y = raw_input("Choose one other catalyst from list you wish to include in phase/field diagrams ")
coupledBrusselator(A, B, t_trial, display_x, display_y)
Thanks!
Once you have calculated the different arrays, you could add them to a dict that maps names to arrays. This can then be used to look up the correct arrays for display_x and display_y:
named_arrays = {
    "X": array_X,
    "Y": array_Y,
    "E": array_E,
    ...
}
return (named_arrays[display_x], named_arrays[display_y])
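A minimal sketch of how the caller could then use the returned pair, assuming coupledBrusselator ends with the return statement above and A, B, and t_trial are defined as in your script:
import matplotlib.pyplot as plt

display_x = raw_input("Choose catalyst you wish to analyze in the phase/field diagrams (X, Y, E, U, or V) ")
display_y = raw_input("Choose one other catalyst from list you wish to include in phase/field diagrams ")

xs, ys = coupledBrusselator(A, B, t_trial, display_x, display_y)

plt.plot(xs, ys)
plt.xlabel(display_x)
plt.ylabel(display_y)
plt.show()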