How to improve file reading speed? - python

I read data slices from a large file. A 400 MB image takes 4 seconds, yet the disk can read the whole file in 1 second, and the program does very little computation. How can I improve the speed?
from opentile import OpenTile
import time
import traceback
import os

os.environ.setdefault('TURBOJPEG', 'C:/lib/')

try:
    tiler = OpenTile.open('svs800.svs')
except:
    traceback.print_exc()

s = tiler.get_level(0)
tile_size = str(s.tiled_size).split("x")

time1 = time.time()
from multiprocessing.pool import ThreadPool


def get_data(s):
    # This function reads a piece of binary data from a certain position
    # of the image and then adds the header data
    return tiler.get_tile(0, 0, 0, (s[0], s[1]))


pool = ThreadPool(5)
y = pool.map(get_data, [(i, j) for i in range(int(tile_size[0])) for j in range(int(tile_size[1]))])
print("tiles", len(y))

time2 = time.time()
print("elapsed", time2 - time1)

Simple sequential approach:
from opentile import OpenTile
import os
import time


def timer(func):
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print(f'{func.__name__} {end-start:.4f}s')
        return result
    return wrapper


os.environ.setdefault('TURBOJPEG', '/opt/libjpeg-turbo')


@timer
def open_svs(filename):
    return OpenTile.open(filename)


@timer
def get_data(tiler, x, y):
    return [tiler.get_tile(0, 0, 0, (x_, y_)) for x_ in range(x) for y_ in range(y)]


tiler = open_svs('18959.svs')
x, y = map(int, str(tiler.get_level(0).tiled_size).split('x'))
data = get_data(tiler, x, y)
assert len(data) == x * y
Output:
open_svs 0.0082s
get_data 0.5843s
Note:
The x and y values for this file are 183 and 114, respectively; the file size is 563,271,749 bytes.
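If the threaded version is still slow on a particular machine, one thing worth trying is spreading the work across processes instead of threads, so the workers do not contend for the GIL or for the single shared tiler object. The sketch below assumes that opening the file once per worker process is acceptable (the timing above suggests OpenTile.open is cheap) and that returning raw tile bytes over the result queue is not itself the bottleneck; the filename and worker count are placeholders.

import os
from concurrent.futures import ProcessPoolExecutor

from opentile import OpenTile

os.environ.setdefault('TURBOJPEG', '/opt/libjpeg-turbo')

_tiler = None  # one tiler per worker process


def _init_worker(filename):
    # Open the slide once in each worker instead of sharing one tiler.
    global _tiler
    _tiler = OpenTile.open(filename)


def _read_tile(xy):
    # Read a single tile at level 0 for the given (x, y) position.
    return _tiler.get_tile(0, 0, 0, xy)


if __name__ == '__main__':
    filename = '18959.svs'  # placeholder
    t = OpenTile.open(filename)
    x, y = map(int, str(t.get_level(0).tiled_size).split('x'))

    coords = [(i, j) for i in range(x) for j in range(y)]
    with ProcessPoolExecutor(max_workers=4,
                             initializer=_init_worker,
                             initargs=(filename,)) as ex:
        tiles = list(ex.map(_read_tile, coords, chunksize=64))
    print('tiles', len(tiles))

Whether this beats the sequential version depends on how much time is spent in the TIFF/JPEG handling versus pure I/O, so it is worth timing both on the actual file.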

Related

xarray: Larger than memory array using map_blocks dumping results into .zarr store

I am trying to parallelize an operation that generates a very large numpy array and usually blows up the memory of the machine running it.
What I came up with is the following workflow:

1. Use Dask to generate a lazy zero-filled array.
2. Use xarray to wrap that lazy zero array in a DataArray with the appropriate coordinates, etc.
3. Using DataArray.map_blocks, call a function write_values that reads a subset of a NumPy array from a separate file and inserts it into the appropriate location of the xarray.DataArray.
4. Lazily convert to an xarray.Dataset with a name for the DataArray.
5. Attempt to store to disk via to_zarr.

First: is this an appropriate way to handle an operation that loops over the blocks of a chunked array?
Second: when I run this program, it blows up my memory. Could this be due to the number of tasks created by Dask? How can I optimize it so it never hits the memory limit of my machine?
Third: after this code runs I get a zarr store on disk, but it does not actually contain the values returned by the external function. Is this the right way to change values in the on-disk array?
Problem: my function that writes the .zarr to disk does not write the values from numpy_returning_volume. Could it be that I need to write the values while inside the map_blocks function?
Thank you!
Fully working example:
import dask.array as da
import xarray as xr
import numpy as np
import pathlib
from dask.diagnostics import ProgressBar


class NumpyReturningVolume():
    def __init__(self):
        # self.data = da.random.random_sample([50000, 50000, 50000])
        self.data = np.random.random_sample([500, 1000, 100])

    def num_i(self):
        return self.data.shape[0]

    def num_j(self):
        return self.data.shape[1]

    def num_k(self):
        return self.data.shape[2]

    def get_float(self, start_coordinate, end_coordinate):
        return self.data[
            start_coordinate[0]:end_coordinate[0],
            start_coordinate[1]:end_coordinate[1],
            start_coordinate[2]:end_coordinate[2]
        ]


def write_values(chunk, **kwargs):
    start_coordinate = (chunk.coords["track"].values[0], chunk.coords["bin"].values[0], chunk.coords["time"].values[0])
    end_coordinate = (chunk.coords["track"].values[-1]+1, chunk.coords["bin"].values[-1]+1, chunk.coords["time"].values[-1]+1)
    volume_data = kwargs["volume"].get_float(start_coordinate, end_coordinate)
    chunk.data = volume_data
    return chunk


seismic_file_path = pathlib.Path("./")
seismic_file_name = "TEST_FILE.ds"
store_path = seismic_file_path.parent.joinpath(
    seismic_file_name + "_test.zarr")

numpy_returning_volume = NumpyReturningVolume()

dimensions = ('track', 'bin', 'time')
track_coords = np.arange(0, numpy_returning_volume.num_i(), 1, dtype=np.uint32)
bin_coords = np.arange(0, numpy_returning_volume.num_j(), 1, dtype=np.uint32)
time_coords = np.arange(0, numpy_returning_volume.num_k(), 1, dtype=np.uint32)

empty_arr = da.empty(shape=(
    numpy_returning_volume.num_i(),
    numpy_returning_volume.num_j(),
    numpy_returning_volume.num_k()),
    dtype=np.float32)

xarray_data = xr.DataArray(empty_arr, name="seis", coords={
    'track': track_coords,
    'bin': bin_coords, 'time': time_coords},
    dims=dimensions)

xarray_data.map_blocks(write_values, kwargs={
    "volume": numpy_returning_volume}, template=xarray_data).compute()

xarray_data = xarray_data.to_dataset(name="seis")

delayed_results = xarray_data.to_zarr(store_path.__str__(), compute=False)

with ProgressBar():
    delayed_results.compute()
OMG! I just realized that my problem was the simplest thing in the world: I just needed to assign the result of map_blocks back to a variable, and everything works. Here is the complete working script if anyone is interested. It generates a 6 GB dataset, though.
import dask.array as da
import xarray as xr
import numpy as np
import pathlib
from dask.diagnostics import ProgressBar


class NumpyReturningVolume():
    def __init__(self):
        self.data = da.random.random_sample([1000, 2000, 1000])
        # self.data = np.random.random_sample([500, 1000, 100])

    def num_i(self):
        return self.data.shape[0]

    def num_j(self):
        return self.data.shape[1]

    def num_k(self):
        return self.data.shape[2]

    def get_float(self, start_coordinate, end_coordinate):
        return self.data[
            start_coordinate[0]:end_coordinate[0],
            start_coordinate[1]:end_coordinate[1],
            start_coordinate[2]:end_coordinate[2]
        ].compute()


def write_values(chunk, **kwargs):
    start_coordinate = (chunk.coords["track"].values[0], chunk.coords["bin"].values[0], chunk.coords["time"].values[0])
    end_coordinate = (chunk.coords["track"].values[-1]+1, chunk.coords["bin"].values[-1]+1, chunk.coords["time"].values[-1]+1)
    volume_data = kwargs["volume"].get_float(start_coordinate, end_coordinate)
    chunk.data = volume_data
    return chunk


seismic_file_path = pathlib.Path("./")
seismic_file_name = "TEST_FILE.ds"
store_path = seismic_file_path.parent.joinpath(
    seismic_file_name + "_test.zarr")

numpy_returning_volume = NumpyReturningVolume()

dimensions = ('track', 'bin', 'time')
track_coords = np.arange(0, numpy_returning_volume.num_i(), 1, dtype=np.uint32)
bin_coords = np.arange(0, numpy_returning_volume.num_j(), 1, dtype=np.uint32)
time_coords = np.arange(0, numpy_returning_volume.num_k(), 1, dtype=np.uint32)

empty_arr = da.empty(shape=(
    numpy_returning_volume.num_i(),
    numpy_returning_volume.num_j(),
    numpy_returning_volume.num_k()),
    dtype=np.float32)

xarray_data = xr.DataArray(empty_arr, name="seis", coords={
    'track': track_coords,
    'bin': bin_coords, 'time': time_coords},
    dims=dimensions)

# This xarray_data = is what I was missing!!
xarray_data = xarray_data.map_blocks(write_values, kwargs={
    "volume": numpy_returning_volume}, template=xarray_data)

xarray_data = xarray_data.to_dataset(name="seis")

delayed_results = xarray_data.to_zarr(store_path.__str__(), compute=False)

with ProgressBar():
    delayed_results.compute()
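The key point in the fix is that DataArray.map_blocks does not modify the array in place: it returns a new, lazily evaluated DataArray, so that result has to be captured and used for the subsequent to_dataset and to_zarr calls. A minimal sketch of the same pattern, with a toy fill function in place of the seismic reader (the array size and zarr path here are purely illustrative):

import dask.array as da
import numpy as np
import xarray as xr

# Lazy, chunked placeholder array; nothing is computed yet.
arr = xr.DataArray(
    da.zeros((4, 4), chunks=(2, 2)),
    dims=("x", "y"),
    coords={"x": np.arange(4), "y": np.arange(4)},
    name="toy",
)


def fill_with_ones(chunk):
    # Return a new chunk; map_blocks builds a new lazy array from the
    # return values, so in-place edits of the input would be lost.
    return xr.ones_like(chunk)


filled = arr.map_blocks(fill_with_ones, template=arr)  # capture the result
filled.to_dataset(name="toy").to_zarr("toy_test.zarr", mode="w")
print(float(filled.sum()))  # 16.0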

Python multiprocessing with shared RawArray

I want multiple processes to read different rows of a numpy array in parallel to speed things up. However, when I run the following code, the first process to reach func throws an error as if var were no longer in scope. Why is this happening?
import numpy as np
import multiprocessing as mp

num_procs = 16
num_points = 2500000


def init_worker(X):
    global var
    var = X


def func(proc):
    X_np = np.frombuffer(var).reshape((num_procs, num_points))
    for y in range(num_points):
        z = X_np[proc][y]


if __name__ == '__main__':
    data = np.random.randn(num_procs, num_points)
    X = mp.RawArray('d', num_procs*num_points)
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    np.copyto(X_np, data)
    pool = mp.Pool(processes=4, initializer=init_worker, initargs=(X,))
    for proc in range(num_procs):
        pool.apply_async(func(proc))
    pool.close()
    pool.join()
Traceback (most recent call last):
File "parallel_test.py", line 26, in <module>
pool.apply_async(func(proc))
File "parallel_test.py", line 13, in func
X_np = np.frombuffer(var).reshape((num_procs, num_points))
NameError: global name 'var' is not defined
Update:
For some reason, if I use Pool.map instead of the for loop with Pool.apply_async, it seems to work. I don’t understand why though.
Any reason to not declare X as global in the top-level scope? This eliminates the NameError.
import numpy as np
import multiprocessing as mp

num_procs = 16
num_points = 25000000


def func(proc):
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    for y in range(num_points):
        z = X_np[proc][y]


if __name__ == '__main__':
    data = np.random.randn(num_procs, num_points)
    global X
    X = mp.RawArray('d', num_procs*num_points)
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    np.copyto(X_np, data)
    pool = mp.Pool(processes=4)
    for proc in range(num_procs):
        pool.apply_async(func(proc))
    pool.close()
    pool.join()
When I run a reduced instance of this problem (num_procs = 4, num_points = 5, i.e. 20 values in total):
import numpy as np
import multiprocessing as mp

num_procs = 4
num_points = 5


def func(proc):
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    for y in range(num_points):
        z = X_np[proc][y]


if __name__ == '__main__':
    data = np.random.randn(num_procs, num_points)
    global X
    X = mp.RawArray('d', num_procs*num_points)
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    np.copyto(X_np, data)
    pool = mp.Pool(processes=4)
    for proc in range(num_procs):
        pool.apply_async(func(proc))
    pool.close()
    pool.join()
    print("\n".join(map(str, X)))
I get the following output:
-0.6346037804619162
1.1005724710066107
0.33458763357165255
0.6409345714971889
0.7124888766851982
0.36760459213332963
0.23593304931386933
-0.8668969562941349
-0.8842756219923469
0.005979036105620422
1.386422154089567
-0.8770988782214508
0.25187448339771057
-0.2473967968471952
-0.4909708883978521
0.5423521489750244
0.018749603867333802
0.035304792504378055
1.3263872668956616
1.0199839603892742
You haven't provided a sample of the expected output. Does this look similar to what you expect?
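A likely explanation for the Pool.map observation in the update: pool.apply_async(func(proc)) calls func(proc) immediately in the parent process, where init_worker never ran and var was never set, and only then hands its return value to apply_async; Pool.map, by contrast, ships the function and its arguments to the workers. A small self-contained sketch of the usual apply_async call pattern, passing the callable and its arguments separately (sizes reduced for illustration):

import numpy as np
import multiprocessing as mp

num_procs = 4
num_points = 5


def init_worker(X):
    global var
    var = X


def func(proc):
    # Runs in a worker process, where init_worker has defined `var`.
    X_np = np.frombuffer(var).reshape((num_procs, num_points))
    return X_np[proc].sum()


if __name__ == '__main__':
    X = mp.RawArray('d', num_procs * num_points)
    np.copyto(np.frombuffer(X).reshape((num_procs, num_points)),
              np.random.randn(num_procs, num_points))
    pool = mp.Pool(processes=4, initializer=init_worker, initargs=(X,))
    # Pass the callable and its arguments separately; apply_async(func(proc))
    # would execute func in the parent before anything is submitted.
    results = [pool.apply_async(func, (proc,)) for proc in range(num_procs)]
    pool.close()
    pool.join()
    print([r.get() for r in results])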

Python multithreaded random generation

I am trying to use this code in my simulation:
https://numpy.org/doc/stable/reference/random/multithreading.html
but I can't get it to work.
If I follow the example in the link, I get
mrng = MultithreadedRNG(10000000, seed=0)
mrng.fill()
print(mrng.values[-1])
> 0.0
and all the other values are 0 too.
If I give a smaller input number, such as 40, I get
mrng = MultithreadedRNG(40)
mrng.fill()
print(mrng.values[-1])
> array([1.08305179e-311, 1.08304781e-311, 1.36362118e-321, nan,
6.95195359e-310, ...., 7.27916164e-095, 3.81693953e+180])
What am I doing wrong? I just want to adapt this multithreaded code to a random bit (0/1) generator.
I believe there is a bug in the example: you have to wrap PCG64 in the Generator interface.
Try the code below:
import concurrent.futures
import multiprocessing

import numpy as np
from numpy.random import Generator, PCG64


class MultithreadedRNG(object):
    def __init__(self, n, seed=None, threads=None):
        rg = PCG64(seed)
        if threads is None:
            threads = multiprocessing.cpu_count()
        self.threads = threads

        # Wrap each jumped bit generator in a Generator.
        self._random_generators = [Generator(rg)]
        last_rg = rg
        for _ in range(0, threads-1):
            new_rg = last_rg.jumped()
            self._random_generators.append(Generator(new_rg))
            last_rg = new_rg

        self.n = n
        self.executor = concurrent.futures.ThreadPoolExecutor(threads)
        self.values = np.empty(n)
        self.step = np.ceil(n / threads).astype(np.int_)

    def fill(self):
        def _fill(gen, out, first, last):
            gen.standard_normal(out=out[first:last])

        futures = {}
        for i in range(self.threads):
            args = (_fill,
                    self._random_generators[i],
                    self.values,
                    i * self.step,
                    (i + 1) * self.step)
            futures[self.executor.submit(*args)] = i
        concurrent.futures.wait(futures)

    def __del__(self):
        self.executor.shutdown(False)
I didn't test it much, but the values look OK.
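Since the stated goal is a random bit (0/1) generator rather than normal variates, the _fill helper can be swapped for one that writes integers into an integer output buffer. A rough sketch of that change, assuming the rest of the class stays as above (Generator.integers has no out= argument, so the slice is assigned instead):

import numpy as np

# In MultithreadedRNG.__init__, allocate an integer buffer instead of floats:
#     self.values = np.empty(n, dtype=np.uint8)

# In MultithreadedRNG.fill, use this helper in place of the standard_normal call:
def _fill_bits(gen, out, first, last):
    # Slice first so the final chunk is clipped to the array length,
    # then draw that many uniform bits in {0, 1}.
    view = out[first:last]
    view[...] = gen.integers(0, 2, size=view.shape, dtype=np.uint8)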

Using PyMC3 to compute ODE parameter posterior: Bad initial energy error

I am trying to sample the posterior of an ODE's parameters, using a likelihood whose mean is the logarithm of the ODE solution for a given choice of parameter and initial value. This is based on the tutorial found here. I can replicate the tutorial, but I can't make my own model work. My model's ODE is:
dQ(t)/dt = (1/K)*(R(t) - Q(t))
where R(t) is based on rainfall data that I input.
I am assigning priors to the noise standard deviation \sigma, the initial value Q(0) and parameter K.
Any help on how to overcome the error would be much appreciated :)
This is my code:
from scipy.integrate import odeint
from scipy.interpolate import interp1d
import numpy as np
import pandas as pd
import theano
from theano import *
import pymc3 as pm
import theano.tensor as tt

THEANO_FLAGS = 'optimizer=fast_compile'
theano.config.exception_verbosity = 'high'
theano.config.floatX = 'float64'

n_states = 1
n_odeparams = 1
n_ivs = 1


class LinearReservoirModel(object):
    def __init__(self, n_states, n_odeparams, n_ivs, net_rainfall_data, y0=None):
        self._n_states = n_states
        self._n_odeparams = n_odeparams
        self._n_ivs = n_ivs
        self._y0 = y0
        self._nr = net_rainfall_data

    def simulate(self, parameters, times):
        return self._simulate(parameters, times, self._nr, False)

    def simulate_with_sensitivities(self, parameters, times):
        return self._simulate(parameters, times, self._nr, True)

    def _simulate(self, parameters, times, net_rainfall_data, sensitivities):
        k, q0 = [x for x in parameters]

        # Interpolate net_rainfall
        nr_int = interp1d(times, net_rainfall_data, fill_value="extrapolate", kind='slinear')

        def r(q, time, k, nrint):
            return (nrint(time) - q) * (1./k)

        if sensitivities:
            def jac(k):
                ret = np.zeros((self._n_states, self._n_states))
                ret[0, 0] = (-1./k)
                return ret

            def dfdp(x, t, k, nrint):
                ret = np.zeros((self._n_states,
                                self._n_odeparams + self._n_ivs))
                ret[0, 0] = (-1./(k**2)) * (nrint(t) - x)
                return ret

            def rhs(q_and_dqdp, t, k, nrint):
                q = q_and_dqdp[0:self._n_states]
                dqdp = q_and_dqdp[self._n_states:].reshape((self._n_states,
                                                            self._n_odeparams + self._n_ivs))
                dqdt = r(q, t, k, nrint)
                # print('jacobian', jac(q))
                # print('dqdp', dqdp)
                # print('dfdp', dfdp(q, t, nrint))
                d_dqdp_dt = jac(k)*dqdp + dfdp(q, t, k, nrint)  # CHANGED CODE HERE np.matmul(jac(q), dqdp) + dfdp(q,t,nrint)
                return np.concatenate((dqdt, d_dqdp_dt.reshape(-1)))

            y0 = np.zeros((n_states*(n_odeparams+n_ivs)) + n_states)  # CHANGED CODE HERE 2*
            y0[2] = 1.  # \frac{\partial [X]}{\partial Xt0} at t==0, and same below for Y
            y0[0:n_states] = q0
            result = odeint(rhs, y0, times, (k, nr_int), rtol=1e-6, atol=1e-5)
            values = result[:, 0:self._n_states]
            dvalues_dp = result[:, self._n_states:].reshape((len(times),
                                                             self._n_states,
                                                             self._n_odeparams + self._n_ivs))
            return values, dvalues_dp
        else:
            q = odeint(r, q0, times, args=(k, nr_int), rtol=1e-6, atol=1e-5)
            q_flat = [item for sublist in q for item in sublist]
            return q_flat


q = [0.01, 0.084788051,0.289827287,0.487426902,0.623592162,0.855202214,0.901709887,0.87936577,0.857067839,0.775516564,0.701725939,0.675138958,0.68101658,0.64644605,0.701305112,0.747128907,0.676039744,0.668502137,0.731464651,0.766588801]
nr = [1.618666063,0.0001,4.405308823,0.394073731,3.392555321,2.733285785,0.0001,1.31186209,0.0001,0.0001,0.0001,0.83074128,0.646141131,0.0001,2.405660466,0.0001,0.0001,1.174002978,1.481146447,0.73244669]

ode_model = LinearReservoirModel(n_states, n_odeparams, n_ivs, nr)


class ODEGradop(theano.Op):
    def __init__(self, numpy_vsp):
        self._numpy_vsp = numpy_vsp

    def make_node(self, x, g):
        x = theano.tensor.as_tensor_variable(x)
        g = theano.tensor.as_tensor_variable(g)
        node = theano.Apply(self, [x, g], [g.type()])
        return node

    def perform(self, node, inputs_storage, output_storage):
        x = inputs_storage[0]
        g = inputs_storage[1]
        out = output_storage[0]
        out[0] = self._numpy_vsp(x, g)  # get the numerical VSP


class ODEop(theano.Op):
    def __init__(self, state, numpy_vsp):
        self._state = state
        self._numpy_vsp = numpy_vsp

    def make_node(self, x):
        x = theano.tensor.as_tensor_variable(x)
        return theano.Apply(self, [x], [x.type()])

    def perform(self, node, inputs_storage, output_storage):
        x = inputs_storage[0]
        out = output_storage[0]
        out[0] = self._state(x)  # get the numerical solution of ODE states

    def grad(self, inputs, output_grads):
        x = inputs[0]
        g = output_grads[0]
        grad_op = ODEGradop(self._numpy_vsp)  # pass the VSP when asked for gradient
        grad_op_apply = grad_op(x, g)
        return [grad_op_apply]


class solveCached(object):
    def __init__(self, times, n_params, n_outputs):
        self._times = times
        self._n_params = n_params
        self._n_outputs = n_outputs
        self._cachedParam = np.zeros(n_params)
        self._cachedSens = np.zeros((len(times), n_outputs, n_params))
        self._cachedState = np.zeros((len(times), n_outputs))

    def __call__(self, x):
        if np.all(x == self._cachedParam):
            state, sens = self._cachedState, self._cachedSens
        else:
            state, sens = ode_model.simulate_with_sensitivities(x, times)
        return state, sens


times = np.arange(0, len(q))  # number of measurement points (see below)
cached_solver = solveCached(times, n_odeparams + n_ivs, n_states)


def state(x):
    State, Sens = cached_solver(np.array(x, dtype=np.float64))
    cached_solver._cachedState, cached_solver._cachedSens, cached_solver._cachedParam = State, Sens, x
    return State.reshape((len(State),))


def numpy_vsp(x, g):
    numpy_sens = cached_solver(np.array(x, dtype=np.float64))[1].reshape((n_states*len(times), len(x)))
    return numpy_sens.T.dot(g)


# Define the data matrix
Q = np.vstack((q))

# Now instantiate the theano custom ODE op
my_ODEop = ODEop(state, numpy_vsp)

# The probabilistic model
with pm.Model() as LR_model:
    # Priors for unknown model parameters
    k = pm.Uniform('k', lower=0.01, upper=10)

    # Priors for initial conditions and noise level
    q0 = pm.Lognormal('q0', mu=np.log(1.2), sd=1)
    sigma = pm.Lognormal('sigma', mu=-1, sd=1, shape=1)

    # Forward model
    all_params = pm.math.stack([k, q0], axis=0)
    ode_sol = my_ODEop(all_params)
    forward = ode_sol.reshape(Q.shape)
    # log_forward = pm.math.log(forward)
    # log_forward_print = tt.printing.Print('log_forward')(log_forward.shape)
    # tt.printing.Print('sigma')(sigma.shape)

    # Likelihood
    Q_obs = pm.Lognormal('Q_obs', mu=pm.math.log(forward), sd=sigma, observed=Q)

    print(LR_model.check_test_point())
    # Y_obs_print = tt.printing.Print('Y_obs')(Y_obs)

    trace = pm.sample(n_init=1500, tune=1000, chains=1, init='adapt_diag')

trace['diverging'].sum()
If you run the code above you should be able to reproduce the following error:
Traceback (most recent call last):
File "examples/myexample.py", line 195, in <module>
trace = pm.sample(1500, tune=1000, chains=1, init='adapt_diag')
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 457, in sample
trace = _sample_many(**sample_args)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 503, in _sample_many
step=step, random_seed=random_seed[i], **kwargs)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 544, in _sample
for it, strace in enumerate(sampling):
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/tqdm/std.py", line 1091, in __iter__
for obj in iterable:
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/sampling.py", line 633, in _iter_sample
point, states = step.step(point)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py", line 247, in step
apoint, stats = self.astep(array)
File "/Users/Yannis/.pyenv/versions/mini-project/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 144, in astep
raise SamplingError("Bad initial energy")
pymc3.exceptions.SamplingError: Bad initial energy
PyMC3 Version: 3.7
Theano Version: 1.0.4
Python Version: 3.6.5
Operating system: macOS Catalina (v10.15.1)
How did you install PyMC3: pip (managed in a pyenv virtualenv)
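One common first check for a "Bad initial energy" error is to confirm that every term of the initial log-probability is finite, and in this model in particular that the forward ODE solution is strictly positive at the test point, since pm.math.log(forward) turns any zero or negative solution value into -inf/nan inside the likelihood. A minimal, hedged check along those lines, reusing the objects defined in the code above (the k and q0 test values are illustrative, not fitted):

import numpy as np

# Per-variable log-probability at the starting point; any -inf or nan entry
# pinpoints the term that breaks the initial energy.
print(LR_model.check_test_point())

# Evaluate the deterministic forward model at plausible parameter values.
k_test, q0_test = 1.0, 1.2  # illustrative values near the prior means
forward_test = np.array(ode_model.simulate([k_test, q0_test], times))
print('min forward value:', forward_test.min())  # must be > 0 for the log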

How to accumulate results from pool.apply_async call?

I want to make calls to pool.apply_async(func) and accumulate the results as soon as they are available without waiting for each other.
import multiprocessing
import numpy as np

chrNames = ['chr1', 'chr2', 'chr3']
sims = [1, 2, 3]


def accumulate_chrBased_simBased_result(chrBased_simBased_result, accumulatedSignalArray, accumulatedCountArray):
    signalArray = chrBased_simBased_result[0]
    countArray = chrBased_simBased_result[1]
    accumulatedSignalArray += signalArray
    accumulatedCountArray += countArray


def func(chrName, simNum):
    print('%s %d' % (chrName, simNum))
    result = []
    signal_array = np.full((10000,), simNum, dtype=float)
    count_array = np.full((10000,), simNum, dtype=int)
    result.append(signal_array)
    result.append(count_array)
    return result


if __name__ == '__main__':
    accumulatedSignalArray = np.zeros((10000,), dtype=float)
    accumulatedCountArray = np.zeros((10000,), dtype=int)
    numofProcesses = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(numofProcesses)
    for chrName in chrNames:
        for simNum in sims:
            result = pool.apply_async(func, (chrName, simNum,))
            accumulate_chrBased_simBased_result(result.get(), accumulatedSignalArray, accumulatedCountArray)
    pool.close()
    pool.join()
    print(accumulatedSignalArray)
    print(accumulatedCountArray)
Written this way, each pool.apply_async call waits for the previous one to finish.
Is there a way to get rid of this waiting?
You are calling result.get() on each iteration, which makes the main process wait for that task to finish before submitting the next one.
Below is a working version, with prints showing that accumulation happens as soon as each func call is ready, and with random sleeps added so the differences in execution time are visible.
import multiprocessing
import numpy as np
from time import time, sleep
from random import random

chrNames = ['chr1', 'chr2', 'chr3']
sims = [1, 2, 3]


def accumulate_chrBased_simBased_result(chrBased_simBased_result, accumulatedSignalArray, accumulatedCountArray):
    signalArray = chrBased_simBased_result[0]
    countArray = chrBased_simBased_result[1]
    accumulatedSignalArray += signalArray
    accumulatedCountArray += countArray


def func(chrName, simNum):
    result = []
    sleep(random()*5)
    signal_array = np.full((10000,), simNum, dtype=float)
    count_array = np.full((10000,), simNum, dtype=int)
    result.append(signal_array)
    result.append(count_array)
    print('%s %d' % (chrName, simNum))
    return result


if __name__ == '__main__':
    accumulatedSignalArray = np.zeros((10000,), dtype=float)
    accumulatedCountArray = np.zeros((10000,), dtype=int)
    numofProcesses = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(numofProcesses)
    results = []
    for chrName in chrNames:
        for simNum in sims:
            results.append(pool.apply_async(func, (chrName, simNum,)))
    for i in results:
        print(i)
    while results:
        for r in results[:]:
            if r.ready():
                print('{} is ready'.format(r))
                accumulate_chrBased_simBased_result(r.get(), accumulatedSignalArray, accumulatedCountArray)
                results.remove(r)
    pool.close()
    pool.join()
    print(accumulatedSignalArray)
    print(accumulatedCountArray)
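As an alternative to polling ready() in a loop, apply_async also accepts a callback argument that the pool invokes in the main process as soon as each result arrives, so the accumulation can happen there directly. A sketch of that variant (the sleeps and prints from the example above are omitted):

import multiprocessing
import numpy as np

chrNames = ['chr1', 'chr2', 'chr3']
sims = [1, 2, 3]


def func(chrName, simNum):
    signal_array = np.full((10000,), simNum, dtype=float)
    count_array = np.full((10000,), simNum, dtype=int)
    return [signal_array, count_array]


if __name__ == '__main__':
    accumulatedSignalArray = np.zeros((10000,), dtype=float)
    accumulatedCountArray = np.zeros((10000,), dtype=int)

    def accumulate(result):
        # Invoked in the main process as each result arrives;
        # add in place so the outer arrays are updated directly.
        np.add(accumulatedSignalArray, result[0], out=accumulatedSignalArray)
        np.add(accumulatedCountArray, result[1], out=accumulatedCountArray)

    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    for chrName in chrNames:
        for simNum in sims:
            pool.apply_async(func, (chrName, simNum), callback=accumulate)
    pool.close()
    pool.join()  # all callbacks have run once join() returns
    print(accumulatedSignalArray)
    print(accumulatedCountArray)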
