I have this code that calculates random walks. I am trying to find the maximum distance from (0, 0) for each walk and add these values to a legend. I have added an image of the result I want to achieve.
import numpy as np
import matplotlib.pyplot as plt
import math
# Fixed seed so that repeated runs draw the same walks.
np.random.seed(12)
repeats = 5
N_steps = 1000000
expected_R = np.sqrt(N_steps)
plt.title(f"{repeats} random walks of {N_steps} steps")

# Displacement (dx, dy) for each direction code 0..3 drawn below.
moves = np.array([[0, 1], [0, -1], [1, 0], [-1, 0]], dtype=float)

for _ in range(repeats):
    dirs = np.random.randint(0, 4, N_steps)
    # Running sum of the unit moves gives the position after every step.
    steps = moves[dirs].cumsum(axis=0)
    print("Final position:", steps[-1])
    # Plot only every `skip`-th position to keep the line light.
    skip = N_steps // 5000 + 1
    thinned = steps[::skip]
    xs, ys = thinned[:, 0], thinned[:, 1]
    # NOTE(review): only the largest sampled y of the walk is kept here, and
    # after the loop it holds the value of the final walk only.
    y_peak = ys.max()
    plt.plot(xs, ys)

axes = plt.gcf().gca()
axes.add_artist(plt.Circle((0, 0), radius=expected_R, color="k"))
axes.set_aspect("equal")
half_width = 1500 + y_peak
plt.axis([-half_width, half_width, -half_width, half_width])
plt.show()
You can plot the distance from the coordinates steps to 0,0 by using distance=np.linalg.norm(steps, axis=1). And you can then take the max of this array to find the maximum distance. You can then add a label to your plots and a legend.
See code below:
import numpy as np
import matplotlib.pyplot as plt
import math
# Fixed seed so that repeated runs draw the same walks.
np.random.seed(12)
repeats = 5
N_steps = 1000000
expected_R = np.sqrt(N_steps)
plt.title(f"{repeats} random walks of {N_steps} steps")

# Largest distance from the origin reached by each walk.
max_distance = np.zeros(repeats)
for x in range(repeats):
    dirs = np.random.randint(0, 4, N_steps)
    steps = np.empty((N_steps, 2))
    steps[dirs == 0] = [0, 1]   # 0: +1 on the second coordinate (ys)
    steps[dirs == 1] = [0, -1]  # 1: -1 on the second coordinate (ys)
    steps[dirs == 2] = [1, 0]   # 2: +1 on the first coordinate (xs)
    steps[dirs == 3] = [-1, 0]  # 3: -1 on the first coordinate (xs)
    # Running sum of the unit moves gives the position after every step.
    steps = steps.cumsum(axis=0)
    print("Final position:", steps[-1])
    # Plot only every `skip`-th position, but measure the distance on all of
    # them so that the reported maximum is not affected by the thinning.
    skip = N_steps // 5000 + 1
    xs = steps[::skip, 0]
    ys = steps[::skip, 1]
    distance = np.linalg.norm(steps, axis=1)
    max_distance[x] = np.amax(distance)
    plt.plot(
        xs,
        ys,
        label=f"Random walk {x}: max distance: {np.round(max_distance[x], 1)}",
    )

circle = plt.Circle((0, 0), radius=expected_R, color="k")
axes = plt.gcf().gca()
axes.add_artist(circle)
axes.set_aspect("equal")
# Size the view from the farthest point any walk reached (and at least the
# reference circle). The loop index must not be used here: it only counts the
# walks and says nothing about their extent.
limit = 1.05 * np.max(max_distance, initial=expected_R)
plt.axis([-limit, limit, -limit, limit])
plt.legend(fontsize=8)
plt.show()
And the output gives:
Related
This is the code I'm trying to run to generate a data set with 3 different sample populations, where one class is weighted by a combined Gaussian distribution with 2 sets of means and covariances -- hence the addition of the two multivariate normal rvs functions to feed into the indices of the 'blank' data set. Not sure what I can do to combine them without making it into a sequence?
N_valid = 10000


def generate_data_from_gmm(N, pdf_params, fig_ax=None):
    """Draw N labelled samples, choosing one component per sample by its prior.

    Parameters
    ----------
    N : number of samples to draw.
    pdf_params : mapping with the keys 'priors' (one weight per class, expected
        to sum to 1), 'mu' (2-D array, one mean per row) and 'Sigma'.
    fig_ax : accepted for compatibility; not used by the code shown here.

    Returns
    -------
    (X, labels) : X has one row per sample and one column per dimension of
        'mu'; labels holds the 0-based class index of every sample.
    """
    # Determine dimensionality from mixture PDF parameters
    n = pdf_params['mu'].shape[1]
    print(n)
    # Determine number of classes/mixture components
    C = len(pdf_params['priors'])

    # Output samples and labels
    X = np.zeros([N, n])
    labels = np.zeros(N)

    # Decide randomly which samples will come from each component:
    # u_i ~ Uniform(0, 1) for i = 0, ..., N-1
    u = np.random.rand(N)
    # Cumulative priors, with a leading 0, delimit one interval per class
    thresholds = np.cumsum(pdf_params['priors'])
    thresholds = np.insert(thresholds, 0, 0)

    for y in range(1, C + 1):
        # Half-open interval so a draw landing exactly on a threshold belongs
        # to one class only
        in_class = (thresholds[y - 1] <= u) & (u < thresholds[y])
        indices = np.argwhere(in_class)[:, 0]
        # No. of samples in this component
        Ny = len(indices)
        labels[indices] = y - 1
        if n == 1:
            X[indices, 0] = norm.rvs(pdf_params['mu'][y-1], pdf_params['Sigma'][y-1], Ny)
        else:
            # NOTE(review): this adds two independent draws element-wise and
            # indexes 'mu'/'Sigma' with y as well as y-1, so those arrays need
            # one more entry than 'priors'. A sum of draws is not a sample of
            # a two-component mixture -- confirm the intent.
            X[indices, :] = (
                multivariate_normal.rvs(pdf_params['mu'][y-1], pdf_params['Sigma'][y-1], Ny)
                + multivariate_normal.rvs(pdf_params['mu'][y], pdf_params['Sigma'][y], Ny)
            )

    return X, labels
gmm_pdf = {}
# Likelihood of each distribution to be selected AND class priors!!!
gmm_pdf['priors'] = np.array([0.65, 0.35])
# Gaussian distributions means, one row per distribution
gmm_pdf['mu'] = np.array([[3, 0],
                          [0, 3],
                          [2, 2]])
# Gaussian distributions covariance matrices, one 2x2 matrix per mean.
# Every matrix needs its own pair of brackets; without them the nested list is
# ragged and cannot be turned into a regular (3, 2, 2) array.
gmm_pdf['Sigma'] = np.array([[[2, 0],
                              [0, 1]],
                             [[1, 0],
                              [0, 2]],
                             [[1, 0],
                              [0, 1]]])
This specifically happens in this line:
X[indices, :] = (multivariate_normal.rvs(pdf_params['mu'][y-1], pdf_params['Sigma'][y-1], Ny)
+ multivariate_normal.rvs(pdf_params['mu'][y], pdf_params['Sigma'][y], Ny))
Any ideas?
I'm trying to solve a minimization problem where an initial solution is already present and the objective function is based on this initial solution.
I have some sort of line y_line which is an initial mapping of resources and stations:
y_line = np.array([[1, 0, 0],
[0, 1, 0],
[0, 0, 1]])
Additionally, I have a savings array for selling from the line S, an array for buying new EC and for processing P
S = np.array([[-260., -260., -260.],
[-30., -30., -30.],
[360., 360., 360.]], dtype=int)
EC = np.array([[1000, 1000, 1000],
[2000, 2000, 2000],
[5000, 5000, 5000]], dtype=int)
P = np.array([[720., 720., 720.],
[1440., 1440., 1440.],
[3600., 3600., 3600.]], dtype=int)
Using just a simplified constraint: every workstation i must have at least one resource j -> sum(y[i, j] for j in j_idx) == 1 for all i in i_idx.
My objective is that every sold resource from the initial y_line brings us savings, every newly bought costs us and the solution (the new line) y has a processing cost for operating. I have defined the objective as follows:
y_delta = y - y_line # delta between new line (y) and old line (y_line)
y_delta_plus = np.zeros(y.shape, dtype=object) # 1
y_delta_minus = np.zeros(y.shape, dtype=object) # 2
# I -> new bought resources
y_delta_plus[y_delta >= 0] = y_delta[y_delta >= 0]
# II -> sold resources
y_delta_minus[y_delta <= 0] = y_delta[y_delta <= 0]
c_i = y_delta_plus * EC # invest
c_s = y_delta_minus * S # savings
c_p = y * P # processing cost
c_y = np.sum(c_s + c_i + c_p)
However, if I solve this model (full code see below), then the objective value (5760) doesn't match my sanity check calculations (12430). Would it be possible to set initial values for y[i, j]? Or is there another function to achieve this?
from ortools.linear_solver import pywraplp
import numpy as np
# Initial line: entry [i, j] is 1 where resource j is currently assigned to
# workstation i (here the 3x3 identity).
y_line = np.eye(3, dtype=int)

# Per-entry coefficients used by the objective: S for entries removed from the
# old line, EC for newly added entries, P for every entry of the new line.
S = np.array([[-260, -260, -260],
              [-30, -30, -30],
              [360, 360, 360]], dtype=int)
EC = np.array([[1000, 1000, 1000],
               [2000, 2000, 2000],
               [5000, 5000, 5000]], dtype=int)
P = np.array([[720, 720, 720],
              [1440, 1440, 1440],
              [3600, 3600, 3600]], dtype=int)

solver = pywraplp.Solver('stack', pywraplp.Solver.SAT_INTEGER_PROGRAMMING)

# One 0/1 decision variable per entry of the line.
y = np.zeros_like(y_line, dtype=object)
i_idx = range(y_line.shape[0])
j_idx = range(y_line.shape[1])
for i, j in np.ndindex(y.shape):
    y[i, j] = solver.IntVar(0, 1, 'y[%i_%i]' % (i, j))

# Every row of y must contain exactly one selected entry.
for row in y:
    solver.Add(sum(row) == 1)
def objective(y, y_line):
    """Return the total cost of moving from the old line y_line to the line y.

    The change y - y_line is split by sign: non-negative entries are weighted
    with EC, non-positive entries with S, and y itself is weighted with P. The
    three weighted arrays are added and summed over all entries.

    NOTE(review): the sign masks are evaluated when this function is called.
    With numeric arrays that is the intended filter; with solver variables in
    y the comparisons presumably do not yield numeric booleans, so the split
    may not act as a filter there -- confirm against the solver's API.
    """
    delta = y - y_line
    bought = np.zeros(y.shape, dtype=object)
    sold = np.zeros(y.shape, dtype=object)

    # I -> new bought resources
    grew = delta >= 0
    bought[grew] = delta[grew]
    # II -> sold resources
    shrank = delta <= 0
    sold[shrank] = delta[shrank]

    savings = sold * S
    invest = bought * EC
    processing = y * P
    return np.sum(savings + invest + processing)
# Build the objective expression from the decision variables and minimise it.
c_y = objective(y=y, y_line=y_line)
solver.Minimize(c_y)

# [START solve]
print("Number of constraints:", solver.NumConstraints())
print("Number of variables:", solver.NumVariables())
status = solver.Solve()
# [END solve]

# Copy every strictly positive solution value into a plain array.
y_new = np.zeros_like(y)
for i, j in np.ndindex(y_line.shape):
    value = y[i, j].solution_value()
    if value > 0:
        y_new[i, j] = value

print(f"Objective sat: {solver.Objective().Value()}")
print(y_new)
# Number of constraints: 3
# Number of variables: 9
# Objective sat: 5760.0
# [[1.0 0 0]
# [1.0 0 0]
# [1.0 0 0]]
# %%
# Sanity check: re-evaluate the objective on the numeric solution.
c_y_test = objective(y=y_new, y_line=y_line)
c_y_test # -> 12430.0
The model can be solved, but not with the approach I chose in the first place. Using a pywraplp model it didn't work, yet with a cp_model it can be solved using predefined variables (as mentioned by @sascha). The arrays y_line, S, EC and P are the same as above. The sole constraint is the same as well. The "filtering", however, I could solve using:
# Flatten once, outside the loop: rebuilding these lists for every index made
# the linking step quadratic in the number of entries.
flat_delta = y_delta.flatten().tolist()
flat_new = y_cp.flatten().tolist()
flat_old = y_line.flatten().tolist()

# Tie entry i of y_delta to (new line - old line) at the same position.
for i, (new_entry, old_entry) in enumerate(zip(flat_new, flat_old)):
    model.AddElement(i, flat_delta, new_entry - old_entry)

# Split the change by sign: the plus part is max(delta, 0), the minus part is
# min(delta, 0).
for i in i_idx:
    for j in j_idx:
        model.AddMaxEquality(y_delta_plus[i, j], [y_delta[i, j], model.NewConstant(0)])
        model.AddMinEquality(y_delta_minus[i, j], [y_delta[i, j], model.NewConstant(0)])

model.Minimize(
    np.sum(y_delta_plus * EC) + np.sum(y_delta_minus * S) + np.sum(y_cp * P)
)
The solving and sanity check yields:
solver_cp = cp_model.CpSolver()
solver_cp.Solve(model)

# Copy every strictly positive solution value into a plain array.
y_new_cp = np.zeros_like(y_cp)
for i in i_idx:
    for j in j_idx:
        value = solver_cp.Value(y_cp[i, j])
        if value > 0:
            y_new_cp[i, j] = value

print(f"Objective cp: {solver_cp.ObjectiveValue()}")
print(y_new_cp)
# Objective cp: 5760.0
# [[1 0 0]
# [0 1 0]
# [1 0 0]]
# Sanity check: re-evaluate the objective on the numeric solution.
c_y_test = objective(y=y_new_cp, y_line=y_line)
c_y_test # -> 5760 -> Correct
The cp_model could solve it and match the sanity check.
With the pywraplp model I couldn't figure out how to solve it.
my data is like:
# Each sample holds, in column order: a list-valued feature, a category code,
# a metric value and the target.
sample1 = [[1, 0, 3, 5, 0, 9], 0, 1.5, 0]
sample2 = [[0, 4, 0, 6, 2, 0], 2, 1.9, 1]
sample3 = [[9, 7, 6, 0, 0, 0], 0, 1.3, 1]
paul = pd.DataFrame(data=[sample1, sample2, sample3],
                    columns=['list', 'cat', 'metr', 'target'])
On this data, a scikit-learn kNN regression with a specific distance function should be performed.
The distance function is:
def my_distance(X, Y, **kwargs):
    """Return the distance between two feature values of the same kind.

    The kind of feature is chosen from X:

    * more than one element: list feature. The distance is
      (sum(X) + sum(Y) - sum of the element-wise minima) * Para_list.
    * one element of dtype int64: nominal feature. 0 when both values are
      equal and not -1, Para_minus1 when either value is -1 or None,
      Para_nominal otherwise.
    * any other single element: metric feature. 0 when both values are equal,
      Para_minus1 when either value is -1 or None, otherwise
      abs(X - Y) * Para_metrisch.

    X and Y may be sequences, arrays or bare scalars. The weights are read
    from the keyword arguments Para_list, Para_minus1, Para_nominal and
    Para_metrisch; only the one needed for the chosen case is looked up.

    Raises ValueError when X or Y holds no value at all.
    """
    # Accept bare scalars (and None) as well as sequences.
    x = np.atleast_1d(X)
    y = np.atleast_1d(Y)
    if len(x) == 0 or len(y) == 0:
        raise ValueError("my_distance needs at least one value per argument")

    if len(x) > 1:
        shared = np.minimum(x, y).sum()
        return (x.sum() + y.sum() - shared) * kwargs["Para_list"]

    # Single-valued feature: compare scalars so that a scalar is returned.
    a, b = x[0], y[0]
    missing = a == -1 or b == -1 or a is None or b is None

    if x.dtype == 'int64':
        if a == b and a != -1:
            return 0
        if missing:
            return kwargs["Para_minus1"]
        return kwargs["Para_nominal"]

    if a == b:
        return 0
    if missing:
        return kwargs["Para_minus1"]
    return abs(a - b) * kwargs["Para_metrisch"]
And should be implemented as valid distance function by
DistanceMetric.get_metric('pyfunc',func=my_distance)
If I'm right, the scikit-learn code should look like this:
train, test = train_test_split(paul, test_size=0.3)
# x_train should contain only the independent variables; the target is dropped:
x_train = train.drop('target', axis=1)
y_train = train['target']
x_test = test.drop('target', axis=1)
y_test = test['target']
# The custom metric receives its weights through metric_params.
knn = KNeighborsRegressor(n_neighbors=2,
                          algorithm='ball_tree',
                          metric=my_distance,
                          metric_params={"Para_list": 2,
                                         "Para_minus1": 3,
                                         "Para_metrisch": 2,
                                         "Para_nominal": 4})
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)
I get
ValueError: setting an array element with a sequence.
I guess scikit can not handle a single feature item as list? Is there a way to make that happen?
I guess scikit can not handle a single feature item as list? Is there a way to make that happen?
No, there is no way I know of to make this happen. You need to convert this feature into 2D matrix, concatenate it with other 1D features, to form data appropriately. This is standard sklearn behavior.
Unless you have some very narrow use-case, making 2D array from list feature is totally fine. I assume, all lists have same length.
I have two matrices in NumPy. One is larger than the other. I want to insert the smaller 2D-array (randomly) into the bigger 2D-array where there are only zeros (so no actual information in the bigger one is lost). Example:
Big array:
[0 0 0 9]
[0 0 0 7]
[0 0 0 2]
[2 3 1 5]
Small array:
[3 3]
[3 3]
(Possible) result:
[3 3 0 9]
[3 3 0 7]
[0 0 0 2]
[2 3 1 5]
I think you can use 2D convolution to find the places where the small array b can go in the large array a. If you use scipy.signal.convolve2d with mode='valid' you only get locations where the small array 'fits'. I think using the abs of the arrays gets around positive and negative values (in either array) canceling, but I haven't tested any of this very rigorously.
Here's what I did, using @CypherX's fill_a_with_b function for the fill step:
import numpy as np
import scipy.signal
# Your input data.
a = np.array([[0, 0, 0, 9],
              [0, 0, 0, 7],
              [0, 0, 0, 2],
              [2, 3, 1, 5]])
b = np.ones((2, 2)) * 3

# Find places where b can go: count the non-zero cells of a under every
# b-sized window. Counting occupied cells (instead of convolving the values)
# also rejects windows that are only "free" where b itself holds zeros.
occupied_cells = (a != 0).astype(int)
window = np.ones(b.shape, dtype=int)
allowed = scipy.signal.convolve2d(occupied_cells, window, mode='valid')

# Get these locations as (row, col) pairs of the window's top-left corner.
coords = np.argwhere(allowed == 0)
if len(coords) == 0:
    raise ValueError("no all-zero region of a is large enough to hold b")

# Choose one of the locations at random.
choice = coords[np.random.randint(coords.shape[0])]


# Use @CypherX's 'fill' function.
def fill_a_with_b(a, b, pos=(0, 0)):
    """Return a copy of a with b written at top-left position pos=(row, col).

    a itself is left untouched; the values of b are cast to a's dtype.
    """
    aa = a.copy()
    row, col = pos
    aa[row:row + b.shape[0], col:col + b.shape[1]] = b
    return aa


# Do the fill thing and show the result.
filled = fill_a_with_b(a, b, choice)
print(repr(filled))
This results in (for example)...
array([[0, 0, 0, 9],
[0, 3, 3, 7],
[0, 3, 3, 2],
[2, 3, 1, 5]])
I will try giving you an example. But it will be based on some assumptions:
You know that the 0's are spanned across a contiguous rectangular block.
There are no other zeros in a.
If you would like to fill in a non-contiguous block of zeroes or, there are zeros on the columns/rows where you have some other non-zero values, you would have to think of a more sophisticated solution.
Solution: Random Insertion of array b into array a where a==0
Assumption: we know that the places where a is zero are a contiguous set of positions with a rectangular shape.
Imports
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'svg' # 'svg', 'retina'
plt.style.use('seaborn-white')
Make Data
# Make a: all zeros, except that the last column and the last row count up
# from 10
shape = (5, 5)
a = np.zeros(shape)
a[:, -1] = 10 + np.arange(shape[0])
a[-1, :] = 10 + np.arange(shape[1])
# Make b: a 2x2 block filled with 2.0
b = np.full((2, 2), 2.0)
Preprocess
Here we determine possible slot-positions of the top-left element of b on a.
# Get range of positions (rows and cols) where we have zeros:
# first row of minmax = smallest (row, col), second row = largest (row, col)
target_indices = np.argwhere(a == 0)
lowest = target_indices.min(axis=0)
highest = target_indices.max(axis=0)
minmax = np.array([lowest, highest])
# Define max position (index) of topleft element of b on a:
# the largest zero index, pulled back by the extent of b minus one
maxpos = minmax[1, :] - (np.array(b.shape) - 1)
# Define min position (index) of topleft element of b on a
minpos = minmax[0, :]
Make a list of Top Left Corner Positions of b on a
The function get_rand_topleftpos() takes in minpos and maxpos for rows and columns on a that define possible slot-positions, and returns a randomly selected valid slot-position for size=1. I have used a size=20 to create quite a few valid random slot-positions and then select only the unique positions so we could then see them as images. If you need just one slot-position at a time, select size=1.
def get_rand_topleftpos(minpos, maxpos, size=1):
    """Draw random top-left positions between minpos and maxpos (inclusive).

    Rows are drawn first, then columns, each with np.random.randint.

    Returns a tuple (rowpos, colpos, pos) where pos pairs every drawn row
    with the column drawn at the same index, one (row, col) pair per line.
    """
    row_low, col_low = minpos[0], minpos[1]
    # randint excludes `high`, hence the + 1 to make maxpos reachable.
    row_high = maxpos[0] + 1
    col_high = maxpos[1] + 1
    rowpos = np.random.randint(row_low, high=row_high, size=size)
    colpos = np.random.randint(col_low, high=col_high, size=size)
    pos = np.vstack((rowpos, colpos)).transpose()
    return rowpos, colpos, pos
# Make a few valid positions where the array b could be placed
rowpos, colpos, pos = get_rand_topleftpos(minpos, maxpos, size=20)
# Select the Unique combinations so we could visualize them only
pos = np.unique(pos, axis=0)
Place b on a and Make Figures
We make a custom defined function fill_a_with_b() to fill a with b at a certain position on a. This position will accept the top-left cell of b.
def fill_a_with_b(a, b, pos=(0, 0)):
    """Return a copy of a with b written at top-left position pos=(row, col).

    a itself is left untouched; the values of b are cast to a's dtype. The
    default position is an immutable tuple so that it cannot be altered
    between calls.
    """
    aa = a.copy()
    row, col = pos
    # Slice assignment copies the values of b, so no extra copy is needed.
    aa[row:row + b.shape[0], col:col + b.shape[1]] = b
    return aa
# Make a few figures with randomly picked position
# for topleft position of b on a
n_pos = pos.shape[0]
# At most six panels per row.
if n_pos > 6:
    nrows, ncols = int(np.ceil(n_pos / 6)), 6
else:
    nrows, ncols = 1, n_pos

# squeeze=False keeps axs a 2-D array even when there is a single panel, so
# that axs.flatten() below works for any number of positions.
fig, axs = plt.subplots(nrows=nrows,
                        ncols=ncols,
                        figsize=(2.5 * ncols, 2.5 * nrows),
                        squeeze=False)

for i, ax in enumerate(axs.flatten()):
    if i < n_pos:
        aa = fill_a_with_b(a, b, pos[i, :])
        sns.heatmap(aa,
                    vmin=np.min(aa),
                    vmax=np.max(aa),
                    annot=True,
                    cbar=False,
                    square=True,
                    cmap='YlGnBu_r',
                    ax=ax)
        # tolist() yields plain Python ints, so the title shows e.g. (0, 1)
        # whatever way NumPy chooses to print its own scalar types.
        ax.set_title('TopLeftPos: {}'.format(tuple(pos[i, :].tolist())),
                     fontsize=9)
    else:
        # Hide the unused panels of the last row.
        ax.axis('off')

plt.tight_layout()
plt.show()
Results
With array a defined as:
shape = (5,5)
a = np.zeros(shape)
a[:,-1] = np.arange(shape[0]) + 10
a[-1,:] = np.arange(shape[1]) + 10
With array a defined as:
shape = (6,5)
a = np.zeros(shape)
a[:,0] = np.arange(shape[0]) + 10
a[:,-1] = np.arange(shape[0]) + 10
a[-1,:] = np.arange(shape[1]) + 10
I have written a code that plots random walks. There are traj different random walks generated and each consists of n steps. I would like to animate their moves. How can I do that?
My code below:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def random_walk_2D(n, traj = 1):
    """Plot `traj` random walks of `n` unit steps each, all starting at (0, 0).

    Every walk is drawn as a line and its final position is marked with a red
    dot; the figure is shown once all walks have been plotted.
    """
    # The four possible unit jumps and the shared starting point.
    jumps = np.array([[0, 1], [1, 0], [-1, 0], [0, -1]])
    origin = np.zeros((1, 2), dtype=int)
    for _ in range(traj):
        draws = np.random.randint(4, size = n)
        # Prepend the origin, then accumulate the jumps into positions.
        moves = np.concatenate((origin, jumps[draws, :]), axis = 0)
        path = np.cumsum(moves, axis = 0)
        plt.plot(path[:, 0], path[:, 1])
        plt.plot(path[-1][0], path[-1][1], 'ro')  # the last point
    plt.show()
As it stands now, you generate traj in one shot. I mean that traj in traj = np.cumsum(temp1, axis = 0) already contains all the "story" from the beginning to the end. If you want to create an animation that is in "real time", you should not generate traj in one shot, but iteratively, plotting new steps as they come. What about doing:
import numpy as np
import matplotlib.pyplot as plt
def real_time_random_walk_2D_NT(
    nb_steps, nb_trajs, with_dots=False, save_trajs=False, tpause=.01
):
    """
    Draw several 2D random walks step by step, pausing between steps.

    All walks start at (0, 0). At every step each walk makes one random unit
    jump, the plot is updated and `plt.pause(tpause)` is called.

    Parameters
    ----------
    nb_steps : integer
               number of steps
    nb_trajs : integer
               number of trajectories
    save_trajs : boolean (optional)
                 If True, entire trajectories are kept in memory rather than
                 keeping only the last positions needed for plotting.
                 False by default.
    with_dots : boolean (optional)
                If True, dots representative of random-walking entities
                are displayed and the lines are redrawn at every step.
                Has precedence over `save_trajs`.
                False by default.
    tpause : float (optional)
             Pausing time between 2 steps. .01 seconds by default.
    """
    skoki = np.array([[0, 1], [1, 0], [-1, 0], [0, -1]])
    keep_history = save_trajs or with_dots
    # trajs[j] holds the stored positions of walk j; the last one is current.
    trajs = np.zeros((nb_trajs, 1, 2))
    for _ in range(nb_steps):
        # One random unit jump per walk, added to its current position.
        losy = np.random.randint(4, size=nb_trajs)
        new_pos = trajs[:, -1:, :] + skoki[losy][:, np.newaxis, :]
        trajs = np.concatenate((trajs, new_pos), axis=1)

        if with_dots:
            # Redraw everything so that only the current dots are visible.
            # Indexing from the end always includes the position just drawn.
            plt.cla()
            plt.plot(trajs[:, -1, 0], trajs[:, -1, 1], 'ro')
            plt.plot(trajs[:, :, 0].T, trajs[:, :, 1].T)
        else:
            # Only the newest segment has to be added to the figure.
            plt.plot(trajs[:, -2:, 0].T, trajs[:, -2:, 1].T)

        if not keep_history:
            # The next step only needs the current position of each walk.
            trajs = trajs[:, -1:, :]
        plt.pause(tpause)
real_time_random_walk_2D_NT(50, 6, with_dots=True)
real_time_random_walk_2D_NT(50, 6)