How do i plot the largest eigenvalues for randomly generated matrix? - python

im trying to plot the eigenvalues of randomly generated adjacency matrices to obtain what looks like a gaussian distribution, im trying to change and fix the probability that the graphs are generated and plot the largest eigenvalue by its frequency, however im not sure how to do these two, here is my code:
import numpy as np
import random
import matplotlib.pyplot as plt
import scipy.linalg as la
print("Please input the amount of times you want to repeat this: ")
userInput = int(input())
print("This will repeat {} times".format(userInput))
print("--------------------------------------------")
largestEig = []
for x in range(userInput):
n = 3
print("Random number is: {}".format(n))
adjMatrix = np.random.randn(0,2,(n,n))
np.fill_diagonal(adjMatrix, 0)
i_lower = np.tril_indices(n, -1)
adjMatrix[i_lower] = adjMatrix.T[i_lower]
eigvals, eigvecs = la.eig(adjMatrix)
m = max(eigvals)
largestEig.append(m)
print("For {}, M = {}".format(n, m))
print(adjMatrix)
print("---------------------------------------------")
print("The List:")
print(largestEig)
plt.plot(largestEig)
plt.show()

When trying to run your code, I get an error in the line that generates the matrix.
I assume you want to create a square matrix with random values.
In this case you might want to look at np.random.uniform:
n=3
adjMatrix = np.random.uniform(0,10,(n,n))
For the plotting of the distribution, you might want to look at plt.hist():
plt.hist(largestEig, bins=50)
Adding these changes, and removing the input and print statements gives this:
largestEig = []
userInput=10000
for x in range(userInput):
n = 3
adjMatrix = np.random.uniform(0,10,(n,n))
np.fill_diagonal(adjMatrix, 0)
i_lower = np.tril_indices(n, -1)
adjMatrix[i_lower] = adjMatrix.T[i_lower]
eigvals, eigvecs = la.eig(adjMatrix)
m = max(eigvals)
largestEig.append(m)
plt.hist(largestEig, bins=50)
plt.show()

Related

How to split data into two graphs with mat plot lib

I would be so thankful if someone would be able to help me with this. I am creating a graph in matplotib however I would to love to split up the 14 lines created from the while loop into the x and y values of P, so instead of plt.plot(t,P) it would be plt.plot(t,((P[1])[0]))) and
plt.plot(t,((P[1])[1]))). I would love if someone could help me very quick, it should be easy but i am just getting errors with the arrays
`
#Altering Alpha in Tumor Cells vs PACCs
#What is alpha? α = Rate of conversion of cancer cells to PACCs
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from google.colab import files
value = -6
counter = -1
array = []
pac = []
while value <= 0:
def modelP(x,t):
P, C = x
λc = 0.0601
K = 2000
α = 1 * (10**value)
ν = 1 * (10**-6)
λp = 0.1
γ = 2
#returning odes
dPdt = ((λp))*P*(1-(C+(γ*P))/K)+ (α*C)
dCdt = ((λc)*C)*(1-(C+(γ*P))/K)-(α*C) + (ν***P)
return dPdt, dCdt
#initial
C0= 256
P0 = 0
Pinit = [P0,C0]
#time points
t = np.linspace(0,730)
#solve odes
P = odeint(modelP,Pinit,t)
plt.plot(t,P)
value += 1
#plot results
plt.xlabel('Time [days]')
plt.ylabel('Number of PACCs')
plt.show()
`
You can use subplots() to create two subplots and then plot the individual line into the plot you need. To do this, firstly add the subplots at the start (before the while loop) by adding this line...
fig, ax = plt.subplots(2,1) ## Plot will 2 rows, 1 column... change if required
Then... within the while loop, replace the plotting line...
plt.plot(t,P)
with (do take care of the space so that the lines are within while loop)
if value < -3: ## I am using value = -3 as the point of split, change as needed
ax[0].plot(t,P)#, ax=ax[0]) ## Add to first plot
else:
ax[1].plot(t,P)#,ax=ax[1]) ## Add to second plot
This will give a plot like this.

Is there any Python function/code to plot Binomial distribution (both PDF and CDF)

All the code in the net do not let us decide both the parameters of binomial distribution.
How we can use a user interface (CLI) to ask user to enter any parameter (number of trials, probability of success), then plot the graphs in a suitable way?
Binomial distribution (PDF) is defined as : p(x;n) = nCr(n,x) * (p**x) * ((1-p)**(n-x))
The CDF can be easily generated by sending the PDF through a accumulator system (a summer). For that simply sum the PDF; up to and including the interested point.
How ever plotting the PDF and CDF can be nicely done if you can use stem function for low number of "n" (= number of trials) and plot function for larger values of "n".
Use these links if you do not know matplotlib.pyplot stem, plot functions:
1.
Python matplotlib.pyplot stem function
2.
Python matplotlib.pyplot plot function
import numpy as np
import matplotlib.pyplot as plt
import math
import time
def nCr(n,r):n=int(n);r=int(r);f = math.factorial;return int(f(n)/(f(r)*f(n-r)))
def p(n,p,x):return nCr(n,x) * (p**x) * ((1-p)**(n-x))
def cumilate(n,pS,x_):
o=0
for i in range(x_+1):
o += p(n,pS,i)
return o;
while True:
input_n = input('Enter No. of trials:')
input_pS = input('Enter Probability of success:')
try:
n = int(input_n)
pS = float(input_pS)
except :
print('Please enter valid data...')
continue
if(n<=0):print('n should be larger than 0');continue
if(not(0<=pS<=1)):print('p should be in [0,1]');continue
if(n>11500):print('n is too large.It will stuck your pc.Try less than 500');continue
x = [i for i in range(n+1)]
y = [p(n,pS,x1) for x1 in x]
fig,ax = plt.subplots(2,1,figsize=(20,10))
if(n>=150):
ax[0].plot(x,y)
else:
ax[0].stem(x,y)
ax[0].title.set_text('Binomial PDF n=' + str(n) +' pS=' + str(pS))
cum = [cumilate(n,pS,x1) for x1 in x]
if(n>=150):
ax[1].plot(x,cum)
else:
ax[1].stem(x,cum)
ax[1].title.set_text('Binomial CDF n=' + str(n) +' pS=' + str(pS))
plt.show()
print('-------------------- Plotted -----------------')
Results will look like following:
n = 120 , p = 0.5
n = 10, p =0.8
When the number of trials are so high, stem function is not visually nice to use. So switched to plot function and shown as a continuous function.
n = 200, p = 0.3

Fitting a quadratic function in python without numpy polyfit

I am trying to fit a quadratic function to some data, and I'm trying to do this without using numpy's polyfit function.
Mathematically I tried to follow this website https://neutrium.net/mathematics/least-squares-fitting-of-a-polynomial/ but somehow I don't think that I'm doing it right. If anyone could assist me that would be great, or If you could suggest another way to do it that would also be awesome.
What I've tried so far:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
ones = np.ones(3)
A = np.array( ((0,1),(1,1),(2,1)))
xfeature = A.T[0]
squaredfeature = A.T[0] ** 2
b = np.array( (1,2,0), ndmin=2 ).T
b = b.reshape(3)
features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis = 1)
featuresc = features.copy()
print(features)
m_det = np.linalg.det(features)
print(m_det)
determinants = []
for i in range(3):
featuresc.T[i] = b
print(featuresc)
det = np.linalg.det(featuresc)
determinants.append(det)
print(det)
featuresc = features.copy()
determinants = determinants / m_det
print(determinants)
plt.scatter(A.T[0],b)
u = np.linspace(0,3,100)
plt.plot(u, u**2*determinants[2] + u*determinants[1] + determinants[0] )
p2 = np.polyfit(A.T[0],b,2)
plt.plot(u, np.polyval(p2,u), 'b--')
plt.show()
As you can see my curve doesn't compare well to nnumpy's polyfit curve.
Update:
I went through my code and removed all the stupid mistakes and now it works, when I try to fit it over 3 points, but I have no idea how to fit over more than three points.
This is the new code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
ones = np.ones(3)
A = np.array( ((0,1),(1,1),(2,1)))
xfeature = A.T[0]
squaredfeature = A.T[0] ** 2
b = np.array( (1,2,0), ndmin=2 ).T
b = b.reshape(3)
features = np.concatenate((np.vstack(ones), np.vstack(xfeature), np.vstack(squaredfeature)), axis = 1)
featuresc = features.copy()
print(features)
m_det = np.linalg.det(features)
print(m_det)
determinants = []
for i in range(3):
featuresc.T[i] = b
print(featuresc)
det = np.linalg.det(featuresc)
determinants.append(det)
print(det)
featuresc = features.copy()
determinants = determinants / m_det
print(determinants)
plt.scatter(A.T[0],b)
u = np.linspace(0,3,100)
plt.plot(u, u**2*determinants[2] + u*determinants[1] + determinants[0] )
p2 = np.polyfit(A.T[0],b,2)
plt.plot(u, np.polyval(p2,u), 'r--')
plt.show()
Instead using Cramer's Rule, actually solve the system using least squares. Remember that Cramer's Rule will only work if the total number of points you have equals the desired order of polynomial plus 1.
If you don't have this, then Cramer's Rule will not work as you're trying to find an exact solution to the problem. If you have more points, the method is unsuitable as we will create an overdetermined system of equations.
To adapt this to more points, numpy.linalg.lstsq would be a better fit as it solves the solution to the Ax = b by computing the vector x that minimizes the Euclidean norm using the matrix A. Therefore, remove the y values from the last column of the features matrix and solve for the coefficients and use numpy.linalg.lstsq to solve for the coefficients:
import numpy as np
import matplotlib.pyplot as plt
ones = np.ones(4)
xfeature = np.asarray([0,1,2,3])
squaredfeature = xfeature ** 2
b = np.asarray([1,2,0,3])
features = np.concatenate((np.vstack(ones),np.vstack(xfeature),np.vstack(squaredfeature)), axis = 1) # Change - remove the y values
determinants = np.linalg.lstsq(features, b)[0] # Change - use least squares
plt.scatter(xfeature,b)
u = np.linspace(0,3,100)
plt.plot(u, u**2*determinants[2] + u*determinants[1] + determinants[0] )
plt.show()
I get this plot now, which matches what the dashed curve is in your graph, also matching what numpy.polyfit gives you:

Filtering 1D numpy arrays in Python

Explanation:
I have two numpy arrays: dataX and dataY, and I am trying to filter each array to reduce the noise. The image shown below shows the actual input data (blue dots) and an example of what I want it to be like(red dots). I do not need the filtered data to be as perfect as in the example but I do want it to be as straight as possible. I have provided sample data in the code.
What I have tried:
Firstly, you can see that the data isn't 'continuous', so I first divided them into individual 'segments' ( 4 of them in this example), and then applied a filter to each 'segment'. Someone suggested that I use a Savitzky-Golay filter. The full, run-able code is below:
import scipy as sc
import scipy.signal
import numpy as np
import matplotlib.pyplot as plt
# Sample Data
ydata = np.array([1,0,1,2,1,2,1,0,1,1,2,2,0,0,1,0,1,0,1,2,7,6,8,6,8,6,6,8,6,6,8,6,6,7,6,5,5,6,6, 10,11,12,13,12,11,10,10,11,10,12,11,10,10,10,10,12,12,10,10,17,16,15,17,16, 17,16,18,19,18,17,16,16,16,16,16,15,16])
xdata = np.array([1,2,3,1,5,4,7,8,6,10,11,12,13,10,12,13,17,16,19,18,21,19,23,21,25,20,26,27,28,26,26,26,29,30,30,29,30,32,33, 1,2,3,1,5,4,7,8,6,10,11,12,13,10,12,13,17,16,19,18,21,19,23,21,25,20,26,27,28,26,26,26,29,30,30,29,30,32])
# Used a diff array to find where there is a big change in Y.
# If there's a big change in Y, then there must be a change of 'segment'.
diffy = np.diff(ydata)
# Create empty numpy arrays to append values into
filteredX = np.array([])
filteredY = np.array([])
# Chose 3 to be the value indicating the change in Y
index = np.where(diffy >3)
# Loop through the array
start = 0
for i in range (0, (index[0].size +1) ):
# Check if last segment is reached
if i == index[0].size:
print xdata[start:]
partSize = xdata[start:].size
# Window length must be an odd integer
if partSize % 2 == 0:
partSize = partSize - 1
filteredDataX = sc.signal.savgol_filter(xdata[start:], partSize, 3)
filteredDataY = sc.signal.savgol_filter(ydata[start:], partSize, 3)
filteredX = np.append(filteredX, filteredDataX)
filteredY = np.append(filteredY, filteredDataY)
else:
print xdata[start:index[0][i]]
partSize = xdata[start:index[0][i]].size
if partSize % 2 == 0:
partSize = partSize - 1
filteredDataX = sc.signal.savgol_filter(xdata[start:index[0][i]], partSize, 3)
filteredDataY = sc.signal.savgol_filter(ydata[start:index[0][i]], partSize, 3)
start = index[0][i]
filteredX = np.append(filteredX, filteredDataX)
filteredY = np.append(filteredY, filteredDataY)
# Plots
plt.plot(xdata,ydata, 'bo', label = 'Input Data')
plt.plot(filteredX, filteredY, 'ro', label = 'Filtered Data')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Result')
plt.legend()
plt.show()
This is my result:
When each point is connected, the result looks as follows.
I have played around with the order, but it seems like a third order gave the best result.
I have also tried these filters, among a few others:
scipy.signal.medfilt
scipy.ndimage.filters.uniform_filter1d
But so far none of the filters I have tried were close to what I really wanted. What is the best way to filter data such as this? Looking forward to your help.
One way to get something looking close to your ideal would be clustering + linear regression.
Note that you have to provide the number of clusters and I also cheated a bit in scaling up y before clustering.
import numpy as np
from scipy import cluster, stats
ydata = np.array([1,0,1,2,1,2,1,0,1,1,2,2,0,0,1,0,1,0,1,2,7,6,8,6,8,6,6,8,6,6,8,6,6,7,6,5,5,6,6, 10,11,12,13,12,11,10,10,11,10,12,11,10,10,10,10,12,12,10,10,17,16,15,17,16, 17,16,18,19,18,17,16,16,16,16,16,15,16])
xdata = np.array([1,2,3,1,5,4,7,8,6,10,11,12,13,10,12,13,17,16,19,18,21,19,23,21,25,20,26,27,28,26,26,26,29,30,30,29,30,32,33, 1,2,3,1,5,4,7,8,6,10,11,12,13,10,12,13,17,16,19,18,21,19,23,21,25,20,26,27,28,26,26,26,29,30,30,29,30,32])
def split_to_lines(x, y, k):
yo = np.empty_like(y, dtype=float)
# get the cluster centers and the labels for each point
centers, map_ = cluster.vq.kmeans2(np.array((x, y * 2)).T.astype(float), k)
# for each cluster, use the labels to select the points belonging to
# the cluster and do a linear regression
for i in range(k):
slope, interc, *_ = stats.linregress(x[map_==i], y[map_==i])
# use the regression parameters to construct y values on the
# best fit line
yo[map_==i] = x[map_==i] * slope + interc
return yo
import pylab
pylab.plot(xdata, ydata, 'or')
pylab.plot(xdata, split_to_lines(xdata, ydata, 4), 'ob')
pylab.show()

2D Numpy Array Beginner troubles

my issues is with trying to work with arrays that for each elements is a tuple of 2 values.
specifically the problem is to generate a random 2-dimensional walk of 200 (but for testing say 2) steps with a max distance, then 100 (try just 2) of those walks per stage, and commencing each stage at the largest distance from the origin of the previous stage.
I can successfully generate the array of random steps and get them to return the final position (x,y) value and as well calculate the distance of them from the origin of each walk:
That's defined in these functions:
#............................................................getPositionInteger
def getPosInt(description) :
"""Asks the user to enter a positive integer"""
askAgain = False
while askAgain == False:
try:
posInt = eval(raw_input("\n %s : " %description))
assert 0 < posInt , "Not a positive integer"
assert type(posInt) == int , "Not a positive integer"
askAgain = True
except :
print "Input failed, not a positive integer"
return posInt
#...............................................................initialPosition
def initialPosition() :
"""Initial position of walker at the start of a random walk"""
return (0.0, 0.0)
#......................................................................distance
def distance(posA, posB) :
"""Distance between two positions"""
xi = posA[0] ; yi = posA[1]
xf = posB[0] ; yf = posB[1]
return np.sqrt((xf-xi)**2+(yf-yi)**2)
#..................................................................getPositions
def getPositions(start, nSteps, maxStep):
xArray = maxStep * np.random.random(nSteps+1)* np.cos(2.0 * np.pi * random.random())
yArray = maxStep * np.random.random(nSteps+1)* np.sin(2.0 * np.pi * random.random())
xArray[0] = start[0]
yArray[0] = start[-1]
xArray = np.cumsum(xArray)
yArray = np.cumsum(yArray)
return (xArray[-1], yArray[-1])
But I can't get the array of the final position of each walk per stage in (x,y) form per stage
Here's the main script and where I'm having trouble:
import numpy as np
import matplotlib.pylab as plt
import random
import time
MAX_STEP_SIZE = 0.90 # maximum size of a single step [m]
random.seed(12345)
#..........................................................................main
def main ():
''''''
print "RANDOM WALK IN STAGES IN TWO DIMENSIONS"
userdata = getDetails()
print "\nPlease wait while random walks are generated and analyzed..."
NUM_STAGES = userdata[0]
NUM_WALKS = userdata[1]
NUM_STEPS = userdata[2]
stageStart = initialPosition()
for stage in np.arange(NUM_STAGES):
walks = np.zeros((NUM_WALKS, NUM_WALKS), dtype=np.ndarray)
for walk in walks:
walk = getPositions(stageStart, NUM_STEPS, MAX_STEP_SIZE)
print walk
print walks
You will see I'm having trouble making a an (x,y) style array, where the [0 0] should be [0.0 , 0.0] and its printed twice and additionally, its not changing to the final position.
I really appreciate and help, advice or references you can provide.
Thanks
-Sid

Categories

Resources