How to make bins and histograms - python

I need to make 200 evenly spaced bins and have my data sorted into them, so that I can make a histogram out of the data. Can someone help me write a script that creates 200 bins and sorts the data into them?
This is my current code:
#!/usr/bin/python
import operator
import matplotlib.pyplot as plt
import numpy as np
# Written for Python 2: relies on the built-in reduce() and on map()
# returning a list.
# Parsed rows of "testdata": one sequence of floats per non-blank data line.
l = []
with open("testdata") as f:
    first_line = next(f)
    next(f)  # skip headers
    # the first whitespace-separated token of the first line is an integer
    nat = int(first_line.split()[0])
    print(nat)
    for row in f:
        if not row.strip():
            continue  # ignore blank lines
        # drop the first column and convert the remaining fields to float
        l.append(map(float, row.split()[1:]))

# Distance between every pair of rows (b, a) with b < a among rows 0..53.
for b in range(53):
    for a in range(b + 1, 54):
        vector1 = np.array((l[b][0], l[b][1], l[b][2]))
        vector2 = np.array((l[a][0], l[a][1], l[a][2]))
        vector3 = list(vector1 - vector2)
        # sum of the squared components, then the square root
        squares = [component * component for component in vector3]
        dp = reduce(operator.add, squares) ** .5
        print(dp)
#data = dp
#num_bins = 200 # <- number of bins for the histogram
#plt.hist(data, num_bins)
#plt.show()
Errors:
/usr/lib64/python2.6/site-packages/matplotlib/backends/backend_gtk.py:621: DeprecationWarning: Use the new widget gtk.Tooltip
self.tooltips = gtk.Tooltips()
Traceback (most recent call last):
File "vector_final", line 42, in <module>
plt.hist(data, num_bins)
File "/usr/lib64/python2.6/site-packages/matplotlib/pyplot.py", line 2008, in hist
ret = ax.hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, **kwargs)
File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 7098, in hist
w = [None]*len(x)
TypeError: len() of unsized object

You are pretty close. The only thing you are missing is storing your data and passing it to the histogram function correctly.
#!/usr/bin/python
import operator
import matplotlib.pyplot as plt
import numpy as np
# Read "testdata": the first token of the first line is parsed as an integer
# (nat), the second line is skipped as a header, and every following
# non-blank line contributes its fields after the first column as floats.
l = []
with open("testdata") as f:
    line = next(f)  # next(f) works on Python 2.6+ and Python 3
    next(f)  # skip headers
    nat = int(line.split()[0])
    print(nat)
    for line in f:
        # store stripped line and only keep it if there is data on the line
        cleaned = line.strip()
        if cleaned:
            # convert to float and drop the entry in the first column;
            # a real list is needed because the rows are indexed later
            l.append([float(value) for value in cleaned.split()[1:]])

# Keep only the first three fields (x, y, z) of each row as one array.
coords = np.array([row[:3] for row in l])
num_vects = len(coords)

# create a list to store our calculations in
distances = []
for b in range(num_vects - 1):
    for a in range(b + 1, num_vects):
        # Euclidean length of the difference vector between rows b and a
        dp = np.linalg.norm(coords[b] - coords[a])
        # store individual data point into the list of calculated distances
        distances.append(dp)

# plot histogram
num_bins = 200  # <- number of bins for the histogram
# hist() must be given the whole sequence of values, not a single float;
# it returns the per-bin counts, the bin edges and the drawn patches
(n, bins, patches) = plt.hist(distances, num_bins)
plt.show()

Related

How to count a peak that drops to 0? Python Find Peaks

I am using Scipy's find_peaks to count the number of peaks in a time series.
I need to count the number of peaks with the requirement that it starts at 0 and falls to 0. The second peak from the right (indicated by a vertical line) is counted here, but it shouldn't be since it doesn't fall to 0 before the last peak. Is there a way to specify this in find_peaks?
# Detect peaks in three height bands; element [0] of each result holds the
# peak indices used below.
peaks1 = find_peaks(array, height=(1, 1.5), prominence=1)
peaks1_5 = find_peaks(array, height=(1.5, 2), prominence=1.5)
peaks2 = find_peaks(array, height=2, prominence=2)

fig, ax = plt.subplots(figsize=(30, 10), dpi=80)
plt.plot(spi_neg['date'], spi["SPI-12"])

# One thin vertical marker per detected peak, colour-coded by height band.
for band, colour in ((peaks1, 'red'), (peaks1_5, 'green'), (peaks2, 'purple')):
    for p in band[0]:
        plt.axvline(spi_neg.date.iloc[p], c=colour, linewidth=0.3)

# Dashed horizontal guides at the band thresholds.
for level in (2, 1.5, 1):
    plt.axhline(level, linestyle='dashed', linewidth=1)
Peaks chart
A running code with the question would be helpful, and a more precise definition of the countable peaks too ;-)
First we generate some data:
import numpy as np
import matplotlib.pyplot as plt
# ---- generate data: half-amplitude sine wave plus uniform random noise
mp, freq = 200, 20                      # number of samples; t spans [0, freq*pi]
t = np.linspace(0, freq * np.pi, num=mp)
noise = np.random.rand(mp)              # one uniform sample in [0, 1) per point
signal = np.sin(t)
X = noise + 0.5 * signal
#---- scale X
def scale01(a):
    """Rescale *a* linearly so its minimum maps to 0 and its maximum to 1.

    *a* may be any array-like; it is converted to a float array first.
    A constant input has zero range; it is mapped to all zeros instead of
    dividing zero by zero and returning NaNs.
    """
    a = np.asarray(a, dtype=float)
    lowest = a.min()
    span = a.max() - lowest
    if span == 0:
        return np.zeros_like(a)
    return (a - lowest) / span
# Shift the rescaled signal down by 0.5 and clip everything below zero to 0.
X = np.maximum(scale01(X) - 0.5, 0.0)

# ---- graphics: draw the signal as a line and again as point markers
with plt.style.context('ggplot'):
    fig = plt.figure(figsize=(15, 3))
    plt.plot(t, X)
    plt.plot(t, X, 'o')
Now we identify the zero lakes and the non-zero islands
# Index arrays of the two kinds of samples.
a = np.where(X <= 0)[0]   # indices with X <= 0 (the "lakes")
b = np.where(X > 0)[0]    # indices with X > 0 (the "islands")

with plt.style.context('ggplot'):
    fig = plt.figure(figsize=(15, 3))
    # positive samples: red dots with a black stem down to zero
    plt.plot(t[b], X[b], 'or', label=">0")
    plt.vlines(t[b], 0, X[b], colors='k')
    # non-positive samples: green dots
    plt.plot(t[a], -X[a], 'og', label="<=0")
    plt.legend()
    plt.show()
Next we collect the non-zero islands in a list. Each entry of the list holds the values of one non-zero island.
# Values of all positive samples, in index order.
X_ = X[b]

# A new island starts wherever two consecutive positive indices are not
# adjacent; `breaks` holds the positions in X_ where each new island begins.
breaks = np.where(np.diff(b) > 1)[0] + 1

# Split at those positions. Unlike a loop that starts at j=1, this keeps the
# very first value X_[0] and never produces an empty island.
if len(X_) > 0:
    list_Y = [island.tolist() for island in np.split(X_, breaks)]
else:
    list_Y = []

print("list_Y")
for island in list_Y:
    print(island)
With each island in the list you can evaluate the peaks according to your definition (which I could not capture fully).
list_Y
[0.22062475371241386, 0.29207471279008657, 0.35072832015294586, 0.1251594602284437, 0.24379282278250836, 0.06896727821692716]
[0.06271739133976328]
[0.2689504650818615, 0.011887999386713255, 0.055442917743508624, 0.2876317343278316, 0.24084993011027578, 0.12097014134978235]
[0.1907699022464584]
[0.08249052680941726]
[0.10205561805376617]
[0.18903867830269638, 0.26990334850384257, 0.5, 0.3288200602696131, 0.05036869827824486, 0.040381419904307436]
[0.08618838642790339]
[0.0053279353208096625, 0.3468189863146819, 0.05644254569326557, 0.3985674171686334, 0.14897985190026097, 0.0025548308606182513, 0.32765453143333545, 0.3328107320769136, 0.1838328219774621, 0.21123652127176762]
[0.18870251894797663]
[0.13453490446867422, 0.25258744200608363, 0.4981866504733391, 0.35180043079867795, 0.08425183513691303, 0.3376976620831299, 0.22348609066402825]
[0.0716155758184146]
[0.052227024152749935, 0.08639499278421903]
[0.1581304564482665, 0.2273016493144655, 0.26721741895716056, 0.33665669827299305, 0.19255497112246478, 0.16227876457894175]
[0.10236622631923908, 0.06039140456773806, 0.053391261130168344]
[0.21170561257978093, 0.11669466945342633, 0.2479665749659119, 0.25792206298341824, 0.19579440295962314, 0.15210847528158666, 0.23531008247873408]
[0.05340116678342899]
[0.2088166123161308, 0.26031072203571415, 0.2786317264092839, 0.289871721166855, 0.25460661866030165, 0.3214937091565473, 0.36293451974436275]
[0.04525610202919361, 0.1740374143631349, 0.17258947174880612]
[0.14217066607610684, 0.03435965315335088, 0.09996473411377804, 0.48290831305140514, 0.09407783896892297]
[0.03224632110920911, 0.08787466747977346, 0.20032938280871493, 0.23646809723694695, 0.13380244841935984, 0.05305696510866664, 0.2657761536751757, 0.34514204517200975]
[0.17123014194168007, 0.2397521290598289]

Implementation of NLMS for a one dimensional array with padasip

I am trying to implement an NLMS filter per the Padasip example:
https://matousc89.github.io/padasip/sources/filters/nlms.html
I need to filter a one dimensional array. I adjusted the code but I'm getting an error
File "C:\Python310\lib\site-packages\padasip\filters\base_filter.py", line 194, in run
self.n = len(x[0])
TypeError: object of type 'numpy.float64' has no len()
I understand that a single number (here a numpy.float64) doesn't have a length; however, I'm not clear on how to implement NLMS with a single array.
Full Code:
import numpy as np
import matplotlib.pylab as plt
import padasip as pa
# creation of data
N = 10000
x = np.random.normal(0, 1, N)    # input; note this is a 1-D array of N samples
v = np.random.normal(0, 0.1, N)  # noise
d = x + v                        # target = input + noise

# identification
TAP = 5000
f = pa.filters.FilterNLMS(n=TAP, mu=0.1, w="random")
# NOTE: this is the call that raises the TypeError quoted above; run() takes
# len(x[0]), and with a 1-D x each x[0] is a single numpy.float64.
y, e, w = f.run(d, x)

# show results
plt.figure(figsize=(15, 9))

plt.subplot(211)
plt.title("Adaptation")
plt.xlabel("samples - k")
plt.plot(d, "b", label="d - target")
plt.plot(y, "g", label="y - output")
plt.legend()

plt.subplot(212)
plt.title("Filter error")
plt.xlabel("samples - k")
plt.plot(10 * np.log10(e**2), "r", label="e - error [dB]")
plt.legend()

plt.tight_layout()
plt.show()

Mean values and Gram Matrices

I have spent countless hours looking through posts but can't seem to get my answers right.
- Subtract the mean values (mn) from each of the points (i.e. center the points around the origin) and store these in a new matrix called A. (This is the first step in PCA.)
-Calculate the 3×3 Gram matrix
If anyone can please help me out, I'd greatly appreciate it.
%matplotlib inline
import matplotlib.pylab as plt
import numpy as np
import sympy as sym
sym.init_printing(use_unicode=True)
# Get the data file from the internet (tab-separated integer columns).
from urllib.request import urlopen, urlretrieve
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00229/Skin_NonSkin.txt'
file = "Skin_NonSkin.txt"
# The context manager closes the connection even if the read fails.
with urlopen(url) as response:
    raw = response.read()  # a `bytes` object
text = raw.decode('utf-8')

# Read in the file line by line; splitlines() copes with \r\n and \n alike.
data = []
for line in text.splitlines():
    if not line:
        continue
    try:
        data.append([int(field) for field in line.split('\t')])
    except ValueError:
        # only a failed int() conversion marks the line as invalid
        print('invalid line of data:', line)

#Convert the file to a list of points
P = np.matrix(data)
P.shape

#Mask out only face values and keep just the RGBs
mask = np.array(P[:,3]==1)
mask = mask.flatten()
points = P[mask,:]

## Change order to Red, Green, Blue
points = points[:,(2,1,0)]

# Plot the points in 3D using their actual color values
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(points[:,0], points[:,1], points[:,2], c=points/255)
ax.set_xlabel('Red')
ax.set_ylabel('Green')
ax.set_zlabel('Blue')

# `points` is already in Red, Green, Blue order here. Reordering the columns
# a second time would swap red and blue back, and the means below would be
# assigned to the wrong channel names.
red_mean = np.mean(points[:,0])
green_mean = np.mean(points[:,1])
blue_mean = np.mean(points[:,2])
mn = np.array((red_mean, green_mean, blue_mean), dtype=float)
You can calculate the mean of a matrix in a given axis, and subtract it directly.
# Column-wise mean: axis=0 averages over the rows, i.e. over all points.
mn = points.mean(axis=0)
# Broadcasting subtracts that mean from every row, centring the points.
C = points - mn
The Gram matrix G is defined as the matrix whose entries are the inner products of the columns of C. To keep its scale independent of the number of points, we divide the result by the number of rows of C:
# Matrix product of C transposed with C (the `@` operator), divided by the
# number of rows of C.
G = (C.T @ C) / C.shape[0]

How to delete elements from a numpy array using indices returned by scipy.spatial.KDTree.query_ball_point method

I am trying to use a KD-tree data structure to remove the closest points from an array, preferably without for loops.
import sys
import time
import scipy.spatial
class KDTree:
    """
    Nearest neighbor search class with KDTree

    Thin wrapper around scipy.spatial.cKDTree. Uses the module-level
    ``np`` (numpy), which must be imported before the methods are called.
    """

    def __init__(self, data):
        """Build the tree from *data*, which is passed to cKDTree unchanged."""
        # store kd-tree
        self.tree = scipy.spatial.cKDTree(data)

    def search(self, inp, k=1):
        """
        Search NN

        inp: input data, single frame or multi frame. Any array-like is
             accepted (a plain list works as well as an ndarray). For
             input with two or more dimensions, each COLUMN of inp is
             one query point.
        k:   number of nearest neighbours requested per query point.

        Returns (index, dist). For multi-frame input both are lists with
        one entry per query point; otherwise they are whatever
        cKDTree.query returns for a single point.
        """
        # Convert first so that list input has .ndim like an ndarray does.
        inp = np.asarray(inp)

        if inp.ndim >= 2:  # multi input
            index = []
            dist = []

            # iterate over the columns of inp, one query per column
            for i in inp.T:
                idist, iindex = self.tree.query(i, k=k)
                index.append(iindex)
                dist.append(idist)

            return index, dist

        dist, index = self.tree.query(inp, k=k)
        return index, dist

    def search_in_distance(self, inp, r):
        """
        find points with in a distance r

        Returns the result of cKDTree.query_ball_point(inp, r) converted
        to a numpy array.
        """
        index = self.tree.query_ball_point(inp, r)
        return np.asarray(index)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
# Timestamp of the previous frame; animate() prints the time between frames.
start = time.time()
fig, ar = plt.subplots()

# The query centre (u, v) sits on a circle of radius R at angle t.
t = 0
R = 50.0
u, v = R * np.cos(t), R * np.sin(t)

# Regular 51 x 51 grid of points covering [-100, 100] in both directions,
# stored as one (x, y) row per point.
x = np.linspace(-100, 100, 51)
y = np.linspace(-100, 100, 51)
xx, yy = np.meshgrid(x, y)
points = np.vstack((xx.ravel(), yy.ravel())).T

Tree = KDTree(points)

# Initial frame: every point in black, points within 10.0 of (u, v) in red.
ind = Tree.search_in_distance([u, v], 10.0)
ar.scatter(points[:, 0], points[:, 1], c='k', s=1)
infected = points[ind]
ar.scatter(infected[:, 0], infected[:, 1], c='r', s=5)
def animate(i):
    """Redraw one frame: all points in black, those near the centre in red.

    The frame number *i* is not used. The centre advances along the circle
    by 0.01 rad per call, and the time since the previous call is printed.
    """
    global R, t, start, points

    ar.clear()

    # current query centre on the circle of radius R
    centre = [R * np.cos(t), R * np.sin(t)]
    ind = Tree.search_in_distance(centre, 10.0)

    ar.scatter(points[:, 0], points[:, 1], c='k', s=1)
    hits = points[ind]
    ar.scatter(hits[:, 0], hits[:, 1], c='r', s=5)
    #points = np.delete(points,ind)

    t += 0.01

    # report the time elapsed since the previous frame (skipped when zero)
    end = time.time()
    elapsed = end - start
    if elapsed:
        print(elapsed, end="\r")
    start = end
# Call animate() repeatedly with a 20 ms delay between frames. The result is
# kept in `ani` so the animation object stays referenced while the window
# is open.
ani = animation.FuncAnimation(fig, animate, interval=20)
plt.show()
But no matter what I do, I can't get np.delete to work with the indices returned by the query_ball_point method. What am I missing?
I would like to make the red colored points vanish in each iteration from the points array.
Your points array is a Nx2 matrix. Your ind indices are a list of row indices. What you need is to specify the axis along which you need deletion, ultimately this:
# axis=0 removes whole rows (points); without an axis np.delete operates on
# the flattened array instead.
points = np.delete(points,ind,axis=0)
Also, once you delete indices, watch out for missing indices in your next iteration/calculations. Maybe you want to have a copy to delete points and plot and another copy for calculations that you do not delete from it.

Graph Customization in python

Currently I have a program that takes data and makes a histogram out of it. I know how to change the labels and so on, but is there a way to make the x-axis display tick values more frequently? For example, right now the x-axis shows the values in increments of 5; how can I make it show them in increments of 1, 2 or 3 instead?
Current code:
#!/usr/bin/python
import operator
import matplotlib.pyplot as plt
import numpy as np
# Written for Python 2: relies on the built-in reduce() and on map()
# returning a list.
# Parsed rows of "testdata": one sequence of floats per non-blank data line.
l = []
with open("testdata") as f:
    line = next(f)
    next(f)  # skip headers
    # the first whitespace-separated token of the first line is an integer
    nat = int(line.split()[0])
    print(nat)
    for line in f:
        if line.strip():
            # drop the first column and convert the remaining fields to float
            l.append(map(float, line.split()[1:]))

# Distance between every pair of rows (b, a) with b < a among rows 0..53.
distances = []
for b in range(53):
    for a in range(b + 1, 54):
        vector1 = (l[b][0], l[b][1], l[b][2])
        vector2 = (l[a][0], l[a][1], l[a][2])
        vector3 = list(np.array(vector1) - np.array(vector2))
        # sum of the squared components, then the square root
        dotProduct = reduce(operator.add, map(operator.mul, vector3, vector3))
        dp = dotProduct ** .5
        distances.append(dp)

num_bins = 200  # <- number of bins for the histogram
(n, bins, patches) = plt.hist(distances, num_bins)
plt.title('Histogram')
plt.xlabel('Distance')
plt.ylabel('Frequency')
plt.show()
# Tick positions from `start` to `end` (inclusive) in increments of `step`;
# set these three to the range and spacing you want on the x-axis.
# np.arange has no `endpoint` argument and excludes its stop value, so the
# stop is pushed half a step past `end` to keep `end` itself as a tick.
# Call this BEFORE plt.show(), otherwise the displayed figure is unaffected.
label_positions = np.arange(start, end + step / 2.0, step)
plt.xticks(label_positions)

Categories

Resources