How to make bins and histograms - python

I need to make 200 evenly spaced bins and have my data sorted into them, so that I can make a histogram out of the data. Can someone help me write a script that creates 200 bins and sorts the data into them?
This is my current code:
import operator
import matplotlib.pyplot as plt
import numpy as np
with open("testdata") as f:
line = skip headers
nat = int(line.split()[0])
print nat
for line in f:
if line.strip():
if line.strip():
b = 0
a = 1
for b in range(53):
for a in range(b+1,54):
vector1 = (l[b][0],l[b][1],l[b][2])
vector2 = (l[a][0],l[a][1],l[a][2])
x = vector1
y = vector2
vector3 = list(np.array(x) - np.array(y))
dotProduct = reduce( operator.add, map( operator.mul, vector3, vector3))
dp = dotProduct**.5
print dp
#data = dp
#num_bins = 200 # <- number of bins for the histogram
#plt.hist(data, num_bins)
/usr/lib64/python2.6/site-packages/matplotlib/backends/ DeprecationWarning: Use the new widget gtk.Tooltip
self.tooltips = gtk.Tooltips()
Traceback (most recent call last):
File "vector_final", line 42, in <module>
plt.hist(data, num_bins)
File "/usr/lib64/python2.6/site-packages/matplotlib/", line 2008, in hist
ret = ax.hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, **kwargs)
File "/usr/lib64/python2.6/site-packages/matplotlib/", line 7098, in hist
w = [None]*len(x)
TypeError: len() of unsized object

You are pretty close. The only thing you are missing is storing your data and passing it to the histogram function correctly.
import operator
import matplotlib.pyplot as plt
import numpy as np
with open("testdata") as f:
line = skip headers
nat = int(line.split()[0])
print nat
for line in f:
# store striped line and only store if there is data on the line.
cleaned = line.strip()
if cleaned:
# convert to float and remove characters in first index
b = 0
a = 1
# create a list to store our calculations in
distances = []
num_vects = len(l)
# visit every unordered pair (b, a) with b < a exactly once
for b in range(num_vects-1):
    for a in range(b+1,num_vects):
        vector1 = (l[b][0],l[b][1],l[b][2])
        vector2 = (l[a][0],l[a][1],l[a][2])
        x = vector1
        y = vector2
        # component-wise difference between the two points
        vector3 = list(np.array(x) - np.array(y))
        # squared length of the difference vector, then its square root
        dotProduct = reduce( operator.add, map( operator.mul, vector3, vector3))
        dp = dotProduct**.5
        # store individual data point into the list of calculated distances
        distances.append(dp)
# plot histogram
num_bins = 200 # <- number of bins for the histogram
# store useful data returned by the histogram function
(n, bins, patches) = plt.hist(distances, num_bins)


How to count a peak that drops to 0? Python Find Peaks

I am using Scipy's find_peaks to count the number of peaks in a time series.
I need to count the number of peaks with the requirement that it starts at 0 and falls to 0. The second peak from the right (indicated by a vertical line) is counted here, but it shouldn't be since it doesn't fall to 0 before the last peak. Is there a way to specify this in find_peaks?
peaks1 = find_peaks(array, height=(1,1.5),prominence=1)
peaks1_5 = find_peaks(array, height=(1.5,2),prominence=1.5)
peaks2 = find_peaks(array, height=2,prominence=2)
fig, ax = plt.subplots(figsize=(30, 10), dpi=80)
[plt.axvline([p],c='red',linewidth=0.3) for p in peaks1[0]]
[plt.axvline([p],c='green',linewidth=0.3) for p in peaks1_5[0]]
[plt.axvline([p],c='purple',linewidth=0.3) for p in peaks2[0]]
Peaks chart
Runnable code in the question would be helpful, as would a more precise definition of which peaks should be counted ;-)
First we generate some data:
import numpy as np
import matplotlib.pyplot as plt
#---- generate data
mp = 200    # number of sample points (used for both t and the noise vector)
freq = 20   # upper bound of t is freq*pi, i.e. the sine covers freq/2 full periods
t = np.linspace(0,freq*np.pi,mp)
signal = np.sin(t)
# np.random.rand draws uniform samples from [0, 1); unseeded, so every run differs
noise = np.random.rand(mp)
# half-amplitude sine plus noise
X = 0.5*signal + noise
#---- scale X
def scale01(a):
    """Linearly rescale ``a`` so that its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    a : numpy.ndarray
        Array of numbers to rescale.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``a``. When every element of ``a``
        is equal the range is zero; in that case an all-zero array is
        returned instead of dividing by zero.
    """
    lowest = a.min()
    span = a.max() - lowest
    if span == 0:
        # constant input: (a - min) / 0 would produce NaN everywhere
        return np.zeros_like(a, dtype=float)
    return (a - lowest) / span
# shift the [0, 1]-scaled signal down by 0.5, then clip negatives to exactly 0
# so the series alternates between runs of zeros and positive stretches
X = scale01(X) - 0.5
X = np.maximum(X,0.0)
#---- graphics
fig = plt.figure(figsize=(15,3))
# the same data drawn twice: once as a line, once as point markers
plt.plot(t, X)
plt.plot(t, X, 'o')
Now we identify the zero lakes and the non-zero islands
# np.where with a single condition returns a tuple of index arrays; wrapping it
# in np.array and taking [0] yields the index array for the first axis
# (assumes X is 1-D, as built earlier in this answer)
a = np.array(np.where(X<=0))[0] # extract the indices with X<=0
b = np.array(np.where(X>0) )[0] # extract the indices with X>0
fig = plt.figure(figsize=(15,3))
# positive samples: red dots plus a black vertical stem from 0 up to the value
plt.plot(t[b], X[b], 'or', label=">0")
plt.vlines(t[b], 0, X[b], colors='k')
# remaining samples, drawn as green dots at -X
plt.plot(t[a], -X[a], 'og', label="<=0")
Next we fill the non-zero islands in a list with numpy arrays. Each numpy array contains a non-zero island.
X_ = X[b]          # values of the non-zero samples, in index order
m = len(X_)
list_y = list()    # samples of the island currently being collected
list_Y = list()    # finished islands, one numpy array per island
for j in range(m):
    # a jump of more than 1 between consecutive non-zero indices means at
    # least one zero sample lies in between, so a new island starts here
    if j > 0 and b[j]-b[j-1]>1 :
        list_Y.append(np.array(list_y))
        list_y = list()
        #print("------------------------------------------------------ new list")
    list_y.append(X_[j])
    #print(j, b[j], X_[j])
# the last island is not followed by a gap, so store it explicitly
if list_y:
    list_Y.append(np.array(list_y))
n = len(list_Y)
for j in range(n):
    # tolist() prints plain Python floats regardless of the numpy version
    print(list_Y[j].tolist())
With each numpy array in the list you can evaluate the peaks according to your definition (which I could not capture fully).
[0.22062475371241386, 0.29207471279008657, 0.35072832015294586, 0.1251594602284437, 0.24379282278250836, 0.06896727821692716]
[0.2689504650818615, 0.011887999386713255, 0.055442917743508624, 0.2876317343278316, 0.24084993011027578, 0.12097014134978235]
[0.18903867830269638, 0.26990334850384257, 0.5, 0.3288200602696131, 0.05036869827824486, 0.040381419904307436]
[0.0053279353208096625, 0.3468189863146819, 0.05644254569326557, 0.3985674171686334, 0.14897985190026097, 0.0025548308606182513, 0.32765453143333545, 0.3328107320769136, 0.1838328219774621, 0.21123652127176762]
[0.13453490446867422, 0.25258744200608363, 0.4981866504733391, 0.35180043079867795, 0.08425183513691303, 0.3376976620831299, 0.22348609066402825]
[0.052227024152749935, 0.08639499278421903]
[0.1581304564482665, 0.2273016493144655, 0.26721741895716056, 0.33665669827299305, 0.19255497112246478, 0.16227876457894175]
[0.10236622631923908, 0.06039140456773806, 0.053391261130168344]
[0.21170561257978093, 0.11669466945342633, 0.2479665749659119, 0.25792206298341824, 0.19579440295962314, 0.15210847528158666, 0.23531008247873408]
[0.2088166123161308, 0.26031072203571415, 0.2786317264092839, 0.289871721166855, 0.25460661866030165, 0.3214937091565473, 0.36293451974436275]
[0.04525610202919361, 0.1740374143631349, 0.17258947174880612]
[0.14217066607610684, 0.03435965315335088, 0.09996473411377804, 0.48290831305140514, 0.09407783896892297]
[0.03224632110920911, 0.08787466747977346, 0.20032938280871493, 0.23646809723694695, 0.13380244841935984, 0.05305696510866664, 0.2657761536751757, 0.34514204517200975]
[0.17123014194168007, 0.2397521290598289]

Implementation of NLMS for a one dimensional array with padasip

I am trying to implement an NLMS filter following the Padasip example:
I need to filter a one dimensional array. I adjusted the code but I'm getting an error
File "C:\Python310\lib\site-packages\padasip\filters\", line 194, in run
self.n = len(x[0])
TypeError: object of type 'numpy.float64' has no len()
I understand that a scalar (here a numpy.float64) doesn't have a length; however, I'm not clear on how to implement NLMS with a single one-dimensional array.
Full Code:
import numpy as np
import matplotlib.pylab as plt
import padasip as pa
# creation of data
N = 10000
x = np.random.normal(0, 1, N) # input matrix
v = np.random.normal(0, 0.1, N) # noise
d = x + v
# identification
f = pa.filters.FilterNLMS(n=TAP, mu=0.1, w="random")
# run the filter: d is the target signal, x the input.
# NOTE: run() reads len(x[0]) (see the traceback above), so it expects x to
# hold one row of inputs per sample; the 1-D x built here triggers that error.
y, e, w = f.run(d, x)
# show results
plt.subplot(211);plt.title("Adaptation");plt.xlabel("samples - k")
plt.plot(d,"b", label="d - target")
plt.plot(y,"g", label="y - output");plt.legend()
plt.subplot(212);plt.title("Filter error");plt.xlabel("samples - k")
plt.plot(10*np.log10(e**2),"r", label="e - error [dB]");plt.legend()

Mean values and Gram Matrices

I have spent countless hours looking through posts but can't seem to get my answers right.
-Subtract the mean values (mn) from each of the points (i.e. center the points around the origin) and store these in a new matrix called A. (This is the first step in PCA.)
-Calculate the 3×3 Gram matrix
If anyone can help me out, I'd greatly appreciate it.
%matplotlib inline
import matplotlib.pylab as plt
import numpy as np
import sympy as sym
#get the data file from the internet:
from urllib.request import urlopen, urlretrieve
url = ''
file = "Skin_NonSkin.txt"
response = urlopen(url)
data = response.read() # a `bytes` object
text = data.decode('utf-8')
lines = text.split('\r\n')
data = []
#Read in file line by line
for line in lines:
    # skip empty lines (e.g. the one produced by a trailing line break)
    if not line:
        continue
    try:
        # each row is tab-separated integers
        data.append(list(map(int, line.split('\t'))))
    except ValueError:
        # int() failed on at least one field: report the row and keep going
        print('invalid line of data:',line)
#Convert the file to a list of points
P = np.matrix(data)
#Mask out only face values and keep just the RGBs
mask = np.array(P[:,3]==1)
mask = mask.flatten()
points = P[mask,:]
## Change order to Red, Green, Blue
points = points[:,(2,1,0)]
# Plot the points in 3D using their actual color values
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(points[:,0], points[:,1], points[:,2], c=points/255)
points = points[:,(2,1,0)]
red_mean = np.mean(points[:,0])
green_mean = np.mean(points[:,1])
blue_mean = np.mean(points[:,2])
mn = np.array((red_mean,green_mean,blue_mean), dtype=float)
You can calculate the mean of a matrix in a given axis, and subtract it directly.
mn = np.mean(points, axis=0)
C = points - mn;
The Gram matrix G is defined as the matrix whose entries are the inner products of the columns of C, i.e. $G = \frac{1}{N} C^\top C$, where N is the number of rows of C; dividing by N keeps the scale of G independent of the number of points.
# 3x3 Gram matrix: inner products of the columns of C, divided by the row count
G = (C.T @ C) / C.shape[0]

How to delete elements from a numpy array using indices returned by scipy.spatial.KDTree.query_ball_point method

I am trying to use a KD-tree data structure to remove the closest points from an array, preferably without for loops.
import sys
import time
import scipy.spatial
class KDTree:
    """
    Nearest neighbor search class with KDTree
    """

    def __init__(self, data):
        """Build the tree from ``data``, an array with one point per row."""
        # store kd-tree
        self.tree = scipy.spatial.cKDTree(data)

    def search(self, inp, k=1):
        """
        Search NN

        inp: input data, single frame or multi frame. A 1-D array is a
             single query point; a 2-D array holds one query point per
             COLUMN (it is transposed before being iterated).
        k:   number of nearest neighbours to look up per query point.

        Returns ``(index, dist)``. For multi-frame input both are lists
        with one entry per query point.
        """
        if len(inp.shape) >= 2:  # multi input
            index = []
            dist = []
            for i in inp.T:
                idist, iindex = self.tree.query(i, k=k)
                # collect the per-point results; without these appends the
                # method would always return two empty lists
                index.append(iindex)
                dist.append(idist)
            return index, dist

        # note: query() returns (distance, index); this method returns them swapped
        dist, index = self.tree.query(inp, k=k)
        return index, dist

    def search_in_distance(self, inp, r):
        """
        find points with in a distance r

        Returns the row indices of all stored points within distance ``r``
        of ``inp``, converted to a numpy array.
        """
        index = self.tree.query_ball_point(inp, r)
        return np.asarray(index)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
start = time.time()
fig, ar = plt.subplots()
t = 0
R = 50.0
u = R *np.cos(t)
v = R *np.sin(t)
x = np.linspace(-100,100,51)
y = np.linspace(-100,100,51)
xx, yy = np.meshgrid(x,y)
points =np.vstack((xx.ravel(),yy.ravel())).T
Tree = KDTree(points)
ind = Tree.search_in_distance([u, v],10.0)
infected = points[ind]
def animate(i):
global R,t,start,points
u = R *np.cos(t)
v = R *np.sin(t)
ind = Tree.search_in_distance([u, v],10.0)
infected = points[ind]
#points = np.delete(points,ind)
end = time.time()
if end - start != 0:
print((end - start), end="\r")
start = end
ani = animation.FuncAnimation(fig, animate, interval=20)
But no matter what I do, I can't get np.delete to work with the indices returned by the query_ball_point method. What am I missing?
I would like to make the red colored points vanish in each iteration from the points array.
Your points array is an N×2 matrix, and your ind indices are a list of row indices. You need to specify the axis along which to delete, like this:
points = np.delete(points,ind,axis=0)
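Also, once you delete rows, watch out for mismatched indices in later iterations: the KD-tree was built from the original array, so the indices it returns still refer to the original row numbering, not to the shrunken array. You may want to keep one copy of the points for plotting (the one you delete from) and an untouched copy for the tree queries.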

Graph Customization in python

Currently I have a program that takes data and makes a histogram out of it. I know how to change the labels and so on, but is there a way to make the x-axis display tick values more frequently? For example, right now the x-axis shows values in increments of 5; how can I make it show increments of 1, 2, or 3 instead?
Current code:
import operator
import matplotlib.pyplot as plt
import numpy as np
with open("testdata") as f:
line = skip headers
nat = int(line.split()[
print nat
for line in f:
if line.strip():
b = 0
a = 1
distances = []
for b in range(53):
for a in range(b+1,54):
vector1 = (l[b][0],l[b][1],l[b][2])
vector2 = (l[a][0],l[a][1],l[a][2])
x = vector1
y = vector2
vector3 = list(np.array(x) - np.array(y))
dotProduct = reduce( operator.add, map( operator.mul, vector3, vector3))
dp = dotProduct**.5
num_bins = 200 # <- number of bins for the histogram
(n, bins, patches) = plt.hist(distances, num_bins)
# np.arange has no `endpoint` keyword and excludes its stop value, so extend
# the stop by one step to make `end` itself a tick position
label_positions = np.arange(start, end + step, step)

