I have some problems with a ValueError in Python

Here is my code to plot a stress-strain curve:
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy.interpolate import interp1d
from matplotlib.offsetbox import AnchoredText
import pandas as pd
# Strain is a column in the given dataframe, and I manually calculated stress
df_1 = pd.read_csv('1045.csv',skiprows=25,header=[0,1])
print(df_1.head())
A1 = 40.602*(10e-6)
stress1 = ((df_1.Load)/A1)
plt.figure(figsize=(12,9))
plt.plot(df_1.Strain1.values,df_1.Load.values,'g')
plt.ylabel('stress(Pa)',fontsize=13)
plt.xlabel('Strain(%)',fontsize=13)
plt.xticks(np.arange(-6e-5,0.15,step=0.005),rotation = 45)
plt.yticks(np.arange(0,42000,step=1000))
strain = df_1.Strain1.values
stress = np.array(((df_1.Load.values)/A1))
strain = np.array((df_1.Strain1.values))
LinearLimit=1
Strain_values_linear = np.linspace(strain[0], strain[LinearLimit], num=50, endpoint=True)
Strain_values_eng = np.linspace(strain[LinearLimit], strain[-1], num=50, endpoint=True)
f1 = interp1d(strain, stress, fill_value='extrapolate')
f2 = interp1d(strain, stress, kind=3, fill_value='extrapolate')
Now I keep getting a ValueError saying: "x and y arrays must be equal in length along interpolation axis." I don't understand this... I printed the shapes of strain and stress and they are the same.
By the way, here is a screenshot of the CSV file: [screenshot omitted]

You are probably passing an array of shape (..., N) as the first argument (meaning strain has a shape of the form (..., N)). SciPy doesn't allow that and throws a ValueError; see the documentation for details. You should run a for loop if you have multiple vectors in the strain array. The following code should work, assuming you want to interpolate one function for each row of strain (and that strain is a 2-D array; if it isn't, you can easily convert it using strain.reshape(-1, N)):
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy.interpolate import interp1d
from matplotlib.offsetbox import AnchoredText
import pandas as pd
# Strain is a column in the given dataframe, and I manually calculated stress
df_1 = pd.read_csv('1045.csv',skiprows=25,header=[0,1])
print(df_1.head())
A1 = 40.602*(10e-6)
stress1 = ((df_1.Load)/A1)
plt.figure(figsize=(12,9))
plt.plot(df_1.Strain1.values,df_1.Load.values,'g')
plt.ylabel('stress(Pa)',fontsize=13)
plt.xlabel('Strain(%)',fontsize=13)
plt.xticks(np.arange(-6e-5,0.15,step=0.005),rotation = 45)
plt.yticks(np.arange(0,42000,step=1000))
strain = df_1.Strain1.values
stress = np.array(((df_1.Load.values)/A1))
strain = np.array((df_1.Strain1.values))
LinearLimit=1
Strain_values_linear = np.linspace(strain[0], strain[LinearLimit], num=50, endpoint=True)
Strain_values_eng = np.linspace(strain[LinearLimit], strain[-1], num=50, endpoint=True)
f1, f2 = [], []
for row in range(len(strain)):
    f1.append(interp1d(strain[row], stress, fill_value='extrapolate'))
    f2.append(interp1d(strain[row], stress, kind=3, fill_value='extrapolate'))
Edit: From the comment, you have a strain array of shape (222, 1). This means you already have a vector, but the shape is not compatible with what SciPy accepts. In this case, you will have to reshape the strain and stress arrays to the form (N,). The following code should work:
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy.interpolate import interp1d
from matplotlib.offsetbox import AnchoredText
import pandas as pd
# Strain is a column in the given dataframe, and I manually calculated stress
df_1 = pd.read_csv('1045.csv',skiprows=25,header=[0,1])
print(df_1.head())
A1 = 40.602*(10e-6)
stress1 = ((df_1.Load)/A1)
plt.figure(figsize=(12,9))
plt.plot(df_1.Strain1.values,df_1.Load.values,'g')
plt.ylabel('stress(Pa)',fontsize=13)
plt.xlabel('Strain(%)',fontsize=13)
plt.xticks(np.arange(-6e-5,0.15,step=0.005),rotation = 45)
plt.yticks(np.arange(0,42000,step=1000))
strain = df_1.Strain1.values
stress = np.array(((df_1.Load.values)/A1))
strain = np.array((df_1.Strain1.values))
strain = strain.reshape(-1,)
stress = stress.reshape(-1,)
LinearLimit=1
Strain_values_linear = np.linspace(strain[0], strain[LinearLimit], num=50, endpoint=True)
Strain_values_eng = np.linspace(strain[LinearLimit], strain[-1], num=50, endpoint=True)
f1 = interp1d(strain, stress, fill_value='extrapolate')
f2 = interp1d(strain, stress, kind=3, fill_value='extrapolate')
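To check the result, here is a short usage sketch (assuming the reshaped arrays above) that evaluates both interpolants on the generated strain grids and overlays them on the raw data:
plt.figure()
plt.plot(strain, stress, 'o', label='data')
plt.plot(Strain_values_linear, f1(Strain_values_linear), '-', label='linear interpolant')
plt.plot(Strain_values_eng, f2(Strain_values_eng), '--', label='cubic interpolant')
plt.legend()
plt.show()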

Related

DBSCAN fit_predict on precomputed metrics outputs strange clusters

I am trying to practice some ML. Specifically, I am attempting to apply DBSCAN on a precomputed distance matrix (just to check how this works). Yes, I know I could use the Euclidean metric directly, but I wanted to test the precomputed option.
I am unsure why the labels all have the same value for a data set with random pairs in 3 different regions; I was expecting DBSCAN to separate those. Note: even if I use non-overlapping ranges for data1/2/3 I still get a single cluster output.
Here is the code:
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.cluster import DBSCAN
from scipy.spatial.distance import pdist, squareform
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
data1 = np.array ([[random.randint(1,400) for i in range(2)] for j in range (50)], dtype=np.float64)
data2 = np.array ([[random.randint(300,700) for i in range(2)] for j in range (50)], dtype=np.float64)
data3 = np.array ([[random.randint(600,900) for i in range(2)] for j in range (50)], dtype=np.float64)
data= np.append (np.append (data1,data2,axis=0), data3, axis=0)
d = pdist(data, lambda u, v: np.sqrt(((u-v)**2).sum()))
distance_matrix = squareform(d)
cluster = DBSCAN (eps=0.3, min_samples=2,metric='precomputed')
dbscan_model = cluster.fit_predict (distance_matrix)
plt.scatter (data[:,0], data[:,1], s=100, c=dbscan_model)
plt.show ()
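A likely cause, offered as a sketch rather than a definitive fix: with eps=0.3 the neighbourhood radius is far smaller than the typical spacing between points drawn from ranges several hundred units wide, so nearly every point gets labelled -1 (noise) and the scatter shows a single colour. Re-running with a larger eps (50 is just an illustrative guess, and note the three ranges overlap, so some merging can still occur) should produce real clusters:
cluster = DBSCAN(eps=50, min_samples=2, metric='precomputed')
labels = cluster.fit_predict(distance_matrix)
print(np.unique(labels))  # expect several cluster ids instead of only -1
plt.scatter(data[:, 0], data[:, 1], s=100, c=labels)
plt.show()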

matplotlib.pyplot.scatter not plotting normally when a new list is added to the array

I was working with NumPy and Pandas to create some artificial data for testing models.
First, I coded this:
# Constructing some random data for experiments
import math
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
np.random.seed(42)
# Rectangular Data
total_n = 500
x = np.random.rand(total_n)*10
y = np.random.rand(total_n)*10
divider = 260
# Two lambda functions are for shifting the data, the numbers are chosen arbitrarily
f = lambda a: a*2
x[divider:] = f(x[divider:])
y[divider:] = f(y[divider:])
g = lambda a: a*3 + 5
x[:divider] = g(x[:divider])
y[:divider] = g(y[:divider])
# Colours array for separating the data
colors = ['blue']*divider + ['red']*(total_n-divider)
squares = np.array([x,y])
plt.scatter(squares[0],squares[1], c=colors, alpha=0.5)
I got what I wanted (plot omitted).
But I wanted to add the colors array to the NumPy array to use it as a label variable, so I added this to the code:
# Constructing some random data for experiments
import math
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
np.random.seed(42)
# Rectangular Data
total_n = 500
x = np.random.rand(total_n)*10
y = np.random.rand(total_n)*10
divider = 260
# Two lambda functions are for shifting the data, the numbers are chosen arbitrarily
f = lambda a: a*2
x[divider:] = f(x[divider:])
y[divider:] = f(y[divider:])
g = lambda a: a*3 + 5
x[:divider] = g(x[:divider])
y[:divider] = g(y[:divider])
# Colours array for separating the data
colors = ['blue']*divider + ['red']*(total_n-divider)
squares = np.array([x,y,colors])
plt.scatter(squares[0],squares[1], c=colors, alpha=0.5)
And everything just blows up (plot omitted).
I worked around this by keeping the labels separate from the NumPy array. But still, what's going on here?
Alright, so I think I have the answer. A NumPy array can only hold one type of data, which is inferred when the array is created if it is not given explicitly. When you create squares with colors in it, squares.dtype becomes '<U32', which means that all values are converted to little-endian 32-character strings.
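A quick check (with made-up values) shows the coercion:
import numpy as np
print(np.array([1.5, 2.5]).dtype)          # float64
print(np.array([1.5, 2.5, 'blue']).dtype)  # <U32: everything became a string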
To avoid that you can:
use a simple list
use a pandas dataframe, as they accept columns of different types (a short sketch follows the structured-array example below)
if you want to use numpy you can use a structured array as follow
# input must be a list of tuples/lists representing rows; zip does the transformation
zipped = list(zip(x, y, colors))
# declare the type of each field; 'U10' means a string of up to 10 characters
dtype = np.dtype([('x', float), ('y', float), ('colors', 'U10')])
# create the array, specifying the dtype
squares = np.array(zipped, dtype=dtype)
# when plotting, refer to the corresponding field, just as in a dataframe
plt.scatter(squares["x"], squares["y"], c=squares["colors"], alpha=0.5)
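For the pandas route mentioned above, a minimal sketch (same x, y and colors as before) would be:
df = pd.DataFrame({'x': x, 'y': y, 'colors': colors})
plt.scatter(df['x'], df['y'], c=df['colors'], alpha=0.5)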

Python: how can I read data from files and assign it to an array? Error: "could not broadcast input array from shape"

I have the following MATLAB code that works fine using text data files, and now I am trying to rewrite it in Python but am running into errors. I have results that I am trying to apply some calculations on (perform data analysis). My results are in the format of binary files, and I have a specific package I am using to help me import the data. For example, here ne is a 1024x256 array and there are 159 output files, one per iteration. So, in MATLAB I can simply do the following:
% Load data:
frame = 6; % how many number of output files
ne_bg = load([DirPath '/ne_unpert.txt']);
ne_p = load([DirPath '/ne_' num2str(frame) '.txt']);
% perform calculations on data:
ne = ne_bg + ne_p;
dn_over_n = ne_p ./ ne;
Since MATLAB deals easily with multi-dimensional arrays and matrices, I am struggling to translate this to Python.
My Python code:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.gridspec as gridspec
import matplotlib.colors as colors
import matplotlib.patches as patches
import scipy.optimize as opt
from scipy.special import erf, comb, gamma, gammainc
import scipy.constants as const
from scipy.integrate import odeint
import sys
from glob import glob
from mpl_toolkits.axes_grid1 import make_axes_locatable
import Package as pg
# Initialize sizes
ne = np.zeros((1024,256))
ne_p = np.zeros((1024,256))
# Data
data = pg.GData('ne_p.bp')
dg = pg.GInterpModal(data, 2, 'ms')
#dg.interpolate(overwrite=True)
ne_p = data.getValues()
data = pg.GData('ne0.gkyl')
dg = pg.GInterpModal(data, 2, 'ms')
#dg.interpolate(overwrite=True)
ne_bg = data.getValues()
for i in range(1,159): # would like to look at files from 1 to 159, not starting at 0
data = pg.GData('ne{:d}.gkyl'.format(i))
dg = pg.GInterpModal(data, 2, 'ms')
ne[i,:] = data.getValues() # ERROR HERE
dn_over_n = ne_p/ne # get
....
Error message:
ValueError Traceback (most recent call last)
<ipython-input-35-d6134fb807e8> in <module>
48 dg = pg.GInterpModal(data, 2, 'ms')
49 #dg.interpolate(overwrite=True)
---> 50 ne[i,:] = data.getValues()
ValueError: could not broadcast input array from shape (1024,256,1) into shape (256)
Can someone show me how to fix this and explain what it means?
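One way to make the shapes line up, sketched under the assumption that each file holds one full (1024, 256) frame and reusing the pg calls from the question: give ne a leading file axis and drop the trailing singleton axis returned by getValues().
n_files = 159
ne = np.zeros((n_files, 1024, 256))           # one (1024, 256) frame per file
for i in range(1, n_files + 1):               # files are numbered starting at 1
    data = pg.GData('ne{:d}.gkyl'.format(i))
    ne[i - 1] = np.squeeze(data.getValues())  # (1024, 256, 1) -> (1024, 256)
dn_over_n = np.squeeze(ne_p) / ne             # (1024, 256) broadcasts against (159, 1024, 256)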

Apply scipy binned_statistic with dask

Is it possible to add binned_statistic to dask.stats? Or other solutions like embedding it in apply.ufunc()?
Here's an example using scipy.stats:
from scipy.stats import binned_statistic
import numpy as np
x = np.arange(500)
values = x*50
statistics, _, _ = binned_statistic(x, values, statistic='min', bins=500, range=(0, 500))
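As far as I know there is no binned_statistic in dask's stats module; one simple workaround is to wrap the NumPy-backed call in dask.delayed so it runs lazily as part of a dask graph. A minimal sketch:
import dask
import numpy as np
from scipy.stats import binned_statistic

x = np.arange(500)
values = x * 50
# wrap the scipy call in a delayed task; nothing runs until .compute()
task = dask.delayed(binned_statistic)(x, values, statistic='min', bins=500, range=(0, 500))
statistics, bin_edges, binnumber = task.compute()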

How to Extract the following Frequency-domain Features in Python?

Please feel free to point out any errors/improvements in the existing code
So this is a very basic question and I only have a beginner-level understanding of signal processing. I have 1.02 seconds of accelerometer data sampled at 32000 Hz. I am looking to extract the following frequency-domain features after performing an FFT in Python:
Mean Freq, Median Freq, Power Spectrum Deformation, Spectrum energy, Spectral Kurtosis, Spectral Skewness, Spectral Entropy, RMSF (Root Mean Square Freq.), RVF (Root Variance Frequency), Power Cepstrum.
More specifically, I am looking for plots of these features as a final output.
The csv file containing the data has four columns: Time, X Axis Value, Y Axis Value, Z Axis Value (the accelerometer is a triaxial one). So far in Python, I have been able to visualize the time-domain data, apply a convolution filter to it, apply the FFT, and generate a spectrogram that shows an interesting shock.
To Visualize Data
#Importing pandas and plotting modules
import numpy as np
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
#Reading Data
data = pd.read_csv('HelicalStage_Aug1.csv', index_col=0)
data = data[['X Value', 'Y Value', 'Z Value']]
date_rng = pd.date_range(start='1/8/2018', end='11/20/2018', freq='s')
#Plot the entire time series data and show gridlines
data.plot(grid=True)
Denoising
# Applying Convolution Filter (moving average)
mylist = [1, 2, 3, 4, 5, 6, 7]
N = 3
cumsum, moving_aves = [0], []
for i, x in enumerate(mylist, 1):
    cumsum.append(cumsum[i-1] + x)
    if i >= N:
        moving_ave = (cumsum[i] - cumsum[i-N])/N
        # can do stuff with moving_ave here
        moving_aves.append(moving_ave)
np.convolve(mylist, np.ones((N,))/N, mode='valid')
result_X = np.convolve(data[["X Value"]].values[:,0], np.ones((20001,))/20001, mode='valid')
result_Y = np.convolve(data[["Y Value"]].values[:,0], np.ones((20001,))/20001, mode='valid')
result_Z = np.convolve(data[["Z Value"]].values[:,0], np.ones((20001,))/20001, mode='valid')
plt.plot(result_X-np.mean(result_X))
plt.plot(result_Y-np.mean(result_Y))
plt.plot(result_Z-np.mean(result_Z))
FFT and Spectrogram
import numpy as np
import scipy as sp
import scipy.fftpack
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
df = pd.read_csv('HelicalStage_Aug1.csv')
df = df.drop(columns="Time")
df.plot()
plt.title('Sensor Data as Time Series')
signal = df[['Y Value']]
signal = np.squeeze(signal)
Y = np.fft.fftshift(np.abs(np.fft.fft(signal)))
Y = Y[int(len(Y)/2):]
Y = Y[10:]
plt.figure()
plt.plot(Y)
plt.figure()
powerSpectrum, freqenciesFound, time, imageAxis = plt.specgram(signal, Fs= 32000)
plt.show()
If my code is correct and the generated FFT and spectrogram are good, then how can I graphically compute the previously mentioned frequency domain features?
I have tried doing the following for MFCC -
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
from scipy.io import wavfile
from python_speech_features import mfcc
from python_speech_features import logfbank
# Extract MFCC and Filter bank features
Fs = 32000  # sampling rate from the question (32000 Hz)
mfcc_features = mfcc(signal, Fs)
filterbank_features = logfbank(signal, Fs)
# Printing parameters to see how many windows were generated
print('\nMFCC:\nNumber of windows =', mfcc_features.shape[0])
print('Length of each feature =', mfcc_features.shape[1])
print('\nFilter bank:\nNumber of windows =', filterbank_features.shape[0])
print('Length of each feature =', filterbank_features.shape[1])
Visualizing filter bank features
# Matrix needs to be transposed in order to have a horizontal time axis
mfcc_features = mfcc_features.T
plt.matshow(mfcc_features)
plt.title('MFCC')
I think your FFT procedure is not correct: the FFT output usually has a peak, and when you take the absolute value it should show up as one peak. You should probably change Y = np.fft.fftshift(np.abs(np.fft.fft(signal))) to Y = np.abs(np.fft.fftshift(np.fft.fft(signal))).
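The listed features are mostly moments of the power spectrum, so once the FFT is in place they follow from a few array operations. A minimal sketch (using the signal and the 32000 Hz sampling rate from the question, and treating the normalised one-sided power spectrum as a probability distribution; the feature definitions here are the common textbook ones):
fs = 32000
sig = np.asarray(signal, dtype=float)
sig = sig - sig.mean()                             # remove the DC offset
power = np.abs(np.fft.rfft(sig)) ** 2              # one-sided power spectrum
freqs = np.fft.rfftfreq(len(sig), d=1/fs)
p = power / power.sum()                            # normalised spectrum

mean_freq = np.sum(freqs * p)
median_freq = freqs[np.searchsorted(np.cumsum(p), 0.5)]
rmsf = np.sqrt(np.sum(freqs**2 * p))               # root mean square frequency
rvf = np.sqrt(np.sum((freqs - mean_freq)**2 * p))  # root variance frequency
spectral_entropy = -np.sum(p * np.log2(p + 1e-12))
print(mean_freq, median_freq, rmsf, rvf, spectral_entropy)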
