hist2D masked array - ValueError: cannot convert float NaN to integer - python

Something about the way I have masked zero values in my array is causing an error when doing his2d. I try:
x = ma.masked_array([0.17478785046728973, 0.21854713940370668, 0.7354304690743048,
1.1052309124767226, 0.3450562200956937, 0])
x[5] = ma.masked_all(1)
y = ma.masked_array([0.17478785046728973, 0.21854713940370668, 0.7354304690743048,
1.1052309124767226, 0.3450562200956937, 0])
plt.hist2d(x, y, bins = 10)
plt.show()
I think it doesn't like something to do with the decimals because this is the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-82-38bb844ac43e> in <module>()
9 1.1052309124767226, 0.3450562200956937, 0])
10
---> 11 plt.hist2d(c, d, bins = 10)
12 plt.show()
/Users/eebp/anaconda/envs/Python2/lib/python2.7/site-packages/matplotlib/pyplot.pyc in hist2d(x, y, bins, range, normed, weights, cmin, cmax, hold, data, **kwargs)
2983 ret = ax.hist2d(x, y, bins=bins, range=range, normed=normed,
2984 weights=weights, cmin=cmin, cmax=cmax, data=data,
-> 2985 **kwargs)
2986 finally:
2987 ax.hold(washold)
/Users/eebp/anaconda/envs/Python2/lib/python2.7/site-packages/matplotlib/__init__.pyc in inner(ax, *args, **kwargs)
1817 warnings.warn(msg % (label_namer, func.__name__),
1818 RuntimeWarning, stacklevel=2)
-> 1819 return func(ax, *args, **kwargs)
1820 pre_doc = inner.__doc__
1821 if pre_doc is None:
/Users/eebp/anaconda/envs/Python2/lib/python2.7/site-packages/matplotlib/axes/_axes.pyc in hist2d(self, x, y, bins, range, normed, weights, cmin, cmax, **kwargs)
6300 range = __builtins__["range"]
6301 h, xedges, yedges = np.histogram2d(x, y, bins=bins, range=bin_range,
-> 6302 normed=normed, weights=weights)
6303
6304 if cmin is not None:
/Users/eebp/anaconda/envs/Python2/lib/python2.7/site-packages/numpy/lib/twodim_base.pyc in histogram2d(x, y, bins, range, normed, weights)
712 xedges = yedges = asarray(bins, float)
713 bins = [xedges, yedges]
--> 714 hist, edges = histogramdd([x, y], bins, range, normed, weights)
715 return hist, edges[0], edges[1]
716
/Users/eebp/anaconda/envs/Python2/lib/python2.7/site-packages/numpy/lib/function_base.pyc in histogramdd(sample, bins, range, normed, weights)
800 mindiff = dedges[i].min()
801 if not np.isinf(mindiff):
--> 802 decimal = int(-log10(mindiff)) + 6
803 # Find which points are on the rightmost edge.
804 not_smaller_than_edge = (sample[:, i] >= edges[i][-1])
ValueError: cannot convert float NaN to integer
if I plot the array without my masked value against itself: plt.hist2d(y, y, bins = 10) then it works. Am I using the wrong method to mask? I was originally using that syntax to mask a 2D slice of a 3D array, so I am wondering if I am creating an array within an array unnecessarily.

Related

ValueError: RGBA values should be within 0-1 range when plotting scatter plot

I am attempting to generate a scatter plot to show data before and after the PCA transform, similar to this tutorial.
To do this, I am running the following code:
fig, axes = plt.subplots(1,2)
axes[0].scatter(X.iloc[:,0], X.iloc[:,1], c=y)
axes[0].set_xlabel('x1')
axes[0].set_ylabel('x2')
axes[0].set_title('Before PCA')
axes[1].scatter(X_new[:,0], X_new[:,1], c=y)
axes[1].set_xlabel('PC1')
axes[1].set_ylabel('PC2')
axes[1].set_title('After PCA')
plt.show()
Which is causing this error to appear:
ValueError: RGBA values should be within 0-1 range
X is the preprocessed matrix of features, which contains 196 samples and 59 features. Whereas y is the dependent variable and contains two classes [0, 1].
Here is the full error message:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-109-2c4f74ddce3f> in <module>
1 fig, axes = plt.subplots(1,2)
----> 2 axes[0].scatter(X.iloc[:,0], X.iloc[:,1], c=y)
3 axes[0].set_xlabel('x1')
4 axes[0].set_ylabel('x2')
5 axes[0].set_title('Before PCA')
~/anaconda3/lib/python3.7/site-packages/matplotlib/__init__.py in inner(ax, data, *args, **kwargs)
1597 def inner(ax, *args, data=None, **kwargs):
1598 if data is None:
-> 1599 return func(ax, *map(sanitize_sequence, args), **kwargs)
1600
1601 bound = new_sig.bind(ax, *args, **kwargs)
~/anaconda3/lib/python3.7/site-packages/matplotlib/axes/_axes.py in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, **kwargs)
4495 offsets=offsets,
4496 transOffset=kwargs.pop('transform', self.transData),
-> 4497 alpha=alpha
4498 )
4499 collection.set_transform(mtransforms.IdentityTransform())
~/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py in __init__(self, paths, sizes, **kwargs)
881 """
882
--> 883 Collection.__init__(self, **kwargs)
884 self.set_paths(paths)
885 self.set_sizes(sizes)
~/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py in __init__(self, edgecolors, facecolors, linewidths, linestyles, capstyle, joinstyle, antialiaseds, offsets, transOffset, norm, cmap, pickradius, hatch, urls, offset_position, zorder, **kwargs)
125
126 self._hatch_color = mcolors.to_rgba(mpl.rcParams['hatch.color'])
--> 127 self.set_facecolor(facecolors)
128 self.set_edgecolor(edgecolors)
129 self.set_linewidth(linewidths)
~/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py in set_facecolor(self, c)
676 """
677 self._original_facecolor = c
--> 678 self._set_facecolor(c)
679
680 def get_facecolor(self):
~/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py in _set_facecolor(self, c)
659 except AttributeError:
660 pass
--> 661 self._facecolors = mcolors.to_rgba_array(c, self._alpha)
662 self.stale = True
663
~/anaconda3/lib/python3.7/site-packages/matplotlib/colors.py in to_rgba_array(c, alpha)
277 result[mask] = 0
278 if np.any((result < 0) | (result > 1)):
--> 279 raise ValueError("RGBA values should be within 0-1 range")
280 return result
281 # Handle single values.
ValueError: RGBA values should be within 0-1 range
I am unsure what is causing this error and would appreciate help in figuring this out. Thanks!
The c= parameter of ax.scatter can be given in several ways:
A scalar or sequence of n numbers to be mapped to colors using cmap and norm. So a single number, or a list-like 1D sequence of numbers.
A 2D array in which the rows are RGB or RGBA. E.g. something like [[1,0,0], [0,0,1]]. All these values need to be between 0 and 1. Moreover, there should be either 3 (for RGB) or 4 (for RGBA) values per entry.
A sequence of colors of length n. E.g. ["red", "#B789C0", "turquoise"]
A single color format string. E.g. "cornflowerblue".
Now, when an array of numbers is given, to be able to distinguish between the first and the second case, matplotlib just looks at the array dimension. If it is 1D, matplotlib assumes the first case. For 2D, it assumes the second case. Note that also an Nx1 or an 1xN array is considered 2D. You can use np.squeeze() to "squeeze out" the dummy second dimension.

How to plot a windrose when the wind direction is a categorical value

From Dataset Australia Rainfall, I'm trying to predict RainTomorrow. Here is my code given below :
Downloading dataset directly from Kaggle using opendatasets library
import opendatasets as od
dataset_url = 'https://www.kaggle.com/jsphyg/weather-dataset-rattle-package'
od.download(dataset_url)
Importing necessary libraries
import os
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style('darkgrid')
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['figure.figsize'] = (10,6)
matplotlib.rcParams['figure.facecolor'] = '#00000000'
Loading Dataset
data_dir = './weather-dataset-rattle-package'
os.listdir(data_dir)
train_csv = data_dir + '/weatherAUS.csv'
raw_df = pd.read_csv(train_csv)
Explore WindGustDir variable
print('WindGustDir contains', len(raw_df['WindGustDir'].unique()), 'labels')
raw_df['WindGustDir'].unique()
raw_df.WindGustDir.value_counts()
pd.get_dummies(raw_df.WindGustDir, drop_first=True, dummy_na=True).head()
pd.get_dummies(raw_df.WindGustDir, drop_first=True, dummy_na=True).sum(axis=0)
Plotting Windrose
from windrose import WindroseAxes
ax = WindroseAxes.from_ax()
ax.bar(raw_df.WindGustDir, raw_df.Rainfall, normed=True, opening=0.8,
edgecolor='white')
ax.set_legend()
I am unable to figure out which columns should use with WindGustDir or if their is any other option of compare RainTomorrow and WindGustDir .
Error Message
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
e:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
57 try:
---> 58 return bound(*args, **kwds)
59 except TypeError:
TypeError: '<' not supported between instances of 'float' and 'str'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-253-1a1f0fa6bf7a> in <module>
1 ax = WindroseAxes.from_ax()
----> 2 ax.bar(direction=df.WindGustDir, var=df.Rainfall, normed=True, opening=0.8, edgecolor='white')
3 ax.set_legend()
e:\Anaconda3\lib\site-packages\windrose\windrose.py in bar(self, direction, var, **kwargs)
547 """
548
--> 549 bins, nbins, nsector, colors, angles, kwargs = self._init_plot(
550 direction, var, **kwargs
551 )
e:\Anaconda3\lib\site-packages\windrose\windrose.py in _init_plot(self, direction, var, **kwargs)
359
360 # Set the global information dictionnary
--> 361 self._info["dir"], self._info["bins"], self._info["table"] = histogram(
362 direction, var, bins, nsector, normed, blowto
363 )
e:\Anaconda3\lib\site-packages\windrose\windrose.py in histogram(direction, var, bins, nsector, normed, blowto)
746 direction[direction >= 360.] = direction[direction >= 360.] - 360
747
--> 748 table = histogram2d(x=var, y=direction, bins=[var_bins, dir_bins], normed=False)[0]
749 # add the last value to the first to have the table of North winds
750 table[:, 0] = table[:, 0] + table[:, -1]
<__array_function__ internals> in histogram2d(*args, **kwargs)
e:\Anaconda3\lib\site-packages\numpy\lib\twodim_base.py in histogram2d(x, y, bins, range, normed, weights, density)
742 xedges = yedges = asarray(bins)
743 bins = [xedges, yedges]
--> 744 hist, edges = histogramdd([x, y], bins, range, normed, weights, density)
745 return hist, edges[0], edges[1]
746
<__array_function__ internals> in histogramdd(*args, **kwargs)
e:\Anaconda3\lib\site-packages\numpy\lib\histograms.py in histogramdd(sample, bins, range, normed, weights, density)
1071
1072 # Compute the bin number each sample falls into.
-> 1073 Ncount = tuple(
1074 # avoid np.digitize to work around gh-11022
1075 np.searchsorted(edges[i], sample[:, i], side='right')
e:\Anaconda3\lib\site-packages\numpy\lib\histograms.py in <genexpr>(.0)
1073 Ncount = tuple(
1074 # avoid np.digitize to work around gh-11022
-> 1075 np.searchsorted(edges[i], sample[:, i], side='right')
1076 for i in _range(D)
1077 )
<__array_function__ internals> in searchsorted(*args, **kwargs)
e:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in searchsorted(a, v, side, sorter)
1346
1347 """
-> 1348 return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
1349
1350
e:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
65 # Call _wrapit from within the except clause to ensure a potential
66 # exception has a traceback chain.
---> 67 return _wrapit(obj, method, *args, **kwds)
68
69
e:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in _wrapit(obj, method, *args, **kwds)
42 except AttributeError:
43 wrap = None
---> 44 result = getattr(asarray(obj), method)(*args, **kwds)
45 if wrap:
46 if not isinstance(result, mu.ndarray):
TypeError: '<' not supported between instances of 'float' and 'str'
It seems that the direction parameter must be numeric.
Create a dict where each key is a each direction in 'WindGustDir' and the corresponding value is a float in degrees.
.map the dict to df.WindGustDir and plot
Alternatively, create and plot a new column
df.insert(loc=8, column='WindGustDirDeg', value=df.WindGustDir.map(wind_dir_deg))
import pandas as pd
from windrose import WindroseAxes
import numpy as np
# load the downloaded data and dropna
df = pd.read_csv('weatherAUS/weatherAUS.csv').dropna(subset=['WindGustDir'])
# create a dict for WindGustDir to numeric values
wind_dir = ['E', 'ENE', 'NE', 'NNE', 'N', 'NNE', 'NW', 'WNW', 'W', 'WSW', 'SW', 'SSW', 'S', 'SSE', 'SE', 'ESE']
degrees = np.arange(0, 360, 22.5)
wind_dir_deg = dict((zip(wind_dir, degrees)))
# plot and map WindGustDir to the dict
ax = WindroseAxes.from_ax()
ax.bar(direction=df.WindGustDir.map(wind_dir_deg), var=df.Rainfall, normed=True, opening=0.8, edgecolor='white')
ax.set_legend()

How to Add Zeros to Second Dimension of Array in Python?

I have a latitude array with the shape (1111,) and am attempting to use matplotlib pcolormesh, but I'm getting an error since my array is not 2D so I am getting the error not enough values to unpack (expected 2, got 1). Is there a way I can add 1111 zeros to the second dimension of my latitude array? Below is the code I have that is causing the error.
import matplotlib.cm as cm
cmap = cm.get_cmap('BrBG')
cs = plt.pcolormesh(longitude.values, latitude.values, dens, cmap = cmap)
plt.title('Satellite Trajectory')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.savefig('Satellite Trajectory', dpi=200, bbox_inches='tight', pad_inches=0.025)
cb = plt.colorbar(cs, orientation = 'vertical')
cb.set_label(r'Density')
These are the first few lines of my Pandas latitude array:
0 50.224832
1 50.536422
2 50.847827
3 51.159044
4 51.470068
5 51.780895
6 52.091521
7 52.401941
8 52.712151
9 53.022145
10 53.331919
I have the same issue with the longitude array too. Here are some longitude values for reference.
0 108.873007
1 108.989510
2 109.107829
3 109.228010
4 109.350097
5 109.474136
6 109.600176
7 109.728265
8 109.858455
9 109.990798
10 110.125348
The other array shown is dens which is a density array and has the shape (5, 91, 181). Here are a few values for reference:
[6.042968853864891e-12, 6.042894605467602e-12, 6.042777396826408e-12, 6.042616263531836e-12, 6.042410211830538e-12, 6.042158216350682e-12, 6.0361190688090634e-12, 6.038107492458882e-12, 6.039984972063208e-12, 6.041748879958635e-12, 6.030375732644546e-12, 6.027898597657696e-12, 6.0251851962303345e-12, 6.0390021800772395e-12, 6.035096323493865e-12, 6.030879347062723e-12, 6.026343416350273e-12, 6.021480432118012e-12, 6.01628202402901e-12, 6.042274874237314e-12, 6.040409269411221e-1
I'm just stuck how to execute the pcolormesh without getting the following error:
ValueError Traceback (most recent call last)
<ipython-input-54-685815191229> in <module>
7
8
----> 9 cs = plt.pcolormesh(longitude.values, latitude.values, dens, cmap = cmap)
10
11 plt.title('Satellite Trajectory')
~\Anaconda3\lib\site-packages\matplotlib\pyplot.py in pcolormesh(alpha, norm, cmap, vmin, vmax, shading, antialiased, data, *args, **kwargs)
2771 *args, alpha=alpha, norm=norm, cmap=cmap, vmin=vmin,
2772 vmax=vmax, shading=shading, antialiased=antialiased,
-> 2773 **({"data": data} if data is not None else {}), **kwargs)
2774 sci(__ret)
2775 return __ret
~\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs)
1808 "the Matplotlib list!)" % (label_namer, func.__name__),
1809 RuntimeWarning, stacklevel=2)
-> 1810 return func(ax, *args, **kwargs)
1811
1812 inner.__doc__ = _add_data_doc(inner.__doc__,
~\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in pcolormesh(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)
5980 allmatch = (shading == 'gouraud')
5981
-> 5982 X, Y, C = self._pcolorargs('pcolormesh', *args, allmatch=allmatch)
5983 Ny, Nx = X.shape
5984 X = X.ravel()
~\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in _pcolorargs(funcname, allmatch, *args)
5547 if isinstance(Y, np.ma.core.MaskedArray):
5548 Y = Y.data
-> 5549 numRows, numCols = C.shape
5550 else:
5551 raise TypeError(
ValueError: not enough values to unpack (expected 2, got 1)
I'm assuming it is because of the longitde and latitude array shape, so I'm asking for help filling the second dimension so I have an array (1111,1111) rather than (1111,).
If you have another recommendation I would love help. I am new to Python.
Use a for loop or a list comprehension in that case.
latitude = [50.224832, 50.536422, 50.847827, 51.159044, 51.470068]
longitude = [108.873007, 108.989510, 109.107829, 109.228010, 109.350097]
density = [.15,.25,.35,.45,.55]
output = [(latitude[i], longitude[i], density[i]) for i in range(len(latitude))]
print(output)
[(50.224832, 108.873007, 0.15), (50.536422, 108.98951, 0.25), (50.847827, 109.107829, 0.35), (51.159044, 109.22801, 0.45), (51.470068, 109.350097, 0.55)]

How to use LineCollection with set of strings in segments array

I am trying to plot a line with three different colors based on other conditions:
I have a dataframe x_week where the column ['Year-Week'] contains a string of year and week in the form '%Y-w%U'
The column x_week['#ops'] are float numbers
The limits where I want to change color of the line are stored in a dictionary named week that also contains strings in the format '%Y-w%U'
I am using LineCollection, the problem is that it requieres that the string element of the array segment being float, I have already tried date2num but I want to mantain the format '%Y-w%U' for the x-axis
(I already look into here multicolored line with strings linecolllection )
x = x_week['Year-Week']
y = x_week['ops']
# select how to color
color = []
for i in range(3):
color.append('#%06X' % randint(0, 0xFFFFFF))
cmap = ListedColormap(color)
norm = BoundaryNorm([min(x_week['Year-Week']),week[1],week[2],week[3],max(x_week['Year-Week'])], cmap.N)
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
# make line collection
lc = LineCollection(segments, cmap = cmap, norm = norm)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-279-6f4c3a13d43e> in <module>
18
19 # make line collection
---> 20 lc = LineCollection(segments, cmap = cmap, norm = norm)
~\Anaconda3\lib\site-packages\matplotlib\collections.py in __init__(self, segments, linewidths, colors, antialiaseds, linestyles, offsets, transOffset, norm, cmap, pickradius, zorder, facecolors, **kwargs)
1331 **kwargs)
1332
-> 1333 self.set_segments(segments)
1334
1335 def set_segments(self, segments):
~\Anaconda3\lib\site-packages\matplotlib\collections.py in set_segments(self, segments)
1340 for seg in segments:
1341 if not isinstance(seg, np.ma.MaskedArray):
-> 1342 seg = np.asarray(seg, float)
1343 _segments.append(seg)
1344
~\Anaconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
536
537 """
--> 538 return array(a, dtype, copy=False, order=order)
539
540
ValueError: could not convert string to float: '2019-w27'

ValueError: x and y must have same first dimension, but have shapes (4200,) and (16800, 1)

Ihave created an SVR model using SCIKIT-LEARN, I am trying to plot my data but for some reason I am receiving the error:
ValueError: x and y must have same first dimension, but have shapes
(4200,) and (16800, 1)
I have split my data into training and testing data, training the model and making a prediction. My code is:
X_feature = wind_speed
X_feature = X_feature.reshape(-1, 1)## Reshaping array to be 1D from 2D
y_label = Power
y_label = y_label.reshape(-1,1)
timeseries_split = TimeSeriesSplit(n_splits=3) ## Splitting training testing data into 3 splits
for train_index, test_index in timeseries_split.split(X_feature):## for loop to obtain print the training and splitting of the data
print("Training data:",train_index, "Testing data test:", test_index)#
X_train, X_test = X_feature[train_index], X_feature[test_index]
y_train, y_test = y_label[train_index], y_label [test_index]
timeseries_split = TimeSeriesSplit(n_splits=3) ## Splitting training testing data into 3 splits
scaler =pre.MinMaxScaler(feature_range=(0,1)).fit(X_train)## Data is being preprocessed then standard deviation
scaled_wind_speed_train = scaler.transform(X_train)## Wind speed training data is being scaled and then transformed
scaled_wind_speed_test = scaler.transform(X_test)## Wind speed test data is being scaled and then transformed
SVR_model = svm.SVR(kernel='rbf',C=100,gamma=.001).fit(scaled_wind_speed_train,y_train)
y_prediction = SVR_model.predict(scaled_wind_speed_test)
SVR_model.score(scaled_wind_speed_test,y_test)
rmse=numpy.sqrt(mean_squared_error(y_label,y_prediction))
print("RMSE:",rmse)
fig, bx = plt.subplots(figsize=(19,8))
bx.plot(y_prediction, X_feature,'bs')
fig.suptitle('Wind Power Prediction v Wind Speed', fontsize=20)
plt.xlabel('Wind Power Data')
plt.ylabel('Predicted Power')
plt.xticks(rotation=30)
plt.show()
fig, bx = plt.subplots(figsize=(19,8))
bx.plot( y_prediction, y_label)
fig.suptitle('Wind Power Prediction v Measured Wind Power ', fontsize=20)
plt.xlabel('Wind Power Data')
plt.ylabel('Predicted Power')
fig, bx = plt.subplots(figsize=(19,8))
bx.plot(y_prediction)
fig.suptitle('Wind Power Prediction v Measured Wind Power ', fontsize=20)
plt.xlabel('Wind Power Data')
plt.ylabel('Predicted Power')
I believe this code is being genrated when I am trying to obtain the rmse in the line:
rmse=numpy.sqrt(mean_squared_error(y_label,y_prediction))
This error also occurs when I comment this line out and try to plot my data..
My traceback error message is:
ValueError Traceback (most recent call last)
<ipython-input-57-ed11a9ca7fd8> in <module>()
79
80 fig, bx = plt.subplots(figsize=(19,8))
---> 81 bx.plot( y_prediction, y_label)
82 fig.suptitle('Wind Power Prediction v Measured Wind Power ', fontsize=20)
83 plt.xlabel('Wind Power Data')
~/anaconda3_501/lib/python3.6/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
1715 warnings.warn(msg % (label_namer, func.__name__),
1716 RuntimeWarning, stacklevel=2)
-> 1717 return func(ax, *args, **kwargs)
1718 pre_doc = inner.__doc__
1719 if pre_doc is None:
~/anaconda3_501/lib/python3.6/site-packages/matplotlib/axes/_axes.py in plot(self, *args, **kwargs)
1370 kwargs = cbook.normalize_kwargs(kwargs, _alias_map)
1371
-> 1372 for line in self._get_lines(*args, **kwargs):
1373 self.add_line(line)
1374 lines.append(line)
~/anaconda3_501/lib/python3.6/site-packages/matplotlib/axes/_base.py in _grab_next_args(self, *args, **kwargs)
402 this += args[0],
403 args = args[1:]
--> 404 for seg in self._plot_args(this, kwargs):
405 yield seg
406
~/anaconda3_501/lib/python3.6/site-packages/matplotlib/axes/_base.py in _plot_args(self, tup, kwargs)
382 x, y = index_of(tup[-1])
383
--> 384 x, y = self._xy_from_xy(x, y)
385
386 if self.command == 'plot':
~/anaconda3_501/lib/python3.6/site-packages/matplotlib/axes/_base.py in _xy_from_xy(self, x, y)
241 if x.shape[0] != y.shape[0]:
242 raise ValueError("x and y must have same first dimension, but "
--> 243 "have shapes {} and {}".format(x.shape, y.shape))
244 if x.ndim > 2 or y.ndim > 2:
245 raise ValueError("x and y can be no greater than 2-D, but have "
ValueError: x and y must have same first dimension, but have shapes (4200,) and (16800, 1)
I think you have mixed the arguements for mean_squared_error, it should be
rmse=numpy.sqrt(mean_squared_error(y_test,y_prediction))
Update : as per the latest error, try this
fig, bx = plt.subplots(figsize=(19,8))
bx.plot(y_prediction, scaled_wind_speed_test,'bs')
fig.suptitle('Wind Power Prediction v Wind Speed', fontsize=20)
plt.xlabel('Wind Power Data')
plt.ylabel('Predicted Power')
plt.xticks(rotation=30)
plt.show()
Update 2
In case you get error on the other plot try this
fig, bx = plt.subplots(figsize=(19,8))
bx.plot( y_prediction, y_test)
fig.suptitle('Wind Power Prediction v Measured Wind Power ', fontsize=20)
plt.xlabel('Wind Power Data')
plt.ylabel('Predicted Power')
Numpy's function mean_squared_error expects two arrays of the same size.
The error you are getting implies that these two do not have the same size.
You can check your array sizes by
print(array_1.shape)
print(array_2.shape)
if the output you get is
output:
> (4200,)
> (4200, 1)
you can fix by doing
new_array_2 = array_2.transpose()[0]
and then
mean_squared_error(array_1, new_array_2)
if the two input arguments, whatever they are give you the following shapes
print(array_1.shape)
print(array_2.shape)
output:
> (4200,)
> (16800, 1)
try
new_array_1 = scalar.transform(array_1)
or
new_array_2 = scalar.transform(array_2)
until you get arrays with the same number whether it's 16800 or 4200.
Once you have two of the same size, but the one or both still comes with an extra dimension,
then again do
new_new_array_1 = scalar.transform(new_array_1)[0]
and feed these to mean_squared_error, e.g.
mean_squared_error(new_new_array_1, new_array_2)

Categories

Resources