Cannot use print function and mean function from numpy - python

I am taking a Udacity course called Intro to Data Analysis and I am trying to run this code, but I keep getting an error. I am using Python 3. Thanks in advance. In the tutorial videos that explained the code, everything worked fine (I assume because the course uses a different version of Python). I have tried many things but I still cannot make it work.
%pylab inline
import matplotlib.pyplot as plt
import numpy as np
# Summarize the given data
def describe_data(data):
    """Print summary statistics for ``data`` and draw its histogram.

    ``data`` is handed unchanged to ``np.mean``, ``np.std``, ``np.min``,
    ``np.max`` and ``plt.hist``. Nothing is returned; the statistics are
    printed and the histogram is drawn on the current matplotlib figure.
    """
    print ('Mean:', np.mean(data))
    print ('Standard deviation:', np.std(data))
    print ('Minimum:', np.min(data))
    print ('Maximum:', np.max(data))
    plt.hist(data)
describe_data(total_minutes_by_account.values())
This is the error:
Populating the interactive namespace from numpy and matplotlib
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-34-669ffc75246c> in <module>
14 plt.hist(data)
15
---> 16 describe_data(total_minutes_by_account.values())
<ipython-input-34-669ffc75246c> in describe_data(data)
8 # Summarize the given data
9 def describe_data(data):
---> 10 print ('Mean:', np.mean(data))
11 print ('Standard deviation:', np.std(data))
12 print ('Minimum:', np.min(data))
<__array_function__ internals> in mean(*args, **kwargs)
~\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in mean(a, axis, dtype, out, keepdims, where)
3417 return mean(axis=axis, dtype=dtype, out=out, **kwargs)
3418
-> 3419 return _methods._mean(a, axis=axis, dtype=dtype,
3420 out=out, **kwargs)
3421
~\Anaconda3\lib\site-packages\numpy\core\_methods.py in _mean(a, axis, dtype, out, keepdims, where)
188 ret = ret.dtype.type(ret / rcount)
189 else:
--> 190 ret = ret / rcount
191
192 return ret
TypeError: unsupported operand type(s) for /: 'dict_values' and 'int'

I supposed "total_minutes_by_account" is a dataframe. So you can do it in the following way.
import matplotlib.pyplot as plt
import numpy as np
def describe_data(data):
    """Print summary statistics for ``data`` and draw its histogram.

    ``data`` is handed unchanged to ``np.mean``, ``np.std``, ``np.min``,
    ``np.max`` and ``plt.hist``, so it must already be a list or array of
    numbers. Nothing is returned.
    """
    print('Mean:', np.mean(data))
    print('Standard deviation:', np.std(data))
    print('Minimum:', np.min(data))
    print('Maximum:', np.max(data))
    plt.hist(data)


# The traceback in the question reports 'dict_values', so
# total_minutes_by_account is a plain dict. On a dict, ``.values`` is a method
# (not an array), which means ``.values.tolist()`` raises AttributeError.
# Materialise the values view into a list instead.
# (For a pandas Series/DataFrame the equivalent would be ``.values.tolist()``.)
describe_data(list(total_minutes_by_account.values()))
You need to convert your dataframe values to a list before performing any numpy operations.

Related

Python: why do I get an error when I try to interpolate an xarray between dates?

I am trying to interpolate the values of an xarray called pop
pop
I am using the function xarray.interp
dates = pd.date_range('1990-01-01', '2020-01-01', freq='1Y')
popI = pop.interp(time=dates, kwargs={"fill_value": "extrapolate"})
but I get the following error
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-75-1393bc257da7> in <module>
----> 1 popI = pop.interp(time=dates, kwargs={"fill_value": "extrapolate"})
/usr/lib/python3/dist-packages/xarray/core/dataset.py in interp(self, coords, method, assume_sorted, kwargs, method_non_numeric, **coords_kwargs)
3163 if method in ["linear", "nearest"]:
3164 for k, v in validated_indexers.items():
-> 3165 obj, newidx = missing._localize(obj, {k: v})
3166 validated_indexers[k] = newidx[k]
3167
/usr/lib/python3/dist-packages/xarray/core/missing.py in _localize(var, indexes_coords)
561 indexes = {}
562 for dim, [x, new_x] in indexes_coords.items():
--> 563 minval = np.nanmin(new_x.values)
564 maxval = np.nanmax(new_x.values)
565 index = x.to_index()
<__array_function__ internals> in nanmin(*args, **kwargs)
/usr/lib/python3/dist-packages/numpy/lib/nanfunctions.py in nanmin(a, axis, out, keepdims)
319 # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
320 res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
--> 321 if np.isnan(res).any():
322 warnings.warn("All-NaN slice encountered", RuntimeWarning,
323 stacklevel=3)
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
You're calling interp on a Dataset, which always applies the function to all data variables. One of your data variables, mollewide, is a string array, which can't be interpolated. So you can either set it as a coordinate:
popI = pop.set_coords('mollewide').interp(time=dates, kwargs={"fill_value": "extrapolate"})
or you can only operate on the popDensity data variable:
popI = pop["popDensity"].interp(time=dates, kwargs={"fill_value": "extrapolate"})

Reduce xarray.Dataset by custom function

I want to use xarray functionality to reduce a dataset by a custom/external function across a named dimension.
Create dataset to demonstrate the problem
import xarray as xr
import numpy as np
import pandas as pd
time = pd.date_range("2000-01-01", "2001-01-01", freq="D")
sids = np.arange(4)
obs = np.random.random(size=(len(time), len(sids)))
sim = np.random.random(size=(len(time), len(sids)))
original = xr.Dataset({"obs": (("time", "station_id"), obs), "sim": (("time", "station_id"), sim)}, coords={"time": time, "station_id": sids})
I want to calculate the mean_squared_error using the two variables in original, calculating the metric by collapsing the "time" dimension. This should return an xr.Dataset like the following:
<xarray.Dataset>
Dimensions: (station_id: 4)
Coordinates:
* station_id (station_id) int64 0 1 2 3
Data variables:
mean_squared_error (station_id) float64 0.4411 0.183 0.06754 0.9662
I have tried using the reduce function
from sklearn.metrics import mean_squared_error
original.reduce(mean_squared_error, dim="time")
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-243-51111f05437b> in <module>
----> 1 original.reduce(mean_squared_error, dim="time")
~/miniconda3/envs/ml/lib/python3.8/site-packages/xarray/core/dataset.py in reduce(self, func, dim, keep_attrs, keepdims, numeric_only, **kwargs)
4915 # the former is often more efficient
4916 reduce_dims = None # type: ignore[assignment]
-> 4917 variables[name] = var.reduce(
4918 func,
4919 dim=reduce_dims,
~/miniconda3/envs/ml/lib/python3.8/site-packages/xarray/core/variable.py in reduce(self, func, dim, axis, keep_attrs, keepdims, **kwargs)
1721 )
1722 if axis is not None:
-> 1723 data = func(self.data, axis=axis, **kwargs)
1724 else:
1725 data = func(self.data, **kwargs)
~/miniconda3/envs/ml/lib/python3.8/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
TypeError: mean_squared_error() got an unexpected keyword argument 'axis'
There is a package called xskillscore, which has a method to calculate the MSE.
pip install xskillscore
xskillscore.mse(original.obs, original.sim, 'time')
I believe this would work :
# NOTE: the outer np.sqrt makes this the root-mean-squared error (RMSE) along
# "time"; drop np.sqrt to get the plain mean squared error.
np.sqrt(np.square(original["sim"] - original["obs"]).mean(dim="time"))
One solution does not use the internal functions of xarray, but instead requires you to loop over all of your dimension station_id.
from collections import defaultdict
# calculate error metric
# Collect one row per station: the error value and the station id it belongs to.
out = defaultdict(list)
for sid in original.station_id.values:
    # Select a single station so the metric sees two 1-D series over "time".
    data = original.sel(station_id=sid)
    # Square root of sklearn's mean_squared_error, i.e. the RMSE for this station.
    orig_err = np.sqrt(mean_squared_error(data["obs"], data["sim"]))
    out["original"].append(orig_err)
    out["station_id"].append(sid)
# Turn the collected rows back into an xarray object indexed by station_id.
rmse = pd.DataFrame(out).set_index("station_id").to_xarray()
This gives you the solution but does not use the internal broadcasting features of xarray and so would struggle with larger datasets.

Pandas df.mean() throws TypeError: 'NoneType' object is not callable or returns empty Series

The title says it all.
df = pd.DataFrame({"A":np.array([1,2,3,4]),"B":np.array([1,2,3,4])})
df_mean = df.mean(axis=0)
print(df_mean)
The code above outputs an empty series object:
Series([], dtype: float64)
Using df.mean() on a dataframe filled with MNIST data throws the following stacktrace:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-42-bab38039484e> in <module>
2
3 X_train_class, y_train_class, X_valid_class, \
----> 4 y_valid_class, X_test_class, y_test_class = prepare_load_classification_data()
5 X_train_class.mean()
6 # ebm = ExplainableBoostingClassifier()
<ipython-input-37-b1dcfdd01adc> in prepare_load_classification_data()
45 train_features, train_labels, dev_features, \
46 dev_labels, test_features, test_labels = load_data()
---> 47 feature_mean, label_mean = train_features.mean(axis=0), train_labels.mean(axis=0)
48
49 train_features = pd.DataFrame(data=np.where(train_features > feature_mean, 1, 0), columns=FEATURE_NAMES)
c:\users\fmijs\anaconda3\lib\site-packages\pandas\core\generic.py in mean(self, axis, skipna, level, numeric_only, **kwargs)
11107 )
11108 def mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs):
> 11109 return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs)
11110
11111 # pandas\core\generic.py:10924: error: Cannot assign to a method
c:\users\fmijs\anaconda3\lib\site-packages\pandas\core\generic.py in mean(self, axis, skipna, level, numeric_only, **kwargs)
10718 def mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs):
10719 return self._stat_function(
> 10720 "mean", nanops.nanmean, axis, skipna, level, numeric_only, **kwargs
10721 )
10722
c:\users\fmijs\anaconda3\lib\site-packages\pandas\core\generic.py in _stat_function(self, name, func, axis, skipna, level, numeric_only, **kwargs)
10703 return self._agg_by_level(name, axis=axis, level=level, skipna=skipna)
10704 return self._reduce(
> 10705 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
10706 )
10707
c:\users\fmijs\anaconda3\lib\site-packages\pandas\core\series.py in _reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
4150 )
4151 with np.errstate(all="ignore"):
-> 4152 return op(delegate, skipna=skipna, **kwds)
4153
4154 def _reindex_indexer(self, new_index, indexer, copy):
c:\users\fmijs\anaconda3\lib\site-packages\pandas\core\nanops.py in _f(*args, **kwargs)
69 try:
70 with np.errstate(invalid="ignore"):
---> 71 return f(*args, **kwargs)
72 except ValueError as e:
73 # we want to transform an object array
c:\users\fmijs\anaconda3\lib\site-packages\pandas\core\nanops.py in f(values, axis, skipna, **kwds)
122 # TypeError if called
123 kwds.pop("mask", None)
--> 124 result = bn_func(values, axis=axis, **kwds)
125
126 # prefer to treat inf/-inf as NA, but must compute the func
TypeError: 'NoneType' object is not callable
It appears to be related to a somehow corrupted installation of pandas or numpy, but after reinstalling both, downgrading, or starting a new Conda environment, the issue still remains. Any help would be greatly appreciated!
I ran it with pandas 1.1.3 and numpy 1.19.2 and worked.
I ran it also with pandas 1.2.3 and numpy 1.19.5 in a Jupyter and worked.
I updated all and ran it with pandas 1.2.4 and numpy 1.20.2 and it worked.
So either it is because of numpy or the reason is something different.
Do you have really just this code? Or is there other code that might interfere with your snippet?
For me, the issue was caused by importing pandas before importing numpy.
So instead of:
import pandas as pd
import numpy as np
i changed it to
import numpy as np
import pandas as pd
and it fixed the issue

pandas standard deviation with bell curve graph using stats norm

My data frame is
Here I want the standard deviation for the above dataframe and need a standard deviation graph.
I used below code
import numpy as np
import scipy.stats as stats
import pylab as pl
import pandas as pd
h=pd.read_excel(r"C:\Users\monthlyReports\standard_deviation\stan_rawdata.xlsx")
fit = stats.norm.pdf(h, np.mean(h), np.std(h))
pl.plot(h,fit,'-o')
pl.hist(h,normed=True)
pl.show()
but I am getting type error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-15-830c3a5f6c7c> in <module>()
7
8
----> 9 fit = stats.norm.pdf(h, np.mean(h), np.std(h)) #this is a fitting indeed
10
11 pl.plot(h,fit,'-o')
~\AppData\Local\Continuum\anaconda3\lib\sitepackages\scipy\stats\_distn_infrastructure.py in pdf(self, x, *args, **kwds)
1650 args = tuple(map(asarray, args))
1651 dtyp = np.find_common_type([x.dtype, np.float64], [])
-> 1652 x = np.asarray((x - loc)/scale, dtype=dtyp)
1653 cond0 = self._argcheck(*args) & (scale > 0)
1654 cond1 = self._support_mask(x) & (scale > 0)
TypeError: unsupported operand type(s) for -: 'str' and 'float'

Python histogram of split() data

I am trying to make a histogram of a text file containing floats:
import matplotlib.pyplot as plt
c1_file = open('densEst1.txt','r')
c1_data = c1_file.read().split()
c1_sum = float(c1_data.__len__())
plt.hist(c1_data)
plt.show()
The output of c1_data.__len__() works fine, but hist() throws:
C:\Python27\python.exe "C:/x.py"
Traceback (most recent call last):
File "C:/x.py", line 7, in <module>
plt.hist(c1_data)
File "C:\Python27\lib\site-packages\matplotlib\pyplot.py", line 2958, in hist
stacked=stacked, data=data, **kwargs)
File "C:\Python27\lib\site-packages\matplotlib\__init__.py", line 1812, in inner
return func(ax, *args, **kwargs)
File "C:\Python27\lib\site-packages\matplotlib\axes\_axes.py", line 5995, in hist
if len(xi) > 0:
TypeError: len() of unsized object
The main reason the plt.hist call fails is that the argument c1_data is a list containing strings. When you open a file and read it, the result will be a string containing the file's contents:
To read a file’s contents, call f.read(size), which reads some quantity of data and returns it as a string (in text mode) or bytes object (in binary mode).
Emphasis mine.
When you now split this long string you'll get a list containing strings:
Return a list of the words in the string, using sep as the delimiter string.
However a list of strings is not a valid input for plt.hist:
>>> import matplotlib.pyplot as plt
>>> plt.hist(['1', '2'])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
1 import matplotlib.pyplot as plt
----> 2 plt.hist(['1', '2'])
C:\...\lib\site-packages\matplotlib\pyplot.py in hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, hold, data, **kwargs)
3079 histtype=histtype, align=align, orientation=orientation,
3080 rwidth=rwidth, log=log, color=color, label=label,
-> 3081 stacked=stacked, data=data, **kwargs)
3082 finally:
3083 ax._hold = washold
C:\...\lib\site-packages\matplotlib\__init__.py in inner(ax, *args, **kwargs)
1895 warnings.warn(msg % (label_namer, func.__name__),
1896 RuntimeWarning, stacklevel=2)
-> 1897 return func(ax, *args, **kwargs)
1898 pre_doc = inner.__doc__
1899 if pre_doc is None:
C:\...\lib\site-packages\matplotlib\axes\_axes.py in hist(***failed resolving arguments***)
6178 xmax = -np.inf
6179 for xi in x:
-> 6180 if len(xi) > 0:
6181 xmin = min(xmin, xi.min())
6182 xmax = max(xmax, xi.max())
TypeError: len() of unsized object
The solution:
You can simply convert it to a float-array:
>>> import numpy as np
>>> plt.hist(np.array(c1_data, dtype=float))
Here is an example using numpy; it is straightforward, and the results are below with the code.
pandas will work too: the split and the data type can be specified when reading (even for column data), and you can also read the data as a vector (depending on the size of the data).
# !/usr/bin/env python
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import numpy as np
# It is better to read the file with numpy because the values are floats, e.g.:
#a = np.fromfile(open('from_file', 'r'), sep='\n')
from_file = np.array([1, 2, 2.5]) # sample data standing in for the array `a` read from the file
c1_data = from_file.astype(float) # convert the data to float
plt.hist(c1_data) # plt.hist passes its arguments to np.histogram
plt.title("Histogram without 'auto' bins")
plt.show()
plt.hist(c1_data, bins='auto') # plt.hist passes its arguments to np.histogram
plt.title("Histogram with 'auto' bins")
plt.show()

Categories

Resources