Pandas: a must be greater than 0 unless no samples are taken

Pandas: a must be greater than 0 unless no samples are taken - python

I am trying to resample the rebalanced data set 'churn_train' by 20%, or n = 158 records, to have 'True' 'Churn' column values. I am receiving an error message. The data set is not empty as I determined the shape and value counts of it. How do I resolve this error message? Any help would be appreciated. Thanks.
Data frame 'churn': Below are some rows of the data frame.
State,Account Length,Area Code,Phone,Intl Plan,VMail Plan,VMail Message,Day Mins,Day Calls,Day Charge,Eve Mins,Eve Calls,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls,Old Churn,Churn
"KS",128,415,"382-4657","no","yes",25,265.100000,110,45.070000,197.400000,99,16.780000,244.700000,91,11.010000,10.000000,3,2.700000,1,"False.","False"
"OH",107,415,"371-7191","no","yes",26,161.600000,123,27.470000,195.500000,103,16.620000,254.400000,103,11.450000,13.700000,3,3.700000,1,"False.","False"
"NJ",137,415,"358-1921","no","no",0,243.400000,114,41.380000,121.200000,110,10.300000,162.600000,104,7.320000,12.200000,5,3.290000,0,"False.","False"
"OH",84,408,"375-9999","yes","no",0,299.400000,71,50.900000,61.900000,88,5.260000,196.900000,89,8.860000,6.600000,7,1.780000,2,"False.","False"
"OK",75,415,"330-6626","yes","no",0,166.700000,113,28.340000,148.300000,122,12.610000,186.900000,121,8.410000,10.100000,3,2.730000,3,"False.","False"
"AL",118,510,"391-8027","yes","no",0,223.400000,98,37.980000,220.600000,101,18.750000,203.900000,118,9.180000,6.300000,6,1.700000,0,"False.","False"
"MA",121,510,"355-9993","no","yes",24,218.200000,88,37.090000,348.500000,108,29.620000,212.600000,118,9.570000,7.500000,7,2.030000,3,"False.","False"
"MO",147,415,"329-9001","yes","no",0,157.000000,79,26.690000,103.100000,94,8.760000,211.800000,96,9.530000,7.100000,6,1.920000,0,"False.","False"
"WV",141,415,"330-8173","yes","yes",37,258.600000,84,43.960000,222.000000,111,18.870000,326.400000,97,14.690000,11.200000,5,3.020000,0,"False.","False"
"IN",65,415,"329-6603","no","no",0,129.100000,137,21.950000,228.500000,83,19.420000,208.800000,111,9.400000,12.700000,6,3.430000,4,"True.","True"
My code:
churn_train['Churn'].value_counts()
False 1913
True 320
Name: Churn, dtype: int64
# NOTE(review): the ValueError shown below ("a must be greater than 0 unless
# no samples are taken") is raised when the frame being sampled has zero rows,
# i.e. this filter matched nothing. Presumably 'Churn' holds booleans rather
# than the string "True" -- confirm with churn_train['Churn'].dtype.
to_resample = churn_train.loc[churn_train['Churn'] == "True"]
# Draw 158 rows, with replacement, from the selected rows.
our_resample = to_resample.sample(n = 158, replace = True)
# Append the resampled rows to the original training set.
churn_train_rebal = pd.concat([churn_train, our_resample])
Error Message:
ValueError Traceback (most recent call last)
/var/folders/wv/42dn23fd1cb0czpvqdnb6zw00000gn/T/ipykernel_7751/2929105044.py in <module>
1 to_resample = churn_train.loc[churn_train['Churn'] == "True"]
----> 2 our_resample = to_resample.sample(n = 158, replace = True)
3 churn_train_rebal = pd.concat([churn_train, our_resample])
~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py in sample(self, n, frac, replace, weights, random_state, axis, ignore_index)
5452 weights = sample.preprocess_weights(self, weights, axis)
5453
-> 5454 sampled_indices = sample.sample(obj_len, size, replace, weights, rs)
5455 result = self.take(sampled_indices, axis=axis)
5456
~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/sample.py in sample(obj_len, size, replace, weights, random_state)
148 raise ValueError("Invalid weights: weights sum to zero")
149
--> 150 return random_state.choice(obj_len, size=size, replace=replace, p=weights).astype(
151 np.intp, copy=False
152 )
mtrand.pyx in numpy.random.mtrand.RandomState.choice()
ValueError: a must be greater than 0 unless no samples are taken

Related

how can get rid of "ValueError: all the input array dimensions except for the concatenation axis must match exactly" error when use cvxopt function?

First, I define some matrices and vectors with the proper shapes.
initialization
# 24x24 identity and zero blocks used to assemble the larger matrices below.
I=np.eye(24)
Z=np.zeros((24,24))
a=0.012
b=1.1
gamma1=0.9/80
gamma2=1.1/80
# MM starts as the identity, then gets -1 entries just below the diagonal
# plus one -1 in the top-right corner.
MM=np.eye(24)
# NOTE(review): range(22) sets MM[1,0] .. MM[22,21] only, so row 23 gets no
# sub-diagonal entry -- confirm whether range(23) was intended.
for i in range (22):
MM[i+1,i]=-1
MM[0,23]=-1
# NOTE(review): `random` is presumably numpy.random (the stdlib randint takes
# no `size` argument); the import is not shown in this snippet.
M=random.randint(200,300, size=(24,1))
max_pch=5
max_pdch=5
ppp=random.randint(150,200, size=(24,))
define matrix of objective function
Q and C are the matrix and vector of the objective function $\tfrac{1}{2} x^T Q x + C^T x$, respectively.
Q=np.asarray(np.bmat([[a*I,Z,Z,Z],[Z,a*I,Z,Z],[Z,Z,Z,Z],[Z,Z,Z,Z] ]))
C=np.asarray(np.bmat([[b*np.ones(24),b*np.ones(24),0*np.ones(24),ppp]]))
##create equal subject
In my problem, I have only equality constraints plus upper and lower bounds, which are defined below.
Aeq=np.asarray(np.bmat([[-I,I,Z,I], [-gamma1*I, gamma2*I,MM,Z],[np.zeros((48,96))]]))
beq=np.asarray(np.bmat([[M],[np.zeros((72,1))]]))
##create upper and lower bound in shape (1,96)
lb=np.asarray(np.bmat([[0*np.ones(24),0*np.ones(24),[0.1],0.1*np.ones(22),
[0.1],100*np.ones(24)]]))
ub=np.asarray(np.bmat([[max_pch*np.ones(24),max_pdch*np.ones(24),[0.1],0.9*np.ones(22),
[0.9],500*np.ones(24)]]))
# NOTE(review): beq, lb.T and ub.T are 2-D column arrays (built with np.bmat),
# and the traceback below fails in hstack([b, h]) while concatenating along
# axis 1. The solver presumably expects 1-D vectors here (e.g. .ravel() on
# q, b, lb, ub) -- confirm against the qpsolvers documentation.
x = solve_qp(P=matrix(Q), q=C.T,
G=None,h=None, A=matrix(Aeq), b=beq, lb=lb.T, ub=ub.T,solver='quadprog')
##error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-48-111d9695d5a8> in <module>
25
26 x = solve_qp(P=matrix(Q), q=C.T,
---> 27 G=None,h=None, A=matrix(Aeq), b=beq, lb=lb.T, ub=ub.T,solver='quadprog')
28
29
~\Anaconda3\lib\site-packages\qpsolvers\__init__.py in solve_qp(P, q, G, h, A, b, lb, ub,
solver, initvals, sym_proj, verbose, **kwargs)
271 kwargs["verbose"] = verbose
272 try:
--> 273 return __solve_function__[solver](*args, **kwargs)
274 except KeyError:
275 raise SolverNotFound(f"solver '{solver}' is not available")
~\Anaconda3\lib\site-packages\qpsolvers\quadprog_.py in quadprog_solve_qp(P, q, G, h, A, b,
initvals, verbose)
85 else:
86 qp_C = -vstack([A, G]).T
---> 87 qp_b = -hstack([b, h])
88 meq = A.shape[0]
89 else: # no equality constraint
~\Anaconda3\lib\site-packages\numpy\core\shape_base.py in hstack(tup)
338 return _nx.concatenate(arrs, 0)
339 else:
--> 340 return _nx.concatenate(arrs, 1)
341
342
ValueError: all the input array dimensions except for the concatenation axis must match
exactly
If anybody can help me, I would be grateful.

How do I apply SMOTENC to my data frame that has columns with objects and numerics?

> In: data.dtypes
Out: Organization Name object
Money Raised Currency (in USD) float64
Announced Date datetime64[ns]
Total Funding Amount Currency (in USD) float64
Organization Description object
Organization Location object
Raised Series A int64
Primary Industry object
Sub_Ind object
Sub_Ind2 object
Sub_Ind3 object
Sub_Ind4 object
Sub_Ind5 object
Sub_Ind6 object
Sub_Ind7 object
Investor1 object
Investor2 object
Investor3 object
Investor4 object
Investor5 object
Investor6 object
Investor7 object
Investor8 object
Investor9 object
Investor10 object
Investor11 object
> In: x = data.drop(columns=['Raised Series A', 'Announced Date'])
> In: y = data['Raised Series A']
> In: from imblearn.over_sampling import SMOTENC
> In: smote_nc = SMOTENC(categorical_features=[0,1,3,4,5,7,8,9,10,11,12,13,14,15,16,17,
18,19,20,21,22,23,24], random_state=0)
> In: x_resampled, y_resampled = smote_nc.fit_resample(x, y)
---------------------------------------------------------------------------
Out: ValueError Traceback (most recent call last)
in
----> 1 x_resampled, y_resampled = smote_nc.fit_resample(x, y)
~/opt/anaconda3/envs/unit2/lib/python3.7/site-packages/imblearn/base.py in fit_resample(self, X, y)
81 )
82
---> 83 output = self._fit_resample(X, y)
84
85 y_ = (label_binarize(output[1], np.unique(y))
~/opt/anaconda3/envs/unit2/lib/python3.7/site-packages/imblearn/over_sampling/_smote.py in _fit_resample(self, X, y)
936 def _fit_resample(self, X, y):
937 self.n_features_ = X.shape[1]
--> 938 self._validate_estimator()
939
940 # compute the median of the standard deviation of the minority class
~/opt/anaconda3/envs/unit2/lib/python3.7/site-packages/imblearn/over_sampling/_smote.py in _validate_estimator(self)
921 raise ValueError(
922 "Some of the categorical indices are out of range. Indices"
--> 923 " should be between 0 and {}".format(self.n_features_)
924 )
925 self.categorical_features_ = categorical_features
ValueError: Some of the categorical indices are out of range. Indices should be between 0 and 24
I have been trying combinations of columns to include in the categorical_features parameter, but none of them are working. There are no null values in my data frame either. The reason I am using SMOTENC is that my target vector is extremely skewed: 99.7% Yes and 0.3% No. Please help.

I had the same problem. Change the way you do your categorical_features, and put a list of booleans for if it's categorical or not.
Try this:
# One boolean per column of x, in column order: True where the column's
# dtype is object, False otherwise.
cat_cols = [x[col].dtype == 'object' for col in x.columns]
then:
smote_nc = SMOTENC(categorical_features=cat_cols, random_state=0)

Keep getting ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10)

First off, thanks in advance if you can help puzzle this out! I'm trying to balance some customer data for my model. My targets are all 1s and 0s, and the 0s are overwhelmingly abundant. So I created a counter that will start to delete the 0 rows once they surpass the number of 1 rows. But at the very end of my code, when I call np.delete to remove those extra rows from my dataset, I keep getting this error.
I don't really know what to try, because I don't even understand what the error is telling me.
import pandas as pd
import numpy as np
from sklearn import preprocessing
#%%
#Loading the Raw Data
raw_csv_data= pd.read_csv('Audiobooks-data_raw.csv')
print(display(raw_csv_data.head(20)))
#%%
df=raw_csv_data.copy()
print(display(df.head(20)))
#%%
print(df.info())
#%%
#Separate the Targets from the dataset
inputs_all= df.loc[:,'Book length (mins)_overall':'Last visited minus Purchase date']
targets_all= df['Targets']
print(display(inputs_all.head()))
print(display(targets_all.head()))
#%%
#Shuffling the Data to prep for balancing
shuffled_indices= np.arange(inputs_all.shape[0])
np.random.shuffle(shuffled_indices)
shuffled_inputs= inputs_all.iloc[shuffled_indices]
shuffled_targets= targets_all[shuffled_indices]
#%%
#Balance the Dataset
#There are significantly more 0's than 1's in our target.
#We want a good accurate model
print(inputs_all.shape)
print(targets_all.shape)
#%%
num_one_targets= int(np.sum(targets_all))
zero_targets_counter= 0
indices_to_remove= []
print(num_one_targets)
#%%
# Walk the targets in order, counting zeros; once the zero count exceeds
# num_one_targets (the sum of the targets), record index i for removal.
# (The nesting of the second `if` is not recoverable from this paste.)
# NOTE(review): this reads the unshuffled targets_all, not the
# shuffled_targets built above -- confirm that is intended.
for i in range(targets_all.shape[0]):
if targets_all[i]==0:
zero_targets_counter +=1
if zero_targets_counter> num_one_targets:
indices_to_remove.append(i)
#%%
# NOTE(review): per the traceback below, np.delete hands its shortened result
# back to pandas (__array_wrap__), which rebuilds the object with the original
# full-length index -- hence "Shape of passed values ... indices imply ...".
inputs_all_balanced= np.delete(inputs_all, indices_to_remove, axis=0)
targets_all_balanced= np.delete(targets_all, indices_to_remove, axis=0)
Everything works except when I try to group my balanced datasets and delete the excess 0 rows. Here is the error:
ValueError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1652
-> 1653 mgr = BlockManager(blocks, axes)
1654 mgr._consolidate_inplace()
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in __init__(self, blocks, axes, do_integrity_check)
113 if do_integrity_check:
--> 114 self._verify_integrity()
115
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in _verify_integrity(self)
310 if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
--> 311 construction_error(tot_items, block.shape[1:], self.axes)
312 if len(self.items) != tot_items:
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in construction_error(tot_items, block_shape, axes, e)
1690 raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
-> 1691 passed, implied))
1692
ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
in
----> 1 inputs_all_balanced= np.delete(inputs_all, indices_to_remove, axis=0)
2 targets_all_balanced= np.delete(targets_all, indices_to_remove, axis=0)
~\Anaconda3\lib\site-packages\numpy\lib\function_base.py in delete(arr, obj, axis)
4419
4420 if wrap:
-> 4421 return wrap(new)
4422 else:
4423 return new
~\Anaconda3\lib\site-packages\pandas\core\generic.py in __array_wrap__(self, result, context)
1907 def __array_wrap__(self, result, context=None):
1908 d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
-> 1909 return self._constructor(result, **d).__finalize__(self)
1910
1911 # ideally we would define this to avoid the getattr checks, but
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
422 else:
423 mgr = init_ndarray(data, index, columns, dtype=dtype,
--> 424 copy=copy)
425
426 # For data is list-like, or Iterable (will consume into list)
~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, columns, dtype, copy)
165 values = maybe_infer_to_datetimelike(values)
166
--> 167 return create_block_manager_from_blocks([values], [columns, index])
168
169
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1658 blocks = [getattr(b, 'values', b) for b in blocks]
1659 tot_items = sum(b.shape[0] for b in blocks)
-> 1660 construction_error(tot_items, blocks[0].shape[1:], axes, e)
1661
1662
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in construction_error(tot_items, block_shape, axes, e)
1689 raise ValueError("Empty data passed with indices specified.")
1690 raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
-> 1691 passed, implied))
1692
1693
ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10)

Try removing rows with pandas drop instead:
# drop() removes rows by index label and returns a new object.
# NOTE(review): indices_to_remove holds positions taken from range(...); these
# match the labels only if the index is the default 0..n-1 -- confirm.
inputs_all_balanced = inputs_all.drop(indices_to_remove,axis=0)
targets_all_balanced = targets_all.drop(indices_to_remove,axis=0)

Binning a series returns a seemingly unrelated TypeError

I am trying to slice a dataframe I created into bins:
picture of dataframe in case it's relevant
# create bins and labels
# NOTE(review): four bin edges define only three intervals, but four labels
# are given; pd.cut expects one label per interval -- confirm the edges
# (a fifth edge such as 675 may be missing).
bins = [575, 600, 625, 650]
labels = [
"$575-$599",
"$600-$624",
"$625-$649",
"$650-$675"
]
# NOTE(review): the TypeError below compares 'int' with 'str', which suggests
# this column holds strings (e.g. formatted currency) rather than numbers --
# check schoolSummary["Per Student Budget"].dtype.
schoolSummary["Spending Range"] = pd.cut(schoolSummary["Per Student Budget"], bins, labels = labels)
For some reason, I receive this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-73-b938397739fa> in <module>()
9
10 #schoolSummary["Spending Range"] =
---> 11 pd.cut(schoolSummary["Per Student Budget"], bins, labels = labels)
~\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\tile.py in cut(x, bins, right, labels, retbins, precision, include_lowest, duplicates)
232 include_lowest=include_lowest,
233 dtype=dtype,
--> 234 duplicates=duplicates)
235
236 return _postprocess_for_cut(fac, bins, retbins, x_is_series,
~\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\tile.py in _bins_to_cuts(x, bins, right, labels, precision, include_lowest, dtype, duplicates)
335
336 side = 'left' if right else 'right'
--> 337 ids = _ensure_int64(bins.searchsorted(x, side=side))
338
339 if include_lowest:
TypeError: '<' not supported between instances of 'int' and 'str'
I'm confused, because I did not use '<' in the code at all. I also used
print(type(schoolSummary["Per Student Budget"]))
and it is a series object, so I don't know what 'int' and 'str' it's referring to. Is it a problem with my bins or labels?

Due to low rep, I can't comment on your question.
You must try the following
# NOTE(review): pd.cut accepts a sequence of bin edges; passing an int instead
# requests that many equal-width bins, so each iteration below asks for e.g.
# 575 bins and overwrites the column. This likely does not address the
# TypeError in the question -- verify before using.
bins = [575, 600, 625, 650]
labels = [
"$575-$599",
"$600-$624",
"$625-$649",
"$650-$675"
]
for bin_ in bins:
schoolSummary["Spending Range"] = pd.cut(schoolSummary["Per Student Budget"], bin_, labels = labels)
Because bin takes int type, instead of a list.

Python, Key Error: 1

The goal of my code is to sort through the data and select only the Visual band ("Vis.") data. From that, I eliminated all values that were upper or lower limits to clean up the graph. Finally, I wanted to remove all the data that was not part of the outbursts or decays. My filtering of the Vis. band and the upper/lower limit data seems to work fine, but when I try to remove data that has a small slope, it raises KeyError: 1. I don't have enough reputation to post an image, so I included a link to the plot. The plot shows the data after filtering by Vis. band and upper/lower limits.
def timeplot():
# Read "50yrdata.csv", keep the 'Vis.' band rows whose magnitude is not an
# upper/lower limit, flag rows by point-to-point slope, and draw the selected
# points as N stacked scatter subplots. Takes no arguments and returns
# nothing; the date range is read interactively.
# NOTE(review): the body's indentation was lost in this paste.
import pandas as pd
import matplotlib.pyplot as plt
import jdcal as jd
import math
#Getting input from user as to start and end dates for the data
# NOTE(review): unpacking input() into three values relies on Python 2's
# input() evaluating the typed tuple (the traceback below uses the Python 2
# `exec ... in glob, loc` form).
(miny,minm,mind) = input("Enter the start date for data in the format (yyyy,mm,dd) ex. (2000,01,01):")
(maxy,maxm,maxd) = input("Enter the end date for data in the format (yyyy,mm,dd) ex. (2000,01,01):")
#Calculating modified julian dates from the gregorian date input
(x,Amin)=jd.gcal2jd(miny,minm,mind)
(y,Amax)=jd.gcal2jd(maxy,maxm,maxd)
#Creating a table with the numbers corresponding to their month
Month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
#Read in data file
pd.set_option('html', False)
pd.set_option('max_columns', 30)
pd.set_option('max_rows', 2000)
data1 = pd.read_csv("50yrdata.csv")
# ulflag: 1 = plain magnitude, 0 = magnitude given as a '<' or '>' limit
data1['ulflag']=1
#Deal with any bad columns
# data1_limit: 0 by default, 1 for '<' entries, -1 for '>' entries
# (it is not read again in this function)
data1_limit = data1.JD * 0
ii=0
for mag in data1.Magnitude:
if mag[0] == '<':
data1.ulflag[ii]=0
data1.Magnitude[ii] = mag[1:]
data1_limit[ii] = 1
if mag[0] == '>':
data1.ulflag[ii]=0
data1.Magnitude[ii] = mag[1:]
data1_limit[ii] = -1
ii +=1
#The data set has Vis, V, I, R, B, TG, TB, TR, CV bands
#Selecting data only in the visual band with no upper or lower limits in
#magnitude
#Converting Julian Date to Modified Julian Date
data1.JD=data1.JD-2400000.5
# Index by ulflag, then keep the rows labelled 1 (no '<' / '>' prefix)
data1.index=data1.ulflag
data1=data1.ix[1,['JD','Magnitude','Band']]
# Index by Band, then keep the rows labelled 'Vis.'
data1.index=data1.Band
tdata=data1.ix['Vis.',['JD','Magnitude']]
#Changing all of the values from Magnitude from string to float
tdata=tdata.astype(float)
#Adding on columns to make computations easier
tdata['sflag']=0
tdata['slope']=0.000
tdata['aslope']=0.000
tdata['A']=0.000
tdata['B']=0.000
#Finding max and min values of our MJD,
Max=Amax
Min=Amin
#We split the data into N graphs where N is the number of years the data spans
N=(int((Max-Min)/365))
#Finding slope of the curve
#Attempt to filter the data using
#1. A positive slope greater than a certain threshold = outburst
#2. A negative slope smaller than a certain threshold = decay
#3. The absolute value of the slope is smaller than a certain threshold = quiescence
# A = forward difference of Magnitude, B = forward difference of JD,
# slope = A / B; the last row is given A=0, B=1 so its slope is 0.
length=len(tdata.JD)-1
tdata.A[length]=0
tdata.B[length]=1
for i in range(length):
tdata.A[i] = tdata.Magnitude[i+1]-tdata.Magnitude[i]
for i in range(length):
tdata.B[i] = tdata.JD[i+1]-tdata.JD[i]
for i in range(length+1):
tdata.slope[i] = tdata.A[i]/tdata.B[i]
tdata.aslope=abs(tdata.slope)
# NOTE(review): `tdata.sflag = ...` assigns the whole column, not element i
# (compare tdata.A[i] above), so after the loop every row carries the flag of
# the last row tested -- probably tdata.sflag[i] was intended. If that last
# value is 0, no index label 1 exists for the .ix[1, ...] lookup below, which
# is consistent with the reported KeyError: 1.
# `i += 1` does not affect the iteration of a for loop, and a slope of
# exactly 1 matches neither branch.
for i in range(length):
if tdata.aslope[i] > 1:
tdata.sflag = 1
if tdata.aslope[i] < 1:
tdata.sflag = 0
i += 1
#filtering out all the data that has a slope less than a certain threshold
tdata.index = tdata.sflag
tdata=tdata.astype(float)
# The traceback below points at this line as the source of KeyError: 1.
tdata=tdata.ix[1,['JD','Magnitude']]
#Plot selected data
fig ,axs = plt.subplots(N,1)
fig.subplots_adjust(hspace = .5)
#Due to data set being so large, make multiple sub plots instead of one large plot
#Magnitude axis needs to be flipped to see when the star has outbursts
#When setting the limits of our subplots, we extend them by a small value in
#order to make the data easier to read. The large value being added and subtracted
#of 365 causes the graph to cover approximately one year in data.
axs = axs.ravel()
for i in range(N):
axs[i].scatter(tdata.JD, tdata.Magnitude)
axs[i].invert_yaxis()
axs[i].set_xlim([Min+(365*(i-1))-5, Max+5-(365*(N-i))])
A=str(miny+i)
B=Month[minm]
C=str(mind)
axs[i].set_title('A Year of data starting from ' + A + ',' + B + ',' +C)
#Setting title and axis, I was unable to set a shared x and y axis title
#between the subplots, when I attempted to do this it would create another
#plot overlapping the 4 subplots making it difficult to see the values
fig.suptitle('SS Cyg Data', fontsize = 20)
fig.text(0.5, 0.04, 'Modified Julian Date', ha='center', va='center')
fig.text(0.04, 0.5, 'Magnitude', ha='center', va='center', rotation='vertical')
plt.show()
timeplot()
The full Traceback to the error is
KeyError Traceback (most recent call last)
C:\Users\Kenny\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.2.0.1610.win-x86_64\lib\site-packages\IPython\utils\py3compat.pyc in execfile(fname, glob, loc)
195 else:
196 filename = fname
--> 197 exec compile(scripttext, filename, 'exec') in glob, loc
198 else:
199 def execfile(fname, *where):
C:\Users\Kenny\Dropbox\499\timeplot.py in <module>()
136 plt.show()
137
--> 138 timeplot()
C:\Users\Kenny\Dropbox\499\timeplot.py in timeplot()
102 tdata.index = tdata.sflag
103 tdata=tdata.astype(float)
--> 104 tdata=tdata.ix[1,['JD','Magnitude']]
105
106 #Plot selected data
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in __getitem__(self, key)
45 pass
46
---> 47 return self._getitem_tuple(key)
48 else:
49 return self._getitem_axis(key, axis=0)
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_tuple(self, tup)
251 def _getitem_tuple(self, tup):
252 try:
--> 253 return self._getitem_lowerdim(tup)
254 except IndexingError:
255 pass
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_lowerdim(self, tup)
361 for i, key in enumerate(tup):
362 if _is_label_like(key) or isinstance(key, tuple):
--> 363 section = self._getitem_axis(key, axis=i)
364
365 # we have yielded a scalar ?
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_axis(self, key, axis)
411 return self._get_loc(key, axis=axis)
412
--> 413 return self._get_label(key, axis=axis)
414
415 def _getitem_iterable(self, key, axis=0):
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _get_label(self, label, axis)
59 return self.obj._xs(label, axis=axis, copy=False)
60 except Exception:
---> 61 return self.obj._xs(label, axis=axis, copy=True)
62
63 def _get_loc(self, key, axis=0):
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\frame.pyc in xs(self, key, axis, level, copy)
2369 loc, new_index = self.index.get_loc_level(key)
2370 else:
-> 2371 loc = self.index.get_loc(key)
2372
2373 if isinstance(loc, np.ndarray):
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\index.pyc in get_loc(self, key)
714 loc : int if unique index, possibly slice or mask if not
715 """
--> 716 return self._engine.get_loc(key)
717
718 def get_value(self, series, key):
E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine.get_loc (pandas\index.c:3542)()
E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine.get_loc (pandas\index.c:3373)()
E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine._get_loc_duplicates (pandas\index.c:3709)()
KeyError: 1

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Pandas: a must be greater than 0 unless no samples are taken - python

Related

how can get rid of "ValueError: all the input array dimensions except for the concatenation axis must match exactly" error when use cvxopt function?

How do I apply SMOTENC to my data frame that has columns with objects and numerics?

Keep getting ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10)

Binning a series returns a seemingly unrelated TypeError

Python, Key Error: 1

Categories

Resources