Pandas: a must be greater than 0 unless no samples are taken - python
I am trying to rebalance the data set 'churn_train' by resampling n = 158 records whose 'Churn' column value is 'True', so that 'True' records make up about 20% of the data. I am receiving an error message. The data set is not empty; I checked its shape and value counts. How do I resolve this error message? Any help would be appreciated. Thanks.
Data frame 'churn': Below are some rows of the data frame.
State,Account Length,Area Code,Phone,Intl Plan,VMail Plan,VMail Message,Day Mins,Day Calls,Day Charge,Eve Mins,Eve Calls,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls,Old Churn,Churn
"KS",128,415,"382-4657","no","yes",25,265.100000,110,45.070000,197.400000,99,16.780000,244.700000,91,11.010000,10.000000,3,2.700000,1,"False.","False"
"OH",107,415,"371-7191","no","yes",26,161.600000,123,27.470000,195.500000,103,16.620000,254.400000,103,11.450000,13.700000,3,3.700000,1,"False.","False"
"NJ",137,415,"358-1921","no","no",0,243.400000,114,41.380000,121.200000,110,10.300000,162.600000,104,7.320000,12.200000,5,3.290000,0,"False.","False"
"OH",84,408,"375-9999","yes","no",0,299.400000,71,50.900000,61.900000,88,5.260000,196.900000,89,8.860000,6.600000,7,1.780000,2,"False.","False"
"OK",75,415,"330-6626","yes","no",0,166.700000,113,28.340000,148.300000,122,12.610000,186.900000,121,8.410000,10.100000,3,2.730000,3,"False.","False"
"AL",118,510,"391-8027","yes","no",0,223.400000,98,37.980000,220.600000,101,18.750000,203.900000,118,9.180000,6.300000,6,1.700000,0,"False.","False"
"MA",121,510,"355-9993","no","yes",24,218.200000,88,37.090000,348.500000,108,29.620000,212.600000,118,9.570000,7.500000,7,2.030000,3,"False.","False"
"MO",147,415,"329-9001","yes","no",0,157.000000,79,26.690000,103.100000,94,8.760000,211.800000,96,9.530000,7.100000,6,1.920000,0,"False.","False"
"WV",141,415,"330-8173","yes","yes",37,258.600000,84,43.960000,222.000000,111,18.870000,326.400000,97,14.690000,11.200000,5,3.020000,0,"False.","False"
"IN",65,415,"329-6603","no","no",0,129.100000,137,21.950000,228.500000,83,19.420000,208.800000,111,9.400000,12.700000,6,3.430000,4,"True.","True"
My code:
churn_train['Churn'].value_counts()
False 1913
True 320
Name: Churn, dtype: int64
to_resample = churn_train.loc[churn_train['Churn'] == "True"]
our_resample = to_resample.sample(n = 158, replace = True)
churn_train_rebal = pd.concat([churn_train, our_resample])
Error Message:
ValueError Traceback (most recent call last)
/var/folders/wv/42dn23fd1cb0czpvqdnb6zw00000gn/T/ipykernel_7751/2929105044.py in <module>
1 to_resample = churn_train.loc[churn_train['Churn'] == "True"]
----> 2 our_resample = to_resample.sample(n = 158, replace = True)
3 churn_train_rebal = pd.concat([churn_train, our_resample])
~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py in sample(self, n, frac, replace, weights, random_state, axis, ignore_index)
5452 weights = sample.preprocess_weights(self, weights, axis)
5453
-> 5454 sampled_indices = sample.sample(obj_len, size, replace, weights, rs)
5455 result = self.take(sampled_indices, axis=axis)
5456
~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/sample.py in sample(obj_len, size, replace, weights, random_state)
148 raise ValueError("Invalid weights: weights sum to zero")
149
--> 150 return random_state.choice(obj_len, size=size, replace=replace, p=weights).astype(
151 np.intp, copy=False
152 )
mtrand.pyx in numpy.random.mtrand.RandomState.choice()
ValueError: a must be greater than 0 unless no samples are taken
Related
How can I get rid of the "ValueError: all the input array dimensions except for the concatenation axis must match exactly" error when using a cvxopt function?
first i define some matrix and vector in proper shape . initialization I=np.eye(24) Z=np.zeros((24,24)) a=0.012 b=1.1 gamma1=0.9/80 gamma2=1.1/80 MM=np.eye(24) for i in range (22): MM[i+1,i]=-1 MM[0,23]=-1 M=random.randint(200,300, size=(24,1)) max_pch=5 max_pdch=5 ppp=random.randint(150,200, size=(24,)) define matrix of objective function Q and C is matrix and vector of objective function 1/2 x^T Q x +C^T x , respectively. Q=np.asarray(np.bmat([[a*I,Z,Z,Z],[Z,a*I,Z,Z],[Z,Z,Z,Z],[Z,Z,Z,Z] ])) C=np.asarray(np.bmat([[b*np.ones(24),b*np.ones(24),0*np.ones(24),ppp]])) ##create equal subject In my problem, I have just equal constraint and upper bound and lower bound that define blew. Aeq=np.asarray(np.bmat([[-I,I,Z,I], [-gamma1*I, gamma2*I,MM,Z],[np.zeros((48,96))]])) beq=np.asarray(np.bmat([[M],[np.zeros((72,1))]])) ##create upper and lower bound in shape (1,96) lb=np.asarray(np.bmat([[0*np.ones(24),0*np.ones(24),[0.1],0.1*np.ones(22), [0.1],100*np.ones(24)]])) ub=np.asarray(np.bmat([[max_pch*np.ones(24),max_pdch*np.ones(24),[0.1],0.9*np.ones(22), [0.9],500*np.ones(24)]])) x = solve_qp(P=matrix(Q), q=C.T, G=None,h=None, A=matrix(Aeq), b=beq, lb=lb.T, ub=ub.T,solver='quadprog') ##error --------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-48-111d9695d5a8> in <module> 25 26 x = solve_qp(P=matrix(Q), q=C.T, ---> 27 G=None,h=None, A=matrix(Aeq), b=beq, lb=lb.T, ub=ub.T,solver='quadprog') 28 29 ~\Anaconda3\lib\site-packages\qpsolvers\__init__.py in solve_qp(P, q, G, h, A, b, lb, ub, solver, initvals, sym_proj, verbose, **kwargs) 271 kwargs["verbose"] = verbose 272 try: --> 273 return __solve_function__[solver](*args, **kwargs) 274 except KeyError: 275 raise SolverNotFound(f"solver '{solver}' is not available") ~\Anaconda3\lib\site-packages\qpsolvers\quadprog_.py in quadprog_solve_qp(P, q, G, h, A, b, initvals, verbose) 85 else: 86 qp_C = -vstack([A, G]).T ---> 87 qp_b = -hstack([b, h]) 88 meq 
= A.shape[0] 89 else: # no equality constraint ~\Anaconda3\lib\site-packages\numpy\core\shape_base.py in hstack(tup) 338 return _nx.concatenate(arrs, 0) 339 else: --> 340 return _nx.concatenate(arrs, 1) 341 342 ValueError: all the input array dimensions except for the concatenation axis must match exactly I would be grateful if anybody could help me.
How do I apply SMOTENC to my data frame that has columns with objects and numerics?
> In: data.dtypes Out: Organization Name object Money Raised Currency (in USD) float64 Announced Date datetime64[ns] Total Funding Amount Currency (in USD) float64 Organization Description object Organization Location object Raised Series A int64 Primary Industry object Sub_Ind object Sub_Ind2 object Sub_Ind3 object Sub_Ind4 object Sub_Ind5 object Sub_Ind6 object Sub_Ind7 object Investor1 object Investor2 object Investor3 object Investor4 object Investor5 object Investor6 object Investor7 object Investor8 object Investor9 object Investor10 object Investor11 object > In: x = data.drop(columns=['Raised Series A', 'Announced Date']) > In: y = data['Raised Series A'] > In: from imblearn.over_sampling import SMOTENC > In: smote_nc = SMOTENC(categorical_features=[0,1,3,4,5,7,8,9,10,11,12,13,14,15,16,17, 18,19,20,21,22,23,24], random_state=0) > In: x_resampled, y_resampled = smote_nc.fit_resample(x, y) --------------------------------------------------------------------------- Out: ValueError Traceback (most recent call last) in ----> 1 x_resampled, y_resampled = smote_nc.fit_resample(x, y) ~/opt/anaconda3/envs/unit2/lib/python3.7/site-packages/imblearn/base.py in fit_resample(self, X, y) 81 ) 82 ---> 83 output = self._fit_resample(X, y) 84 85 y_ = (label_binarize(output[1], np.unique(y)) ~/opt/anaconda3/envs/unit2/lib/python3.7/site-packages/imblearn/over_sampling/_smote.py in _fit_resample(self, X, y) 936 def _fit_resample(self, X, y): 937 self.n_features_ = X.shape[1] --> 938 self._validate_estimator() 939 940 # compute the median of the standard deviation of the minority class ~/opt/anaconda3/envs/unit2/lib/python3.7/site-packages/imblearn/over_sampling/_smote.py in _validate_estimator(self) 921 raise ValueError( 922 "Some of the categorical indices are out of range. Indices" --> 923 " should be between 0 and {}".format(self.n_features_) 924 ) 925 self.categorical_features_ = categorical_features ValueError: Some of the categorical indices are out of range. 
Indices should be between 0 and 24 I have been trying different combinations of columns to include in the categorical_features parameter, but none of them work. There are no null values in my data frame either. The reason I am using SMOTENC is that my target vector is extremely skewed: 99.7% Yes and .3% no. Please help.
I had the same problem. Change how you specify categorical_features: instead of a list of column indices, pass a list of booleans, one per column of x, indicating whether that column is categorical. Try this: cat_cols = [] for col in x.columns: if x[col].dtype == 'object': cat_cols.append(True) else: cat_cols.append(False) then: smote_nc = SMOTENC(categorical_features=cat_cols, random_state=0)
Keep getting ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10)
First off, thanks in advance if you can help puzzle this out! I'm trying to balance some customer data for my model. My targets are all 1s and 0s, and the 0s are overwhelmingly abundant. So I created a counter that will start to delete the 0 rows once they surpass the number of 1 rows. But at the very end of my code, when I create the np.delete to get those extra rows off my dataset I keep getting this error I don't really know what to try, because I don't even understand what the error is telling me import pandas as pd import numpy as np from sklearn import preprocessing #%% #Loading the Raw Data raw_csv_data= pd.read_csv('Audiobooks-data_raw.csv') print(display(raw_csv_data.head(20))) #%% df=raw_csv_data.copy() print(display(df.head(20))) #%% print(df.info()) #%% #Separate the Targets from the dataset inputs_all= df.loc[:,'Book length (mins)_overall':'Last visited minus Purchase date'] targets_all= df['Targets'] print(display(inputs_all.head())) print(display(targets_all.head())) #%% #Shuffling the Data to prep for balancing shuffled_indices= np.arange(inputs_all.shape[0]) np.random.shuffle(shuffled_indices) shuffled_inputs= inputs_all.iloc[shuffled_indices] shuffled_targets= targets_all[shuffled_indices] #%% #Balance the Dataset #There are significantly more 0's than 1's in our target. #We want a good accurate model print(inputs_all.shape) print(targets_all.shape) #%% num_one_targets= int(np.sum(targets_all)) zero_targets_counter= 0 indices_to_remove= [] print(num_one_targets) #%% for i in range(targets_all.shape[0]): if targets_all[i]==0: zero_targets_counter +=1 if zero_targets_counter> num_one_targets: indices_to_remove.append(i) #%% inputs_all_balanced= np.delete(inputs_all, indices_to_remove, axis=0) targets_all_balanced= np.delete(targets_all, indices_to_remove, axis=0) Everything works except when I try to group my balanced datasets and delete the excess 0 rows. 
Here is the error: ValueError Traceback (most recent call last) ~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes) 1652 -> 1653 mgr = BlockManager(blocks, axes) 1654 mgr._consolidate_inplace() ~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in __init__(self, blocks, axes, do_integrity_check) 113 if do_integrity_check: --> 114 self._verify_integrity() 115 ~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in _verify_integrity(self) 310 if block._verify_integrity and block.shape[1:] != mgr_shape[1:]: --> 311 construction_error(tot_items, block.shape[1:], self.axes) 312 if len(self.items) != tot_items: ~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in construction_error(tot_items, block_shape, axes, e) 1690 raise ValueError("Shape of passed values is {0}, indices imply {1}".format( -> 1691 passed, implied)) 1692 ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10) During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) in ----> 1 inputs_all_balanced= np.delete(inputs_all, indices_to_remove, axis=0) 2 targets_all_balanced= np.delete(targets_all, indices_to_remove, axis=0) ~\Anaconda3\lib\site-packages\numpy\lib\function_base.py in delete(arr, obj, axis) 4419 4420 if wrap: -> 4421 return wrap(new) 4422 else: 4423 return new ~\Anaconda3\lib\site-packages\pandas\core\generic.py in __array_wrap__(self, result, context) 1907 def __array_wrap__(self, result, context=None): 1908 d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) -> 1909 return self._constructor(result, **d).__finalize__(self) 1910 1911 # ideally we would define this to avoid the getattr checks, but ~\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy) 422 else: 423 mgr = init_ndarray(data, index, columns, dtype=dtype, --> 424 copy=copy) 425 426 # For data is 
list-like, or Iterable (will consume into list) ~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, columns, dtype, copy) 165 values = maybe_infer_to_datetimelike(values) 166 --> 167 return create_block_manager_from_blocks([values], [columns, index]) 168 169 ~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes) 1658 blocks = [getattr(b, 'values', b) for b in blocks] 1659 tot_items = sum(b.shape[0] for b in blocks) -> 1660 construction_error(tot_items, blocks[0].shape[1:], axes, e) 1661 1662 ~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in construction_error(tot_items, block_shape, axes, e) 1689 raise ValueError("Empty data passed with indices specified.") 1690 raise ValueError("Shape of passed values is {0}, indices imply {1}".format( -> 1691 passed, implied)) 1692 1693 ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10)
Try removing the rows with the pandas drop method instead of np.delete: inputs_all_balanced = inputs_all.drop(indices_to_remove,axis=0) targets_all_balanced = targets_all.drop(indices_to_remove,axis=0)
Binning a series returns a seemingly unrelated TypeError
I am trying to slice a dataframe I created into bins: picture of dataframe in case it's relevant # create bins and labels bins = [575, 600, 625, 650] labels = [ "$575-$599", "$600-$624", "$625-$649", "$650-$675" ] schoolSummary["Spending Range"] = pd.cut(schoolSummary["Per Student Budget"], bins, labels = labels) For some reason, I receive this error: --------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-73-b938397739fa> in <module>() 9 10 #schoolSummary["Spending Range"] = ---> 11 pd.cut(schoolSummary["Per Student Budget"], bins, labels = labels) ~\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\tile.py in cut(x, bins, right, labels, retbins, precision, include_lowest, duplicates) 232 include_lowest=include_lowest, 233 dtype=dtype, --> 234 duplicates=duplicates) 235 236 return _postprocess_for_cut(fac, bins, retbins, x_is_series, ~\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\tile.py in _bins_to_cuts(x, bins, right, labels, precision, include_lowest, dtype, duplicates) 335 336 side = 'left' if right else 'right' --> 337 ids = _ensure_int64(bins.searchsorted(x, side=side)) 338 339 if include_lowest: TypeError: '<' not supported between instances of 'int' and 'str' I'm confused, because I did not use '<' in the code at all. I also used print(type(schoolSummary["Per Student Budget"])) and it is a series object, so I don't know what 'int' and 'str' it's referring to. Is it a problem with my bins or labels?
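Due to low rep, I can't comment on your question. Try the following: bins = [575, 600, 625, 650] labels = [ "$575-$599", "$600-$624", "$625-$649", "$650-$675" ] for bin_ in bins: schoolSummary["Spending Range"] = pd.cut(schoolSummary["Per Student Budget"], bin_, labels = labels) The idea is to pass bins as an int instead of a list. (Note: pd.cut also accepts a sequence of bin edges, so the list itself should be valid; the '<' comparison between 'int' and 'str' suggests that the "Per Student Budget" column may contain strings and may need converting to a numeric type first. Also, labels must contain one fewer entry than the number of bin edges.)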
Python, Key Error: 1
The goal of my code is to sort through the data and select only the Visual band or "Vis." band data. From that I eliminated all values that were upper and lower limits to clean up the graph. Finally I wanted to remove all the data that was not a part of the outbursts or decays. My filtering of Vis. band and the upper/lower limit data seems to work fine, but when I try to remove data that had a small slope it shows key error:1, I don't have enough reputation to post an image so I included a link to the plot. The plot shows data after filtering the vis band and upper/lower limits. def timeplot(): import pandas as pd import matplotlib.pyplot as plt import jdcal as jd import math #Getting input from user as to start and end dates for the data (miny,minm,mind) = input("Enter the start date for data in the format (yyyy,mm,dd) ex. (2000,01,01):") (maxy,maxm,maxd) = input("Enter the end date for data in the format (yyyy,mm,dd) ex. (2000,01,01):") #Calculating modified julian dates from the gregorian date input (x,Amin)=jd.gcal2jd(miny,minm,mind) (y,Amax)=jd.gcal2jd(maxy,maxm,maxd) #Creating a table with the numbers corresponding to their month Month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'} #Read in data file pd.set_option('html', False) pd.set_option('max_columns', 30) pd.set_option('max_rows', 2000) data1 = pd.read_csv("50yrdata.csv") data1['ulflag']=1 #Deal with any bad columns data1_limit = data1.JD * 0 ii=0 for mag in data1.Magnitude: if mag[0] == '<': data1.ulflag[ii]=0 data1.Magnitude[ii] = mag[1:] data1_limit[ii] = 1 if mag[0] == '>': data1.ulflag[ii]=0 data1.Magnitude[ii] = mag[1:] data1_limit[ii] = -1 ii +=1 #The data set has Vis, V, I, R, B, TG, TB, TR, CV bands #Selecting data only in the visual band with no upper or lower limits in #magnitude #Converting Julian Date to Modified Julian Date data1.JD=data1.JD-2400000.5 data1.index=data1.ulflag data1=data1.ix[1,['JD','Magnitude','Band']] 
data1.index=data1.Band tdata=data1.ix['Vis.',['JD','Magnitude']] #Changing all of the values from Magnitude from string to float tdata=tdata.astype(float) #Adding on columns to make computations easier tdata['sflag']=0 tdata['slope']=0.000 tdata['aslope']=0.000 tdata['A']=0.000 tdata['B']=0.000 #Finding max and min values of our MJD, Max=Amax Min=Amin #We split the data into N graphs where N is the number of years the data spans N=(int((Max-Min)/365)) #Finding slope of the curve #Attempt to filter the data using #1. A positive slope greater than a certain threshold = outburst #2. A negtaive slope smaller than a certain threshold = decay #3. The absolute value of the slope is smaller than a certain threshold = quiescence length=len(tdata.JD)-1 tdata.A[length]=0 tdata.B[length]=1 for i in range(length): tdata.A[i] = tdata.Magnitude[i+1]-tdata.Magnitude[i] for i in range(length): tdata.B[i] = tdata.JD[i+1]-tdata.JD[i] for i in range(length+1): tdata.slope[i] = tdata.A[i]/tdata.B[i] tdata.aslope=abs(tdata.slope) for i in range(length): if tdata.aslope[i] > 1: tdata.sflag = 1 if tdata.aslope[i] < 1: tdata.sflag = 0 i += 1 #filtering out all the data that has a slope less than a certain threshold tdata.index = tdata.sflag tdata=tdata.astype(float) tdata=tdata.ix[1,['JD','Magnitude']] #Plot selected data fig ,axs = plt.subplots(N,1) fig.subplots_adjust(hspace = .5) #Due to data set being so large, make multiple sub plots instead of one large plot #Magnitude axis needs to be flipped to see when the star has outbursts #When setting the limits of our subplots, we extend them by a small value in #order to make the data easier to read. The large value being added and subtracted #of 365 causes the graph to cover approximately one year in data. 
axs = axs.ravel() for i in range(N): axs[i].scatter(tdata.JD, tdata.Magnitude) axs[i].invert_yaxis() axs[i].set_xlim([Min+(365*(i-1))-5, Max+5-(365*(N-i))]) A=str(miny+i) B=Month[minm] C=str(mind) axs[i].set_title('A Year of data starting from ' + A + ',' + B + ',' +C) #Setting title and axis, I was unable to set a shared x and y axis title #between the subplots, when I attempted to do this it would create another #plot overlapping the 4 subplots making it difficult to see the values fig.suptitle('SS Cyg Data', fontsize = 20) fig.text(0.5, 0.04, 'Modified Julian Date', ha='center', va='center') fig.text(0.04, 0.5, 'Magnitude', ha='center', va='center', rotation='vertical') plt.show() timeplot() The full Traceback to the error is KeyError Traceback (most recent call last) C:\Users\Kenny\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.2.0.1610.win-x86_64\lib\site-packages\IPython\utils\py3compat.pyc in execfile(fname, glob, loc) 195 else: 196 filename = fname --> 197 exec compile(scripttext, filename, 'exec') in glob, loc 198 else: 199 def execfile(fname, *where): C:\Users\Kenny\Dropbox\499\timeplot.py in <module>() 136 plt.show() 137 --> 138 timeplot() C:\Users\Kenny\Dropbox\499\timeplot.py in timeplot() 102 tdata.index = tdata.sflag 103 tdata=tdata.astype(float) --> 104 tdata=tdata.ix[1,['JD','Magnitude']] 105 106 #Plot selected data E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in __getitem__(self, key) 45 pass 46 ---> 47 return self._getitem_tuple(key) 48 else: 49 return self._getitem_axis(key, axis=0) E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_tuple(self, tup) 251 def _getitem_tuple(self, tup): 252 try: --> 253 return self._getitem_lowerdim(tup) 254 except IndexingError: 255 pass E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_lowerdim(self, tup) 361 for i, key in enumerate(tup): 362 if _is_label_like(key) or isinstance(key, tuple): --> 363 section = 
self._getitem_axis(key, axis=i) 364 365 # we have yielded a scalar ? E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_axis(self, key, axis) 411 return self._get_loc(key, axis=axis) 412 --> 413 return self._get_label(key, axis=axis) 414 415 def _getitem_iterable(self, key, axis=0): E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _get_label(self, label, axis) 59 return self.obj._xs(label, axis=axis, copy=False) 60 except Exception: ---> 61 return self.obj._xs(label, axis=axis, copy=True) 62 63 def _get_loc(self, key, axis=0): E:\Enthought\Canopy\User\lib\site-packages\pandas\core\frame.pyc in xs(self, key, axis, level, copy) 2369 loc, new_index = self.index.get_loc_level(key) 2370 else: -> 2371 loc = self.index.get_loc(key) 2372 2373 if isinstance(loc, np.ndarray): E:\Enthought\Canopy\User\lib\site-packages\pandas\core\index.pyc in get_loc(self, key) 714 loc : int if unique index, possibly slice or mask if not 715 """ --> 716 return self._engine.get_loc(key) 717 718 def get_value(self, series, key): E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine.get_loc (pandas\index.c:3542)() E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine.get_loc (pandas\index.c:3373)() E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine._get_loc_duplicates (pandas\index.c:3709)() KeyError: 1