The goal of my code is to sort through the data and select only the Visual band or "Vis." band data. From that I eliminated all values that were upper and lower limits to clean up the graph. Finally I wanted to remove all the data that was not a part of the outbursts or decays. My filtering of Vis. band and the upper/lower limit data seems to work fine, but when I try to remove data that had a small slope it shows key error:1, I don't have enough reputation to post an image so I included a link to the plot. The plot shows data after filtering the vis band and upper/lower limits.
def timeplot():
import pandas as pd
import matplotlib.pyplot as plt
import jdcal as jd
import math
#Getting input from user as to start and end dates for the data
(miny,minm,mind) = input("Enter the start date for data in the format (yyyy,mm,dd) ex. (2000,01,01):")
(maxy,maxm,maxd) = input("Enter the end date for data in the format (yyyy,mm,dd) ex. (2000,01,01):")
#Calculating modified julian dates from the gregorian date input
(x,Amin)=jd.gcal2jd(miny,minm,mind)
(y,Amax)=jd.gcal2jd(maxy,maxm,maxd)
#Creating a table with the numbers corresponding to their month
Month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
#Read in data file
pd.set_option('html', False)
pd.set_option('max_columns', 30)
pd.set_option('max_rows', 2000)
data1 = pd.read_csv("50yrdata.csv")
data1['ulflag']=1
#Deal with any bad columns
data1_limit = data1.JD * 0
ii=0
for mag in data1.Magnitude:
if mag[0] == '<':
data1.ulflag[ii]=0
data1.Magnitude[ii] = mag[1:]
data1_limit[ii] = 1
if mag[0] == '>':
data1.ulflag[ii]=0
data1.Magnitude[ii] = mag[1:]
data1_limit[ii] = -1
ii +=1
#The data set has Vis, V, I, R, B, TG, TB, TR, CV bands
#Selecting data only in the visual band with no upper or lower limits in
#magnitude
#Converting Julian Date to Modified Julian Date
data1.JD=data1.JD-2400000.5
data1.index=data1.ulflag
data1=data1.ix[1,['JD','Magnitude','Band']]
data1.index=data1.Band
tdata=data1.ix['Vis.',['JD','Magnitude']]
#Changing all of the values from Magnitude from string to float
tdata=tdata.astype(float)
#Adding on columns to make computations easier
tdata['sflag']=0
tdata['slope']=0.000
tdata['aslope']=0.000
tdata['A']=0.000
tdata['B']=0.000
#Finding max and min values of our MJD,
Max=Amax
Min=Amin
#We split the data into N graphs where N is the number of years the data spans
N=(int((Max-Min)/365))
#Finding slope of the curve
#Attempt to filter the data using
#1. A positive slope greater than a certain threshold = outburst
#2. A negtaive slope smaller than a certain threshold = decay
#3. The absolute value of the slope is smaller than a certain threshold = quiescence
length=len(tdata.JD)-1
tdata.A[length]=0
tdata.B[length]=1
for i in range(length):
tdata.A[i] = tdata.Magnitude[i+1]-tdata.Magnitude[i]
for i in range(length):
tdata.B[i] = tdata.JD[i+1]-tdata.JD[i]
for i in range(length+1):
tdata.slope[i] = tdata.A[i]/tdata.B[i]
tdata.aslope=abs(tdata.slope)
for i in range(length):
if tdata.aslope[i] > 1:
tdata.sflag = 1
if tdata.aslope[i] < 1:
tdata.sflag = 0
i += 1
#filtering out all the data that has a slope less than a certain threshold
tdata.index = tdata.sflag
tdata=tdata.astype(float)
tdata=tdata.ix[1,['JD','Magnitude']]
#Plot selected data
fig ,axs = plt.subplots(N,1)
fig.subplots_adjust(hspace = .5)
#Due to data set being so large, make multiple sub plots instead of one large plot
#Magnitude axis needs to be flipped to see when the star has outbursts
#When setting the limits of our subplots, we extend them by a small value in
#order to make the data easier to read. The large value being added and subtracted
#of 365 causes the graph to cover approximately one year in data.
axs = axs.ravel()
for i in range(N):
axs[i].scatter(tdata.JD, tdata.Magnitude)
axs[i].invert_yaxis()
axs[i].set_xlim([Min+(365*(i-1))-5, Max+5-(365*(N-i))])
A=str(miny+i)
B=Month[minm]
C=str(mind)
axs[i].set_title('A Year of data starting from ' + A + ',' + B + ',' +C)
#Setting title and axis, I was unable to set a shared x and y axis title
#between the subplots, when I attempted to do this it would create another
#plot overlapping the 4 subplots making it difficult to see the values
fig.suptitle('SS Cyg Data', fontsize = 20)
fig.text(0.5, 0.04, 'Modified Julian Date', ha='center', va='center')
fig.text(0.04, 0.5, 'Magnitude', ha='center', va='center', rotation='vertical')
plt.show()
timeplot()
The full Traceback to the error is
KeyError Traceback (most recent call last)
C:\Users\Kenny\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.2.0.1610.win-x86_64\lib\site-packages\IPython\utils\py3compat.pyc in execfile(fname, glob, loc)
195 else:
196 filename = fname
--> 197 exec compile(scripttext, filename, 'exec') in glob, loc
198 else:
199 def execfile(fname, *where):
C:\Users\Kenny\Dropbox\499\timeplot.py in <module>()
136 plt.show()
137
--> 138 timeplot()
C:\Users\Kenny\Dropbox\499\timeplot.py in timeplot()
102 tdata.index = tdata.sflag
103 tdata=tdata.astype(float)
--> 104 tdata=tdata.ix[1,['JD','Magnitude']]
105
106 #Plot selected data
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in __getitem__(self, key)
45 pass
46
---> 47 return self._getitem_tuple(key)
48 else:
49 return self._getitem_axis(key, axis=0)
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_tuple(self, tup)
251 def _getitem_tuple(self, tup):
252 try:
--> 253 return self._getitem_lowerdim(tup)
254 except IndexingError:
255 pass
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_lowerdim(self, tup)
361 for i, key in enumerate(tup):
362 if _is_label_like(key) or isinstance(key, tuple):
--> 363 section = self._getitem_axis(key, axis=i)
364
365 # we have yielded a scalar ?
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_axis(self, key, axis)
411 return self._get_loc(key, axis=axis)
412
--> 413 return self._get_label(key, axis=axis)
414
415 def _getitem_iterable(self, key, axis=0):
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _get_label(self, label, axis)
59 return self.obj._xs(label, axis=axis, copy=False)
60 except Exception:
---> 61 return self.obj._xs(label, axis=axis, copy=True)
62
63 def _get_loc(self, key, axis=0):
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\frame.pyc in xs(self, key, axis, level, copy)
2369 loc, new_index = self.index.get_loc_level(key)
2370 else:
-> 2371 loc = self.index.get_loc(key)
2372
2373 if isinstance(loc, np.ndarray):
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\index.pyc in get_loc(self, key)
714 loc : int if unique index, possibly slice or mask if not
715 """
--> 716 return self._engine.get_loc(key)
717
718 def get_value(self, series, key):
E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine.get_loc (pandas\index.c:3542)()
E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine.get_loc (pandas\index.c:3373)()
E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine._get_loc_duplicates (pandas\index.c:3709)()
KeyError: 1
Related
I am trying to find the gap statistics of my clusters like this:
from gap_statistic import OptimalK
# creat function
def KMeans_clustering_func(X, k):
# Include any clustering Algorithm that can return cluster centers
m = KMeans(random_state=11, n_clusters=k)
m.fit(X)
# Return the location of each cluster center and the labels for each point.
return m.cluster_centers_, m.predict(X)
# create a wrapper around OptimalK to extract cluster centers and cluster labels
optimalK = OptimalK(clusterer=KMeans_clustering_func)
# run optimal K on the input data (subset_scaled_interim) and number of clusters
n_clusters = optimalK(X, cluster_array=np.arange(2, 21))
I encounter this error:
inAlgError Traceback (most recent call last)
Input In [211], in <cell line: 19>()
15 optimalK = OptimalK(clusterer=KMeans_clustering_func)
17 # run optimal K on the input data (subset_scaled_interim) and number of clusters
---> 19 n_clusters = optimalK(X, cluster_array=np.arange(2,25))
20 print('Optimal clusters: ', n_clusters)
22 # Gap Statistics data frame
File ~/.local/lib/python3.8/site-packages/gap_statistic/optimalK.py:134, in OptimalK.__call__(self, X, n_refs, cluster_array)
131 engine = self._process_non_parallel
133 # Calculate the gaps for each cluster count.
--> 134 for gap_calc_result in engine(X, n_refs, cluster_array):
135
136 # Assign this loop's gap statistic to gaps
137 gap_df = gap_df.append(
138 {
139 "n_clusters": gap_calc_result.n_clusters,
(...)
147 ignore_index=True,
148 )
149 gap_df["gap_k+1"] = gap_df["gap_value"].shift(-1)
File ~/.local/lib/python3.8/site-packages/gap_statistic/optimalK.py:361, in OptimalK._process_with_joblib(self, X, n_refs, cluster_array)
356 raise EnvironmentError(
357 "joblib is not installed; cannot use joblib as the parallel backend!"
358 )
360 with Parallel(n_jobs=self.n_jobs) as parallel:
--> 361 for gap_calc_result in parallel(
362 delayed(self._calculate_gap)(X, n_refs, n_clusters)
363 for n_clusters in cluster_array
364 ):
365 yield gap_calc_result
LinAlgError: Last 2 dimensions of the array must be square
How to fix it?
The neg_ctl_df dataframe contains negative control and the coding_gene_df contains my gene-of-interest.
I want to perform normalization for each sample by subtracting the median of the negative controls within the sample.
import pandas as pd
# Median of the NEGATIVE controls
neg_ctl_median = neg_ctl_df.iloc[:,-29:].median()
# Normalize the samples
i = []
for sample in coding_gene_df.iloc[:,-29:].astype(float).itertuples():
for s in sample:
norm_val = s - neg_ctl_median # Subtract the median of the NEGATIVE controls within the patient sample
norm_val = norm_val / pos_ctl_median # Divide the median of the POSITIVE controls within the patient sample (replace sample value with the value that has already been normalized against negative control)
norm_val = norm_val / probeset_norm # Probeset normalization (quantile normalization)
i.append(norm_val)
pd.DataFrame(i)
Traceback:
---------------------------------------------------------------------------
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
<ipython-input-30-5f90e90de22c> in <module>()
12 for sample in coding_gene_df.iloc[:,-29:].astype(float).itertuples():
13 for s in sample:
---> 14 norm_val = s - neg_ctl_median # Subtract the median of the NEGATIVE controls within the patient sample
15 norm_val = norm_val / pos_ctl_median # Divide the median of the POSITIVE controls within the patient sample (replace sample value with the value that has already been normalized against negative control)
16 norm_val = norm_val / probeset_norm # Probeset normalization (quantile normalization)
5 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/ops/common.py in new_method(self, other)
67 other = item_from_zerodim(other)
68
---> 69 return method(self, other)
70
71 return new_method
/usr/local/lib/python3.7/dist-packages/pandas/core/arraylike.py in __rsub__(self, other)
102 #unpack_zerodim_and_defer("__rsub__")
103 def __rsub__(self, other):
--> 104 return self._arith_method(other, roperator.rsub)
105
106 #unpack_zerodim_and_defer("__mul__")
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in _arith_method(self, other, op)
5524
5525 with np.errstate(all="ignore"):
-> 5526 result = ops.arithmetic_op(lvalues, rvalues, op)
5527
5528 return self._construct_result(result, name=res_name)
/usr/local/lib/python3.7/dist-packages/pandas/core/ops/array_ops.py in arithmetic_op(left, right, op)
222 _bool_arith_check(op, left, right)
223
--> 224 res_values = _na_arithmetic_op(left, right, op)
225
226 return res_values
/usr/local/lib/python3.7/dist-packages/pandas/core/ops/array_ops.py in _na_arithmetic_op(left, right, op, is_cmp)
164
165 try:
--> 166 result = func(left, right)
167 except TypeError:
168 if is_object_dtype(left) or is_object_dtype(right) and not is_cmp:
/usr/local/lib/python3.7/dist-packages/pandas/core/roperator.py in rsub(left, right)
11
12 def rsub(left, right):
---> 13 return right - left
14
15
UFuncTypeError: ufunc 'subtract' did not contain a loop with signature matching types (dtype('<U6'), dtype('float64')) -> None
Samples:
coding_gene_df.iloc[1:10,-29:-27].to_dict()
{'12h_P1_T4_TimeC2_PIDC4_Non-Survivor': {'CNTN2': '6.35',
'KCNA2': '5.29',
'LOC79160': '5.99',
'PTGIS': '5.66',
'TTTY11': '3.91',
'VPS4B': '9.68',
'XRCC1': '9.09',
'ZC3HC1': '7.19',
'ZFAS1': '8.68'},
'48h_P1_T6_TimeC3_PIDC1_Non-Survivor': {'CNTN2': '6.6',
'KCNA2': '5.36',
'LOC79160': '6.18',
'PTGIS': '5.54',
'TTTY11': '3.92',
'VPS4B': '9.51',
'XRCC1': '9.15',
'ZC3HC1': '7.05',
'ZFAS1': '8.46'}}
Negative controls:
neg_ctl_df.iloc[1:10,-29:-27].to_dict()
{'12h_P1_T4_TimeC2_PIDC4_Non-Survivor': {'---': '8.45'},
'48h_P1_T6_TimeC3_PIDC1_Non-Survivor': {'---': '8.16'}}
I have two dataset in csv format:
df2
type prediction 100000 155000
0 0 2.60994 3.40305
1 1 10.82100 34.68900
0 0 4.29470 3.74023
0 0 7.81339 9.92839
0 0 28.37480 33.58000
df
TIMESTEP id type y z v_acc
100000 8054 1 -0.317192 -0.315662 15.54430
100000 669 0 0.352031 -0.008087 2.60994
100000 520 0 0.437786 0.000325 5.28670
100000 2303 1 0.263105 0.132615 7.81339
105000 8055 1 0.113863 0.036407 5.94311
I am trying to match value of df2[100000] to df1[v_acc]. If value matched, I am making scatter plot from df with columns y and z. After that I want to to annoted scatter point with matched value.
What I want is:
(I want all annotaions in a same plot).
I tried to code in python for such condition but I am not getting all annotation points in a single plot instead I am getting multi plots with a single annotation.
I am also getting this error:
TypeError Traceback (most recent call last)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/IPython/core/formatters.py:339, in BaseFormatter.__call__(self, obj)
337 pass
338 else:
--> 339 return printer(obj)
340 # Finally look for special method names
341 method = get_real_method(obj, self.print_method)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/IPython/core/pylabtools.py:151, in print_figure(fig, fmt, bbox_inches, base64, **kwargs)
148 from matplotlib.backend_bases import FigureCanvasBase
149 FigureCanvasBase(fig)
--> 151 fig.canvas.print_figure(bytes_io, **kw)
152 data = bytes_io.getvalue()
153 if fmt == 'svg':
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/matplotlib/backend_bases.py:2295, in FigureCanvasBase.print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2289 renderer = _get_renderer(
2290 self.figure,
2291 functools.partial(
2292 print_method, orientation=orientation)
2293 )
2294 with getattr(renderer, "_draw_disabled", nullcontext)():
-> 2295 self.figure.draw(renderer)
2297 if bbox_inches:
...
189 if len(self) == 1:
190 return converter(self.iloc[0])
--> 191 raise TypeError(f"cannot convert the series to {converter}")
TypeError: cannot convert the series to <class 'float'>
Can I get some help to make a plot as I want?
Thank you.
My code is here:
df2 = pd.read_csv('./result.csv')
print(df2.columns)
#print(df2.head(10))
df = pd.read_csv('./main.csv')
df = df[df['TIMESTEP'] == 100000]
for i in df['v_acc']:
for j in df2['100000']:
# sometimes numbers are long and different after decimals.So mathing 0.2f only
if "{0:0.2f}".format(i) == "{0:0.2f}".format(j):
plt.figure(figsize = (10,8))
sns.scatterplot(data = df, x = "y", y = "z", hue = "type", palette=['red','dodgerblue'], legend='full')
plt.annotate(i, (df['y'][df['v_acc'] == i], df['z'][df['v_acc'] == i]))
plt.grid(False)
plt.show()
break
the reason for the multiple plots is because are you using plt.figure() inside the loop. This will create a single figure for each loop. You need to create that outside and only the individual scatter and annotate within the loop. Here is the updated code that ran for the data you provided. Other than that, think your code is fine...
fig, ax=plt.subplots(figsize = (7,7)) ### Keep this before the loop and call it as subplot
for i in df['v_acc']:
for j in df2[100000]:
# sometimes numbers are long and different after decimals.So mathing 0.2f only
if "{0:0.2f}".format(i) == "{0:0.2f}".format(j):
#plt.figure(figsize = (10,8))
ax=sns.scatterplot(data = df, x = "y", y = "z", hue = "type", palette=['red','dodgerblue'], legend='full')
ax.annotate(i, (df['y'][df['v_acc'] == i], df['z'][df['v_acc'] == i]))
break
plt.grid(False) ### Keep these two after the loop, just one show for one plot
plt.show()
Output plot
I am trying to plot a line with three different colors based on other conditions:
I have a dataframe x_week where the column ['Year-Week'] contains a string of year and week in the form '%Y-w%U'
The column x_week['#ops'] are float numbers
The limits where I want to change color of the line are stored in a dictionary named week that also contains strings in the format '%Y-w%U'
I am using LineCollection, the problem is that it requieres that the string element of the array segment being float, I have already tried date2num but I want to mantain the format '%Y-w%U' for the x-axis
(I already look into here multicolored line with strings linecolllection )
x = x_week['Year-Week']
y = x_week['ops']
# select how to color
color = []
for i in range(3):
color.append('#%06X' % randint(0, 0xFFFFFF))
cmap = ListedColormap(color)
norm = BoundaryNorm([min(x_week['Year-Week']),week[1],week[2],week[3],max(x_week['Year-Week'])], cmap.N)
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
# make line collection
lc = LineCollection(segments, cmap = cmap, norm = norm)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-279-6f4c3a13d43e> in <module>
18
19 # make line collection
---> 20 lc = LineCollection(segments, cmap = cmap, norm = norm)
~\Anaconda3\lib\site-packages\matplotlib\collections.py in __init__(self, segments, linewidths, colors, antialiaseds, linestyles, offsets, transOffset, norm, cmap, pickradius, zorder, facecolors, **kwargs)
1331 **kwargs)
1332
-> 1333 self.set_segments(segments)
1334
1335 def set_segments(self, segments):
~\Anaconda3\lib\site-packages\matplotlib\collections.py in set_segments(self, segments)
1340 for seg in segments:
1341 if not isinstance(seg, np.ma.MaskedArray):
-> 1342 seg = np.asarray(seg, float)
1343 _segments.append(seg)
1344
~\Anaconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
536
537 """
--> 538 return array(a, dtype, copy=False, order=order)
539
540
ValueError: could not convert string to float: '2019-w27'
I'm totally new to all of this and can't figure out why I have this KeyError. Any pointers appreciated!
Trying to use some sample medical claim data from CMS with a sample program gleaned from a YouTube tutorial that I viewed... Wondering if the error is because some of the values for 'HCPCS_CD1' are blank maybe?
# packages for data and visual analysis
import numpy as np
import pandas as pd
from sklearn import svm
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(font_scale=1.2)
%matplotlib inline
# begin
claims = pd.read_csv('DE1_0_2008_to_2010_Outpatient_Claims_Sample_1_CCONLY.csv')
print(claims.head())
# plot data
sns.lmplot('HCPCS_CD1','HCPCS_CD2',data=claims,hue='ICD9_DGNS_CD_1',palette='Set1',fit_reg=False,scatter_kws={"s":70});
# format and preprocess training data - either it's a ECC or not
type_label = np.where(claims['ICD9_DGNS_CD_1']=='1561',0,1)
claim_features = claims.columns.values[1:].tolist()
#claim_features - limit which columns to consider
domain = claims[['ICD9_DGNS_CD_1','HCPCS_CD1','HCPCS_CD2']].values
print(domain)
# fit model
model = svm.SVC(kernel='linear')
model.fit(domain,type_label)
# get separating hyperplane
w = model.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(30,60)
yy = a * xx - (model.intercept_[0]) / w[1]
# plot the parallels to the separating hyperplane that pass through the support vectors
b = model.support_vectors_[0]
yy_down = a * xx + (b[1] - a * b[0])
b = model.support_vectors_[-1]
yy_up = a * xx + (b[1] - a * b[0])
# plot data
sns.lmplot('HCPCS_CD1','HCPCS_CD2',data=claims,hue='ICD9_DGNS_CD_1',palette='Set1',fit_reg=False,scatter_kws={"s":70});
plt.plot(xx,yy,linewidth=2,color='black')
plt.plot(xx,yy_down,'k--')
plt.plot(xx,yy_up,'k--')
KeyError Traceback (most recent call last)
<ipython-input-7-ab7422e52d5c> in <module>
12
13 # plot data
---> 14 sns.lmplot('HCPCS_CD1','HCPCS_CD2',data=claims,hue='ICD9_DGNS_CD_1',palette='Set1',fit_reg=False,scatter_kws={"s":70});
15
16 # format and preprocess training data - either it's a ECC or not
F:\Users\matt\Anaconda3\lib\site-packages\seaborn\regression.py in lmplot(x, y, data, hue, col, row, palette, col_wrap, height, aspect, markers, sharex, sharey, hue_order, col_order, row_order, legend, legend_out, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, x_jitter, y_jitter, scatter_kws, line_kws, size)
549 need_cols = [x, y, hue, col, row, units, x_partial, y_partial]
550 cols = np.unique([a for a in need_cols if a is not None]).tolist()
--> 551 data = data[cols]
552
553 # Initialize the grid
F:\Users\matt\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2680 if isinstance(key, (Series, np.ndarray, Index, list)):
2681 # either boolean or fancy integer index
-> 2682 return self._getitem_array(key)
2683 elif isinstance(key, DataFrame):
2684 return self._getitem_frame(key)
F:\Users\matt\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
2724 return self._take(indexer, axis=0)
2725 else:
-> 2726 indexer = self.loc._convert_to_indexer(key, axis=1)
2727 return self._take(indexer, axis=1)
2728
F:\Users\matt\Anaconda3\lib\site-packages\pandas\core\indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
1325 if mask.any():
1326 raise KeyError('{mask} not in index'
-> 1327 .format(mask=objarr[mask]))
1328
1329 return com._values_from_object(indexer)
KeyError: "['HCPCS_CD1' 'HCPCS_CD2'] not in index"