Value not in range in for loop for Python

I am trying to obtain the MACD, MACD signal, and MACD difference lines for stock prices given certain inputs. Below is the custom code that I am using.
def create_MACD(long_term, short_term, dataframe, signal_ema_length):
    # obtain the SMA data that we need to obtain the MACD EMA values
    short_sma = create_sma(short_term, dataframe)
    long_sma = create_sma(long_term, dataframe)
    # create the EMAs that will be subtracted to obtain the MACD line
    short_ema = create_ema(short_term, 2, dataframe)
    long_ema = create_ema(long_term, 2, dataframe)
    # calculate length of MACD array and starting indices for line and signal
    length = len(dataframe)
    # calculate the starting index of the line
    start_line = long_term
    # calculate the starting index of the signal line
    start_signal = long_term + signal_ema_length
    # create the smoothing variables for the signal line
    smoothing = 2 / (signal_ema_length + 1)
    smoothing_minus = 1 - smoothing
    # calculate number of iterations for MACD and MACD signal
    num_iters_macd = len(dataframe) - long_term
    num_iters_signal = num_iters_macd - signal_ema_length
    # create the MACD arrays; change dataframe to array for iterations
    macd = np.zeros(length)
    macd_signal = np.zeros(length)
    array = dataframe.to_numpy()
    # for loop for MACD data
    for i in range(num_iters_macd):
        index = start_line + i
        macd[index] = short_ema[index] - long_ema[index]
    # for loop for MACD signal
    for i in range(num_iters_signal):
        index = start_signal + i
        macd_signal[index] = macd[index]*smoothing + macd_signal[index-1]*smoothing_minus
    # create SMA of first X days of MACD
    sma_MACD = sum(macd[:signal_ema_length]) / signal_ema_length
    # insert the first value into the MACD signal array
    macd_signal[start_signal-1] = macd[start_signal-1]*smoothing + sma_MACD*smoothing_minus
    # create array for MACD difference
    macd_diff = np.zeros(length)
    # create starting index for MACD difference
    start_diff = start_signal
    num_iters_diff = num_iters_signal
    for i in range(num_iters_diff):
        index = i + start_diff
        macd_diff[index] = macd[index] - macd_signal[index]
    # send all arrays to pandas DataFrames
    MACD_line = pd.DataFrame(data=macd)
    MACD_signal = pd.DataFrame(data=macd_signal)
    MACD_difference = pd.DataFrame(data=macd_diff)
    return MACD_line, MACD_signal, MACD_difference

macd_av, signal_av, diff_av = create_MACD(26, 12, price, 9)
The error that I get is
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/opt/anaconda3/envs/tensorflow/lib/python3.7/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
354 try:
--> 355 return self._range.index(new_key)
356 except ValueError as err:
ValueError: 26 is not in range
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-20-a1e7f9a89bbb> in <module>
----> 1 macd_av,signal_av,diff_av = create_MACD(26,12,price,9)
<ipython-input-19-78834be35c60> in create_MACD(long_term, short_term, dataframe, signal_ema_length)
35 for i in range(num_iters_macd):
36 index = start_line+i
---> 37 macd[index] = short_ema[index]-long_ema[index]
38
39 #for loop for MACD signal
~/opt/anaconda3/envs/tensorflow/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
2900 if self.columns.nlevels > 1:
2901 return self._getitem_multilevel(key)
-> 2902 indexer = self.columns.get_loc(key)
2903 if is_integer(indexer):
2904 indexer = [indexer]
~/opt/anaconda3/envs/tensorflow/lib/python3.7/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
355 return self._range.index(new_key)
356 except ValueError as err:
--> 357 raise KeyError(key) from err
358 raise KeyError(key)
359 return super().get_loc(key, method=method, tolerance=tolerance)
KeyError: 26
I have tested the custom SMA and EMA functions, so those are outputting the correct arrays. I know this error means that my for loop range is not correct, but I am unsure why.

The problem is that the short and long EMA/SMA results created at the beginning of the function are pandas DataFrames, so short_ema[index] is treated as a column-label lookup (hence the KeyError: 26 above) rather than a row lookup. You could index them positionally with .iloc, but that is awkward inside loops; converting them to NumPy arrays first is simpler, and the loops then work as intended.
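A minimal sketch of that fix, assuming create_sma and create_ema return single-column DataFrames aligned with dataframe:

# Flatten the returned DataFrames to 1-D NumPy arrays so that integer
# indexing is positional rather than a column-label lookup.
short_ema = create_ema(short_term, 2, dataframe).to_numpy().ravel()
long_ema = create_ema(long_term, 2, dataframe).to_numpy().ravel()

for i in range(num_iters_macd):
    index = start_line + i
    macd[index] = short_ema[index] - long_ema[index]  # plain ndarray indexing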

Related

Python: why do I get an error when I try to interpolate an xarray between dates?

I am trying to interpolate the values of an xarray Dataset called pop, using the function xarray.interp:
dates = pd.date_range('1990-01-01', '2020-01-01', freq='1Y')
popI = pop.interp(time=dates, kwargs={"fill_value": "extrapolate"})
but I get the following error
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-75-1393bc257da7> in <module>
----> 1 popI = pop.interp(time=dates, kwargs={"fill_value": "extrapolate"})
/usr/lib/python3/dist-packages/xarray/core/dataset.py in interp(self, coords, method, assume_sorted, kwargs, method_non_numeric, **coords_kwargs)
3163 if method in ["linear", "nearest"]:
3164 for k, v in validated_indexers.items():
-> 3165 obj, newidx = missing._localize(obj, {k: v})
3166 validated_indexers[k] = newidx[k]
3167
/usr/lib/python3/dist-packages/xarray/core/missing.py in _localize(var, indexes_coords)
561 indexes = {}
562 for dim, [x, new_x] in indexes_coords.items():
--> 563 minval = np.nanmin(new_x.values)
564 maxval = np.nanmax(new_x.values)
565 index = x.to_index()
<__array_function__ internals> in nanmin(*args, **kwargs)
/usr/lib/python3/dist-packages/numpy/lib/nanfunctions.py in nanmin(a, axis, out, keepdims)
319 # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
320 res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
--> 321 if np.isnan(res).any():
322 warnings.warn("All-NaN slice encountered", RuntimeWarning,
323 stacklevel=3)
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
You're calling interp on a Dataset, which always applies the operation to all data variables. One of your data variables, mollewide, is a string array, and strings can't be interpolated. So you can either set it as a coordinate:
popI = pop.set_coords('mollewide').interp(time=dates, kwargs={"fill_value": "extrapolate"})
or you can only operate on the popDensity data variable:
popI = pop["popDensity"].interp(time=dates, kwargs={"fill_value": "extrapolate"})

Pandas: a must be greater than 0 unless no samples are taken

I am trying to resample the rebalanced data set 'churn_train' by 20%, or n = 158 records, taking rows whose 'Churn' column value is 'True'. I am receiving an error message. The data set is not empty; I checked its shape and value counts. How do I resolve this error message? Any help would be appreciated. Thanks.
Data frame 'churn': below are some rows of the data frame.
State,Account Length,Area Code,Phone,Intl Plan,VMail Plan,VMail Message,Day Mins,Day Calls,Day Charge,Eve Mins,Eve Calls,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls,Old Churn,Churn
"KS",128,415,"382-4657","no","yes",25,265.100000,110,45.070000,197.400000,99,16.780000,244.700000,91,11.010000,10.000000,3,2.700000,1,"False.","False"
"OH",107,415,"371-7191","no","yes",26,161.600000,123,27.470000,195.500000,103,16.620000,254.400000,103,11.450000,13.700000,3,3.700000,1,"False.","False"
"NJ",137,415,"358-1921","no","no",0,243.400000,114,41.380000,121.200000,110,10.300000,162.600000,104,7.320000,12.200000,5,3.290000,0,"False.","False"
"OH",84,408,"375-9999","yes","no",0,299.400000,71,50.900000,61.900000,88,5.260000,196.900000,89,8.860000,6.600000,7,1.780000,2,"False.","False"
"OK",75,415,"330-6626","yes","no",0,166.700000,113,28.340000,148.300000,122,12.610000,186.900000,121,8.410000,10.100000,3,2.730000,3,"False.","False"
"AL",118,510,"391-8027","yes","no",0,223.400000,98,37.980000,220.600000,101,18.750000,203.900000,118,9.180000,6.300000,6,1.700000,0,"False.","False"
"MA",121,510,"355-9993","no","yes",24,218.200000,88,37.090000,348.500000,108,29.620000,212.600000,118,9.570000,7.500000,7,2.030000,3,"False.","False"
"MO",147,415,"329-9001","yes","no",0,157.000000,79,26.690000,103.100000,94,8.760000,211.800000,96,9.530000,7.100000,6,1.920000,0,"False.","False"
"WV",141,415,"330-8173","yes","yes",37,258.600000,84,43.960000,222.000000,111,18.870000,326.400000,97,14.690000,11.200000,5,3.020000,0,"False.","False"
"IN",65,415,"329-6603","no","no",0,129.100000,137,21.950000,228.500000,83,19.420000,208.800000,111,9.400000,12.700000,6,3.430000,4,"True.","True"
My code:
churn_train['Churn'].value_counts()

False    1913
True      320
Name: Churn, dtype: int64
to_resample = churn_train.loc[churn_train['Churn'] == "True"]
our_resample = to_resample.sample(n = 158, replace = True)
churn_train_rebal = pd.concat([churn_train, our_resample])
Error Message:
ValueError Traceback (most recent call last)
/var/folders/wv/42dn23fd1cb0czpvqdnb6zw00000gn/T/ipykernel_7751/2929105044.py in <module>
1 to_resample = churn_train.loc[churn_train['Churn'] == "True"]
----> 2 our_resample = to_resample.sample(n = 158, replace = True)
3 churn_train_rebal = pd.concat([churn_train, our_resample])
~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py in sample(self, n, frac, replace, weights, random_state, axis, ignore_index)
5452 weights = sample.preprocess_weights(self, weights, axis)
5453
-> 5454 sampled_indices = sample.sample(obj_len, size, replace, weights, rs)
5455 result = self.take(sampled_indices, axis=axis)
5456
~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/sample.py in sample(obj_len, size, replace, weights, random_state)
148 raise ValueError("Invalid weights: weights sum to zero")
149
--> 150 return random_state.choice(obj_len, size=size, replace=replace, p=weights).astype(
151 np.intp, copy=False
152 )
mtrand.pyx in numpy.random.mtrand.RandomState.choice()
ValueError: a must be greater than 0 unless no samples are taken
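One thing worth checking, offered as a guess since the column dtypes aren't shown: value_counts prints False and True without quotes, which suggests the Churn column holds booleans, not strings. In that case churn_train['Churn'] == "True" matches zero rows, and .sample(n=158) on an empty frame asks NumPy to draw from a population of size 0, which is exactly what this ValueError means. A hedged sketch of the check:

to_resample = churn_train.loc[churn_train['Churn'] == "True"]
print(to_resample.shape)  # if the row count is 0, the comparison matched nothing

# If the column is boolean, filter on the values directly instead:
to_resample = churn_train.loc[churn_train['Churn']]
our_resample = to_resample.sample(n=158, replace=True)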

Dask looping over library function call

Goal
I would like to parallelize a loop with dask that uses a library function inside the loop. This function, mhw.detect(), calculates some statistics on a slice of a numpy array. None of the slices of the array depend on the other slices, so I was hoping that dask could be used to compute them in parallel and store them all in the same output array.
Code
The flow of the code I am working on is:
import numpy as np
import marineHeatWaves as mhw
from dask import delayed

# Create fake input data
lat_size, long_size = 100, 100
data = np.random.random_integers(0, 30, size=(10_000, long_size, lat_size))  # size = (time, longitude, latitude)
time = np.arange(730_000, 740_000)  # time in ordinal days

# Initialize an empty array to hold the output
output_array = np.empty(data.shape)

# loop through each pixel in the data array
for idx_lat in range(lat_size):
    for idx_long in range(long_size):
        # Extract a slice of data
        data_slice = data[:, idx_lat, idx_long]
        # Use the library function to calculate the stats for the pixel
        # `library_output` is a dictionary that has a numpy array inside it
        _, library_output = delayed(mhw.detect)(time, data_slice)
        # Update the output array with the calculated values from the library
        output_array[:, idx_lat, idx_long] = library_output['seas']
Previous efforts
When I run this code I get the error TypeError: Delayed objects of unspecified length are not iterable. Another Stack Overflow post discusses this issue and resolves it by converting the output of the delayed function to a delayed object. However, because I didn't create the output object myself, I am not sure if I can convert it to a delayed object.
I've also tried wrapping the last line in da.from_delayed(), as in output_array[:, idx_lat, idx_long] = da.from_delayed(library_output['seas']), and initializing output_array with da.empty(data.shape). I get the same error, though, since I think the code doesn't make it past the line with the library function, delayed(mhw.detect)(time, data_slice).
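For reference, da.from_delayed needs to be told the shape and dtype of the eventual array, since dask can't infer them from a Delayed object. A minimal sketch under that assumption (10_000 is the time length from the fake data above):

import dask.array as da
from dask import delayed

# Index into the lazy result instead of unpacking it (tuple unpacking is what
# raises the "unspecified length" TypeError); delayed(mhw.detect, nout=2)
# would also allow unpacking.
library_output = delayed(mhw.detect)(time, data_slice)[1]
seas = da.from_delayed(library_output['seas'], shape=(10_000,), dtype=float)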
Is it possible to parallelize this? Is this approach of asking dask to compute all the slices in parallel and put them together in an output array even a reasonable approach?
Full Traceback
TypeError Traceback (most recent call last)
/home/rwegener/mhw-ocetrac-census/notebooks/ejoliver_subset_MUR.ipynb Cell 44' in <cell line: 10>()
13 data_slice = data[:, idx_lat, idx_long]
14 # Use the library function to calculate the stats for the pixel
---> 15 _, point_clim = delayed(mhw.detect)(time_ordinal, data_slice)
16 # Update the output array with the calculated values from the library
17 output_array[:, idx_lat, idx_long] = point_clim['seas']
File ~/.conda/envs/dask/lib/python3.10/site-packages/dask/delayed.py:581, in Delayed.__iter__(self)
579 def __iter__(self):
580 if self._length is None:
--> 581 raise TypeError("Delayed objects of unspecified length are not iterable")
582 for i in range(self._length):
583 yield self[i]
TypeError: Delayed objects of unspecified length are not iterable
Update
Using .apply_along_axis() as suggested:
# Create fake input data
lat_size, long_size = 100, 100
data = np.random.randint(0, 30, size=(10_000, long_size, lat_size))  # size = (time, longitude, latitude)
data = dask.array.from_array(data, chunks=(-1, 100, 100))
time = np.arange(730_000, 740_000)  # time in ordinal days

# Initialize an empty array to hold the output
output_array = np.empty(data.shape)

# define a wrapper to rearrange arguments
def func1d(arr, time, shape=(10000,)):
    print(arr.shape)
    return mhw.detect(time, arr)

res = dask.array.apply_along_axis(func1d, 0, data, time=time)
With the output:
(1,)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
/homes/metogra/rwegener/mhw-ocetrac-census/notebooks/ejoliver_subset_MUR.ipynb Cell 48' in <cell line: 15>()
12 print(arr.shape)
13 return mhw.detect(time, arr)
---> 15 res = dask.array.apply_along_axis(func1d, 0, data, time=time)
File ~/.conda/envs/dask/lib/python3.10/site-packages/dask/array/routines.py:508, in apply_along_axis(func1d, axis, arr, dtype, shape, *args, **kwargs)
506 if shape is None or dtype is None:
507 test_data = np.ones((1,), dtype=arr.dtype)
--> 508 test_result = np.array(func1d(test_data, *args, **kwargs))
509 if shape is None:
510 shape = test_result.shape
/homes/metogra/rwegener/mhw-ocetrac-census/notebooks/ejoliver_subset_MUR.ipynb Cell 48' in func1d(arr, time, shape)
11 def func1d(arr, time, shape=(10000,)):
12 print(arr.shape)
---> 13 return mhw.detect(time, arr)
File ~/.conda/envs/dask/lib/python3.10/site-packages/marineHeatWaves-0.28-py3.10.egg/marineHeatWaves.py:280, in detect(t, temp, climatologyPeriod, pctile, windowHalfWidth, smoothPercentile, smoothPercentileWidth, minDuration, joinAcrossGaps, maxGap, maxPadLength, coldSpells, alternateClimatology, Ly)
278 tt = tt[tt>=0] # Reject indices "before" the first element
279 tt = tt[tt<TClim] # Reject indices "after" the last element
--> 280 thresh_climYear[d-1] = np.nanpercentile(tempClim[tt.astype(int)], pctile)
281 seas_climYear[d-1] = np.nanmean(tempClim[tt.astype(int)])
282 # Special case for Feb 29
IndexError: index 115 is out of bounds for axis 0 with size 1
Rather than using delayed, this seems like a good case for dask.array.
You can create the dask array by partitioning the numpy array:
da = dask.array.from_array(output_array, chunks=(-1, 10, 10))
Now you can call mhw.detect using dask.array.map_blocks alongside np.apply_along_axis within each block:
# define a wrapper to rearrange arguments
def func1d(arr, time):
    return mhw.detect(time, arr)

def block_func(block, **kwargs):
    return np.apply_along_axis(func1d, 0, block, **kwargs)

res = data.map_blocks(block_func, meta=data, time=time)
res = res.compute()
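A note on the chunking choice: chunks=(-1, 10, 10) keeps the time axis in a single chunk, so each block handed to np.apply_along_axis contains the complete time series for a 10×10 patch of pixels; chunking along time as well would hand mhw.detect only fragments of each series.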
The map_blocks answer above works great! Additionally, apply_along_axis() was suggested and discussed in the comments. I was able to get that method to work, but for it to function properly you need to supply both the dtype and shape inputs to da.apply_along_axis(). If these aren't given, dask probes the function with a length-1 test array (visible in the traceback above), so it can't figure out the shape of the data it should pass as an argument.
So, another solution:
import dask.array as da

# Create fake input data
lat_size, long_size = 100, 100
data = da.random.random_integers(0, 30, size=(1_000, long_size, lat_size), chunks=(-1, 10, 10))  # size = (time, longitude, latitude)
time = np.arange(730_000, 731_000)  # time in ordinal days

# define a wrapper to rearrange arguments
def func1d(arr, time):
    return mhw.detect(time, arr)

result = da.apply_along_axis(func1d, 0, data, time=time, dtype=data.dtype, shape=(1000,))
result.compute()

Keep getting ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10)

First off, thanks in advance if you can help puzzle this out! I'm trying to balance some customer data for my model. My targets are all 1s and 0s, and the 0s are overwhelmingly abundant, so I created a counter that marks 0 rows for deletion once they surpass the number of 1 rows. But at the very end of my code, when I call np.delete to drop those extra rows from my dataset, I keep getting this error.
I don't really know what to try, because I don't even understand what the error is telling me.
import pandas as pd
import numpy as np
from sklearn import preprocessing
#%%
#Loading the Raw Data
raw_csv_data= pd.read_csv('Audiobooks-data_raw.csv')
print(display(raw_csv_data.head(20)))
#%%
df=raw_csv_data.copy()
print(display(df.head(20)))
#%%
print(df.info())
#%%
#Separate the Targets from the dataset
inputs_all= df.loc[:,'Book length (mins)_overall':'Last visited minus Purchase date']
targets_all= df['Targets']
print(display(inputs_all.head()))
print(display(targets_all.head()))
#%%
#Shuffling the Data to prep for balancing
shuffled_indices= np.arange(inputs_all.shape[0])
np.random.shuffle(shuffled_indices)
shuffled_inputs= inputs_all.iloc[shuffled_indices]
shuffled_targets= targets_all[shuffled_indices]
#%%
#Balance the Dataset
#There are significantly more 0's than 1's in our target.
#We want a good accurate model
print(inputs_all.shape)
print(targets_all.shape)
#%%
num_one_targets= int(np.sum(targets_all))
zero_targets_counter= 0
indices_to_remove= []
print(num_one_targets)
#%%
for i in range(targets_all.shape[0]):
    if targets_all[i]==0:
        zero_targets_counter += 1
        if zero_targets_counter > num_one_targets:
            indices_to_remove.append(i)
#%%
inputs_all_balanced= np.delete(inputs_all, indices_to_remove, axis=0)
targets_all_balanced= np.delete(targets_all, indices_to_remove, axis=0)
Everything works except when I try to group my balanced datasets and delete the excess 0 rows. Here is the error:
ValueError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1652
-> 1653 mgr = BlockManager(blocks, axes)
1654 mgr._consolidate_inplace()
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in __init__(self, blocks, axes, do_integrity_check)
113 if do_integrity_check:
--> 114 self._verify_integrity()
115
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in _verify_integrity(self)
310 if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
--> 311 construction_error(tot_items, block.shape[1:], self.axes)
312 if len(self.items) != tot_items:
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in construction_error(tot_items, block_shape, axes, e)
1690 raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
-> 1691 passed, implied))
1692
ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
in
----> 1 inputs_all_balanced= np.delete(inputs_all, indices_to_remove, axis=0)
2 targets_all_balanced= np.delete(targets_all, indices_to_remove, axis=0)
~\Anaconda3\lib\site-packages\numpy\lib\function_base.py in delete(arr, obj, axis)
4419
4420 if wrap:
-> 4421 return wrap(new)
4422 else:
4423 return new
~\Anaconda3\lib\site-packages\pandas\core\generic.py in __array_wrap__(self, result, context)
1907 def __array_wrap__(self, result, context=None):
1908 d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
-> 1909 return self._constructor(result, **d).__finalize__(self)
1910
1911 # ideally we would define this to avoid the getattr checks, but
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
422 else:
423 mgr = init_ndarray(data, index, columns, dtype=dtype,
--> 424 copy=copy)
425
426 # For data is list-like, or Iterable (will consume into list)
~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, columns, dtype, copy)
165 values = maybe_infer_to_datetimelike(values)
166
--> 167 return create_block_manager_from_blocks([values], [columns, index])
168
169
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1658 blocks = [getattr(b, 'values', b) for b in blocks]
1659 tot_items = sum(b.shape[0] for b in blocks)
-> 1660 construction_error(tot_items, blocks[0].shape[1:], axes, e)
1661
1662
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in construction_error(tot_items, block_shape, axes, e)
1689 raise ValueError("Empty data passed with indices specified.")
1690 raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
-> 1691 passed, implied))
1692
1693
ValueError: Shape of passed values is (4474, 10), indices imply (14084, 10)
Try removing rows with pandas drop instead:
inputs_all_balanced = inputs_all.drop(indices_to_remove,axis=0)
targets_all_balanced = targets_all.drop(indices_to_remove,axis=0)
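For context, a reading of the traceback rather than anything version-specific: np.delete strips the rows out of the underlying array, and pandas' __array_wrap__ then tries to rebuild a DataFrame by pairing the 4474 remaining rows with the original 14084-label index, which is exactly the shape mismatch the error reports. drop sidesteps this by removing rows by index label and rebuilding the index itself; since indices_to_remove was collected against the default RangeIndex, the loop's positional indices double as labels here. A quick sanity check after dropping:

inputs_all_balanced = inputs_all.drop(indices_to_remove, axis=0)
targets_all_balanced = targets_all.drop(indices_to_remove, axis=0)

# The two classes should now be (roughly) the same size.
print(targets_all_balanced.value_counts())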

Python, Key Error: 1

The goal of my code is to sort through the data and select only the visual band, or "Vis." band, data. From that I eliminated all values that were upper or lower limits, to clean up the graph. Finally, I wanted to remove all the data that was not part of the outbursts or decays. My filtering of the Vis. band and the upper/lower-limit data seems to work fine, but when I try to remove data that has a small slope it raises KeyError: 1. I don't have enough reputation to post an image, so I included a link to the plot, which shows the data after filtering the Vis. band and upper/lower limits.
def timeplot():
    import pandas as pd
    import matplotlib.pyplot as plt
    import jdcal as jd
    import math
    #Getting input from user as to start and end dates for the data
    (miny,minm,mind) = input("Enter the start date for data in the format (yyyy,mm,dd) ex. (2000,01,01):")
    (maxy,maxm,maxd) = input("Enter the end date for data in the format (yyyy,mm,dd) ex. (2000,01,01):")
    #Calculating modified julian dates from the gregorian date input
    (x,Amin)=jd.gcal2jd(miny,minm,mind)
    (y,Amax)=jd.gcal2jd(maxy,maxm,maxd)
    #Creating a table with the numbers corresponding to their month
    Month = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
    #Read in data file
    pd.set_option('html', False)
    pd.set_option('max_columns', 30)
    pd.set_option('max_rows', 2000)
    data1 = pd.read_csv("50yrdata.csv")
    data1['ulflag']=1
    #Deal with any bad columns
    data1_limit = data1.JD * 0
    ii=0
    for mag in data1.Magnitude:
        if mag[0] == '<':
            data1.ulflag[ii]=0
            data1.Magnitude[ii] = mag[1:]
            data1_limit[ii] = 1
        if mag[0] == '>':
            data1.ulflag[ii]=0
            data1.Magnitude[ii] = mag[1:]
            data1_limit[ii] = -1
        ii +=1
    #The data set has Vis, V, I, R, B, TG, TB, TR, CV bands
    #Selecting data only in the visual band with no upper or lower limits in
    #magnitude
    #Converting Julian Date to Modified Julian Date
    data1.JD=data1.JD-2400000.5
    data1.index=data1.ulflag
    data1=data1.ix[1,['JD','Magnitude','Band']]
    data1.index=data1.Band
    tdata=data1.ix['Vis.',['JD','Magnitude']]
    #Changing all of the values from Magnitude from string to float
    tdata=tdata.astype(float)
    #Adding on columns to make computations easier
    tdata['sflag']=0
    tdata['slope']=0.000
    tdata['aslope']=0.000
    tdata['A']=0.000
    tdata['B']=0.000
    #Finding max and min values of our MJD
    Max=Amax
    Min=Amin
    #We split the data into N graphs where N is the number of years the data spans
    N=(int((Max-Min)/365))
    #Finding slope of the curve
    #Attempt to filter the data using
    #1. A positive slope greater than a certain threshold = outburst
    #2. A negative slope smaller than a certain threshold = decay
    #3. The absolute value of the slope is smaller than a certain threshold = quiescence
    length=len(tdata.JD)-1
    tdata.A[length]=0
    tdata.B[length]=1
    for i in range(length):
        tdata.A[i] = tdata.Magnitude[i+1]-tdata.Magnitude[i]
    for i in range(length):
        tdata.B[i] = tdata.JD[i+1]-tdata.JD[i]
    for i in range(length+1):
        tdata.slope[i] = tdata.A[i]/tdata.B[i]
    tdata.aslope=abs(tdata.slope)
    for i in range(length):
        if tdata.aslope[i] > 1:
            tdata.sflag = 1
        if tdata.aslope[i] < 1:
            tdata.sflag = 0
        i += 1
    #filtering out all the data that has a slope less than a certain threshold
    tdata.index = tdata.sflag
    tdata=tdata.astype(float)
    tdata=tdata.ix[1,['JD','Magnitude']]
    #Plot selected data
    fig ,axs = plt.subplots(N,1)
    fig.subplots_adjust(hspace = .5)
    #Due to data set being so large, make multiple sub plots instead of one large plot
    #Magnitude axis needs to be flipped to see when the star has outbursts
    #When setting the limits of our subplots, we extend them by a small value in
    #order to make the data easier to read. The large value being added and subtracted
    #of 365 causes the graph to cover approximately one year in data.
    axs = axs.ravel()
    for i in range(N):
        axs[i].scatter(tdata.JD, tdata.Magnitude)
        axs[i].invert_yaxis()
        axs[i].set_xlim([Min+(365*(i-1))-5, Max+5-(365*(N-i))])
        A=str(miny+i)
        B=Month[minm]
        C=str(mind)
        axs[i].set_title('A Year of data starting from ' + A + ',' + B + ',' +C)
    #Setting title and axis, I was unable to set a shared x and y axis title
    #between the subplots, when I attempted to do this it would create another
    #plot overlapping the 4 subplots making it difficult to see the values
    fig.suptitle('SS Cyg Data', fontsize = 20)
    fig.text(0.5, 0.04, 'Modified Julian Date', ha='center', va='center')
    fig.text(0.04, 0.5, 'Magnitude', ha='center', va='center', rotation='vertical')
    plt.show()

timeplot()
The full traceback for the error is:
KeyError Traceback (most recent call last)
C:\Users\Kenny\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.2.0.1610.win-x86_64\lib\site-packages\IPython\utils\py3compat.pyc in execfile(fname, glob, loc)
195 else:
196 filename = fname
--> 197 exec compile(scripttext, filename, 'exec') in glob, loc
198 else:
199 def execfile(fname, *where):
C:\Users\Kenny\Dropbox\499\timeplot.py in <module>()
136 plt.show()
137
--> 138 timeplot()
C:\Users\Kenny\Dropbox\499\timeplot.py in timeplot()
102 tdata.index = tdata.sflag
103 tdata=tdata.astype(float)
--> 104 tdata=tdata.ix[1,['JD','Magnitude']]
105
106 #Plot selected data
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in __getitem__(self, key)
45 pass
46
---> 47 return self._getitem_tuple(key)
48 else:
49 return self._getitem_axis(key, axis=0)
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_tuple(self, tup)
251 def _getitem_tuple(self, tup):
252 try:
--> 253 return self._getitem_lowerdim(tup)
254 except IndexingError:
255 pass
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_lowerdim(self, tup)
361 for i, key in enumerate(tup):
362 if _is_label_like(key) or isinstance(key, tuple):
--> 363 section = self._getitem_axis(key, axis=i)
364
365 # we have yielded a scalar ?
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _getitem_axis(self, key, axis)
411 return self._get_loc(key, axis=axis)
412
--> 413 return self._get_label(key, axis=axis)
414
415 def _getitem_iterable(self, key, axis=0):
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\indexing.pyc in _get_label(self, label, axis)
59 return self.obj._xs(label, axis=axis, copy=False)
60 except Exception:
---> 61 return self.obj._xs(label, axis=axis, copy=True)
62
63 def _get_loc(self, key, axis=0):
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\frame.pyc in xs(self, key, axis, level, copy)
2369 loc, new_index = self.index.get_loc_level(key)
2370 else:
-> 2371 loc = self.index.get_loc(key)
2372
2373 if isinstance(loc, np.ndarray):
E:\Enthought\Canopy\User\lib\site-packages\pandas\core\index.pyc in get_loc(self, key)
714 loc : int if unique index, possibly slice or mask if not
715 """
--> 716 return self._engine.get_loc(key)
717
718 def get_value(self, series, key):
E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine.get_loc (pandas\index.c:3542)()
E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine.get_loc (pandas\index.c:3373)()
E:\Enthought\Canopy\User\lib\site-packages\pandas\index.pyd in pandas.index.IndexEngine._get_loc_duplicates (pandas\index.c:3709)()
KeyError: 1
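A likely cause, offered as a diagnosis to verify rather than a confirmed fix: inside the slope-flagging loop, tdata.sflag = 1 and tdata.sflag = 0 assign a scalar to the whole sflag column rather than to row i, so after the loop every row carries whatever the last iteration set. If that leaves sflag all zeros, the index built from it contains no label 1, and tdata.ix[1,['JD','Magnitude']] raises KeyError: 1. Element-wise assignment, matching the style used earlier with data1.ulflag[ii], would keep per-row flags:

# Hypothetical correction: set the flag per row instead of overwriting
# the whole column on every iteration.
for i in range(length):
    if tdata.aslope[i] > 1:
        tdata.sflag[i] = 1
    else:
        tdata.sflag[i] = 0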
