KeyError with pandas - Python

I wrote a Python script (below) which loads data from a text file (using pandas) and checks the values in the columns.
import sys
import pandas as pd
import numpy as np
from numpy import ndarray
import math
import matplotlib.pyplot as plt
from matplotlib.pyplot import *
from skimage import data
from skimage.feature import match_template

if __name__ == '__main__':
    data = pd.read_csv('Fe_PSI_spt_refined.txt', sep=" ", header=None)
    data.columns = ["Angle_number", "Omega", "Intensity", "X", "Y", "Address", "ID"]#, "flag"]

    Number_of_projections = 181
    Number_of_lines_in_txt = 3493
    numrows = len(data)
    counter_array = []
    correlation_threshold_value = 0.7
    a = np.zeros(Number_of_lines_in_txt)
    output_file = ("output.txt")

    for i in range(2, (Number_of_projections + 1)):
        filename_cutouts_combined = ("cutouts_combined_%03i.txt" % (i))
        filename_cutouts_combined_tag = ("cutouts_combined_tag_%03i.txt" % (i))
        image = np.loadtxt(filename_cutouts_combined)
        image_tagged = np.loadtxt(filename_cutouts_combined_tag)

        for j in range(0, Number_of_lines_in_txt - 1):
            print data.Angle_number[j], i
After one iteration of j I get the error below. Do you spot any error I should fix? Thanks.
Traceback (most recent call last):
  File "Hyperbola_search.py", line 46, in <module>
    print data.Angle_number[j], i
  File "/Users/Alberto/anaconda/lib/python2.7/site-packages/pandas/core/series.py", line 491, in __getitem__
    result = self.index.get_value(self, key)
  File "/Users/Alberto/anaconda/lib/python2.7/site-packages/pandas/core/index.py", line 1032, in get_value
    return self._engine.get_value(s, k)
  File "index.pyx", line 97, in pandas.index.IndexEngine.get_value (pandas/index.c:2661)
  File "index.pyx", line 105, in pandas.index.IndexEngine.get_value (pandas/index.c:2476)
  File "index.pyx", line 149, in pandas.index.IndexEngine.get_loc (pandas/index.c:3215)
  File "hashtable.pyx", line 382, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6450)
  File "hashtable.pyx", line 388, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6394)
KeyError: 3491

You load files into image and image_tagged, while a remains unused.
data.Angle_number is a column of the DataFrame read from Fe_PSI_spt_refined.txt, and numrows = len(data) is its actual row count. The KeyError: 3491 means row label 3491 does not exist in that DataFrame, i.e. the text file has fewer rows than the hard-coded Number_of_lines_in_txt assumes. Bound the inner loop by numrows (or use positional access with .iloc) instead of the constant.
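A minimal sketch of that change, assuming the file and column names from the question; the inner loop is bounded by the real DataFrame length instead of the constant 3493:

import pandas as pd

# Read the table exactly as in the question (file and column names taken from the post).
data = pd.read_csv('Fe_PSI_spt_refined.txt', sep=" ", header=None)
data.columns = ["Angle_number", "Omega", "Intensity", "X", "Y", "Address", "ID"]
numrows = len(data)  # actual number of rows; may be smaller than 3493

for j in range(numrows):                    # never runs past the last row
    angle = data["Angle_number"].iloc[j]    # positional access, safe for any index
    print(angle)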

Related

How to name a figure after its histogram name in matplotlib pyplot using savefig

I use this script:
import matplotlib.pyplot as plt
import numpy as np
import yoda

def readScatter2D(histname, filename):
    histos = yoda.core.read(filename + ".yoda")
    x = []
    y = []
    h = histos[histname]
    for b in h:
        x.append(b.x())
        y.append(b.y())
    plt.plot([x], [y], 'bo')
    plt.savefig(str(histname) + ".pdf")

readScatter2D("totalCh", "properties_file")
Running this with python3 gives me the error:
Traceback (most recent call last):
  File "enhancement.py", line 18, in <module>
    readScatter2D("totalCh", "properties_file")
  File "enhancement.py", line 16, in readScatter2D
    plt.savefig(str(histname)+".pdf")
  File "/usr/lib64/python2.7/site-packages/matplotlib/pyplot.py", line 697, in savefig
    res = fig.savefig(*args, **kwargs)
  File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line 1573, in savefig
    self.canvas.print_figure(*args, **kwargs)
  File "/usr/lib64/python2.7/site-packages/matplotlib/backend_bases.py", line 2252, in print_figure
    **kwargs)
  File "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_pdf.py", line 2519, in print_pdf
    file = PdfFile(filename)
  File "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_pdf.py", line 422, in __init__
    fh = open(filename, 'wb')
IOError: [Errno 2] No such file or directory: 'totalCh.pdf'
I do not understand why matplotlib cannot name the file after the histogram while saving. How can I save the figure under the original histogram name? I am using matplotlib version 2.0.2.
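Not a definitive answer, but one possibility (an assumption, the traceback alone does not prove it) is that the interpreter's current working directory has been removed or is not writable, so open('totalCh.pdf', 'wb') fails even though the name itself is fine. A sketch that saves under an explicit output directory that is created first; the folder name "plots" is made up for illustration:

import os
import matplotlib.pyplot as plt

def save_named_figure(histname, outdir="plots"):
    # Build an explicit path and make sure the directory exists before saving.
    if not os.path.isdir(outdir):          # "plots" is a hypothetical output folder
        os.makedirs(outdir)
    outpath = os.path.join(outdir, str(histname) + ".pdf")
    plt.savefig(outpath)
    return outpath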

Buffer is too small for requested array - Astropy

I'm reading TESS data and, as you might expect, it can be really large; my buffer is too small. Is there a way I can avoid this error? Maybe skip the files that are too large? Or is there a more permanent solution (not involving more memory)? My code is below, along with the full error.
from lightkurve import TessTargetPixelFile
import lightkurve as lk
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def func(tpf):
    tpf.plot(aperture_mask=tpf.pipeline_mask);
    lc = tpf.to_lightcurve()
    mask = (lc.time.value < 1464)
    masked_lc = lc[mask]
    clipped_lc = masked_lc.remove_outliers(sigma=5);
    flat_lc = clipped_lc.flatten()
    binned_lc = flat_lc.bin(binsize=5)
    periodogram = binned_lc.to_periodogram(method="bls", period=np.arange(1, 20, 0.001))
    print(periodogram.plot())
    planet_b_period = periodogram.period_at_max_power
    planet_b_t0 = periodogram.transit_time_at_max_power
    planet_b_dur = periodogram.duration_at_max_power
    ax = binned_lc.fold(period=planet_b_period, epoch_time=planet_b_t0).scatter()
    ax.set_xlim(-5, 5);
    best_fit_period = periodogram.period_at_max_power
    print('Best fit period: {:.3f}'.format(best_fit_period))
    bfc = best_fit_period
    print(bfc)
    folded_lc = binned_lc.fold(period=bfc)
    folded_lc.scatter(s=7);
    plt.show()
    planet_b_period
    planet_b_model = periodogram.get_transit_model(period=planet_b_period,
                                                   transit_time=planet_b_t0,
                                                   duration=planet_b_dur)
    ax = binned_lc.fold(planet_b_period, planet_b_t0).scatter(s=7)
    planet_b_model.fold(planet_b_period, planet_b_t0).plot(ax=ax, c='r', lw=2)
    ax.set_xlim(-5, 5);

dataset = pd.read_csv("all_targets_S001_v1.csv")
d = dataset["TICID"]
print(d)
IDs = []
for i in range(len(d)):
    IDs.append("TIC" + str(d[i]))

for i in range(len(IDs)):
    tpf_file = lk.search_targetpixelfile(IDs[i], mission="TESS", sector=5).download(quality_bitmask='default')
    try:
        func(tpf_file)
        continue
    except:
        continue
Thanks.
The full error:
WARNING: File may have been truncated: actual file length (262144) is smaller than the expected size (46402560) [astropy.io.fits.file]
Traceback (most recent call last):
  File "lc.py", line 42, in <module>
    tpf_file = lk.search_targetpixelfile(IDs[i], mission="TESS", sector=5).download(quality_bitmask='default')
  File "C:\ProgramData\Anaconda3\lib\site-packages\lightkurve\utils.py", line 555, in wrapper
    return f(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\lightkurve\search.py", line 355, in download
    return self._download_one(
  File "C:\ProgramData\Anaconda3\lib\site-packages\lightkurve\search.py", line 290, in _download_one
    return read(path, quality_bitmask=quality_bitmask, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\lightkurve\io\read.py", line 112, in read
    return getattr(__import__("lightkurve"), filetype)(path_or_url, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\lightkurve\targetpixelfile.py", line 2727, in __init__
    quality_array=self.hdu[1].data["QUALITY"], bitmask=quality_bitmask
  File "C:\ProgramData\Anaconda3\lib\site-packages\astropy\utils\decorators.py", line 758, in __get__
    val = self.fget(obj)
  File "C:\ProgramData\Anaconda3\lib\site-packages\astropy\io\fits\hdu\table.py", line 399, in data
    data = self._get_tbdata()
  File "C:\ProgramData\Anaconda3\lib\site-packages\astropy\io\fits\hdu\table.py", line 171, in _get_tbdata
    raw_data = self._get_raw_data(self._nrows, columns.dtype,
  File "C:\ProgramData\Anaconda3\lib\site-packages\astropy\io\fits\hdu\base.py", line 520, in _get_raw_data
    return self._file.readarray(offset=offset, dtype=code, shape=shape)
  File "C:\ProgramData\Anaconda3\lib\site-packages\astropy\io\fits\file.py", line 330, in readarray
    return np.ndarray(shape=shape, dtype=dtype, offset=offset,
TypeError: buffer is too small for requested array
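One thing the traceback does show: the exception is raised on the lk.search_targetpixelfile(...).download(...) line itself, which sits outside the try/except, so except: continue never gets a chance to skip the bad target. A sketch of the download loop with the download moved inside the try block, under the assumption that simply skipping targets whose files cannot be read (the warning suggests a truncated download) is acceptable:

for tic_id in IDs:
    try:
        # Download inside the try so a truncated or unreadable file only skips this target.
        tpf_file = lk.search_targetpixelfile(tic_id, mission="TESS", sector=5).download(quality_bitmask='default')
        if tpf_file is None:            # nothing found for this target
            continue
        func(tpf_file)
    except Exception as err:            # e.g. "buffer is too small for requested array"
        print("Skipping {}: {}".format(tic_id, err))
        continue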

Creating a deltatime array in Python

I am new to Python, so I decided to start a project to improve my skills: this one on GeeksForGeeks. Now I am having difficulty appending a deltaTime variable to an array. I tried a numpy array as well, but it did not work out.
My code:
from matplotlib.ticker import Formatter
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
from pandas._libs.tslibs import timestamps

birdData = pd.read_csv("bird_tracking.csv")
birdNames = pd.unique(birdData.bird_name)

# Getting the time interval
timestamps = []
for i in range(len(birdData)):
    timestamps.append(datetime.datetime.strptime(birdData.date_time.iloc[i][:-3], "%Y-%m-%d %H:%M:%S"))
birdData["timestamps"] = pd.Series(timestamps, index=birdData.index)

plt.figure(figsize=(7, 7))
for name in birdNames:
    times = birdData.timestamps[birdData.bird_name == name]
    elapsedTime = []
    for time in times:
        x = time - times[0]
        #print(x)
        elapsedTime.append(x)
    plt.plot(np.array(elapsedTime)/datetime.timedelta(days=1), label=name)
plt.xlabel(" Observation ")
plt.ylabel(" Elapsed time (days) ")
plt.show()
The error I am getting:
Traceback (most recent call last):
  File "C:\Users\User\anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 3080, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas\_libs\index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\_libs\hashtable_class_helper.pxi", line 1625, in pandas._libs.hashtable.Int64HashTable.get_item
  File "pandas\_libs\hashtable_class_helper.pxi", line 1632, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\User\Documents\GitHub\TrackingBirdMigration\dataTime.py", line 24, in <module>
    x = time-times[0]
  File "C:\Users\User\anaconda3\lib\site-packages\pandas\core\series.py", line 853, in __getitem__
    return self._get_value(key)
  File "C:\Users\User\anaconda3\lib\site-packages\pandas\core\series.py", line 961, in _get_value
    loc = self.index.get_loc(label)
  File "C:\Users\User\anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 3082, in get_loc
    raise KeyError(key) from err
KeyError: 0

[Done] exited with code=1 in 8.313 seconds
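The KeyError: 0 comes from times[0]: after filtering with birdData.bird_name == name, the Series keeps the original row labels, and for every bird except the first there is no row labelled 0, so label-based lookup fails. Positional access avoids that. A minimal sketch of the inner loop, assuming the same bird_tracking.csv columns as in the question:

for name in birdNames:
    times = birdData.timestamps[birdData.bird_name == name]
    first = times.iloc[0]                 # first value by position, not by label 0
    elapsedTime = [t - first for t in times]
    plt.plot(np.array(elapsedTime) / datetime.timedelta(days=1), label=name)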

ValueError using np.loadtxt

I am trying to extract data from a txt file with numpy and I get this big error:
Traceback (most recent call last):
  File "C:\Python36\machine learning\bayes_classifier.py", line 14, in <module>
    data = np.loadtxt(input_file, delimiter=",")
  File "C:\Python36\lib\site-packages\numpy\lib\npyio.py", line 1092, in loadtxt
    for x in read_data(_loadtxt_chunksize):
  File "C:\Python36\lib\site-packages\numpy\lib\npyio.py", line 1019, in read_data
    items = [conv(val) for (conv, val) in zip(converters, vals)]
  File "C:\Python36\lib\site-packages\numpy\lib\npyio.py", line 1019, in <listcomp>
    items = [conv(val) for (conv, val) in zip(converters, vals)]
  File "C:\Python36\lib\site-packages\numpy\lib\npyio.py", line 738, in floatconv
    return float(x)
ValueError: could not convert string to float: '2.18'
Notice the ValueError: what is the '2.18'?
Here is the full code:
### Naïve Bayes classifier ###
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn import cross_validation
from utilities import visualize_classifier

# Input file containing data
input_file = "data_multivar_nb.txt"

# Load data from input file
data = np.loadtxt(input_file, delimiter=",")
x, y = data[:, :-1], data[:, -1]
and here is the file:
file link here
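The '2.18' is the token that float() refused to parse. Since the visible characters form a valid float, the likely culprit is an invisible character in that field (for example a UTF-8 BOM at the start of the file, or a non-breaking space); that is an assumption, not something the traceback proves. A small diagnostic sketch that prints the raw bytes of the first lines so any hidden characters become visible:

# A BOM or odd whitespace shows up in the repr() output, e.g. b'\xef\xbb\xbf2.18,...'.
with open("data_multivar_nb.txt", "rb") as f:
    for _ in range(3):
        print(repr(f.readline()))

# If a BOM turns out to be the problem, reading with an explicit encoding usually helps:
# data = np.loadtxt(open("data_multivar_nb.txt", encoding="utf-8-sig"), delimiter=",")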

Getting an error when extracting data from coinmarketcap

import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import time
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import numpy as np

df = pd.read_excel('currencynames.xlsx', sheet_name='Sheet1')
writer = pd.ExcelWriter('cryptodata.xlsx', engine='xlsxwriter')
currencies = df['Currency Name']
print(currencies[0])

for i in range(0, 1465, 1):
    print("https://coinmarketcap.com/currencies/"+currencies[i]+"/historical-data/?start=20130428&end="+time.strftime("%Y%m%d"))
    currency_market_info = pd.read_html("https://coinmarketcap.com/currencies/"+currencies[i].lower()+"/historical-data/?start=20130428&end="+time.strftime("%Y%m%d"))[0]
    currency_market_info = currency_market_info.assign(Date=pd.to_datetime(currency_market_info['Date']))
    currency_market_info.loc[currency_market_info['Volume']=="-", 'Volume'] = 0
    currency_market_info['Volume'] = currency_market_info['Volume'].astype('int64')
    currency_market_info.head()
    currency_market_info.to_excel(writer, sheet_name=currencies[i])
    currency_market_info = 0
    print("Done with "+currencies[i])
C:\Users\SAU\Anaconda3\lib\site-packages\pandas\core\ops.py:816: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  result = getattr(x, name)(y)

Traceback (most recent call last):
  File "", line 1, in
    runfile('C:/Users/SAU/Desktop/deep learning/cryptodata/data.py', wdir='C:/Users/SAU/Desktop/deep learning/cryptodata')
  File "C:\Users\SAU\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile
    execfile(filename, namespace)
  File "C:\Users\SAU\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)
  File "C:/Users/SAU/Desktop/deep learning/cryptodata/data.py", line 19, in
    currency_market_info.loc[currency_market_info['Volume']=="-",'Volume']=0
  File "C:\Users\SAU\Anaconda3\lib\site-packages\pandas\core\ops.py", line 879, in wrapper
    res = na_op(values, other)
  File "C:\Users\SAU\Anaconda3\lib\site-packages\pandas\core\ops.py", line 818, in na_op
    raise TypeError("invalid type comparison")
TypeError: invalid type comparison
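The TypeError comes from currency_market_info['Volume'] == "-": for some coins read_html already parses the Volume column as numeric, and this pandas version raises "invalid type comparison" when a numeric column is compared with a string (hence the FutureWarning just above). That reading is an inference from the warning, not verified against the site. A sketch that coerces the column with pd.to_numeric instead of comparing against "-":

# "-" and anything else non-numeric becomes NaN, which is then replaced by 0
# before casting to int64.
currency_market_info['Volume'] = (
    pd.to_numeric(currency_market_info['Volume'], errors='coerce')
      .fillna(0)
      .astype('int64')
)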
