pandas numpy matplotlib OverflowError: date value out of range - python

I have code to crawl through every file in a directory and plot every csv file it comes across. Each CSV has a header of the form
`Timestamp, P1rms (A), P2rms (A), P3rms (A), P4rms (A), P5rms (A), Vrms (V), P1 theta, P2 theta, P3 theta, P4 theta, P5 theta`.
Mon Sep 30 00:00:00 2013, 128, 128, 180, 177, 192, 43, 7, 7, 8, 8, 48
Mon Sep 30 00:00:01 2013, 127, 127, 182, 178, 193, 43, 8, 8, 8, 8, 49
# etc....
I am developing an FFT visualization option, and I am running into an overflow error when I FFT my data sets. Here is my exact problem:
When I run my code:
#!/usr/bin/env python
# canaryCrawler.py: walk a directory tree and save one plot per .csv file found.
# NOTE(review): the block indentation of this listing was lost when it was
# pasted; the statements are reproduced exactly as given (Python 2 syntax).
from pandas import *
import matplotlib.pyplot as plt
import os
import sys
import platform
# NOTE: `np` is bound to the numpy.fft submodule, not to numpy itself, so
# np.fft(...) further down is numpy.fft.fft.
import numpy.fft as np
# name of plots folder
plotfold='plots'
# System specific info: path separator is "/" on macOS ('Darwin'), "\\" on anything else
if platform.system()=='Darwin':comsep="/"
else: comsep="\\"
# How many columns should I plot?
numcol=6
# At least one argument (the root directory) is required.
if len(sys.argv)<2:
print 'usage: ./canaryCrawler.py [-c] or [-f] rootdir'
quit()
# With an option flag present the root directory is the second argument and
# a larger y-axis limit is used; otherwise the first argument is the root.
if len(sys.argv)>2:
ylim=1500
root = sys.argv[2]
else:
ylim=1200
root = sys.argv[1]
for subdir, dirs, files in os.walk(root):
# plot each file
for file in files:
if str(file)[-4:]=='.csv':
print 'plotting '+str(file)+'...'
# load csv as data frame
df=pandas.io.parsers.read_csv(subdir+comsep+file)
# convert each Timestamp string (e.g. 'Mon Sep 30 00:00:00 2013') to a datetime, one row at a time
for i in range(0,len(df.Timestamp)):
df.Timestamp[i] = datetime.strptime(df.Timestamp[i], '%a %b %d %H:%M:%S %Y')
# We only want the first 6 columns
df = df.ix[:,0:numcol]
# this length check always passes here: the script has already quit when
# fewer than 2 arguments were given
if len(sys.argv)>=2:
if sys.argv[1]=='-c' or sys.argv[1]=='-f':
plotfold='plots_Specialty'
df2 = df
# rebuild df as Timestamp plus two summed current columns
df=pandas.DataFrame(df2.Timestamp)
df['Residence']=df2['P1rms (A)']+df2['P2rms (A)']
df['Specialty']=df2['P3rms (A)']+df2['P4rms (A)']
if sys.argv[1]=='-f':
# store the FFT of the summed columns on df2 (np.fft is numpy.fft.fft, see
# the import above); the FFT output is complex-valued
df2['Residence']=np.fft(df['Residence'])
df2['Specialty']=np.fft(df['Specialty'])
# df2 (the original columns plus the two FFT columns) becomes the frame that is plotted
df=df2
print 'Fourier Transformation Complete'
plotfold='plots_Specialty_fft'
# set up plot
plt.figure()
df.plot(df.Timestamp,alpha=0.6,linewidth=2.3) # add transparency to see overlapping colors
plt.tight_layout(pad=1.08)
plt.legend(loc='best') # add legend in non-intrusive location
# NOTE(review): a second legend call with a fixed location and font size;
# presumably it supersedes the 'best' legend requested on the previous line
plt.legend(loc=5,prop={'size':14}) #
plt.ylabel('Current')
plt.xlabel('Time')
plt.gcf().autofmt_xdate()
plt.gcf().set_size_inches(12.7,9.2)
plt.gca().set_ylim([0,ylim])
# the title is built from the directory and the date of the first row
stamp = df.Timestamp[0]
day = datetime.strftime(stamp,'%a')
DOM=datetime.strftime(stamp,'%d')
month = datetime.strftime(stamp,'%b')
year = datetime.strftime(stamp,'%Y')
plt.title(subdir+' '+day+' '+month+' '+DOM+' '+year)
# keep plot
# check for existing plots folder,
# create one if it doesn't exist
if plotfold not in os.listdir(subdir):
print '** adding plots directory to ',subdir
os.mkdir(subdir+comsep+plotfold)
# save in plots directory
# the file name is '_' + three components of the subdir path + '_' + csv base name.
# NOTE: for shallow paths len(spsubs)-4 is negative, so those indices count
# from the end of the list (the sample run yields '_Aug_35ca7_2014_...')
spsubs = str(subdir).split(comsep)
filnam=''
for piece in range(len(spsubs)-4,len(spsubs)-1):
filnam+='_'+spsubs[piece]
filnam+='_'+str(file)[:-4]
saveto=subdir+comsep+plotfold+comsep+filnam
print '**** saving plot to ',saveto
plt.savefig(saveto)
# close the figure once it has been written
plt.close()
I get this error:
kilojoules$ ./canaryCrawler.py -f 35ca7/
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas-0.14.0-py2.7-macosx-10.9-x86_64.egg/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
.format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))
plotting 2014Aug04.csv...
Fourier Transformation Complete
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy-1.8.1-py2.7-macosx-10.9-x86_64.egg/numpy/core/numeric.py:460: ComplexWarning: Casting complex values to real discards the imaginary part
return array(a, dtype, copy=False, order=order)
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer
warnings.warn("tight_layout : falling back to Agg renderer")
**** saving plot to 35ca7/2014/Aug/plots_Specialty_fft/_Aug_35ca7_2014_2014Aug04
plotting 2014Aug05.csv...
Fourier Transformation Complete
**** saving plot to 35ca7/2014/Aug/plots_Specialty_fft/_Aug_35ca7_2014_2014Aug05
plotting 2014Aug07.csv...
Fourier Transformation Complete
**** saving plot to 35ca7/2014/Aug/plots_Specialty_fft/_Aug_35ca7_2014_2014Aug07
Traceback (most recent call last):
File "./canaryCrawler.py", line 97, in <module>
plt.savefig(saveto)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/pyplot.py", line 561, in savefig
return fig.savefig(*args, **kwargs)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/figure.py", line 1421, in savefig
self.canvas.print_figure(*args, **kwargs)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/backend_bases.py", line 2220, in print_figure
**kwargs)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/backends/backend_agg.py", line 505, in print_png
FigureCanvasAgg.draw(self)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/backends/backend_agg.py", line 451, in draw
self.figure.draw(self.renderer)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/figure.py", line 1034, in draw
func(*args)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/axes.py", line 2086, in draw
a.draw(renderer)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/lines.py", line 562, in draw
drawFunc(renderer, gc, tpath, affine.frozen())
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/lines.py", line 938, in _draw_lines
self._lineFunc(renderer, gc, path, trans)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/lines.py", line 978, in _draw_solid
renderer.draw_path(gc, path, trans)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/backends/backend_agg.py", line 145, in draw_path
self._renderer.draw_path(gc, path, transform, rgbFace)
OverflowError: Allocated too many blocks
I set the agg.path.chunksize parameter in matplotlibrc to 10000000 (agg.path.chunksize : 10000000). I only get this error when I run the -f fft option. How can I prevent this error?

I'm not sure, but try the pdf or svg backends:
#!/usr/bin/env python
# Select a non-Agg backend so the figure is not rendered through Agg.
from pandas import *
import matplotlib as mpl
# The backend must be chosen after matplotlib is imported (so that `mpl`
# exists) and before pyplot is imported.
mpl.use('pdf')
import matplotlib.pyplot as plt
# ['pdf', 'pgf', 'Qt4Agg', 'GTK', 'GTKAgg', 'ps', 'agg',
# 'cairo', 'MacOSX', 'GTKCairo', 'WXAgg', 'template', 'TkAgg',
# 'GTK3Cairo', 'GTK3Agg', 'svg', 'WebAgg', 'CocoaAgg', 'emf', 'gdk', 'WX']
# (...)
plt.savefig('svg.pdf') # Consider file extension (!)

Related

OverflowError: int too big to convert when formatting date on pandas series plot

I'm trying to plot a pandas series, but I'm encountering an error when I attempt to format the x-axis date.
(A related issue was identified in the comments, but it appears that it was resolved in a much older version of pandas than what I'm using. So, it seems like this is a new problem.)
Consider a plot of the following pandas series:
import pandas as pd
# Five samples keyed by whole-second timestamps, 5 seconds apart.
d = {pd.Timestamp('2021-03-15 08:30:00'): -65.926651,
pd.Timestamp('2021-03-15 08:30:05'): -42.115551,
pd.Timestamp('2021-03-15 08:30:10'): -24.699627,
pd.Timestamp('2021-03-15 08:30:15'): -12.010081,
pd.Timestamp('2021-03-15 08:30:20'): -2.781321}
# The dict keys become the index of the Series.
s = pd.Series(d)
# Keep the object returned by plot() so its x-axis can be formatted afterwards.
ax = s.plot()
I seek to format the x-axis date on the plot using:
from matplotlib.dates import DateFormatter
# Show only the time of day (hours:minutes:seconds) on the tick labels.
format_str: str = '%H:%M:%S'
format_: DateFormatter = DateFormatter(format_str)
# Installing this formatter on the pandas-created axis is the step after
# which the OverflowError is reported.
ax.xaxis.set_major_formatter(format_)
This results in the following error:
Traceback (most recent call last):
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/backends/backend_macosx.py", line 61, in _draw
self.figure.draw(renderer)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/figure.py", line 1863, in draw
mimage._draw_list_compositing_images(
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/image.py", line 131, in _draw_list_compositing_images
a.draw(renderer)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/cbook/deprecation.py", line 411, in wrapper
return func(*inner_args, **inner_kwargs)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/axes/_base.py", line 2747, in draw
mimage._draw_list_compositing_images(renderer, self, artists)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/image.py", line 131, in _draw_list_compositing_images
a.draw(renderer)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/axis.py", line 1164, in draw
ticks_to_draw = self._update_ticks()
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/axis.py", line 1022, in _update_ticks
major_labels = self.major.formatter.format_ticks(major_locs)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/ticker.py", line 250, in format_ticks
return [self(value, i) for i, value in enumerate(values)]
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/ticker.py", line 250, in <listcomp>
return [self(value, i) for i, value in enumerate(values)]
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/dates.py", line 605, in __call__
return num2date(x, self.tz).strftime(self.fmt)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/dates.py", line 511, in num2date
return _from_ordinalf_np_vectorized(x, tz).tolist()
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/numpy/lib/function_base.py", line 2108, in __call__
return self._vectorize_call(func=func, args=vargs)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/numpy/lib/function_base.py", line 2192, in _vectorize_call
outputs = ufunc(*inputs)
File "/Users/me/VirtualEnvironments/my_venv/lib/python3.9/site-packages/matplotlib/dates.py", line 331, in _from_ordinalf
np.timedelta64(int(np.round(x * MUSECONDS_PER_DAY)), 'us'))
OverflowError: int too big to convert
Interestingly, if I add a fractional offset to the timestamps, everything works:
s.index += pd.DateOffset(seconds=0.5)
When I examine x in the np.timedelta64 call, it corresponds to the number of days since the start of the unix epoch (1 Jan 1970) only if I add a fractional part to the timestamp. If there's no fractional part, the resulting integer is huge and seems to have no obvious relationship to the number of days since 1 Jan 1970.
What's wrong here?
The error occurred because the data passed in exceeded the numeric range that DateFormatter can handle.
Please refer to the official reference.
For example, the actual data for the first time series looks like this
s.index[0].value
1615797000000000000
This needs to be converted to numbers that can be handled by matplotlib.
s.index = mdates.date2num(s.index)
s
18701.354167 -65.926651
18701.354225 -42.115551
18701.354282 -24.699627
18701.354340 -12.010081
18701.354398 -2.781321
dtype: float64
Update (I am on version 3.6.3, so I have revised the code accordingly):
import matplotlib.dates as mdates

# Draw the series with point markers joined by lines and keep the object
# returned by plot() so its x-axis can be configured.
ax = s.plot(style='o-')

# Label the x ticks with the time of day only.
time_format = '%H:%M:%S'
formatter = mdates.DateFormatter(time_format)
ax.xaxis.set_major_formatter(formatter)
I had the same error message and I used this to resolve it.
import matplotlib.dates as mdates
# Convert `ts` with Matplotlib's epoch2num helper before plotting.
# NOTE(review): presumably `ts` holds seconds since the Unix epoch - confirm.
# NOTE(review): epoch2num appears to be deprecated in newer Matplotlib
# releases; check the matplotlib.dates documentation for your version.
ts = mdates.epoch2num(ts)
After that, I didn't get any errors. I hope this helps.

Setting the linestyle for the longitude and latitude lines in matplotlib's Basemap?

I am using matplotlib's Basemap to draw maps of the world and want to include longitude and latitude lines. This can be done using drawmeridians() and drawparallels(), but the linestyle of the corresponding lines can only be set via the keyword dashes. According to the documentation (see here), it should work as follows:
dash pattern for meridians (default [1,1], i.e. 1 pixel on, 1 pixel off)
I tried dashes=[1,0], but that did not work. Is there a simple way to get a solid linestyle?
Here is my code:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.basemap import Basemap
fig1, ax1 = plt.subplots(1,1)
# 'mill' projection map covering latitude -60..90 and longitude -180..180
map1 = Basemap( resolution='l', projection='mill',
llcrnrlat=-60., llcrnrlon=-180.,
urcrnrlat=90., urcrnrlon=180. )
map1.drawcoastlines()
map1.drawmapboundary( fill_color='aqua' )
map1.fillcontinents( color='coral', lake_color='aqua' )
# labels=[left,right,top,bottom]
# parallels every 20 degrees, labelled on the left and right edges
map1.drawparallels( np.arange(-80.,81.,20.), labels=[True,True,False,False] )
# meridians every 40 degrees, labelled on the top and bottom edges;
# no `dashes` argument is passed in this listing
map1.drawmeridians( np.arange(-180.,181.,40.), labels=[False,False,True,True] )
plt.show()
Here is the resulting map:
Edit 1: I just tried on a different computer and there it works, i.e. dashes=[1,0] results in a solid linestyle. The versions used on that computer are (according to pip freeze):
basemap==1.2.0
matplotlib==2.2.3
As soon as I have access again to the original computer, I'll check what is going on there (and which versions are installed).
Edit 2: Being back at the computer where it did not work, I can now tell a bit more. First, the following versions are used:
basemap==1.1.1
matplotlib==3.0.2
Then the error message (which I forgot to include previously):
ValueError: All values in the dash list must be positive
Edit 3: For the sake of completeness (and since it was partly helpful to hunt down the solution), here is the full Traceback:
Exception in Tkinter callback
Traceback (most recent call last):
File "/usr/lib/python2.7/lib-tk/Tkinter.py", line 1540, in __call__
return self.func(*args)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_tkagg.py", line 280, in resize
self.show()
File "/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_tkagg.py", line 351, in draw
FigureCanvasAgg.draw(self)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.py", line 464, in draw
self.figure.draw(self.renderer)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/artist.py", line 63, in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/figure.py", line 1143, in draw
renderer, self, dsu, self.suppressComposite)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/image.py", line 139, in _draw_list_compositing_images
a.draw(renderer)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/artist.py", line 63, in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/axes/_base.py", line 2409, in draw
mimage._draw_list_compositing_images(renderer, self, dsu)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/image.py", line 139, in _draw_list_compositing_images
a.draw(renderer)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/artist.py", line 63, in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/lines.py", line 822, in draw
drawFunc(renderer, gc, tpath, affine.frozen())
File "/usr/local/lib/python2.7/dist-packages/matplotlib/lines.py", line 1267, in _draw_lines
self._lineFunc(renderer, gc, path, trans)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/lines.py", line 1297, in _draw_dashed
gc.set_dashes(self._dashOffset, self._dashSeq)
File "/usr/local/lib/python2.7/dist-packages/matplotlib/backend_bases.py", line 1007, in set_dashes
raise ValueError("All values in the dash list must be positive")
After some research on some bugreports on github I found the solution [1], dashes=(None,None):
# dashes=(None,None) is the workaround from the linked basemap issue: it
# avoids the "dash list must be positive" error raised for dashes=[1,0]
# and gives solid meridian lines.
map1.drawmeridians( np.arange(-180.,181.,40.), labels=[False,False,True,True], dashes=(None,None) )
[1] https://github.com/matplotlib/basemap/issues/173#issuecomment-68243710

Matplotlib fails with ValueError: cannot convert float NaN to integer

I encountered a strange issue with the seaborn library. When generating barplot for data with ranging from very low to very high values, e.g.:
job duration type
0 1 83066.639344 A
1 2 820.700000 B
it fails with:
ValueError: cannot convert float NaN to integer
This looks like a bug in matplotlib and a duplicate of "pyplot.savefig fails with ValueError: cannot convert float NaN to integer". The latter has not been fixed yet. Is there a workaround for it?
Here's the minimal working example to reproduce the issue:
#!/usr/bin/env python3
# Minimal reproducer: a horizontal seaborn bar plot with hue='type' whose
# bars are annotated with their widths, then saved to a PNG.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

d = {'job': [1, 2],
     'duration': [83066.639344, 820.700000],
     'type': ['A', 'B']}
df = pd.DataFrame(d)
plot = sns.catplot(x="duration", y="job", data=df, hue='type',
                   color="b", kind="bar", height=3, aspect=4)
ax = plot.axes.flat[0]
# Write each bar's width as text at (width, vertical centre of the bar).
# This is left exactly as in the question because it is what triggers the
# reported "posx and posy should be finite values" / NaN failure.
for p in plt.gca().patches:
    ax.text(p.get_width(),
            p.get_y() + p.get_height() / 2,
            p.get_width())
plot.savefig("barplot.png")
Some observations:
The problem does not occur when I do not differentiate between 'type' (no use of hue='type').
Here's the full stacktrace:
posx and posy should be finite values
posx and posy should be finite values
/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/numpy/core/fromnumeric.py:83: RuntimeWarning: invalid value encountered in reduce
return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
Traceback (most recent call last):
File "/Users/dzieciou/projects/example/gocd/reproduce.py", line 31, in <module>
plot.savefig("barplot.png")
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/seaborn/axisgrid.py", line 37, in savefig
self.fig.savefig(*args, **kwargs)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/figure.py", line 2094, in savefig
self.canvas.print_figure(fname, **kwargs)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/backend_bases.py", line 2075, in print_figure
**kwargs)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 510, in print_png
FigureCanvasAgg.draw(self)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 402, in draw
self.figure.draw(self.renderer)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/artist.py", line 50, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/figure.py", line 1649, in draw
renderer, self, artists, self.suppressComposite)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/image.py", line 138, in _draw_list_compositing_images
a.draw(renderer)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/artist.py", line 50, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/axes/_base.py", line 2610, in draw
mimage._draw_list_compositing_images(renderer, self, artists)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/image.py", line 138, in _draw_list_compositing_images
a.draw(renderer)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/artist.py", line 50, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/axis.py", line 1185, in draw
ticks_to_draw = self._update_ticks(renderer)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/axis.py", line 1023, in _update_ticks
tick_tups = list(self.iter_ticks()) # iter_ticks calls the locator
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/axis.py", line 967, in iter_ticks
majorLocs = self.major.locator()
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/ticker.py", line 1985, in __call__
return self.tick_values(vmin, vmax)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/ticker.py", line 1993, in tick_values
locs = self._raw_ticks(vmin, vmax)
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/ticker.py", line 1932, in _raw_ticks
nbins = np.clip(self.axis.get_tick_space(),
File "/Users/dzieciou/virtualenvs/seaborn/lib/python3.7/site-packages/matplotlib/axis.py", line 2543, in get_tick_space
return int(np.floor(length / size))
ValueError: cannot convert float NaN to integer
Note that this is neither really a bug, nor is it related to the linked bug, which is indeed fixed.
One could argue that there should be a better error message when plotting text at nan coordinates though.
Before looking at the error, it seems you have another problem in your code, which is that you set the x coordinate of the text to the width of the bar. They are usually unrelated and you might have meant to use p.get_x() instead.
Now two options:
1. Don't position text at invalid coordinates.
# Option 1: only place text at finite coordinates.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

d = {'job': list(range(1, 3)),
     'duration': [83066.639344, 820.700000],
     'type': ['A', 'B']}
df = pd.DataFrame(d)
plot = sns.catplot(x="duration", y="job", data=df, hue='type',
                   color="b", kind="bar", height=3, aspect=4)
ax = plot.axes.flat[0]
for p in plt.gca().patches:
    # Replace a NaN bar height with 0 so the text position stays finite.
    # nan_to_num already maps NaN to 0.0 by default; its second positional
    # parameter is `copy`, not the replacement value, so no extra argument
    # is passed.
    height = np.nan_to_num(p.get_height())
    ax.text(p.get_x(), p.get_y() + height/2., "My text")
plot.savefig("barplot.png")
plt.show()
2. Don't use bbox_inches="tight".
If you want to keep your code as it is, you can work around this by not setting the bbox_inches="tight" option in seaborn's savefig.
plot.savefig("barplot.png", bbox_inches=None)
Or use matplotlib's savefig option
plot.fig.savefig("barplot.png")

Memory Error when plotting images using loops python

So I have a lot of images that I am plotting for data quality control purposes. I am using nested loops to go through and extract the gzipped files and then plot and save the images as .png so they can later be referred to. I can get it to run for a while but eventually I will get a Memory Error. I don't know if I am making a dumb mistake, any help is appreciated.
I have tried making temp folders to extract the gzipped files to and then removing them. I have also tried closing all the plots after I make them each time in the loop; however, I still get a memory error after running it for a while. I did some googling and found mentions of an issue with flushing and fsync, as well as the fact that the gzipped files are on an NTFS-formatted external hard drive, but that is where I get lost.
Below is my relevant code:
# Drain `thelist`: extract each archive into a fresh temporary directory,
# render every matching image file to a .png under `savepath`, then clean up.
# NOTE(review): block indentation was lost in this listing; the statements
# are reproduced exactly as given (Python 2 syntax).
while len(thelist) > 0 :
temppath = tempfile.mkdtemp()
# work inside the temp dir so the relative glob patterns below match the extracted files
os.chdir(temppath)
tpath = thelist.pop()
myextractor.myextract(tpath)
# collect the extracted files by their name suffix
paths = glob('*HRV')+glob('*.IR120')+glob('*.WV73')+glob('*.VIS8')+glob('*.VIS120')+glob('*.VIS6')+glob('*.IR16')
for x in range(0, len(paths)):
# build the absolute path of the extracted file, using forward slashes
spath ='/' + paths.__getitem__(x)
spath1 = spath.replace('\\','/')
spath = temppath+ spath1
spath = spath.replace('\\','/')
r = 0
r=McIdasObject.McIdasImageFile(spath)
# the trailing semicolon is an empty statement separator here
# imshow is called on the current pyplot figure; `fig` holds what imshow returns
fig = plt.imshow (np.array(r));
# most of this is just getting the correct file and path name
# (strpath is not used again in the code shown)
strpath = str(temppath)
strpath = strpath.replace('c:\\users\\appdata\\local\\temp\\','/')
# output sub-folder name: characters 20 through 28 of the archive path
folder = tpath[20]+tpath[21]+tpath[22]+tpath[23]+tpath[24]+tpath[25]+tpath[26]+tpath[27]+tpath[28]
newpath = savepath + '/' + folder
if not os.path.exists(newpath): os.makedirs(newpath)
print 'saving:' + newpath + spath1 + '.png'
plt.savefig(newpath + spath1+ '.png')
# drop the image object, close all figures and force a garbage collection
# NOTE(review): with the indentation lost it is unclear whether this cleanup
# runs once per image or once per archive
r=0
plt.close("all")
gc.collect()
# leave the temp dir before it is (possibly) deleted
os.chdir(destin)
# temp dirs that produced no matching files are recorded and kept; all others are removed
if len(paths) < 1 :
badimages.append(temppath)
else:
shutil.rmtree(temppath)
This is the trace back
Traceback (most recent call last):
File "<ipython-input-1-22dce83b27e3>", line 1, in <module>
runfile('C:/Users/Alex/Documents/Python Scripts/image printer.py', wdir='C:/Users/Alex/Documents/Python Scripts')
File "C:\Users\Alex\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 682, in runfile
execfile(filename, namespace)
File "C:\Users\Alex\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 71, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/Alex/Documents/Python Scripts/image printer.py", line 57, in <module>
plt.savefig(newpath + spath1+ '.png')
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\pyplot.py", line 577, in savefig
res = fig.savefig(*args, **kwargs)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\figure.py", line 1476, in savefig
self.canvas.print_figure(*args, **kwargs)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\backends\backend_qt5agg.py", line 161, in print_figure
FigureCanvasAgg.print_figure(self, *args, **kwargs)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\backend_bases.py", line 2211, in print_figure
**kwargs)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\backends\backend_agg.py", line 521, in print_png
FigureCanvasAgg.draw(self)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\backends\backend_agg.py", line 469, in draw
self.figure.draw(self.renderer)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\artist.py", line 59, in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\figure.py", line 1085, in draw
func(*args)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\artist.py", line 59, in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\axes\_base.py", line 2110, in draw
a.draw(renderer)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\artist.py", line 59, in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\image.py", line 373, in draw
im = self.make_image(renderer.get_image_magnification())
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\image.py", line 597, in make_image
transformed_viewLim)
File "C:\Users\Alex\Anaconda\lib\site-packages\matplotlib\image.py", line 219, in _get_unsampled_image
x = (x * 255).astype(np.uint8)
MemoryError
Here's a simple example that's the same as your problem. On my machine, this causes ipython to use up all the memory and then crash:
# Demonstration of the problem: every iteration adds another 1000x1000
# image via imshow and nothing is ever released, so memory use keeps
# growing.
import matplotlib.pyplot as plt
import numpy as np

for ii in np.arange(1000):
    fig = plt.imshow(np.random.random([1000,1000]))
(Next time, if you can figure it out, try to provide an example like this that's as small as possible, removing all nonessential code).
Rather than recreating a figure every time, try creating the figure once and re-using it:
# Create the image once, then only swap its pixel data on each iteration
# instead of calling imshow again.
import matplotlib.pyplot as plt
import numpy as np

# imshow returns the image object (not a figure), hence the name `im`.
im = plt.imshow(np.random.random([1000,1000]))
for ii in np.arange(1000):
    im.set_data(np.random.random([1000,1000]))

Why do I get "python int too large to convert to C long" errors when I use matplotlib's DateFormatter to format dates on the x axis?

Following this answer's use of DateFormatter, I tried to plot a time series and label its x axis with years using pandas 0.15.0 and matplotlib 1.4.2:
import datetime as dt
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas.io.data as pdio
import scipy as sp
# date range requested from the data source
t1 = dt.datetime(1960, 1, 1)
t2 = dt.datetime(2014, 6, 1)
# fetch the "GS10" series from "fred" and resample it with rule "Q", aggregating with the mean
data = pdio.DataReader("GS10", "fred", t1, t2).resample("Q", how=sp.mean)
fig, ax1 = plt.subplots()
# the pandas index is passed directly as the x values
ax1.plot(data.index, data.GS10)
ax1.set_xlabel("Year")
ax1.set_ylabel("Rate (%)")
# label the x ticks with the four-digit year
ax1.xaxis.set_major_formatter(mpl.dates.DateFormatter("%Y"))
fig.suptitle("10-yr Treasury Rate", fontsize=14)
# saving draws the figure; this is the call that raises the OverflowError
fig.savefig('test.eps')
The final line throws an error: OverflowError: Python int too large to convert to C long
with this traceback:
C:\Anaconda3\lib\site-packages\IPython\core\formatters.py:239:
FormatterWarning: Exception in image/png formatter: Python int too
large to convert to C long FormatterWarning,
Traceback (most recent call
last):
File "", line 1, in
runfile('D:/username/latex_template/new_pandas_example.py', wdir='D:/username/latex_template')
File
"C:\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py",
line 580, in runfile
execfile(filename, namespace)
File
"C:\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py",
line 48, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "D:/username/latex_template/new_pandas_example.py", line 18, in
fig.savefig('test.eps')
File "C:\Anaconda3\lib\site-packages\matplotlib\figure.py", line
1470, in savefig
self.canvas.print_figure(*args, **kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\backend_bases.py",
line 2194, in print_figure
**kwargs)
File
"C:\Anaconda3\lib\site-packages\matplotlib\backends\backend_ps.py",
line 992, in print_eps
return self._print_ps(outfile, 'eps', *args, **kwargs)
File
"C:\Anaconda3\lib\site-packages\matplotlib\backends\backend_ps.py",
line 1020, in _print_ps
**kwargs)
File
"C:\Anaconda3\lib\site-packages\matplotlib\backends\backend_ps.py",
line 1110, in _print_figure
self.figure.draw(renderer)
File "C:\Anaconda3\lib\site-packages\matplotlib\artist.py", line 59,
in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\figure.py", line
1079, in draw
func(*args)
File "C:\Anaconda3\lib\site-packages\matplotlib\artist.py", line 59,
in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\axes_base.py", line
2092, in draw
a.draw(renderer)
File "C:\Anaconda3\lib\site-packages\matplotlib\artist.py", line 59,
in draw_wrapper
draw(artist, renderer, *args, **kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\axis.py", line 1114,
in draw
ticks_to_draw = self._update_ticks(renderer)
File "C:\Anaconda3\lib\site-packages\matplotlib\axis.py", line 957,
in _update_ticks
tick_tups = [t for t in self.iter_ticks()]
File "C:\Anaconda3\lib\site-packages\matplotlib\axis.py", line 957,
in
tick_tups = [t for t in self.iter_ticks()]
File "C:\Anaconda3\lib\site-packages\matplotlib\axis.py", line 905,
in iter_ticks
for i, val in enumerate(majorLocs)]
File "C:\Anaconda3\lib\site-packages\matplotlib\axis.py", line 905,
in
for i, val in enumerate(majorLocs)]
File "C:\Anaconda3\lib\site-packages\matplotlib\dates.py", line 411,
in call
dt = num2date(x, self.tz)
File "C:\Anaconda3\lib\site-packages\matplotlib\dates.py", line 345,
in num2date
return _from_ordinalf(x, tz)
File "C:\Anaconda3\lib\site-packages\matplotlib\dates.py", line 225,
in _from_ordinalf
dt = datetime.datetime.fromordinal(ix)
OverflowError: Python int too large to convert to C long
Am I using DateFormatter incorrectly here? How can I easily put years (or any time format, since my time series might differ) on the x-axis of a matplotlib figure?
This is a 'regression' in pandas 0.15 (due to the refactor of Index), see https://github.com/matplotlib/matplotlib/issues/3727 and https://github.com/pydata/pandas/issues/8614, but is fixed in 0.15.1.
Short story: matplotlib now sees the pandas index as an array of datetime64[ns] values (which are actually very large int64s), instead of an array of Timestamps (which are subclass of datetime.datetime, and can be handled by matplotlib) in previous versions of pandas. So the underlying reason is that matplotlib does not handle datetime64 as date values but as ints.
For pandas 0.15.0 (but better upgrade to a newer version), there are two possible workarounds:
Register the datetime64 type, so it will also be handled as a date by matplotlib:
units.registry[np.datetime64] = pd.tseries.converter.DatetimeConverter()
Or convert the DatetimeIndex (with datetime64 values) to an array of datetime.datetime values with the to_pydatetime method, and plot this:
ax1.plot(data.index.to_pydatetime(), data.GS10)
related question: Plotting datetimeindex on x-axis with matplotlib creates wrong ticks in pandas 0.15 in contrast to 0.14

Categories

Resources