How to achieve spaces between stacked bar chart, center aligned

How to achieve spaces between stacked bar chart, center aligned - python

I am using matplotlib and a stackedbarchart program for it that someone wrote to graph a stacked bar chart.
My graph:
x-axis has 8 income distributions, one for each bar
y-axis is the % of people in each income distribution. person type-a is the first stack, person type-b is the secon dstack, person type-c is the third stack.
My barchart is center aligned, and I am trying to figure out how to space out the bars so the graph looks better and so the labels are easier to read. Any suggestions, or clarifications?
The program is stackedBarGraph.py and the code looks like this, where widths is an array of 8 values, each corresponding to the width of a bar chart.
Let me know if you need any more information (I tried to keep everything relevant). Thanks!
Full code (I hope it's not too difficult to read):
from __future__ import division
from pylab import *
import seaborn as sns
import pandas as pd
import numpy as np
from stackedbars import StackedBarGrapher
data = csv2rec('coa.csv', delimiter=',')
x = data['totalgrantaid']
y = data['studenteffort']
z = data['parentcontim']
g = data['parentincomeim']
df = pd.DataFrame(dict(grant = x, stud = y, par = z, income = g))
#organize the data to graph
income_brackets = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000), (100000, 150000), (150000,200000), (200000,250000), (250000,300000)]
source = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
for key in source:
source[key].append(median(df.query('income > {} and income < {}'.format(lower, upper))[key]))
#set the widths
source2 = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
for key in source2:
source2[key].append(pd.DataFrame(df.query('income > {} and income < {}'.format(lower,upper))).count()[key])
total = pd.DataFrame(df.query('income > 0 and income < 300000')['grant']).count()
total = total/10
#graph specifications
d_widths = [(source2['grant'][i]/total)[0] for i in range(8)]
d_colors = ['r','g','b']
d_labels = ('<25000', '25000-\n50000', '50000-\n75000', '75000-\n100000', '100000-\n150000', '150000-\n200000', '200000-\n250000', '250000-\n300000')
d = np.array([[source[k][i] for k in ('grant', 'stud', 'par')] for i in range(8)])
#the graph
fig = plt.figure()
ax1 = fig.add_subplot(111)
mygraph = StackedBarGrapher()
mygraph.stackedBarPlot(ax1,d,d_colors, edgeCols=['#000000']*3,widths = d_widths, showFirst = 8, xLabels=d_labels,scale=True)
Stackedbarchart program:
def stackedBarPlot(self,
ax, # axes to plot onto
data, # data to plot
cols, # colors for each level
xLabels = None, # bar specific labels
yTicks = 6., # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
edgeCols=None, # colors for edges
showFirst=-1, # only plot the first <showFirst> bars
scale=False, # scale bars to same height
widths=None, # set widths for each bar
heights=None, # set heights for each bar
ylabel='', # label for x axis
xlabel='' # label for y axis
):
#------------------------------------------------------------------------------
# data fixeratering
# make sure this makes sense
if showFirst != -1:
showFirst = np.min([showFirst, np.shape(data)[0]])
data_copy = np.copy(data[:showFirst]).transpose().astype('float')
data_shape = np.shape(data_copy)
if heights is not None:
heights = heights[:showFirst]
if widths is not None:
widths = widths[:showFirst]
showFirst = -1
else:
data_copy = np.copy(data).transpose()
data_shape = np.shape(data_copy)
# determine the number of bars and corresponding levels from the shape of the data
num_bars = data_shape[1]
levels = data_shape[0]
if widths is None:
widths = np.array([1] * num_bars)
x = np.arange(num_bars)
else:
x = [0]
for i in range(1, len(widths)):
x.append(x[i-1] + (widths[i-1] + widths[i])/2)
# stack the data --
# replace the value in each level by the cumulative sum of all preceding levels
data_stack = np.reshape([float(i) for i in np.ravel(np.cumsum(data_copy, axis=0))], data_shape)
# scale the data is needed
if scale:
data_copy /= data_stack[levels-1]
data_stack /= data_stack[levels-1]
if heights is not None:
print "WARNING: setting scale and heights does not make sense."
heights = None
elif heights is not None:
data_copy /= data_stack[levels-1]
data_stack /= data_stack[levels-1]
for i in np.arange(num_bars):
data_copy[:,i] *= heights[i]
data_stack[:,i] *= heights[i]
#------------------------------------------------------------------------------
# ticks
if yTicks is not "none":
# it is either a set of ticks or the number of auto ticks to make
real_ticks = True
try:
k = len(yTicks[1])
except:
real_ticks = False
if not real_ticks:
yTicks = float(yTicks)
if scale:
# make the ticks line up to 100 %
y_ticks_at = np.arange(yTicks)/(yTicks-1)
y_tick_labels = np.array(["%0.0f"%(i * 100) for i in y_ticks_at])
else:
# space the ticks along the y axis
y_ticks_at = np.arange(yTicks)/(yTicks-1)*np.max(data_stack)
y_tick_labels = np.array([str(i) for i in y_ticks_at])
yTicks=(y_ticks_at, y_tick_labels)
#------------------------------------------------------------------------------
# plot
if edgeCols is None:
edgeCols = ["none"]*len(cols)
# bars
ax.bar(x,
data_stack[0],
color=cols[0],alpha=0.7,
edgecolor=edgeCols[0],
width=widths,
linewidth=0.5,
align='center'
)
for i in np.arange(1,levels):
ax.bar(x,
data_copy[i],
bottom=data_stack[i-1],
color=cols[i],alpha=0.7,
edgecolor=edgeCols[i],
width=widths,
linewidth=0.5,
align='center'
)
# borders
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["left"].set_visible(False)
# make ticks if necessary
if yTicks is not "none":
ax.tick_params(axis='y', which='both', labelsize=8, direction="out")
ax.yaxis.tick_left()
plt.yticks(yTicks[0], yTicks[1])
else:
plt.yticks([], [])
if xLabels is not None:
ax.tick_params(axis='x', which='both', labelsize=8, direction="out")
ax.xaxis.tick_bottom()
plt.xticks(x, xLabels, rotation='horizontal')
else:
plt.xticks([], [])
# limits
ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
ax.set_ylim(0, np.max(data_stack))
# labels
if xlabel != '':
ax.xlabel(xlabel)
if ylabel != '':
ax.ylabel(ylabel)

Alright thanks everyone for the input (and Bill for showing me how to use list comprehensions effectively).
I was able to alter the program to achieve what I wanted (I think). I added a new variable, axspacing to the below parts of the program:
def stackedBarPlot(self,
ax, # axes to plot onto
data, # data to plot
cols, # colors for each level
xLabels = None, # bar specific labels
yTicks = 6., # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
edgeCols=None, # colors for edges
showFirst=-1, # only plot the first <showFirst> bars
scale=False, # scale bars to same height
widths=None, # set widths for each bar
heights=None, # set heights for each bar
ylabel='', # label for x axis
xlabel='', # label for y axis
xaxlim=None,
axspacing=0,
):
.
if widths is None:
widths = np.array([1] * num_bars)
x = np.arange(num_bars)
else:
x = [0]
for i in range(1, len(widths)):
x.append(x[i-1] + (widths[i-1] + widths[i])/2 + axspacing)
.
# limits
#ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
ax.set_ylim(0, np.max(data_stack))
if xaxlim is None:
ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5 + num_bars * axspacing)
else:
ax.set_xlim(xaxlim)

Related

Stacked bar plot color by a diferent category

I want to make a plot where it shows the RFS by floor opened by the unit_id, but i want the color of the unit_id to be defined by the year. So far i make it work for a reduced set of data, but i think it will be difficult to scale the code.
What i do is, first i identify the order that each unit has at it's floor so i first plot all the units by floor in the first position, then the ones that are in second position and so on.
Thanks!
df_build = pd.DataFrame({'floor':[1,1,1,2,2,2,3,3,3],'unidad':[100,101,102,200,201,202,300,301,302],
'rsf':[2000,1000,1500,1500,2000,1000,1000,1500,2000],'order':[0,1,2,0,1,2,0,1,2],
'year':[2008,2009,2010,2009,2010,2011,2010,2011,2012]})
assign_colors = {2008:'tab:red',2009:'tab:blue',2010:'tab:green',2011:'tab:pink',2012:'tab:olive'}
labels = list(df_build.floor.unique())
order_0 = df_build[df_build.order==0].rsf.values
c1=list(df_build[df_build.order==0].year.replace(assign_colors).values)
order_1 = df_build[df_build.order==1].rsf.values
c2=list(df_build[df_build.order==1].year.replace(assign_colors).values)
order_2 = df_build[df_build.order==2].rsf.values
c3=list(df_build[df_build.order==2].year.replace(assign_colors).values)
width = 0.35
fig, ax = plt.subplots()
ax.barh(labels, order_0, width,color=c1)
ax.barh(labels, order_1, width,left=order_0, color=c2)
ax.barh(labels, order_2, width,left=order_0+order_1, color=c3)
ax.set_ylabel('floor')
ax.set_title('Stacking Plan')
#ax.legend()
plt.show()

Try pivoting the data and loop:
# map the color
df_build['color'] = df_build['year'].map(assign_colors)
# pivot the data
plot_df = df_build.pivot(index='floor', columns='order')
# plot by row
fig, ax = plt.subplots()
for i in df.index:
rights = plot_df.loc[i,'rsf'].cumsum()
lefts = rights.shift(fill_value=0)
ax.barh(i, plot_df.loc[i,'rsf'], left=lefts, color=plot_df.loc[i,'color'])
for j in range(len(rights)):
label = plot_df.loc[i, 'unidad'].iloc[j]
rsf = plot_df.loc[i, 'rsf'].iloc[j]
x = (rights.iloc[j] + lefts.iloc[j]) / 2
ax.text(x, i, f'{label}-{rsf}', ha='center')
Output:

matplotlib barh: how to make a visual gap between two groups of bars?

I have some sorted data of which I only show the highest and lowest values in a figure. This is a minimal version of what currently I have:
import matplotlib.pyplot as plt
# some dummy data (real data contains about 250 entries)
x_data = list(range(98, 72, -1))
labels = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
ranks = list(range(1, 27))
fig, ax = plt.subplots()
# plot 3 highest entries
bars_top = ax.barh(labels[:3], x_data[:3])
# plot 3 lowest entries
bars_bottom = ax.barh(labels[-3:], x_data[-3:])
ax.invert_yaxis()
# print values and ranks
for bar, value, rank in zip(bars_top + bars_bottom,
x_data[:3] + x_data[-3:],
ranks[:3] + ranks[-3:]):
y_pos = bar.get_y() + 0.5
ax.text(value - 4, y_pos, value, ha='right')
ax.text(4, y_pos, f'$rank:\ {rank}$')
ax.set_title('Comparison of Top 3 and Bottom 3')
plt.show()
Result:
I'd like to make an additional gap to this figure to make it more visually clear that the majority of data is in fact not displayed in this plot. For example, something very simple like the following would be sufficient:
Is this possible in matplotlib?

Here is a flexible approach that just plots a dummy bar in-between. The yaxis-transform together with the dummy bar's position is used to plot 3 black dots.
If multiple separations are needed, they all need a different dummy label, for example repeating the space character.
import matplotlib.pyplot as plt
import numpy as np
# some dummy data (real data contains about 250 entries)
x_data = list(range(98, 72, -1))
labels = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
ranks = list(range(1, 27))
fig, ax = plt.subplots()
# plot 3 highest entries
bars_top = ax.barh(labels[:3], x_data[:3])
# dummy bar inbetween
dummy_bar = ax.barh(" ", 0, color='none')
# plot 3 lowest entries
bars_bottom = ax.barh(labels[-3:], x_data[-3:])
ax.invert_yaxis()
# print values and ranks
for bar, value, rank in zip(bars_top + bars_bottom,
x_data[:3] + x_data[-3:],
ranks[:3] + ranks[-3:]):
y_pos = bar.get_y() + 0.5
ax.text(value - 4, y_pos, value, ha='right')
ax.text(4, y_pos, f'$rank:\ {rank}$')
# add three dots using the dummy bar's position
ax.scatter([0.05] * 3, dummy_bar[0].get_y() + np.linspace(0, dummy_bar[0].get_height(), 3),
marker='o', s=5, color='black', transform=ax.get_yaxis_transform())
ax.set_title('Comparison of Top 3 and Bottom 3')
ax.tick_params(axis='y', length=0) # hide the tick marks
ax.margins(y=0.02) # less empty space at top and bottom
plt.show()

The following function,
def top_bottom(x, l, n, ax=None, gap=1):
from matplotlib.pyplot import gca
if n <= 0 : raise ValueError('No. of top/bottom values must be positive')
if n > len(x) : raise ValueError('No. of top/bottom values should be not greater than data length')
if n+n > len(x):
print('Warning: no. of top/bottom values is larger than one'
' half of data length, OVERLAPPING')
if gap < 0 : print('Warning: some bar will be overlapped')
ax = ax if ax else gca()
top_x = x[:+n]
bot_x = x[-n:]
top_y = list(range(n+n, n, -1))
bot_y = list(range(n-gap, -gap, -1))
top_l = l[:+n] # A B C
bot_l = l[-n:] # X Y Z
top_bars = ax.barh(top_y, top_x)
bot_bars = ax.barh(bot_y, bot_x)
ax.set_yticks(top_y+bot_y)
ax.set_yticklabels(top_l+bot_l)
return top_bars, bot_bars
when invoked with your data and n=4, gap=4
bars_top, bars_bottom = top_bottom(x_data, labels, 4, gap=4)
produces
Later, you'll be able to customize the appearance of the bars as you like using the Artists returned by the function.

Doing a custom legend of marker sizes in matplotlib using a lambda function?

I am playing with the third example of "Scatter plots with a legend" in the matplotlib manual.
I have tweaked the marker sizes to:
s = (50 / price) ** 2
And as an input to legend_elements I am using:
func=lambda s: 50 / np.sqrt(s)
I get the output below. The marker sizes of the legend are wrong. Why is that?
Here is the code:
import numpy as np
import matplotlib.pyplot as plt
volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)
fig, ax = plt.subplots()
s = (50 / price) ** 2
scatter = ax.scatter(volume, amount, c=ranking, s=s,
vmin=-3, vmax=3, cmap="Spectral", label=price)
legend1 = ax.legend(*scatter.legend_elements(num=5),
loc="upper left", title="Ranking")
ax.add_artist(legend1)
kw = dict(prop="sizes", num=5, color=scatter.cmap(0.7), fmt="$ {x:.2f}",
func=lambda s: 50 / np.sqrt(s),
)
legend2 = ax.legend(*scatter.legend_elements(**kw),
loc="lower right", title="Price")
for p, v, a in zip(price, volume, amount):
ax.annotate(round(p, 0), (v, a))
plt.show()

The issue appears to be related to the inverse relationship between price and marker size. The way the data is calculated in legend_elements doesn't account for this, and the calculation doesn't quite work. I've submitted a pull request.
The problem is in np.interp that expects increasing input for the second argument. Here is a work around for now that sorts the input first:
legend2 = ax.legend(*legend_elements(scatter, **kw),
loc="lower right", title="Price")
Run this after defining legend_elements as:
def legend_elements(self, prop="colors", num="auto",
fmt=None, func=lambda x: x, **kwargs):
"""
Creates legend handles and labels for a PathCollection. This is useful
for obtaining a legend for a :meth:`~.Axes.scatter` plot. E.g.::
scatter = plt.scatter([1, 2, 3], [4, 5, 6], c=[7, 2, 3])
plt.legend(*scatter.legend_elements())
Also see the :ref:`automatedlegendcreation` example.
Parameters
----------
prop : string, optional, default *"colors"*
Can be *"colors"* or *"sizes"*. In case of *"colors"*, the legend
handles will show the different colors of the collection. In case
of "sizes", the legend will show the different sizes.
num : int, None, "auto" (default), array-like, or `~.ticker.Locator`,
optional
Target number of elements to create.
If None, use all unique elements of the mappable array. If an
integer, target to use *num* elements in the normed range.
If *"auto"*, try to determine which option better suits the nature
of the data.
The number of created elements may slightly deviate from *num* due
to a `~.ticker.Locator` being used to find useful locations.
If a list or array, use exactly those elements for the legend.
Finally, a `~.ticker.Locator` can be provided.
fmt : str, `~matplotlib.ticker.Formatter`, or None (default)
The format or formatter to use for the labels. If a string must be
a valid input for a `~.StrMethodFormatter`. If None (the default),
use a `~.ScalarFormatter`.
func : function, default *lambda x: x*
Function to calculate the labels. Often the size (or color)
argument to :meth:`~.Axes.scatter` will have been pre-processed
by the user using a function *s = f(x)* to make the markers
visible; e.g. *size = np.log10(x)*. Providing the inverse of this
function here allows that pre-processing to be inverted, so that
the legend labels have the correct values;
e.g. *func = np.exp(x, 10)*.
kwargs : further parameters
Allowed keyword arguments are *color* and *size*. E.g. it may be
useful to set the color of the markers if *prop="sizes"* is used;
similarly to set the size of the markers if *prop="colors"* is
used. Any further parameters are passed onto the `.Line2D`
instance. This may be useful to e.g. specify a different
*markeredgecolor* or *alpha* for the legend handles.
Returns
-------
tuple (handles, labels)
with *handles* being a list of `.Line2D` objects
and *labels* a matching list of strings.
"""
handles = []
labels = []
hasarray = self.get_array() is not None
if fmt is None:
fmt = mpl.ticker.ScalarFormatter(useOffset=False, useMathText=True)
elif isinstance(fmt, str):
fmt = mpl.ticker.StrMethodFormatter(fmt)
fmt.create_dummy_axis()
if prop == "colors":
if not hasarray:
warnings.warn("Collection without array used. Make sure to "
"specify the values to be colormapped via the "
"`c` argument.")
return handles, labels
u = np.unique(self.get_array())
size = kwargs.pop("size", mpl.rcParams["lines.markersize"])
elif prop == "sizes":
u = np.unique(self.get_sizes())
color = kwargs.pop("color", "k")
else:
raise ValueError("Valid values for `prop` are 'colors' or "
f"'sizes'. You supplied '{prop}' instead.")
fmt.set_bounds(func(u).min(), func(u).max())
if num == "auto":
num = 9
if len(u) <= num:
num = None
if num is None:
values = u
label_values = func(values)
else:
if prop == "colors":
arr = self.get_array()
elif prop == "sizes":
arr = self.get_sizes()
if isinstance(num, mpl.ticker.Locator):
loc = num
elif np.iterable(num):
loc = mpl.ticker.FixedLocator(num)
else:
num = int(num)
loc = mpl.ticker.MaxNLocator(nbins=num, min_n_ticks=num-1,
steps=[1, 2, 2.5, 3, 5, 6, 8, 10])
label_values = loc.tick_values(func(arr).min(), func(arr).max())
cond = ((label_values >= func(arr).min()) &
(label_values <= func(arr).max()))
label_values = label_values[cond]
yarr = np.linspace(arr.min(), arr.max(), 256)
xarr = func(yarr)
ix = np.argsort(xarr)
values = np.interp(label_values, xarr[ix], yarr[ix])
kw = dict(markeredgewidth=self.get_linewidths()[0],
alpha=self.get_alpha())
kw.update(kwargs)
for val, lab in zip(values, label_values):
if prop == "colors":
color = self.cmap(self.norm(val))
elif prop == "sizes":
size = np.sqrt(val)
if np.isclose(size, 0.0):
continue
h = mlines.Line2D([0], [0], ls="", color=color, ms=size,
marker=self.get_paths()[0], **kw)
handles.append(h)
if hasattr(fmt, "set_locs"):
fmt.set_locs(label_values)
l = fmt(lab)
labels.append(l)
return handles, labels

You can also manually create your own legend. The trick here is that you have to apply np.sqrt to sizes in the legend for some reason I don't quite follow but #busybear has in her snippet.
import numpy as np
import matplotlib.pyplot as plt
volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)
fig, ax = plt.subplots()
s = (50 / price) ** 2
scatter = ax.scatter(volume, amount, c=ranking, s=s,
vmin=-3, vmax=3, cmap="Spectral", label=price)
legend1 = ax.legend(*scatter.legend_elements(num=5),
loc="upper left", title="Ranking")
ax.add_artist(legend1)
# # easy legend
# kw = dict(prop="sizes", num=5, color=scatter.cmap(0.7), fmt="$ {x:.2f}",
# func=lambda s: 50 / np.sqrt(s),
# )
# legend2 = ax.legend(*scatter.legend_elements(**kw),
# loc="lower right", title="Price")
# ax.add_artist(legend2)
# manual legend
legend_values = np.array([2,4,6,8])
legend_sizes = (50 / legend_values) ** 2
# IMPORTANT: for some reason the square root needs to be applied to sizes in the legend
legend_sizes_sqrt = np.sqrt(legend_sizes)
elements3 = [Line2D([0], [0], color=scatter.cmap(0.7), lw=0, marker="o", linestyle=None, markersize=s) for s in legend_sizes_sqrt]
legend3 = ax.legend(elements3, [f"$ {p:.2f}" for p in legend_values], loc='lower right', title="Price")
ax.add_artist(legend3)
for p, v, a in zip(price, volume, amount):
ax.annotate(round(p, 0), (v, a))
plt.show()

How do I reliably scale matplotlib pcolormesh plots for large data sets?

I'm trying to plot some data using a pcolormesh from the matplotlib.pyplot but I'm having some difficulty when saving the output (specifically, in scaling the image appropriately).
I'm using Python v3.4 with matplotlib v1.51 if that makes a difference.
This is what my code currently looks like:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
def GetData(data_entries, num_of_channels):
data_dict = {'timestamp' : np.linspace(1, data_entries*21, data_entries, endpoint=True)}
for chan in range(0, num_of_channels, 1):
data_dict['random%03d'%chan] = np.random.rand(data_entries, 1).flatten()
num_at_each_end_to_highlight = 10
data_dict['random%03d'%chan][0:num_at_each_end_to_highlight] = 1.5
data_dict['random%03d'%chan][-num_at_each_end_to_highlight:] = 1.5
for chan in range(0, num_of_channels, 1):
data_dict['periodic%03d' % chan] = np.zeros(data_entries)#.flatten()
data_dict['periodic%03d' % chan][::65] = 5000
return pd.DataFrame(data_dict)
def GetSubPlotIndex(totalRows, totalCols, row):
return totalRows*100+totalCols*10+row
def PlotData(df, num_of_channels, field_names):
# Calculate the range of data to plot
data_entries = len(df.index)
# Create the x/y mesh that the data will be plotted on
x = df['timestamp']
y = np.linspace(0, num_of_channels - 1, num_of_channels)
X,Y = np.meshgrid(x,y)
# Iterate through all of the field types and produce one plot for each but share the X axis
for idx, field_name in enumerate(field_names):
# Create this sub-plot
subPlotIndex = GetSubPlotIndex(len(field_names), 1, idx + 1)
ax = plt.subplot(subPlotIndex)
if idx is 0:
ax.set_title('Raw Data Time Series')
# Set the axis scale to exactly meet the limits of the data set.
ax.set_autoscale_on(False)
plt.axis([x[0], x[data_entries-1], 0, num_of_channels - 1])
# Set up the colour palette used to render the data.
# Make bad results (those that are masked) invisible so the background shows instead.
palette = plt.cm.get_cmap('autumn')
palette.set_bad(alpha=0.0)
ax.set_axis_bgcolor('black') # Set the background to zero
# Grab the data and transpose it so we can stick it in the time series running along the X axis.
firstFftCol = df.columns.get_loc(field_name + "%03d"%(0))
lastFftCol = df.columns.get_loc(field_name + "%03d"%(num_of_channels - 1))
data = df.ix[:,firstFftCol:lastFftCol]
data = data.T # Transpose so that time runs along the X axis and bin index is on the Y
# Mask off data with zero's so that it doesn't obscure the data we're actually interested in.
data = np.ma.masked_where(data == 0.0, data)
# Actually create the data mesh so we can plot it
z_min, z_max = data.min().min(), data.max().max()
p = ax.pcolormesh(X,Y, data, cmap=palette, vmin=z_min, vmax=z_max)
# Render it
plt.plot()
# Label the plot and add a key
plt.ylabel(field_name)
plt.colorbar(p)
# Label the plot
plt.xlabel('Time (ms)')
# Record the result
plt.savefig('test.png', edgecolor='none', transparent=False)
if __name__ == '__main__':
data_entries = 30000 # Large values here cause issues
num_of_channels = 255
fields_to_plot = ('random', 'periodic')
data = GetData(data_entries, num_of_channels)
width_in_pixels = len(data.index)+200
additional_vertical_space_per_plot = 50
num_of_plots = len(fields_to_plot)
height_in_pixels = (num_of_channels+additional_vertical_space_per_plot)*num_of_plots
dpi = 80 # The default according to the documentation.
fig = plt.figure(1,figsize=(width_in_pixels/dpi, height_in_pixels/dpi), dpi=dpi)
PlotData(data, num_of_channels, fields_to_plot)
With 1000 entries, the result looks fine:
If I increase the number of samples to the sort of size I actually want to plot (30000), the image is the correct size (30200 pixels wide) but I see a lot of dead space. This is a zoomed-out summary of the issues I see:
Is there a way to more accurately fill the image with the data?

Thanks to the prompt from #Dusch, this seems to solve things rather neatly:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
def GetData(data_entries, num_of_channels):
data_dict = {'timestamp' : np.linspace(1, data_entries*21, data_entries, endpoint=True)}
for chan in range(0, num_of_channels, 1):
data_dict['random%03d'%chan] = np.random.rand(data_entries, 1).flatten()
num_at_each_end_to_highlight = 10
data_dict['random%03d'%chan][0:num_at_each_end_to_highlight] = 1.5
data_dict['random%03d'%chan][-num_at_each_end_to_highlight:] = 1.5
for chan in range(0, num_of_channels, 1):
data_dict['periodic%03d' % chan] = np.zeros(data_entries)#.flatten()
data_dict['periodic%03d' % chan][::65] = 5000
return pd.DataFrame(data_dict)
def GetSubPlotIndex(totalRows, totalCols, row):
return totalRows*100+totalCols*10+row
def PlotData(df, num_of_channels, field_names):
# Calculate the range of data to plot
data_entries = len(df.index)
# Create the x/y mesh that the data will be plotted on
x = df['timestamp']
y = np.linspace(0, num_of_channels - 1, num_of_channels)
X,Y = np.meshgrid(x,y)
# Iterate through all of the field types and produce one plot for each but share the X axis
for idx, field_name in enumerate(field_names):
# Create this sub-plot
subPlotIndex = GetSubPlotIndex(len(field_names), 1, idx + 1)
ax = plt.subplot(subPlotIndex)
if idx is 0:
ax.set_title('Raw Data Time Series')
# Set the axis scale to exactly meet the limits of the data set.
ax.set_autoscale_on(False)
plt.axis([x[0], x[data_entries-1], 0, num_of_channels - 1])
# Set up the colour palette used to render the data.
# Make bad results (those that are masked) invisible so the background shows instead.
palette = plt.cm.get_cmap('autumn')
palette.set_bad(alpha=0.0)
ax.set_axis_bgcolor('black') # Set the background to zero
# Grab the data and transpose it so we can stick it in the time series running along the X axis.
firstFftCol = df.columns.get_loc(field_name + "%03d"%(0))
lastFftCol = df.columns.get_loc(field_name + "%03d"%(num_of_channels - 1))
data = df.ix[:,firstFftCol:lastFftCol]
data = data.T # Transpose so that time runs along the X axis and bin index is on the Y
# Mask off data with zero's so that it doesn't obscure the data we're actually interested in.
data = np.ma.masked_where(data == 0.0, data)
# Actually create the data mesh so we can plot it
z_min, z_max = data.min().min(), data.max().max()
p = ax.pcolormesh(X,Y, data, cmap=palette, vmin=z_min, vmax=z_max)
# Render it
plt.plot()
# Label this sub-plot
plt.ylabel(field_name)
# Sort out the color bar
fig = plt.gcf()
image_width = fig.get_size_inches()[0] * fig.dpi # size in pixels
colorbar_padding_width_in_pixels = 20
colorbar_padding = colorbar_padding_width_in_pixels/image_width
plt.colorbar(p, pad=colorbar_padding)
# Label the plot
plt.xlabel('Time (ms)')
# Record the result
plt.savefig('test.png', edgecolor='none', transparent=False, bbox_inches='tight')
plt.tight_layout()
if __name__ == '__main__':
data_entries = 30000 # Large values here cause issues
num_of_channels = 255
fields_to_plot = ('random', 'periodic')
data = GetData(data_entries, num_of_channels)
width_in_pixels = len(data.index)+200
additional_vertical_space_per_plot = 50
num_of_plots = len(fields_to_plot)
height_in_pixels = (num_of_channels+additional_vertical_space_per_plot)*num_of_plots
dpi = 80 # The default according to the documentation.
fig = plt.figure(1,figsize=(width_in_pixels/dpi, height_in_pixels/dpi), dpi=dpi)
PlotData(data, num_of_channels, fields_to_plot)
The secret sauce in the end was:
Add plt.tight_layout() immediately before the plt.savefig call.
Add bbox_inches='tight' to the plt.savefig call.
Add , pad=colorbar_padding after calculating colorbar_padding by checking what proportion of the overall image width a 20 pixel padding equates to.

First boxplot missing or not visible

Why is the first boxplot missing? There's supoussed to be 24 boxplots, but only 23 are being display as you can see in the image. Who's size do i have to change to make it visible? I've tried changing the size of the figure but its the same.
Not sure if it helps, but this is the code:
def obtenerBoxplotsAnualesIntercalados(self, directorioEntrada, directorioSalida):
meses = ["Enero","Febrero","Marzo","Abril","Mayo","Junio", "Julio", "Agosto","Septie.","Octubre","Noviem.","Diciem."]
ciudades = ["CO","CR"]
anios = ["2011", "2012", "2013"]
for anio in anios:
fig = plt.figure()
fig.set_size_inches(14.3, 9)
ax = plt.axes()
plt.hold(True)
bpArray = []
i=0
ticks = []
for mes in range(len(meses)):
archivoCO = open(directorioEntrada+"/"+"CO"+"-"+self.mesStr(mes+1)+"-"+anio, encoding = "ISO-8859-1")
archivoCR = open(directorioEntrada+"/"+"CR"+"-"+self.mesStr(mes+1)+"-"+anio, encoding = "ISO-8859-1")
datosCOmes = self.obtenerDatosmensuales(archivoCO)
datosCRmes = self.obtenerDatosmensuales(archivoCR)
data = [ [int(float(datosCOmes[2])), int(float(datosCOmes[0])), int(float(datosCOmes[1]))],
[int(float(datosCRmes[2])), int(float(datosCRmes[0])), int(float(datosCRmes[1]))] ]
bpArray.append(plt.boxplot(data, positions=[i,i+1], widths=0.5))
ticks.append(i+0.5)
i=i+2
hB, = plt.plot([1,1],'b-')
hR, = plt.plot([1,1],'r-')
plt.legend((hB, hR),('Caleta', 'Comodoro'))
hB.set_visible(False)
hR.set_visible(False)
ax.set_xticklabels(meses)
ax.set_xticks(ticks)
self.setBoxColors(bpArray)
plt.title('Variación de temperatura mensual Caleta Olivia-Comodoro Rivadavia. Año '+anio)
plt.savefig(directorioSalida+"/asdasd"+str(anio)+".ps", orientation='landscape', papertype='A4' )

Your boxplot is there but it's hidden. This reproduces your problem:
import matplotlib
import numpy as np
data = np.random.normal(10,2,100*24).reshape(24,-1) # let's get 12 pairs of arrays to plot
meses = ["Enero","Febrero","Marzo","Abril","Mayo","Junio", "Julio", "Agosto","Septie.","Octubre","Noviem.","Diciem."]
ax = plt.axes()
plt.hold(True)
i=0
ticks = []
for mes in range(0,len(meses)):
plt.boxplot(data, positions=[i,i+1], widths=0.5)
ticks.append(i+0.5)
i+=2
ax.set_xticklabels(meses)
ax.set_xticks(ticks)
plt.show()
Notice that you are defining your positions as ranging from 0 to 12, but you append ticks as range(0,12) + 0.5. Thus, when you later do set_xticks(ticks), your x axis will begin from 0.5 but your 1st boxplot is plotted at position 0.
I've adapted your code slightly to produce the result you want:
ax = plt.axes()
plt.hold(True)
i=1 # we start plotting from position 1 now
ticks = []
for mes in range(0,len(meses)):
plt.boxplot(data, positions=[i,i+1], widths=0.5)
ticks.append(i+0.5)
i+=2
ax.set_xticklabels(meses)
ax.set_xlim(0,ticks[-1]+1) # need to shift the right end of the x limit by 1
ax.set_xticks(ticks)
plt.show()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to achieve spaces between stacked bar chart, center aligned - python

Related

Stacked bar plot color by a diferent category

matplotlib barh: how to make a visual gap between two groups of bars?

Doing a custom legend of marker sizes in matplotlib using a lambda function?

How do I reliably scale matplotlib pcolormesh plots for large data sets?

First boxplot missing or not visible

Categories

Resources