How to use SpanSelector in matplotlib with subplots? - python

I am plotting data from multiple sources and using SpanSelector to analyze the data during a particular event. I can select the data from each subplot and extract the data. However, when I make a new selection in a new subplot, I want the selection in the previous subplot to disappear.
There is span.set_visible(True/False). But for this to work, I need to know which subplot is being selected. I tried to find that using lambda function, but it is not working at the moment.
I tried first:
for signal in self.signals:
self.span[k] = SpanSelector(
self.ax[k],
onselect = lambda min, max : self.onselect_function(min, max, k),
# onselect = self.onselect_function,
direction = 'horizontal',
minspan = 1,
useblit = True,
interactive = True,
button = 1,
props = {'facecolor': 'red', 'alpha':0.5},
drag_from_anywhere = True
)
k += 1
def onselect_function(self, min_value, max_value, selected_graph)
...
But the callback always receives selected_graph = 3, no matter which subplot I select. There are 3 subplots (k = 0 to 2).
I thought this was because k was not in the namespace, and I tried:
onselect = lambda min, max, k : self.onselect_function(min, max, k)
But now, I get the following error:
File "C:\Users\Ashu\miniconda3\envs\my_env\lib\site-packages\matplotlib\cbook\__init__.py", line 287, in process
func(*args, **kwargs)
File "C:\Users\Ashu\miniconda3\envs\my_env\lib\site-packages\matplotlib\widgets.py", line 1958, in release
self._release(event) File "C:\Users\Ashu\miniconda3\envs\my_env\lib\site-packages\matplotlib\widgets.py", line 2359, in _release
self.onselect(vmin, vmax)
TypeError: <lambda>() missing 1 required positional argument: 'k'
Ideally, I would want the selection to highlight the x-axis range in all subplots (I can still do that manually as I have info on what range is being selected, as shown in the figure with dashed lines). But I need the previous selection to disappear when the new selection begins.
Example of how it currently displays. The last selection was made in the top graph, so each graph shows the textual information for that selection (51-53), but the highlights in the bottom two graphs are in the wrong parts. I tried clearing axes as well but that also doesn't work.
Edit: Adding minimal reproducible example
import matplotlib.pyplot as plt
from matplotlib.widgets import SpanSelector
from random import randrange
import math
plt.style.use('seaborn')
noOfDataToShow = 10
noOfSignals = 3
fig, ax = plt.subplots(nrows = noOfSignals, ncols = 1)
x_pos = []
signal_data_holder = []
for i in range(noOfDataToShow):
x_pos.append(i)
for i in range(noOfSignals):
temp = []
for j in range (noOfDataToShow):
temp.append(randrange(0,10))
signal_data_holder.append(temp)
def onselect_function(min_value, max_value, selected_graph = 0):
k = 0
for param in signal_data_holder:
ax[k].cla()
ax[k].set_ylim([0,10])
ax[k].plot(x_pos, param, label = str (math.trunc(param[noOfDataToShow - 1])))
ax[k].legend(loc = "upper left")
k += 1
#******Calculating Min_Max (Can be ignored) ******#
min_value = math.floor(min_value)
max_value = math.ceil(max_value)
min_value_data = [None] * noOfSignals
max_value_data = [None] * noOfSignals
if (min_value < 0):
min_value = 0
if (max_value >= noOfDataToShow):
max_value = noOfDataToShow
for k in range(noOfSignals):
min_value_data = signal_data_holder[k][min_value]
max_value_data = signal_data_holder[k][min_value]
for i in range(min_value, max_value):
if (signal_data_holder[k][i] < min_value_data):
min_value_data = signal_data_holder[k][i]
elif (signal_data_holder[k][i] > max_value_data):
max_value_data = signal_data_holder[k][i]
labelText = "Min_Value: " + str (min_value_data) + "\n"
labelText += "Max_Value: " + str (max_value_data) + "\n"
labelText += "Range: " + str (min_value) + "-" + str (max_value) + "\n"
ax[k].legend(labels = [labelText], loc = "upper left")
print(min_value_data, max_value_data)
ax[k].axvline(min_value, color = 'red', linestyle = 'dashed')
ax[k].axvline(max_value, color = 'red', linestyle = 'dashed')
fig.canvas.draw()
#******Calculating Min_Max (Can be ignored) ******#
return min_value, max_value
span = [None] * noOfSignals
for k in range(noOfSignals):
span[k] = SpanSelector(
ax[k],
# onselect = lambda min, max, k : onselect_function(min, max, k),
# onselect = lambda min, max : onselect_function(min, max, k),
onselect = onselect_function,
direction = 'horizontal',
minspan = 1,
useblit = True,
interactive = True,
button = 1,
props = {'facecolor': 'red', 'alpha':0.5},
drag_from_anywhere = True
)
k = 0
for param in signal_data_holder:
ax[k].cla()
ax[k].set_ylim([0,10])
ax[k].plot(x_pos, param, label = str (math.trunc(param[noOfDataToShow - 1])))
ax[k].legend(loc = "upper left")
k += 1
plt.show()

I don't think you can directly retrieve the axis object linked to the span selector (or at least, I wouldn't know how). However, we can also collect the axis object of the last mouse click:
import matplotlib.pyplot as plt
from matplotlib.widgets import SpanSelector
import numpy as np
#sample data
fig, axis = plt.subplots(3)
for i, ax in enumerate(axis):
t=np.linspace(-i, i+1 , 100)
ax.plot(t, np.sin(2 * np.pi * t))
#list to store the axis last used with a mouseclick
curr_ax = []


def on_click(event):
    """Remember the axes that received the most recent mouse press.

    Presses outside every axes (``event.inaxes`` is falsy) leave the stored
    value unchanged.
    """
    if event.inaxes:
        # Mutate the list in place so that onselect sees the update.
        curr_ax[:] = [event.inaxes]


def onselect(xmin, xmax):
    """Hide the spans of all other subplots after a selection is made.

    Passed as the ``onselect`` callback of every SpanSelector; receives the
    x-limits of the finished selection.
    """
    # Ignore a plain click (zero-width span). Also return early when no axes
    # press has been recorded yet, instead of raising IndexError below.
    if xmin == xmax or not curr_ax:
        return
    active_ax = curr_ax[0]
    # Set all span selectors invisible except the one on the active axes.
    for ax, span in zip(axis, list_of_spans):
        if ax is not active_ax:
            span.set_visible(False)
    # Do something with xmin, xmax.
    print(xmin, xmax)
    fig.canvas.draw_idle()
#collect span selectors in a list in the same order as their axes objects
list_of_spans = [SpanSelector(
ax,
onselect,
"horizontal",
useblit=True,
props=dict(alpha=0.5, facecolor="tab:blue"),
interactive=True,
drag_from_anywhere=True
)
for ax in axis]
plt.connect('button_press_event', on_click)
plt.show()

Related

How to avoid colors mixing up between two plot types in matplotlib

I have the following result:
It's a plt.scatter with plt.bar on top of it.
For some reason, the bar chart is being colored, but I want it to be of a specific color which is not part of the colors used by the scatter plot. How do I achieve that?
My current code:
from collections import OrderedDict
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
def scatter_plot_from_to_delta(start, end, title="Delta times per day"):
filtered = OrderedDict()
# Generating filrered score dict (K=participant, V=list of each day's delta, with 0 as filtered out)
for name in names:
score = list()
for delta in scores.get(name):
if delta > end or delta < start:
delta = 0
score.append(delta)
filtered[name] = score
figure(figsize=(20, 11)) # size of the whole graph
# Preparing data for listed participants
listed = OrderedDict() # (K=name displayed in the graph, V=amount of times displayed)
latest_listed_day = 1 # ends up determining the last X axis value in the graph
for key, value in filtered.items():
listed[key] = dict()
listed[key]['amnt'] = 0
x = list()
y = list()
for i, val in enumerate(value):
if val != 0:
x.append(i+1)
latest_listed_day = max(i+1, latest_listed_day)
y.append(val)
listed[key]['amnt'] += 1
if listed[key]['amnt'] == 0:
del listed[key]
else:
listed[key]['x'] = x
listed[key]['y'] = y
ordered_counter = OrderedDict(sorted(listed.items(), reverse=True, key=lambda item: item[1]['amnt']))
print(ordered_counter)
# Scatter plot
for key, value in ordered_counter.items():
x = value['x']
y = value['y']
plt.scatter(x, y, s=200) # s=size of the dots
for i, _ in enumerate(x):
plt.annotate(key, (x[i]+0.15, y[i]-0.27)) # marking names by each dot
# Bar chart for the amount of listed participants on each day
for i in range(num_challenges-1):
participants_listed_that_day = 0
for _, val in filtered.items():
s = val[i]
if s != 0:
participants_listed_that_day += 1
if participants_listed_that_day != 0:
plt.bar(i+1, participants_listed_that_day)
# Displaying the total per day above each bar
plt.text(i+0.908,
participants_listed_that_day+0.15,
str(participants_listed_that_day),
fontsize=18)
# Concatenate (sorted) amount of times displayed for more info in legend
displayed = list()
for key, value in ordered_counter.items():
concat = "(" + str(value['amnt']) + ") " + key
displayed.append(concat)
# Legend of listed participants
plt.legend(displayed,
scatterpoints=11,
loc='center left',
bbox_to_anchor=(1, 0.5),
ncol=1,
fontsize=16)
plt.xticks(range(1,latest_listed_day+1)) # ensure x axis tick for every day
plt.xlim((0.5,latest_listed_day+1.5)) # force displayed x range
plt.title(title, fontsize=30)
plt.xlabel("Day")
plt.ylabel("Delta (in seconds)")
plt.show()
You can choose two discrete or qualitative colormaps that have distinct colors from each other (and also enough colors to exceed the number of categories in your data), such as tab10 and tab20b, then update the scatter plot and bar chart portions of your function.
import matplotlib.cm as cm

# Replacement for the "Scatter plot" and "Bar chart" sections of
# scatter_plot_from_to_delta. Colours are assigned explicitly: passing only
# cmap= to plt.scatter has no effect without c=, and rescaling a single number
# by its own min/max yields 0/0 (nan), which is not a usable colour.
scatter_cmap = cm.tab10                    # one colour per participant
bar_color = plt.get_cmap("tab20b")(0)      # fixed colour that is not in tab10

# Scatter plot
for idx, (key, value) in enumerate(ordered_counter.items()):
    x = value['x']
    y = value['y']
    # Integer lookup picks the idx-th palette entry; wrap around if there are
    # more participants than palette colours.
    plt.scatter(x, y, s=200, color=scatter_cmap(idx % scatter_cmap.N))  # s=size of the dots
    for x_i, y_i in zip(x, y):
        plt.annotate(key, (x_i+0.15, y_i-0.27))  # marking names by each dot

# Bar chart for the amount of listed participants on each day
for i in range(num_challenges-1):
    participants_listed_that_day = sum(1 for val in filtered.values() if val[i] != 0)
    if participants_listed_that_day != 0:
        plt.bar(i+1, participants_listed_that_day, color=bar_color)
        # Displaying the total per day above each bar
        plt.text(i+0.908,
                 participants_listed_that_day+0.15,
                 str(participants_listed_that_day),
                 fontsize=18)

matplotlib barh: how to make a visual gap between two groups of bars?

I have some sorted data of which I only show the highest and lowest values in a figure. This is a minimal version of what currently I have:
import matplotlib.pyplot as plt
# some dummy data (real data contains about 250 entries)
x_data = list(range(98, 72, -1))
labels = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
ranks = list(range(1, 27))
fig, ax = plt.subplots()
# plot 3 highest entries
bars_top = ax.barh(labels[:3], x_data[:3])
# plot 3 lowest entries
bars_bottom = ax.barh(labels[-3:], x_data[-3:])
ax.invert_yaxis()
# print values and ranks
for bar, value, rank in zip(bars_top + bars_bottom,
x_data[:3] + x_data[-3:],
ranks[:3] + ranks[-3:]):
y_pos = bar.get_y() + 0.5
ax.text(value - 4, y_pos, value, ha='right')
ax.text(4, y_pos, f'$rank:\ {rank}$')
ax.set_title('Comparison of Top 3 and Bottom 3')
plt.show()
Result:
I'd like to make an additional gap to this figure to make it more visually clear that the majority of data is in fact not displayed in this plot. For example, something very simple like the following would be sufficient:
Is this possible in matplotlib?
Here is a flexible approach that just plots a dummy bar in-between. The yaxis-transform together with the dummy bar's position is used to plot 3 black dots.
If multiple separations are needed, they all need a different dummy label, for example repeating the space character.
import matplotlib.pyplot as plt
import numpy as np
# some dummy data (real data contains about 250 entries)
x_data = list(range(98, 72, -1))
labels = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
ranks = list(range(1, 27))
fig, ax = plt.subplots()
# plot 3 highest entries
bars_top = ax.barh(labels[:3], x_data[:3])
# dummy bar inbetween
dummy_bar = ax.barh(" ", 0, color='none')
# plot 3 lowest entries
bars_bottom = ax.barh(labels[-3:], x_data[-3:])
ax.invert_yaxis()
# print values and ranks
for bar, value, rank in zip(bars_top + bars_bottom,
x_data[:3] + x_data[-3:],
ranks[:3] + ranks[-3:]):
y_pos = bar.get_y() + 0.5
ax.text(value - 4, y_pos, value, ha='right')
ax.text(4, y_pos, f'$rank:\ {rank}$')
# add three dots using the dummy bar's position
ax.scatter([0.05] * 3, dummy_bar[0].get_y() + np.linspace(0, dummy_bar[0].get_height(), 3),
marker='o', s=5, color='black', transform=ax.get_yaxis_transform())
ax.set_title('Comparison of Top 3 and Bottom 3')
ax.tick_params(axis='y', length=0) # hide the tick marks
ax.margins(y=0.02) # less empty space at top and bottom
plt.show()
The following function,
def top_bottom(x, l, n, ax=None, gap=1):
    """Draw the first n and last n entries as two groups of horizontal bars.

    Parameters
    ----------
    x : sequence
        Bar lengths, in display order (first entries = top group).
    l : sequence
        Tick labels, parallel to ``x``.
    n : int
        Number of entries taken from each end of ``x`` and ``l``.
    ax : axes-like, optional
        Object providing ``barh``, ``set_yticks`` and ``set_yticklabels``.
        When None, the current pyplot axes is used.
    gap : int, default 1
        Number of empty bar slots left between the two groups.

    Returns
    -------
    tuple
        ``(top_bars, bot_bars)``: the two return values of ``ax.barh``.

    Raises
    ------
    ValueError
        If ``n`` is not positive or exceeds ``len(x)``.
    """
    if n <= 0:
        raise ValueError('No. of top/bottom values must be positive')
    if n > len(x):
        raise ValueError('No. of top/bottom values should be not greater than data length')
    if n+n > len(x):
        print('Warning: no. of top/bottom values is larger than one'
              ' half of data length, OVERLAPPING')
    if gap < 0:
        print('Warning: some bar will be overlapped')

    if ax is None:
        # Import lazily so that callers passing their own axes do not need
        # pyplot, and test against None rather than relying on truthiness.
        from matplotlib.pyplot import gca
        ax = gca()

    top_x = x[:n]
    bot_x = x[-n:]
    # y positions count downwards: top group occupies 2n .. n+1, bottom group
    # occupies n-gap .. 1-gap, leaving `gap` unused slots in between.
    top_y = list(range(n+n, n, -1))
    bot_y = list(range(n-gap, -gap, -1))
    # Convert to lists so "+" concatenates for any sequence type (a numpy
    # array of labels would otherwise attempt element-wise addition).
    tick_labels = list(l[:n]) + list(l[-n:])

    top_bars = ax.barh(top_y, top_x)
    bot_bars = ax.barh(bot_y, bot_x)
    ax.set_yticks(top_y+bot_y)
    ax.set_yticklabels(tick_labels)
    return top_bars, bot_bars
when invoked with your data and n=4, gap=4
bars_top, bars_bottom = top_bottom(x_data, labels, 4, gap=4)
produces
Later, you'll be able to customize the appearance of the bars as you like using the Artists returned by the function.

simple animation with matplotlib

I am trying to make a simple animation for a stochastic process (just black and white dots randomly changing their colors). To simulate that, I have basically plotted the dots over a grid. However, the important parameter for me being the rate of black dots, I'd like to draw under this grid a progressive bar showing the rate #blackdots/#totaldots looking approximately like this : [///////////////////////_____] 70% (simply just like a power charge bar).
I tried this but the bars are overlaid, and I don't think that Slider is meant to do such animations.
import numpy as np
import matplotlib.pyplot as plt
import random
from matplotlib.widgets import Slider
t=1500
d=5
n=10
raws = [i for i in range(n)]
config = [[2*random.randrange(2)-1 for i in range(n)] for i in range(n)]
def color(op):
if op == 1:
return 'white'
return 'black'
nbblack = 0
for i in config:
for j in i :
nbblack += (j==-1)
blackrate = nbblack/(n**2)
plt.subplots_adjust(bottom=0.25)
for line in range(n):
colors = [color(config[line][raw]) for raw in raws]
plt.scatter([line]*n,raws,c=colors,edgecolors='black',s=50)
plt.title('t=0',fontdict={'size': 16},x=-0.20,y=25)
samp = Slider(axamp, 'Rate', 0, 1, valinit=blackrate,color='black')
for step in range(t):
plt.pause(0.001)
xpick = random.randrange(n)
ypick = random.randrange(n)
opinion_picked = config[xpick][ypick]
for j in range(d) :
neighboor = random.randrange(n),random.randrange(n)
opinion_neig = config[neighboor[0]][neighboor[1]]
if opinion_neig == opinion_picked :
break
elif j == d-1 :
config[xpick][ypick]=-config[xpick][ypick]
nbblack-=config[xpick][ypick]
blackrate = nbblack/(n**2)
plt.title('t={}'.format(step),fontdict={'size': 16},x=-0.20,y=25)
for line in range(n):
colors = [color(config[line][raw]) for raw in raws]
plt.scatter([line]*n,raws,c=colors,edgecolors='black',s=50)
axamp = plt.axes([0.28, 0.15, 0.48, 0.03])
samp = Slider(axamp, 'Rate', 0, 1, valinit=blackrate,color='black')
plt.show()
I am not very familiar with matplotlib, so please let me know if there is a better way to do things, and thanks a lot for your help!
I don't think that Slider is meant to do such animations ... please let me know if there is a better way to do things ...
Maybe using a custom colorbar would work. I adapted from the Discrete Intervals colorbar example.
The following uses the percentage of black dots to decide which portion of the color bar should be black or white.
Here is an example without animation: five successive plots drawn by a loop. I tried to keep it as close to your example as possible.
import matplotlib as mpl
from matplotlib import pyplot as plt
import random
t = 1500
d = 5
n = 10
raws = [i for i in range(n)]
def f(t=t, d=d, n=n, raws=raws):
# try to get more skew in the data
mode = random.random()
config = [[random.triangular(mode=mode) > 0.5 for i in range(n)] for i in range(n)]
config = [[int(item) or -1 for item in row] for row in config]
# config = [[2*random.randrange(2)-1 for i in range(n)] for i in range(n)]
def color(op):
if op == 1:
return "white"
return "black"
nbblack = 0
for i in config:
for j in i:
nbblack += j == -1
blackrate = nbblack / (n ** 2)
fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.25)
# plt.subplots_adjust(bottom=0.25)
for line in range(n):
colors = [color(config[line][raw]) for raw in raws]
plt.scatter([line] * n, raws, c=colors, edgecolors="black", s=50)
plt.title("t=0", fontdict={"size": 16}, x=-0.20, y=25)
cmap = mpl.colors.ListedColormap(["black", "white"])
bounds = [0, int(blackrate * 100), 100]
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
fig.colorbar(
mpl.cm.ScalarMappable(cmap=cmap, norm=norm),
# cax=ax,
# boundaries=[0] + bounds + [13], # Adding values for extensions.
# extend='both',
ticks=bounds,
spacing="proportional",
orientation="horizontal",
label="Percentage Black",
)
plt.show()
plt.close()
for _ in range(5):
f()
BoundaryNorm determines how the colors are distributed. The example uses two colors, black/white, and defines two bins between 0 and 100 using the percentage of black dots for the bin edge.
The spacing="proportional" argument to Figure.colorbar ensures the black/white area is proportional to the bins.
The Matplotlib Tutorials are worth investing time in.

Doing a custom legend of marker sizes in matplotlib using a lambda function?

I am playing with the third example of "Scatter plots with a legend" in the matplotlib manual.
I have tweaked the marker sizes to:
s = (50 / price) ** 2
And as an input to legend_elements I am using:
func=lambda s: 50 / np.sqrt(s)
I get the output below. The marker sizes of the legend are wrong. Why is that?
Here is the code:
import numpy as np
import matplotlib.pyplot as plt
volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)
fig, ax = plt.subplots()
s = (50 / price) ** 2
scatter = ax.scatter(volume, amount, c=ranking, s=s,
vmin=-3, vmax=3, cmap="Spectral", label=price)
legend1 = ax.legend(*scatter.legend_elements(num=5),
loc="upper left", title="Ranking")
ax.add_artist(legend1)
kw = dict(prop="sizes", num=5, color=scatter.cmap(0.7), fmt="$ {x:.2f}",
func=lambda s: 50 / np.sqrt(s),
)
legend2 = ax.legend(*scatter.legend_elements(**kw),
loc="lower right", title="Price")
for p, v, a in zip(price, volume, amount):
ax.annotate(round(p, 0), (v, a))
plt.show()
The issue appears to be related to the inverse relationship between price and marker size. The way the data is calculated in legend_elements doesn't account for this, and the calculation doesn't quite work. I've submitted a pull request.
The problem is in np.interp that expects increasing input for the second argument. Here is a work around for now that sorts the input first:
legend2 = ax.legend(*legend_elements(scatter, **kw),
loc="lower right", title="Price")
Run this after defining legend_elements as:
def legend_elements(self, prop="colors", num="auto",
fmt=None, func=lambda x: x, **kwargs):
"""
Creates legend handles and labels for a PathCollection. This is useful
for obtaining a legend for a :meth:`~.Axes.scatter` plot. E.g.::
scatter = plt.scatter([1, 2, 3], [4, 5, 6], c=[7, 2, 3])
plt.legend(*scatter.legend_elements())
Also see the :ref:`automatedlegendcreation` example.
Parameters
----------
prop : string, optional, default *"colors"*
Can be *"colors"* or *"sizes"*. In case of *"colors"*, the legend
handles will show the different colors of the collection. In case
of "sizes", the legend will show the different sizes.
num : int, None, "auto" (default), array-like, or `~.ticker.Locator`,
optional
Target number of elements to create.
If None, use all unique elements of the mappable array. If an
integer, target to use *num* elements in the normed range.
If *"auto"*, try to determine which option better suits the nature
of the data.
The number of created elements may slightly deviate from *num* due
to a `~.ticker.Locator` being used to find useful locations.
If a list or array, use exactly those elements for the legend.
Finally, a `~.ticker.Locator` can be provided.
fmt : str, `~matplotlib.ticker.Formatter`, or None (default)
The format or formatter to use for the labels. If a string must be
a valid input for a `~.StrMethodFormatter`. If None (the default),
use a `~.ScalarFormatter`.
func : function, default *lambda x: x*
Function to calculate the labels. Often the size (or color)
argument to :meth:`~.Axes.scatter` will have been pre-processed
by the user using a function *s = f(x)* to make the markers
visible; e.g. *size = np.log10(x)*. Providing the inverse of this
function here allows that pre-processing to be inverted, so that
the legend labels have the correct values;
e.g. *func = np.exp(x, 10)*.
kwargs : further parameters
Allowed keyword arguments are *color* and *size*. E.g. it may be
useful to set the color of the markers if *prop="sizes"* is used;
similarly to set the size of the markers if *prop="colors"* is
used. Any further parameters are passed onto the `.Line2D`
instance. This may be useful to e.g. specify a different
*markeredgecolor* or *alpha* for the legend handles.
Returns
-------
tuple (handles, labels)
with *handles* being a list of `.Line2D` objects
and *labels* a matching list of strings.
"""
handles = []
labels = []
hasarray = self.get_array() is not None
if fmt is None:
fmt = mpl.ticker.ScalarFormatter(useOffset=False, useMathText=True)
elif isinstance(fmt, str):
fmt = mpl.ticker.StrMethodFormatter(fmt)
fmt.create_dummy_axis()
if prop == "colors":
if not hasarray:
warnings.warn("Collection without array used. Make sure to "
"specify the values to be colormapped via the "
"`c` argument.")
return handles, labels
u = np.unique(self.get_array())
size = kwargs.pop("size", mpl.rcParams["lines.markersize"])
elif prop == "sizes":
u = np.unique(self.get_sizes())
color = kwargs.pop("color", "k")
else:
raise ValueError("Valid values for `prop` are 'colors' or "
f"'sizes'. You supplied '{prop}' instead.")
fmt.set_bounds(func(u).min(), func(u).max())
if num == "auto":
num = 9
if len(u) <= num:
num = None
if num is None:
values = u
label_values = func(values)
else:
if prop == "colors":
arr = self.get_array()
elif prop == "sizes":
arr = self.get_sizes()
if isinstance(num, mpl.ticker.Locator):
loc = num
elif np.iterable(num):
loc = mpl.ticker.FixedLocator(num)
else:
num = int(num)
loc = mpl.ticker.MaxNLocator(nbins=num, min_n_ticks=num-1,
steps=[1, 2, 2.5, 3, 5, 6, 8, 10])
label_values = loc.tick_values(func(arr).min(), func(arr).max())
cond = ((label_values >= func(arr).min()) &
(label_values <= func(arr).max()))
label_values = label_values[cond]
yarr = np.linspace(arr.min(), arr.max(), 256)
xarr = func(yarr)
ix = np.argsort(xarr)
values = np.interp(label_values, xarr[ix], yarr[ix])
kw = dict(markeredgewidth=self.get_linewidths()[0],
alpha=self.get_alpha())
kw.update(kwargs)
for val, lab in zip(values, label_values):
if prop == "colors":
color = self.cmap(self.norm(val))
elif prop == "sizes":
size = np.sqrt(val)
if np.isclose(size, 0.0):
continue
h = mlines.Line2D([0], [0], ls="", color=color, ms=size,
marker=self.get_paths()[0], **kw)
handles.append(h)
if hasattr(fmt, "set_locs"):
fmt.set_locs(label_values)
l = fmt(lab)
labels.append(l)
return handles, labels
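You can also manually create your own legend. The trick here is that you have to apply np.sqrt to the sizes used in the legend: the s argument of scatter is a marker area in points squared, whereas the markersize of a Line2D legend handle is a diameter in points, so the handles need the square root of s (this is also what @busybear does in her snippet).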
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D  # needed for the hand-made legend handles

volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)

fig, ax = plt.subplots()
s = (50 / price) ** 2
scatter = ax.scatter(volume, amount, c=ranking, s=s,
                     vmin=-3, vmax=3, cmap="Spectral", label=price)
legend1 = ax.legend(*scatter.legend_elements(num=5),
                    loc="upper left", title="Ranking")
ax.add_artist(legend1)

# # easy legend
# kw = dict(prop="sizes", num=5, color=scatter.cmap(0.7), fmt="$ {x:.2f}",
#           func=lambda s: 50 / np.sqrt(s),
#           )
# legend2 = ax.legend(*scatter.legend_elements(**kw),
#                     loc="lower right", title="Price")
# ax.add_artist(legend2)

# manual legend
legend_values = np.array([2, 4, 6, 8])
legend_sizes = (50 / legend_values) ** 2
# IMPORTANT: scatter's `s` is a marker area (points**2) while Line2D's
# `markersize` is a diameter (points), so the handles take the square root.
legend_sizes_sqrt = np.sqrt(legend_sizes)
elements3 = [
    Line2D([0], [0], color=scatter.cmap(0.7), lw=0, marker="o",
           linestyle="", markersize=marker_size)
    for marker_size in legend_sizes_sqrt
]
legend3 = ax.legend(elements3, [f"$ {p:.2f}" for p in legend_values],
                    loc='lower right', title="Price")
ax.add_artist(legend3)

for p, v, a in zip(price, volume, amount):
    ax.annotate(round(p, 0), (v, a))
plt.show()

How to achieve spaces between stacked bar chart, center aligned

I am using matplotlib and a stackedbarchart program for it that someone wrote to graph a stacked bar chart.
My graph:
x-axis has 8 income distributions, one for each bar
y-axis is the % of people in each income distribution. Person type-a is the first stack, person type-b is the second stack, and person type-c is the third stack.
My barchart is center aligned, and I am trying to figure out how to space out the bars so the graph looks better and so the labels are easier to read. Any suggestions, or clarifications?
The program is stackedBarGraph.py and the code looks like this, where widths is an array of 8 values, each corresponding to the width of a bar chart.
Let me know if you need any more information (I tried to keep everything relevant). Thanks!
Full code (I hope it's not too difficult to read):
from __future__ import division
from pylab import *
import seaborn as sns
import pandas as pd
import numpy as np
from stackedbars import StackedBarGrapher
data = csv2rec('coa.csv', delimiter=',')
x = data['totalgrantaid']
y = data['studenteffort']
z = data['parentcontim']
g = data['parentincomeim']
df = pd.DataFrame(dict(grant = x, stud = y, par = z, income = g))
#organize the data to graph
income_brackets = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000), (100000, 150000), (150000,200000), (200000,250000), (250000,300000)]
source = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
for key in source:
source[key].append(median(df.query('income > {} and income < {}'.format(lower, upper))[key]))
#set the widths
source2 = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
for key in source2:
source2[key].append(pd.DataFrame(df.query('income > {} and income < {}'.format(lower,upper))).count()[key])
total = pd.DataFrame(df.query('income > 0 and income < 300000')['grant']).count()
total = total/10
#graph specifications
d_widths = [(source2['grant'][i]/total)[0] for i in range(8)]
d_colors = ['r','g','b']
d_labels = ('<25000', '25000-\n50000', '50000-\n75000', '75000-\n100000', '100000-\n150000', '150000-\n200000', '200000-\n250000', '250000-\n300000')
d = np.array([[source[k][i] for k in ('grant', 'stud', 'par')] for i in range(8)])
#the graph
fig = plt.figure()
ax1 = fig.add_subplot(111)
mygraph = StackedBarGrapher()
mygraph.stackedBarPlot(ax1,d,d_colors, edgeCols=['#000000']*3,widths = d_widths, showFirst = 8, xLabels=d_labels,scale=True)
Stackedbarchart program:
def stackedBarPlot(self,
                   ax,                 # axes to plot onto
                   data,               # data to plot
                   cols,               # colors for each level
                   xLabels=None,       # bar specific labels
                   yTicks=6.,          # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                   edgeCols=None,      # colors for edges
                   showFirst=-1,       # only plot the first <showFirst> bars
                   scale=False,        # scale bars to same height
                   widths=None,        # set widths for each bar
                   heights=None,       # set heights for each bar
                   ylabel='',          # label for y axis
                   xlabel=''           # label for x axis
                   ):
    """Draw a stacked bar chart on ``ax``.

    ``data`` is indexed as ``data[bar][level]``. Each bar is drawn centred on
    its x position; when ``widths`` is given, consecutive bars touch each
    other.

    Parameters
    ----------
    ax : axes to plot onto.
    data : 2-D array-like of numbers, one row per bar, one column per level.
    cols : sequence of colors, one per level.
    xLabels : optional sequence of tick labels, one per bar.
    yTicks : the string "none" (no y ticks), a number of evenly spaced ticks,
        or a pair ``[tick_positions, tick_labels]``.
    edgeCols : optional sequence of edge colors, one per level.
    showFirst : plot only the first ``showFirst`` bars (-1 plots all).
    scale : normalise every bar to a total height of 1.
    widths : optional sequence of bar widths.
    heights : optional sequence of total bar heights (ignored with ``scale``).
    ylabel, xlabel : axis labels; empty strings leave the labels untouched.
    """
    #------------------------------------------------------------------------------
    # data preparation
    if showFirst != -1:
        showFirst = min(showFirst, np.shape(data)[0])
        data = data[:showFirst]
        if heights is not None:
            heights = heights[:showFirst]
        if widths is not None:
            widths = widths[:showFirst]

    # Always work on a float copy, laid out as (levels, bars): the in-place
    # divisions below are not possible on an integer array.
    data_copy = np.array(data, dtype=float).transpose()

    # determine the number of bars and corresponding levels from the shape of the data
    levels, num_bars = np.shape(data_copy)

    if widths is None:
        widths = np.array([1] * num_bars)
        x = np.arange(num_bars)
    else:
        # Centre of each bar so that neighbouring bars touch.
        x = [0]
        for i in range(1, len(widths)):
            x.append(x[i-1] + (widths[i-1] + widths[i]) / 2.0)

    # stack the data --
    # replace the value in each level by the cumulative sum of all preceding levels
    data_stack = np.cumsum(data_copy, axis=0)

    # scale the data if needed
    if scale:
        totals = data_stack[levels-1].copy()  # copy: data_stack is modified in place
        data_copy /= totals
        data_stack /= totals
        if heights is not None:
            print("WARNING: setting scale and heights does not make sense.")
            heights = None
    elif heights is not None:
        totals = data_stack[levels-1].copy()
        data_copy /= totals
        data_stack /= totals
        for i in range(num_bars):
            data_copy[:, i] *= heights[i]
            data_stack[:, i] *= heights[i]

    #------------------------------------------------------------------------------
    # ticks
    # Compare by value; identity comparison against a string literal is unreliable.
    show_y_ticks = not (isinstance(yTicks, str) and yTicks == "none")
    if show_y_ticks:
        # it is either a set of ticks or the number of auto ticks to make
        try:
            len(yTicks[1])
            real_ticks = True
        except (TypeError, IndexError):
            real_ticks = False

        if not real_ticks:
            yTicks = float(yTicks)
            y_ticks_at = np.arange(yTicks)/(yTicks-1)
            if scale:
                # make the ticks line up to 100 %
                y_tick_labels = np.array(["%0.0f"%(i * 100) for i in y_ticks_at])
            else:
                # space the ticks along the y axis
                y_ticks_at = y_ticks_at*np.max(data_stack)
                y_tick_labels = np.array([str(i) for i in y_ticks_at])
            yTicks = (y_ticks_at, y_tick_labels)

    #------------------------------------------------------------------------------
    # plot
    if edgeCols is None:
        edgeCols = ["none"]*len(cols)

    # bars
    ax.bar(x,
           data_stack[0],
           color=cols[0], alpha=0.7,
           edgecolor=edgeCols[0],
           width=widths,
           linewidth=0.5,
           align='center'
           )
    for i in range(1, levels):
        ax.bar(x,
               data_copy[i],
               bottom=data_stack[i-1],
               color=cols[i], alpha=0.7,
               edgecolor=edgeCols[i],
               width=widths,
               linewidth=0.5,
               align='center'
               )

    # borders
    for side in ("top", "right", "bottom", "left"):
        ax.spines[side].set_visible(False)

    # make ticks if necessary; set them on `ax` itself so the call also works
    # when `ax` is not pyplot's current axes
    if show_y_ticks:
        ax.tick_params(axis='y', which='both', labelsize=8, direction="out")
        ax.yaxis.tick_left()
        ax.set_yticks(yTicks[0])
        ax.set_yticklabels(yTicks[1])
    else:
        ax.set_yticks([])

    if xLabels is not None:
        ax.tick_params(axis='x', which='both', labelsize=8, direction="out")
        ax.xaxis.tick_bottom()
        ax.set_xticks(x)
        ax.set_xticklabels(xLabels, rotation='horizontal')
    else:
        ax.set_xticks([])

    # limits
    ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
    ax.set_ylim(0, np.max(data_stack))

    # labels (Axes exposes set_xlabel/set_ylabel, not xlabel/ylabel)
    if xlabel != '':
        ax.set_xlabel(xlabel)
    if ylabel != '':
        ax.set_ylabel(ylabel)
Alright thanks everyone for the input (and Bill for showing me how to use list comprehensions effectively).
I was able to alter the program to achieve what I wanted (I think). I added a new variable, axspacing to the below parts of the program:
def stackedBarPlot(self,
ax, # axes to plot onto
data, # data to plot
cols, # colors for each level
xLabels = None, # bar specific labels
yTicks = 6., # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
edgeCols=None, # colors for edges
showFirst=-1, # only plot the first <showFirst> bars
scale=False, # scale bars to same height
widths=None, # set widths for each bar
heights=None, # set heights for each bar
ylabel='', # label for x axis
xlabel='', # label for y axis
xaxlim=None,
axspacing=0,
):
.
if widths is None:
widths = np.array([1] * num_bars)
x = np.arange(num_bars)
else:
x = [0]
for i in range(1, len(widths)):
x.append(x[i-1] + (widths[i-1] + widths[i])/2 + axspacing)
.
# limits
#ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
ax.set_ylim(0, np.max(data_stack))
if xaxlim is None:
ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5 + num_bars * axspacing)
else:
ax.set_xlim(xaxlim)

Categories

Resources