Prevent overlapping labels and graphs in matplotlib

Prevent overlapping labels and graphs in matplotlib - python

I have the following code:
from mpl_toolkits.axes_grid.axislines import SubplotZero
from matplotlib.transforms import BlendedGenericTransform
from matplotlib import patches
import matplotlib.pyplot as plt
import numpy
if 1:
fig = plt.figure(1)
ax = SubplotZero(fig, 111)
fig.add_subplot(ax)
ax.axhline(linewidth=1.7, color="k")
ax.axvline(linewidth=1.7, color="k")
plt.xticks([])
plt.yticks([])
ax.text(0, 1.05, r'$y$', transform=BlendedGenericTransform(ax.transData, ax.transAxes), ha='center')
ax.text(1.05, 0, r'$x$', transform=BlendedGenericTransform(ax.transAxes, ax.transData), va='center')
for direction in ["xzero", "yzero"]:
ax.axis[direction].set_axisline_style("-|>")
ax.axis[direction].set_visible(True)
for direction in ["left", "right", "bottom", "top"]:
ax.axis[direction].set_visible(False)
x = numpy.linspace(-1.499999999, 5, 1000000)
yy = numpy.log(2*x + 3)/2 + 3
ax.plot(x, yy, linewidth=1.2, color="black")
plt.ylim(-2, 5)
plt.xlim(-5, 5)
plt.text((numpy.e**(-6) - 3)/2, 0, r'$(\frac{1}{2} (e^{-6} - 3), 0)$', position=((numpy.e**(-6) - 3)/2 + 0.1, 0.1))
plt.plot((numpy.e**(-6) - 3)/2, 0, 'ko')
plt.text(0, numpy.log(3)/2 + 3, r'$(0, \frac{1}{2} \log_e{\left (3 \right )} + 3)$', position=(0.1, numpy.log(3)/2 + 3 + 0.1))
plt.plot(0, numpy.log(3)/2 + 3, 'ko')
plt.savefig('AnswersSA1a.png')
that produces this graph:
The two labelled axis intercepts have significant overlap with the axes as well as the graph itself. I know this could be solved by manually placing the text at a separate point. However - is there a way to have matplotlib smartly place the label so it doesn't overlap?

Related

Matplotlib, plot a vector of numbers as a rectangle filled with numbers

So let's say I have a vector of numbers.
np.random.randn(5).round(2).tolist()
[2.05, -1.57, 1.07, 1.37, 0.32]
I want a draw a rectangle that shows this elements as numbers in a rectangle.
Something like this:
Is there an easy way to do this in matplotlib?

A bit convoluted but you could take advantage of seaborn.heatmap, creating a white colormap:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
data = np.random.randn(5).round(2).tolist()
linewidth = 2
ax = sns.heatmap([data], annot=True, cmap=LinearSegmentedColormap.from_list('', ['w', 'w'], N=1),
linewidths=linewidth, linecolor='black', square=True,
cbar=False, xticklabels=False, yticklabels=False)
plt.tight_layout()
plt.show()
In this case, the external lines won't be as thick as the internal ones. If needed, this can be fixed with:
ax.axhline(y=0, color='black', lw=linewidth*2)
ax.axhline(y=1, color='black', lw=linewidth*2)
ax.axvline(x=0, color='black', lw=linewidth*2)
ax.axvline(x=len(data), color='black', lw=linewidth*2)
Edit: avoid these lines and add clip_on=False to sns.heatmap (thanks/credit #JohanC)
Output:

We can add rectangles , and annotate them in a for loop.
from matplotlib import pyplot as plt
import numpy as np
# Our numbers
nums = np.random.randn(5).round(2).tolist()
# rectangle_size
rectangle_size = 2
# We want rectangles look squared, you can change if you want
plt.rcParams["figure.figsize"] = [rectangle_size * len(nums), rectangle_size]
plt.rcParams["figure.autolayout"] = True
fig = plt.figure()
ax = fig.add_subplot(111)
for i in range(len(nums)):
# We are adding rectangles
# You can change colors as you wish
plt.broken_barh([(rectangle_size * i, rectangle_size)], (0, rectangle_size), facecolors='white', edgecolor='black'
,linewidth = 1)
# We are calculating where to annotate numbers
cy = rectangle_size / 2.0
cx = rectangle_size * i + cy
# Annotation You can change color,font, etc ..
ax.annotate(str(nums[i]), (cx, cy), color='black', weight='bold', fontsize=20, ha='center', va='center')
# For squared look
plt.xlim([0, rectangle_size*len(nums)])
plt.ylim([0, rectangle_size])
# We dont want to show ticks
plt.axis('off')
plt.show()

One way using the Rectangle patch is:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
x = np.random.randn(5).round(2).tolist()
fig, ax = plt.subplots(figsize=(9, 2)) # make figure
dx = 0.15 # edge size of box
buf = dx / 10 # buffer around edges
# set x and y limits
ax.set_xlim([0 - buf, len(x) * dx + buf])
ax.set_ylim([0 - buf, dx + buf])
# set axes as equal and turn off axis lines
ax.set_aspect("equal")
ax.axis("off")
# draw plot
for i in range(len(x)):
# create rectangle with linewidth=4
rect = Rectangle((dx * i, 0), dx, dx, facecolor="none", edgecolor="black", lw=4)
ax.add_patch(rect)
# get text position
x0, y0 = dx * i + dx / 2, dx / 2
# add text
ax.text(
x0, y0, f"{x[i]}", color="black", ha="center", va="center", fontsize=28, fontweight="bold"
)
fig.tight_layout()
fig.show()
which gives:

Matplotlib fill_between edge

I need to create a plot as close to this picture as possible (given the generated dataframe code below):
And here's the output plot of my code:
What I am having problems with is:
The edge of fill_between is not sharp as in the picture. What I have is some kind of white shadow. How do I change the line between the fillings to match a target picture?
How do I aling legend color lines to the center, but not to the left border which my code does?
Here's my code:
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
import numpy as np
import pandas as pd
ncols = 10
figsize = (20, 5)
fontsize = 14
dti = pd.date_range('2013-01-01', '2020-12-31', freq='2W')
probabilities_in_time = np.random.random((ncols, len(dti)))
probabilities_in_time = probabilities_in_time / \
probabilities_in_time.sum(axis=0)
probabilities_in_time = pd.DataFrame(probabilities_in_time).T
probabilities_in_time.index = dti
cm_subsection = np.linspace(0, 1, ncols)
colors = [cm.coolwarm(x) for x in cm_subsection]
def plot_time_probabilities(probabilities_in_time, figsize):
plt.figure(figsize=figsize)
plt.yticks(np.arange(0, 1.2, 0.2), fontsize=fontsize)
plt.xticks(fontsize=fontsize)
draw_stack_plot(colors, probabilities_in_time)
set_grid()
set_legend()
plt.show()
def draw_stack_plot(colors, probabilities_in_time):
for i, color in enumerate(colors):
if i == 0:
plt.plot(probabilities_in_time[i], color=color)
plt.fill_between(probabilities_in_time.index,
probabilities_in_time[0], color=color)
else:
probabilities_in_time[i] += probabilities_in_time[i-1]
plt.fill_between(probabilities_in_time.index,
probabilities_in_time[i], probabilities_in_time[i-1],
color=color)
plt.plot(probabilities_in_time[i], label=' Probability: {}'.format(
i), color=color)
def set_grid():
ax = plt.gca()
ax.set_axisbelow(False)
ax.xaxis.grid(True, linestyle='-', lw=1)
def set_legend():
leg = plt.legend(loc='lower left', fontsize=14, handlelength=1.3)
for i in leg.legendHandles:
i.set_linewidth(12)
plot_time_probabilities(probabilities_in_time, figsize)

To set the legend in the center, you can set loc='center', or you can put the legend outside. To avoid that the legend handles grow to larger, you can leave out .set_linewidth(12) (this sets a very wide edge width of 12 points).
Shifting the colors by one position can help to show the fill borders more pronounced. To still have a correct legend, the label should then be added to fill_between.
The code below also tries to simplify part of the calls:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
ncols = 10
figsize = (20, 5)
fontsize = 14
dti = pd.date_range('2013-01-01', '2020-12-31', freq='2W')
probabilities_in_time = np.random.random((ncols, len(dti)))
probabilities_in_time = probabilities_in_time / probabilities_in_time.sum(axis=0)
probabilities_in_time = pd.DataFrame(probabilities_in_time).T
probabilities_in_time.index = dti
cm_subsection = np.linspace(0, 1, ncols)
colors = cm.coolwarm(cm_subsection)
def plot_time_probabilities(probabilities_in_time, figsize):
plt.figure(figsize=figsize)
plt.yticks(np.arange(0, 1.2, 0.2), fontsize=fontsize)
plt.xticks(fontsize=fontsize)
draw_stack_plot(colors, probabilities_in_time)
set_grid()
set_legend()
# plt.margins(x=0, y=0)
plt.margins(x=0.02)
plt.tight_layout()
plt.show()
def draw_stack_plot(colors, probabilities_in_time):
current_probabilities = 0
for i, color in enumerate(colors):
plt.fill_between(probabilities_in_time.index,
probabilities_in_time[i] + current_probabilities, current_probabilities,
color=color, label=f' Probability: {i}')
current_probabilities += probabilities_in_time[i]
plt.plot(current_probabilities,
color=colors[0] if i <= 1 else colors[-1] if i >= 8 else colors[i - 1] if i < 5 else colors[i + 1])
def set_grid():
ax = plt.gca()
ax.set_axisbelow(False)
ax.xaxis.grid(True, linestyle='-', lw=1)
def set_legend():
leg = plt.legend(loc='lower left', fontsize=14, handlelength=1.3)
# leg = plt.legend(loc='upper left', bbox_to_anchor=(1.01, 1), fontsize=14, handlelength=1.3)
# for i in leg.legendHandles:
# i.set_linewidth(12)
plot_time_probabilities(probabilities_in_time, figsize)

How to center the bar plot to show the difference of a certain column?

How to center the bar plot to show the difference of a certain column?
I have the following bar plot, done with matplotlib :
Note how the barplot is really bad. The difference between each bar cant really be seen properly. So what I want, is to use the red bar as the origin in the y-axis. That way, the other bars would show the difference (blue_bar(i) - redbar).
In other words, I want the value of the red bar in the y-axis to be the y-origin of the plot.
Again, in another words, the red bar is the accuracy obtained by my academic work. I want to plot the other article results compared/ IN RELATION to mine.
I made the following picture using paint.net to illustrate what I want.
Any other ideas/suggestions are really appreciated.
Appendix :
I used the following code to produce the first graphic :
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
y = accuracies
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
ax.bar(x[len(x) - 1] + 1, 95.30, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.savefig('foo.png')
plt.show()

You could either set the y limits closer to the interesting values:
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
my_acc = 95.30
y = accuracies
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
ax.bar(x[len(x) - 1] + 1, my_acc, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.ylim(min(y) - 0.5, max(y) +0.5)
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.savefig('foo2.png')
plt.show()
Or you could plot it around zero, with your result being the new origin (but you will have to indicate by how much you shifted the origin somewhere in the legend or somewhere else):
import numpy as np
import random
from matplotlib import pyplot as plt
accuracies = [0.9630, 0.9597, 0.9563, 0.9533, 0.9527, 0.9480, 0.9477, 0.9472, 0.9472, 0.9466, 0.9452, 0.9452, 0.9442,
0.9440, 0.9434, 0.9420, 0.9407, 0.9407, 0.9391, 0.9377, 0.9185, 0.9268]
sensitividades = [0.7680, 0.7200, 0.8173, 0.7569, 0.7406, 0.7354, 0.7746, 0.7344, 0.7067, 0.7410, 0.7370, 0.7321,
0.7357]
especificidades = [0.9827, 0.9733, 0.9816, 0.9807, 0.9789, 0.9724, 0.9764, 0.9801, 0.9751, 0.9521, 0.9487, 0.9694]
accuracies = [x * 100 for x in accuracies]
my_acc = 95.30
y = np.asarray(accuracies) - my_acc
N = len(y)
x = range(N)
width = 1 / 1.1
fig, ax = plt.subplots(1, 1)
ax.grid(zorder=0)
# Plot other articles
bars = ax.bar(x, y, width, color="blue", zorder=3)
# Plot my work
# ax.bar(x[len(x) - 1] + 1, my_acc, width, color="red", zorder=3)
plt.title('Accuracy of each article')
plt.yticks([0, -0.3, -1.3, -2.3, -3.3, 0.7, 1.7], [95.30, 95, 94, 93, 92, 96, 97])
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(min(y) - 0.5, max(y) + 0.7)
def autolabel(rects):
for i in range(len(rects)):
rect = rects[i]
height = rect.get_height()
if (height >= 0):
ax.text(rect.get_x() + rect.get_width()/2.,
0.3 + height,'[{}]'.format( i), ha='center', va='bottom',
fontsize=7.5)
if (height < 0):
ax.text(rect.get_x() + rect.get_width()/2.,
height - 0.3,'[{}]'.format( i), ha='center', va='bottom',
fontsize=7.5)
autolabel(bars)
plt.savefig('foo.png')
plt.show()
Of course, your own result would not appear in the second plot, since it would have height zero.

I actually think the way you have it represented now is actually best -- meaning there isn't a huge difference in accuracy on a cursory level.
However, if you want to set the value of the red bar as the origin, try this:
...
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(95.30) # Sets the value of the red bar as the origin.
plt.savefig('foo.png')
plt.show()
Perhaps setting the minimum value of lowest accuracy of the article might make this graph more digestible.
...
plt.title('Accuracy of each article')
plt.xlabel('Article')
plt.ylabel('Accuracy')
plt.ylim(min(accuracies), 100) # Sets the value of minimum accuracy as the origin and the max value as 100.
plt.savefig('foo.png')
plt.show()

Wrong scatter label color in pyplot legend

I am making this bar plot:
... using this code segment:
my_cmap = plt.get_cmap('copper')
plt.figure()
plt.set_cmap(my_cmap)
plt.pcolormesh(xx, yy, Z)
labels = ['Negative', 'Negative (doubtful)', 'Positive (doubtful)', 'Positive' ]
for i in [0, 1, 2, 3] :
plt.scatter(clustered_training_data[y==i, 0], clustered_training_data[y==i, 1], c=my_cmap(i / 3.0), label=labels[i], s=50, marker='o', edgecolor='white', alpha=0.7)
plt.scatter(lda_trans_eval[q == -1, 0], lda_trans_eval[q == -1, 1], c='green', label='Your patient', s=80, marker='h', edgecolor='white')
plt.legend(prop={'size':8})
Only one (second) color is always blue, regardless of chosen color map. Corresponding data points are correctly colored in the plot and I can't see the reason why pyplot colors the second label differently.

I can't reproduce it with dummy data. Does this have the problem when you run it?
import matplotlib.pyplot as plt
import numpy as np
my_cmap = plt.get_cmap('copper')
fig = plt.figure(figsize=(5,5))
plt.set_cmap(my_cmap)
X = np.linspace(-1,5,100)
Y = np.linspace(-1,5,100)
X,Y = np.meshgrid(X,Y)
Z = (X**2 + Y**2)
Z = Z.astype(int)
Z += (X**2 + Y**2) < .5
ax = plt.pcolormesh(X, Y, Z)
for i in [0,1,2,3]:
plt.scatter([i],[i],c=my_cmap(i / 3.0),label='i=%s'%str(i),
edgecolor='white', alpha=0.7)
plt.scatter([],[],c=my_cmap(1/3.0), label='empty data')
plt.scatter([3],[1],c='green',label='Force color')
plt.legend(loc=2, prop={'size':8})
from os.path import realpath, basename
s = basename(realpath(__file__))
fig.savefig(s.split('.')[0])
plt.show()

This happened to me. I fixed it by using color instead of c.
plt.scatter(clustered_training_data[y==i, 0], clustered_training_data[y==i, 1], color=my_cmap(i / 3.0), label=labels[i], s=50, marker='o', edgecolor='white', alpha=0.7)

Unable to change tick label properties in matplotlib

No matter what I seem to do, or whichever past questions I seem to look up - I seem unable to change simple properties of tick labels.
This code:
from mpl_toolkits.axes_grid.axislines import SubplotZero
from matplotlib.transforms import BlendedGenericTransform
import matplotlib.pyplot as plt
import numpy
if 1:
fig = plt.figure(1)
ax = SubplotZero(fig, 111)
fig.add_subplot(ax)
# thicken the axis lines
ax.axhline(linewidth=1.7, color="k")
ax.axvline(linewidth=1.7, color="k")
plt.xticks([-numpy.pi/2, -numpy.pi/4, 0, numpy.pi/4, numpy.pi/2], [r'$-\pi$', r'$-\pi/2$', r'$O$', r'$\pi/2$', r'$\pi$'], rotation=30)
plt.yticks([])
#ax.set_xticklabels([r'$-\pi$', r'$-\pi/2$', r'$0$', r'$\pi/2$', r'$\pi$'], rotation=40, ha='left')
# end-of-axis arrows
ax.text(0, 1.05, r'$y$', transform=BlendedGenericTransform(ax.transData, ax.transAxes), ha='center')
ax.text(1.03, 0, r'$x$', transform=BlendedGenericTransform(ax.transAxes, ax.transData), va='center')
plt.ylim(-5, 5)
plt.xlim(-numpy.pi/2, numpy.pi/2)
x_width = (abs(plt.xlim()[0]) + abs(plt.xlim()[1])) / 2
y_width = (abs(plt.ylim()[0]) + abs(plt.ylim()[1])) / 2
# end-of-axis arrows
plt.arrow(plt.xlim()[1], -0.003, x_width*0.01, 0,
width=x_width*0.0015, color="k", clip_on=False,
head_width=y_width*0.24/7, head_length=x_width*0.024)
plt.arrow(0.003, plt.ylim()[1], 0, y_width*0.01,
width=y_width*0.0015, color="k", clip_on=False,
head_width=x_width*0.24/7, head_length=y_width*0.024)
for direction in ["xzero", "yzero"]:
ax.axis[direction].set_visible(True)
for direction in ["left", "right", "bottom", "top"]:
ax.axis[direction].set_visible(False)
x = numpy.linspace(-numpy.pi/2, numpy.pi/2, 2500)
yy = numpy.tan(2*(x - numpy.pi/2))
threshold = 1000
yy[yy > threshold] = numpy.inf
yy[yy < -threshold] = numpy.inf
ax.plot(x, yy, linewidth=1.2, color="black")
ax.axvline(x=-3*numpy.pi/4, linewidth=1.0, color="k", linestyle="--")
ax.axvline(x=-numpy.pi/4, linewidth=1.0, color="k", linestyle="--")
ax.axvline(x=numpy.pi/4, linewidth=1.0, color="k", linestyle="--")
ax.axvline(x=3*numpy.pi/4, linewidth=1.0, color="k", linestyle="--")
plt.savefig('MC6.png')
with particular note of this line:
plt.xticks([-numpy.pi/2, -numpy.pi/4, 0, numpy.pi/4, numpy.pi/2], [r'$-\pi$', r'$-\pi/2$', r'$O$', r'$\pi/2$', r'$\pi$'], rotation=30)
does not rotate the labels by 30 degrees. Likewise if I do other workarounds to change the font-size, the labels always stay the same.
Am I missing something super simple about matplotlib???

Add this line:
ax.axis["xzero"].major_ticklabels.set_rotation(30)
For the reference, here's a mailing list item where this comes from: http://matplotlib.1069221.n5.nabble.com/rotating-x-tick-labels-bold-labels-with-axislines-toolkit-td7661.html
As to why what you do has no effect, I can only guess. A wild guess would be that the state machine (which works under the hood when you call functions from the pyplot namespace) assumes that plt.xticks relate to the bottom spine, which you've explicitly set to be invisible.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Prevent overlapping labels and graphs in matplotlib - python

Related

Matplotlib, plot a vector of numbers as a rectangle filled with numbers

Matplotlib fill_between edge

How to center the bar plot to show the difference of a certain column?

Wrong scatter label color in pyplot legend

Unable to change tick label properties in matplotlib

Categories

Resources