Show previously created pandas plot - python

Say I create a bar plot in a Jupyter notebook:
import pandas as pd
import matplotlib.pyplot as plt
speed = [0.1, 17.5, 40, 48, 52, 69, 88]
lifespan = [2, 8, 70, 1.5, 25, 12, 28]
index = ["snail", "pig", "elephant", "rabbit", "giraffe", "coyote", "horse"]
df = pd.DataFrame({"speed": speed, "lifespan": lifespan}, index=index)
plot = df.plot(kind='bar', stacked=True)
This shows my chart.
Then, in the next cell, I make a modification, e.g. adding data labels:
for bar in plot.patches:
height = bar.get_height()
width = bar.get_width()
x = bar.get_x()
y = bar.get_y()
label_text = height
label_x = x + width / 2
label_y = y + height / 2
if label_text != 0:
plot.text(
label_x,
label_y,
int(label_text),
ha="center",
va="center",
color="white",
fontweight="bold",
)
Now, how can I show the plot again? plt.show() returns nothing.

Just show the figure again:
plot.figure
As an alternative, you can use plot.get_figure().

Related

How can I plot bar plots with variable widths but without gaps in Python, and add bar width as labels on the x-axis?

I have three lists: x, y and w as shown:
x is the name of objects. y is its height and w is its width.
x = ["A","B","C","D","E","F","G","H"]
y = [-25, -10, 5, 10, 30, 40, 50, 60]
w = [30, 20, 25, 40, 20, 40, 40, 30]
I'd like to plot these values in a bar plot in Python such that y represents height and w represents width of the bar.
When I plot it using
colors = ["yellow","limegreen","green","blue","red","brown","grey","black"]
plt.bar(x, height = y, width = w, color = colors, alpha = 0.8)
I get a plot as shown:
Next, I tried to normalize the widths so that the bars would not overlap with each other using
w_new = [i/max(w) for i in w]
plt.bar(x, height = y, width = w_new, color = colors, alpha = 0.8)
#plt.axvline(x = ?)
plt.xlim((-0.5, 7.5))
I get much better results than before as shown:
However, the gaps between the bars are still uneven. For example, between B and C, there is large gap. But between F and G, there is no gap.
I'd like to have plots where there is even gap width or no gap between two consecutive bars. It should look something as shown:
How can I create this type of plot in Python? Is it possible using any data visualization libraries such as matplotlib, seaborn or Plotly? Is there any alternative to do it if the data is available in dataframe?
Additionally, I'd like to add labels for A, B, C, etc. to the right of the plot and rather have actual width of the bar as labels on the x-axis (for e.g. depicted by red numbers in the x-axis plot above). I'd also like to add a vertical red line at distance 50 from the x-axis. I know this can be added using plt.axvline(x = ...) But I am not sure what is the value I should state as x as the scale of W is not exact with the length of x-axis.
IIUC, you can try something like this:
import matplotlib.pyplot as plt
x = ["A","B","C","D","E","F","G","H"]
y = [-25, -10, 5, 10, 30, 40, 50, 60]
w = [30, 20, 25, 40, 20, 40, 40, 30]
colors = ["yellow","limegreen","green","blue","red","brown","grey","black"]
#plt.bar(x, height = y, width = w, color = colors, alpha = 0.8)
xticks=[]
for n, c in enumerate(w):
xticks.append(sum(w[:n]) + w[n]/2)
w_new = [i/max(w) for i in w]
a = plt.bar(xticks, height = y, width = w, color = colors, alpha = 0.8)
_ = plt.xticks(xticks, x)
plt.legend(a.patches, x)
Output:
Or change xticklabels for bar widths:
xticks=[]
for n, c in enumerate(w):
xticks.append(sum(w[:n]) + w[n]/2)
w_new = [i/max(w) for i in w]
a = plt.bar(xticks, height = y, width = w, color = colors, alpha = 0.8)
_ = plt.xticks(xticks, w)
plt.legend(a.patches, x)
Output:
I figured out an alternative way to do this.
x = ["A","B","C","D","E","F","G","H"]
y = [-25, -10, 5, 10, 30, 40, 50, 60]
w = [30, 20, 25, 40, 20, 40, 40, 30]
xpos = []
a = 0
for i in range(len(w)):
if i == 0:
a+=w[i]
xpos.append(w[i]/2)
else:
a += w[i]
xpos.append(a - w[i]/2)
colors = ["yellow","limegreen","green","blue","red","brown","grey","black"]
fig = plt.bar(xpos,
height = y,
width = w,
color = colors,
alpha = 0.5,
)
plt.xticks(ticks = xpos, labels = w)
plt.xlim((0, 245))
plt.axvline(x = 150)
plt.legend(fig.patches, x)
plt.show()

fill between vertical curves in the sub plot

Dears,
I have the following csv file
depth
lst
dol
Anhd
sst
50
20
40
80
100
100
25
50
85
100
150
15
35
75
100
I take the data from csv to draw subplot contains four curves in the same subplot, I have filled by red color from left edge to first curve, also I have filled by blue color from last curve to right edge, I want to fill between entire curves in between first and last curve and make color legend.
the table is equal to csv file
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import pandas as pd
import re
import json
test = r'D:\python\TEST-COMPOSITION.csv'
test =pd.read_csv(test)
mineral_names = test.drop(['depth'],axis=1)
mineral_names = list(mineral_names.columns.values)
colors = ["green", "gray"]
fig = plt.figure(figsize=(15, 12), dpi=100, tight_layout=True)
gs = gridspec.GridSpec(nrows=1, ncols=10, wspace=0)
fig.add_subplot(gs[0, 1])
for i in range(len(mineral_names)-1):
plt.plot(test[mineral_names[i]],test['depth'],linewidth=2, color='black')
for i in range(len(mineral_names)-1):
if i == 0:
left_col_value = 0
right_col_value = 100
span = abs(left_col_value - right_col_value)
cmap = plt.get_cmap('hot_r')
color_index = np.arange(left_col_value, right_col_value, span / 100)
for index in sorted(color_index):
index_value = (index - left_col_value) / span
plt.fill_betweenx(test['depth'],test[mineral_names[0]], left_col_value, where=test[mineral_names[i]] >= index, color="red")
if i == range(len(mineral_names)-1)[-1]:
left_col_value = 0
right_col_value = 100
span = abs(left_col_value - right_col_value)
cmap = plt.get_cmap('hot_r')
color_index = np.arange(left_col_value, right_col_value, span / 100)
for index in sorted(color_index):
index_value = (index - left_col_value) / span
plt.fill_betweenx(test['depth'],test[mineral_names[i]], right_col_value, where=test[mineral_names[i]] >= index, color="blue")
#if i ==1:
#plt.fill_betweenx(test['depth'], test[mineral_names[i+1]], test[mineral_names[i]],color = "green", alpha=0.4)
plt.gca().invert_yaxis()
plt.show()```
Here is an approach looping through the curves, and using a variable previous_curve which contains the position of the previous curve. At the start, the previous curve is all zeros. Similarly, the name of the previous curve can be saved and used as a label for the fill. All labels will appear in the default legend.
The example code below uses a gridspec with only 4 columns, to make the example plot a bit clearer.
import matplotlib.pyplot as plt
from matplotlib import gridspec
import pandas as pd
import numpy as np
test = pd.DataFrame({'depth': [50, 100, 150],
'lst': [20, 25, 15],
'dol': [40, 50, 35],
'Anhd': [80, 85, 75],
'sst': [100, 100, 100]})
mineral_names = test.columns[1:]
fig = plt.figure(figsize=(15, 12), dpi=100, tight_layout=True)
gs = gridspec.GridSpec(nrows=1, ncols=4, wspace=0)
ax = fig.add_subplot(gs[0, 1])
for mineral_name in mineral_names[:-1]:
ax.plot(test[mineral_name], test['depth'], linewidth=2, color='black')
colors = ["red", "green", "gray", "blue"]
previous_curve = 0
previous_name = ''
for mineral_name, color in zip(mineral_names, colors):
ax.fill_betweenx(test['depth'], previous_curve, test[mineral_name], color=color, alpha=0.4,
label=f'{previous_name} - {mineral_name}')
previous_curve = test[mineral_name]
previous_name = mineral_name
ax.margins(x=0, y=0) # no white space in plot
ax.invert_yaxis()
ax.legend()
plt.show()

Create stacked bar with matplotlib

I have data displayed in the following format:
values = np.array([10, 12,13, 5,20], [30, 7, 10, 25,2], [10, 12,13, 5,20]])
And I want to create a straight-up stacked bar chart like the following figure. Each element in the array belongs to a stacked bar.
I have searched to see how can I do this with matplotlib, but unfortunately, I still haven't found a way to do it. How can I do this?
AFAIK, there is now straightforward way to do it. You need to calculate exact position of bars yourself and then normalize it.
import numpy as np
import matplotlib.pyplot as plt
values = np.array([[10, 12,13, 5,20], [30, 7, 10, 25,2], [10, 12,13, 5,20]])
values_normalized = values/np.sum(values, axis=0)
bottom_values = np.cumsum(values_normalized, axis=0)
bottom_values = np.vstack([np.zeros(values_normalized[0].size), bottom_values])
text_positions = (bottom_values[1:] + bottom_values[:-1])/2
r = [0, 1, 2, 3, 4] # position of the bars on the x-axis
names = ['A', 'B', 'C', 'D', 'E'] # names of groups
colors = ['lightblue', 'orange', 'lightgreen']
for i in range(3):
plt.bar(r, values_normalized[i], bottom=bottom_values[i], color=colors[i], edgecolor='white', width=1, tick_label=['a','b','c','d','e'])
for xpos, ypos, yval in zip(r, text_positions[i], values[i]):
plt.text(xpos, ypos, "N=%d"%yval, ha="center", va="center")
# Custom X axis
plt.xticks(r, names, fontweight='bold')
plt.xlabel("group")
plt.show()
There is a source that tells how to add text on top of bars. I'm a bit in a hurry right now so I hope this is useful and I'll update my answer next day if needed.
I've updated my answer. Adding text on top of the bars is tricky, it requires some calculations of their vertical positions.
Btw, I have refactored the most of code that is in a link I shared.
Python 3.8
matplotlib 3.3.1
numpy 1.19.1
Chat Result
import matplotlib.pyplot as plt
import numpy as np
values = np.array([[10, 12, 13, 5, 20], [30, 7, 10, 25, 2], [10, 12, 13, 5, 20]])
row, column = values.shape # (3, 5)
x_type = [x+1 for x in range(column)]
ind = [x for x, _ in enumerate(x_type)]
values_normalized = values/np.sum(values, axis=0)
value1, value2, value3 = values_normalized[0,:], values_normalized[1,:], values_normalized[2,:]
# Create figure
plt.figure(figsize=(8, 6))
plt.bar(ind, value1, width=0.8, label='Searies1', color='#5B9BD5')
plt.bar(ind, value2, width=0.8, label='Searies2', color='#C00000', bottom=value1)
plt.bar(ind, value3, width=0.8, label='Searies3', color='#70AD47', bottom=value1 + value2)
# Show text
bottom_values = np.cumsum(values_normalized, axis=0)
bottom_values = np.vstack([np.zeros(values_normalized[0].size), bottom_values])
text_positions = (bottom_values[1:] + bottom_values[:-1])/2
c = list(range(column))
for i in range(3):
for xpos, ypos, yval in zip(c, text_positions[i], values[i]):
plt.text(xpos, ypos, yval, horizontalalignment='center', verticalalignment='center', color='white')
plt.xticks(ind, x_type)
plt.legend(loc='center', bbox_to_anchor=(0, 1.02, 1, 0.1), handlelength=1, handleheight=1, ncol=row)
plt.title('CHART TITLE', fontdict = {'fontsize': 16,'fontweight': 'bold', 'family': 'serif'}, y=1.1)
# Hide y-axis
plt.gca().axes.yaxis.set_visible(False)
plt.show()

How do I give some space between Y axis and my starting and ending vertical bars in Matplotlib

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.pyplot import figure
plt.style.use('ggplot')
overs = np.arange(1, 51)
india_score = np.random.randint(low = 1, high = 18, size = 50, dtype = 'int16')
plt.bar(overs, india_score, width = 0.80, align = 'center', color = 'orange', label = 'Runs per over')
plt.xlabel('Overs')
plt.ylabel('Score')
plt.title('India Inning')
plt.axis([1, 50, 0, 18])
plt.legend()
plt.grid(color='k', linestyle='-', linewidth=1)
fig = plt.gcf()
fig.set_size_inches(16, 9)
plt.show()
The output looks like this:
If you see the bar chart then runs scored in first over and runs scored in last over stick to the Y axis. How can I give some space between Y axis and my first and last vertical bars. I tried the margins function but that is not working
I searched for similar posts but I was unable to understand the solution as I am new to matplotlib. Any help will be greatly appreciated. Thanks.
Here is how you could do this:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.pyplot import figure
plt.style.use('ggplot')
overs = np.arange(1, 51)
india_score = np.random.randint(low = 1, high = 18, size = 50, dtype = 'int16')
plt.bar(overs, india_score, width = 0.80, align = 'center', color = 'orange', label = 'Runs per over')
plt.xlabel('Overs')
plt.ylabel('Score')
plt.title('India Inning')
plt.axis([1, 50, 0, 18])
plt.legend()
plt.grid(color='k', linestyle='-', linewidth=1)
fig = plt.gcf()
fig.set_size_inches(16, 9)
left, right = plt.xlim()
plt.xlim(left-1, right+1)
plt.show()
left, right = plt.xlim() gets the current limits of the x-axis and plt.xlim(left-1, right+1) sets the new limits by one step further outside relative to the old limits.

Ylabel rescale range and end at 0%

import numpy as np
import matplotlib.pyplot as plt
n = 1000
x = np.arange(0, n)
y1 = np.random.normal(50, 4, n)
y2 = np.random.normal(25, 2.5, n)
y3 = np.random.normal(10, 1.1, n)
fig, (ax1, ax2, ax3) = plt.subplots(nrows = 3, ncols = 1)
ax1.plot(x, y1, 'royalblue')
ax1.set(xticks = [], title = 'Title')
ax2.plot(x, y2, 'darkorange')
ax2.set(xticks = [])
ax3.plot(x, y3, 'forestgreen')
ax3.set(xlabel = 'Random sample')
fig.legend(['First', 'Second', 'Third'])
plt.show()
I would like the ylabels to be shown in percentage, start at 0% and decrease. For example the blue one should go from [30, 40, 50, 60, 70] to [-57.1%, -42.9%, -28.6%, -14.3%, 0%]. The yellow one should go from [10, 20, 30, 40] to [-75%, -50%, -25%, 0%] and the green one should go from [5, 7.5, 10, 12.5, 15] to [-66.6%, -50%, -33.3%, -16.7%, 0%].
The rest of the graphs should look exactly the same, only the ylabels should change.
Just convert your current yticks to floats and change to the range you want them to be at before displaying:
import numpy as np
ticks = [float(x) for x in yvals]
ticks = np.array(ticks) - max(ticks)
yticklabels = ['{0:.1%}'.format(x) for x in ticks]
Do this for each plot separately.

Categories

Resources