I am trying to make a table with just two columns from a set of data. The data is shown below:
data = [[ 66386, 174296, 75131, 577908, 32015],
[ 58230, 381139, 78045, 99308, 160454],
[ 89135, 80552, 152558, 497981, 603535],
[ 78415, 81858, 150656, 193263, 69638],
[139361, 331509, 343164, 781380, 52269]]
I just want to display the first column of the data so that I get a table that looks like the one below:
Below is a snippet of code that I am trying to use:
columns = ('Freeze', 'Wind', 'Flood', 'Quake', 'Hail')
rows = ['%d year' % x for x in (100, 50, 20, 10, 5)]
# Get some pastel shades for the colors
colors = plt.cm.BuPu(np.linspace(0, 0.5, len(rows)))
n_rows = len(data)
# Initialize the vertical-offset for the stacked bar chart.
y_offset = np.zeros(len(columns))
# Plot bars and create text labels for the table
cell_text = []
for row in range(n_rows):
y_offset = data[row]
cell_text.append(['%1.1f' % (x / 1000.0) for x in y_offset])
# Reverse colors and text labels to display the last value at the top.
colors = colors[::-1]
the_table = plt.table(cellText=cell_text,
rowLabels=rows,
rowColours=colors,
colLabels=columns,
loc='center')
How do I tweak this code to get the desired result?
Add a 'year' entry to the columns, for example:
# Two-column table: the year label plus the first data column ('Freeze').
# plt.table requires len(colLabels) == number of cell columns, so the 'year'
# header only works if the year labels are real cells rather than rowLabels.
columns = ('year', 'Freeze')
rows = ['%d year' % x for x in (100, 50, 20, 10, 5)]

# Get some pastel shades for the colors, reversed as in the original example.
colors = plt.cm.BuPu(np.linspace(0, 0.5, len(rows)))[::-1]

# One table row per data row: the year label and the first value in thousands.
cell_text = [[label, '%1.1f' % (values[0] / 1000.0)]
             for label, values in zip(rows, data)]

# Shade the year cells (the role rowColours played for row labels) and keep
# the value cells white.
cell_colours = [[shade, 'w'] for shade in colors]

the_table = plt.table(cellText=cell_text,
                      cellColours=cell_colours,
                      colLabels=columns,
                      loc='center')
Related
This is a question about how to properly organize subplots, not how to create stacked bars.
I have the following dataframe:
corpus group mono p non p plus p minus p
0 fairview all 49 51 49 0
1 i2b2 all 46 54 46 0
2 mipacq all 44 56 43 1
and want to arrange the output as given in the two attached figures so that I get ncolumns and 2-rows, instead of two separate subplots with 1 row each (so in this case, there would be 2-rows, 3-columns on a single subplot instead of 1-row, 3-columns on 2 subplots):
I am generating these two figures as separate subplots using the following code:
data = <above dataframe>
semgroups = ['all']
corpus = ['fairview', 'i2b2', 'mipacq']
for sg in semgroups:
i = semgroups.index(sg)
ix = i + 7
ncols = len(set(data.corpus.tolist()))
nrows = len(set(data.group.tolist()))
fig = plt.figure()
fig, axs = plt.subplots(1, ncols, sharey=True)
for ax,(idx,row) in zip(axs.flat, data.iterrows()):
# I WANT TO PLOT BOTH ROWS on same subplot
#row[['mono p', 'non p']].plot.bar(ax=ax, color=['C0','C1'])
row[['plus p', 'minus p']].plot.bar(ax=ax, color=['C0','C1'])
if row['corpus'] == 'fairview':
corpus = 'Fairview'
label = '(d) '
elif row['corpus'] == 'mipacq':
corpus = 'MiPACQ'
if ncols == 3:
label = '(f) '
else:
label = '(b) '
else:
corpus = 'i2b2'
label = '(e) '
ax.set_title(label + corpus)
ax.tick_params(axis='x', labelrotation = 45)
if sg == 'all':
sg = 'All groups'
# Defining custom 'xlim' and 'ylim' values.
custom_ylim = (0, 60)
# Setting the values for all axes.
plt.setp(axs, ylim=custom_ylim)
fig.suptitle('Figure ' + str(ix) + ' ' + sg)
In the code above, I iterate through my df grabbing the following rows to generate both separate subplots:
# BUT, I WANT TO PLOT BOTH ROWS ON SAME SUBPLOT
row[['mono p', 'non p']].plot.bar(ax=ax, color=['C0','C1'])
row[['plus p', 'minus p']].plot.bar(ax=ax, color=['C0','C1'])
No matter how I do this, I cannot get the desired two rows in a single subplot (I always get an empty row of plots with no data on the second row).
See inline comments
Tested in python 3.8.12, pandas 1.3.3, matplotlib 3.4.3, seaborn 0.11.2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns # seaborn is a high-level api for matplotlib
# sample dataframe
data = {'corpus': ['fairview', 'i2b2', 'mipacq'], 'group': ['all', 'all', 'all'], 'mono p': [49, 46, 44], 'non p': [51, 54, 56], 'plus p': [49, 46, 43], 'minus p': [0, 0, 1]}
df = pd.DataFrame(data)

semgroups = df.group.unique()  # unique groups (a numpy array)
corpus = df.corpus.unique()  # unique corpus names (a numpy array)
rows = [['mono p', 'non p'], ['plus p', 'minus p']]  # columns for each row of plots

ncols = len(corpus)  # 3 columns for the example
nrows = len(rows)  # 2 rows for the example

# enumerate() supplies the group position; a numpy array has no .index() method
for i, sg in enumerate(semgroups):
    ix = i + 7

    # create a figure with 2 rows of 3 columns: axes is a 2x3 array of Axes
    fig, axes = plt.subplots(nrows, ncols, sharey=True, figsize=(12, 10))

    # iterate through each plot row combined with a list from rows
    for axe, row in zip(axes, rows):
        # iterate through each plot column of the current row
        for name, ax in zip(corpus, axe):
            # select the data for each plot
            subset = df.loc[df.group.eq(sg) & df.corpus.eq(name), row]

            # plot the data with seaborn, which makes it easier to color the bars
            # (pandas alternative: subset.T.plot(kind='bar', legend=False, ax=ax))
            sns.barplot(data=subset, ax=ax)

            if name == 'fairview':
                l2 = 'Fairview'
                l1 = '(d) '
            elif name == 'mipacq':
                l2 = 'MiPACQ'
                l1 = '(f) ' if ncols == 3 else '(b) '
            else:
                l2 = 'i2b2'
                l1 = '(e) '

            ax.set_title(l1 + l2)
            ax.tick_params(axis='x', labelrotation=45)

    # use a separate name for the display title so the loop variable, which is
    # also used to filter the dataframe, is never overwritten
    group_title = 'All groups' if sg == 'all' else sg

    # Defining custom 'ylim' values.
    custom_ylim = (0, 60)

    # Setting the values for all axes.
    plt.setp(axes, ylim=custom_ylim)

    fig.suptitle('Figure ' + str(ix) + ' ' + group_title)
    fig.tight_layout()
    plt.show()
I want to make a plot that shows the RSF by floor, broken down by unit_id, but I want the color of each unit_id to be defined by the year. So far I have made it work for a reduced set of data, but I think it will be difficult to scale the code.
What I do is first identify the order that each unit has on its floor, so I first plot all the units that are in the first position on each floor, then the ones that are in second position, and so on.
Thanks!
df_build = pd.DataFrame({'floor':[1,1,1,2,2,2,3,3,3],'unidad':[100,101,102,200,201,202,300,301,302],
'rsf':[2000,1000,1500,1500,2000,1000,1000,1500,2000],'order':[0,1,2,0,1,2,0,1,2],
'year':[2008,2009,2010,2009,2010,2011,2010,2011,2012]})
assign_colors = {2008:'tab:red',2009:'tab:blue',2010:'tab:green',2011:'tab:pink',2012:'tab:olive'}
labels = list(df_build.floor.unique())
order_0 = df_build[df_build.order==0].rsf.values
c1=list(df_build[df_build.order==0].year.replace(assign_colors).values)
order_1 = df_build[df_build.order==1].rsf.values
c2=list(df_build[df_build.order==1].year.replace(assign_colors).values)
order_2 = df_build[df_build.order==2].rsf.values
c3=list(df_build[df_build.order==2].year.replace(assign_colors).values)
width = 0.35
fig, ax = plt.subplots()
ax.barh(labels, order_0, width,color=c1)
ax.barh(labels, order_1, width,left=order_0, color=c2)
ax.barh(labels, order_2, width,left=order_0+order_1, color=c3)
ax.set_ylabel('floor')
ax.set_title('Stacking Plan')
#ax.legend()
plt.show()
Try pivoting the data and loop:
# map each unit's year to its color
df_build['color'] = df_build['year'].map(assign_colors)

# pivot the data: one row per floor, one column per (value, order) pair
plot_df = df_build.pivot(index='floor', columns='order')

# plot one stacked horizontal bar per floor
fig, ax = plt.subplots()
# iterate over the pivoted frame's own index (the floors)
for floor in plot_df.index:
    widths = plot_df.loc[floor, 'rsf']
    # the running total gives each segment's right edge; shifting it by one
    # gives the matching left edge
    rights = widths.cumsum()
    lefts = rights.shift(fill_value=0)

    ax.barh(floor, widths, left=lefts, color=plot_df.loc[floor, 'color'])

    # label every segment at its horizontal midpoint with "<unit>-<rsf>"
    units = plot_df.loc[floor, 'unidad']
    for unit, rsf, left, right in zip(units, widths, lefts, rights):
        ax.text((left + right) / 2, floor, f'{unit}-{rsf}', ha='center')
Output:
I have the following code and would like to get a graph like the one labeled 'want', but I am instead getting one where there is overlapping in color. I believe pandas may have a built in graph like the one I am looking for, but maybe this graph I am generating could do the same.
UPDATE:
I was able to get the graph working; now I need the colors not to repeat. Currently colors are repeated across different 'State' values (e.g. Delhi, etc.). The code has been updated to reflect the changes.
code:
data = Table.read_table('IndiaStatus.csv')#.drop('Discharged', 'Discharge Ratio (%)','Total Cases','Active','Deaths')
data2 = data.to_df()
cols = list(data2.columns)
cols.remove('State/UTs')
# now iterate over the remaining columns and create a new zscore column
for col in cols:
col_zscore = col + '_zscore'
data2[col_zscore] = (data2[col] - data2[col].mean())/data2[col].std(ddof=0)
print(data2)
data2.info()
data2["outlier"] = (abs(data2["Total Cases_zscore"])>1).astype(int)
print(data2)
delete_row = data2[data2["outlier"]== 1].index
data2 = data2.drop(delete_row)
print(data2)
data2["outlier2"] = ((data2["Active_zscore"])> 0.00).astype(int)
delete_row = data2[data2["outlier2"]== 1].index
data2 = data2.drop(delete_row)
'''
#Analyzing and removing outliers for Total Cases_zscore
sns.distplot(data2["Active_zscore"], kde = False, bins = 30)
g = sns.jointplot(x='Active_zscore', y='Active_zscore',
data=data2, hue='State/UTs')
plt.subplots_adjust(right=0.75)
g.ax_joint.legend(bbox_to_anchor=(1.25,1), loc='upper left', borderaxespad=0)
'''
print(data2)
print(data2.mean())
print(data2.std())
#data2.insert(1, column = "Level", value = np.where(data2["Active"] > 9700, "Severe", data["Active"] < 9700 & data["Active"] > 4850, 'Less_Severe','Not_Severe'))
col = 'Active'
conditions = [ data2['Active']<=600, data2['Active']<= 1200, data2['Active'] >1200 ]
choices = [ 'Not_Severe','Less_Severe',"Severe" ]
data2["Level"] = np.select(conditions, choices, default=np.nan)
print(data2)
ax=data2.pivot_table(index='Level', columns = 'State/UTs', values = 'Total Cases').plot(kind='bar',stacked=True,figsize=(15,15),fontsize=25)
ax.legend(fontsize=25)
#set ylim
#plt.ylim(-1, 20,5)
#plt.xlim(-1,4,8)
#grid on
plt.grid()
# set y=0
ax.axhline(0, color='black', lw=1)
#change size of legend
ax.legend(fontsize=8,loc=(1.0,0.2))
#hiding upper and right axis layout
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
#changing the thickness
ax.spines['bottom'].set_linewidth(3)
ax.spines['left'].set_linewidth(3)
#setlabels
ax.set_xlabel('Level',fontsize=20,color='r')
ax.set_ylabel('Total Cases',fontsize=20,color='r')
#rotation
plt.xticks(rotation=0)
Want:
Actual Output:
UPDATE:
Make the graph background white using
sns.set_style("whitegrid")
I have some sorted data of which I only show the highest and lowest values in a figure. This is a minimal version of what currently I have:
import matplotlib.pyplot as plt
# some dummy data (real data contains about 250 entries)
x_data = list(range(98, 72, -1))
labels = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
ranks = list(range(1, 27))
fig, ax = plt.subplots()
# plot 3 highest entries
bars_top = ax.barh(labels[:3], x_data[:3])
# plot 3 lowest entries
bars_bottom = ax.barh(labels[-3:], x_data[-3:])
ax.invert_yaxis()
# print values and ranks
for bar, value, rank in zip(bars_top + bars_bottom,
x_data[:3] + x_data[-3:],
ranks[:3] + ranks[-3:]):
y_pos = bar.get_y() + 0.5
ax.text(value - 4, y_pos, value, ha='right')
ax.text(4, y_pos, f'$rank:\ {rank}$')
ax.set_title('Comparison of Top 3 and Bottom 3')
plt.show()
Result:
I'd like to make an additional gap to this figure to make it more visually clear that the majority of data is in fact not displayed in this plot. For example, something very simple like the following would be sufficient:
Is this possible in matplotlib?
Here is a flexible approach that just plots a dummy bar in-between. The yaxis-transform together with the dummy bar's position is used to plot 3 black dots.
If multiple separations are needed, they all need a different dummy label, for example repeating the space character.
import matplotlib.pyplot as plt
import numpy as np
# some dummy data (real data contains about 250 entries)
x_data = list(range(98, 72, -1))
labels = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
ranks = list(range(1, 27))

fig, ax = plt.subplots()

# plot 3 highest entries
bars_top = ax.barh(labels[:3], x_data[:3])
# dummy bar in between: an invisible, zero-width bar that reserves one slot
# on the categorical y-axis to act as the visual gap
dummy_bar = ax.barh(" ", 0, color='none')
# plot 3 lowest entries
bars_bottom = ax.barh(labels[-3:], x_data[-3:])

ax.invert_yaxis()

# print values and ranks
for bar, value, rank in zip(bars_top + bars_bottom,
                            x_data[:3] + x_data[-3:],
                            ranks[:3] + ranks[-3:]):
    y_pos = bar.get_y() + 0.5
    ax.text(value - 4, y_pos, value, ha='right')
    # raw f-string: the mathtext spacing command "\ " is not a valid Python
    # escape sequence and would otherwise trigger a warning
    ax.text(4, y_pos, rf'$rank:\ {rank}$')

# add three dots using the dummy bar's position; the y-axis transform makes
# x an axes fraction (0.05) while y stays in data coordinates
ax.scatter([0.05] * 3, dummy_bar[0].get_y() + np.linspace(0, dummy_bar[0].get_height(), 3),
           marker='o', s=5, color='black', transform=ax.get_yaxis_transform())

ax.set_title('Comparison of Top 3 and Bottom 3')
ax.tick_params(axis='y', length=0)  # hide the tick marks
ax.margins(y=0.02)  # less empty space at top and bottom
plt.show()
The following function,
def top_bottom(x, l, n, ax=None, gap=1):
    """Draw the first and last *n* entries of *x* as horizontal bars.

    The first ``n`` values are drawn at y positions ``2n .. n+1`` and the
    last ``n`` values at ``n-gap .. 1-gap``, so ``gap`` controls the empty
    space left between the two groups.

    Parameters
    ----------
    x : sequence
        Bar widths; the first and last ``n`` items are plotted.
    l : sequence
        Tick labels, sliced the same way as ``x``.
    n : int
        Number of entries taken from each end; must satisfy
        ``0 < n <= len(x)``.
    ax : Axes-like, optional
        Object providing ``barh``, ``set_yticks`` and ``set_yticklabels``.
        When ``None``, matplotlib's current Axes is used.
    gap : int, default 1
        Offset applied to the bottom group's y positions.

    Returns
    -------
    tuple
        ``(top_bars, bot_bars)`` as returned by the two ``ax.barh`` calls.

    Raises
    ------
    ValueError
        If ``n`` is not positive or exceeds ``len(x)``.
    """
    if n <= 0:
        raise ValueError('No. of top/bottom values must be positive')
    if n > len(x):
        raise ValueError('No. of top/bottom values should be not greater than data length')
    if n + n > len(x):
        print('Warning: no. of top/bottom values is larger than one'
              ' half of data length, OVERLAPPING')
    if gap < 0:
        print('Warning: some bar will be overlapped')

    # Only fall back to (and import) pyplot when no Axes was supplied; test
    # against None so a caller-supplied object is never replaced.
    if ax is None:
        from matplotlib.pyplot import gca
        ax = gca()

    top_x = x[:n]
    bot_x = x[-n:]
    top_y = list(range(n + n, n, -1))
    bot_y = list(range(n - gap, -gap, -1))
    # Convert to lists so labels concatenate for any sequence type
    # (tuples, arrays, Series), not only for lists.
    top_l = list(l[:n])   # A B C
    bot_l = list(l[-n:])  # X Y Z

    top_bars = ax.barh(top_y, top_x)
    bot_bars = ax.barh(bot_y, bot_x)

    ax.set_yticks(top_y + bot_y)
    ax.set_yticklabels(top_l + bot_l)

    return top_bars, bot_bars
when invoked with your data and n=4, gap=4
bars_top, bars_bottom = top_bottom(x_data, labels, 4, gap=4)
produces
Later, you'll be able to customize the appearance of the bars as you like using the Artists returned by the function.
Python 3.7 environment
I want to create a stacked bar plot with some labels on top of each subcategory displayed in the bar. The data comes from a CSV file, and some of the labels are rather long, so they are wider than the bar width. The problem could be easily solved by scaling the whole graphic such that the bars become large enough for the labels, but I fail to resize the plot as a whole. Here is the code:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()
dataset = 'Number'
dataFrame: pd.DataFrame = pd.read_csv('my_csv_file_with_data.csv', sep=',', header=2)
dataFrame['FaultDuration [h]'] = dataFrame['DurationH']
# ***********************************************************
# Data gymnastics to transform data in desired format
# determine the main categories
mainCategories: pd.Series = dataFrame['MainCategory']
mainCategories = mainCategories.drop_duplicates()
mainCategories = mainCategories.sort_values()
print('Main Categories: '+ mainCategories)
# subcategories
subCategories: pd.Series = pd.Series(data=dataFrame['SubCategorie'].drop_duplicates().sort_values().values)
subCategories = subCategories.sort_values()
print('Sub Categories: '+ subCategories)
# Build new frame with subcategories as headers
columnNames = pd.Series(data=['SubCategory2'])
columnNames = columnNames.append(mainCategories)
rearrangedData: pd.DataFrame = pd.DataFrame(columns=columnNames.values)
for subCategory in subCategories:
subset: pd.DataFrame = dataFrame.loc[dataFrame['SubCategorie'] == subCategory]
rearrangedRow = pd.DataFrame(columns=mainCategories.values)
rearrangedRow = rearrangedRow.append(pd.Series(), ignore_index=True)
rearrangedRow['SubCategory2'] = subCategory
for mainCategory in mainCategories:
rowData: pd.DataFrame = subset.loc[subset['MainCategorie'] == mainCategory]
if (rowData is not None and rowData.size > 0):
rearrangedRow[mainCategory] = float(rowData[dataset].values)
else:
rearrangedRow[mainCategory] = 0.0
rearrangedData = rearrangedData.append(rearrangedRow, ignore_index=True)
# *********************************************************************
# here the plot is created:
thePlot = rearrangedData.set_index('SubCategory2').T.plot.bar(stacked=True, width=1, cmap='rainbow')
thePlot.get_legend().remove()
labels = []
# *************************************************************
# creation of bar patches and labels in bar chart
rowIndex = 0
for item in rearrangedData['SubCategory2']:
colIndex = 0
for colHead in rearrangedData.columns:
if colHead != 'SubCategory2':
if rearrangedData.iloc[rowIndex, colIndex] > 0.0:
label = item + '\n' + str(rearrangedData.iloc[rowIndex, colIndex])
labels.append(item)
else:
labels.append('')
colIndex = colIndex + 1
rowIndex = rowIndex + 1
patches = thePlot.patches
for label, rect in zip(labels, patches):
width = rect.get_width()
if width > 0:
x = rect.get_x()
y = rect.get_y()
height = rect.get_height()
thePlot.text(x + width/2., y + height/2., label, ha='center', va='center', size = 7 )
# Up to here things work like expected...
# *******************************************************
# now I want to produce output in the desired format/size
# things I tried:
1) thePlot.figure(figsize=(40,10)) <---- Fails with error 'Figure' object is not callable
2) plt.figure(figsize=(40,10)) <---- Creates a second, empty plot of the right size, but bar chart remains unchanged
3) plt.figure(num=1, figsize=(40,10)) <---- leaves chart plot unchanged
plt.tight_layout()
plt.show()
The object "thePlot" is an AxesSubplot. How do I get to a properly scaled chart?
You can set the figure size in inches:
# thePlot is an Axes; the size is a property of its parent Figure
thePlot.figure.set_size_inches(18.5, 10.5, forward=True)
For example see:
How do you change the size of figures drawn with matplotlib?