Pie chart enclosed with a black line (rectangle) - python

Below you can see my data and facet plot in matplotlib.
import pandas as pd
import numpy as np
# Use the fully-qualified option key: the bare pattern 'max_columns' also
# matches 'styler.render.max_columns' on pandas >= 1.4 and raises OptionError.
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import matplotlib as mpl

# Data: open/closed counts per sale type (one stacked bar per row).
data = {
    'type_sale': ['g_1', 'g_2', 'g_3', 'g_4', 'g_5', 'g_6', 'g_7', 'g_8', 'g_9', 'g_10'],
    'open': [70, 20, 24, 150, 80, 90, 60, 90, 20, 20],
    'closed': [30, 14, 20, 10, 20, 40, 10, 10, 10, 10],
}
df = pd.DataFrame(data, columns=['type_sale', 'open', 'closed'])

# Overall open/closed split that feeds the pie chart.
data1 = {
    'type_sale': ['open', 'closed'],
    'structure': [70, 30],
}
df1 = pd.DataFrame(data1, columns=['type_sale', 'structure'])

# Plotting: 2x2 grid with the pie in the top-left cell and the same stacked
# bar chart in the three remaining cells.
labels = ['open', 'closed']
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
plt.subplots_adjust(wspace=0.2, hspace=0.6)
df1.plot(x='type_sale', y='structure', labels=labels, autopct='%1.1f%%', kind='pie',
         title='Stacked Bar Graph by dataframe', ax=axs[0, 0])
for bar_ax in (axs[0, 1], axs[1, 0], axs[1, 1]):
    df.plot(x='type_sale', kind='bar', stacked=True,
            title='Stacked Bar Graph by dataframe', ax=bar_ax)
plt.suptitle(t='Stacked Bar Graph by dataframe', fontsize=16)
plt.show()
If you compare the pie plot with the other subplots, you can spot a big difference: the pie plot is not enclosed by a black line (rectangle), while the others are.
So can anybody help me with how to solve this problem?

After playing around with it myself, it seems that the following works, but I think the pie gets stretched, which doesn't look that good.
EDIT
I found a better solution using set_adjustable.
There are also two options for creating the pie chart; the frame and ticks differ slightly between them.
# Options 1 and 2 are alternatives for drawing the pie into axs[0,0]; use one.
# 1: draw the pie with matplotlib directly and ask it to keep the axes frame.
axs[0,0].pie(df1['structure'],labels=labels,autopct='%1.1f%%',frame=True,radius=10)
axs[0,0].set_title('Stacked Bar Graph by dataframe')
# 2: draw the pie through pandas, then switch the axes frame back on.
df1.plot(x='type_sale', y='structure',labels=labels,autopct='%1.1f%%',kind='pie', title='Stacked Bar Graph by dataframe',ax=axs[0,0])
axs[0,0].set_frame_on(True)
# NOTE(review): presumably 'datalim' makes matplotlib keep the pie round by
# changing the data limits rather than shrinking the axes box, so the frame
# fills the subplot cell like the bar charts do; confirm against the
# matplotlib set_adjustable documentation.
axs[0,0].set_adjustable('datalim')

Related

changing colors in barh(stacked)

I have one table with regions (on the y axis) and values (on the x axis). Each region has two bars.
I would like to change the colors so that each little segment of a stacked barh bar has a different color.
I have this code, in which the colors repeat:
For example, after pink I would like another color, not red again. Is it possible to change the color scale, for example to "tab10"?
Furthermore, is it possible to get a legend where each color corresponds to one year (2010,2011,2012,2013,2014,2015,2016,2017,2018,2019)?
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')
plotdata = pd.DataFrame({
"2010_y":[0.63,0.56,0.89,0.94,0.68,0.63,0.34,0.54,0.77,0.77,0.86,0.42,0.49,0.70,1.12,1.10,0.30,0.67,0.30,0.23],
"2011_y":[1.10,0.54,0.94,0.81,0.80,0.87,0.90,0.99,0.53,0.78,0.98,0.99,0.87,1.09,1.18,0.89,0.89,0.70,1.05,0.86],
"2012_y":[1.39,0.97,0.98,0.99,0.95,1.10,1.07,1.10,1.02,1.05,0.97,0.92,0.94,0.91,0.85,1.22,1.24,1.08,1.08,1.01],
"2013_m":[1.86,1.34,1.12,1.25,1.26,1.08,1.31,1.58,1.22,1.26,1.37,1.14,1.23,1.00,1.13,1.49,1.14,1.37,1.25,1.23],
"2014_m":[1.59,1.04,1.03,1.10,1.44,1.43,1.33,1.81,1.56,1.22,1.08,1.36,1.11,0.87,1.10,0.68,1.31,1.26,1.25,1.61],
"2015_m":[0.71,1.14,1.18,0.99,0.87,0.74,0.91,0.57,0.79,1.09,1.16,0.89,1.15,0.98,1.24,0.75,0.85,0.83,1.02,0.71],
"2016_m":[0.67,1.22,1.00,0.96,0.97,0.96,0.95,0.79,1.06,1.04,0.83,1.06,1.07,0.98,0.66,1.06,0.99,1.13,1.03,1.05],
"2017_m":[0.68,0.84,0.73,0.73,0.79,0.98,0.80,0.54,0.85,0.91,0.55,0.65,0.86,0.71,0.71,0.77,0.95,0.80,0.81,0.87],
"2018_m":[0.74,1.28,1.21,1.13,0.99,1.00,1.21,0.97,1.07,0.91,1.38,1.34,1.25,1.65,1.28,0.97,1.11,1.09,1.20,1.13],
"2019_m":[0.63,1.07,0.92,1.10,1.24,1.21,1.18,1.10,1.12,0.97,0.80,1.24,1.04,1.11,0.74,1.06,1.23,1.07,1.01,1.30]
}, index=["ABR", "BAS", "CAL", "CAM", "EMR","FVG","LAZ","LIG","LOM","MAR","MOL","PIE","PUG","SAR","SIC","TOS","TAA","UMB","VDA","VEN"]
)
plotdata3 = pd.DataFrame({
"2010_y":[4.12,1.44,5.73,3.91,3.43,0.00,4.26,0.00,1.95,2.65,0.00,4.82,3.61,2.17,3.05,2.66,0.00,2.86,3.00,1.15],
"2011_y":[0.00,0.39,0.00,0.00,0.00,1.32,0.00,0.00,1.02,0.00,2.72,0.00,0.00,0.00,0.83,0.00,0.00,0.00,0.00,0.00],
"2012_y":[0.08,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00],
"2013_m":[0.00,0.00,0.00,0.00,0.45,0.00,0.00,2.58,0.00,0.00,0.00,0.00,0.00,1.90,0.00,0.00,0.00,0.51,0.00,0.00],
"2014_m":[0.05,0.03,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.40,0.00,0.01,0.00,0.00,0.42,0.00,0.00,0.00,0.00,0.00],
"2015_m":[1.45,0.16,0.27,0.09,0.64,1.68,0.20,2.42,0.47,0.41,0.28,0.32,0.09,0.93,0.18,0.52,0.00,0.67,0.00,0.00],
"2016_m":[0.00,0.89,0.00,0.00,1.11,0.00,0.00,0.00,0.00,0.44,0.00,0.03,0.00,0.00,0.00,0.00,0.00,0.22,0.00,0.00],
"2017_m":[0.00,0.00,0.00,0.00,0.95,0.00,0.00,0.00,0.09,1.85,0.00,0.00,0.11,0.00,0.07,0.35,0.00,1.74,0.00,0.00],
"2018_m":[0.00,0.00,0.00,0.00,0.06,0.00,0.11,0.00,0.00,0.00,0.00,0.25,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.00],
"2019_m":[0.30,3.09,0.00,0.00,0.34,0.00,0.42,0.00,1.48,0.25,0.00,0.58,0.19,0.00,2.44,0.46,0.00,0.00,0.00,0.85]
}, index=["ABR", "BAS", "CAL", "CAM", "EMR","FVG","LAZ","LIG","LOM","MAR","MOL","PIE","PUG","SAR","SIC","TOS","TAA","UMB","VDA","VEN"]
)
# Draw two stacked horizontal bar sets per region on a single axes.
# NOTE(review): stacked_data and stacked_data3 are only assigned in the
# commented-out lines below, so as shown this snippet raises NameError unless
# they are defined elsewhere (e.g. from plotdata / plotdata3); confirm.
fig, ax = plt.subplots()
#stacked_data = plotdata.apply(lambda x: x*100/sum(x), axis=1)
#stacked_data2 = plotdata2.apply(lambda x: x*100/sum(x), axis=1)
#stacked_data3 = plotdata3.apply(lambda x: x*100/sum(x), axis=1)
# NOTE(review): position=0 here and position=1 below presumably align the two
# bar sets on opposite sides of each tick so they sit next to each other
# instead of overlapping; confirm against the pandas bar plot documentation.
stacked_data.plot(kind="barh", stacked=True, width=0.4,
ax=ax, position=0, edgecolor='black')
#stacked_data2.plot(kind="barh", stacked=True, width=0.25,
# ax=ay, position=1, hatch='//',edgecolor='black')
stacked_data3.plot(kind="barh", stacked=True, width=0.4,
ax=ax, position=1,edgecolor='black')
# Drop the legend that the pandas plot calls attached to the axes.
ax.get_legend().remove()
# Set the upper y limit from the number of rows
# (presumably so the topmost bar is not clipped; confirm).
ax.set_ylim(top=len(stacked_data)-0.1)
#ax.set_xlim(right=len(stacked_data)-0.5)
# White plot background.
ax.set_facecolor('xkcd:white')
# displaying the title
plt.title("titla")
# draw all four axes spines in black
ax.spines['bottom'].set_color('black')
ax.spines['top'].set_color('black')
ax.spines['right'].set_color('black')
ax.spines['left'].set_color('black')
You can specify your colormap when plotting. Also use different colormaps for your barplots if you like, for example:
stacked_data3.plot(kind="barh", stacked=True, width=0.4,
ax=ax, position=1,edgecolor='black', cmap='Accent')
A list of available colormaps is available on matplotlib.org

Plot multiple grouped bar chart with matplotlib in python

Here are a few rows of my 100k-line df:
data.head()
My goal is to have 4 grouped bar charts (1 row; 4 columns) where:
Each chart corresponds to a "product family" (I know I have 4, so I can make 4 sub-dataframes),
"Site" and "year" are on the x axis,
"sum of Tonnage" is on the y axis.
Example of the bar chart I'm trying to get
The closest I got is to have the 4 plots, but one under the other, and the code is not as elegant as I want it to be.
I'm a beginner, so this might look too easy for you. Just bear with me :)
Here's my code:
data_A=data_no_dp.loc[data_no_dp['Product family']=='A'][['id','Site','Tonnage','Année']].drop_duplicates('id')
data_B=data_no_dp.loc[data_no_dp['Product family']=='B'][['id','Site','Tonnage','Année']].drop_duplicates('id')
data_C=data_no_dp.loc[(data_no_dp['Product family']=='C') ][['id','Site','Tonnage','Année']].drop_duplicates('id')
data_D=data_no_dp.loc[(data_no_dp['Product family']=='D') ][['id','Site','Tonnage','Année','Product family']].drop_duplicates('id')
data_A_pivot=data_A.groupby(['Site','Année']).sum().unstack()
data_A_pivot=data_A_pivot['Tonnage'].replace(np.nan,0)
data_B_pivot=data_B.groupby(['Site','Année']).sum().unstack()
data_B_pivot=data_B_pivot['Tonnage'].replace(np.nan,0)
data_C_pivot=data_C.groupby(['Site','Année']).sum().unstack()
data_C_pivot=data_C_pivot['Tonnage'].replace(np.nan,0)
data_D_pivot=data_D.groupby(['Site','Année']).sum().unstack()
data_D_pivot=data_D_pivot['Tonnage'].replace(np.nan,0)
#plt.subplots(1,4, sharey=True, figsize= (20,4))
plt.subplot(2,2,1)
ax1=data_A_pivot.plot(kind='bar')
ax2=data_B_pivot.plot(kind='bar')
ax3=data_C_pivot.plot(kind='bar')
ax4=data_D_pivot.plot(kind='bar')
plt.show()
Since no data was provided, I drew the graphs using the "tips" test dataset from seaborn. pandas plots can be placed on subplots with the following technique: create the axes first, then pass each one to plot via ax=.
import matplotlib.pyplot as plt
# for sample data
import seaborn as sns

tips = sns.load_dataset("tips")

# One sub-frame per day; each becomes its own grouped bar chart.
data_A = tips[tips['day'] == 'Sun']
data_B = tips[tips['day'] == 'Sat']
data_C = tips[tips['day'] == 'Thur']
data_D = tips[tips['day'] == 'Fri']


# Sum the numeric columns per (time, sex) and move 'sex' into the columns.
# numeric_only=True is required on pandas >= 2.0, where summing the remaining
# categorical columns raises TypeError instead of silently dropping them.
# observed=False is spelled out so unobserved category combinations are kept
# (as zero-height bars) regardless of the pandas version's default.
group_keys = ['time', 'sex']
data_A_pivot = data_A.groupby(group_keys, observed=False).sum(numeric_only=True).unstack().fillna(0)
data_B_pivot = data_B.groupby(group_keys, observed=False).sum(numeric_only=True).unstack().fillna(0)
data_C_pivot = data_C.groupby(group_keys, observed=False).sum(numeric_only=True).unstack().fillna(0)
data_D_pivot = data_D.groupby(group_keys, observed=False).sum(numeric_only=True).unstack().fillna(0)

# Create the 1x4 grid first, then hand each axes to pandas via ax= so every
# chart lands in its own column of the same figure.
fig, [ax1, ax2, ax3, ax4] = plt.subplots(nrows=1, ncols=4, figsize=(20, 4))
pivots = (data_A_pivot, data_B_pivot, data_C_pivot, data_D_pivot)
for pivot, target_ax in zip(pivots, (ax1, ax2, ax3, ax4)):
    pivot.plot(kind='bar', ax=target_ax)
plt.show()

Adjusting the color coding on a barplot so that all values are color coded correctly in matplotlib

I have a barplot that plots Rates by State and by Category (there are 5 categories) but the problem is that some States have more categories than other states.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({"state" : ["AL","AL","AL","AK", ],
"status" : ["Booked", "Rejected","Cancelled","Rejected"],
"0" : [1.5,2.5,3.5,1.0]})
df2 = df.groupby(['state','status']).size()/df.groupby(['state']).size()
fig, ax = plt.subplots()
plt.xlabel('State')
plt.ylabel('Bookings')
my_colors = 'gyr'
df2.plot(kind='bar', color=my_colors, orientation='vertical')
plt.tight_layout()
plt.show()
This does a good job with most of what I need to do. However, because some states do not have all values for status, those values do not appear in the plot, and this makes some of the color coding incorrect: the colors simply repeat every 5 colors rather than depending on whether a value is missing or not. What can I do about this?
Possibly you want to show the data in a grouped fashion, namely to have 3 categories per group, such that each category has its own color.
In this case it seems this can easily be achieved by unstacking the multi-index dataframe,
df2.unstack().plot(...)
Complete example:
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame({"state": ["AL", "AL", "AL", "AK"],
                   "status": ["Booked", "Rejected", "Cancelled", "Rejected"],
                   "0": [1.5, 2.5, 3.5, 1.0]})
# Share of each status within its state (row count per pair / rows per state).
df2 = df.groupby(['state', 'status']).size() / df.groupby(['state']).size()

fig, ax = plt.subplots()
my_colors = 'gyr'
# unstack() turns each status into its own column, so a status keeps the same
# color in every state even when some states lack that status.
df2.unstack().plot(kind='bar', color=my_colors, orientation='vertical', ax=ax)
# Label the axes after plotting: the pandas bar plot sets the x label from
# the index name ('state'), which would overwrite a label set beforehand.
ax.set_xlabel('State')
ax.set_ylabel('Bookings')
plt.tight_layout()
plt.show()

Scatter plot from multiple columns of a pandas dataframe

I have a pandas dataframe that looks as below:
Filename GalCer(18:1/12:0)_IS GalCer(d18:1/16:0) GalCer(d18:1/18:0)
0 A-1-1 15.0 1.299366 40.662458 0.242658 6.891069 0.180315
1 A-1-2 15.0 1.341638 50.237734 0.270351 8.367316 0.233468
2 A-1-3 15.0 1.583500 47.039423 0.241681 7.902761 0.201153
3 A-1-4 15.0 1.635365 53.139610 0.322680 9.578195 0.345681
4 B-1-10 15.0 2.370330 80.209846 0.463770 13.729810 0.395355
I am trying to plot scatter subplots with a shared x-axis, with the first column "Filename" on the x-axis. While I am able to generate bar plots, the following code gives me a KeyError for a scatter plot:
import matplotlib.pyplot as plt
colnames = list (qqq.columns)
qqq.plot.scatter(x=qqq.Filename, y=colnames[1:], legend=False, subplots = True, sharex = True, figsize = (10,50))
KeyError: "['A-1-1' 'A-1-2' 'A-1-3' 'A-1-4' 'B-1-10' ] not in index"
The following code for barplots works fine. Do I need to specify something differently for the scatterplots?
import matplotlib.pyplot as plt
colnames = list (qqq.columns)
qqq.plot(x=qqq.Filename, y=colnames[1:], kind = 'bar', legend=False, subplots = True, sharex = True, figsize = (10,30))
A scatter plot will require numeric values for both axes. In this case you can use the index as x values,
df.reset_index().plot(x="index", y="other column")
The problem now is that you cannot plot several columns at once using the scatter plot wrapper in pandas. Depending on your reason for using a scatter plot, you may decide to use a line plot instead, just without lines. That is, you may pass linestyle="none" and marker="o" to the plot call, so that only points appear on the plot.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Demo data: 16 file names (A_0 .. D_3) plus four random numeric columns.
fn = ["{}_{}".format(i,j) for i in list("ABCD") for j in range(4)]
df = pd.DataFrame(np.random.rand(len(fn), 4), columns=list("ZXYQ"))
df.insert(0,"Filename",pd.Series(fn))
colnames = list (df.columns)
# reset_index() exposes the row index as a numeric "index" column for x.
# The line plot is drawn without lines (ls="none") and with point markers, so
# every value column shows up as points in its own subplot.
df.reset_index().plot(x="index", y=colnames[1:], kind = 'line', legend=False,
subplots = True, sharex = True, figsize = (5.5,4), ls="none", marker="o")
plt.show()
In case you absolutely need a scatter plot, you may create a subplots grid first and then iterate over the columns and axes to plot one scatter plot at a time to the respective axes.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Demo data: 16 file names (A_0 .. D_3) plus four random numeric columns.
fn = ["{}_{}".format(i, j) for i in list("ABCD") for j in range(4)]
df = pd.DataFrame(np.random.rand(len(fn), 4), columns=list("ZXYQ"))
df.insert(0, "Filename", pd.Series(fn))
colnames = list(df.columns)

# One row of axes per value column. squeeze=False always returns a 2-D array,
# so flattening it also works when there is only a single value column.
fig, axes = plt.subplots(nrows=len(colnames) - 1, sharex=True,
                         figsize=(5.5, 4), squeeze=False)
axes = axes.ravel()

# reset_index() exposes the row index as a numeric "index" column, which the
# scatter plot needs for its x values; compute it once outside the loop.
indexed = df.reset_index()
for column, ax in zip(colnames[1:], axes):
    # Each column is drawn on its own axes and colored by its own values.
    indexed.plot(x="index", y=column, kind='scatter', legend=False,
                 ax=ax, c=column, cmap="inferno")
plt.show()

Pandas, matplotlib and plotly - how to fix series legend?

I'm trying to create an interactive plotly graph from pandas dataframes.
However, I can't get the legends displayed correctly.
Here is a working example:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.plotly as py
# sign into the plotly api
py.sign_in("***********", "***********")
# create some random dataframes
dates = pd.date_range('1/1/2000', periods=8)
df1 = pd.DataFrame(np.random.randn(8, 1), index=dates, columns=['A'])
df2 = pd.DataFrame(np.random.randn(8, 1), index=dates, columns=['B'])
df1.index.name = 'date'
df2.index.name = 'date'
Now I attempt to plot the dataframes using plotly.
fig, ax = plt.subplots(1,1)
df1.plot(y='A', ax=ax)
df2.plot(y='B', ax=ax)
py.iplot_mpl(fig, filename='random')
Notice there is no legend
Edit:
Based on suggestions below I have added an update dict. Although this does display the legend, it messes up the plot itself:
fig, ax = plt.subplots(1,1)
df1.plot(y='A', ax=ax)
df2.plot(y='B', ax=ax)
update = dict(
layout=dict(
annotations=[dict(text=' ')], # rm erroneous 'A', 'B', ... annotations
showlegend=True # show legend
)
)
py.iplot_mpl(fig, update=update, filename='random')
Edit 2:
Removing the annotations entry from the layout dict results in the plot being displayed correctly, but the legend is not the y column name, but rather the x column name, the index name of the dataframe
fig, ax = plt.subplots(1,1)
df1.plot(y='A', ax=ax)
df2.plot(y='B', ax=ax)
update = dict(
layout=dict(
showlegend=True # show legend
)
)
py.iplot_mpl(fig, update=update, filename='random')
This results in the following plot:
Edit 3:
I have found a way to override the legend text, but it seems a bit clunky. Given that I've specified the dataframe column I want to plot:
df1.plot(y='A', ax=ax)
I would have expected that y='A' would result in 'A' being used as the legend label.
It seems this is not the case, and while it is possible to override using the index label, as seen below, it just feels wrong.
Is there a better way to achieve this result?
update = dict(
layout=dict(
showlegend=True,
),
data=[
dict(name='A'),
dict(name='B'),
]
)
py.iplot_mpl(fig, update=update, filename='random')
Legends don't convert well from matplotlib to plotly.
Fortunately, adding a plotly legend to a matplotlib plot is straightforward:
update = dict(
layout=dict(
showlegend=True # show legend
)
)
py.iplot_mpl(fig, update=update)
See the full working ipython notebook here.
For more information, refer to the plotly user guide.

Categories

Resources