I'm trying to reproduce the precedence matrix plot from the R package bupaR in Python.
So far I'm able to add the text and plot the categorical variables with the count.
def plot_precedence_matrix(data, colx, coly, cols, color=('grey', 'black'), ratio=10,
                           font='Helvetica', save=False, save_name='Default'):
    """Draw a precedence matrix on the current axes: one text label per row of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format table; it is copied, so the caller's frame is not modified.
    colx, coly : str
        Categorical columns placed on the x axis (encoded in ascending order,
        ticks on top) and on the y axis (encoded in descending order).
    cols : str
        Column holding the value printed at each (colx, coly) position.
    color : sequence of str
        The first two entries alternate for the dashed background lines; the
        last entry is assigned to the tick colour rcParams.
    ratio : int
        Accepted for backward compatibility; not used by this function.
    font : str
        Value assigned to ``rcParams['font.sans-serif']``.
    save : bool
        When true, the figure is written to ``save_name + '.png'``.
    save_name : str
        File name (without extension) used when ``save`` is true.

    Notes
    -----
    ``plt.rcParams`` is modified globally as a side effect.
    """
    df = data.copy()

    # Create a dict to encode the categories into numbers (sorted)
    colx_codes = dict(zip(df[colx].sort_values().unique(), range(len(df[colx].unique()))))
    coly_codes = dict(zip(df[coly].sort_values(ascending=False).unique(), range(len(df[coly].unique()))))

    # Apply the encoding
    df[colx] = df[colx].apply(lambda x: colx_codes[x])
    df[coly] = df[coly].apply(lambda x: coly_codes[x])

    ax = plt.gca()
    ax.xaxis.set_label_position('top')
    ax.xaxis.set_ticks_position('top')

    # Prepare the aspect of the plot
    plt.rcParams['font.sans-serif'] = font
    plt.rcParams['xtick.color'] = color[-1]
    plt.rcParams['ytick.color'] = color[-1]

    # Plot all the lines for the background
    for num in range(len(coly_codes)):
        ax.hlines(num, -1, len(colx_codes), linestyle='dashed', linewidth=2, color=color[num % 2], alpha=0.1)
    for num in range(len(colx_codes)):
        ax.vlines(num, -1, len(coly_codes), linestyle='dashed', linewidth=2, color=color[num % 2], alpha=0.1)

    # Print the value of the `cols` column in each cell (the original printed
    # the encoded x position instead, leaving `cols` unused).
    for x, y, value in zip(df[colx], df[coly], df[cols]):
        ax.text(x, y, round(value, 1), horizontalalignment='center',
                verticalalignment='center', fontdict={'color': 'black', 'size': 30})

    # Change the ticks numbers to categories and limit them
    plt.xticks(ticks=list(colx_codes.values()), labels=list(colx_codes.keys()), rotation=90)
    plt.yticks(ticks=list(coly_codes.values()), labels=list(coly_codes.keys()))

    # Lighten borders
    for side in ("top", "bottom", "right", "left"):
        ax.spines[side].set_alpha(0.1)

    # Save if wanted
    if save:
        plt.savefig(save_name + '.png')
Sample dataset
| Antecedent | Consequent | Count |
|-------------------:|-------------------:|-------|
| register request | examine thoroughly | 1 |
| examine thoroughly | check ticket | 2 |
| check ticket | decide | 6 |
| decide | reject request | 3 |
| register request | check ticket | 2 |
| check ticket | examine casually | 2 |
| examine casually | decide | 2 |
| decide | pay compensation | 3 |
| register request | examine casually | 3 |
| examine casually | check ticket | 4 |
| decide | reinitiate request | 3 |
| reinitiate request | examine thoroughly | 1 |
| check ticket | examine thoroughly | 1 |
| examine thoroughly | decide | 1 |
| reinitiate request | check ticket | 1 |
| reinitiate request | examine casually | 1 |
# Palette: the first two colours alternate for the grid, the last one colours the ticks.
colors = ['darkorange', 'grey', 'darkblue']

# Create the plot on a fresh 12x8 inch figure.
fig = plt.figure(figsize=(12, 8))
plot_precedence_matrix(
    df, 'Antecedent', 'Consequent', 'Count',
    color=colors, ratio=100, font='cursive',
)

# Enlarge the tick labels on both axes, then display the figure.
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=14)
plt.show()
How to add the rectangular boxes with color scale using matplotlib? Can anybody shed some light on plotting the above plot with Python? I would be happy to receive any leads on it from you.
You could draw colored rectangles at each of the positions. A colormap together with a norm could define the color.
Here is an example:
from matplotlib import pyplot as plt
from matplotlib.cm import ScalarMappable
from matplotlib.colors import ListedColormap
import pandas as pd
import numpy as np
from io import StringIO
def plot_precedence_matrix(data, colx, coly, cols, color=('grey', 'black'), ratio=10, font='Helvetica',
                           save=False, save_name='Default'):
    """Draw a precedence matrix as coloured, labelled cells on the current axes.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format table; it is copied, so the caller's frame is not modified.
    colx, coly : str
        Categorical columns placed on the x axis (encoded in ascending order,
        ticks on top) and on the y axis (encoded in descending order).
    cols : str
        Numeric column that is printed in each cell and drives the cell colour.
    color : sequence of str
        The first two entries alternate for the dashed background lines; the
        last entry is assigned to the tick colour rcParams.
    ratio : int
        Accepted for backward compatibility; not used by this function.
    font : str
        Value assigned to ``rcParams['font.sans-serif']``.
    save : bool
        When true, the figure is written to ``save_name + '.png'``.
    save_name : str
        File name (without extension) used when ``save`` is true.

    Notes
    -----
    ``plt.rcParams`` is modified globally as a side effect.
    """
    df = data.copy()

    # Create a dict to encode the categories into numbers (sorted)
    colx_codes = dict(zip(df[colx].sort_values().unique(), range(len(df[colx].unique()))))
    coly_codes = dict(zip(df[coly].sort_values(ascending=False).unique(), range(len(df[coly].unique()))))

    # Apply the encoding
    df[colx] = df[colx].apply(lambda x: colx_codes[x])
    df[coly] = df[coly].apply(lambda x: coly_codes[x])

    ax = plt.gca()
    ax.xaxis.set_label_position('top')
    ax.xaxis.set_ticks_position('top')

    # Prepare the aspect of the plot
    plt.rcParams['font.sans-serif'] = font
    plt.rcParams['xtick.color'] = color[-1]
    plt.rcParams['ytick.color'] = color[-1]

    # Plot the lines for the background
    for num in range(len(coly_codes)):
        ax.hlines(num, -1, len(colx_codes), linestyle='dashed', linewidth=2, color=color[num % 2], alpha=0.1)
    for num in range(len(colx_codes)):
        ax.vlines(num, -1, len(coly_codes), linestyle='dashed', linewidth=2, color=color[num % 2], alpha=0.1)

    cmap = ListedColormap(plt.get_cmap('Blues')(np.linspace(0.1, 1, 256)))  # skip too light colors

    # Scale colours by the value column (the original normalised and printed
    # the encoded x position, leaving `cols` unused).
    norm = plt.Normalize(df[cols].min(), df[cols].max())
    for x, y, value in zip(df[colx], df[coly], df[cols]):
        shade = norm(value)
        # Switch to white text on the darker cells so the label stays readable.
        ax.text(x, y, round(value, 1), horizontalalignment='center', verticalalignment='center',
                fontdict={'color': 'black' if shade < 0.6 else 'white', 'size': 30})
        # `facecolor` rather than `color`: `color` would also override the white edge.
        ax.add_patch(plt.Rectangle((x - .5, y - .5), 1, 1, facecolor=cmap(shade), edgecolor='white'))
    plt.colorbar(ScalarMappable(cmap=cmap, norm=norm), ax=ax)

    # Change the ticks numbers to categories and limit them
    ax.set_xticks(list(colx_codes.values()))
    ax.set_xticklabels(list(colx_codes.keys()), rotation=90, fontsize=14)
    ax.set_yticks(list(coly_codes.values()))
    ax.set_yticklabels(list(coly_codes.keys()), fontsize=14)

    # Lighten borders
    for spine in ax.spines:
        ax.spines[spine].set_alpha(0.1)

    plt.tight_layout()  # fit the labels into the figure
    if save:
        plt.savefig(save_name + '.png')
# Sample data: antecedent | consequent | count, one transition per line.
df_str = """
register request | examine thoroughly | 1
examine thoroughly | check ticket | 2
check ticket | decide | 6
decide | reject request | 3
register request | check ticket | 2
check ticket | examine casually | 2
examine casually | decide | 2
decide | pay compensation | 3
register request | examine casually | 3
examine casually | check ticket | 4
decide | reinitiate request | 3
reinitiate request | examine thoroughly | 1
check ticket | examine thoroughly | 1
examine thoroughly | decide | 1
reinitiate request | check ticket | 1
reinitiate request | examine casually | 1 """
# Raw string for the regex separator: "\s" and "\|" are invalid escape
# sequences in an ordinary string literal.
df = pd.read_csv(StringIO(df_str), delimiter=r"\s*\|\s*", engine='python', names=['Antecedent', 'Consequent', 'Count'])
colors = ['darkorange', 'grey', 'darkblue']
fig = plt.figure(figsize=(12, 8))
plot_precedence_matrix(df, 'Antecedent', 'Consequent', 'Count', color=colors, ratio=100, font='cursive')
plt.show()
I am trying to make a plot with dates on the x axis and values on the y axis. However, my data has two date columns, and I would like to highlight the date from the second column with a dot of a different color. Is that possible?
|---------------------|------------------|------------------|------------------|
| ID | Date1 | Date2 | value |
|---------------------|------------------|------------------|------------------|
| 1 | 2008-05-14 | 2010-03-28 | 5 |
|---------------------|------------------|------------------|------------------|
| 1 | 2005-12-07 | 2010-03-28 | 3 |
|---------------------|------------------|------------------|------------------|
| 1 | 2008-10-27 | 2010-03-28 | 6 |
# Keep only the rows for ID 1, ordered chronologically by the first date column.
df1 = df[df['ID'] == 1]
df1 = df1.sort_values(by='Date1')
date = df1['Date1']
# The sample table names this column 'value'; 'values' would raise a KeyError.
res = df1['value']
# Connected markers: one point per row, joined in date order.
fig, ax = plt.subplots()
ax.plot(date, res, 'o-')
I have a dataframe consisting of mean and std-dev of distributions
df.head()
+---+---------+----------------+-------------+---------------+------------+
| | user_id | session_id | sample_mean | sample_median | sample_std |
+---+---------+----------------+-------------+---------------+------------+
| 0 | 1 | 20081023025304 | 4.972789 | 5 | 0.308456 |
| 1 | 1 | 20081023025305 | 5.000000 | 5 | 1.468418 |
| 2 | 1 | 20081023025306 | 5.274419 | 5 | 4.518189 |
| 3 | 1 | 20081024020959 | 4.634855 | 5 | 1.387244 |
| 4 | 1 | 20081026134407 | 5.088195 | 5 | 2.452059 |
+---+---------+----------------+-------------+---------------+------------+
From this, I plot a histogram of the distribution
# Histogram of the per-row sample means, split into 50 bins.
plt.hist(df['sample_mean'], bins=50)
# Label the axes and title of the current axes in one call.
plt.gca().set(
    xlabel='sampling rate (sec)',
    ylabel='Frequency',
    title='Histogram of trips mean sampling rate',
)
plt.show()
I then write a function to compute pdf and cdf, passing dataframe and column name:
def compute_distrib(df, col):
    """Return the empirical distribution of the values in ``df[col]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table.
    col : str
        Name of the column whose distinct values are tabulated.

    Returns
    -------
    pandas.DataFrame
        One row per distinct value of ``col`` with the columns ``col``,
        ``frequency`` (number of occurrences), ``pdf`` (relative frequency)
        and ``cdf`` (running total of ``pdf``).
    """
    # Occurrences of each distinct value, indexed by that value.
    stats_df = df.groupby(col)[col].count().to_frame(name='frequency')
    # PDF: vectorised total instead of Python's builtin sum() over the Series.
    stats_df['pdf'] = stats_df['frequency'] / stats_df['frequency'].sum()
    # CDF
    stats_df['cdf'] = stats_df['pdf'].cumsum()
    # Turn the value index back into an ordinary column.
    return stats_df.reset_index()
So for example:
# Tabulate frequency, pdf and cdf for the 'sample_mean' column.
stats_df = compute_distrib(df, 'sample_mean')
# Preview the first two rows of the result.
stats_df.head(2)
+---+---------------+-----------+----------+----------+
| | sample_median | frequency | pdf | cdf |
+---+---------------+-----------+----------+----------+
| 0 | 1 | 4317 | 0.143575 | 0.143575 |
| 1 | 2 | 10169 | 0.338200 | 0.481775 |
+---+---------------+-----------+----------+----------+
Then I plot the cdf distribution this way:
# Plot the 'cdf' column against 'sample_mean', with grid lines enabled.
ax1 = stats_df.plot(x = 'sample_mean', y = ['cdf'], grid = True)
# Let matplotlib pick the legend position.
ax1.legend(loc='best')
Goal:
I would like to plot these figures in one figure side-by-side instead of plotting separately and somehow putting them together in my slides.
You can use matplotlib.pyplot.subplots to draw multiple plots next to each other:
import matplotlib.pyplot as plt
# One row, two columns: `axs` is then a 1-D array of Axes, so each panel is
# addressed with a single index (axs[0][0] would fail).
fig, axs = plt.subplots(nrows=1, ncols=2)
# Pass the data you wish to plot.
axs[0].hist(...)
axs[1].plot(...)
plt.show()
This question already has answers here:
How to create a grouped bar plot
(4 answers)
Annotate bars with values on Pandas bar plots
(4 answers)
Closed 3 years ago.
I am having trouble in plotting a bar graph on this Dataset.
+------+------------+--------+
| Year | Discipline | Takers |
+------+------------+--------+
| 2010 | BSCS | 213 |
| 2010 | BSIS | 612 |
| 2010 | BSIT | 796 |
| 2011 | BSCS | 567 |
| 2011 | BSIS | 768 |
| 2011 | BSIT | 504 |
| 2012 | BSCS | 549 |
| 2012 | BSIS | 595 |
| 2012 | BSIT | 586 |
+------+------------+--------+
I'm trying to plot a bar chart with 3 bars representing the number of takers for each year. This is the algorithm I did.
import matplotlib.pyplot as plt
import pandas as pd
# Pull the taker counts and the years out of the grouped frame.
Y = df_group['Takers']
Z = df_group['Year']
# NOTE(review): passing a Series together with a different index presumably
# re-aligns the values to that index instead of relabelling them — confirm the
# resulting frame is not all-NaN before plotting.
df = pd.DataFrame(df_group['Takers'], index = df_group['Discipline'])
# Bar chart with a hand-written legend for the three years.
df.plot.bar(figsize=(20,10)).legend(["2010", "2011","2012"])
plt.show()
I'm expecting to show something like this graph
With the same legends
First reshape the data with DataFrame.pivot, then plot it, and finally add the value labels like this:
# Reshape to one row per discipline and one column per year, then draw grouped bars.
# Keyword arguments are used because pandas 2.x no longer accepts them positionally.
ax = df.pivot(index='Discipline', columns='Year', values='Takers').plot.bar(figsize=(10, 10))
# Write each bar's height 10 points above its top edge, centred on the bar.
for p in ax.patches:
    ax.annotate(np.round(p.get_height(), decimals=2), (p.get_x() + p.get_width() / 2., p.get_height()), ha='center', va='center', xytext=(0, 10), textcoords='offset points')
With Seaborn, you can directly use your Dataframe:
import seaborn as sns
# Grouped bars straight from the long-format frame: x positions by Discipline, colours by Year.
ax = sns.barplot(data=df, x="Discipline", hue="Year", y="Takers")
To add the labels, you can use the snippet from jezrael:
# Label every bar with its height, 10 points above the bar's top centre.
for bar in ax.patches:
    height = bar.get_height()
    top_center = (bar.get_x() + bar.get_width() / 2., height)
    ax.annotate(
        np.round(height, decimals=2),
        top_center,
        ha='center',
        va='center',
        xytext=(0, 10),
        textcoords='offset points',
    )
# Re-fit the layout so the labels stay inside the figure.
plt.tight_layout()
Just add 2 more lines before plt.show() in your code and you will get your result.
The whole code is given below.
import matplotlib.pyplot as plt
import pandas as pd
Y = df_group['Takers']
Z = df_group['Year']
df = pd.DataFrame(df_group['Takers'], index=df_group['Discipline'])
bar_axes = df.plot.bar(figsize=(20, 10))
bar_axes.legend(["2010", "2011", "2012"])
# Write each value just above its position on the chart:
#   x is the position index of the entry in Y,
#   y is the value itself plus a small offset of 2,
#   s is the text shown on the plot (the value).
for position, takers in enumerate(Y):
    plt.text(x=position, y=takers + 2, s=takers)
plt.show()
I would like to draw a barplot graph that would compare the evolution of 2 variables of revenues on a monthly time-axis (12 months of invoices).
I wanted to use sns.barplot, but I can't use "hue" (because the 2 variables aren't subcategories of a single column?). Is there another way that is as simple as using hue? Can I "create" a hue?
Here is a small sample of my data:
(I did transform my table into a pivot table)
[In]
# Revenue with small sellers included: sum of all three seller columns.
data_pivot['Revenue-Small-Seller-in'] = data_pivot["Small-Seller"] + data_pivot["Best-Seller"] + data_pivot["Medium-Seller"]
# Revenue without small sellers: best and medium sellers only.
data_pivot['Revenue-Not-Small-Seller-in'] = data_pivot["Best-Seller"] + data_pivot["Medium-Seller"]
# Bare expression: shows the table when run as a notebook cell.
data_pivot
[Out]
InvoiceNo Month Year Revenue-Small-Seller-in Revenue-Not-Small-Seller-in
536365 12 2010 139.12 139.12
536366 12 2010 22.20 11.10
536367 12 2010 278.73 246.93
(sorry for the ugly presentation of my data, see the picture to see the complete table (as there are multiple columns))
You can do:
# Keep only the last two columns of the pivot table.
render_df = data_pivot[data_pivot.columns[-2:]]
# One figure holding a single Axes.
fig, ax = plt.subplots(1,1)
# Bar chart of the selected columns, drawn on that Axes.
render_df.plot(kind='bar', ax=ax)
ax.legend()
plt.show()
Output:
Or sns style like you requested
# Long format: stack the last two columns so each row holds one value plus
# the labels of the row and column it came from.
render_df = data_pivot[data_pivot.columns[-2:]].stack().reset_index()
# Keyword arguments throughout: a positional argument after `hue=...` is a SyntaxError.
sns.barplot(x='level_0', y=0, hue='level_1', data=render_df)
here render_df after stack() is:
+---+---------+-----------------------------+--------+
| | level_0 | level_1 | 0 |
+---+---------+-----------------------------+--------+
| 0 | 0 | Revenue-Small-Seller-in | 139.12 |
| 1 | 0 | Revenue-Not-Small-Seller-in | 139.12 |
| 2 | 1 | Revenue-Small-Seller-in | 22.20 |
| 3 | 1 | Revenue-Not-Small-Seller-in | 11.10 |
| 4 | 2 | Revenue-Small-Seller-in | 278.73 |
| 5 | 2 | Revenue-Not-Small-Seller-in | 246.93 |
+---+---------+-----------------------------+--------+
and output: