Fix x-axis scale seaborn factorplot - python

I'm attempting to make a figure that shows two plots, with each plot separated based on a set of categorical data. However, although I can make the graph, I cant figure out how to get the x-axis to be properly spaced.
I want the x-axis to start before the first value (want axis to start at 60 [first value = 63]) and end after the last (want axis to end at 95 [last value = 92.1]), with xticks going up in 5's.
Any help is much appreciated! Thanks in advance!
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.axes
import seaborn as sns
Temperature = [63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1,63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1]
Derivative = [0.0495,0.0507,0.0525,0.0548,0.0570,0.0579,0.0579,0.0574,0.0574,0.0576,0.0581,0.0587,0.0593,0.0592,0.0584,0.0580,0.0579,0.0580,0.0582,0.0588,0.0592,0.0594,0.0588,0.0581,0.0578,0.0579,0.0580,0.0579,0.0582,0.0581,0.0579,0.0574,0.0571,0.0563,0.0548,0.0538,0.0536,0.0540,0.0544,0.0551,0.0556,0.0551,0.0542,0.0535,0.0536,0.0542,0.0564,0.0623,0.0748,0.0982,0.1360,0.1897,0.2550,0.3228,0.3807,0.4177,0.4248,0.3966,0.3365,0.2558,0.1713,0.0971,0.0438,0.0140,0.0034,0.0028,0.0048,0.0058,0.0057,0.0050,0.0042,0.0038,0.0039,0.0041,0.0038,0.0031,0.0023,0.0017,0.0014,0.0012,0.0015,0.0019,0.0020,0.0018,0.0017,0.0015,0.0014,0.0014,0.0015,0.0014,0.0013,0.0011,0.0007,0.0004,0.0011,0.0105,0.0100,0.0096,0.0090,0.0084,0.0081,0.0077,0.0071,0.0066,0.0063,0.0064,0.0060,0.0057,0.0055,0.0054,0.0051,0.0047,0.0046,0.0042,0.0037,0.0035,0.0040,0.0043,0.0039,0.0032,0.0028,0.0028,0.0027,0.0029,0.0034,0.0038,0.0034,0.0027,0.0024,0.0021,0.0017,0.0015,0.0016,0.0015,0.0011,0.0008,0.0012,0.0019,0.0025,0.0027,0.0026,0.0019,0.0012,0.0010,0.0014,0.0016,0.0014,0.0010,0.0007,0.0007,0.0010,0.0017,0.0021,0.0020,0.0013,0.0012,0.0013,0.0014,0.0015,0.0018,0.0017,0.0012,0.0013,0.0018,0.0028,0.0031,0.0033,0.0027,0.0022,0.0015,0.0016,0.0022,0.0026,0.0026,0.0019,0.0012,0.0006,0.0007,0.0011,0.0016,0.0014,0.0010,0.0009,0.0012,0.0015,0.0014,0.0008,0.0001,-0.0003,0.0002]
Category = ["a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b"]
df = pd.DataFrame({"Temperature": Temperature,
"Derivative": Derivative,
"Category" : Category})
g = sns.factorplot(x="Temperature", y="Derivative", data=df, col="Category")
g.set_xticklabels(step=10)

All the desired feature you describe suggest that using a factorplot here is absolutely the wrong choice. Instead use a normal matplotlib plot and then set the limits as usual, plt.xlim(60,95).
import pandas as pd
import matplotlib.pyplot as plt
Temperature = [63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1,63.0,63.3,63.6,63.9,64.2,64.5,64.8,65.2,65.5,65.8,66.1,66.4,66.7,67.0,67.3,67.7,68.0,68.3,68.6,68.9,69.2,69.5,69.9,70.2,70.5,70.8,71.1,71.4,71.8,72.1,72.4,72.7,73.0,73.4,73.7,74.0,74.3,74.6,74.9,75.2,75.6,75.9,76.2,76.5,76.9,77.2,77.5,77.8,78.1,78.5,78.8,79.1,79.4,79.7,80.1,80.4,80.7,81.0,81.3,81.6,81.9,82.3,82.6,82.9,83.2,83.5,83.8,84.1,84.4,84.8,85.1,85.4,85.7,86.0,86.3,86.6,86.9,87.2,87.5,87.8,88.1,88.4,88.7,89.0,89.3,89.6,89.8,90.1,90.4,90.7,91.0,91.2,91.5,91.8,92.1]
Derivative = [0.0495,0.0507,0.0525,0.0548,0.0570,0.0579,0.0579,0.0574,0.0574,0.0576,0.0581,0.0587,0.0593,0.0592,0.0584,0.0580,0.0579,0.0580,0.0582,0.0588,0.0592,0.0594,0.0588,0.0581,0.0578,0.0579,0.0580,0.0579,0.0582,0.0581,0.0579,0.0574,0.0571,0.0563,0.0548,0.0538,0.0536,0.0540,0.0544,0.0551,0.0556,0.0551,0.0542,0.0535,0.0536,0.0542,0.0564,0.0623,0.0748,0.0982,0.1360,0.1897,0.2550,0.3228,0.3807,0.4177,0.4248,0.3966,0.3365,0.2558,0.1713,0.0971,0.0438,0.0140,0.0034,0.0028,0.0048,0.0058,0.0057,0.0050,0.0042,0.0038,0.0039,0.0041,0.0038,0.0031,0.0023,0.0017,0.0014,0.0012,0.0015,0.0019,0.0020,0.0018,0.0017,0.0015,0.0014,0.0014,0.0015,0.0014,0.0013,0.0011,0.0007,0.0004,0.0011,0.0105,0.0100,0.0096,0.0090,0.0084,0.0081,0.0077,0.0071,0.0066,0.0063,0.0064,0.0060,0.0057,0.0055,0.0054,0.0051,0.0047,0.0046,0.0042,0.0037,0.0035,0.0040,0.0043,0.0039,0.0032,0.0028,0.0028,0.0027,0.0029,0.0034,0.0038,0.0034,0.0027,0.0024,0.0021,0.0017,0.0015,0.0016,0.0015,0.0011,0.0008,0.0012,0.0019,0.0025,0.0027,0.0026,0.0019,0.0012,0.0010,0.0014,0.0016,0.0014,0.0010,0.0007,0.0007,0.0010,0.0017,0.0021,0.0020,0.0013,0.0012,0.0013,0.0014,0.0015,0.0018,0.0017,0.0012,0.0013,0.0018,0.0028,0.0031,0.0033,0.0027,0.0022,0.0015,0.0016,0.0022,0.0026,0.0026,0.0019,0.0012,0.0006,0.0007,0.0011,0.0016,0.0014,0.0010,0.0009,0.0012,0.0015,0.0014,0.0008,0.0001,-0.0003,0.0002]
Category = ["a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","a","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b","b"]
df = pd.DataFrame({"Temperature": Temperature,
"Derivative": Derivative,
"Category" : Category})
for n, data in df.groupby("Category"):
plt.plot(data["Temperature"],data["Derivative"] , marker="o", label=n)
plt.xlim(60,95)
plt.legend()
plt.show()
Or if subplots are desired,
fig,axes = plt.subplots(ncols=len(df["Category"].unique()), sharey=True)
for ax,(n, data) in zip(axes,df.groupby("Category")):
ax.plot(data["Temperature"],data["Derivative"] , marker="o", label=n)
ax.set_title("Category {}".format(n))
ax.set_xlim(60,95)
plt.show()
Finally, you may use a seaborn FacetGrid onto which you plot your data with a plot:
g = sns.FacetGrid(df, col="Category")
g.map(plt.plot, "Temperature", "Derivative",marker="o",)
for ax in g.axes.flat:
ax.set_xlim(60,95)
plt.show()

Related

How to add multiple custom ticks to seaborn boxplot

I generated a boxplot using seaborn. On the x axis, I would like to have, both the number of days (20, 25, 32) and the actual dates they refer to (2022-05-08, 2022-05-13, 2022-05-20).
I found a potential solution at the following link add custom tick with matplotlib. I'm trying to adapt it to my problem but I could only get the number of days or the dates, not both.
I really would appreciate any help. Thank you in advance for your time.
Please, find below my code and the desired output.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = [tick_label.get(t, ticks[i]) for i,t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()
Here is the desired output.
You can do that by adding these lines in place of ax.set_xticklabels(labels)
new_labels=["{}\n{}".format(a_, b_) for a_, b_ in zip(ticks, labels)]
ax.set_xticklabels(new_labels)
Output
Try this:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = ["{}\n".format(t)+tick_label.get(t, ticks[i]) for i, t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()

Axis ticks in histogram of times in matplotlib/seaborn

I've got a df with messages from a WhatsApp chat, the sender and the corresponding time in datetime format.
Time
Sender
Message
2020-12-21 22:23:00
Sender 1
"..."
2020-12-21 22:26:00
Sender 2
"..."
2020-12-21 22:35:00
Sender 1
"..."
I can plot the histogram with sns.histplot(df["Time"], bins=48)
But now the ticks on the x-axis don't make much sense. I end up with 30 ticks even though it should be 24 and also the ticks all contain the whole date plus the time where I would want only the time in "%H:%M"
Where is the issue with the wrong ticks coming from?
Thanks!
Both seaborn and pandas use matplotlib for plotting functions. Let's see who returns the bin values, we would need to adapt the x-ticks:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
#fake data generation
np.random.seed(1234)
n=20
start = pd.to_datetime("2020-11-15")
df = pd.DataFrame({"Time": pd.to_timedelta(np.random.rand(n), unit="D") + start, "A": np.random.randint(1, 100, n)})
#print(df)
#pandas histogram plotting function, left
pd_g = df["Time"].hist(bins=5, xrot=90, ax=ax1)
#no bin information
print(pd_g)
ax1.set_title("Pandas")
#seaborn histogram plotting, middle
sns_g = sns.histplot(df["Time"], bins=5, ax=ax2)
ax2.tick_params(axis="x", labelrotation=90)
#no bin information
print(sns_g)
ax2.set_title("Seaborn")
#matplotlib histogram, right
mpl_g = ax3.hist(df["Time"], bins=5, edgecolor="white")
ax3.tick_params(axis="x", labelrotation=90)
#hooray, bin information, alas in floats representing dates
print(mpl_g)
ax3.set_title("Matplotlib")
plt.tight_layout()
plt.show()
Sample output:
From this exercise we can conclude that all three refer to the same routine. So, we can directly use matplotlib which provides us with the bin values:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.dates import num2date
fig, ax = plt.subplots(figsize=(8, 5))
#fake data generation
np.random.seed(1234)
n=20
start = pd.to_datetime("2020-11-15")
df = pd.DataFrame({"Time": pd.to_timedelta(np.random.rand(n), unit="D") + start, "A": np.random.randint(1, 100, n)})
#plots histogram, returns counts, bin border values, and the bars themselves
h_vals, h_bins, h_bars = ax.hist(df["Time"], bins=5, edgecolor="white")
#plot x ticks at the place where the bin borders are
ax.set_xticks(h_bins)
#label them with dates in HH:MM format after conversion of the float values that matplotlib uses internally
ax.set_xticklabels([num2date(curr_bin).strftime("%H:%M") for curr_bin in h_bins])
plt.show()
Sample output:
Seaborn and pandas make life easier because they provide convenience wrappers and some additional functionality for commonly used plotting functions. However, if they do not suffice in the parameters they provide, one has often to revert to matplotlib which is more flexible in what it can do. Obviously, there might be an easier way in pandas or seaborn, I am not aware of. I will happily upvote any better suggestion within these libraries.

Matplotlib Plot time series with different periodicity

I have 2 dfs. One of them has data for a month. Another one, averages for the past quarters. I wanna plot the averages in front of the monthly data. How can I do it? Please note that I am trying to plot averages as dots and monthly as line chart.
So far my best result was achieved by ax1=ax.twiny(), but still not ideal result as data point appear in throughout the chart, rather than just in front.
import pandas as pd
import numpy as np
import matplotlib.dates as mdates
from matplotlib.ticker import ScalarFormatter, FormatStrFormatter, FuncFormatter
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
date_base = pd.date_range(start='1/1/2018', end='1/30/2018')
df_base = pd.DataFrame(np.random.randn(30,4), columns=list("ABCD"), index=date_base)
date_ext = pd.date_range(start='1/1/2017', end='1/1/2018', freq="Q")
df_ext = pd.DataFrame(np.random.randn(4,4), columns=list("ABCD"), index=date_ext)
def drawChartsPlt(df_base, df_ext):
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
number_of_plots = len(df_base.columns)
LINE_STYLES = ['-', '--', '-.', 'dotted']
colormap = plt.cm.nipy_spectral
ax.set_prop_cycle("color", [colormap(i) for i in np.linspace(0,1,number_of_plots)])
date_base = df_base.index
date_base = [i.strftime("%Y-%m-%d") for i in date_base]
q_ends = df_ext.index
q_ends = [i.strftime("%Y-%m-%d") for i in q_ends]
date_base.insert(0, "") #to shift xticks so they match chart
date_base += q_ends
for i in range(number_of_plots):
df_base.ix[:-3, df_base.columns[i]].plot(kind="line", linestyle=LINE_STYLES[i%2], subplots=False, ax=ax)
#ax.set_xticks(date_base)
#ax.set_xticklabels(date_base)
# ax.xaxis.set_major_locator(ticker.MultipleLocator(20))
ax.xaxis.set_major_locator(ticker.LinearLocator(len(date_base)))
ax.xaxis.set_major_formatter(plt.FixedFormatter(date_base))
fig.autofmt_xdate()
# ax1=ax.twinx()
ax1=ax.twiny()
ax1.set_prop_cycle("color", [colormap(i) for i in np.linspace(0,1,number_of_plots)])
for i in range(len(df_ext.columns)):
ax1.scatter(x=df_ext.index, y=df_ext[df_ext.columns[i]])
ax.set_title("Test")
#plt.minorticks_off())
ax.minorticks_off()
#ax1.minorticks_off()
#ax1.set_xticklabels(date_base)
#ax1.set_xticklabels(q_ends)
ax.legend(loc="center left", bbox_to_anchor=(1,0.5))
ax.xaxis.label.set_size(12)
plt.xlabel("TEST X Label")
plt.ylabel("TEST Y Label")
ax1.set_xlabel("Quarters")
plt.show()
drawChartsPlt(df_base, df_ext)
The way I ended up coding it is by saving quarterly index of df_ext to a temp variable, overwriting it with dates that are close to df_base.index using pd.date_range(start=df_base.index[-1], periods=len(df_ext), freq='D'), and the finally setting the dates that I need with ax.set_xticklabels(list(date_base)+list(date_ext)).
It looks like it could be achieved using broken axes as indicated Break // in x axis of matplotlib and Python/Matplotlib - Is there a way to make a discontinuous axis?, but I haven't tried that solution.

Plotting multiple lines grouped by one column dataframe, with date time as x axis [duplicate]

In Pandas, I am doing:
bp = p_df.groupby('class').plot(kind='kde')
p_df is a dataframe object.
However, this is producing two plots, one for each class.
How do I force one plot with both classes in the same plot?
Version 1:
You can create your axis, and then use the ax keyword of DataFrameGroupBy.plot to add everything to these axes:
import matplotlib.pyplot as plt
p_df = pd.DataFrame({"class": [1,1,2,2,1], "a": [2,3,2,3,2]})
fig, ax = plt.subplots(figsize=(8,6))
bp = p_df.groupby('class').plot(kind='kde', ax=ax)
This is the result:
Unfortunately, the labeling of the legend does not make too much sense here.
Version 2:
Another way would be to loop through the groups and plot the curves manually:
classes = ["class 1"] * 5 + ["class 2"] * 5
vals = [1,3,5,1,3] + [2,6,7,5,2]
p_df = pd.DataFrame({"class": classes, "vals": vals})
fig, ax = plt.subplots(figsize=(8,6))
for label, df in p_df.groupby('class'):
df.vals.plot(kind="kde", ax=ax, label=label)
plt.legend()
This way you can easily control the legend. This is the result:
import matplotlib.pyplot as plt
p_df.groupby('class').plot(kind='kde', ax=plt.gca())
Another approach would be using seaborn module. This would plot the two density estimates on the same axes without specifying a variable to hold the axes as follows (using some data frame setup from the other answer):
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# data to create an example data frame
classes = ["c1"] * 5 + ["c2"] * 5
vals = [1,3,5,1,3] + [2,6,7,5,2]
# the data frame
df = pd.DataFrame({"cls": classes, "indices":idx, "vals": vals})
# this is to plot the kde
sns.kdeplot(df.vals[df.cls == "c1"],label='c1');
sns.kdeplot(df.vals[df.cls == "c2"],label='c2');
# beautifying the labels
plt.xlabel('value')
plt.ylabel('density')
plt.show()
This results in the following image.
There are two easy methods to plot each group in the same plot.
When using pandas.DataFrame.groupby, the column to be plotted, (e.g. the aggregation column) should be specified.
Use seaborn.kdeplot or seaborn.displot and specify the hue parameter
Using pandas v1.2.4, matplotlib 3.4.2, seaborn 0.11.1
The OP is specific to plotting the kde, but the steps are the same for many plot types (e.g. kind='line', sns.lineplot, etc.).
Imports and Sample Data
For the sample data, the groups are in the 'kind' column, and the kde of 'duration' will be plotted, ignoring 'waiting'.
import pandas as pd
import seaborn as sns
df = sns.load_dataset('geyser')
# display(df.head())
duration waiting kind
0 3.600 79 long
1 1.800 54 short
2 3.333 74 long
3 2.283 62 short
4 4.533 85 long
Plot with pandas.DataFrame.plot
Reshape the data using .groupby or .pivot
.groupby
Specify the aggregation column, ['duration'], and kind='kde'.
ax = df.groupby('kind')['duration'].plot(kind='kde', legend=True)
.pivot
ax = df.pivot(columns='kind', values='duration').plot(kind='kde')
Plot with seaborn.kdeplot
Specify hue='kind'
ax = sns.kdeplot(data=df, x='duration', hue='kind')
Plot with seaborn.displot
Specify hue='kind' and kind='kde'
fig = sns.displot(data=df, kind='kde', x='duration', hue='kind')
Plot
Maybe you can try this:
fig, ax = plt.subplots(figsize=(10,8))
classes = list(df.class.unique())
for c in classes:
df2 = data.loc[data['class'] == c]
df2.vals.plot(kind="kde", ax=ax, label=c)
plt.legend()

Adjusting the color coding on a barplot so that all values are color coded correctly in matplotlib

I have a barplot that plots Rates by State and by Category (there are 5 categories) but the problem is that some States have more categories than other states.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({"state" : ["AL","AL","AL","AK", ],
"status" : ["Booked", "Rejected","Cancelled","Rejected"],
"0" : [1.5,2.5,3.5,1.0]})
df2 = df.groupby(['state','status']).size()/df.groupby(['state']).size()
fig, ax = plt.subplots()
plt.xlabel('State')
plt.ylabel('Bookings')
my_colors = 'gyr'
df2.plot(kind='bar', color=my_colors, orientation='vertical')
plt.tight_layout()
plt.show()
This does a good job with most of what I need to do however, what happens is that because some States do not have all values for status and hence do not appear in the plot, it makes some of the color coding incorrect because the colors are just shifted to repeat every 5 colors rather then based on whenever a value is missing or not. What can I do about this?
Possibly you want to show the data in a grouped fashion, namely to have 3 categories per group, such that each category has its own color.
In this case it seems this can easily be achieved by unstacking the multi-index dataframe,
df2.unstack().plot(...)
Complete example:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({"state" : ["AL","AL","AL","AK", ],
"status" : ["Booked", "Rejected","Cancelled","Rejected"],
"0" : [1.5,2.5,3.5,1.0]})
df2 = df.groupby(['state','status']).size()/df.groupby(['state']).size()
fig, ax = plt.subplots()
plt.xlabel('State')
plt.ylabel('Bookings')
my_colors = 'gyr'
df2.unstack().plot(kind='bar', color=my_colors, orientation='vertical', ax=ax)
plt.tight_layout()
plt.show()

Categories

Resources