I've assigned the 365 days of a year to several clusters and I'm now trying to plot them on a heatmap.
My code works fine except that cbar.set_ticks(some_range) has no effects: the tick labels on my colorbar have the right text but the wrong position
Here is a MCVE
from datetime import date
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import matplotlib
import seaborn as sns
#create some random data
n_cluster = 4
index = pd.date_range('01/01/2016', end='31/12/2016', freq='1D')
df = pd.DataFrame(np.random.randint(0, n_cluster, len(index)),
index=index, columns=['cluster'])
pivot = df.pivot_table('cluster',
columns=[lambda x: x.weekofyear],
index= [lambda x: x.dayofweek])
#yticklabels of the heatmap
days = [date(2018, 1, d).strftime('%a')[:3] for d in range(1, 8)]
#get a discrete cmap
cmap = plt.cm.get_cmap('RdBu', n_cluster)
fig = plt.figure(figsize=(10,3))
gs = matplotlib.gridspec.GridSpec(1, 2, width_ratios=[50,1])
ax = plt.subplot(gs[0])
cbar = plt.subplot(gs[1])
sns.heatmap(pivot, square=True, cmap=cmap,
yticklabels=days, ax=ax, cbar_ax=cbar)
#There is something wrong here
cbar.set_yticks([i + 1/(2.0*n_cluster) for i in np.arange(0, 1, 1.0/n_cluster)])
#This one is ok
cbar.set_yticklabels(range(0, n_cluster))
Thanks for your help
As a workaround, the following adds the correct labels in the correct place,
cbar.yaxis.set_ticks([0.125, 0.375, 0.625, 0.875])
which looks like,
EDIT:
Or the more general suggestion of mfitzp,
cbar.yaxis.set_ticks([i + 1/(2.0*n_cluster)
for i in np.arange(0, 1, 1.0/n_cluster)])
Related
I generated a boxplot using seaborn. On the x axis, I would like to have, both the number of days (20, 25, 32) and the actual dates they refer to (2022-05-08, 2022-05-13, 2022-05-20).
I found a potential solution at the following link add custom tick with matplotlib. I'm trying to adapt it to my problem but I could only get the number of days or the dates, not both.
I really would appreciate any help. Thank you in advance for your time.
Please, find below my code and the desired output.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = [tick_label.get(t, ticks[i]) for i,t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()
Here is the desired output.
You can do that by adding these lines in place of ax.set_xticklabels(labels)
new_labels=["{}\n{}".format(a_, b_) for a_, b_ in zip(ticks, labels)]
ax.set_xticklabels(new_labels)
Output
Try this:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = ["{}\n".format(t)+tick_label.get(t, ticks[i]) for i, t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()
I have plotted a heatmap which is displayed below. on the xaxis it shows time of the day and y axis shows date. I want to show xaxis at every hour instead of the random xlabels it displays here.
I tried following code but the resulting heatmap overrites all xlabels together:
t = pd.date_range(start='00:00:00', end='23:59:59', freq='60T').time
df = pd.DataFrame(index=t)
df.reset_index(inplace=True)
df['index'] = df['index'].astype('str')
sns_hm = sns.heatmap(data=mat, cbar=True, lw=0,cmap=colormap,xticklabels=df['index'])
The following code supposes mat is a dataframe with columns for some timestamps for each of a number of days. Each of the days, the same timestamps need to appear again.
After drawing the heatmap, the left and right limits of the x-axis are retrieved. Supposing these go from 0 to 24 hour, the range can be subdivided into 25 positions, one for each of the hours.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from pandas.tseries.offsets import DateOffset
from matplotlib.colors import ListedColormap, to_hex
# first, create some test data
df = pd.DataFrame()
df["date"] = pd.date_range('20220304', periods=19000, freq=DateOffset(seconds=54))
df["val"] = (((np.random.rand(len(df)) ** 100).cumsum() / 2).astype(int) % 2) * 100
df['day'] = df['date'].dt.strftime('%d-%m-%Y')
df['time'] = df['date'].dt.strftime('%H:%M:%S')
mat = df.pivot(index='day', columns='time', values='val')
colors = list(plt.cm.Greens(np.linspace(0.2, 0.9, 10)))
ax = sns.heatmap(mat, cmap=colors, cbar_kws={'ticks': range(0, 101, 10)})
xmin, xmax = ax.get_xlim()
tick_pos = np.linspace(xmin, xmax, 25)
tick_labels = [f'{h:02d}:00:00' for h in range(len(tick_pos))]
ax.set_xticks(tick_pos)
ax.set_xticklabels(tick_labels, rotation=90)
ax.set(xlabel='', ylabel='')
plt.tight_layout()
plt.show()
The left plot shows the default tick labels, the right plot the customized labels.
I have around 4475 rows of csv data like below:
,Time,Values,Size
0,1900-01-01 23:11:30.368,2,
1,1900-01-01 23:11:30.372,2,
2,1900-01-01 23:11:30.372,2,
3,1900-01-01 23:11:30.372,2,
4,1900-01-01 23:11:30.376,2,
5,1900-01-01 23:11:30.380,,
6,1900-01-01 23:11:30.380,,
7,1900-01-01 23:11:30.380,,
8,1900-01-01 23:11:30.380,,321
9,1900-01-01 23:11:30.380,,111
.
.
4474,1900-01-01 23:11:32.588,,
When I try to create simple seaborn lineplot with below code. It creates line chart but its continuous chart while my data i.e. 'Values' has many empty/nan values which should show as gap on chart. How can I do that?
[from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("Data.csv")
sns.set(rc={'figure.figsize':(13,4)})
ax =sns.lineplot(x="Time", y="Values", data=df)
ax.set(xlabel='Time', ylabel='Values')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()]
As reported in this answer:
I've looked at the source code and it looks like lineplot drops nans from the DataFrame before plotting. So unfortunately it's not possible to do it properly.
So, the easiest way to do it is to use matplotlib in place of seaborn.
In the code below I generate a dataframe like your with 20% of missing values in 'Values' column and I use matplotlib to draw a plot:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame({'Time': pd.date_range(start = '1900-01-01 23:11:30', end = '1900-01-01 23:11:30.1', freq = 'L')})
df['Values'] = np.random.randint(low = 2, high = 10, size = len(df))
df['Values'] = df['Values'].mask(np.random.random(df['Values'].shape) < 0.2)
fig, ax = plt.subplots(figsize = (13, 4))
ax.plot(df['Time'], df['Values'])
ax.set(xlabel = 'Time', ylabel = 'Values')
plt.xticks(rotation = 90)
plt.tight_layout()
plt.show()
I'm trying to make my colourbar have integer values instead of decimals, but coding this is a lot harder than anticipated.
my initial code
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
#sns.set()
# read data
revels_data = pd.read_csv("revels2.txt")
rd = revels_data
revels = rd.pivot("Flavour", "Packet number", "Contents")
# orders flavours
revels.index = pd.CategoricalIndex(revels.index, categories=["orange", "toffee", "chocolate", "malteser", "raisin", "coffee"])
revels.sortlevel(level=0, inplace=True)
# Draw a heatmap with the numeric values in each cell
ax = sns.heatmap(revels, annot=True, fmt="d", linewidths=0.4, cmap="YlOrRd")
ax.set_title('REVELS PACKET COUNT HEATMAP', weight="bold")
plt.show()
which produces
Trying to reverse engineer one of the answers from here
by adding the following code
cmap = plt.get_cmap("YlOrRd", np.max(rd.Contents)-np.min(rd.Contents)+1)
plt.get_cmap("YlOrRd", np.max(rd.Contents)-np.min(rd.Contents)+1)
# set limits .5 outside true range
mat = plt.matshow(rd.Contents, cmap=cmap, vmin = np.min(rd.Contents)-.5, vmax = np.max(rd.Contents)+.5)
plt.matshow(rd.Contents ,cmap=cmap, vmin = np.min(rd.Contents)-.5, vmax = np.max(rd.Contents)+.5)
#tell the colorbar to tick at integers
cax = plt.colorbar(mat, ticks=np.arange(np.min(rd.Contents),np.max(rd.Contents)+1))
plt.colorbar(mat, ticks=np.arange(np.min(rd.Contents),np.max(rd.Contents)+1))
but getting errors, namely ValueError: not enough values to unpack.
I think I may have applied the code wrong, would appreciate any help.
Here is a full working example, which creates a discrete colorbar for a seaborn heatmap plot with integer values as colorbar ticks.
import pandas as pd
import numpy as np; np.random.seed(8)
import matplotlib.pyplot as plt
import seaborn.apionly as sns
plt.rcParams["figure.figsize"] = 10,5.5
flavours=["orange", "toffee", "chocolate", "malteser", "raisin", "coffee"]
num = np.arange(0, 6*36).astype(int) % 36
flavs = np.random.choice(flavours, size=len(num))
conts = np.random.randint(0,6, len(num)).astype(int)
df = pd.DataFrame({"Packet number":num ,"Flavour":flavs,"Contents" : conts})
revels = pd.pivot_table(df, index=["Flavour"], columns=["Packet number"], values="Contents", aggfunc=np.sum)
revels.index = pd.CategoricalIndex(revels.index, categories=flavours)
revels.sortlevel(level=0, inplace=True)
revels= revels.fillna(0)
ticks=np.arange(revels.values.min(),revels.values.max()+1 )
boundaries = np.arange(revels.values.min()-.5,revels.values.max()+1.5 )
cmap = plt.get_cmap("YlOrRd", revels.values.max()-revels.values.min()+1)
ax = sns.heatmap(revels, annot=True, linewidths=0.4, cmap=cmap,
cbar_kws={"ticks":ticks, "boundaries":boundaries})
ax.set_title('REVELS PACKET COUNT HEATMAP', weight="bold")
plt.tight_layout()
plt.show()
I need to change the colors of the boxplot drawn using pandas utility function. I can change most properties using the color argument but can't figure out how to change the facecolor of the box. Someone knows how to do it?
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props)
While I still recommend seaborn and raw matplotlib over the plotting interface in pandas, it turns out that you can pass patch_artist=True as a kwarg to df.plot.box, which will pass it as a kwarg to df.plot, which will pass is as a kwarg to matplotlib.Axes.boxplot.
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props, patch_artist=True)
As suggested, I ended up creating a function to plot this, using raw matplotlib.
def plot_boxplot(data, ax):
bp = ax.boxplot(data.values, patch_artist=True)
for box in bp['boxes']:
box.set(color='DarkGreen')
box.set(facecolor='DarkGreen')
for whisker in bp['whiskers']:
whisker.set(color="DarkOrange")
for cap in bp['caps']:
cap.set(color="Gray")
for median in bp['medians']:
median.set(color="white")
ax.axhline(0, color="DarkBlue", linestyle=":")
ax.set_xticklabels(data.columns)
I suggest using df.plot.box with patch_artist=True and return_type='both' (which returns the matplotlib axes the boxplot is drawn on and a dictionary whose values are the matplotlib Lines of the boxplot) in order to have the best customization possibilities.
For example, given this data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(
data=np.random.randn(100, 4),
columns=list("ABCD")
)
you can set a specific color for all the boxes:
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch in props['boxes']:
patch.set_facecolor('lime')
plt.show()
you can set a specific color for each box:
colors = ['green','blue','yellow','red']
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors):
patch.set_facecolor(color)
plt.show()
you can easily integrate a colormap:
colors = np.random.randint(0,10, 4)
cm = plt.cm.get_cmap('rainbow')
colors_cm = [cm((c-colors.min())/(colors.max()-colors.min())) for c in colors]
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors_cm):
patch.set_facecolor(color)
# to add colorbar
fig.colorbar(plt.cm.ScalarMappable(
plt.cm.colors.Normalize(min(colors),max(colors)),
cmap='rainbow'
), ax=ax, cmap='rainbow')
plt.show()