overlapping y axis label in seaborn heatmap

overlapping y axis label in seaborn heatmap - python

how to spread out the cells further without creating any white borders in between?
the Dates on y-axis are overlapping and I want to spread it out.
I tried to increase the figsize in the column but the graph does not change when I change the parameter. is this any method to spread it out, and no borders in between?
import seaborn as sns
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import yfinance as yf
#====================================================
prev=150
endDate=dt.datetime.today().date()
sDate=endDate-pd.to_timedelta(prev,unit='d')
#--------------------------------------------------------------
def get_price(tickers,roll_num=20): #input is a list or Series
result=pd.DataFrame()
pic=pd.DataFrame()
for i in tickers:
try:
df=pd.DataFrame()
df['Adj Close']=yf.download(i,sDate,endDate)['Adj Close']
df['MA']=df['Adj Close'].rolling(roll_num).mean()
df.sort_values(ascending=False,inplace=True,by="Date") # sometimes error
df['Higher?']=df['Adj Close']>df['MA']
df['Higher?']=df['Higher?'].astype(int)
result[str(i)]=df['Higher?']
except Exception as ex: # no date column
print('Ticker', i, 'ERROR', ex)
print(df)
pic[tickers.name]=(result.sum(axis=1)/len(result.columns)*100).astype(int)
pic.name=tickers.name
pic.drop(pic.tail(roll_num-1).index,inplace=True)
return pic
#--------------------------------------------------------------
test=pd.Series(['A','TSLA','KO','T','aapl','nke'])
test=test.str.replace('.','-')
test.name='I am test'
a=get_price(test)
print(a)
#=============================================================================
base_url = "http://www.sectorspdr.com/sectorspdr/IDCO.Client.Spdrs.Holdings/Export/ExportExcel?symbol="
data = {
'Ticker' : [ 'XLC','XLY','XLP','XLE','XLF','XLV','XLI','XLB','XLRE','XLK','XLU' ]
, 'Name' : [ 'Communication Services','Consumer Discretionary','Consumer Staples','Energy','Financials','Health Care','Industrials','Materials','Real Estate','Technology','Utilities' ]
}
spdr_df = pd.DataFrame(data)
print(spdr_df)
#-------------------------------------------------------------------
final_product=[]
for i, row in spdr_df.iterrows():
url = base_url + row['Ticker']
df_url = pd.read_excel(url)
header = df_url.iloc[0]
holdings_df = df_url[1:]
holdings_df.set_axis(header, axis='columns', inplace=True)
holdings_df=holdings_df['Symbol'].str.replace('.','-')
holdings_df.name=row.Name
final_product.append(get_price(holdings_df))
final_product=pd.concat(final_product,axis=1)
final_product['Sum']=final_product.sum(axis=1)
final_product.index=final_product.index.strftime('%Y-%m-%d')
print(final_product)
#------------------------------------------------
#----------------------------
plt.rcParams['ytick.labelsize']=12
fontsize_pt = plt.rcParams['ytick.labelsize']
dpi = 72.27
column_labels = final_product.columns[:-1]
## comput the matrix height in points and inches
matrix_height_pt = fontsize_pt * final_product.shape[0]
matrix_height_in = matrix_height_pt / dpi
# compute the required figure height
top_margin = 0.1 # in percentage of the figure height
bottom_margin = 0.04 # in percentage of the figure height
figure_height = matrix_height_in / (1 - top_margin - bottom_margin)
# build the figure instance with the desired height
fig, (ax1,ax2)= plt.subplots(ncols=2,figsize=(10,50),
gridspec_kw=dict(top=1-top_margin, bottom=bottom_margin,wspace=0.01))
# let seaborn do it's thing
cmap = sns.diverging_palette(20, 145)
ax1 = sns.heatmap(final_product[final_product.columns[:-1]],cmap=cmap, vmin=0,vmax=100,annot=True,xticklabels=column_labels, cbar=False, ax=ax1, fmt='.0f')
ax2 = sns.heatmap(final_product[final_product.columns[-1:]], cmap=cmap, vmin=0, vmax=1100, annot=True, fmt='.0f',yticklabels=[], cbar=False, ax=ax2)
ax2.set_ylabel('')
ax2.tick_params(axis='x', labelrotation=90)
ax1.xaxis.tick_top()
ax1.xaxis.set_label_position('top')
ax1.tick_params(axis='x', labelrotation=45)
plt.savefig('heatmap.png')
my output picture looks like:

I think you are overwriting the figsize with the argument gridspec_kw in plt.subplots(). Try to change the top and bottom arguments in gridspec_kw, or to remove gridspec_kw.

Related

custom xlabel ticks in Seaborn heatmaps

I have plotted a heatmap which is displayed below. on the xaxis it shows time of the day and y axis shows date. I want to show xaxis at every hour instead of the random xlabels it displays here.
I tried following code but the resulting heatmap overrites all xlabels together:
t = pd.date_range(start='00:00:00', end='23:59:59', freq='60T').time
df = pd.DataFrame(index=t)
df.reset_index(inplace=True)
df['index'] = df['index'].astype('str')
sns_hm = sns.heatmap(data=mat, cbar=True, lw=0,cmap=colormap,xticklabels=df['index'])

The following code supposes mat is a dataframe with columns for some timestamps for each of a number of days. Each of the days, the same timestamps need to appear again.
After drawing the heatmap, the left and right limits of the x-axis are retrieved. Supposing these go from 0 to 24 hour, the range can be subdivided into 25 positions, one for each of the hours.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from pandas.tseries.offsets import DateOffset
from matplotlib.colors import ListedColormap, to_hex
# first, create some test data
df = pd.DataFrame()
df["date"] = pd.date_range('20220304', periods=19000, freq=DateOffset(seconds=54))
df["val"] = (((np.random.rand(len(df)) ** 100).cumsum() / 2).astype(int) % 2) * 100
df['day'] = df['date'].dt.strftime('%d-%m-%Y')
df['time'] = df['date'].dt.strftime('%H:%M:%S')
mat = df.pivot(index='day', columns='time', values='val')
colors = list(plt.cm.Greens(np.linspace(0.2, 0.9, 10)))
ax = sns.heatmap(mat, cmap=colors, cbar_kws={'ticks': range(0, 101, 10)})
xmin, xmax = ax.get_xlim()
tick_pos = np.linspace(xmin, xmax, 25)
tick_labels = [f'{h:02d}:00:00' for h in range(len(tick_pos))]
ax.set_xticks(tick_pos)
ax.set_xticklabels(tick_labels, rotation=90)
ax.set(xlabel='', ylabel='')
plt.tight_layout()
plt.show()
The left plot shows the default tick labels, the right plot the customized labels.

Formatting time series axis in Seaborn

I'm learning Seaborn and trying to figure out how I can format an X axis for dates over a yearly period, so that it is readable. Let's assume we have a dataframe which holds weather measurements for each day of an entire year (365 rows).
sns.scatterplot(x = df_weather["DATE"], y = df_weather["MAX_TEMPERATURE_C"], color = 'red')
sns.scatterplot(x = df_weather["DATE"], y = df_weather["MIN_TEMPERATURE_C"], color = 'blue')
plt.show()
How can I ensure that the X axis labels are readable? Ideally, one label per month would be fine.
Thanks!

Not very sure what your column date is like, but maybe try something like below, first generate some data, I have the date as a string which I guess is something like yours:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
DATE = pd.date_range('2020-01-01', periods=365, freq='D').strftime('%y%y-%m-%d')
MIN = np.random.uniform(low=10,high=25,size = len(index))
MAX = MIN + np.random.uniform(low=5,high=10,size =len(index))
df = pd.DataFrame({'DATE':DATE,'MIN':MIN,'MAX':MAX})
Plot like you did using sns:
fig, ax = plt.subplots(figsize = (10,4))
ax = sns.scatterplot(x = "DATE", y = "MAX",data=df, color = 'red')
ax = sns.scatterplot(x = "DATE", y = "MIN",data=df, color = 'blue')
Now we define the start of the mths to define ticks:
mths = pd.date_range('2020-01-01', periods=12, freq='MS')
ax.set_xticks(mths.strftime('%y%y-%m-%d'))
ax.set(xticklabels=mths.strftime('%b'))
plt.show()
And it should look ok:

Heatmap with circles indicating size of population

I would like to produce a heatmap in Python, similar to the one shown, where the size of the circle indicates the size of the sample in that cell. I looked in seaborn's gallery and couldn't find anything, and I don't think I can do this with matplotlib.

It's the inverse. While matplotlib can do pretty much everything, seaborn only provides a small subset of options.
So using matplotlib, you can plot a PatchCollection of circles as shown below.
Note: You could equally use a scatter plot, but since scatter dot sizes are in absolute units it would be rather hard to scale them into the grid.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
N = 10
M = 11
ylabels = ["".join(np.random.choice(list("PQRSTUVXYZ"), size=7)) for _ in range(N)]
xlabels = ["".join(np.random.choice(list("ABCDE"), size=3)) for _ in range(M)]
x, y = np.meshgrid(np.arange(M), np.arange(N))
s = np.random.randint(0, 180, size=(N,M))
c = np.random.rand(N, M)-0.5
fig, ax = plt.subplots()
R = s/s.max()/2
circles = [plt.Circle((j,i), radius=r) for r, j, i in zip(R.flat, x.flat, y.flat)]
col = PatchCollection(circles, array=c.flatten(), cmap="RdYlGn")
ax.add_collection(col)
ax.set(xticks=np.arange(M), yticks=np.arange(N),
xticklabels=xlabels, yticklabels=ylabels)
ax.set_xticks(np.arange(M+1)-0.5, minor=True)
ax.set_yticks(np.arange(N+1)-0.5, minor=True)
ax.grid(which='minor')
fig.colorbar(col)
plt.show()

Here's a possible solution using Bokeh Plots:
import pandas as pd
from bokeh.palettes import RdBu
from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar
from bokeh.models.ranges import FactorRange
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import numpy as np
output_notebook()
d = dict(x = ['A','A','A', 'B','B','B','C','C','C','D','D','D'],
y = ['B','C','D', 'A','C','D','B','D','A','A','B','C'],
corr = np.random.uniform(low=-1, high=1, size=(12,)).tolist())
df = pd.DataFrame(d)
df['size'] = np.where(df['corr']<0, np.abs(df['corr']), df['corr'])*50
#added a new column to make the plot size
colors = list(reversed(RdBu[9]))
exp_cmap = LinearColorMapper(palette=colors,
low = -1,
high = 1)
p = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width=700,
plot_height=450, title="Correlation",
toolbar_location=None, tools="hover")
p.scatter("x","y",source=df, fill_alpha=1, line_width=0, size="size",
fill_color={"field":"corr", "transform":exp_cmap})
p.x_range.factors = sorted(df['x'].unique().tolist())
p.y_range.factors = sorted(df['y'].unique().tolist(), reverse = True)
p.xaxis.axis_label = 'Values'
p.yaxis.axis_label = 'Values'
bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
p.add_layout(bar, "right")
show(p)

One option is to use matplotlib's scatter plots with legends and grid. You can specify size of those circles with specifying the scales. You can also change the color of each circle. You should somehow specify X,Y values so that the circles sit straight on lines. This is an example I got from here:
volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)
fig, ax = plt.subplots()
# Because the price is much too small when being provided as size for ``s``,
# we normalize it to some useful point sizes, s=0.3*(price*3)**2
scatter = ax.scatter(volume, amount, c=ranking, s=0.3*(price*3)**2,
vmin=-3, vmax=3, cmap="Spectral")
# Produce a legend for the ranking (colors). Even though there are 40 different
# rankings, we only want to show 5 of them in the legend.
legend1 = ax.legend(*scatter.legend_elements(num=5),
loc="upper left", title="Ranking")
ax.add_artist(legend1)
# Produce a legend for the price (sizes). Because we want to show the prices
# in dollars, we use the *func* argument to supply the inverse of the function
# used to calculate the sizes from above. The *fmt* ensures to show the price
# in dollars. Note how we target at 5 elements here, but obtain only 4 in the
# created legend due to the automatic round prices that are chosen for us.
kw = dict(prop="sizes", num=5, color=scatter.cmap(0.7), fmt="$ {x:.2f}",
func=lambda s: np.sqrt(s/.3)/3)
legend2 = ax.legend(*scatter.legend_elements(**kw),
loc="lower right", title="Price")
plt.show()
Output:

I don't have enough reputation to comment on Delenges' excellent answer, so I'll leave my comment as an answer instead:
R.flat doesn't order the way we need it to, so the circles assignment should be:
circles = [plt.Circle((j,i), radius=R[j][i]) for j, i in zip(x.flat, y.flat)]

Here is an easy example to plot circle_heatmap.
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.datasets import load_wine as load_data
from psynlig import plot_correlation_heatmap
plt.style.use('seaborn-talk')
data_set = load_data()
data = pd.DataFrame(data_set['data'], columns=data_set['feature_names'])
#data = df_corr_selected
kwargs = {
'heatmap': {
'vmin': -1,
'vmax': 1,
'cmap': 'viridis',
},
'figure': {
'figsize': (14, 10),
},
}
plot_correlation_heatmap(data, bubble=True, annotate=False, **kwargs)
plt.show()

Display label stacked barh with values from dataframe

How can I display values for my stacked barh chart that come from a dataframe? How can I place the labels above their respective sections on each bar and modify the font so that it shows up well as a gray scale graphic?
It is related to this question but it has a list of values rather than two lists pulled from a pandas dataframe. If it were a singe list, I think I could pull values from a single record in the dataframe but with two lists, I'm not sure how to apply that to each bar in the bar graph.
My dataframe:
Delin. Group1 Group2 Group3 Group4 Group5
Census 0.2829 0.3387 0.2636 0.0795 0.0353
USPS 0.2538 0.3143 0.2901 0.1052 0.0366
My code:
import os
import pandas as pd
import time
#
start_time = time.time()
#
output_dir = r"C:\Some\Directory\For\Ouputs"
#
output_fig = "race_barh2.png"
#
fig_path = os.path.join(output_dir, output_fig)
#
os.chdir(output_dir)
#
input_csv = r"C:\Some\Directory\To\My.csv"
#
df = pd.read_csv(input_csv, delimiter = ",")
#
ax = df.plot.barh( stacked = True, color = ("#252525", "#636363", "#969696", "#cccccc", "#f7f7f7"), edgecolor = "black", linewidth = 1)
#
ax.set_xlabel("Percentage of Total", fontsize = 18)
#
ax.set_ylabel("Boundary Delineation", fontsize = 18)
#
ax.set_yticklabels(["Census", "USPS"])
#
ax.set_xticklabels(["0%", "20%", "40%", "60%", "80%", "100%"])
#
horiz_offset = 1.03
#
vert_offset = 1
#
ax.legend(bbox_to_anchor=(horiz_offset, vert_offset))
#
fig = ax.get_figure()
#
fig.savefig(fig_path, bbox_inches = "tight", dpi = 600)
#
#
#
end_time = round( time.time() - start_time, 5 )
#
print "Seconds elapsed: {0}".format(end_time)

You can do this similarly as in the referenced question, by annotating the bars. For a stacked bar chart you'll have to tweak the position of the labels a little to get them where you want. You can play around with the horizontalalignment, verticalalignment and adding a bit of a margin as I did (+.5).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from cycler import cycler
#used gray colormap, you can use your own colors by replacing colormap='gray' with color=colors
colors = ["#252525", "#636363", "#969696", "#cccccc", "#f7f7f7"]
plt.rcParams['axes.prop_cycle'] = cycler(color=colors)
#dummy data
df = pd.DataFrame(np.random.randint(5, 8, (10, 3)), columns=['Group1', 'Group2', 'Group3'])
for col in df.columns.tolist():
df[col] = df[col].apply(lambda x:x*100 / df[col].sum())
ax = df.T.plot.barh(stacked=True, colormap='gray', edgecolor='black', linewidth=1)
for lbl in ax.patches:
ax.annotate("{:.0f}%".format(int(lbl.get_width())), (lbl.get_x(), lbl.get_y()+.5), verticalalignment='bottom', horizontalalignment='top', fontsize=8, color='black')
ax.legend(loc='center left', bbox_to_anchor=(1.0, .5))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
plt.show()

Pandas - stacked bar chart with column values for stacking

I have a data set with three sets of data: class type, neighborhood, and visibility.
I'm trying to create a bar chart that is both stacked and unstacked -- stacked by visibility, lined up by neighborhood. So basically, I'm looking for a combination of the unstacked-ness of this chart:
nbvis_gb = nbvis.sort_values(by=['visibility'],ascending=False).groupby(by='visibility',sort=False)
fig, ax = plt.subplots(nrows=1,ncols=2,figsize=(14,8),sharey=True)
for (i, j), ax,color in zip(nbvis_gb,ax.flatten(),colors_hood):
print(j['class'].values)
title = str(i)
j.plot.bar(ax=ax,colors=colors_hood)
ax.set_title(title, fontsize=20)
#ax.set_ylim(0,1.05)
ax.tick_params(labelsize=16)
ax.set_xticklabels(j['class'].values)
ax.legend_.remove()
ax.legend(loc=8,fontsize=20,ncol=4,bbox_to_anchor=(0,-.45))
fig.tight_layout(h_pad=2)
fig.suptitle('Visibility of containers by class and neighborhood',y=1.03,fontsize=24)
and the stacked-ness of this chart:
nbvis.unstack()['Neighborhood 1'].plot.bar(stacked=True)
Any help would be greatly appreciated!
Cheers,
Elizabeth

Consider melt and pivot_table of your dataframe to create a multi-index datafame aligned to your graph dimensions. Below outputs graph to screen and saves figure to png image in same folder using seaborn's color scheme. Of course adjust graph settings as needed.
Data
import numpy as np
import pandas as pd
from itertools import product
from matplotlib import pyplot as plt
import seaborn
np.random.seed(444)
df = pd.DataFrame(list(product(['bucket (1)', 'flower pot (2)', 'tarp (3)', 'trash (6)', 'toy (7)',
'piping/tubing (9)', 'other (10)'],
['visible containers', 'partial or not visible containers'])),
columns=['class', 'visibility']).assign(Neighborhood1 = abs(np.random.randn(14)),
Neighborhood2 = abs(np.random.randn(14)),
Neighborhood3 = abs(np.random.randn(14)),
Neighborhood4 = abs(np.random.randn(14)))
Graphing
seaborn.set()
def runplot(pvtdf):
fig, axes = plt.subplots(nrows=1, ncols=len(mdf['Neighborhood'].unique()))
for i, n in enumerate(mdf['Neighborhood'].unique()):
pvtdf.xs(n).plot(ax=axes[i], kind='bar', stacked=True, edgecolor='w',
figsize=(20,8), width=0.5, fontsize = 12,
title='{} - Visibility of containers \n by class and neighborhood'.format(n))
axes[i].title.set_size(16)
plt.tight_layout()
fig.savefig('Output.png')
plt.show()
plt.clf()
# MELT LONG
mdf = pd.melt(df, id_vars = ['class', 'visibility'], var_name='Neighborhood')
# PIVOT WIDE
pvtdf = mdf.pivot_table(index= ['Neighborhood', 'class'], columns='visibility', values='value')
runplot(pvtdf, n)
plt.close()
Output

here's one way you could do this. I used some dummy data:
df = pd.DataFrame({"class":['bucket', 'pot', 'tarp', 'trash', 'toy', 'tubing', 'other','bucket', 'pot', 'tarp', 'trash', 'toy', 'tubing', 'other',],
"visability":["visable", "visable","visable","visable","visable","visable","visable", "not visable","not visable","not visable","not visable","not visable","not visable","not visable",],
"n1":np.random.random(size=14),
"n2":np.random.random(size=14),
"n3":np.random.random(size=14),
"n4":np.random.random(size=14)})
I think the trick is to use bottom:
N=7
width = 0.095
w = 0
ind = np.arange(N) + .15
classes = ['bucket', 'pot', 'tarp', 'trash', 'toy', 'tubing', 'other']
neighborhoods = ['n1', 'n2', 'n3', 'n4']
fig, ax = plt.subplots()
top_colors = ['#ff9999', '#9999ff', '#e6b3ff', '#66ff66']
bottom_colors = ['#b30000', '#000066', '#7700b3', '#004d00']
for i, n in enumerate(neighborhoods):
vis = df[(df.visability == "visable")][n]
non_vis = df[df.visability == "not visable"][n]
rect1 = ax.bar(ind+w, vis, float(width), color=top_colors[i])
rect2 = ax.bar(ind+w, non_vis, width, color=bottom_colors[i], bottom=vis)
w += 0.15
extra_space = 0.05
ax.set_xticks(ind+width+xtra_space)
ax.set_xticklabels(('bucket', 'pot', 'tarp', 'trash', 'toy', 'tubing', 'other',))
ax.set_title('Visability of container types by class')
plt.show()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

overlapping y axis label in seaborn heatmap - python

I think you are overwriting the figsize with the argument gridspec_kw in plt.subplots(). Try to change the top and bottom arguments in gridspec_kw, or to remove gridspec_kw.

Related

custom xlabel ticks in Seaborn heatmaps

Formatting time series axis in Seaborn

Heatmap with circles indicating size of population

Display label stacked barh with values from dataframe

Pandas - stacked bar chart with column values for stacking

Categories

Resources