Tick labels appearing twice - python

I am trying to create a figure that is a dendrogram on top of a scatterplot, where the ends of the leaves on the dendrogram match up with the dots on the scatterplot, which in turn match up with the tick labels below. I have this working, but for some reason the tick labels appear twice. The labels in red and green are the ones I'm trying to keep.
This is my code:
import pandas as pd
from matplotlib import pyplot as plt
import scipy.cluster.hierarchy as sch
import numpy as np
import json
import random
def scatter_and_dendrogram(df, colors, wn='', label_x=False):
    '''Draw a dendrogram on top of a one-row scatter plot sharing its x-axis.

    Every dendrogram leaf gets a dot directly below it; when ``label_x`` is
    true the dots are labelled with the leaf letters, coloured red or green.

    Args:
        df (Pandas DataFrame): similarity matrix
        colors (list of strs): list of colors (accepted for compatibility;
            the drawing code does not read it)
        wn (str): window name, also used as the plot title
        label_x (bool): whether or not to label x axis
    Returns: None
    '''
    # NOTE(review): column 0 of every row is left out, exactly as the original
    # element-by-element loop did (it started at index 1) -- confirm intended.
    X = np.asarray(df.iloc[:, 1:], dtype=float)
    Z = sch.linkage(X, 'ward')
    sch.set_link_color_palette(['b'])

    # A single subplots() call creates the figure; no separate plt.figure().
    fig, (ax1, ax2) = plt.subplots(
        2, 1, sharex='col', sharey='row',
        gridspec_kw={'width_ratios': [1],
                     'height_ratios': [30, 1],
                     'hspace': 0, 'wspace': 0})
    dendrogram = sch.dendrogram(Z=Z, p=3, ax=ax1)

    # A link end whose height is 0 sits on a leaf; collect those x positions.
    leaf_x = []
    for ic, dc in zip(dendrogram['icoord'], dendrogram['dcoord']):
        zero_count = dc.count(0)
        if zero_count == 2:
            leaf_x.extend((ic[0], ic[-1]))
        elif zero_count == 1:
            leaf_x.append(ic[dc.index(0)])
    # No ``norm`` here: without a ``c`` array there is nothing to colour-map.
    ax2.scatter(leaf_x, [-0.1] * len(leaf_x), s=10, alpha=0.7)

    # Figure.canvas.set_window_title was removed from recent matplotlib;
    # the canvas manager (absent on non-interactive backends) owns the title.
    manager = fig.canvas.manager
    if manager is not None:
        manager.set_window_title(wn)

    ax1.set_yticklabels([])
    ax2.set_yticklabels([])
    ax2.set_xticklabels([])
    # The x-axis is shared, so labels set on ax2 would also be drawn under the
    # dendrogram; hide them on ax1 so every tick label appears only once.
    ax1.tick_params(axis='x', labelbottom=False)

    if label_x:
        letters = list('ABCD')
        labels = [letters[ind] for ind in dendrogram['leaves']]
        c1 = '#ff0033'  # red
        c2 = '#006600'  # green
        xlbls = ax2.set_xticklabels(labels, fontsize=11, linespacing=3)
        for lbl in xlbls:
            # The first two letters are drawn in red, the rest in green.
            c = c1 if letters.index(lbl.get_text()) < 2 else c2
            print(c)
            lbl.set_color(c)

    ax1.set_title(wn)
    ax1.set_ylabel('Aggregation Criterion', fontsize=15)
    ax2.set_xlabel('Articles', fontsize=15)
    plt.show()
# Demo driver: build a random symmetric 4x4 matrix with a zero diagonal
# and plot it with x-axis labels enabled.
l = list('ABCD')
df = pd.DataFrame(index=l, columns=l)
size = len(l)
for row in range(size - 1):
    for col in range(row + 1, size):
        r = random.randint(0, 10)
        # Mirror each value so the matrix stays symmetric.
        df.iloc[row, col] = r
        df.iloc[col, row] = r
# Cells that were never assigned (the diagonal) become 0.
df.fillna(0, inplace=True)
print(df)
wn = 'Set C'
scatter_and_dendrogram(df, l, wn, label_x=True)
This is what it looks like:

According to matplotlib.pyplot.subplots about sharex and sharey
When subplots have a shared x-axis along a column, only the x tick
labels of the bottom subplot are created.
Similarly, when subplots have a shared y-axis along a row, only the y tick labels of the first column subplot are created.
To later turn other subplots' ticklabels on, use tick_params.
You need to add ax1.tick_params(axis='x', labelbottom=False) below the line xlbls = ax2.set_xticklabels(...).
Besides, if fig = plt.figure() serves no purpose, remove it.

Related

How to plot barplot 3D projection in matplotlib for multiple columns

I have a table that contains three different time characteristics according to two different parameters. I want to plot those parameters on x and y-axis and show bars of the three different times on the z-axis. I have created a simple bar plot where I plot one of the time characteristics:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Timings A, B and C measured for each (R, Users) combination.
columns = ['R', 'Users', 'A', 'B', 'C']
df = pd.DataFrame(
    {
        'R': [2, 2, 2, 4, 4, 4, 6, 6, 6, 8, 8],
        'Users': [80, 400, 1000, 80, 400, 1000, 80, 400, 1000, 80, 400],
        'A': [0.05381, 0.071907, 0.08767, 0.04493, 0.051825, 0.05295,
              0.05285, 0.0804, 0.0967, 0.09864, 0.1097],
        'B': [0.04287, 0.83652, 5.49683, 0.02604, .045599, 2.80836,
              0.02678, 0.32621, 1.41399, 0.19025, 0.2111],
        'C': [0.02192, 0.16217, 0.71645, 0.25314, 5.12239, 38.92758,
              1.60807, 262.4874, 8493, 11.6025, 6288],
    },
    columns=columns,
)

fig = plt.figure()
ax = plt.axes(projection="3d")

# One bar per table row: anchored at (R, Users, 0), height taken from A.
num_bars = 11
x_pos, y_pos, z_size = df["R"], df["Users"], df["A"]
z_pos = [0] * num_bars
x_size = np.full(num_bars, 0.25)  # bar width along the R axis
y_size = np.full(num_bars, 50.0)  # bar depth along the Users axis

ax.bar3d(x_pos, y_pos, z_pos, x_size, y_size, z_size, color='aqua')
plt.show()
This produces a simple 3d barplot:
However, I would like to plot similar bars next to the existing ones for the remaining two columns (B and C) in different colors and add a plot legend as well. I could not figure out how to achieve this.
As a side question, is it as well possible to show only values from df at x- and y-axis? The values are 2-4-6-8 and 80-400-1000, I do not wish pyplot to add additional values on those axis.
I have managed to find a solution myself. To solve the problem with the widely varying values, I added one to all times (to avoid negative logarithms) and applied np.log to all time columns. This brings the values onto a 0-10 scale and makes the plot much easier to read. After that I used a loop over the time columns to build the corresponding values, positions and colors, which I collected into one list each. I shifted y_pos for each column so that the bars are not drawn at the same position.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

columns = ['R', 'Users', 'A', 'B', 'C']
df = pd.DataFrame(
    {
        'R': [2, 2, 2, 4, 4, 4, 6, 6, 6, 8, 8],
        'Users': [80, 400, 1000, 80, 400, 1000, 80, 400, 1000, 80, 400],
        'A': [0.05381, 0.071907, 0.08767, 0.04493, 0.051825, 0.05295,
              0.05285, 0.0804, 0.0967, 0.09864, 0.1097],
        'B': [0.04287, 0.83652, 5.49683, 0.02604, .045599, 2.80836,
              0.02678, 0.32621, 1.41399, 0.19025, 0.2111],
        'C': [0.02192, 0.16217, 0.71645, 0.25314, 5.12239, 38.92758,
              1.60807, 262.4874, 8493, 11.6025, 6288],
    },
    columns=columns,
)

fig = plt.figure(figsize=(10, 10))
ax = plt.axes(projection="3d")

# Time columns to draw and the colour used for each of them.
c = ['A', 'B', 'C']
colors = ['r', 'g', 'b']

# log(1 + t) compresses the very different magnitudes of the three columns
# onto a comparable scale while keeping every value non-negative.
df[c] = np.log1p(df[c])

# Sizes are derived from the data instead of the literals 11 and 33.
num_bars = len(df)
num_series = len(c)
total_bars = num_bars * num_series
bar_depth = 50

x_size = np.full(total_bars, 0.25)
y_size = np.full(total_bars, float(bar_depth))

# Series i is shifted by i bar depths along the Users axis so that the
# three bars of one (R, Users) pair stand next to each other.
x_pos = np.tile(df["R"].to_numpy(), num_series)
y_pos = np.concatenate(
    [df["Users"].to_numpy() + i * bar_depth for i in range(num_series)])
z_pos = np.zeros(total_bars)
z_size = np.concatenate([df[col].to_numpy() for col in c])
z_color = np.repeat(colors, num_bars)

ax.bar3d(x_pos, y_pos, z_pos, x_size, y_size, z_size, color=z_color)
plt.xlabel('R')
plt.ylabel('Users')
ax.set_zlabel('Time')

# Hand-made legend handles: one coloured marker per time column.
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label=col,
           markerfacecolor=color, markersize=10)
    for col, color in zip(c, colors)
]
# Make legend
ax.legend(handles=legend_elements, loc='best')
# Set view
ax.view_init(elev=35., azim=35)
plt.show()
Final plot:

How to set matplotlib's shared subplots legend to be horizontal and at lower center position?

I'm using Matplotlib and Seaborn to plot four bar graphs with one shared legend. However, I can't make the legend to be horizontal and at the lower center. I tried to set the numbers in this line:
# Legend call from the question: the legend is attached to a single axes and
# positioned through bbox_to_anchor with loc=1; the multi-column option
# (ncol) is left commented out.
ax.legend(bbox_to_anchor=(0.99, -0.15),
loc=1,
fontsize=13,
# ncol=2
)
However, when I move the legend to the middle, the gap between the two subplot columns grows as well, which does not look good.
Here is my code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pdb
import pyautogui
import multiprocessing
from time import sleep
from matplotlib import patches as mpatches
def convert_to_grouped_bar_chart_format(data,
                                        col_1_name, col_2_name, col_3_name):
    """
    Reshape a wide table into the long format used for grouped bar charts.

    Each input row yields one output row per class column, in input order.

    Parameters
    ----------
    data: Pandas dataframe. Format:
             Method  Class1  Class2  Class3
        0  Method_1     0.1     0.2     0.3
        1  Method_2     0.6     0.5     0.4
    col_1_name: name of the output column holding the first input column.
    col_2_name: name of the output column holding the class (column) name.
    col_3_name: name of the output column holding the cell value.

    Returns
    -------
    data_grouped: Pandas dataframe with the three columns named above.
    """
    cls_list = data.columns[1:].tolist()
    # Row-major walk: every class of row 0 first, then every class of row 1...
    records = [
        (data.iloc[row_idx, 0], cls, data.iloc[row_idx, col_idx])
        for row_idx in range(data.shape[0])
        for col_idx, cls in enumerate(cls_list, start=1)
    ]
    return pd.DataFrame(records,
                        columns=[col_1_name, col_2_name, col_3_name])
def draw_four_bar_graph_seaborn():
    """Draw a grid of grouped bar charts with one shared, horizontal legend.

    One CSV file is read per panel and converted to long format; every bar is
    annotated with its height. The per-axes legends created by seaborn are
    removed and replaced by a single figure-level legend at the lower centre.
    The figure is shown, then saved to 'out.pdf' and closed.
    """
    file_list = [
        ['Measure1_ED.csv', 'Measure1_ES.csv'],
        ['Measure2_ED.csv', 'Measure2_ES.csv']
    ]
    n_rows = len(file_list)
    n_cols = len(file_list[0])
    # squeeze=False keeps `axes` two-dimensional for any grid size.
    fig, axes = plt.subplots(n_rows, n_cols, squeeze=False)

    legend_handles, legend_labels = [], []
    bar_width = 0.35
    for idx_row in range(n_rows):
        for idx_col in range(n_cols):
            file_name = file_list[idx_row][idx_col]
            data = pd.read_csv(file_name)
            col_1_name = 'Method'
            col_2_name = 'Class'
            # The measure name is the file-name part before the underscore.
            col_3_name = file_name.split('_')[0]
            ax = axes[idx_row, idx_col]
            data_grouped = convert_to_grouped_bar_chart_format(
                data, col_1_name, col_2_name, col_3_name)
            splot = sns.barplot(
                ax=ax,
                x=col_2_name,
                y=col_3_name,
                hue=col_1_name,
                palette="magma",
                data=data_grouped)
            splot.set_xlabel("", fontsize=1)
            splot.set_ylabel(col_3_name, fontsize=13)
            splot.tick_params(labelsize=13)
            title_subplot = 'Title 1'
            ax.set_title(title_subplot, fontsize=13)
            if col_3_name == 'Measure1':
                ax.set_ylim(0, 1.10)
            else:
                ax.set_ylim(0, 2.25)

            # Write each bar's height just above the bar.
            for p1 in splot.patches:
                splot.annotate('%.3f' % p1.get_height(),
                               (p1.get_x() + p1.get_width() / 2.,
                                p1.get_height()),
                               ha='center', va='center',
                               size=13,
                               xytext=(0, 8),
                               textcoords='offset points')

            # Keep the first panel's legend entries for the shared legend
            # and drop the legend that seaborn attached to this axes.
            if not legend_handles:
                legend_handles, legend_labels = \
                    ax.get_legend_handles_labels()
            panel_legend = ax.get_legend()
            if panel_legend is not None:
                panel_legend.remove()

            # Change width size
            for patch in ax.patches:
                diff = patch.get_width() - bar_width
                # we change the bar width
                patch.set_width(bar_width)
                # we recenter the bar
                patch.set_x(patch.get_x() + diff * .5)

    # One figure-level legend: a column per entry makes it horizontal, and
    # anchoring it to the figure leaves the spacing between panels alone.
    fig.legend(legend_handles, legend_labels,
               loc='lower center',
               ncol=max(1, len(legend_labels)),
               fontsize=13)
    # Reserve a strip at the bottom of the figure for the legend.
    plt.tight_layout(pad=0, rect=(0, 0.08, 1, 1))
    mng = plt.get_current_fig_manager()
    mng.window.state('zoomed') #works fine on Windows!
    plt.show()
    fig.savefig('out.pdf')
    plt.close()
def draw_graph_then_save_and_close_automatically(func=None, args=(),
                                                 wait_seconds=10,
                                                 coords_close_graph=(1365, 12)):
    """Run ``func`` in a child process, then click to close its window.

    Parameters
    ----------
    func: callable executed in a separate process (it draws the graph).
    args: positional arguments for ``func``. The default is an immutable
        tuple instead of a shared mutable list.
    wait_seconds: seconds to wait before clicking.
    coords_close_graph: screen position (x, y) of the click that closes
        the graph window.
    """
    multiprocessing.Process(target=func, args=args).start()
    sleep(wait_seconds)
    # Auto click to close graph
    pyautogui.moveTo(coords_close_graph)
    pyautogui.click()
def main():
    """Draw the four-panel bar chart through the auto-closing helper."""
    draw_graph_then_save_and_close_automatically(
        draw_four_bar_graph_seaborn, [])


if __name__ == '__main__':
    main()
Please help me, thank you very much.
Use a figure legend instead of placing the legend on one of your axes, and set the number of columns of the legend to the number of legend entries. Here is an example (I did not find yours to be minimal enough ^^):
import numpy as np
from matplotlib import pyplot as plt
# Random demo data: 10 samples for each of 3 series.
y = np.random.randint(0, 100, size=(10, 3))

# One figure with two side-by-side axes.
fig, (left_ax, right_ax) = plt.subplots(1, 2)

# First series on the left, the remaining two on the right.
left_ax.plot(y[:, 0])
right_ax.plot(y[:, 1:])

# Names of the legend entries.
legendEntries = ("a", "bcdefg", "h")
# Figure-level legend: one column per entry, anchored at the top centre.
entry_count = len(legendEntries)
fig.legend(legendEntries, ncol=entry_count, loc="upper center")
Here is a doc-example, emphasizing to use the argument ncol to force matplotlib to expand the legend horizontally. And here is a tutorial/example how you can place the legend of an axis outside the region of the axis.

Change figure position by mouse

I plotted two curves as shown in the following figure. I want to match the inset curve (ax1) with one of the main curves by moving ax1 with the mouse in the x and y directions. How can I do that?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Measured data: column A is read as time, column B as rate.
xl = pd.ExcelFile('Book2.xlsx')
column = xl.parse('Sheet1', names=['A', 'B', 'C'], header=None, index_col=False)
time = np.array(column['A'])
rate = np.array(column['B'])

fig, ax = plt.subplots()
tDd = np.arange(10e-4, 10e2, 1)
b = np.arange(0, 1, 0.1)

# Draw exactly one curve per b value. Looping over the time samples instead
# would rebuild every curve on each pass and redraw the last one repeatedly.
for b_val in b:
    if b_val == 0:
        # b == 0 is the limiting case of the general formula below.
        q_Dd = np.exp(-tDd)
    else:
        q_Dd = (1 + b_val * tDd) ** (-1 / b_val)
    ax.loglog(tDd, q_Dd, 'b-')
plt.xlim(10e-4, 10e1)
plt.ylim(10e-4, 10e0)

# Semi-transparent inset axes holding the measured data on top of the curves.
ax1 = fig.add_axes([0.3, 0.3, 0.5, 0.5])
ax1.patch.set_alpha(0.5)
ax1.loglog(time, rate, '.')

Pandas combine multiple subplots with same x axis into 1 bar chart

I am looping through a list containing 6 col_names. I loop by taking 3 cols at a time so i can print 3 subplots per iteration later.
I have 2 dataframes with same column names so they look identical except for the histograms of each column name.
I want to plot similar column names of both dataframes on the same subplot. Right now, im plotting their histograms on 2 separate subplots.
currently, for col 'A','B','C' in df_plot:
and for col 'A','B','C' in df_plot2:
I only want 3 charts, where the same column from both dataframes is combined into one chart, so that there are blue and yellow bars in the same chart.
Adding df_plot2 below doesn't work. I think I'm not defining my second axs properly, but I'm not sure how to do that.
col_name_list = ['A', 'B', 'C', 'D', 'E', 'F']
# Consecutive groups of three column names; range() works on Python 2 and 3.
chunk_list = [col_name_list[i:i + 3] for i in range(0, len(col_name_list), 3)]
for k, g in enumerate(chunk_list):
    df_plot = df[g]
    # Same columns with the entries equal to 0 masked out.
    df_plot2 = df[g][df[g] != 0]
    # One figure per group, one subplot per column of the group.
    fig, axs = plt.subplots(1, len(g), figsize=(50, 20))
    axs = axs.ravel()
    for j, x in enumerate(g):
        # Normalised frequencies of the most common values of column x.
        df_plot[x].value_counts(normalize=True).head().plot(
            kind='bar', ax=axs[j], position=0, title=x, fontsize=30)
        # Second series on the same axes (the part reported as not working).
        df_plot2[x].value_counts(normalize=True).head().plot(
            kind='bar', ax=axs[j], position=1, fontsize=30)
        axs[j].title.set_size(40)
    fig.tight_layout()
the solution is to plot on the same ax:
change axs[j] to axs
# Variant of the question's loop in which both Series are plotted with ax=axs.
# NOTE(review): at this point axs is the array produced by axs.ravel(), not a
# single Axes, and the title line below still indexes axs[j] -- confirm which
# axes the two plot calls are meant to target.
for k,g in enumerate(chunk_list):
df_plot = df[g]
df_plot2 = df[g][df[g] != 0]
fig, axs = plt.subplots(1,len(g),figsize = (50,20))
axs = axs.ravel()
for j,x in enumerate(g):
df_plot[x].value_counts(normalize=True).head().plot(kind='bar',ax=axs, position=0, title = x, fontsize = 30)
# second series, passed the same ax argument as the first call
df_plot2[x].value_counts(normalize=True).head().plot(kind='bar',ax=axs, position=1, fontsize = 30)
axs[j].title.set_size(40)
fig.tight_layout()
then just call plt.plot()
Example this will plot x and y on the same subplot:
import matplotlib.pyplot as plt
import numpy as np

x = np.arange(0, 10, 1)
y = np.arange(0, 20, 2)
# plt.subplots() returns the figure together with its single Axes, so no
# extra figure or gca() lookup is needed; both lines share that Axes.
fig, ax = plt.subplots()
ax.plot(x)
ax.plot(y)
plt.show()
EDIT:
There is now a squeeze keyword argument. This makes sure the result is always a 2D numpy array.
# squeeze=False: the axes come back as a 2D array for every grid shape.
fig, ax2d = plt.subplots(2, 2, squeeze=False)
If needed, turning that into a 1D array is easy:
# Flatten the 2D axes array returned by subplots(..., squeeze=False).
axli = ax2d.flatten()

Row Titles within a matplotlib GridSpec

I have a GridSpec-defined layout with two subgrids; one of them is supposed to include a colorbar.
import pylab as plt
import numpy as np
# Outer grid: a wide cell for the plots and a narrow cell for the colorbar.
gs_outer = plt.GridSpec(1, 2, width_ratios=(10, 1))
# 2x3 grid of plot cells nested inside the wide outer cell.
gs_inner = plt.matplotlib.gridspec.GridSpecFromSubplotSpec(2, 3, gs_outer[0])
ax = []
# range() instead of the Python 2-only xrange().
for i in range(6):
    panel = plt.subplot(gs_inner[i])
    plt.setp(panel.get_xticklabels(), visible=False)
    plt.setp(panel.get_yticklabels(), visible=False)
    ax.append(panel)
# Last entry: the axes in the narrow outer cell.
ax.append(plt.subplot(gs_outer[1]))
plt.show()
I'd now like to get for the left part a row-wise labeling like this:
I tried to add another GridSpec into the GridSpec, but that did not work out:
import pylab as plt
import numpy as np
fig = plt.figure()
sub_grid = plt.matplotlib.gridspec.GridSpecFromSubplotSpec

# Outer layout, then three stacked cells inside its first (wide) cell.
gs_outer = plt.GridSpec(1, 2, width_ratios=(10, 1))
gs_medium = sub_grid(3, 1, gs_outer[0])

# Axes covering the first stacked cell; it carries the title.
ax_title0 = plt.subplot(gs_medium[0])
ax_title0.set_title('Test!')

# A 1x3 grid placed inside that very same cell.
gs_row1 = sub_grid(1, 3, gs_medium[0])
ax00 = plt.subplot(gs_row1[0])  # toggle this line to see the effect
plt.show()
Adding the ax00 = plt.subplot... line seems to erase the previously created axis
Following CT Zhu's comment, I came up with the following answer (I don't really like it, but it seems to work):
import pylab as plt
import numpy as np
fig = plt.figure()
rows = 2
cols = 3
row_fraction = 9
# Height of a data row relative to a title row (whose height is 1).
row_size = row_fraction / float(rows)
gs_outer = plt.GridSpec(1, 2, width_ratios=(9, 1))
# Every logical row takes two grid rows: a thin title row, then a data row.
gs_plots = plt.matplotlib.gridspec.GridSpecFromSubplotSpec(
    rows * 2, cols, subplot_spec=gs_outer[0],
    height_ratios=rows * [1, row_size])

# Create title_axes: one axes spanning all columns of each even grid row.
title_ax = [plt.subplot(gs_plots[2 * ta, :]) for ta in range(rows)]

# Create Data axes on the odd grid rows. The first one is created unshared;
# all later ones share both of their axes with it.
ax = []
for row in range(rows):
    row_index = 2 * row + 1
    for col in range(cols):
        shared = ax[0] if ax else None
        ax.append(plt.subplot(gs_plots[row_index, col],
                              sharex=shared, sharey=shared))

# Delete Boxes and Markers from title axes
for ta in title_ax:
    ta.set_frame_on(False)  # public setter instead of writing ta._frameon
    ta.xaxis.set_visible(False)
    ta.yaxis.set_visible(False)

# Add labels to title axes:
row_labels = ['Row %d' % (number + 1) for number in range(rows)]
for ta, label in zip(title_ax, row_labels):
    ta.text(0.5, 0.5, label,
            horizontalalignment='center', verticalalignment='center')

# Add common colorbar
gs_cb = plt.matplotlib.gridspec.GridSpecFromSubplotSpec(
    1, 1, subplot_spec=gs_outer[1])
ax.append(plt.subplot(gs_cb[:, :]))
Of course, labeling and tick labels could be improved, but how to achieve that is likely already explained on SO.
Let's define an example grid pltgrid:
# Example grid: 2 rows x 3 columns of equally sized cells with extra spacing.
pltgrid = gridspec.GridSpec(
    nrows=2, ncols=3,
    height_ratios=[1] * 2, width_ratios=[1] * 3,
    hspace=0.6, wspace=0.3,
)
Before your for loop, you can define a list ax using map:
# Placeholder list 'ax0' ... 'ax6'; the entries are meant to be replaced later.
num = list(range(7))
ax = ['ax' + str(idx) for idx in num]
You may have a list plotnames containing the names. As an example, I'll plot a normal distribution Q-Q plot for each i in the for loop:
# range() instead of the Python 2-only xrange().
for i in range(6):
    # Figure.add_subplot is the method name; "fig.add.subplot" was a typo.
    ax[i] = fig.add_subplot(pltgrid[i])
    # Q-Q plot against the normal distribution, drawn on this subplot.
    res = stats.probplot(x, dist="norm", plot=ax[i])
    # set title for subplot using existing 'plotnames' list
    ax[i].set_title(plotnames[i])

Categories

Resources