I simply want to change the the labels on the y-axis to show more numbers. For example with a range from 0 - 40, it shows numbers 0, 10, 20, 30, 40.
I want to see 0, 1, 2, 3, 4, ... 38, 39, 40.
Also I want a grid (supporting lines or how it's called) to be shown.
My code looks like this, where I have a dataframe with train dataset names, classifier names and times.
I am creating a boxplot for each classifier showing times spent on all datasets by that classifier.
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
## agg backend is used to create plot as a .png file
mpl.use('agg')
# read dataset
data = pd.read_csv("classifier_times_sml.csv", ";")
# extract data
g = data.sort_values("time", ascending=False)[["classifier", "train", "time"]].groupby("classifier")
# Create a figure instance
fig = plt.figure(1, figsize=(20, 30))
# Create an axes instance
ax = fig.add_subplot(111)
labels = []
times = []
counter = 0
for group, group_df in g:
# Create the boxplot
times.append( np.asarray(group_df["time"]) )
labels.append(group)
# Create the boxplot
bp = ax.boxplot(times, showfliers=False )
ax.set_xticklabels(labels, rotation=90)
# Save the figure
fig.savefig('times_sml.png', bbox_inches='tight')
I have been searching thotoughly and didn't find any useful option for the boxplot. The grid option for the ax.boxplot(...) is not allowed here. What am I doing wrong?
Use ax.set_yticks(np.arange(min,max,step)) or plt.yticks(np.arange(min,max,step))
and ax.grid(True) to turn on the grids.
Are you looking for something like this ?
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import seaborn as sns;sns.set()
from numpy import arange
data = np.random.randint(0,40,size=40)
fig = plt.figure(1, figsize=(20, 30))
ax = fig.add_subplot(111)
ax.boxplot(data)
ax.set_yticks(np.arange(0, 40, 1.0))
ax.grid(True)
plt.show()
Related
I generated a boxplot using seaborn. On the x axis, I would like to have, both the number of days (20, 25, 32) and the actual dates they refer to (2022-05-08, 2022-05-13, 2022-05-20).
I found a potential solution at the following link add custom tick with matplotlib. I'm trying to adapt it to my problem but I could only get the number of days or the dates, not both.
I really would appreciate any help. Thank you in advance for your time.
Please, find below my code and the desired output.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = [tick_label.get(t, ticks[i]) for i,t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()
Here is the desired output.
You can do that by adding these lines in place of ax.set_xticklabels(labels)
new_labels=["{}\n{}".format(a_, b_) for a_, b_ in zip(ticks, labels)]
ax.set_xticklabels(new_labels)
Output
Try this:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = ["{}\n".format(t)+tick_label.get(t, ticks[i]) for i, t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()
I'm using geopandas (python 3.8.2) to plot variables contained in a geodataframe.
I would like to plot on a single figure, all datasets with their own colormap.
The problem is that the plot shows only the last dataset, which corresponds to 'var_5' with colormap 'Reds'. Even if I set: ax = geodataframe.plot() it does not work.
Any idea ? Many Thanks!
import geopandas as gpd
import matplotlib.pyplot as plt
filename = 'myfile.geojson'
geodataframe = gpd.read_file(filename)
cmaps = ['plasma', 'Greens', 'Blues', 'binary', 'Reds']
variables = ['var_1', 'var_2', 'var_3', 'var_4', 'var_5']
plt.rcParams['figure.figsize'] = (20, 10)
ax = plt.gca()
for i, var in enumerate(variables):
geodataframe.plot(ax=ax, column=var, cmap=cmaps[i])
plt.show()
Edit:
After taking into account the answers, I got this image:
I have lot of feature in data and i want to make box plot for each feature. So for that
import pandas as pd
import seaborn as sns
plt.figure(figsize=(25,20))
for data in train_df.columns:
plt.subplot(7,4,i+1)
plt.subplots_adjust(hspace = 0.5, wspace = 0.5)
ax =sns.boxplot(train_df[data])
I did this
and the output is
All the plot are on one image i want something like
( not with skew graphs but with box plot )
What changes i need to do ?
In your code, I cannot see where the i is coming from and also it's not clear how ax was assigned.
Maybe try something like this, first an example data frame:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
train_df = pd.concat([pd.Series(np.random.normal(i,1,100)) for i in range(12)],axis=1)
Set up fig and a flattened ax for each subplot:
fig,ax = plt.subplots(4,3,figsize=(10,10))
ax = ax.flatten()
The most basic would be to call sns.boxplot assigning ax inside the function:
for i,data in enumerate(train_df.columns):
sns.boxplot(train_df[data],ax=ax[i])
I am trying to save a large dendrogram made from a large table (10000+ rows, 18 columns), and I came with this code
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
import numpy as np
import pandas as pd
data = pd.read_csv("Input.txt", header = 0, index_col = None,\
sep = "\t", memory_map = True)
data = data.fillna(0)
Matrix = data.iloc[:,-18:]
Linkage_Matrix = linkage (Matrix, "ward")
fig=plt.figure(figsize=(20, 200))
#fig, ax = plt.subplots(1, 1, tight_layout=False)
ax = fig.add_axes([0.1,0.1,0.75,0.75])
#fig.title('Hierarchical Clustering Dendrogram')
ax.set_title("Hierarchical Clustering Dendrogram")
ax.set_xlabel("distance")
ax.set_xlabel("name")
dendrogram(
Linkage_Matrix,
orientation ="left",
leaf_rotation=0.,
leaf_font_size=12.,
labels = list(data.loc[:,"name"])
)
ax.set_yticklabels(list(data.loc[:,"name"]), minor=False)
ax.yaxis.set_label_position('right')
ax.yaxis.tick_right()
plt.savefig("plt1.png", dpi = 320, format= "png", bbox_inches=None)
But unfortunately, it doesn't save the axis, while I left some space as showed in these:
Matplotlib savefig does not save axes
Why is my xlabel cut off in my matplotlib plot?
Matplotlib savefig image trim
Plotting hierarchical clustering dendrograms for large data sets
Dendrogram generated by scipy-cluster customisation
I have a correct display in the console, which I can save, but the dpi are not good, and ideally I also would like to switch to svg to be able to set the level of readability afterwards.
Any insights would be greatly appreciated
Removing this line
ax = fig.add_axes([0.1,0.1,0.75,0.75])
and setting bbox_inches='tight' in plt.savefig() makes it work for me.
Also, since you are loading the data with pandas, note how you can declare the 'name' column as index and use these index values as labels.
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
import numpy as np
import pandas as pd
data = pd.read_csv('input.txt', header=0, index_col=['name'], sep="\t")
data = data.fillna(0)
link_matrix = linkage(data, 'ward')
fig, ax = plt.subplots(1, 1, figsize=(20,200))
ax.set_title('Hierarchical Clustering Dendrogram')
ax.set_xlabel('distance')
ax.set_ylabel('name')
dendrogram(
link_matrix,
orientation='left',
leaf_rotation=0.,
leaf_font_size=12.,
labels=data.index.values
)
ax.yaxis.set_label_position('right')
ax.yaxis.tick_right()
plt.savefig('plt1.png', dpi=320, format='png', bbox_inches='tight')
I need to change the colors of the boxplot drawn using pandas utility function. I can change most properties using the color argument but can't figure out how to change the facecolor of the box. Someone knows how to do it?
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props)
While I still recommend seaborn and raw matplotlib over the plotting interface in pandas, it turns out that you can pass patch_artist=True as a kwarg to df.plot.box, which will pass it as a kwarg to df.plot, which will pass is as a kwarg to matplotlib.Axes.boxplot.
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props, patch_artist=True)
As suggested, I ended up creating a function to plot this, using raw matplotlib.
def plot_boxplot(data, ax):
bp = ax.boxplot(data.values, patch_artist=True)
for box in bp['boxes']:
box.set(color='DarkGreen')
box.set(facecolor='DarkGreen')
for whisker in bp['whiskers']:
whisker.set(color="DarkOrange")
for cap in bp['caps']:
cap.set(color="Gray")
for median in bp['medians']:
median.set(color="white")
ax.axhline(0, color="DarkBlue", linestyle=":")
ax.set_xticklabels(data.columns)
I suggest using df.plot.box with patch_artist=True and return_type='both' (which returns the matplotlib axes the boxplot is drawn on and a dictionary whose values are the matplotlib Lines of the boxplot) in order to have the best customization possibilities.
For example, given this data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(
data=np.random.randn(100, 4),
columns=list("ABCD")
)
you can set a specific color for all the boxes:
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch in props['boxes']:
patch.set_facecolor('lime')
plt.show()
you can set a specific color for each box:
colors = ['green','blue','yellow','red']
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors):
patch.set_facecolor(color)
plt.show()
you can easily integrate a colormap:
colors = np.random.randint(0,10, 4)
cm = plt.cm.get_cmap('rainbow')
colors_cm = [cm((c-colors.min())/(colors.max()-colors.min())) for c in colors]
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors_cm):
patch.set_facecolor(color)
# to add colorbar
fig.colorbar(plt.cm.ScalarMappable(
plt.cm.colors.Normalize(min(colors),max(colors)),
cmap='rainbow'
), ax=ax, cmap='rainbow')
plt.show()