Using pandas date as the X values of a bar graph Python - python

The code below outputs the bar graph using graph_vals and Monthly_indx, however the x values of the graph Monthly_indx are messed up and impossible to read when displayed. How would I be able to fix the x axis and make the graph readable?
Code:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
graph_vals = np.array([0.0, 15.58698486544634, 62.39522773472678, 53.102459267328186,
20.664598839134662, 37.43748926661284, 24.919985065059905, 2.462993387475203,
20.53746782047295, 87.85805111410333, 38.70428059326558, 31.56608531221292,
7.418190883067737, 17.10281023326888, 31.804189226978277, 30.05396124982128,
64.40080197706149, 33.29684656571501, 52.913157810513006, 20.913775029581142,
68.41187918506716, 47.56850690090707, 56.2736117598243, 31.25351961092795,
72.49099807107945, 44.95116145453175, 78.0696975321658, 54.60288966093484])
Monthly_indx = pd.date_range( pd.to_datetime('2019-05-08'), pd.to_datetime('2021-09-08'), freq='MS')
print(len(graph_vals), len(Monthly_indx))
ax1 = pd.DataFrame({'Monthly PnLs':graph_vals, 'Months':Monthly_indx })
ax = ax1.plot.bar(y='Monthly PnLs',x='Months', rot=0)
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
Output:

If you are using Matplotlib, use plt.setp() to tilt your x-labels:
plt.setp(ax.get_xticklabels(), rotation=30, horizontalalignment='right')
You can adjust the angle to your liking.

Related

How to add multiple custom ticks to seaborn boxplot

I generated a boxplot using seaborn. On the x axis, I would like to have, both the number of days (20, 25, 32) and the actual dates they refer to (2022-05-08, 2022-05-13, 2022-05-20).
I found a potential solution at the following link add custom tick with matplotlib. I'm trying to adapt it to my problem but I could only get the number of days or the dates, not both.
I really would appreciate any help. Thank you in advance for your time.
Please, find below my code and the desired output.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = [tick_label.get(t, ticks[i]) for i,t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()
Here is the desired output.
You can do that by adding these lines in place of ax.set_xticklabels(labels)
new_labels=["{}\n{}".format(a_, b_) for a_, b_ in zip(ticks, labels)]
ax.set_xticklabels(new_labels)
Output
Try this:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.DataFrame({'nb_days':[20,20,20,25,25,20,32,32,25,32,32],
'Dates':['2022-05-08','2022-05-08','2022-05-08','2022-05-13','2022-05-13','2022-05-08','2022-05-20','2022-05-20','2022-05-13','2022-05-20','2022-05-20'],
'score':[3,3.5,3.4,2,2.2,3,5,5.2,4,4.3,5]})
df['Dates'] = df['Dates'].apply(pd.to_datetime)
tick_label = dict(zip(df['nb_days'],df['Dates'].apply(lambda x: x.strftime('%Y-%m-%d')))) #My custom xtick label
#Plot
fig,ax = plt.subplots(figsize=(6,6))
ax = sns.boxplot(x='nb_days',y='score',data=df,color=None)
# iterate over boxes to change color
for i,box in enumerate(ax.artists):
box.set_edgecolor('red')
box.set_facecolor('white')
sns.stripplot(x='nb_days',y='score',data=df,color='black')
ticks = sorted(df['nb_days'].unique())
labels = ["{}\n".format(t)+tick_label.get(t, ticks[i]) for i, t in enumerate(ticks)]
ax.set_xticklabels(labels)
plt.tight_layout()
plt.show()
plt.close()

seaborn jointplot with same size plots

I'm doing a jointplot with a basemap, the problem is that when I add the basemap the main plot doesn't have the same size of the marginal plots. I've tried with different parameters without luck. Does anyone have an idea?
import seaborn as sns
import matplotlib.pyplot as plt
import contextily as ctx
import pandas as pd
##exaplme of the data
coords={'longitud':[-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464,
-62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452,
-62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622,
-63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785,
-63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898,
-63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301,
-63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115,
-63.211982035, -63.1689040153],
'latitud':[8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179,
8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804,
8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531,
8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656,
8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527,
8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299,
8.23477115627, 8.23552604027, 8.24327920905]}
df = pd.DataFrame(coords)
OSM_C = 'http://c.tile.openstreetmap.org/{z}/{x}/{y}.png'
joint_axes = sns.jointplot(
x='longitud', y='latitud', data=df, ec="r", s=5)
ctx.add_basemap(joint_axes.ax_joint,crs=4326,attribution=False,url=OSM_C)
adjust(hspace=0, wspace=0)
#plt.tight_layout()
plt.show()
Here is an approach that:
removes the axes sharing in the y-direction to be able to change the aspect to 'datalim'
sets the aspect to 'equal', 'datalim'
sets the y data limits of the marginal plot to be the same as the joint plot; this seems to need a redraw
The following code shows the idea (using imshow, as I don't have contextily installed):
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
coords = {'longitud' : [-62.2037376443, -62.1263309099, -62.1111660957, -62.2094232682, -62.2373117384, -62.4837603464, -62.4030570833, -62.3975699059, -62.7017114116, -62.7830883096, -62.7786038141, -62.7683234105, -62.7490101452, -62.7709656745, -63.1002199219, -63.1890252191, -63.1183018549, -63.069960016, -62.7957745659, -63.1715687622, -63.2156105034, -63.0634381954, -63.2243260588, -63.1153871895, -63.1068292891, -63.103945266, -63.046202785, -63.1002257551, -63.2076065143, -62.9766391316, -62.9639256604, -62.9911452446, -62.9819984159, -62.9693649898, -63.066770885, -62.9867441519, -62.9566360192, -62.962616287, -62.835080907, -63.0704805194, -62.8796906301, -63.0725050601, -63.2224345145, -63.1609069526, -63.0614466072, -62.8847887504, -63.1093652381, -62.822694115, -63.211982035, -63.1689040153],
'latitud' : [8.54644405234, 8.54344899107, 8.54223724187, 8.54290207992, 8.49122679072, 8.48386575122, 8.46450360179, 8.46404720757, 8.35310083084, 8.31701565261, 8.30258604829, 8.29974870902, 8.29281679496, 8.28939264064, 8.28785272804, 8.28221439317, 8.27978694565, 8.27864159366, 8.27634987807, 8.27619269053, 8.27236343925, 8.27258932351, 8.26833993531, 8.267530064, 8.26446669791, 8.26266392333, 8.2641092051, 8.26208837315, 8.26034269744, 8.26123972942, 8.25789799656, 8.25825378832, 8.25833002805, 8.25914612933, 8.2540499893, 8.25347956867, 8.2540932736, 8.25405171513, 8.2478564527, 8.24561857662, 8.2440865055, 8.24256528837, 8.24089278, 8.23877286416, 8.23782626443, 8.23865421655, 8.23733824299, 8.23477115627, 8.23552604027, 8.24327920905]}
df = pd.DataFrame(coords)
g = sns.jointplot(data=df, x='longitud', y='latitud')
ctx.add_basemap(g.ax_joint,crs=4326,attribution=False,url=OSM_C)
# g.ax_joint.imshow(np.random.rand(20, 10), cmap='spring', interpolation='bicubic',
# extent=[df['longitud'].min(), df['longitud'].max(), df['latitud'].min(), df['latitud'].max()])
for axes in g.ax_joint.get_shared_y_axes():
for ax in axes:
g.ax_joint.get_shared_y_axes().remove(ax)
g.ax_joint.set_aspect('equal', 'datalim')
g.fig.canvas.draw()
g.ax_marg_y.set_ylim(g.ax_joint.get_ylim())
plt.show()
You can still combine this approach with changing the figure's width or height, or adding more whitespace on top or below.

Matplotlib inline in Jupyter - how to contol when the plot is shown?

I have a function that creates a figure and for some reason it is shown in Jupyter notebook twice, even though I didn't run show at all. I pass the fig and ax as an output of this function, and plan to show it only later.
I get confused between plt, fig and ax functionaries and guess that the answer is hidden somewhere there.
Here is an anonymised version of my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
def plot_curve(dummydata):
# builds a chart
fig,ax = plt.subplots(1) # get subplots
fig.set_figheight(7)
fig.set_figwidth(12) #set shape
plt.plot(dummydata.x1, dummydata.y1,label = 'l1') #curve 1
plt.plot(dummydata.x2, dummydata.y2,label = 'l2') #curve2
plt.xlabel('xlabel') #labels
plt.ylabel('xlabel')
plt.yscale('linear') #scale and bounds
plt.ylim(0,100)
ymin,ymax= ax.get_ylim()
ax.axhline(1, color='k', linestyle=':', label = 'lab1') #guideline - horizontal
ax.axvline(2, color='r',linestyle='--', label = 'lab2') #guideline - vertical
ax.axvline(3, color='g',linestyle='--', label = 'lab3') #guideline - vertical
ax.arrow(1,2,3,0, head_width=0.1, head_length=0.01, fc='k', ec='k') # arrow
rect = mpl.patches.Rectangle((1,2), 2,3, alpha = 0.1, facecolor='yellow',
linewidth=0 , label= 'lab4') #yellow area patch
ax.add_patch(rect)
plt.legend()
plt.title('title')
return fig,ax
and then call it with:
for i in range(3):
dummydata = pd.DataFrame({
'x1':np.arange(1+i,100,0.1),
'y1':np.arange(11+i,110,0.1),
'x2':np.arange(1+i,100,0.1),
'y2':np.arange(21+i,120,0.1)
})
fig,ax = plot_curve(dummydata) #get the chart
What should I change to not show the figure by default, and show it only by my command?
Thanks
Try disabling matplotlib interactive mode using plt.ioff(). With interactive mode disabled the plots will only be shown with an explicit plt.show().
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
# Desactivate interactive mode
plt.ioff()
def plot_curve(dummydata):
# the same code as before
Then in another cell
for i in range(3):
dummydata = pd.DataFrame({
'x1':np.arange(1+i,100,0.1),
'y1':np.arange(11+i,110,0.1),
'x2':np.arange(1+i,100,0.1),
'y2':np.arange(21+i,120,0.1)
})
# I'am assuming this should not be in the for loop
# The plot will NOT be shown because we are not in interactive mode
fig, ax = plot_curve(dummydata) #get the chart
No plot will be shown yet.
Now in another cell
# Now ANY plot (figure) which was created and not shown yet will be finally shown
plt.show()
The plot is finally shown. Note that if you have created several plots all of them will be shown now.
Try this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib
With this importing you should not see the figure after plotting.
But you can see the figure by writing fig to IPython cell:
dummydata = pd.DataFrame({
'x1':np.arange(1,100,0.1),
'y1':np.arange(11,110,0.1),
'x2':np.arange(1,100,0.1),
'y2':np.arange(21,120,0.1)
})
fig,ax = plot_curve(dummydata) #get the chart
fig # Will now plot the figure.
Is this the desired output?

How to show label names in pandas groupby histogram plot

I can plot multiple histograms in a single plot using pandas but there are few things missing:
How to give the label.
I can only plot one figure, how to change it to layout=(3,1) or something else.
Also, in figure 1, all the bins are filled with solid colors, and its kind of difficult to know which is which, how to fill then with different markers (eg. crosses,slashes,etc)?
Here is the MWE:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7,label='species')
plt.legend()
Output:
To change layout I can use by keyword, but can't give them colors
HOW TO GIVE DIFFERENT COLORS?
df.hist('sepal_length',by='species',layout=(3,1))
plt.tight_layout()
Gives:
You can resolve to groupby:
fig,ax = plt.subplots()
hatches = ('\\', '//', '..') # fill pattern
for (i, d),hatch in zip(df.groupby('species'), hatches):
d['sepal_length'].hist(alpha=0.7, ax=ax, label=i, hatch=hatch)
ax.legend()
Output:
In pandas version 1.1.0 you can simply set the legend keyword to true.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7, legend = True)
output image
It's more code, but using pure matplotlib will always give you more control over the plots. For your second case:
import matplotlib.pyplot as plt
import numpy as np
from itertools import zip_longest
# Dictionary of color for each species
color_d = dict(zip_longest(df.species.unique(),
plt.rcParams['axes.prop_cycle'].by_key()['color']))
# Use the same bins for each
xmin = df.sepal_length.min()
xmax = df.sepal_length.max()
bins = np.linspace(xmin, xmax, 20)
# Set up correct number of subplots, space them out.
fig, ax = plt.subplots(nrows=df.species.nunique(), figsize=(4,8))
plt.subplots_adjust(hspace=0.4)
for i, (lab, gp) in enumerate(df.groupby('species')):
ax[i].hist(gp.sepal_length, ec='k', bins=bins, color=color_d[lab])
ax[i].set_title(lab)
# same xlim for each so we can see differences
ax[i].set_xlim(xmin, xmax)

Change the facecolor of boxplot in pandas

I need to change the colors of the boxplot drawn using pandas utility function. I can change most properties using the color argument but can't figure out how to change the facecolor of the box. Someone knows how to do it?
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props)
While I still recommend seaborn and raw matplotlib over the plotting interface in pandas, it turns out that you can pass patch_artist=True as a kwarg to df.plot.box, which will pass it as a kwarg to df.plot, which will pass is as a kwarg to matplotlib.Axes.boxplot.
import pandas as pd
import numpy as np
data = np.random.randn(100, 4)
labels = list("ABCD")
df = pd.DataFrame(data, columns=labels)
props = dict(boxes="DarkGreen", whiskers="DarkOrange", medians="DarkBlue", caps="Gray")
df.plot.box(color=props, patch_artist=True)
As suggested, I ended up creating a function to plot this, using raw matplotlib.
def plot_boxplot(data, ax):
bp = ax.boxplot(data.values, patch_artist=True)
for box in bp['boxes']:
box.set(color='DarkGreen')
box.set(facecolor='DarkGreen')
for whisker in bp['whiskers']:
whisker.set(color="DarkOrange")
for cap in bp['caps']:
cap.set(color="Gray")
for median in bp['medians']:
median.set(color="white")
ax.axhline(0, color="DarkBlue", linestyle=":")
ax.set_xticklabels(data.columns)
I suggest using df.plot.box with patch_artist=True and return_type='both' (which returns the matplotlib axes the boxplot is drawn on and a dictionary whose values are the matplotlib Lines of the boxplot) in order to have the best customization possibilities.
For example, given this data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(
data=np.random.randn(100, 4),
columns=list("ABCD")
)
you can set a specific color for all the boxes:
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch in props['boxes']:
patch.set_facecolor('lime')
plt.show()
you can set a specific color for each box:
colors = ['green','blue','yellow','red']
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors):
patch.set_facecolor(color)
plt.show()
you can easily integrate a colormap:
colors = np.random.randint(0,10, 4)
cm = plt.cm.get_cmap('rainbow')
colors_cm = [cm((c-colors.min())/(colors.max()-colors.min())) for c in colors]
fig,ax = plt.subplots(figsize=(9,6))
ax,props = df.plot.box(patch_artist=True, return_type='both', ax=ax)
for patch,color in zip(props['boxes'],colors_cm):
patch.set_facecolor(color)
# to add colorbar
fig.colorbar(plt.cm.ScalarMappable(
plt.cm.colors.Normalize(min(colors),max(colors)),
cmap='rainbow'
), ax=ax, cmap='rainbow')
plt.show()

Categories

Resources