When I do the following using Pandas in IPython, it only shows the last picture I drew. Is there a way to make them show sequentially in IPython?
def drawBar(colName):
df1=df[colName].value_counts().plot(kind='bar', title=colName)
drawBar("myBiscuit")
drawBar("myBedRoom")
...(many more drawBar)
For plotting graphs in the notebook, you'd use the IPython magic %matplotlib inline.
(a) Plotting each individual graph one after the other:
You would need to call plt.show() for each graph. This will return a long list of plots in your IPython.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
colNames = "ABCDEFGHI"
x = np.random.randint(0,5, size=(10, 9))
df = pd.DataFrame(x, columns=[letter for letter in colNames])
def drawBar(colName):
    """Draw a bar chart of the value counts of ``df[colName]``.

    The chart is titled with the column name. No ``ax`` is passed, so pandas
    chooses the axes to draw on. Nothing is returned.
    """
    df[colName].value_counts().plot(kind='bar', title=colName)


# Iterate over the column names directly instead of a hard-coded range(9),
# and call plt.show() after each chart so every column gets its own figure.
for name in colNames:
    drawBar(name)
    plt.show()
(b) Using subplots.
Creating several subplots can be done with plt.subplots(). Then using the ax keyword argument to the pandas plotting function, creates the graph on the specified axes.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
colNames = "ABCDEFGHI"
x = np.random.randint(0,5, size=(10, 9))
df = pd.DataFrame(x, columns=[letter for letter in colNames])
fig, axes = plt.subplots(3, 3)  # 3x3 grid: one axes per column


def drawBar(colName, ax):
    """Draw a bar chart of the value counts of ``df[colName]`` on ``ax``.

    The chart is titled with the column name. Nothing is returned.
    """
    df[colName].value_counts().plot(kind='bar', title=colName, ax=ax)


# Pair each column name with its axes directly rather than indexing by position.
for name, ax in zip(colNames, axes.flatten()):
    drawBar(name, ax)

plt.tight_layout()
Related
So in Spyder IPython and in Jupyter notebook, the following code is failing to create subplots:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
mydict = {'a': [1,2,3,4], 'b':[2,3,4,5], 'c':[3,4,5,6]}
df = pd.DataFrame(mydict)
fig, axes = plt.subplots(3,1)
axes[0] = plt.plot(df.a)
axes[1] = plt.plot(df.b)
axes[2] = plt.plot(df.c)
plt.show(fig)
and it gives back the following plot:
This also happens when I copy and paste the example code from the matplotlib webpage.
What I would like is for the three columns to be plotted in three different subplots.
If you create your axes using plt.subplots you are using the object oriented approach in matplotlib. Then you have to call plot() on the axes object, so axes[0].plot(df.a), not plt.plot.
What you are doing is a weird hybrid between the procedural and object oriented approach, and you also overwrite the axes objects that you created when you write axes[0] = plt.plot(...).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Build a small frame, then draw each column on its own axes object.
mydict = {
    'a': [1, 2, 3, 4],
    'b': [2, 3, 4, 5],
    'c': [3, 4, 5, 6],
}
df = pd.DataFrame(mydict)
fig, axes = plt.subplots(3, 1)
# Call plot() on each axes (object-oriented API), one column per subplot.
for axis, column in zip(axes, ('a', 'b', 'c')):
    axis.plot(df[column])
plt.show()
I have a lot of features in my data and I want to make a box plot for each feature. So for that
import pandas as pd
import seaborn as sns
plt.figure(figsize=(25,20))
for data in train_df.columns:
plt.subplot(7,4,i+1)
plt.subplots_adjust(hspace = 0.5, wspace = 0.5)
ax =sns.boxplot(train_df[data])
I did this
and the output is
All the plots are on one image. I want something like
(not with skew graphs but with box plots)
What changes do I need to make?
In your code, I cannot see where the i is coming from and also it's not clear how ax was assigned.
Maybe try something like this, first an example data frame:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
train_df = pd.concat([pd.Series(np.random.normal(i,1,100)) for i in range(12)],axis=1)
Set up fig and a flattened ax for each subplot:
fig,ax = plt.subplots(4,3,figsize=(10,10))
ax = ax.flatten()
The most basic would be to call sns.boxplot assigning ax inside the function:
# Draw one box plot per column, each on its own entry of the flattened axes.
for idx, column in enumerate(train_df.columns):
    sns.boxplot(train_df[column], ax=ax[idx])
I have a function that creates a figure and for some reason it is shown in Jupyter notebook twice, even though I didn't run show at all. I pass the fig and ax as an output of this function, and plan to show it only later.
I get confused between the plt, fig and ax functionality and guess that the answer is hidden somewhere there.
Here is an anonymised version of my code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
def plot_curve(dummydata):
    """Build a chart with two curves, guidelines, an arrow and a shaded patch.

    Everything is drawn through the ``ax`` object created here rather than
    through pyplot's implicit "current axes", so the function does not depend
    on which figure happens to be active.

    Parameters
    ----------
    dummydata : object exposing ``x1``, ``y1``, ``x2`` and ``y2`` attributes
        (for example a pandas DataFrame with those columns).

    Returns
    -------
    (fig, ax) : the created figure and its single axes.
    """
    # figsize is (width, height)
    fig, ax = plt.subplots(1, figsize=(12, 7))

    ax.plot(dummydata.x1, dummydata.y1, label='l1')  # curve 1
    ax.plot(dummydata.x2, dummydata.y2, label='l2')  # curve 2

    ax.set_xlabel('xlabel')  # labels
    ax.set_ylabel('xlabel')
    ax.set_yscale('linear')  # scale and bounds
    ax.set_ylim(0, 100)

    ax.axhline(1, color='k', linestyle=':', label='lab1')  # guideline - horizontal
    ax.axvline(2, color='r', linestyle='--', label='lab2')  # guideline - vertical
    ax.axvline(3, color='g', linestyle='--', label='lab3')  # guideline - vertical
    ax.arrow(1, 2, 3, 0, head_width=0.1, head_length=0.01, fc='k', ec='k')  # arrow

    # yellow area patch
    rect = mpl.patches.Rectangle((1, 2), 2, 3, alpha=0.1, facecolor='yellow',
                                 linewidth=0, label='lab4')
    ax.add_patch(rect)

    ax.legend()
    ax.set_title('title')
    return fig, ax
and then call it with:
for i in range(3):
dummydata = pd.DataFrame({
'x1':np.arange(1+i,100,0.1),
'y1':np.arange(11+i,110,0.1),
'x2':np.arange(1+i,100,0.1),
'y2':np.arange(21+i,120,0.1)
})
fig,ax = plot_curve(dummydata) #get the chart
What should I change to not show the figure by default, and show it only by my command?
Thanks
Try disabling matplotlib interactive mode using plt.ioff(). With interactive mode disabled the plots will only be shown with an explicit plt.show().
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
# Deactivate interactive mode
plt.ioff()
def plot_curve(dummydata):
# the same code as before
Then in another cell
for i in range(3):
dummydata = pd.DataFrame({
'x1':np.arange(1+i,100,0.1),
'y1':np.arange(11+i,110,0.1),
'x2':np.arange(1+i,100,0.1),
'y2':np.arange(21+i,120,0.1)
})
# I am assuming this should not be in the for loop
# The plot will NOT be shown because we are not in interactive mode
fig, ax = plot_curve(dummydata) #get the chart
No plot will be shown yet.
Now in another cell
# Now ANY plot (figure) which was created and not shown yet will be finally shown
plt.show()
The plot is finally shown. Note that if you have created several plots all of them will be shown now.
Try this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib
With this importing you should not see the figure after plotting.
But you can see the figure by writing fig to IPython cell:
dummydata = pd.DataFrame({
'x1':np.arange(1,100,0.1),
'y1':np.arange(11,110,0.1),
'x2':np.arange(1,100,0.1),
'y2':np.arange(21,120,0.1)
})
fig,ax = plot_curve(dummydata) #get the chart
fig # Will now plot the figure.
Is this the desired output?
I'm using Seaborn in a Jupyter notebook to plot histograms like this:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
df = pd.read_csv('CTG.csv', sep=',')
sns.distplot(df['LBE'])
I have an array of columns with values that I want to plot histogram for and I tried plotting a histogram for each of them:
continous = ['b', 'e', 'LBE', 'LB', 'AC']
for column in continous:
sns.distplot(df[column])
And I get this result - only one plot with (presumably) all histograms:
My desired result is multiple histograms that looks like this (one for each variable):
How can I do this?
Insert plt.figure() before each call to sns.distplot() .
Here's an example with plt.figure():
Here's an example without plt.figure():
Complete code:
# imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [6, 2]
%matplotlib inline
# sample time series data
np.random.seed(123)  # fixed seed so the sample data is reproducible
df = pd.DataFrame(np.random.randint(-10, 12, size=(300, 4)), columns=list('ABCD'))
# pd.datetime is deprecated and no longer available in current pandas;
# date_range accepts the start date as a string directly.
datelist = pd.date_range('2014-07-01', periods=300).tolist()
df['dates'] = datelist
df = df.set_index(['dates'])
df.index = pd.to_datetime(df.index)
df.iloc[0] = 0  # start every series at zero
df = df.cumsum()  # running total of the random steps
# create distplots
for column in df.columns:
plt.figure() # <==================== here!
sns.distplot(df[column])
distplot was deprecated in seaborn 0.11.0 and is scheduled for removal in 0.14.0. You can, however, use sns.histplot() to plot histogram distributions of the entire dataframe (numerical features only) in the following way:
# Size the grid from the number of columns instead of assuming exactly 2x5,
# so frames with more than 10 columns do not run past the available axes.
n_cols = 5
n_rows = max(1, -(-len(df.columns) // n_cols))  # ceiling division, at least one row
# squeeze=False keeps `axes` two-dimensional even when there is a single row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 2.5 * n_rows), squeeze=False)
ax = axes.flatten()

for i, col in enumerate(df.columns):
    sns.histplot(df[col], ax=ax[i])  # histogram call
    ax[i].set_title(col)
    # remove scientific notation for both axes
    ax[i].ticklabel_format(style='plain', axis='both')

# Hide the axes left over when the column count does not fill the grid.
for unused in ax[len(df.columns):]:
    unused.set_visible(False)

fig.tight_layout(w_pad=6, h_pad=4)  # change padding
plt.show()
If you specifically want to estimate the probability density function of a continuous random variable using kernel density estimation (mimicking the default behavior of sns.distplot()), then add kde=True inside the sns.histplot() call, and you will have curves overlaying the histograms.
Also works when looping with plt.show() inside:
for column in df.columns:
sns.distplot(df[column])
plt.show()
I am running the following code to draw histograms in a 3-by-3 grid for 9 variables. However, it plots only one variable.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def draw_histograms(df, variables, n_rows, n_cols):
fig=plt.figure()
for i, var_name in enumerate(variables):
ax=fig.add_subplot(n_rows,n_cols,i+1)
df[var_name].hist(bins=10,ax=ax)
plt.title(var_name+"Distribution")
plt.show()
You're adding subplots correctly but you call plt.show for each added subplot which causes what has been drawn so far to be shown, i.e. one plot. If you're for instance plotting inline in IPython you will only see the last plot drawn.
Matplotlib provides some nice examples of how to use subplots.
Your problem is fixed like:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def draw_histograms(df, variables, n_rows, n_cols):
    """Plot a 10-bin histogram of each listed column on a subplot grid.

    Args:
        df: frame whose columns are plotted.
        variables: column names to plot, one subplot each, in order.
        n_rows: number of rows in the subplot grid.
        n_cols: number of columns in the subplot grid.

    A new figure is created, filled, laid out and shown once at the end;
    nothing is returned.
    """
    figure = plt.figure()
    # add_subplot positions are 1-based, hence start=1
    for position, column in enumerate(variables, start=1):
        axis = figure.add_subplot(n_rows, n_cols, position)
        df[column].hist(bins=10, ax=axis)
        axis.set_title(column + " Distribution")
    figure.tight_layout()  # Improves appearance a bit.
    plt.show()
test = pd.DataFrame(np.random.randn(30, 9), columns=map(str, range(9)))
draw_histograms(test, test.columns, 3, 3)
Which gives a plot like:
In case you don't really worry about titles, here's a one-liner
df = pd.DataFrame(np.random.randint(10, size=(100, 9)))
df.hist(color='k', alpha=0.5, bins=10)