How can I properly plot 2 colorbars with pandas scatter plots? Right now the first colorbar is duplicated:
https://gist.github.com/denfromufa/45c446690a69265d39dd
import numpy as np
import pandas as pd
# 100 rows of uniform random numbers, one column for each letter A-E.
df = pd.DataFrame(np.random.random(size=(100, 5)), columns=list('ABCDE'))
df.head()
%matplotlib inline
# First scatter: B against A, coloured by column C and sized by column D.
# The axes object returned by df.plot is kept in ax1 so it can be reused.
ax1=df.plot(kind='scatter',x='A',y='B',c='C',s=df.D*50,cmap='summer',linewidth=0,sharex=False);
# Second scatter: C against A, coloured by D and sized by B, drawn onto the
# same axes through ax=ax1.
# NOTE(review): both calls pass their own c/cmap, so each presumably adds a
# colorbar; this is where the duplicated colorbar is observed -- confirm
# against the pandas plotting docs.
df.plot(ax=ax1,kind='scatter',x='A',y='C',c='D',s=df.B*50,cmap='winter',linewidth=0,sharex=False);
You can use the matplotlib functions directly:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Random demo frame: 100 rows, columns A-E.
df = pd.DataFrame(np.random.random((100, 5)), columns=list('ABCDE'))

# Every plt.scatter call returns its own mappable; passing that mappable to
# plt.colorbar attaches a separate colorbar for each colour map.
sc1 = plt.scatter(df['A'], df['B'], s=df['D'] * 50, c=df['C'], cmap='summer')
plt.colorbar(sc1)
sc2 = plt.scatter(df['A'], df['C'], s=df['B'] * 50, c=df['D'], cmap='winter')
plt.colorbar(sc2)
which produces
Related
Assuming that I receive multiple dataframes sequentially from some tasks that I perform, how do I create regplots that show up together in one figure, similar to what is shown below?
Assume the code below generates the dataframes with random numbers. I would like to show those 4 regplots together.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
sns.set()
# NOTE(review): the loop-body indentation was lost in the original paste and
# is restored here. plt.show() is assumed to belong inside the loop (one
# separate figure per iteration) -- confirm against the original post.
for i in range(4):
    # Two independent columns of 10 random integers in [1, 10).
    df1 = pd.DataFrame(np.random.randint(1,10,10),columns=['A'])
    df2 = pd.DataFrame(np.random.randint(1,10,10),columns=['B'])
    df = pd.concat([df1,df2],axis=1)
    # The asker's original call, kept exactly as posted.
    sns.regplot(df1,df2)
    plt.show()
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
# A 2x2 grid of axes: one subplot for each of the four dataframes.
fig, axs = plt.subplots(2, 2)

# axs.flat walks the four axes in order, so no separate counter is needed.
for ax in axs.flat:
    # Two independent columns of 10 random integers in [1, 10).
    df1 = pd.DataFrame(np.random.randint(1, 10, 10), columns=['A'])
    df2 = pd.DataFrame(np.random.randint(1, 10, 10), columns=['B'])
    df = pd.concat([df1, df2], axis=1)
    # ax=ax directs this regression plot to the current subplot.
    sns.regplot(x='A', y='B', data=df, ax=ax)

# Lay out and display the figure once, after all four subplots are drawn.
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import pandas as pd
import pylab as pl
import numpy as np
%matplotlib inline
!wget -O FuelConsumption.csv https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/ML0101ENv3/labs/FuelConsumptionCo2.csv
# Load FuelConsumption.csv into a DataFrame and preview the first rows.
df = pd.read_csv("FuelConsumption.csv")
df.head()
# Keep only the four columns used below.
df1 = df[['ENGINESIZE','CYLINDERS','FUELCONSUMPTION_COMB','CO2EMISSIONS']]
df1.head(9)
# Scatter of CO2 emissions against engine size, drawn in black.
plt.scatter(df1.ENGINESIZE , df1.CO2EMISSIONS , color = "black")
# NOTE(review): the next two lines ASSIGN strings to the names plt.xlabel and
# plt.ylabel instead of calling them, so no axis label is set here.
plt.xlabel=("enginesize")
plt.ylabel=("emission")
plt.show()
When I try to run this code, I get a scatter plot without axis labels.
How can I get the axis labels to show?
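You need to call xlabel and ylabel with the label names as arguments, rather than assigning to them.
If this throws `TypeError: 'str' object is not callable`, restart the IPython kernel: the earlier assignments `plt.xlabel=(...)` and `plt.ylabel=(...)` replaced those functions with strings for the rest of the session.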
import matplotlib.pyplot as plt
import pandas as pd
import pylab as pl
import numpy as np
%matplotlib inline
# Read the CSV and preview it.
df = pd.read_csv("FuelConsumption.csv")
df.head()

# Narrow the frame to the columns of interest.
df1 = df[['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB', 'CO2EMISSIONS']]
df1.head(9)

# Engine size on the x axis, CO2 emissions on the y axis.
plt.scatter(x=df1['ENGINESIZE'], y=df1['CO2EMISSIONS'], color="black")
# xlabel/ylabel are called with the label text as their argument.
plt.xlabel("enginesize")
plt.ylabel("emission")
plt.show()
How can we use .pipe() to get plotly express line plot?
Code
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
# Ten random integer counts in [1, 20), indexed by the ten days
# 2020-01-01 .. 2020-01-10.
df = pd.DataFrame(
    {'count': np.random.randint(1, 20, 10)},
    index=pd.date_range(start='2020-01-01', end='2020-01-10'),
)
Line plot (this works)
# Sum the counts over 2-day bins and keep the result in df1 ...
df1 = df.resample('2D').sum()
# ... so that both the frame and its index can be handed to px.line by name.
px.line(df1,x=df1.index,y='count')
Using pipe
Here, creating the intermediate df1 is unnecessary. How can we use pipe instead?
My attempt
# Attempt: hand px.line to .pipe() directly and give the x values as a lambda.
# NOTE(review): .pipe() presumably forwards its keyword arguments to px.line
# unchanged, so x arrives as the lambda object itself rather than as the
# resampled index -- confirm against the pandas DataFrame.pipe docs.
df.resample('2D').sum().pipe(px.line,x=lambda x: x.index,y='count')
# this does not work, gives empty plot
How to get the correct image?
You are close:
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
# Ten random integer counts in [1, 20), indexed by ten consecutive days.
df = pd.DataFrame(
    {'count': np.random.randint(1, 20, 10)},
    index=pd.date_range(start='2020-01-01', end='2020-01-10'),
)

# line plot with pipe: the lambda receives the resampled frame, so the frame
# and its index can both be given to px.line without an intermediate name.
(
    df.resample('2D')
    .sum()
    .pipe(lambda frame: px.line(frame, x=frame.index, y='count'))
)
Output:
I want to plot a dataframe where y values are stored as ndarrays within a column
i.e.:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Empty 4-row frame; every row is filled in by the loop below.
df = pd.DataFrame(index=np.arange(0,4), columns=('sample','class','values'))
# NOTE: the loop-body indentation was lost in the original paste and is
# restored here. Each sample gets a 'raw' row and a 'predict' row, and each
# row stores a 5-element random array in its 'values' cell.
for iloc in [0,2]:
    df.loc[iloc] = {'sample':iloc,
                    'class':'raw',
                    'values':np.random.random(5)}
    df.loc[iloc+1] = {'sample':iloc,
                      'class':'predict',
                      'values':np.random.random(5)}
grid = sns.FacetGrid(df, col="class", row="sample")
# The asker's original call, kept exactly as posted.
grid.map(plt.plot, np.arange(0,5), "value")
TypeError: unhashable type: 'numpy.ndarray'
Do I need to break out the ndarrays into separate rows? Is there a simple way to do this?
Thanks
This is quite an unusual way of storing data in a dataframe. Two options (I'd recommend option B):
A. Custom mapping in seaborn
Indeed seaborn does not support such format natively. You may construct your own function to plot to the grid though.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Empty 4-row frame; every row is filled in by the loop below.
df = pd.DataFrame(index=np.arange(0,4), columns=('sample','class','values'))
# Each sample gets a 'raw' row and a 'predict' row, and each row stores a
# 5-element random array in its 'values' cell.
for iloc in [0,2]:
    df.loc[iloc] = {'sample':iloc,
                    'class':'raw',
                    'values':np.random.random(5)}
    df.loc[iloc+1] = {'sample':iloc,
                      'class':'predict',
                      'values':np.random.random(5)}

grid = sns.FacetGrid(df, col="class", row="sample")


def plot(*args,**kwargs):
    """Plot the array held in the first element of the first positional argument.

    Written to be passed to ``grid.map``. Only ``args[0]`` is used; its first
    element (``.iloc[0]``) is handed to ``plt.plot`` and all keyword arguments
    are forwarded unchanged.

    NOTE(review): this assumes each facet receives a one-row 'values' column,
    which holds for the frame built above because every (sample, class) pair
    occurs exactly once.
    """
    plt.plot(args[0].iloc[0], **kwargs)


grid.map(plot, "values")
B. Unnesting
However I would advise to "unnest" the dataframe first and get rid of the numpy arrays inside the cells.
The question "pandas: When cell contents are lists, create a row for each element in the list" shows a way to do that.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Empty 4-row frame; every row is filled in by the loop below.
df = pd.DataFrame(index=np.arange(0,4), columns=('sample','class','values'))
# Each sample gets a 'raw' row and a 'predict' row, and each row stores a
# 5-element random array in its 'values' cell.
for iloc in [0,2]:
    df.loc[iloc] = {'sample':iloc,
                    'class':'raw',
                    'values':np.random.random(5)}
    df.loc[iloc+1] = {'sample':iloc,
                      'class':'predict',
                      'values':np.random.random(5)}

# Unnest: expand every array into one column per element, stack those columns
# into rows, and turn the (sample, class, element position) index levels back
# into ordinary columns.
res = df.set_index(["sample", "class"])["values"].apply(pd.Series).stack().reset_index()
# The third column is the element's position within its original array.
res.columns = ["sample", "class", "original_index", "values"]
Then use the FacetGrid in the usual way.
# One facet per (sample, class) pair: rows are samples, columns are classes.
# Within each facet the values are plotted against their position in the
# original array.
facets = sns.FacetGrid(data=res, row="sample", col="class")
facets.map(plt.plot, "original_index", "values")
I am trying to plot a factorplot with seaborn.
Here is my Python code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import style
import pandas as pd
import seaborn as sns
import io
# Apply matplotlib's 'ggplot' style.
style.use('ggplot')
#load dataset into df2 dataframe (the CSV is read with ';' as delimiter)
df2 = pd.read_csv('C:/Users/Demonstrator/Downloads/power.csv',delimiter=';')
#drop NaN rows from df2 to build df_no_missing (as an independent copy)
df_no_missing = df2.dropna().copy()
df_no_missing.head()
# depassement: P_ACT_KW minus P_SOUSCR where that difference is non-negative,
# otherwise 0.
df_no_missing['depassement'] = np.where((df_no_missing['P_ACT_KW'] - df_no_missing['P_SOUSCR']) < 0, 0, df_no_missing['P_ACT_KW'] - df_no_missing['P_SOUSCR'])
#build a factorplot
# NOTE(review): x is the TIMESTAMP column; presumably every distinct timestamp
# is treated as its own category, which may be why the call appears to hang
# on a large file -- confirm.
sns.factorplot(data=df_no_missing, x="TIMESTAMP", y="P_ACT_KW")
It is impossible to view a result; the process just seems to stay busy.