Plotting multiple pandas DataFrames in one *3D* scatterplotplot - python

I want to plot two dataframes in one 3D scatterplot.
This is the code I have for one dataframe:
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
...
sns.set(style = "darkgrid")
fig = plt.figure()
ax = fig.add_subplot(111, projection = '3d')
x = df['xitem']
y = df['yitem']
z = df['zitem']
ax.set_xlabel("X Label")
ax.set_ylabel("Y Label")
ax.set_zlabel("Z Label")
ax.scatter(x, y, z)
plt.show()
I can't figure out how to adjust this so I have two different dataframes plotted on the same plot but with different colors. How can I do this?
Edit: I'm looking for how to use two dataframes for a 3D plot specifically.

Assuming that you have two DataFrame called df1 and df2, both containing columns 'xitem', 'yitem', 'zitem', you can plot them in this way:
for curr_df, c in zip((df1, df2), ('b', 'r')):
ax.scatter(*curr_df[['xitem', 'yitem', 'zitem']].values.T, color=c)
Here a complete example:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style = "darkgrid")
df1 = pd.DataFrame(
data=np.random.random((100, 3)) + np.array([1, 1, 1]),
columns=['xitem', 'yitem', 'zitem'],
)
df2 = pd.DataFrame(
data=np.random.random((100, 3)),
columns=['xitem', 'yitem', 'zitem'],
)
fig = plt.figure()
ax = fig.add_subplot(111, projection = '3d')
for curr_df, c in zip((df1, df2), ('b', 'r')):
ax.scatter(*curr_df[['xitem', 'yitem', 'zitem']].values.T, color=c)
ax.set_xlabel("X Label")
ax.set_ylabel("Y Label")
ax.set_zlabel("Z Label")
plt.show()

Related

Matplotlib subplot not plotting

I have the following code:
import pandas.util.testing as testing
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib as mpl
df = testing.makeTimeDataFrame(freq='MS')
with mpl.rc_context(rc={'font.family': 'serif', 'font.weight': 'bold', 'font.size': 12}):
fig = plt.figure(figsize= (12, 6))
fig.add_subplot(2, 2, (1,2))
ax2 = ax.twinx()
df['A'].plot(ax=ax, color = 'g')
df['B'].plot(ax=ax2, color ='g')
fig.add_subplot(223)
df['C'].plot(color='r')
fig.add_subplot(224)
df['D'].plot()
fig.tight_layout()
plt.show()
Which produces the following plot.
I am trying to plot df['A'] and df['B'] on the same top plot. Could you please advise what I have overlooked?
one little detail is missing. before calling twinx you need to assign ax to the first subplot. Then it'll work.
ax = fig.add_subplot(2, 2, (1,2))

Markers in beginning and end of line plots

I have 5 datasets that have thousands of x and y coordinates grouped by 'frame' that create 5 trajectory plots. I'd like to mark the first and last coordinates for each plot but having difficulty figuring it out. I am using Jupiter Notebook.
mean_pos1 = gr1.mean()
mean_pos2 = gr2.mean()
mean_pos3 = gr3.mean()
mean_pos4 = gr4.mean()
mean_pos5 = gr5.mean()
plt.figure()
xlim=(200, 1500)
ylim=(0, 1200)
ax1 = mean_pos1.plot(x='x', y='y',color='blue',label='Dolphin A'); ax1.set_title('mean trajectory');
ax2 = mean_pos2.plot(x='x', y='y',color='red',label='Dolphin B'); ax2.set_title('mean trajectory');
ax3 = mean_pos3.plot(x='x', y='y',color='green',label='Dolphin C'); ax3.set_title('mean trajectory');
ax4 = mean_pos4.plot(x='x', y='y',color='magenta',label='Dolphin D'); ax4.set_title('mean trajectory');
ax5 = mean_pos5.plot(x='x', y='y',color='cyan',label='Dolphin E'); ax5.set_title('mean trajectory');
ax1.set_xlim(xlim)
ax1.set_ylim(ylim)
ax2.set_xlim(xlim)
ax2.set_ylim(ylim)
ax3.set_xlim(xlim)
ax3.set_ylim(ylim)
ax4.set_xlim(xlim)
ax4.set_ylim(ylim)
ax5.set_xlim(xlim)
ax5.set_ylim(ylim)
plt.show()
the output of them looks like this:
Use the scatter method to plot the markers separately on the same axis by grabbing the first and last elements from your x and y series:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'x': np.random.normal(3,0.2,10), 'y': np.random.normal(5,0.3,10)})
fig, ax = plt.subplots()
df.plot(x='x', y='y', ax=ax)
ax.scatter(df['x'].iloc[0], df['y'].iloc[0], marker='o', color='red')
ax.scatter(df['x'].iloc[-1], df['y'].iloc[-1], marker='o', color='red')
plt.show()

How to use pandas df.plot.scatter to make a figure with subplots

Hello how can i make a figure with scatter subplots using pandas? Its working with plot, but not with scatter.
Here an Example
import numpy as np
import pandas as pd
matrix = np.random.rand(200,5)
df = pd.DataFrame(matrix,columns=['index','A','B','C','D'])
#single plot, working with
df.plot(
kind='scatter',
x='index',
y='A',
s= 0.5
)
# not workig
df.plot(
subplots=True,
kind='scatter',
x='index',
y=['A','B','C'],
s= 0.5
)
Error
raise ValueError(self._kind + " requires an x and y column")
ValueError: scatter requires an x and y column
Edit:
Solution to make a figure with subplots with using df.plot
(Thanks to #Fourier)
import numpy as np
import pandas as pd
matrix = np.random.rand(200,5)#random data
df = pd.DataFrame(matrix,columns=['index','A','B','C','D']) #make df
#get a list for subplots
labels = list(df.columns)
labels.remove('index')
df.plot(
layout=(-1, 5),
kind="line",
x='index',
y=labels,
subplots = True,
sharex = True,
ls="none",
marker="o")
Would this work for you:
import pandas as pd
import numpy as np
df = pd.DataFrame({"index":np.arange(5),"A":np.random.rand(5),"B":np.random.rand(5),"C":np.random.rand(5)})
df.plot(kind="line", x="index", y=["A","B","C"], subplots=True, sharex=True, ls="none", marker="o")
Output
Note: This uses a line plot with invisible lines. For a scatter, I would go and loop over it.
for column in df.columns[:-1]: #[:-1] ignores the index column for my random sample
df.plot(kind="scatter", x="index", y=column)
EDIT
In order to add custom ylabels you can do the following:
axes = df.plot(kind='line', x="index", y=["A","B","C"], subplots=True, sharex=True, ls="none", marker="o", legend=False)
ylabels = ["foo","bar","baz"]
for ax, label in zip(axes, ylabels):
ax.set_ylabel(label)

Pandas groupby scatter plot in a single plot

This is a followup question on this solution. There is automatic assignment of different colors when kind=line but for scatter plot that's not the case.
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
# random df
df = pd.DataFrame(np.random.randint(0,10,size=(25, 3)), columns=['label','x','y'])
# plot groupby results on the same canvas
fig, ax = plt.subplots(figsize=(8,6))
df.groupby('label').plot(kind='scatter', x = "x", y = "y", ax=ax)
There is a connected issue here. Is there any simple workaround for this?
Update:
When I try the solution recommended by #ImportanceOfBeingErnest for a label column with strings, its not working!
df = pd.DataFrame(np.random.randint(0,10,size=(5, 2)), columns=['x','y'])
df['label'] = ['yes','no','yes','yes','no']
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(x='x', y='y', c='label', data=df)
It throws following error,
ValueError: Invalid RGBA argument: 'yes'
During handling of the above exception, another exception occurred:
You can use sns:
df = pd.DataFrame(np.random.randint(0,10,size=(100, 2)), columns=['x','y'])
df['label'] = np.random.choice(['yes','no','yes','yes','no'], 100)
fig, ax = plt.subplots(figsize=(8,6))
sns.scatterplot(x='x', y='y', hue='label', data=df)
plt.show()
Output:
Another option is as what suggested in the comment: Map value to number, by categorical type:
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(df.x, df.y, c = pd.Categorical(df.label).codes, cmap='tab20b')
plt.show()
Output:
You can loop over groupby and create a scatter per group. That is efficient for less than ~10 categories.
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
# random df
df = pd.DataFrame(np.random.randint(0,10,size=(5, 2)), columns=['x','y'])
df['label'] = ['yes','no','yes','yes','no']
# plot groupby results on the same canvas
fig, ax = plt.subplots(figsize=(8,6))
for n, grp in df.groupby('label'):
ax.scatter(x = "x", y = "y", data=grp, label=n)
ax.legend(title="Label")
plt.show()
Alternatively you can create a single scatter like
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
# random df
df = pd.DataFrame(np.random.randint(0,10,size=(5, 2)), columns=['x','y'])
df['label'] = ['yes','no','yes','yes','no']
# plot groupby results on the same canvas
fig, ax = plt.subplots(figsize=(8,6))
u, df["label_num"] = np.unique(df["label"], return_inverse=True)
sc = ax.scatter(x = "x", y = "y", c = "label_num", data=df)
ax.legend(sc.legend_elements()[0], u, title="Label")
plt.show()
Incase we have a grouped data already, then I find the following solution could be useful.
df = pd.DataFrame(np.random.randint(0,10,size=(5, 2)), columns=['x','y'])
df['label'] = ['yes','no','yes','yes','no']
fig, ax = plt.subplots(figsize=(7,3))
def plot_grouped_df(grouped_df,
ax, x='x', y='y', cmap = plt.cm.autumn_r):
colors = cmap(np.linspace(0.5, 1, len(grouped_df)))
for i, (name,group) in enumerate(grouped_df):
group.plot(ax=ax,
kind='scatter',
x=x, y=y,
color=colors[i],
label = name)
# now we can use this function to plot the groupby data with categorical values
plot_grouped_df(df.groupby('label'),ax)

Creating charts with Pandas

My code is inside a Jupyter Notebook.
I can create a chart using Method 1 below, and have it look exactly as I'd like it to look.
But when I try with Method 2, which uses subplot, I don't know how to make it look the same (setting the figsize, colors, legend off to the right).
How do I use subplot, and have it look the same as Method 1?
Thank you in advance for your help!
# Using Numpy and Pandas
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.style as style
df = pd.DataFrame(np.random.randint(0,100,size=(4, 4)), columns=list('ABCD'))
style.use('fivethirtyeight')
# Colorblind-friendly colors
colors = [[0,0,0], [230/255,159/255,0], [86/255,180/255,233/255], [0,158/255,115/255]]
# Method 1
chart = df.plot(figsize = (10,5), color = colors)
chart.yaxis.label.set_visible(True)
chart.set_ylabel("Bitcoin Price")
chart.set_xlabel("Time")
chart.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()
# Method 2
fig, ax = plt.subplots()
ax.plot(df)
ax.set_ylabel("Bitcoin Price")
ax.set_xlabel("Time")
plt.show()
You just replace char by ax, like this
ax.yaxis.label.set_visible(True)
ax.set_ylabel("Bitcoin Price") ax.set_xlabel("Time") ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
I'm thinking of two ways to get a result that might be useful for you. pd.DataFrame.plot returns an Axes object you can pass all the methods you want, so both examples just replace chart for ax.
Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.style as style
df = pd.DataFrame(np.random.randint(0,100,size=(4, 4)), columns=list('ABCD'))
style.use('fivethirtyeight')
# Colorblind-friendly colors
colors = [[0,0,0], [230/255,159/255,0], [86/255,180/255,233/255], [0,158/255,115/255]]
Iterating over df
colors_gen = (x for x in colors) # we will also be iterating over the colors
fig, ax = plt.subplots(figsize = (10,5))
for i in df: # iterate over columns...
ax.plot(df[i], color=next(colors_gen)) # and plot one at a time
ax.set_ylabel("Bitcoin Price")
ax.set_xlabel("Time")
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
ax.yaxis.label.set_visible(True)
plt.show()
Use pd.DataFrame.plot but pass ax as an argument
fig, ax = plt.subplots(figsize = (10,5))
df.plot(color=colors, ax=ax)
ax.set_ylabel("Bitcoin Price")
ax.set_xlabel("Time")
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
ax.yaxis.label.set_visible(True)
plt.show()

Categories

Resources