Drawing a surface 3D plot using "plotnine" library - python

Question : Using the python library 'plotnine', can we draw an interactive 3D surface plot?
Backup Explanations
What I'd like to do is, under python environment, creating an interactive 3D plot with R plot grammars like we do with ggplot2 library in R. It's because I have hard time remembering grammars of matplotlib and other libraries like seaborn.
An interactive 3D plot means a 3D plot that you can zoom in, zoom out, and scroll up and down, etc.
It seems like only Java supported plotting libraries scuh as bokeh or plotly can create interactive 3D plots. But I want to create it with the library 'plotnine' because the library supports ggplot-like grammar, which is easy to remember.
For example, can I draw a 3D surface plot like the one below with the library 'plotnine'?
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
# Read data from a csv
z_data =
pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/
master/api_docs/mt_bruno_elevation.csv')
data = [
go.Surface(
z=z_data.as_matrix()
)]
layout = go.Layout(
title='Mt Bruno Elevation',
autosize=False,
width=500,
height=500,
margin=dict(
l=65,
r=50,
b=65,
t=90
)
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='elevations-3d-surface')
The codes above make a figure like below.
You can check out the complete interactive 3D surface plot in this link
p.s. If i can draw an interactive 3D plot with ggplot-like grammar, it does not have to be the 'plotnine' library that we should use.
Thank you for your time for reading this question!

It is possible, if you are willing to expand plotnine a bit, and caveats apply. The final code is as simple as:
(
ggplot_3d(mt_bruno_long)
+ aes(x='x', y='y', z='height')
+ geom_polygon_3d(size=0.01)
+ theme_minimal()
)
And the result:
First, you need to transform your data into long format:
z_data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv', index_col=0)
z = z_data.values
nrows, ncols = z.shape
x, y = np.linspace(0, 1, nrows), np.linspace(0, 1, ncols)
x, y = np.meshgrid(x, y)
mt_bruno_long = pd.DataFrame({'x': x.flatten(), 'y': y.flatten(), 'height': z.flatten()})
Then, we need to create equivalents for ggplot and geom_polygon with awareness of the third dimension.
Since writing this answer the code is is now available in plotnine3d package, so you could just:
from plotnine3d import ggplot_3d, geom_polygon_3d
But for completeness, this is how (relatively) simple it is:
from plotnine import ggplot, geom_polygon
from plotnine.utils import to_rgba, SIZE_FACTOR
class ggplot_3d(ggplot):
def _create_figure(self):
figure = plt.figure()
axs = [plt.axes(projection='3d')]
figure._themeable = {}
self.figure = figure
self.axs = axs
return figure, axs
def _draw_labels(self):
ax = self.axs[0]
ax.set_xlabel(self.layout.xlabel(self.labels))
ax.set_ylabel(self.layout.ylabel(self.labels))
ax.set_zlabel(self.labels['z'])
class geom_polygon_3d(geom_polygon):
REQUIRED_AES = {'x', 'y', 'z'}
#staticmethod
def draw_group(data, panel_params, coord, ax, **params):
data = coord.transform(data, panel_params, munch=True)
data['size'] *= SIZE_FACTOR
grouper = data.groupby('group', sort=False)
for i, (group, df) in enumerate(grouper):
fill = to_rgba(df['fill'], df['alpha'])
polyc = ax.plot_trisurf(
df['x'].values,
df['y'].values,
df['z'].values,
facecolors=fill if any(fill) else 'none',
edgecolors=df['color'] if any(df['color']) else 'none',
linestyles=df['linetype'],
linewidths=df['size'],
zorder=params['zorder'],
rasterized=params['raster'],
)
# workaround for https://github.com/matplotlib/matplotlib/issues/9535
if len(set(fill)) == 1:
polyc.set_facecolors(fill[0])
For interactivity you can use any matplotlib backend of your liking, I went with ipympl (pip install ipympl and then %matplotlib widget in a jupyter notebook cell).
The caveats are:
while shading works nice, plot_trisurf does not handle facecolors well (there is a PR to fix it here)
you may want to add a parameter allowing to disable shading, see matplotlib 3D shading examples
faceting, flipping axes etc will not work without further fiddling - this could however be addressed in the future as discussed in this plotnine issue about bringing 3D plots to plotnine.
Edit: In case if the dataset becomes unavailable, here is a self-contained example based on matplotlib's documentation:
import numpy as np
n_radii = 8
n_angles = 36
radii = np.linspace(0.125, 1.0, n_radii)
angles = np.linspace(0, 2*np.pi, n_angles, endpoint=False)[..., np.newaxis]
x = np.append(0, (radii*np.cos(angles)).flatten())
y = np.append(0, (radii*np.sin(angles)).flatten())
z = np.sin(-x*y)
df = pd.DataFrame(dict(x=x,y=y,z=z))
(
ggplot_3d(df)
+ aes(x='x', y='y', z='z')
+ geom_polygon_3d(size=0.01)
+ theme_minimal()
)

Related

Setting TextPath length and width independently in matplotlib

Below is the plot I generated using axes.text option,
ax[0].text(row.TIMESTAMP, row.HIGH+(0.1*width),row['candlestick_pattern'], fontsize=5, rotation='vertical')
I'm trying to achieve the same output using TextPath and PathPatch, in order to increase/decrease the font size when I zoom in/out of the plot, and below is the code I have (taken from here and here )
textPath = TextPath((data_coord[1], -data_coord[0]), row['candlestick_pattern'], size=2)
pathPatch = PathPatch(textPath, color="black")
transform = mpl.transforms.Affine2D().rotate_deg(90) + ax[0].transData
pathPatch.set_transform(transform)
ax[0].add_patch(pathPatch)
Output with this is
You could see that the text is cramped into a very small region and its not what I want. I would want to set the font size to a smaller value and increase the width (in vertical mode - height) of the TextPath. Is that possible?
Below is the complete code with which we can reproduce the problem for the dataset here
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.textpath import TextPath
from matplotlib.patches import PathPatch
from mplfinance.original_flavor import candlestick_ohlc
from matplotlib import transforms as tf
import pandas as pd
plotDf = pd.read_csv("data.csv")
plotDf.reset_index(inplace=True)
del plotDf['TIMESTAMP']
del plotDf['TOTTRDQTY']
fig, ax = plt.subplots(1)
candlestick_ohlc(ax,plotDf.values,width=0.6, \
colorup='green', colordown='red', alpha=0.8)
maxHigh = plotDf['HIGH'].max()
minLow = plotDf['LOW'].min()
width = maxHigh - minLow
threshold = (width)*0.6
for idx, row in plotDf.iterrows():
if (row['candlestick_pattern'] != 'NO_PATTERN'):
if (row.HIGH < (threshold+minLow)):
data_coord = (idx, row.HIGH+(0.1*width))
#ax.text(idx, row.HIGH+(0.1*width), row['candlestick_pattern'], fontsize=5, rotation='vertical')
else:
data_coord = (idx, row.LOW-(0.4*width))
#ax.text(idx, row.LOW-(0.4*width), row['candlestick_pattern'], fontsize=5, rotation='vertical')
textPath = TextPath((data_coord[1], -data_coord[0]), row['candlestick_pattern'], size=2)
pathPatch = PathPatch(textPath, color="black")
transform = mpl.transforms.Affine2D().rotate_deg(90) + ax.transData
pathPatch.set_transform(transform)
ax.add_patch(pathPatch)
fig.autofmt_xdate()
fig.tight_layout()
fig.suptitle("test", fontsize=16)
fig.set_size_inches(10.5, 10.5)
plt.subplots_adjust(top=0.95)
plt.show()
Apparently, your problem is a scaling problem. Messing around with .scale(x,y), ax.set_xlim and ax.set_ylim might allow you to "unsqueeze" the text. You can also try to set an anchor for your plot like done here:
ts = ax.transData
coords = ts.transform([0,0]) #<-anchor
tr = mpl.transforms.Affine2D().rotate_deg_around(coords[0],coords[1],90).scale(1,3) #<- scale
t = ts + tr
#<extra code>
pathPatch = PathPatch(textPath, color="black", transform = t)
EDIT
I tried many things, but I couldn't find a good way of doing it. I'll leave below what I tried and some resources that might help.
The way to properly use .rotate_deg_around would be like such:
ts = ax.transData
# ts = fig.dpi_scale_trans #this guy uses the fig scale, if you're interested
coords = ts.transform([data_coord[0],data_coord[1]])
converter = (coords[0]/data_coord[0], coords[1]/data_coord[1])
#plot the anchor points for visualization:
plt.plot(coords[0]/converter[0], coords[1]/converter[1],'xk')
tr = mpl.transforms.Affine2D().rotate_deg_around(coords[0]/converter[0],coords[1]/converter[1],90).scale(converter[0],converter[1])
pathPatch = PathPatch(textPath, color="black", transform = tr)
ax.add_patch(pathPatch)
Nonetheless, the results are still similar to what you had at the beginning:
It appears that TextPath does not behave like it should when using transform. Here .get_offset_transform is used, and it apparently fixes this sort of issue, but I was unable to use it since the plt has a Line type.
Also, you will see that if you increase the y axis in .scale, you can start to see the text, but it spreads the coordinates as well. One idea you can try is setting a good readable y scale (use ax.set_ylim to see your text) and then use that value as a divisor when setting the coordinates for your plot.
There are also some ideas here that might serve you.

How to fasten scatterplot of seaborn when there is a big data(many points) to plot?

I found that the seaborn.scatterplot() method in Python is much slower than the function geom_point of ggplot2 in R when there are a large number of data points to be plotted.
For example the same data in Python(seaborn):
sample_data = data.sample(10000)
ax = sns.scatterplot(data=sample_data, x="x", y="y", hue="cate")
and in R(ggplot2):
sample_data <- data[sample(nrow(data),10000),]
p <- ggplot(sample_data,aes(x=x,y=y)) +
geom_point(aes(color=cate))
The former is much slower than the latter.
I wonder what causes the differences in efficiency and how to make the seaborn as fast as ggplotor even better?
Why don't I use R directly: I prefer the grammar style of Python and I could do more customization with matplotlib after plotting with seaborn while ggplot may not be that convenient to do this.
To plot "big data" scatter points, i would suggest Plotly library.
It has a Scattergl function to manage up to billions data points.
Here's an example with 100.000 data points:
import plotly.graph_objects as go
import numpy as np
N = 100000
r = np.random.uniform(0, 1, N)
theta = np.random.uniform(0, 2*np.pi, N)
fig = go.Figure(data=go.Scattergl(
x = r * np.cos(theta), # non-uniform distribution
y = r * np.sin(theta), # zoom to see more points at the center
mode='markers',
marker=dict(
color=np.random.randn(N),
colorscale='Viridis',
line_width=1
)
))
fig.show()
Result:

matplotlib one legend entry too much

I am trying to do an errorplot with different marker-colors in python 2.7. Additionally I am including to line plots.
I found a way here: matplotlib errorbar plot - using a custom colormap using a scatter plot for the colors and errorbar() for the bars.
As you can see in my example code, in the legend I always get one entry too much (just at the top). I cannot figure out, why. Tried to exclude it, which did not work. Did not find something helpful either, as I cannot really call the first legend entry.
Any ideas?
Here's my code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
data = pd.DataFrame()
data['x'] = range(10)
data['y'] = data.x
data['err'] = .5
data['col'] = np.where(data.y<5,'r','b')
### setup 1-1 line
lin = pd.DataFrame() # setting 1-1 line
lin['x'] = range(10)
lin['y'] = range(10)
### setup 1-2 line
lin['x2'] = lin.x
lin['y2'] = lin.y
plt.errorbar(data.x, data.y, yerr = data.err, \
xerr = .3, fmt=' ', markersize=4, zorder = 1)
plt.scatter(data.x,data.y, marker='o', color = data.col, zorder = 2)
plt.plot(lin.x,lin.y,'g-')
plt.plot(lin.x2,1.8*lin.y2,'r-')
plt.legend(['','1-1 line', '1-1.8 line','holla','molla'], loc=4)
What I get is:
Thanks for your help!
To clean this whole thing up, I post a proper answer instead of comments.
The problem could be solved by upgrading matplotlib from 1.3.1 to 1.5.1. Easy as that.

Extracting data from a scatter plot in Matplotlib

I'm writing an interface to do scatter plots in Matplotlib, and I'd like to be able to access the data from a python script.
Right now, my interface is doing:
scat = self.axes.scatter(x_data, y_data, label=label, s=size)
With a standard axes.plot I can do something like:
line = self.axes.plot(x_data, y_data)
data = line[0].get_data()
and that works. What I'd like is something similar, but with the scatter plot.
Can anyone suggest a similar method?
A scatter plot is drawn using PathCollection, so the x, y positions are called "offsets":
import numpy as np
import matplotlib.pyplot as plt
f, ax = plt.subplots()
scat = ax.scatter(np.random.randn(10), np.random.randn(10))
print scat.get_offsets()
[[-0.17477838 -0.47777312]
[-0.97296068 -0.98685982]
[-0.18880346 1.16780445]
[-1.65280361 0.2182109 ]
[ 0.92655599 -1.40315507]
[-0.10468029 0.82269317]
[-0.09516654 -0.80651275]
[ 0.01400393 -1.1474178 ]
[ 1.6800925 0.16243422]
[-1.91496598 -2.12578586]]

Discrete colorbar in matplotlib [duplicate]

How does one set the color of a line in matplotlib with scalar values provided at run time using a colormap (say jet)? I tried a couple of different approaches here and I think I'm stumped. values[] is a storted array of scalars. curves are a set of 1-d arrays, and labels are an array of text strings. Each of the arrays have the same length.
fig = plt.figure()
ax = fig.add_subplot(111)
jet = colors.Colormap('jet')
cNorm = colors.Normalize(vmin=0, vmax=values[-1])
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
lines = []
for idx in range(len(curves)):
line = curves[idx]
colorVal = scalarMap.to_rgba(values[idx])
retLine, = ax.plot(line, color=colorVal)
#retLine.set_color()
lines.append(retLine)
ax.legend(lines, labels, loc='upper right')
ax.grid()
plt.show()
The error you are receiving is due to how you define jet. You are creating the base class Colormap with the name 'jet', but this is very different from getting the default definition of the 'jet' colormap. This base class should never be created directly, and only the subclasses should be instantiated.
What you've found with your example is a buggy behavior in Matplotlib. There should be a clearer error message generated when this code is run.
This is an updated version of your example:
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
import numpy as np
# define some random data that emulates your indeded code:
NCURVES = 10
np.random.seed(101)
curves = [np.random.random(20) for i in range(NCURVES)]
values = range(NCURVES)
fig = plt.figure()
ax = fig.add_subplot(111)
# replace the next line
#jet = colors.Colormap('jet')
# with
jet = cm = plt.get_cmap('jet')
cNorm = colors.Normalize(vmin=0, vmax=values[-1])
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
print scalarMap.get_clim()
lines = []
for idx in range(len(curves)):
line = curves[idx]
colorVal = scalarMap.to_rgba(values[idx])
colorText = (
'color: (%4.2f,%4.2f,%4.2f)'%(colorVal[0],colorVal[1],colorVal[2])
)
retLine, = ax.plot(line,
color=colorVal,
label=colorText)
lines.append(retLine)
#added this to get the legend to work
handles,labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, loc='upper right')
ax.grid()
plt.show()
Resulting in:
Using a ScalarMappable is an improvement over the approach presented in my related answer:
creating over 20 unique legend colors using matplotlib
I thought it would be beneficial to include what I consider to be a more simple method using numpy's linspace coupled with matplotlib's cm-type object. It's possible that the above solution is for an older version. I am using the python 3.4.3, matplotlib 1.4.3, and numpy 1.9.3., and my solution is as follows.
import matplotlib.pyplot as plt
from matplotlib import cm
from numpy import linspace
start = 0.0
stop = 1.0
number_of_lines= 1000
cm_subsection = linspace(start, stop, number_of_lines)
colors = [ cm.jet(x) for x in cm_subsection ]
for i, color in enumerate(colors):
plt.axhline(i, color=color)
plt.ylabel('Line Number')
plt.show()
This results in 1000 uniquely-colored lines that span the entire cm.jet colormap as pictured below. If you run this script you'll find that you can zoom in on the individual lines.
Now say I want my 1000 line colors to just span the greenish portion between lines 400 to 600. I simply change my start and stop values to 0.4 and 0.6 and this results in using only 20% of the cm.jet color map between 0.4 and 0.6.
So in a one line summary you can create a list of rgba colors from a matplotlib.cm colormap accordingly:
colors = [ cm.jet(x) for x in linspace(start, stop, number_of_lines) ]
In this case I use the commonly invoked map named jet but you can find the complete list of colormaps available in your matplotlib version by invoking:
>>> from matplotlib import cm
>>> dir(cm)
A combination of line styles, markers, and qualitative colors from matplotlib:
import itertools
import matplotlib as mpl
import matplotlib.pyplot as plt
N = 8*4+10
l_styles = ['-','--','-.',':']
m_styles = ['','.','o','^','*']
colormap = mpl.cm.Dark2.colors # Qualitative colormap
for i,(marker,linestyle,color) in zip(range(N),itertools.product(m_styles,l_styles, colormap)):
plt.plot([0,1,2],[0,2*i,2*i], color=color, linestyle=linestyle,marker=marker,label=i)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,ncol=4);
UPDATE: Supporting not only ListedColormap, but also LinearSegmentedColormap
import itertools
import matplotlib.pyplot as plt
Ncolors = 8
#colormap = plt.cm.Dark2# ListedColormap
colormap = plt.cm.viridis# LinearSegmentedColormap
Ncolors = min(colormap.N,Ncolors)
mapcolors = [colormap(int(x*colormap.N/Ncolors)) for x in range(Ncolors)]
N = Ncolors*4+10
l_styles = ['-','--','-.',':']
m_styles = ['','.','o','^','*']
fig,ax = plt.subplots(gridspec_kw=dict(right=0.6))
for i,(marker,linestyle,color) in zip(range(N),itertools.product(m_styles,l_styles, mapcolors)):
ax.plot([0,1,2],[0,2*i,2*i], color=color, linestyle=linestyle,marker=marker,label=i)
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,ncol=3,prop={'size': 8})
U may do as I have written from my deleted account (ban for new posts :( there was). Its rather simple and nice looking.
Im using 3-rd one of these 3 ones usually, also I wasny checking 1 and 2 version.
from matplotlib.pyplot import cm
import numpy as np
#variable n should be number of curves to plot (I skipped this earlier thinking that it is obvious when looking at picture - sorry my bad mistake xD): n=len(array_of_curves_to_plot)
#version 1:
color=cm.rainbow(np.linspace(0,1,n))
for i,c in zip(range(n),color):
ax1.plot(x, y,c=c)
#or version 2: - faster and better:
color=iter(cm.rainbow(np.linspace(0,1,n)))
c=next(color)
plt.plot(x,y,c=c)
#or version 3:
color=iter(cm.rainbow(np.linspace(0,1,n)))
for i in range(n):
c=next(color)
ax1.plot(x, y,c=c)
example of 3:
Ship RAO of Roll vs Ikeda damping in function of Roll amplitude A44

Categories

Resources