Copy graph into several subplots - python

I'm working with a large model ensemble. I'm calculating KDE probability density functions with pandas - at least for now it is the most feasible option, since it automatically determines the (optimal?) bandwidth. I'm comparing observations with a subset of models. Basically, I want the same observed PDF in 12 different subpanels so that I can compare the models with the observed PDF more easily. This is my minimal example:
import matplotlib.pyplot as plt  # was missing: `plt` is used throughout
import numpy as np
import pandas as pd
import xarray as xr

# One figure holding a 6 x 2 grid of panels.
fig = plt.figure(0, figsize=(8.2, 10.2))
fig.subplots_adjust(left=0.13, right=0.94, bottom=0.1, top=0.95,
                    hspace=0.2, wspace=0.36)

# Text styling; set before the axes are created so every panel picks it up.
plt.rcParams['text.usetex'] = False
plt.rcParams['axes.labelsize'] = 12
plt.rcParams['font.size'] = 11
plt.rcParams['legend.fontsize'] = 12
plt.rcParams['xtick.labelsize'] = 11
plt.rcParams['ytick.labelsize'] = 11

# Panels are numbered 1..12, row by row.
axes = [fig.add_subplot(6, 2, panel) for panel in range(1, 13)]
(ax1, ax2, ax3, ax4, ax5, ax6,
 ax7, ax8, ax9, ax10, ax11, ax12) = axes

obs = np.array([448.2, 172.0881, 118.9816, 5.797349, 2, 0.7, 0.7, 0.1, 0.7, 14,
                41.78181, 94.99255])
df = pd.DataFrame({'obs': obs})

# Draw the observed KDE in every panel.
# NOTE: this recomputes the KDE once per panel.
for a in axes:
    df['obs'].plot.kde(ax=a, lw=2.0)

plt.show()
Is there any way I can 'copy/ duplicate' my first subplot - so
ax1 = df['obs'].plot.kde(ax=ax1, lw=2.0)
into the other panels without repeating the calculation? Alternatively can I somehow grab the values calculated? The reason why I don't want to repeat the computation is because it takes a lot of computing time with the original data.

Alternatively can I somehow grab the values calculated?
You can extract the line with Axes.get_lines() and its values with Line2D.get_data():
# plot KDE onto axes[0] (once)
df['obs'].plot.kde(ax=axes[0], lw=2.0)
# extract the drawn line and its x and y values from axes[0]
kde_line = axes[0].get_lines()[0]
x, y = kde_line.get_data()
# plot x and y on remaining axes[1:], reusing the original line's width and
# colour so that every panel shows an identical-looking curve
for a in axes[1:]:
    a.plot(x, y, lw=kde_line.get_linewidth(), color=kde_line.get_color())

Related

Create a plot in pyplot with several different y-axis scale [duplicate]

Two and three dimensional data can be viewed relatively straight-forwardly using traditional plot types. Even with four dimensional data, we can often find a way to display the data. Dimensions above four, though, become increasingly difficult to display. Fortunately, parallel coordinates plots provide a mechanism for viewing results with higher dimensions.
Several plotting packages provide parallel coordinates plots, such as Matlab, R, VTK type 1 and VTK type 2, but I don't see how to create one using Matplotlib.
Is there a built-in parallel coordinates plot in Matplotlib? I certainly don't see one in the gallery.
If there is no built-in-type, is it possible to build a parallel coordinates plot using standard features of Matplotlib?
Edit:
Based on the answer provided by Zhenya below, I developed the following generalization that supports an arbitrary number of axes. Following the plot style of the example I posted in the original question above, each axis gets its own scale. I accomplished this by normalizing the data at each axis point and making the axes have a range of 0 to 1. I then go back and apply labels to each tick-mark that give the correct value at that intercept.
The function works by accepting an iterable of data sets. Each data set is considered a set of points where each point lies on a different axis. The example in __main__ grabs random numbers for each axis in two sets of 30 lines. The lines are random within ranges that cause clustering of lines; a behavior I wanted to verify.
This solution isn't as good as a built-in solution since you have odd mouse behavior and I'm faking the data ranges through labels, but until Matplotlib adds a built-in solution, it's acceptable.
#!/usr/bin/python
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
def parallel_coordinates(data_sets, style=None):
    """Draw a parallel-coordinates plot with an independent scale per axis.

    Every dimension is normalised to the range 0..1 for drawing; the tick
    labels are then rewritten so that they show the original data values.

    Parameters
    ----------
    data_sets : sequence of sequences of numbers
        Each inner sequence is one line and holds one value per axis.
        All inner sequences must have the same length (at least 2).
    style : sequence of str, optional
        One Matplotlib format string per data set. Defaults to ``'r-'``
        for every data set.

    Returns
    -------
    module
        ``matplotlib.pyplot``, so that the caller can chain ``.show()``.
    """
    n_ticks = 6  # labelled tick marks per axis (0.0, 0.2, ..., 1.0 normalised)

    dims = len(data_sets[0])
    x = range(dims)
    # squeeze=False: always get an array back, even for a single subplot
    # (dims == 2), so the loops below work for any number of axes.
    fig, axes = plt.subplots(1, dims - 1, sharey=False, squeeze=False)
    axes = axes[0]

    if style is None:
        style = ['r-'] * len(data_sets)

    # Calculate the limits on the data
    min_max_range = []
    for m in zip(*data_sets):
        mn = min(m)
        mx = max(m)
        if mn == mx:
            # Constant dimension: widen the range so the normalisation
            # below does not divide by zero.
            mn -= 0.5
            mx = mn + 1.
        r = float(mx - mn)
        min_max_range.append((mn, mx, r))

    # Normalize the data sets
    norm_data_sets = [
        [(value - min_max_range[dimension][0]) / min_max_range[dimension][2]
         for dimension, value in enumerate(ds)]
        for ds in data_sets
    ]

    # Plot the datasets on all the subplots; each subplot only shows the
    # segment between two neighbouring axes.
    for i, ax in enumerate(axes):
        for dsi, d in enumerate(norm_data_sets):
            ax.plot(x, d, style[dsi])
        ax.set_xlim([x[i], x[i + 1]])

    def _label_axis(axis, dimension, x_ticks):
        """Pin the y ticks of *axis* to 0..1 and label them in data units."""
        mn, _, r = min_max_range[dimension]
        fractions = [k / float(n_ticks - 1) for k in range(n_ticks)]
        axis.xaxis.set_major_locator(ticker.FixedLocator(x_ticks))
        # Fix the tick positions explicitly so that each label really
        # belongs to the normalised position it is drawn at.
        axis.set_ylim(0, 1)
        axis.set_yticks(fractions)
        axis.set_yticklabels(['%4.2f' % (mn + f * r) for f in fractions])

    # Set the x axis ticks and the y labels of every left-hand axis
    for dimension, (axx, xx) in enumerate(zip(axes, x[:-1])):
        _label_axis(axx, dimension, [xx])

    # Move the final axis' ticks to the right-hand side
    axx = plt.twinx(axes[-1])
    _label_axis(axx, dims - 1, [x[-2], x[-1]])

    # Stack the subplots
    plt.subplots_adjust(wspace=0)
    return plt
if __name__ == '__main__':
    import random

    def _random_lines(base, scale, count=30):
        """Return *count* lines; value k is uniform in [base[k], base[k] + scale[k])."""
        return [[b + random.uniform(0., 1.) * s for b, s in zip(base, scale)]
                for _ in range(count)]

    # Two clusters of 30 lines each, drawn in red and blue respectively.
    # (`xrange` does not exist on Python 3, hence `range`/`zip` above.)
    data = _random_lines([0, 0, 5, 5, 0], [1.5, 2., 1.0, 2., 2.])
    colors = ['r'] * 30
    data.extend(_random_lines([3, 6, 0, 1, 3], [1.5, 2., 2.5, 2., 2.]))
    colors.extend(['b'] * 30)
    parallel_coordinates(data, style=colors).show()
Edit 2:
Here is an example of what comes out of the above code when plotting Fisher's Iris data. It isn't quite as nice as the reference image from Wikipedia, but it is passable if all you have is Matplotlib and you need multi-dimensional plots.
pandas has a parallel coordinates wrapper:
import pandas
import matplotlib.pyplot as plt
# The plotting helpers live in `pandas.plotting`; the old
# `pandas.tools.plotting` module no longer exists in current pandas.
from pandas.plotting import parallel_coordinates

# Iris data shipped with the pandas test suite; the 'Name' column holds the class.
data = pandas.read_csv(r'C:\Python27\Lib\site-packages\pandas\tests\data\iris.csv', sep=',')
# One line per row, coloured by the value in the 'Name' column.
parallel_coordinates(data, 'Name')
plt.show()
Source code, how they made it: plotting.py#L494
When answering a related question, I worked out a version using only one subplot (so it can be easily fit together with other plots) and optionally using cubic bezier curves to connect the points. The plot adjusts itself to the desired number of axes.
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches
import numpy as np
fig, host = plt.subplots()

# create some dummy data
ynames = ['P1', 'P2', 'P3', 'P4', 'P5']
N1, N2, N3 = 10, 5, 8
N = N1 + N2 + N3
category = np.concatenate([np.full(N1, 1), np.full(N2, 2), np.full(N3, 3)])
y1 = np.random.uniform(0, 10, N) + 7 * category
y2 = np.sin(np.random.uniform(0, np.pi, N)) ** category
y3 = np.random.binomial(300, 1 - category / 10, N)
# NOTE(review): `** 1/3` parses as `(... ** 1) / 3`, not as a cube root -
# confirm which one was intended.
y4 = np.random.binomial(200, (category / 6) ** 1/3, N)
y5 = np.random.uniform(0, 800, N)

# organize the data: one row per line, one column per axis
ys = np.dstack([y1, y2, y3, y4, y5])[0]
n_axes = ys.shape[1]
ymins = ys.min(axis=0)
ymaxs = ys.max(axis=0)
dys = ymaxs - ymins
ymins -= dys * 0.05  # add 5% padding below and above
ymaxs += dys * 0.05
dys = ymaxs - ymins

# transform all data to be compatible with the main axis
zs = np.zeros_like(ys)
zs[:, 0] = ys[:, 0]
zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / dys[1:] * dys[0] + ymins[0]

# the host axes plus one twin axes per additional dimension
axes = [host] + [host.twinx() for i in range(n_axes - 1)]
for i, ax in enumerate(axes):
    ax.set_ylim(ymins[i], ymaxs[i])
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    if ax != host:
        ax.spines['left'].set_visible(False)
        ax.yaxis.set_ticks_position('right')
        # spread the twin axes' spines evenly across the host axes
        ax.spines["right"].set_position(("axes", i / (n_axes - 1)))

host.set_xlim(0, n_axes - 1)
host.set_xticks(range(n_axes))
host.set_xticklabels(ynames, fontsize=14)
host.tick_params(axis='x', which='major', pad=7)
host.spines['right'].set_visible(False)
host.xaxis.tick_top()
host.set_title('Parallel Coordinates Plot', fontsize=18)

colors = plt.cm.tab10.colors

# create bezier curves
# for each axis, there will be a control vertex at the point itself, one at 1/3rd
# towards the previous and one at one third towards the next axis; the first and
# last axis have one less control vertex
# x-coordinate of the control vertices: at each integer (for the axes) and two
# inbetween. This depends on the number of AXES (columns), not on the number of
# rows, and is the same for every line, so it is computed once.
bezier_x = np.linspace(0, n_axes - 1, n_axes * 3 - 2, endpoint=True)
codes = [Path.MOVETO] + [Path.CURVE4] * (len(bezier_x) - 1)
for j in range(N):
    # to just draw straight lines between the axes instead:
    # host.plot(range(n_axes), zs[j, :], c=colors[(category[j] - 1) % len(colors)])
    # y-coordinate: repeat every point three times, except the first and last only twice
    verts = list(zip(bezier_x, np.repeat(zs[j, :], 3)[1:-1]))
    path = Path(verts, codes)
    patch = patches.PathPatch(path, facecolor='none', lw=1, edgecolor=colors[category[j] - 1])
    host.add_patch(patch)
plt.tight_layout()
plt.show()
Here's similar code for the iris data set. The second axis is reversed to avoid some crossing lines.
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()
ynames = iris.feature_names
ys = iris.data  # one row per sample, one column per feature (= axis)
n_axes = ys.shape[1]
ymins = ys.min(axis=0)
ymaxs = ys.max(axis=0)
dys = ymaxs - ymins
ymins -= dys * 0.05  # add 5% padding below and above
ymaxs += dys * 0.05

ymaxs[1], ymins[1] = ymins[1], ymaxs[1]  # reverse axis 1 to have less crossings
dys = ymaxs - ymins

# transform all data to be compatible with the main axis
zs = np.zeros_like(ys)
zs[:, 0] = ys[:, 0]
zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / dys[1:] * dys[0] + ymins[0]

fig, host = plt.subplots(figsize=(10, 4))

# the host axes plus one twin axes per additional feature
axes = [host] + [host.twinx() for i in range(n_axes - 1)]
for i, ax in enumerate(axes):
    ax.set_ylim(ymins[i], ymaxs[i])
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    if ax != host:
        ax.spines['left'].set_visible(False)
        ax.yaxis.set_ticks_position('right')
        # spread the twin axes' spines evenly across the host axes
        ax.spines["right"].set_position(("axes", i / (n_axes - 1)))

host.set_xlim(0, n_axes - 1)
host.set_xticks(range(n_axes))
host.set_xticklabels(ynames, fontsize=14)
host.tick_params(axis='x', which='major', pad=7)
host.spines['right'].set_visible(False)
host.xaxis.tick_top()
host.set_title('Parallel Coordinates Plot — Iris', fontsize=18, pad=12)

colors = plt.cm.Set2.colors
legend_handles = [None for _ in iris.target_names]

# create bezier curves
# x-coordinates of the control vertices: one at every axis and two in between.
# This depends on the number of AXES (columns), not on the number of samples,
# and is identical for every line, so it is computed once.
bezier_x = np.linspace(0, n_axes - 1, n_axes * 3 - 2, endpoint=True)
codes = [Path.MOVETO] + [Path.CURVE4] * (len(bezier_x) - 1)
for j in range(ys.shape[0]):
    # y-coordinates: every point three times, the first and last only twice
    verts = list(zip(bezier_x, np.repeat(zs[j, :], 3)[1:-1]))
    path = Path(verts, codes)
    patch = patches.PathPatch(path, facecolor='none', lw=2, alpha=0.7, edgecolor=colors[iris.target[j]])
    # keep one patch per class to serve as its legend handle
    legend_handles[iris.target[j]] = patch
    host.add_patch(patch)
host.legend(legend_handles, iris.target_names,
            loc='lower center', bbox_to_anchor=(0.5, -0.18),
            ncol=len(iris.target_names), fancybox=True, shadow=True)
plt.tight_layout()
plt.show()
I'm sure there is a better way of doing it, but here's a quick-and-dirty one (a really dirty one):
#!/usr/bin/python
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
#vectors to plot: 4D for this example
y1 = [1, 2.3, 8.0, 2.5]
y2 = [1.5, 1.7, 2.2, 2.9]

x = [1, 2, 3, 8]  # spines

fig, (ax, ax2, ax3) = plt.subplots(1, 3, sharey=False)

# Every subplot shows the same two lines but is zoomed in on the segment
# between two neighbouring spines, with a single x tick at its left spine.
for panel, left, right in zip((ax, ax2, ax3), x[:-1], x[1:]):
    panel.plot(x, y1, 'r-', x, y2, 'b-')
    panel.set_xlim([left, right])
    panel.xaxis.set_major_locator(ticker.FixedLocator([left]))
# the last subplot also carries the tick of the rightmost spine
ax3.xaxis.set_major_locator(ticker.FixedLocator([x[-2], x[-1]]))

# EDIT: add the labels to the rightmost spine
# (`tick.label2On = True` has no effect on current Matplotlib; tick_params
# is the supported way to switch the right-hand labels on.)
ax3.tick_params(axis='y', labelright=True)

# stack the subplots together
plt.subplots_adjust(wspace=0)

plt.show()
This is essentially based on a (much nicer) one by Joe Kington: Python/Matplotlib - Is there a way to make a discontinuous axis?. You might also want to have a look at the other answer to the same question.
In this example I don't even attempt at scaling the vertical scales, since it depends on what exactly you are trying to achieve.
EDIT: Here is the result
When using pandas (like suggested by theta), there is no way to scale the axes independently.
The reason you can't find the different vertical axes is because there aren't any. Our parallel coordinates is "faking" the other two axes by just drawing a vertical line and some labels.
https://github.com/pydata/pandas/issues/7083#issuecomment-74253671
I've adapted @JohanC's code to work with a pandas DataFrame and expanded it to also handle categorical variables. The code still needs improving - for example, to allow a numerical variable as the first column of the DataFrame - but I think it is good enough for now.
# Paths:
path_data = "data/"
# Packages:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.path import Path
import matplotlib.patches as patches
from functools import reduce
# Display options:
pd.set_option("display.width", 1200)
pd.set_option("display.max_columns", 300)
pd.set_option("display.max_rows", 300)

# Dataset:
df = pd.read_csv(path_data + "nasa_exoplanets.csv")
df_varnames = pd.read_csv(path_data + "nasa_exoplanets_var_names.csv")

# Variables (the first variable must be categoric):
my_vars = ["discoverymethod", "pl_orbper", "st_teff", "disc_locale", "sy_gaiamag"]
# Display name of every variable, in the order of `my_vars`.
# (DataFrame.append was removed in pandas 2.0; pd.concat stacks the
# per-variable rows in the same order.)
my_vars_names = pd.concat([df_varnames[df_varnames["var"] == i] for i in my_vars])
my_vars_names = my_vars_names["var_name"].values.tolist()

# Adapt the data:
df = df.loc[df["pl_letter"] == "d"]
df_plot = df[my_vars]
df_plot = df_plot.dropna()
df_plot = df_plot.reset_index(drop = True)

# Convert to numeric matrix:
ym = []
dics_vars = []  # one {category value: integer code} dict per categorical variable
for var in my_vars:
    if df_plot[var].dtype.kind not in ["i", "u", "f"]:
        # Categorical column: number the categories in order of first appearance.
        dic_var = {val: c for c, val in enumerate(df_plot[var].unique())}
        dics_vars.append(dic_var)
        ym.append([dic_var[i] for i in df_plot[var].tolist()])
    else:
        ym.append(df_plot[var].tolist())
# Force a float matrix so the in-place padding below also works when every
# column happens to hold integers.
ym = np.array(ym, dtype = float).T

# Padding:
ymins = ym.min(axis = 0)
ymaxs = ym.max(axis = 0)
dys = ymaxs - ymins
ymins -= dys*0.05
ymaxs += dys*0.05

# Reverse some axes for better visual:
axes_to_reverse = [0, 1]
for a in axes_to_reverse:
    ymaxs[a], ymins[a] = ymins[a], ymaxs[a]
dys = ymaxs - ymins

# Adjust to the main axis:
zs = np.zeros_like(ym)
zs[:, 0] = ym[:, 0]
zs[:, 1:] = (ym[:, 1:] - ymins[1:])/dys[1:]*dys[0] + ymins[0]

# Colors (one per level of the first, categorical, variable):
n_levels = len(dics_vars[0])
my_colors = ["#F41E1E", "#F4951E", "#F4F01E", "#4EF41E", "#1EF4DC", "#1E3CF4", "#F41EF3"]
cmap = LinearSegmentedColormap.from_list("my_palette", my_colors)
my_palette = [cmap(i/n_levels) for i in range(n_levels)]
# Plot:
fig, host_ax = plt.subplots(
    figsize = (20, 10),
    tight_layout = True
)

# Number of parallel axes = number of columns of the numeric matrix.
n_axes = ym.shape[1]

# Make the axes:
axes = [host_ax] + [host_ax.twinx() for i in range(n_axes - 1)]
dic_count = 0  # index of the next categorical variable in dics_vars
for i, ax in enumerate(axes):
    ax.set_ylim(
        bottom = ymins[i],
        top = ymaxs[i]
    )
    ax.spines.top.set_visible(False)
    ax.spines.bottom.set_visible(False)
    ax.ticklabel_format(style = 'plain')
    if ax != host_ax:
        ax.spines.left.set_visible(False)
        ax.yaxis.set_ticks_position("right")
        # spread the twin axes' spines evenly across the host axes
        ax.spines.right.set_position(("axes", i/(n_axes - 1)))
    if df_plot.iloc[:, i].dtype.kind not in ["i", "u", "f"]:
        # Categorical axis: one tick per category, labelled with its value.
        dic_var_i = dics_vars[dic_count]
        ax.set_yticks(range(len(dic_var_i)))
        ax.set_yticklabels(list(dic_var_i.keys()))
        dic_count += 1
host_ax.set_xlim(
    left = 0,
    right = n_axes - 1
)
host_ax.set_xticks(range(n_axes))
host_ax.set_xticklabels(
    my_vars_names,
    fontsize = 14
)
host_ax.tick_params(
    axis = "x",
    which = "major",
    pad = 7
)

# Make the curves:
host_ax.spines.right.set_visible(False)
host_ax.xaxis.tick_top()
# x-coordinates of the bezier control vertices: one at every axis and two in
# between. This depends on the number of AXES, not on the number of rows, and
# is identical for every row, so it is computed once.
bezier_x = np.linspace(0, n_axes - 1, n_axes*3 - 2, endpoint = True)
codes = [Path.MOVETO] + [Path.CURVE4]*(len(bezier_x) - 1)
for j in range(ym.shape[0]):
    # y-coordinates: every point three times, the first and last only twice
    verts = list(zip(bezier_x, np.repeat(zs[j, :], 3)[1: -1]))
    path = Path(verts, codes)
    # colour the curve by the level of the first (categorical) variable
    color_first_cat_var = my_palette[dics_vars[0][df_plot.iloc[j, 0]]]
    patch = patches.PathPatch(
        path,
        facecolor = "none",
        lw = 2,
        alpha = 0.7,
        edgecolor = color_first_cat_var
    )
    host_ax.add_patch(patch)
plotly has a nice interactive solution called parallel_coordinates which works just fine:
import plotly.express as px

# Iris sample data bundled with plotly.
df = px.data.iris()

# Human-readable axis titles, keyed by column name.
axis_labels = {
    "species_id": "Species",
    "sepal_width": "Sepal Width",
    "sepal_length": "Sepal Length",
    "petal_width": "Petal Width",
    "petal_length": "Petal Length",
}

# Colour each line by its species id on a diverging scale centred on 2.
fig = px.parallel_coordinates(
    df,
    color="species_id",
    labels=axis_labels,
    color_continuous_scale=px.colors.diverging.Tealrose,
    color_continuous_midpoint=2,
)
fig.show()
I want to plug a beta-released parallel coordinate plotting package called Paxplot which is based on Matplotlib. It uses similar underlying logic to the other answers and extends functionality while maintaining clean usage.
The documentation provides examples of basic usage, advanced usage, and usage with Pandas. As per the figure provided in the original question, I have provided a solution that plots the iris dataset:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
import paxplot
# Import data: the feature columns with the target appended as a last column
iris = load_iris(as_frame=True)
values = np.c_[iris['data'], iris['target']]
column_names = iris['feature_names'] + ['target']
df = pd.DataFrame(data=values, columns=column_names)
cols = df.columns

# Create figure with one parallel axis per column and draw every row
paxfig = paxplot.pax_parallel(n_axes=len(cols))
paxfig.plot(df.to_numpy())

# Add labels
paxfig.set_labels(cols)

# Set ticks: show the class names on the last (target) axis
paxfig.set_ticks(ax_idx=-1, ticks=[0, 1, 2], labels=iris.target_names)

# Add colorbar keyed on the first column
color_col = 0
colorbar_options = {'label': cols[color_col]}
paxfig.add_colorbar(ax_idx=color_col, cmap='viridis', colorbar_kwargs=colorbar_options)

plt.show()
For full disclosure, I created Paxplot and have been developing and maintaining it with some friends. Definitely feel free to reach out if you are interested in contributing!
Best example I've seen thus far is this one
https://python.g-node.org/python-summerschool-2013/_media/wiki/datavis/olympics_vis.py
See the normalised_coordinates function. Not super fast, but works from what I've tried.
normalised_coordinates(['VAL_1', 'VAL_2', 'VAL_3'], np.array([[1230.23, 1500000, 12453.03], [930.23, 140000, 12453.03], [130.23, 120000, 1243.03]]), [1, 2, 1])
Still far from perfect but it works and is relatively short:
import numpy as np
import matplotlib.pyplot as plt
def plot_parallel(data, labels):
    """Show a parallel-coordinates plot built from side-by-side subplots.

    Parameters
    ----------
    data : 2-D array-like of numbers
        One row per line to draw, one column per axis (at least 2 columns).
    labels : sequence of str
        One x tick label per column of *data*.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Work in floats: with integer input the rescaled values below would
    # otherwise be truncated when written back.
    data = np.asarray(data, dtype=float)
    n_dims = data.shape[1]
    x = list(range(n_dims))

    # Column extrema are the same for every row, so compute them once.
    col_min = data.min(axis=0)
    col_max = data.max(axis=0)
    col_range = col_max - col_min
    # A constant column has zero range; divide by 1 instead so its values
    # land on the bottom of the neighbouring scale rather than becoming NaN.
    safe_range = np.where(col_range == 0, 1.0, col_range)

    # squeeze=False: always get an array back, even for a single subplot.
    fig, axis = plt.subplots(1, n_dims - 1, sharey=False, squeeze=False)
    axis = axis[0]

    for d in data:
        for i, a in enumerate(axis):
            # Subplot i uses the scale of column i; express the value of
            # column i+1 on that scale.
            right = (d[i + 1] - col_min[i + 1]) * col_range[i] / safe_range[i + 1] + col_min[i]
            a.plot(x[i:i + 2], [d[i], right])

    for i, a in enumerate(axis):
        a.set_xlim([x[i], x[i + 1]])
        a.set_xticks([x[i], x[i + 1]])
        a.set_xticklabels([labels[i], labels[i + 1]], minor=False, rotation=45)
        a.set_ylim([col_min[i], col_max[i]])
    plt.subplots_adjust(wspace=0)
    plt.show()
This is a version using TensorBoard, for cases where a matplotlib figure is not strictly needed.
I was looking for something that works like the result shown in "Visualize the results in TensorBoard's HParams plugin". Here is a wrapper function that only produces the plot with TensorBoard, skipping the training part of that tutorial. The logic is to use the column named by metrics_name as the metric and all other columns as HParams. For any other details, refer to the original tutorial.
import os
import json
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
def tensorboard_parallel_coordinates_plot(dataframe, metrics_name, metrics_display_name=None, skip_columns=None, log_dir='logs/hparam_tuning'):
    """Write *dataframe* as TensorBoard HParams runs, one run per row.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to log.
    metrics_name : str
        Column used as the metric. If its dtype is ``object`` the distinct
        values are mapped to integer codes, and the mapping is stored as the
        metric description and printed.
    metrics_display_name : str, optional
        Name under which the metric is logged; defaults to *metrics_name*.
    skip_columns : list of str, optional
        Columns that are not logged as hyperparameters.
    log_dir : str
        Directory for the TensorBoard logs; row ``k`` is written to
        ``<log_dir>/run-<k>``.
    """
    # Copy into a new list so the caller's list is never modified.
    skip_columns = list(skip_columns or []) + [metrics_name]

    # Every remaining column becomes a discrete hyperparameter over its unique values.
    to_hp_discrete = lambda column: hp.HParam(column, hp.Discrete(np.unique(dataframe[column].values).tolist()))
    hp_params_dict = {column: to_hp_discrete(column) for column in dataframe.columns if column not in skip_columns}

    if dataframe[metrics_name].values.dtype == 'object':  # Not numeric
        metrics_map = {value: code for code, value in enumerate(np.unique(dataframe[metrics_name]))}
        description = json.dumps(metrics_map)
    else:
        metrics_map, description = None, None

    metric_key = metrics_name if metrics_display_name is None else metrics_display_name
    with tf.summary.create_file_writer(log_dir).as_default():
        metrics = [hp.Metric(metric_key, display_name=metric_key, description=description)]
        hp.hparams_config(hparams=list(hp_params_dict.values()), metrics=metrics)

    # Iterate by POSITION: `.iloc` is positional, so feeding it index labels
    # is only correct for a default 0..n-1 index.
    for run_id in range(len(dataframe)):
        log = dataframe.iloc[run_id]
        hparams = {hp_unit: log[column] for column, hp_unit in hp_params_dict.items()}
        print({hp_unit.name: hparams[hp_unit] for hp_unit in hparams})
        run_dir = os.path.join(log_dir, 'run-%d' % run_id)
        with tf.summary.create_file_writer(run_dir).as_default():
            hp.hparams(hparams)  # record the values used in this trial
            metric_item = log[metrics_name] if metrics_map is None else metrics_map[log[metrics_name]]
            tf.summary.scalar(metric_key, metric_item, step=1)

    print()
    if metrics_map is not None:
        print("metrics_map:", metrics_map)
    print("Start tensorboard by: tensorboard --logdir {}".format(log_dir))
Plotting test:
aa = pd.read_csv('https://raw.github.com/pandas-dev/pandas/main/pandas/tests/io/data/csv/iris.csv')
tensorboard_parallel_coordinates_plot(aa, metrics_name="Name", log_dir="logs/iris")
# metrics_map: {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
# Start tensorboard by: tensorboard --logdir logs/iris
!tensorboard --logdir logs/iris
# TensorBoard 2.8.0 at http://localhost:6006/ (Press CTRL+C to quit)
Open the TensorBoard link (default http://localhost:6006/) and go to HPARAMS -> PARALLEL COORDINATES VIEW to see the result:
TensorBoard result is interactive. But this is designed for plotting model hyper parameters tuning results, so I think it's not friendly for plotting large dataset.
You have to clean saved data manually if plotting new data in same log_dir directory.
It seems the final metrics item has to be numeric, while other axes don't have to.
fake_data = {
"optimizer": ["sgd", "adam", "adam", "lamb", "lamb", "lamb", "lamb"],
"weight_decay": [0.1, 0.1, 0.2, 0.1, 0.2, 0.2, 0.3],
"rescale_mode": ["tf", "tf", "tf", "tf", "tf", "torch", "torch"],
"accuracy": [78.5, 78.2, 78.8, 79.2, 79.3, 79.5, 79.6],
}
aa = pd.DataFrame(fake_data)
tensorboard_parallel_coordinates_plot(aa, "accuracy", log_dir="logs/fake")
# Start tensorboard by: tensorboard --logdir logs/fake
!tensorboard --logdir logs/fake
# TensorBoard 2.8.0 at http://localhost:6006/ (Press CTRL+C to quit)

Matplotlib colormap, scatter plot passing a third variable for color: invalid RGBA argument

we are building our reports on matplotlib. Each page has multiple charts and some text.
In the report data there is over 100 locations, each location has a density. The idea is to plot the points on a map where the color (shade of red) represents the density of the location.
However, I do not understand the connection between the kwargs : c and cmap in the ax.scatter call, nor do I understand the role of color.Normalize in this application.
import pandas as pd
import matplotlib
import numpy as np
from pandas import Series, DataFrame
import csv
from scipy import stats
import matplotlib.pyplot as plt
import random
import matplotlib.colors as colors
# Get the data and transform
data = pd.read_csv('logHistThis.csv')
data.drop('Unnamed: 0', axis=1, inplace=True)
dataMean = data['Density'].mean()
data = list(data['Density'])


# I was under the impression that the data for the colormap
# had to be between 1 and 0 so did this:
def myColorScale(theData):
    """Return a new list holding every value of *theData* divided by 100."""
    return [x / 100 for x in theData]


aColorScale = myColorScale(data)

# Fit a normal distribution to the densities and evaluate its pdf over the data range
estimated_mu, estimated_sigma = stats.norm.fit(data)
xmin = min(data)
xmax = max(data)
x = np.linspace(xmin, xmax, 100)
pdf = stats.norm.pdf(x, loc=estimated_mu, scale=estimated_sigma)

thisRangeMin = np.log(27)
thisRangeMax = np.log(35)

# 40 sample points for the scatter panel
q = [np.random.choice(data, 40)]
z = [ np.random.randint(1, 50, size=40)]
s = 100 *q

colormap = 'Reds'
normalize =matplotlib.colors.Normalize(vmin=xmin, vmax=xmax)

fig = plt.figure(figsize=(10, 5), frameon=False, edgecolor='000000', linewidth = 1)
# left, bottom, width, height of the two panels in figure coordinates
rect0 = .05, .05, .4, .9
rect1 = .5, .05, .4, .9

# This works great
ax1 = fig.add_axes(rect0)#<-----------x2TopTenSummary
# `normed=` was removed from hist(); `density=True` is its replacement
ax1.hist(data, bins=13, density=True, color='c', alpha=0.05)
ax1.fill_between(x, pdf, where=((x < thisRangeMax) & ( x > thisRangeMin)), alpha=.2, label='City Range')
ax1.vlines(dataMean, 0, stats.norm.pdf(dataMean, loc=estimated_mu, scale=estimated_sigma), color='r')
ax1.plot(x, pdf, 'k')

# This does not work :
# It just gives blue dots
# (the axes are added once only; a second add_axes call with the same
# rectangle is not needed)
ax2= fig.add_axes(rect1)
ax2.scatter(q,z, s=200, cmap= 'Reds',norm=matplotlib.colors.Normalize(vmin=min(aColorScale) , vmax=max(aColorScale)))
# Tried to set the color map in a variety of ways:
# When kwarg 'c' is set to the variable 'aColorScale' i get the error
plt.show()
plt.close()
So my question is how do we incorporate the colormap in an application of this sort?
Multiple axes on a figure with a predetermined size (A4 or letter).
The color determination is a third variable z, (not x or y)
The color determinant is a float where 0 < z < 8
the call is ax not plt
The description of the application in the docs is unclear to me:
the doc for axes.scatter
the doc for color.normalize
I have seen plenty of examples where there is only one ax in the figure and the call is to plt.scatter... for example here
In our case x, y will be longitude, latitude and the variable is 'data', a list or array of floats between 0 and 8.
Thanks
Okay the answer came from the PyCon Israel 2017 in this document by Tamir Lousky.
The normalization of the data and the correlation with color map happens with this block of code right here:
aColorScale = data
aColorScale = np.array(aColorScale)
norm = (aColorScale - aColorScale.min())/(aColorScale.max() - aColorScale.min())
cmap= plt.get_cmap('Reds')
colors = [cmap(tl) for tl in norm]#<---- thisRightHere
Then colors gets fed into ax2:
ax2= fig.add_axes(rect1)
ax2.scatter(q,z, s=200, color = colors)
I wish those who downvoted my question would say why, there was hours of searching and trying to find this.
Anyway here is the final image:
While I do have problems understanding the issue itself, I can tell you that the solution you have in your answer can be simplified to the usual way to plot scatters:
ax2= fig.add_axes(rect1)
ax2.scatter(q,z, c=aColorScale, s=200, cmap='Reds')

How to have a secondary y axis in a nested GridSpec?

I'd like to obtain this figure:
But with two plots inside each graph, like this:
Here is a sample of the code I used for the first figure
measures = ['ACE', 'SCE', 'LZs', 'LZc']
conditions = ['dark','light','flick3','flick10','switch']
outer_grid = gridspec.GridSpec(2,2)
for measure in measures:
inner_grid = gridspec.GridSpecFromSubplotSpec(5, 1, subplot_spec=outer_grid[measures.index(measure)])
ax={}
for cond in conditions:
c=conditions.index(cond)
ax[c] = plt.Subplot(fig, inner_grid[c])
if c != 0:
ax[c].get_shared_y_axes().join(ax[0], ax[c])
ax[c].plot()
ax[c+n]=ax[c].twinx()
ax[c+n].scatter()
ax[c+n].set_ylim(0,5)
fig.add_subplot(ax[c],ax[c+n])
For the second plot, it's basically the same without the first loop and GridSpec, using ax[c]=plt.subplot('51{c}') instead of ax[c]=plt.Subplot(fig, inner_grid[c]).
As you can see, when using GridSpec I still have the secondary y axis but not the scatter plot associated.
I guess the short question would be How to write fig.add_subplot(ax[c],ax[c+n]) properly?
(fig.add_subplot(ax[c]) fig.add_subplot(ax[c+n]) in two lines doesn't work.)
It is not clear from your question exactly which data you're plotting in each subplot, plus the way you're creating your subplots seems a little convoluted, which is probably why you're having problems. Here is how I would do it:
import matplotlib.gridspec as gs
measures = ['ACE', 'SCE', 'LZs', 'LZc']
conditions = ['dark','light','flick3','flick10','switch']
colors = ['g','c','b','r','grey']
Npoints = 10
data = [np.random.random((Npoints,len(measures))) for i in range(len(conditions))]
gs00 = gs.GridSpec(len(conditions), 1)
fig = plt.figure(figsize=(5,5))
for i,condition in enumerate(conditions):
ax1 = fig.add_subplot(gs00[i])
ax2 = ax1.twinx()
ax1.plot(range(Npoints), data[i][:,0], 'o-', color=colors[i], label=measures[0])
ax2.plot(range(Npoints), data[i][:,1], 'o-.', color=colors[i], label=measures[1])
ax1.set_ylim((-0.1,1.1))
ax2.set_ylim(ax1.get_ylim())
ax1.set_title(condition)
EDIT to get the same thing repeated 4 times, the logic is exactly the same, you just have to play around with the gridspec. But the only things that matters are the lines ax1 = fig.add_subplot(gs01[j]) followed by ax2 = ax1.twinx(), which will create a second axis on top of the first
import matplotlib.gridspec as gs

measures = ['ACE', 'SCE', 'LZs', 'LZc']
conditions = ['dark','light','flick3','flick10','switch']
colors = ['g','c','b','r','grey']
Npoints = 10
# One (Npoints x len(measures)) block of random sample data per condition
data = [np.random.random((Npoints,len(measures))) for i in range(len(conditions))]

# Outer 2 x 2 grid: one cell per measure
gs00 = gs.GridSpec(2,2)
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*';
# use whichever name this installation provides.
paper_style = 'seaborn-v0_8-paper' if 'seaborn-v0_8-paper' in plt.style.available else 'seaborn-paper'
plt.style.use(paper_style)
fig = plt.figure(figsize=(10,10))
# (row, column) of every measure inside the outer grid
grid_x, grid_y = np.unravel_index(range(len(measures)),(2,2))
for i,measure in enumerate(measures):
    # Inner grid: one row per condition inside this measure's cell
    gs01 = gs.GridSpecFromSubplotSpec(len(conditions), 1, subplot_spec=gs00[grid_x[i],grid_y[i]])
    for j,condition in enumerate(conditions):
        ax1 = fig.add_subplot(gs01[j])
        # Secondary y axis drawn on top of ax1, sharing its x axis
        ax2 = ax1.twinx()
        ax1.plot(range(Npoints), data[j][:,0], 'o-', color=colors[j], label=measures[0])
        ax2.plot(range(Npoints), data[j][:,1], 'o-.', color=colors[j], label=measures[1])
        ax1.set_ylim((-0.1,1.1))
        ax2.set_ylim(ax1.get_ylim())
        if j==0:
            # Title only on the top row of each inner grid
            ax1.set_title(measure)

Combining an imshow colormap with Python Pandas subplot line charts

I have a Pandas DataFrame that has pairs of columns -- each pair has a measurement column that I want to plot as a line chart and another column that I want to plot as an imshow colormap to illustrate a severity metric.
A simple example:
from random import *
import numpy as np
import pandas as pd

# Helper returning a list of n random 0/1 values
randBinList = lambda n: [randint(0,1) for b in range(1,n+1)]
# 72 hourly timestamps; only their count is used below
rng = pd.date_range('1/1/2011', periods=72, freq='h')
tslist = {}
for measurement_num in range(3):
    measurement_name = 'Measurement'+str(measurement_num)
    # `randn` is provided by numpy.random, not by the stdlib `random` module
    tslist[measurement_name] = pd.DataFrame({'Value': np.random.randn(len(rng)),'Severity': np.random.randn(len(rng))})
# Two-level columns: (measurement name, 'Value' / 'Severity')
ts = pd.concat(tslist, axis=1)
ts.head()
Results in a simple DataFrame:
My attempt to create my intended plot is based on: Shade the background of matplotlib based on array and colormap and Colormap entire subplot
# Names on the first level of the column index, one per subplot
PointList = ts.columns.levels[0].tolist()
# --- first entry of PointList, handled outside the loop ---
y = ts[PointList[0]]['Value'].values
x = np.arange(len(y))
t = ts[PointList[0]]['Severity'].values
# One subplot row per entry of PointList
fig, ax = plt.subplots(len(PointList), 1, figsize=(18,10))
ax[0].plot(x, y, c='black')
ymin, ymax = ax[0].get_ybound()
xmin, xmax = ax[0].get_xbound()
# NOTE(review): the line and the bounds above belong to ax[0], but this image
# is drawn on ax[1] - confirm whether ax[0] was intended.
im = ax[1].imshow(t.reshape(1, t.size), extent=[xmin,x.max(),ymin,ymax], alpha=.5, cmap=plt.cm.RdYlGn)
ax[0].set_title(PointList[0])
# --- remaining entries, one subplot each ---
plotcounter = 1
for point in PointList[1:]:
# NOTE(review): `cutoffdate` is not defined in the visible code - confirm where it comes from.
y = ts[point]['Value'].truncate(before=cutoffdate).values
x = np.arange(len(y))
t = ts[point]['Severity'].truncate(before=cutoffdate).values
ax[plotcounter].plot(x, y, c='black', )
ymin, ymax = ax[plotcounter].get_ybound()
xmin, xmax = ax[plotcounter].get_xbound()
# Severity values as a one-row image stretched over the line plot's extent
im = ax[plotcounter].imshow(t.reshape(1, t.size), extent=[xmin,x.max(),ymin,ymax], alpha=.5, cmap=plt.cm.RdYlGn)
# Reuse the aspect setting of the first subplot
ax[plotcounter].set_aspect(ax[0].get_aspect())
ax[plotcounter].set_title(point)
plotcounter += 1
plt.tight_layout()
plt.show()
which results in:
I'm having trouble with imshow for the first subplot. I'm looking for a much more elegant solution that integrates better with Pandas and results in subplots. Also I'd like to use the pandas.tseries.index.DatetimeIndex as the x-axis instead of having only numbers.

python: scatter plot with median and CI

I am looking for a python plot on the lines of http://www.r-bloggers.com/visually-weighted-watercolor-plots-new-variants-please-vote/
This gives the equivalent of the standard deviation bands:
# generate random variables
x,y = generate_random()
# bin the values and determine the envelopes
df = bin_by(x, y, nbins=25, bins = None)
###
# Plot 1
###
# determine the colors
cols = ['#EE7550', '#F19463', '#F6B176']
with plt.style.context('fivethirtyeight'):
# plot the 3rd stdv
plt.fill_between(df.x, df['5th'], df['95th'], alpha=0.7,color = cols[2])
plt.fill_between(df.x, df['10th'], df['90th'], alpha=0.7,color = cols[1])
plt.fill_between(df.x, df['25th'], df['75th'], alpha=0.7,color = cols[0])
# plt the line
plt.plot(df.x, df['median'], color = '1', alpha = 0.7, linewidth = 1)
# plot the points
plt.scatter(x, y, facecolors='white', edgecolors='0', s = 5, lw = 0.7)
plt.savefig('fig1.png', facecolor='white', edgecolor='none')
plt.show()
def bin_by(x, y, nbins=30, bins = None):
"""
Divide the x axis into sections and return groups of y based on its x value
"""
if bins is None:
bins = np.linspace(x.min(), x.max(), nbins)
bin_space = (bins[-1] - bins[0])/(len(bins)-1)/2
indicies = np.digitize(x, bins + bin_space)
Bit of a discussion and link to my Github from my blog
cut-paste from my larger piece of code. It does not give what I want. I am posting per Evert's suggestion
fig = plt.figure(figsize=(8, 8))
plt.plot(xlist, ylist, 'b,')
plt.plot([0.0,0.8],[0.0,0.8],'y-')
data2d=zip(xlist,ylist)
bins = np.linspace(0.0, 0.2, 21)
medianlist=binpercentile(data2d,bins)
c10list=binpercentile(data2d,bins,0.1)
c90list=binpercentile(data2d,bins,0.9)
centerbins=[(x+y)/2.0 for x,y in zip(bins[:-1],bins[1:])]
centerbins.insert(0,0)
medianlist.insert(0,0)
c10list.insert(0,0)
c90list.insert(0,0)
plt.plot(centerbins,c10list,'r--')
plt.plot(centerbins,c90list,'r--')
plt.plot(centerbins,medianlist,'r-')
imagefilename='%s.%s'%('.'.join(infile.split('.')[0:-1]),'diffmed.pdf')
plt.savefig(imagefilename)

Categories

Resources