Matplotlib conversion from data units to axis units - python

I'm trying to plot some data with Matplotlib (Python library) and to add an horizontal line, that would not cover the full axis range but start around the middle and finish on the right axis.
I am using:
plt.axhline(y=1.75,xmin=0.5)
where y specifies the height of the line in data units, but xmin (as well as xmax) need to be defined in axis units (=0 for the beginning of axis and =1 at the end). Though I only know the point I want my line to start in data units, is there a method/function to convert from one to the other?

Just draw a line with plt
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0.2,10,100)
fig, ax = plt.subplots()
ax.plot(x, 1/x)
ax.plot(x, np.log(x))
ax.set_aspect('equal')
ax.grid(True, which='both')
y = 1.25
xmin = 2
xmax = ax.get_xlim()[1]
ax.plot([xmin, xmax], [y, y], color='k')
which gives me:

Related

Graphing multiple lines for axvline [duplicate]

Given a plot of a signal in time representation, how can I draw lines marking the corresponding time index?
Specifically, given a signal plot with a time index ranging from 0 to 2.6 (seconds), I want to draw vertical red lines indicating the corresponding time index for the list [0.22058956, 0.33088437, 2.20589566]. How can I do it?
The standard way to add vertical lines that will cover your entire plot window without you having to specify their actual height is plt.axvline
import matplotlib.pyplot as plt
plt.axvline(x=0.22058956)
plt.axvline(x=0.33088437)
plt.axvline(x=2.20589566)
OR
xcoords = [0.22058956, 0.33088437, 2.20589566]
for xc in xcoords:
plt.axvline(x=xc)
You can use many of the keywords available for other plot commands (e.g. color, linestyle, linewidth ...). You can pass in keyword arguments ymin and ymax if you like in axes corrdinates (e.g. ymin=0.25, ymax=0.75 will cover the middle half of the plot). There are corresponding functions for horizontal lines (axhline) and rectangles (axvspan).
matplotlib.pyplot.vlines vs. matplotlib.pyplot.axvline
These methods are applicable to plots generated with seaborn and pandas.DataFrame.plot, which both use matplotlib.
The difference is that vlines accepts one or more locations for x, while axvline permits one location.
Single location: x=37.
Multiple locations: x=[37, 38, 39].
vlines takes ymin and ymax as a position on the y-axis, while axvline takes ymin and ymax as a percentage of the y-axis range.
When passing multiple lines to vlines, pass a list to ymin and ymax.
Also matplotlib.axes.Axes.vlines and matplotlib.axes.Axes.axvline for the object-oriented API.
If you're plotting a figure with something like fig, ax = plt.subplots(), then replace plt.vlines or plt.axvline with ax.vlines or ax.axvline, respectively.
See this answer for horizontal lines with .hlines.
import numpy as np
import matplotlib.pyplot as plt
xs = np.linspace(1, 21, 200)
plt.figure(figsize=(10, 7))
# only one line may be specified; full height
plt.axvline(x=36, color='b', label='axvline - full height')
# only one line may be specified; ymin & ymax specified as a percentage of y-range
plt.axvline(x=36.25, ymin=0.05, ymax=0.95, color='b', label='axvline - % of full height')
# multiple lines all full height
plt.vlines(x=[37, 37.25, 37.5], ymin=0, ymax=len(xs), colors='purple', ls='--', lw=2, label='vline_multiple - full height')
# multiple lines with varying ymin and ymax
plt.vlines(x=[38, 38.25, 38.5], ymin=[0, 25, 75], ymax=[200, 175, 150], colors='teal', ls='--', lw=2, label='vline_multiple - partial height')
# single vline with full ymin and ymax
plt.vlines(x=39, ymin=0, ymax=len(xs), colors='green', ls=':', lw=2, label='vline_single - full height')
# single vline with specific ymin and ymax
plt.vlines(x=39.25, ymin=25, ymax=150, colors='green', ls=':', lw=2, label='vline_single - partial height')
# place the legend outside
plt.legend(bbox_to_anchor=(1.0, 1), loc='upper left')
plt.show()
Seaborn axes-level plot
import seaborn as sns
# sample data
fmri = sns.load_dataset("fmri")
# x index for max y values for stim and cue
c_max, s_max = fmri.pivot_table(index='timepoint', columns='event', values='signal', aggfunc='mean').idxmax()
# plot
g = sns.lineplot(data=fmri, x="timepoint", y="signal", hue="event")
# y min and max
ymin, ymax = g.get_ylim()
# vertical lines
g.vlines(x=[c_max, s_max], ymin=ymin, ymax=ymax, colors=['tab:orange', 'tab:blue'], ls='--', lw=2)
Seaborn figure-level plot
Each axes must be iterated through.
import seaborn as sns
# sample data
fmri = sns.load_dataset("fmri")
# used to get the index values (x) for max y for each event in each region
fpt = fmri.pivot_table(index=['region', 'timepoint'], columns='event', values='signal', aggfunc='mean')
# plot
g = sns.relplot(data=fmri, x="timepoint", y="signal", col="region", hue="event", kind="line")
# iterate through the axes
for ax in g.axes.flat:
# get y min and max
ymin, ymax = ax.get_ylim()
# extract the region from the title for use in selecting the index of fpt
region = ax.get_title().split(' = ')[1]
# get x values for max event
c_max, s_max = fpt.loc[region].idxmax()
# add vertical lines
ax.vlines(x=[c_max, s_max], ymin=ymin, ymax=ymax, colors=['tab:orange', 'tab:blue'], ls='--', lw=2, alpha=0.5)
For 'region = frontal' the maximum value of both events occurs at 5.
Barplot and Histograms
Note that bar plot tick locations have a zero-based index, regardless of the axis tick labels, so select x based on the bar index, not the tick label.
ax.get_xticklabels() will show the locations and labels.
import pandas as pd
import seaborn as sns
# load data
tips = sns.load_dataset('tips')
# histogram
ax = tips.plot(kind='hist', y='total_bill', bins=30, ec='k', title='Histogram with Vertical Line')
_ = ax.vlines(x=16.5, ymin=0, ymax=30, colors='r')
# barplot
ax = tips.loc[5:25, ['total_bill', 'tip']].plot(kind='bar', figsize=(15, 4), title='Barplot with Vertical Lines', rot=0)
_ = ax.vlines(x=[0, 17], ymin=0, ymax=45, colors='r')
Time Series Axis
The dates in the dataframe to be the x-axis must be a datetime dtype. If the column or index is not the correct type, it must be converted with pd.to_datetime.
If an array or list of dates is being used, refer to Converting numpy array of strings to datetime or Convert datetime list into date python, respectively.
x will accept a date like '2020-09-24' or datetime(2020, 9, 2).
import pandas_datareader as web # conda or pip install this; not part of pandas
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
# get test data; this data is downloaded with the Date column in the index as a datetime dtype
df = web.DataReader('^gspc', data_source='yahoo', start='2020-09-01', end='2020-09-28').iloc[:, :2]
# display(df.head(2))
High Low
Date
2020-09-01 3528.030029 3494.600098
2020-09-02 3588.110107 3535.229980
# plot dataframe; the index is a datetime index
ax = df.plot(figsize=(9, 6), title='S&P 500', ylabel='Price')
# add vertical lines
ax.vlines(x=[datetime(2020, 9, 2), '2020-09-24'], ymin=3200, ymax=3600, color='r', label='test lines')
ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
plt.show()
For multiple lines
xposition = [0.3, 0.4, 0.45]
for xc in xposition:
plt.axvline(x=xc, color='k', linestyle='--')
To add a legend and/or colors to some vertical lines, then use this:
import matplotlib.pyplot as plt
# x coordinates for the lines
xcoords = [0.1, 0.3, 0.5]
# colors for the lines
colors = ['r','k','b']
for xc,c in zip(xcoords,colors):
plt.axvline(x=xc, label='line at x = {}'.format(xc), c=c)
plt.legend()
plt.show()
Results
Calling axvline in a loop, as others have suggested, works, but it can be inconvenient because
Each line is a separate plot object, which causes things to be very slow when you have many lines.
When you create the legend each line has a new entry, which may not be what you want.
Instead, you can use the following convenience functions which create all the lines as a single plot object:
import matplotlib.pyplot as plt
import numpy as np
def axhlines(ys, ax=None, lims=None, **plot_kwargs):
"""
Draw horizontal lines across plot
:param ys: A scalar, list, or 1D array of vertical offsets
:param ax: The axis (or none to use gca)
:param lims: Optionally the (xmin, xmax) of the lines
:param plot_kwargs: Keyword arguments to be passed to plot
:return: The plot object corresponding to the lines.
"""
if ax is None:
ax = plt.gca()
ys = np.array((ys, ) if np.isscalar(ys) else ys, copy=False)
if lims is None:
lims = ax.get_xlim()
y_points = np.repeat(ys[:, None], repeats=3, axis=1).flatten()
x_points = np.repeat(np.array(lims + (np.nan, ))[None, :], repeats=len(ys), axis=0).flatten()
plot = ax.plot(x_points, y_points, scalex = False, **plot_kwargs)
return plot
def axvlines(xs, ax=None, lims=None, **plot_kwargs):
"""
Draw vertical lines on plot
:param xs: A scalar, list, or 1D array of horizontal offsets
:param ax: The axis (or none to use gca)
:param lims: Optionally the (ymin, ymax) of the lines
:param plot_kwargs: Keyword arguments to be passed to plot
:return: The plot object corresponding to the lines.
"""
if ax is None:
ax = plt.gca()
xs = np.array((xs, ) if np.isscalar(xs) else xs, copy=False)
if lims is None:
lims = ax.get_ylim()
x_points = np.repeat(xs[:, None], repeats=3, axis=1).flatten()
y_points = np.repeat(np.array(lims + (np.nan, ))[None, :], repeats=len(xs), axis=0).flatten()
plot = ax.plot(x_points, y_points, scaley = False, **plot_kwargs)
return plot
In addition to the plt.axvline and plt.plot((x1, x2), (y1, y2)) or plt.plot([x1, x2], [y1, y2]) as provided in the answers above, one can also use
plt.vlines(x_pos, ymin=y1, ymax=y2)
to plot a vertical line at x_pos spanning from y1 to y2 where the values y1 and y2 are in absolute data coordinates.

Same length of ticks in multiplot

How to set to have the same scale/length of ticks in all subplots? I would like to set length of all xticks according to the forth subplot. I mean that all axis named y will have the same space between ticks 0 and 2, all axis named x will have the same space between -1 and 0. Maybe it would be sufficient to set plot as squares. How please?
import numpy as np
import matplotlib.pyplot as plt
from numpy import array
import matplotlib as mpl
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
# Plot figure with size
fig, ax = plt.subplots(sharex=True)
plt.figure(figsize=(12, 9))
# Subplots
fig1 = plt.subplot(231)
plt.plot(x, y**2)
fig1.set_xlim(0e-13,2e-13)
fig1.set_ylim(-1.15e-14,0.01e-14)
fig2=plt.subplot(232)
plt.plot(x, y**2)
fig2.set_xlim(0e-13,2e-13)
fig2.set_ylim(-7.3e-15,7.3e-15)
fig3=plt.subplot(233)
plt.plot(x, y**2)
fig3.set_ylim(0e-13,1.2e-13)
fig3.set_xlim(0e-13,2e-13)
# Subplots with arrows
fig4=plt.subplot(234)
plt.plot(x, y**2)
fig4.set_xlim(-1.15e-14,0.01e-14)
fig4.set_ylim(-7.3e-15,7.3e-15)
fig5=plt.subplot(235)
plt.plot(x, y**2)
fig5.set_xlim(-7.3e-15,7.3e-15)
fig5.set_ylim(0e-13,1.2e-13)
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
fig6=plt.subplot(236)
plt.plot(x, y**2)
fig6.set_xlim(-1.5e-14,0e-14)
fig6.set_ylim(0e-13,1.2e-13)
plt.show()
The way to achieve this is outlined best by this excellent answer by #ImportanceOfBeingErnest. Basically you manually calculate the scaling to adhere to the ratio between the y and x limits of each existing axis by something like
fig1.set_aspect(np.diff(fig1.get_xlim())/np.diff(fig1.get_ylim()))
But please note that this must be done after any calls to set_ylim() and set_xlim() as it must use the final limits in order to correctly calculate the requisite ratio. set_xticks() and set_yticks() can safely be called before or after with the same effect.
Applying this to each of the six axes will produce

Put legend on a place of a subplot

I would like to put a legend on a place of a central subplot (and remove it).
I wrote this code:
import matplotlib.pylab as plt
import numpy as np
f, ax = plt.subplots(3,3)
x = np.linspace(0, 2. * np.pi, 1000)
y = np.sin(x)
for axis in ax.ravel():
axis.plot(x, y)
legend = axis.legend(loc='center')
plt.show()
I do not know how to hide a central plot. And why legend is not appear?
This link did not help http://matplotlib.org/1.3.0/examples/pylab_examples/legend_demo.html
There are several problems with your code. In your for loop, you are attempting to plot a legend on each axis (the loc="center" refers to the axis, not the figure), yet you have not given a plot label to represent in your legend.
You need to choose the central axis in your loop and only display a legend for this axis. This iteration of the loop should have no plot call either, if you don't want a line there. You can do this with a set of conditionals like I have done in the following code:
import matplotlib.pylab as plt
import numpy as np
f, ax = plt.subplots(3,3)
x = np.linspace(0, 2. * np.pi, 1000)
y = np.sin(x)
handles, labels = (0, 0)
for i, axis in enumerate(ax.ravel()):
if i == 4:
axis.set_axis_off()
legend = axis.legend(handles, labels, loc='center')
else:
axis.plot(x, y, label="sin(x)")
if i == 3:
handles, labels = axis.get_legend_handles_labels()
plt.show()
This gives me the following image:

Matplotlib - Plot line with width equivalent to a range of values, not just one single

What I want is quite simply adding to a normal pyplot.plot a horizontal line with the width equal to a range of values that I give. Preferably I also want to be able to adjust the transparency of this "block", that is the wide horizontal line.
I'd recommend to just use pyplot.fill_between():
import matplotlib.pyplot as pl
import numpy as np
fig = pl.figure()
ax = fig.add_subplot(111)
x = np.random.random(10)
y = np.random.random(10)
ax.scatter(x, y)
ax.fill_between(ax.get_xlim(), min(y), max(y), color='k', alpha=0.2)

How to draw vertical lines on a given plot

Given a plot of a signal in time representation, how can I draw lines marking the corresponding time index?
Specifically, given a signal plot with a time index ranging from 0 to 2.6 (seconds), I want to draw vertical red lines indicating the corresponding time index for the list [0.22058956, 0.33088437, 2.20589566]. How can I do it?
The standard way to add vertical lines that will cover your entire plot window without you having to specify their actual height is plt.axvline
import matplotlib.pyplot as plt
plt.axvline(x=0.22058956)
plt.axvline(x=0.33088437)
plt.axvline(x=2.20589566)
OR
xcoords = [0.22058956, 0.33088437, 2.20589566]
for xc in xcoords:
plt.axvline(x=xc)
You can use many of the keywords available for other plot commands (e.g. color, linestyle, linewidth ...). You can pass in keyword arguments ymin and ymax if you like in axes corrdinates (e.g. ymin=0.25, ymax=0.75 will cover the middle half of the plot). There are corresponding functions for horizontal lines (axhline) and rectangles (axvspan).
matplotlib.pyplot.vlines vs. matplotlib.pyplot.axvline
These methods are applicable to plots generated with seaborn and pandas.DataFrame.plot, which both use matplotlib.
The difference is that vlines accepts one or more locations for x, while axvline permits one location.
Single location: x=37.
Multiple locations: x=[37, 38, 39].
vlines takes ymin and ymax as a position on the y-axis, while axvline takes ymin and ymax as a percentage of the y-axis range.
When passing multiple lines to vlines, pass a list to ymin and ymax.
Also matplotlib.axes.Axes.vlines and matplotlib.axes.Axes.axvline for the object-oriented API.
If you're plotting a figure with something like fig, ax = plt.subplots(), then replace plt.vlines or plt.axvline with ax.vlines or ax.axvline, respectively.
See this answer for horizontal lines with .hlines.
import numpy as np
import matplotlib.pyplot as plt
xs = np.linspace(1, 21, 200)
plt.figure(figsize=(10, 7))
# only one line may be specified; full height
plt.axvline(x=36, color='b', label='axvline - full height')
# only one line may be specified; ymin & ymax specified as a percentage of y-range
plt.axvline(x=36.25, ymin=0.05, ymax=0.95, color='b', label='axvline - % of full height')
# multiple lines all full height
plt.vlines(x=[37, 37.25, 37.5], ymin=0, ymax=len(xs), colors='purple', ls='--', lw=2, label='vline_multiple - full height')
# multiple lines with varying ymin and ymax
plt.vlines(x=[38, 38.25, 38.5], ymin=[0, 25, 75], ymax=[200, 175, 150], colors='teal', ls='--', lw=2, label='vline_multiple - partial height')
# single vline with full ymin and ymax
plt.vlines(x=39, ymin=0, ymax=len(xs), colors='green', ls=':', lw=2, label='vline_single - full height')
# single vline with specific ymin and ymax
plt.vlines(x=39.25, ymin=25, ymax=150, colors='green', ls=':', lw=2, label='vline_single - partial height')
# place the legend outside
plt.legend(bbox_to_anchor=(1.0, 1), loc='upper left')
plt.show()
Seaborn axes-level plot
import seaborn as sns
# sample data
fmri = sns.load_dataset("fmri")
# x index for max y values for stim and cue
c_max, s_max = fmri.pivot_table(index='timepoint', columns='event', values='signal', aggfunc='mean').idxmax()
# plot
g = sns.lineplot(data=fmri, x="timepoint", y="signal", hue="event")
# y min and max
ymin, ymax = g.get_ylim()
# vertical lines
g.vlines(x=[c_max, s_max], ymin=ymin, ymax=ymax, colors=['tab:orange', 'tab:blue'], ls='--', lw=2)
Seaborn figure-level plot
Each axes must be iterated through.
import seaborn as sns
# sample data
fmri = sns.load_dataset("fmri")
# used to get the index values (x) for max y for each event in each region
fpt = fmri.pivot_table(index=['region', 'timepoint'], columns='event', values='signal', aggfunc='mean')
# plot
g = sns.relplot(data=fmri, x="timepoint", y="signal", col="region", hue="event", kind="line")
# iterate through the axes
for ax in g.axes.flat:
# get y min and max
ymin, ymax = ax.get_ylim()
# extract the region from the title for use in selecting the index of fpt
region = ax.get_title().split(' = ')[1]
# get x values for max event
c_max, s_max = fpt.loc[region].idxmax()
# add vertical lines
ax.vlines(x=[c_max, s_max], ymin=ymin, ymax=ymax, colors=['tab:orange', 'tab:blue'], ls='--', lw=2, alpha=0.5)
For 'region = frontal' the maximum value of both events occurs at 5.
Barplot and Histograms
Note that bar plot tick locations have a zero-based index, regardless of the axis tick labels, so select x based on the bar index, not the tick label.
ax.get_xticklabels() will show the locations and labels.
import pandas as pd
import seaborn as sns
# load data
tips = sns.load_dataset('tips')
# histogram
ax = tips.plot(kind='hist', y='total_bill', bins=30, ec='k', title='Histogram with Vertical Line')
_ = ax.vlines(x=16.5, ymin=0, ymax=30, colors='r')
# barplot
ax = tips.loc[5:25, ['total_bill', 'tip']].plot(kind='bar', figsize=(15, 4), title='Barplot with Vertical Lines', rot=0)
_ = ax.vlines(x=[0, 17], ymin=0, ymax=45, colors='r')
Time Series Axis
The dates in the dataframe to be the x-axis must be a datetime dtype. If the column or index is not the correct type, it must be converted with pd.to_datetime.
If an array or list of dates is being used, refer to Converting numpy array of strings to datetime or Convert datetime list into date python, respectively.
x will accept a date like '2020-09-24' or datetime(2020, 9, 2).
import pandas_datareader as web # conda or pip install this; not part of pandas
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
# get test data; this data is downloaded with the Date column in the index as a datetime dtype
df = web.DataReader('^gspc', data_source='yahoo', start='2020-09-01', end='2020-09-28').iloc[:, :2]
# display(df.head(2))
High Low
Date
2020-09-01 3528.030029 3494.600098
2020-09-02 3588.110107 3535.229980
# plot dataframe; the index is a datetime index
ax = df.plot(figsize=(9, 6), title='S&P 500', ylabel='Price')
# add vertical lines
ax.vlines(x=[datetime(2020, 9, 2), '2020-09-24'], ymin=3200, ymax=3600, color='r', label='test lines')
ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
plt.show()
For multiple lines
xposition = [0.3, 0.4, 0.45]
for xc in xposition:
plt.axvline(x=xc, color='k', linestyle='--')
To add a legend and/or colors to some vertical lines, then use this:
import matplotlib.pyplot as plt
# x coordinates for the lines
xcoords = [0.1, 0.3, 0.5]
# colors for the lines
colors = ['r','k','b']
for xc,c in zip(xcoords,colors):
plt.axvline(x=xc, label='line at x = {}'.format(xc), c=c)
plt.legend()
plt.show()
Results
Calling axvline in a loop, as others have suggested, works, but it can be inconvenient because
Each line is a separate plot object, which causes things to be very slow when you have many lines.
When you create the legend each line has a new entry, which may not be what you want.
Instead, you can use the following convenience functions which create all the lines as a single plot object:
import matplotlib.pyplot as plt
import numpy as np
def axhlines(ys, ax=None, lims=None, **plot_kwargs):
"""
Draw horizontal lines across plot
:param ys: A scalar, list, or 1D array of vertical offsets
:param ax: The axis (or none to use gca)
:param lims: Optionally the (xmin, xmax) of the lines
:param plot_kwargs: Keyword arguments to be passed to plot
:return: The plot object corresponding to the lines.
"""
if ax is None:
ax = plt.gca()
ys = np.array((ys, ) if np.isscalar(ys) else ys, copy=False)
if lims is None:
lims = ax.get_xlim()
y_points = np.repeat(ys[:, None], repeats=3, axis=1).flatten()
x_points = np.repeat(np.array(lims + (np.nan, ))[None, :], repeats=len(ys), axis=0).flatten()
plot = ax.plot(x_points, y_points, scalex = False, **plot_kwargs)
return plot
def axvlines(xs, ax=None, lims=None, **plot_kwargs):
"""
Draw vertical lines on plot
:param xs: A scalar, list, or 1D array of horizontal offsets
:param ax: The axis (or none to use gca)
:param lims: Optionally the (ymin, ymax) of the lines
:param plot_kwargs: Keyword arguments to be passed to plot
:return: The plot object corresponding to the lines.
"""
if ax is None:
ax = plt.gca()
xs = np.array((xs, ) if np.isscalar(xs) else xs, copy=False)
if lims is None:
lims = ax.get_ylim()
x_points = np.repeat(xs[:, None], repeats=3, axis=1).flatten()
y_points = np.repeat(np.array(lims + (np.nan, ))[None, :], repeats=len(xs), axis=0).flatten()
plot = ax.plot(x_points, y_points, scaley = False, **plot_kwargs)
return plot
In addition to the plt.axvline and plt.plot((x1, x2), (y1, y2)) or plt.plot([x1, x2], [y1, y2]) as provided in the answers above, one can also use
plt.vlines(x_pos, ymin=y1, ymax=y2)
to plot a vertical line at x_pos spanning from y1 to y2 where the values y1 and y2 are in absolute data coordinates.

Categories

Resources