Discontinuous timeseries plot with dates on x-axis

Discontinuous timeseries plot with dates on x-axis - python

I got data for several months, but in between some months are missing. This looks quite strange if I plot the whole dataset in one plot (lots of empty space in between).
I wrote the small example script to show how it works (based on: Python/Matplotlib - Is there a way to make a discontinuous axis?)
The problem: I can't get both x-axes to use the same date formatting! Either ax or ax2 is correct, but never both of them.
Do you have any idea?
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import datetime
def getDates(startdate, enddate):
    """Return every day from ``startdate`` to ``enddate``, both inclusive.

    Args:
        startdate: First ``datetime`` (or ``date``) of the range.
        enddate: Last ``datetime`` (or ``date``) of the range.

    Returns:
        A list holding ``startdate`` plus 0, 1, 2, ... whole days, up to and
        including ``enddate``; empty when ``enddate`` precedes ``startdate``.
    """
    # One extra day is added so that enddate itself ends up in the result.
    days = (enddate + datetime.timedelta(days=1) - startdate).days
    return [startdate + datetime.timedelta(days=x) for x in range(days)]
# Two month-long date ranges (January and March 2013); February is absent.
dates1 = getDates(datetime.datetime(2013,1,1), datetime.datetime(2013,1,31))
dates2 = getDates(datetime.datetime(2013,3,1), datetime.datetime(2013,3,31))
dates = dates1+dates2
# Dummy y-values: one increasing integer per date.
data = np.arange(len(dates))
# A single locator and a single formatter instance, installed on both x-axes below.
Locator = mpl.dates.DayLocator(interval=5)
Formatter = mpl.dates.DateFormatter('%d-%m-%y')
# Two side-by-side axes with a shared y-axis stand in for one broken x-axis.
fig,(ax,ax2) = plt.subplots(1,2,sharey=True)
fig.subplots_adjust(wspace=0.05)
fig.set_size_inches(10,3)
# The full dataset is drawn on both axes; the x-limits below pick one month each.
ax.plot(dates, data)
ax2.plot(dates, data)
ax.legend(loc=1)
ax.set_ylim( 0, 61 )
ax.set_xlim( datetime.datetime(2013,1,1), datetime.datetime(2013,1,31) )
ax2.set_xlim( datetime.datetime(2013,3,1), datetime.datetime(2013,3,31) )
# Rotate the date labels of both axes by 30 degrees.
labels = ax.get_xticklabels()
for label in labels: label.set_rotation(30)
labels = ax2.get_xticklabels()
for label in labels: label.set_rotation(30)
# Hide the two inner spines so the panels read as one plot with a gap.
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax.tick_params(right='off')
ax2.tick_params(left='off')
ax2.yaxis.tick_right()
# NOTE: the very same Locator object is handed to both axes here; this sharing
# is what makes only one of the two axes show the expected ticks.
ax.xaxis.set_major_locator(Locator)
ax.xaxis.set_major_formatter(Formatter)
ax2.xaxis.set_major_locator(Locator)
ax2.xaxis.set_major_formatter(Formatter)
plt.savefig("test.png", bbox_inches='tight')
Result:

You have found an interesting detail about the internals of matplotlib. The locator object you pass into set_major_locator is the object the axes uses to figure out where to put its ticks, and here both axes were using the same locator object. As part of the draw, the locator generates a list of tick positions based on the limits of the axes; once this has been done for the second axes, none of those ticks are visible in the first axes. You just need to pass in distinct locator objects (separate instances), done here with copy.
import datetime
import copy
def getDates(startdate, enddate):
    """Return a list with one entry per day between two dates, inclusive.

    Args:
        startdate: ``datetime`` (or ``date``) the list starts at.
        enddate: ``datetime`` (or ``date``) the list ends at.

    Returns:
        ``startdate`` advanced by 0, 1, 2, ... whole days, up to and including
        ``enddate``. The list is empty when ``enddate`` is before ``startdate``.
    """
    # Adding one day makes the range inclusive of enddate.
    days = (enddate + datetime.timedelta(days=1) - startdate).days
    return [startdate + datetime.timedelta(days=x) for x in range(days)]
# Two month-long date ranges (January and March 2013); February is absent.
dates1 = getDates(datetime.datetime(2013, 1, 1), datetime.datetime(2013, 1, 31))
dates2 = getDates(datetime.datetime(2013, 3, 1), datetime.datetime(2013, 3, 31))
dates = dates1 + dates2
data = np.arange(len(dates))

# Templates only: each axis receives its own copy further down.
Locator = mpl.dates.DayLocator(interval=5)
Formatter = mpl.dates.DateFormatter('%d-%m-%y')

# Two side-by-side axes with a shared y-axis stand in for one broken x-axis.
fig, (ax, ax2) = plt.subplots(1, 2, sharey=True, tight_layout=True)
fig.subplots_adjust(wspace=0.05)
fig.set_size_inches(10, 3, forward=True)

# The full dataset is drawn on both axes; the x-limits pick one month each.
ax.plot(dates, data)
ax2.plot(dates, data)
ax.legend(loc=1)
ax.set_ylim(0, 61)
ax.set_xlim(datetime.datetime(2013, 1, 1), datetime.datetime(2013, 1, 31))
ax2.set_xlim(datetime.datetime(2013, 3, 1), datetime.datetime(2013, 3, 31))

# Rotate the date labels of both axes by 30 degrees.
labels = ax.get_xticklabels()
for label in labels:
    label.set_rotation(30)
labels = ax2.get_xticklabels()
for label in labels:
    label.set_rotation(30)

# Hide the two inner spines so the panels read as one plot with a gap.
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
# Real booleans are required here: a non-empty string such as 'off' is truthy
# and would leave the ticks switched on.
ax.tick_params(right=False)
ax2.tick_params(left=False)
ax2.yaxis.tick_right()

# note the copy here: a locator must not be shared between two axes,
# otherwise only the axes drawn last shows the expected ticks.
ax.xaxis.set_major_locator(copy.copy(Locator))
ax.xaxis.set_major_formatter(copy.copy(Formatter))
ax2.xaxis.set_major_locator(copy.copy(Locator))
ax2.xaxis.set_major_formatter(copy.copy(Formatter))

Related

matplotlib fill_between ignore areas of non sequential data

I'm trying to fill the area under the curve where the y-value is 1. The x-axis is a datetime array with non-regular values. As you can see the fill also includes areas where there is no x-data. Is there a way to tell fill_between to only fill "between" valid data? i.e. in the plot below I'd like the areas between "missing" samples to be white
# Sketch of the setup; `datetimes` and `[ones and zeros]` are placeholders,
# not runnable values.
tx = array(datetimes) # Array of irregular datetimes
ty = array([ones and zeros]) # Array of ones and zeros same size as tx
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Samples drawn as red dots.
ax.plot(tx, ty, 'r.')
# Shade between y=0 and y=1, restricted by the ty==1 selection.
ax.fill_between(tx, 0, 1, where(ty==1))

This might be what you're aiming for.
If it is then you can use rectangular patches. In this code:
y is the list of values meant to correspond to your 'irregular' pattern.
delta_float measures the horizontal distance in the graph corresponding to delta, the distance between ticks.
Notice that the patches are positioned and sized based on dates and delta_float units respectively.
import datetime
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange, date2num
from numpy import arange
date1 = datetime.datetime(2000, 3, 2)
date2 = datetime.datetime(2000, 3, 6)
delta = datetime.timedelta(hours=6)
# Evenly spaced sample positions, `delta` apart, between date1 and date2.
dates = drange(date1, date2, delta)
# Width of one sample step in axis units. Dividing the total span by
# len(dates) comes out one interval short, so the spacing is taken directly.
delta_float = dates[1] - dates[0]
y = [1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,0]

fig, ax = plt.subplots()
# plot_date is deprecated; the date locators/formatter are set explicitly below.
ax.plot(dates, y, 'r.')

# Shade every run of consecutive 1-samples from its first to its last sample.
# A run consisting of a single sample therefore gets a zero-width patch.
run_start = None
for i, flag in enumerate(y + [0]):  # the trailing 0 closes a run that reaches the end
    if flag and run_start is None:
        run_start = i
    elif not flag and run_start is not None:
        steps = i - 1 - run_start
        ax.add_patch(patches.Rectangle((dates[run_start], 0), steps * delta_float, 1, color='grey'))
        run_start = None

ax.xaxis.set_major_locator(DayLocator())
ax.xaxis.set_minor_locator(HourLocator(arange(0, 25, 6)))
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
# Full timestamp for the interactive cursor read-out.
ax.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
fig.autofmt_xdate()
plt.show()

python matplotlib setting years in x axis

I have coded the following program with matplotlib to graph no.of elements with time.
import pylab
import numpy as np
import datetime
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
# date1/date2 are assigned but not used anywhere else in this snippet.
date1 = datetime.date(1995, 1, 1)
date2 = datetime.date(2004, 4, 12)
years = YearLocator() # every year
months = MonthLocator() # every month
yearsFmt = DateFormatter('%Y')
# NOTE(review): `ax` (and `plt` further down) is used without being bound in
# this snippet.
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(yearsFmt)
ax.xaxis.set_minor_locator(months)
ax.autoscale_view()
pylab.ylim(0, 250)
# Six evenly spaced y ticks: 0, 50, ..., 250.
plt.yticks(np.linspace(0,250,6,endpoint=True))
pylab.xlabel('YEAR')
pylab.ylabel('No. of sunspots')
pylab.title('SUNSPOT VS YEAR GRAPH')
# Each pair is (x, y); the x values are plain numbers, not dates.
a=[[50,50],[100,100],[250, 250],[200,200],[150,150]]
# zip(*a) splits the pairs into an x sequence and a y sequence; markers only.
plt.plot(*zip(*a), marker='o', color='r', ls='')
The output is as follows
However, I was expecting it to display years instead of numbers on the x-axis.

You are plotting years, but the years are 50, 100, 250, 200, and 150. These are the first element in the lists inside of a, which is passed to pyplot.plot as the x values.
You want to define your dates somewhere, though you'll also probably want to set the xticks to be the same as the dates you're plotting, as I can tell you care about the graph looking neat.
import pylab
import numpy as np
import datetime
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
Also, don't forget to import pyplot
import matplotlib.pyplot as plt
Here are some example dates. You can change them to whatever exact date you have for the sunspot measurements.
# Each entry is [date, sunspot count]; the dates become the x values.
a = [
    [datetime.date(1995, 1, 1), 50],
    [datetime.date(2000, 1, 1), 100],
    [datetime.date(2005, 1, 1), 250],
    [datetime.date(2010, 1, 1), 200],
    [datetime.date(2015, 1, 1), 150],
]
years = YearLocator()  # every year
months = MonthLocator()  # every month
yearsFmt = DateFormatter('%Y')

# Call gca to get current axis before you modify the axis.
ax = plt.gca()
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(yearsFmt)
ax.xaxis.set_minor_locator(months)
ax.autoscale_view()
pylab.ylim(0, 250)
# Six evenly spaced y ticks: 0, 50, ..., 250.
plt.yticks(np.linspace(0, 250, 6, endpoint=True))

# Pick out the dates from the a array to use them as xtick labels.
dates = [date for date, sunspot in a]
plt.xticks(dates)

pylab.xlabel('YEAR')
pylab.ylabel('No. of sunspots')
pylab.title('SUNSPOT VS YEAR GRAPH')
# zip(*a) splits the pairs into a date sequence and a count sequence; markers only.
plt.plot(*zip(*a), marker='o', color='r', ls='')
plt.show()

Python: Plot a graph with the same x-axis values

Say that I have two lists:
yvalues = [30, 40, -20, 0, -10, 20, 45, 12, -5, ....]
Dates = ['20110103', '20110103', '20110103', '20110108', '20110108', '20110108', '20110113', '20110113', '20110113', ....]
The first entry in Dates does correspond to the first value in yvalues and so on. The dates repeat themselves because I observe multiple yvalues every 5 days.
Now if I want to plot the yvalues with Dates as x-axis, I do:
plt.plot(yvalues)
plt.xticks(dates)
It gives me an error. If I try: plt.plot(Dates, yvalues), I get this nasty graph:
How can I plot on the x-axis the correct date values (i.e. 20110103) and without the straight lines that separates the observation?
UPDATE
I don't want my values to be plotted on the same vertical line for each day, but one after the other. In fact, there is a 5-minute time difference between consecutive observations. I decided to convert my Dates list using:
Dates = [datetime.date(int(d[0:4]), int(d[4:6]), int(d[6:8])) for d in Dates]
Then I do:
plt.plot(dates, yvalues)
and get the following plot:
Clearly, this picture shows the values on the same date to be on the same vertical lines. I still have the annoying straight lines that separate each dates.
Now if I don't use any dates as for the x-axis, I get the following graph (which is the one that I want but I want the x-axis as dates):
Sample dataset available here

Well, after a bit of discussion, here's what I eventually landed on:
import datetime
import random
import numpy as np
import datetime
import itertools
dates, allSpillovers, allBins, allDigitised = [], [], [], []


def _flush_day(day, samples):
    """Record one finished day if it has more than 70 samples.

    Appends ``[mean, mean - stdev, mean + stdev]`` followed by the sorted
    samples to ``allSpillovers`` and the matching ``day`` to ``dates``.
    """
    if len(samples) > 70:
        mean = np.mean(samples)
        stdev = np.std(samples)
        allSpillovers.append([mean, mean - stdev, mean + stdev] + sorted(samples))
        dates.append(day)


# Each data line is "<ignored>,<YYYY-MM-DD>,<spillover>"; the first line is skipped.
with open("year.dat") as datafile:
    previousDate = None
    spillovers = []
    for line in datafile.readlines()[1:]:
        _, strdate, spillover = line.split(",")
        year, month, day = [int(part) for part in strdate.split("-")]
        date = datetime.date(year, month, day)
        if date != previousDate:
            # A new day starts: close the previous one under its own date.
            _flush_day(previousDate, spillovers)
            spillovers = []
            previousDate = date
        # Every sample is kept, including the first one of each day.
        spillovers.append(float(spillover))
    # The last day has no following line to trigger its flush.
    _flush_day(previousDate, spillovers)

# Transpose: row 0 = means, row 1 = mean - stdev, row 2 = mean + stdev,
# rows 3.. = k-th smallest sample of each day. zip stops at the shortest day;
# itertools.zip_longest(*allSpillovers, fillvalue=0) would pad instead.
allSpillovers = list(zip(*allSpillovers))

from matplotlib import pyplot

print(len(dates), len(allSpillovers[0]), len(allSpillovers[1]))
fig = pyplot.figure()
ax = fig.add_subplot(1, 1, 1)
# Fill between neighbouring order-statistic rows; opacity peaks for the
# middle rows and fades towards the extremes.
for i in range(3, len(allSpillovers) - 1):
    alpha = 0.5 - abs(i / float(len(allSpillovers)) - 0.5)
    print(len(dates), len(allSpillovers[i]), len(allSpillovers[i + 1]))
    ax.fill_between(dates, allSpillovers[i], allSpillovers[i + 1], facecolor='green', interpolate=True, alpha=alpha, linewidth=0)
# Daily mean in blue, zero reference line in red.
ax.plot(dates, allSpillovers[0], color="blue", linewidth=2)
ax.plot(dates, [0 for _ in dates], color="red", linewidth=2)
ax.grid()
fig.autofmt_xdate()
pyplot.show()

Try this:
>>> from matplotlib import pyplot as plt
>>> Dates = ['20110103', '20110103', '20110103', '20110108', '20110108', '20110108', '20110113', '20110113', '20110113']
>>> yvalues = [30, 40, -20, 0, -10, 20, 45, 12, -5]
>>> x=range(len(Dates))
>>> plt.xticks(x,Dates)
>>> plt.plot(x,yvalues)
>>> plt.show()

Group labels in matplotlib barchart using Pandas MultiIndex

I have a pandas DataFrame with a MultiIndex:
group subgroup obs_1 obs_2
GroupA Elem1 4 0
Elem2 34 2
Elem3 0 10
GroupB Elem4 5 21
and so on. As noted in this SO question this is actually doable in matplotlib, but I'd rather (if possible) use the fact that I already know the hierarchy (thanks to the MultiIndex). Currently what's happening is that the index is shown as a tuple.
Is such a thing possible?

If you have just two levels in the MultiIndex, I believe the following will be easier:
plt.figure()
ax = plt.gca()
DF.plot(kind='bar', ax=ax)
plt.grid(True, 'both')
# One tick per bar group (row of DF); these become the minor (subgroup) ticks.
minor_XT = ax.get_xaxis().get_majorticklocs()
# Temporarily attach the tick positions to the rows to find the first
# position of every top-level group.
DF['XT_V'] = minor_XT
# sort=False keeps the groups in order of appearance, which is the order
# .unique() uses for the labels below; sorted keys would mislabel an
# unsorted index.
major_XT = DF.groupby(by=DF.index.get_level_values(0), sort=False).first()['XT_V'].tolist()
del DF['XT_V']
ax.set_xticks(minor_XT, minor=True)
ax.set_xticklabels(DF.index.get_level_values(1), minor=True)
# Push the major (group) labels below the minor (subgroup) labels.
ax.tick_params(which='major', pad=15)
_ = plt.xticks(major_XT, (DF.index.get_level_values(0)).unique(), rotation=0)
And a slightly more involved, but more general, solution (it doesn't matter how many levels you have):
def cvt_MIdx_tcklab(df):
    """Build multi-line tick labels from the MultiIndex of ``df``.

    Within every index level a value is kept only at its first occurrence
    and blanked everywhere else. Each row's levels are then joined with
    newlines, innermost level first.

    Args:
        df: DataFrame whose index entries are tuples with one item per
            level. Assumes the index values are strings (blanking writes
            ``''`` into the label array) - TODO confirm for other dtypes.

    Returns:
        A list with one label string per row of ``df``.
    """
    Midx_ar = np.array(df.index.tolist())
    Blank_ar = Midx_ar.copy()
    for i in range(Midx_ar.shape[1]):
        # Row numbers at which each distinct value of this level appears first.
        _, first_idx = np.unique(Midx_ar[:, i], return_index=True)
        repeated = np.ones(Midx_ar.shape[0], dtype=bool)
        repeated[first_idx] = False
        Blank_ar[repeated, i] = ''
    # A real list (not a lazy map object) so it can be used directly as tick labels.
    return ['\n'.join(row) for row in np.fliplr(Blank_ar)]
# Draw the bar chart on a fresh figure's current axes.
plt.figure()
ax = plt.gca()
DF.plot(kind='bar', ax=ax)
# Replace the default tick labels with the labels built from DF's index.
ax.set_xticklabels(cvt_MIdx_tcklab(DF), rotation=0)

I think that there isn't a nice and standard way of plotting multiindex dataframes. I found the following solution by @Stein to be aesthetically pleasant. I've adapted his example to your data:
import pandas as pd
import matplotlib.pyplot as plt
from itertools import groupby
import numpy as np
%matplotlib inline
# Members of the two index levels and names of the observation columns.
group = ('Group_A', 'Group_B')
subgroup = ('elem1', 'elem2', 'elem3', 'elem4')
obs = ('obs_1', 'obs_2')

# The first three subgroups belong to Group_A, the last one to Group_B.
index = pd.MultiIndex.from_tuples(
    [(group[0], name) for name in subgroup[:3]] + [(group[1], subgroup[3])],
    names=['group', 'subgroup'],
)

# One row per index entry, one column per observation.
values = np.array([[4,0],[43,2],[0,10],[5,21]])
df = pd.DataFrame(
    {column: values[:, position] for position, column in enumerate(obs)},
    index=index,
)
def add_line(ax, xpos, ypos):
    """Add a short vertical gray line to ``ax``.

    The line runs from ``(xpos, ypos + 0.1)`` to ``(xpos, ypos)``; both
    coordinates are interpreted through ``ax.transAxes``.

    Args:
        ax: Axes the line is added to.
        xpos: Horizontal position of the line.
        ypos: Vertical position of the lower end of the line.
    """
    line = plt.Line2D([xpos, xpos], [ypos + .1, ypos],
                      transform=ax.transAxes, color='gray')
    # Clipping is switched off so the line is not cut at the axes boundary.
    line.set_clip_on(False)
    ax.add_line(line)
def label_len(my_index, level):
    """Return the consecutive runs of labels found in one index level.

    Args:
        my_index: Index object providing ``get_level_values``.
        level: Level to inspect.

    Returns:
        A list of ``(label, run_length)`` tuples in index order. Only
        adjacent equal labels are merged, so a label that reappears later
        starts a new run.
    """
    labels = my_index.get_level_values(level)
    return [(k, sum(1 for _ in g)) for k, g in groupby(labels)]
def label_group_bar_table(ax, df):
    """Write the index levels of ``df`` as rows of grouped labels under ``ax``.

    The innermost level is written first at ``y = -0.1``; every further
    level goes 0.1 lower. Each run of equal labels gets one centred text
    plus separator lines at its edges. All positions are passed with
    ``ax.transAxes``.

    Args:
        ax: Axes holding the bar chart of ``df``.
        df: DataFrame whose index supplies the labels.
    """
    ypos = -.1
    # Share of the axes width taken by one row of df.
    scale = 1. / df.index.size
    for level in reversed(range(df.index.nlevels)):
        pos = 0
        for label, rpos in label_len(df.index, level):
            # Centre the text over the rpos rows covered by this label.
            lxpos = (pos + .5 * rpos) * scale
            ax.text(lxpos, ypos, label, ha='center', transform=ax.transAxes)
            add_line(ax, pos * scale, ypos)
            pos += rpos
        # Closing separator at the right edge of the last run.
        add_line(ax, pos * scale, ypos)
        ypos -= .1
# Plain (unstacked) bar chart; df.plot returns the axes it drew on.
ax = df.plot(kind='bar',stacked=False)
#Below 2 lines remove default labels
ax.set_xticklabels('')
ax.set_xlabel('')
# Draw the grouped index labels in place of the removed defaults.
label_group_bar_table(ax, df)
Which produces:

How to create a grouped bar chart of a hierarchical dataset with 2 levels
You can create a subplot for each group and stick them together with wspace=0. The width of each subplot must be corrected according to the number of subgroups by using the width_ratios argument in the gridspec_kw dictionary so that all the columns have the same width.
Then there are limitless formatting choices to make. In the following example, I choose to draw horizontal grid lines in the background and a separation line between the groups by using the minor tick marks.
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import matplotlib.pyplot as plt # v 3.3.2
# Create sample DataFrame with MultiIndex
df = pd.DataFrame(dict(group = ['GroupA', 'GroupA', 'GroupA', 'GroupB'],
                       subgroup = ['Elem1', 'Elem2', 'Elem3', 'Elem4'],
                       obs_1 = [4, 34, 0, 5],
                       obs_2 = [0, 2, 10, 21]))
df.set_index(['group', 'subgroup'], inplace=True)

# Create figure with a subplot for each group with a relative width that
# is proportional to the number of subgroups
groups = df.index.levels[0]
nplots = groups.size
plots_width_ratios = [df.xs(group).index.size for group in groups]
fig, axes = plt.subplots(nrows=1, ncols=nplots, sharey=True, figsize=(6, 4),
                         gridspec_kw = dict(width_ratios=plots_width_ratios, wspace=0))
# With a single group plt.subplots returns a bare Axes instead of an array;
# normalise so the loop below works for any number of groups.
axes = np.atleast_1d(axes)

# Loop through array of axes to create grouped bar chart for each group
alpha = 0.3 # used for grid lines, bottom spine and separation lines between groups
for column, (group, ax) in enumerate(zip(groups, axes)):
    # Create bar chart with horizontal grid lines and no spines except bottom one
    df.xs(group).plot.bar(ax=ax, legend=None, zorder=2)
    ax.grid(axis='y', zorder=1, color='black', alpha=alpha)
    for spine in ['top', 'left', 'right']:
        ax.spines[spine].set_visible(False)
    ax.spines['bottom'].set_alpha(alpha)

    # Set and place x labels for groups
    ax.set_xlabel(group)
    ax.xaxis.set_label_coords(x=0.5, y=-0.15)

    # Format major tick labels for subgroups
    ax.set_xticklabels(ax.get_xticklabels(), rotation=0, ha='center')
    ax.tick_params(axis='both', which='major', length=0, pad=10)

    # Set and format minor tick marks for separation lines between groups: note
    # that except for the first subplot, only the right tick mark is drawn to avoid
    # duplicate overlapping lines so that when an alpha different from 1 is chosen
    # (like in this example) all the lines look the same.
    # The loop position identifies the first column, which avoids the
    # Axes.is_first_col() helper that newer Matplotlib no longer provides.
    if column == 0:
        ax.set_xticks([*ax.get_xlim()], minor=True)
    else:
        ax.set_xticks([ax.get_xlim()[1]], minor=True)
    ax.tick_params(which='minor', length=45, width=0.8, color=[0, 0, 0, alpha])

# Add legend using the labels and handles from the last subplot
fig.legend(*ax.get_legend_handles_labels(), frameon=False,
           bbox_to_anchor=(0.92, 0.5), loc="center left")

title = 'Grouped bar chart of a hierarchical dataset with 2 levels'
fig.suptitle(title, y=1.01, size=14)
Reference: this answer by gyx-hh

How do I change the range of the x-axis with datetimes in matplotlib?

I'm trying to plot a graph of dates on the x-axis and values on the y-axis. It works fine, except that I can't get the range of the x-axis to be appropriate. The x-axis range is always Jan 2012 to Jan 2016, despite my dates being from today. I am even specifying that xlim should be the first and last date.
I'm writing this for python-django, if that's relevant.
import datetime
import matplotlib.pyplot as plt
# All three x values are the same date.
x = [datetime.date(2014, 1, 29), datetime.date(2014, 1, 29), datetime.date(2014, 1, 29)]
y = [2, 4, 1]
fig, ax = plt.subplots()
ax.plot_date(x, y)
# Both limits are the same date here, because x[0] == x[-1].
ax.set_xlim([x[0], x[-1]])
# NOTE(review): FigureCanvas and HttpResponse are not imported in this snippet,
# and `return` appears outside any function - presumably this is the body of a
# Django view; confirm against the full source.
canvas = FigureCanvas(plt.figure(1))
response = HttpResponse(content_type='image/png')
canvas.print_png(response)
return response
And here is the output:

Edit:
Having seen the actual data from the OP, all of the values are at the same date/time, so matplotlib is automatically zooming the x-axis out. You can still manually set the x-axis limits with datetime objects.
If I do something like this on matplotlib v1.3.1:
import datetime
import matplotlib.pyplot as plt
# Three observations that all share the same date.
x = [datetime.date(2014, 1, 29)] * 3
y = [2, 4, 1]
fig, ax = plt.subplots()
# plot_date is deprecated; plot handles date objects directly. 'o' draws
# markers only, which is what plot_date drew by default.
ax.plot(x, y, 'o', markerfacecolor='CornflowerBlue', markeredgecolor='white')
fig.autofmt_xdate()
# Explicit limits a few days either side of the single date.
ax.set_xlim([datetime.date(2014, 1, 26), datetime.date(2014, 2, 1)])
ax.set_ylim([0, 5])
I get:
And the axes limits match the dates that I specified.

With help from Paul H's solution, I was able to change the range of my time-based x-axis.
Here is a more general solution for other beginners.
import datetime as dt

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
# Set X range. Using left and right variables makes it easy to change the range.
left = dt.date(2020, 3, 15)
right = dt.date(2020, 7, 15)

# Create scatter plot of Positive Cases.
# NOTE(review): x and y are not defined in this snippet; presumably x holds
# the dates and y the counts - they must exist before this point.
plt.scatter(
    x, y, c="blue", edgecolor="black",
    linewidths=1, marker="o", alpha=0.8, label="Total Positive Tested"
)

# Fetch the current axes once instead of calling plt.gca() for every setting.
ax = plt.gca()
# Format the date into months & days
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
# Change the tick interval
ax.xaxis.set_major_locator(mdates.DayLocator(interval=30))
# Puts x-axis labels on an angle
ax.xaxis.set_tick_params(rotation=30)
# Changes x-axis range
ax.set_xbound(left, right)
plt.show()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Discontinuous timeseries plot with dates on x-axis - python

Related

matplotlib fill_between ignore areas of non sequential data

python matplotlib setting years in x axis

Python: Plot a graph with the same x-axis values

Group labels in matplotlib barchart using Pandas MultiIndex

How do I change the range of the x-axis with datetimes in matplotlib?

Categories

Resources