I am currently learning how to import data and work with it in matplotlib, and I am having trouble even though I have the exact code from the book.
This is what the plot looks like. My question is: how can I remove the white space between the edges of the axes and the start and end of the data along the x-axis?
Here is the code:
import csv
from matplotlib import pyplot as plt
from datetime import datetime
# Get dates and high temperatures from file.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
    reader = csv.reader(f)
    # Consume the header row so the loop below only sees data rows.
    header_row = next(reader)
    #for index, column_header in enumerate(header_row):
    #    print(index, column_header)

    dates, highs = [], []
    for row in reader:
        # Column 0 is parsed as a YYYY-MM-DD date; column 1 is read as an int.
        current_date = datetime.strptime(row[0], "%Y-%m-%d")
        dates.append(current_date)
        high = int(row[1])
        highs.append(high)

# Plot data.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')

# Format plot.
plt.title("Daily high temperatures, July 2014", fontsize=24)
plt.xlabel('', fontsize=16)
# Let matplotlib lay out the date tick labels on the x axis.
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()
There is an automatic margin set at the edges, which ensures that the data fit nicely within the axis spines. In this case such a margin is probably desired on the y axis. By default it is set to 0.05 in units of axis span.
To set the margin to 0 on the x axis, use
plt.margins(x=0)
or
ax.margins(x=0)
depending on the context. Also see the documentation.
In case you want to get rid of the margin in the whole script, you can use
plt.rcParams['axes.xmargin'] = 0
at the beginning of your script (and likewise for y, of course). If you want to get rid of the margin entirely and permanently, you can change the corresponding lines in the matplotlib rc file:
axes.xmargin : 0
axes.ymargin : 0
Example
import seaborn as sns
import matplotlib.pyplot as plt
# Draw the same dataset twice, side by side: the left panel keeps the
# default margins, the right panel has its x margin removed.
tips = sns.load_dataset('tips')
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))
for axis, heading in ((ax1, 'Default Margin'), (ax2, 'Margins: x=0')):
    tips.plot(ax=axis, title=heading)
ax2.margins(x=0)
Alternatively, use plt.xlim(..) or ax.set_xlim(..) to manually set the limits of the axes such that there is no white space left.
If you only want to remove the margin on one side but not the other, e.g. remove the margin from the right but not from the left, you can use set_xlim() on a matplotlib axes object.
import seaborn as sns
import matplotlib.pyplot as plt
import math
# Plot log(x) for x = 1..max_x_value on two panels; the right panel pins its
# x limits so that only the right-hand margin is removed.
max_x_value = 100
x_values = list(range(1, max_x_value + 1))
y_values = [math.log(i) for i in x_values]
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
# seaborn is imported as ``sns`` above; the original ``sn`` was undefined.
sns.lineplot(ax=ax1, x=x_values, y=y_values)
sns.lineplot(ax=ax2, x=x_values, y=y_values)
ax2.set_xlim(-5, max_x_value)  # tune the -5 to your needs
I'm new to matplotlib, and trying to plot something quite difficult.
I would like to plot something like (taken from the matplotlib docs):
Except, I want the timeline (x-axis) and stems to have labels in time, like:
timeline = ['0:01:00', '0:02:00', '0:03:00', '0:04:00', ...]
stems1 = ['0:01:45', '0:03:55', '0:04:22', ...]
stems2 = ['0:02:21', '0:06:54', ...
Notes:
Timeline ticks are evenly spaced
stems1 and stems2 don't necessarily have the same number of points, but are in order (like a video timeline)
It would be even better if stems1 and stems2 were different colors.
If anyone could point me in the right direction, or even code a working example, it'd be greatly appreciated! Thank you for reading.
Edit:
Following #r-beginners's answer to this post
I have something like this:
# Annotate each timeline point twice: once with its label (blue) and once
# with its prediction (green). Both loops use the same anchor and offset.
for time, level, label, va in zip(timeline, levels, labels, verticalalignments):
    ax.annotate(label, xy=(time, level), xytext=(15, np.sign(level)*15),
                textcoords="offset points",
                verticalalignment=va,
                horizontalalignment="right",
                color='blue')
for time, level, pred, va in zip(timeline, levels, preds, verticalalignments):
    ax.annotate(pred, xy=(time, level), xytext=(15, np.sign(level)*15),
                textcoords="offset points",
                verticalalignment=va,
                horizontalalignment="right",
                color='green')
The issue is that the graphs overlap, both sets of stems are red, and the stems don't align with the timeline.
Edit 2:
With #r-beginners code, I've tried it with 2 new stems, where stem1 isn't being plotted completely:
stem1 = ['0:08:08', '0:08:52', '0:09:42', '0:10:20', '0:10:55', '0:11:24', '0:12:31', '0:13:07', '0:13:45', '0:14:16', '0:14:49', '0:15:20', '0:15:51', '0:16:21', '0:16:53', '0:17:28', '0:19:01', '0:19:22', '0:20:19', '0:20:48', '0:21:19', '0:22:05', '0:23:06', '0:23:34', '0:24:03', '0:24:30', '0:24:51', '0:25:18', '0:25:54', '0:26:25', '0:27:07', '0:28:05', '0:29:04', '0:29:30', '0:30:34', '0:32:57', '0:33:28', '0:33:57', '0:34:35', '0:35:01', '0:35:41', '0:36:06', '0:36:30', '0:37:01', '0:37:33', '0:38:06', '0:38:40', '0:39:21', '0:40:02', '0:40:22', '0:40:42', '0:41:32', '0:41:56', '0:43:20', '0:43:39', '0:44:02', '0:44:26', '0:45:04', '0:45:32', '0:46:02', '0:47:00', '0:47:42', '0:48:05', '0:48:35', '0:49:02', '0:49:25', '0:49:56', '0:50:43', '0:51:25', '0:51:43', '0:52:18', '0:52:49', '0:53:08']
stem2 = ['0:09:49', '0:10:24', '0:14:27', '0:24:31', '0:26:03']
Code afterwards:
# Create figure
fig, ax = plt.subplots(figsize=(100, 10), constrained_layout=True)
ax.set(title='TEST')

# Stem values: interleave stem1 and stem2 pairwise.
# NOTE: zip() stops at the shorter input, so only len(stem2) entries of
# stem1 end up in ``names``.
names = [e for row in zip(stem1, stem2) for e in row]

# Timeline
timeline = [datetime.datetime.strptime(n, '%H:%M:%S') for n in names]

# Stem levels: cycle through a fixed pattern of heights, one per name.
levels = np.tile([-5, 5, -3, 3, -1, 1], int(np.ceil(len(names)/6)))[:len(names)]

# Stems
ax.vlines(timeline, 0, levels, color='tab:red')

# Plot timeline
ax.plot(timeline, np.zeros_like(timeline), "-o", color="k", markerfacecolor="w")

# Plot GT labels and predictions
for time, level, name in zip(timeline, levels, names):
    ax.annotate(name, xy=(time, level),
                xytext=(15, np.sign(level)*3),
                textcoords='offset points',
                horizontalalignment='right',
                verticalalignment='bottom' if level > 0 else 'top',
                color='green' if level > 0 else 'blue')

# De-clutter axes
ax.yaxis.set_visible(False)
ax.spines[["left", "top", "right"]].set_visible(False)

# ...
minutes = mdates.MinuteLocator(interval=1)
minutes_fmt = mdates.DateFormatter('%M:%S')
ax.xaxis.set_major_locator(minutes)
ax.xaxis.set_major_formatter(minutes_fmt)

# Rotate x-ticks
plt.setp(ax.get_xticklabels(), rotation=30, ha="right")

# Save figure
plt.savefig('test.png', bbox_inches='tight')
The data was only partially available, so I made up the rest. I solved your problem by adapting the official example you referred to. To keep the labels from overlapping, create a list of alternating positive and negative placement heights and draw the stems as red vertical lines. Convert the stem strings to datetime values and draw the timeline. In the annotation loop, the sign of the placement height determines where each label is placed, and whether the time appears in stem2 determines its color. The x-axis tick spacing is set with MinuteLocator and the label format with DateFormatter.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.dates as mdates
from datetime import datetime
fig, ax = plt.subplots(figsize=(16, 4), constrained_layout=True)
ax.set(title="Time line demo ")
stem1 = ['0:08:08', '0:08:52', '0:09:42', '0:10:20', '0:10:55', '0:11:24', '0:12:31', '0:13:07', '0:13:45', '0:14:16', '0:14:49', '0:15:20', '0:15:51', '0:16:21', '0:16:53', '0:17:28', '0:19:01', '0:19:22', '0:20:19', '0:20:48', '0:21:19', '0:22:05', '0:23:06', '0:23:34', '0:24:03', '0:24:30', '0:24:51', '0:25:18', '0:25:54', '0:26:25', '0:27:07', '0:28:05', '0:29:04', '0:29:30', '0:30:34', '0:32:57', '0:33:28', '0:33:57', '0:34:35', '0:35:01', '0:35:41', '0:36:06', '0:36:30', '0:37:01', '0:37:33', '0:38:06', '0:38:40', '0:39:21', '0:40:02', '0:40:22', '0:40:42', '0:41:32', '0:41:56', '0:43:20', '0:43:39', '0:44:02', '0:44:26', '0:45:04', '0:45:32', '0:46:02', '0:47:00', '0:47:42', '0:48:05', '0:48:35', '0:49:02', '0:49:25', '0:49:56', '0:50:43', '0:51:25', '0:51:43', '0:52:18', '0:52:49', '0:53:08']
stem2 = ['0:09:49', '0:10:24', '0:14:27', '0:24:31', '0:26:03']
# Merge both stem lists into one chronologically sorted timeline.
stems = stem1 + stem2
# The original iterated over the undefined name ``stem``; it must be ``stems``.
timelines = sorted(datetime.strptime(s, '%H:%M:%S') for s in stems)
labels = [t.strftime('%H:%M:%S') for t in timelines]
# Cycle through alternating heights so neighbouring labels do not collide.
levels = np.tile([-7, 7, -5, 5, -3, 3, -1, 1], int(np.ceil(len(timelines)/8)))[:len(timelines)]

ax.vlines(timelines, 0, levels, color='tab:red')
ax.plot(timelines, np.zeros_like(timelines), "-o", color="k", markerfacecolor="w")

stem2_times = set(stem2)
for t, level, label in zip(timelines, levels, labels):
    # strftime zero-pads the hour ('00:09:49'); dropping the first character
    # gives the '0:09:49' form used in stem2.
    color = 'blue' if label[1:] in stem2_times else 'green'
    ax.annotate(label, xy=(t, level),
                xytext=(22, np.sign(level)*3), textcoords='offset points',
                horizontalalignment='right',
                verticalalignment='bottom' if level > 0 else 'top',
                color=color
                )
# Hide the y axis and the left, top and right spines.
ax.yaxis.set_visible(False)
ax.spines[["left", "top", "right"]].set_visible(False)
# Position the bottom spine at y = -8 in data coordinates.
ax.spines['bottom'].set_position(('data', -8))
# One major tick per minute, formatted as MM:SS.
minutes = mdates.MinuteLocator(interval=1)
minutes_fmt = mdates.DateFormatter('%M:%S')
ax.xaxis.set_major_locator(minutes)
ax.xaxis.set_major_formatter(minutes_fmt)
# Rotate the tick labels to vertical.
plt.setp(ax.get_xticklabels(), rotation=90, ha='center')
plt.show()
I am running a fundamental economic analysis, and when I get to visualising and charting I am not able to align the dates with the graph.
I want the most recent date entry to show on the right and the rest of the dates to show every two years.
I have tried everything I can think of and can't find the solution.
Here is my code:
%matplotlib inline
import pandas as pd
from matplotlib import pyplot
import matplotlib.dates as mdates
df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csvbgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=off&txtcolor=%23444444&ts=12&tts=12&width=1168&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=NAEXKP01EZQ657S&scale=left&cosd=1995-04-01&coed=2020-04-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Quarterly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2020-09-21&revision_date=2020-09-21&nd=1995-04-01')
# Use the DATE column as the index.
df = df.set_index('DATE')
# Rolling mean over a 12-row window.
df['12MonthAvg'] = df.rolling(window=12).mean().dropna(how='all')
# Rolling mean of the column above, again with window=12.
df['9MonthAvg'] = df['12MonthAvg'].rolling(window=12).mean().dropna(how='all')
df['Spread'] = df['12MonthAvg'] - df['9MonthAvg']
pyplot.style.use("seaborn")
pyplot.subplots(figsize=(10, 5), dpi=85)
# NOTE(review): every statement below calls df['Spread'].plot() again and
# applies a single setting to the Axes returned by that call.
df['Spread'].plot().set_title('EUROPE: GDP Q Growth Rate (12M/12M Avg Spread)', fontsize=16)
df['Spread'].plot().axhline(0, linestyle='-', color='r',alpha=1, linewidth=2, marker='')
df['Spread'].plot().spines['left'].set_position(('outward', 10))
df['Spread'].plot().spines['bottom'].set_position(('outward', 10))
df['Spread'].plot().spines['right'].set_visible(False)
df['Spread'].plot().spines['top'].set_visible(False)
df['Spread'].plot().yaxis.set_ticks_position('left')
df['Spread'].plot().xaxis.set_ticks_position('bottom')
df['Spread'].plot().text(0.50, 0.02, "Crossing red line downwards / Crossing red line Upwards",
transform=pyplot.gca().transAxes, fontsize=14, ha='center', color='blue')
# Assigns a date formatter to the Axes' fmt_xdata attribute.
df['Spread'].plot().fmt_xdata = mdates.DateFormatter('%Y-%m-%d')
# Print the last three values of the Spread column.
print(df['Spread'].tail(3))
pyplot.autoscale()
pyplot.show()
And the output:
This is the raw data:
There are a couple of corrections to make to your code.
In your URL insert "?" after fredgraph.csv. It starts so called query string,
where bgcolor is the first parameter.
Read your DataFrame with additional parameters:
df = pd.read_csv('...', parse_dates=[0], index_col=[0])
The aim is to:
read Date column as datetime,
set it as the index.
Create additional columns as:
df['12MonthAvg'] = df.NAEXKP01EZQ657S.rolling(window=12).mean()
df['9MonthAvg'] = df.NAEXKP01EZQ657S.rolling(window=9).mean()
df['Spread'] = df['12MonthAvg'] - df['9MonthAvg']
Corrections:
9MonthAvg (as I think) should be computed from the source column,
not from 12MonthAvg,
dropna here is not needed, as you create whole column anyway.
Now is the place to use dropna() on Spread column and save it in
a dedicated variable:
spread = df['Spread'].dropna()
Draw your figure the following way:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
plt.style.use("seaborn")
# Keep the Axes returned by subplots so every setting below targets it.
fig, ax = plt.subplots(figsize=(10, 5), dpi=85)
# Plot the spread against its datetime index as a solid line.
plt.plot_date(spread.index, spread, fmt='-')
ax.set_title('EUROPE: GDP Q Growth Rate (12M/12M Avg Spread)', fontsize=16)
# Horizontal red reference line at zero.
ax.axhline(0, linestyle='-', color='r',alpha=1, linewidth=2, marker='')
# Offset the left and bottom spines outward by 10; hide the right and top ones.
ax.spines['left'].set_position(('outward', 10))
ax.spines['bottom'].set_position(('outward', 10))
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
# Caption placed in axes coordinates (horizontally centred, near the bottom).
ax.text(0.50, 0.02, "Crossing red line downwards / Crossing red line Upwards",
transform=ax.transAxes, fontsize=14, ha='center', color='blue')
# Format the major x tick labels as YYYY-MM-DD.
ax.xaxis.set_major_formatter(mdates.DateFormatter(fmt='%Y-%m-%d'))
plt.show()
Corrections:
plt.subplots returns fig and ax, so I saved them (actually, only ax
is needed).
When one axis contains dates, it is better to use plot_date.
I changed the way DateFormatter is set.
Using the above code I got the following picture:
I followed all the steps from my earlier question: "Pandas Dataframe: How to add a vertical line with label to a bar plot when your data is time-series?"
That was supposed to solve my problem, but when I change the kind of plot to line, the vertical line does not appear. I copied the same code and changed the plot type to line instead of bar:
As you can see, with a bar plot the vertical line (in red) appears.
# function to plot a bar
def dessine_line3(madataframe, debut_date, mes_colonnes):
    """Draw a bar chart of selected columns with a vertical marker line.

    Args:
        madataframe: DataFrame whose index holds dates as ``%m/%d/%y``
            strings. Its index is converted to datetimes in place.
        debut_date: First index label to include in the plot.
        mes_colonnes: Column label(s) to plot.
    """
    madataframe.index = pd.to_datetime(madataframe.index, format='%m/%d/%y')
    df = madataframe.loc[debut_date:, mes_colonnes].copy()
    # Index entries equal to the marker date.
    filt = (df[df.index == '4/20/20']).index
    fig, ax = plt.subplots()
    df.plot.bar(figsize=(17, 8), grid=True, ax=ax)
    # The marker is placed at the row position of the date within df.index,
    # not at the date value itself.
    ax.axvline(df.index.searchsorted(filt), color="red", linestyle="--", lw=2, label="lancement")
    plt.tight_layout()
out :
But when I change the code only by switching the type of plot to line, there is no vertical line, and the x axis (dates) also changes.
So I wrote another piece of code just to draw a line plot with a vertical line:
ax = madagascar_maurice_case_df[["Madagascar Covid-19 Ratio","Maurice Covid-19 Ratio"]].loc['3/17/20':].plot.line(figsize=(17,7),grid=True)
filt = (df[df.index=='4/20/20']).index
ax.axvline(df.index.searchsorted(filt),color="red",linestyle="--",lw=2 ,label="lancement")
plt.show()
but the result is the same
following the comment below , here is my final code :
def dessine_line5(madataframe, debut_date, mes_colonnes):
    """Draw a seaborn line plot of selected columns with a vertical marker.

    Args:
        madataframe: DataFrame to plot from.
        debut_date: First index label to include in the plot.
        mes_colonnes: Column label(s) to plot.
    """
    plt.figure(figsize=(17, 8))
    # Pass the on/off flag positionally: the ``b`` keyword was renamed to
    # ``visible`` in newer matplotlib releases, so neither name is portable.
    plt.grid(True, which='major', axis='y')
    df = madataframe.loc[debut_date:, mes_colonnes]
    sns.lineplot(data=df)
    # Proleptic Gregorian ordinal of the marker date, used as the x position.
    lt = datetime.toordinal(pd.to_datetime('4/20/20'))
    plt.axvline(lt, color="red", linestyle="--", lw=2, label="lancement")
    plt.show()
and the result is :
Plot tick locs
The issue is the plot tick locations are a different style depending on plot kind and api
df.plot vs. plt.plot vs. sns.lineplot
Place ticks, labels = plt.xticks() after df.plot.bar(figsize=(17,8),grid=True,ax=ax) and printing ticks will give array([0, 1, 2,..., len(df.index)]), which is why df.index.searchsorted(filt) works, it produces an integer location.
df.plot() has tick locs like array([13136, 13152, 13174, 13175], dtype=int64), for my sample date range. I don't actually know how those numbers are derived, so I don't know how to convert the date to that format.
sns.lineplot and plt.plot have tick locs that are the ordinal representation of the datetime, array([737553., 737560., 737567., 737577., 737584., 737591., 737598.,
737607.]
For a lineplot with your example:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
sns.lineplot(data=df)
lt = datetime.toordinal(pd.to_datetime('2020/04/20'))
plt.axvline(lt, color="red", linestyle="--", lw=2, label="lancement")
plt.show()
For my example data:
import numpy as np
data = {'a': [np.random.randint(10) for _ in range(40)],
'b': [np.random.randint(10) for _ in range(40)],
'date': pd.bdate_range(datetime.today(), periods=40).tolist()}
df = pd.DataFrame(data)
df.set_index('date', inplace=True)
sns.lineplot(data=df)
ticks, labels = plt.xticks()
lt = datetime.toordinal(pd.to_datetime('2020-05-19'))
plt.axvline(lt, color="red", linestyle="--", lw=2, label="lancement")
plt.show()
I would like to draw a rectangle to indicate a range within the x axis. I can use locators for setting ticks and labels, but I don't seem to succeed using them to draw the rectangle. How could I go about it?!
import datetime as DT
from matplotlib import pyplot as plt
import matplotlib.dates as dates
ddata = [DT.datetime.strptime('2010-02-05', "%Y-%m-%d"),
DT.datetime.strptime('2010-02-19', "%Y-%m-%d"),
DT.datetime.strptime('2010-03-05', "%Y-%m-%d"),
DT.datetime.strptime('2010-03-19', "%Y-%m-%d"),]
values = [123,678,987,345]
d1 = zip(ddata,values)
def nplot(data):
    """Plot (date, value) pairs with weekly minor and monthly major ticks.

    Args:
        data: Iterable of ``(date, value)`` pairs.
    """
    # Materialize once: ``data`` is iterated twice and indexed below, which
    # fails or yields nothing when the caller passes a one-shot iterator
    # such as a Python 3 ``zip``.
    data = list(data)
    x = [date for (date, value) in data]
    y = [value for (date, value) in data]

    # Set the stage
    fig, ax = plt.subplots()
    graph = fig.add_subplot(111)

    # Plot the data as a red line with round markers
    graph.plot(x, y, 'r-o')

    days = dates.DayLocator(interval=7)  # every week
    months = dates.MonthLocator()  # every month

    # Create locators and ticks
    ax.xaxis.set_minor_locator(days)
    ax.xaxis.set_minor_formatter(dates.DateFormatter('%d'))
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(dates.DateFormatter('\n\n%b'))
    ax.xaxis.grid(True, which="major", linewidth=2)

    # Now how do I align a rectangle with specific dates?
    gca().add_patch(Rectangle((data[0][0], 1000),
                              data[2][0], 1000, facecolor='w', alpha=0.9))  # doesn't work
    plt.show()
nplot(d1)
With this I get the currently set minor ticks
locs = ax.xaxis.get_minorticklocs()
And with this I write the rectangle. Odd, the location of the left side is a 6-digit float, but the location for the right side is the number of days since the left side. No idea how that works, but it seems to...
gca().add_patch(Rectangle((locs[0], 0), 7, 1000, facecolor='w', alpha=0.9))
And this is what I wanted to do from the beginning: to mark recurring ranges.
# Shade every other span that starts at a minor tick and is seven units wide.
locs = ax.xaxis.get_minorticklocs()
loc_len = len(locs)
# list(...) is required: a Python 3 zip object cannot be sliced.
zloc = list(zip(locs, [7] * loc_len))  # Seven-day loops
for left, width in zloc[::2]:
    gca().add_patch(Rectangle((left, 0), width, 1000, facecolor='w', alpha=0.9))
However, this won't work if I decide to box months, as each month has a different number of days. @Greg's suggestion of using fill_between is another option, but it sets its limits in relation to the data, not the scale (which is OK, I guess):
# Pair each x value with its successor and shade every other interval.
# list(...) is required: a Python 3 zip object cannot be sliced.
xloc = list(zip(x[:-1], x[1:]))
for span in xloc[::2]:
    ax.fill_between(span, 0, 1200, facecolor='w', alpha=0.5)
ylim(0, 1200)
plt.show()