Data - we import historical yields of the ten and thirty year Treasury and calculate the spread (difference) between the two (this block of code is good; feel free so skip):
#Import statements
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
#Constants
start_date = "2018-01-01"
end_date = "2023-01-01"
#Pull in data
tenYear_master = yf.download('^TNX', start_date, end_date)
thirtyYear_master = yf.download('^TYX', start_date, end_date)
#Trim DataFrames to only include 'Adj Close columns'
tenYear = tenYear_master['Adj Close'].to_frame()
thirtyYear = thirtyYear_master['Adj Close'].to_frame()
#Rename columns
tenYear.rename(columns = {'Adj Close' : 'Adj Close - Ten Year'}, inplace= True)
thirtyYear.rename(columns = {'Adj Close' : 'Adj Close - Thirty Year'}, inplace= True)
#Join DataFrames
data = tenYear.join(thirtyYear)
#Add column for difference (spread)
data['Spread'] = data['Adj Close - Thirty Year'] - data['Adj Close - Ten Year']
data
This block is also good.
'''Plot data'''
#Delete top, left, and right borders from figure
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.left'] = False
plt.rcParams['axes.spines.right'] = False
fig, ax = plt.subplots(figsize = (15,10))
data.plot(ax = ax, secondary_y = ['Spread'], ylabel = 'Yield', legend = False);
'''Change left y-axis tick labels to percentage'''
left_yticks = ax.get_yticks().tolist()
ax.yaxis.set_major_locator(mticker.FixedLocator(left_yticks))
ax.set_yticklabels((("%.1f" % tick) + '%') for tick in left_yticks);
#Add legend
fig.legend(loc="upper center", ncol = 3, frameon = False)
fig.tight_layout()
plt.show()
I have questions concerning two features of the graph that I want to customize:
The x-axis currently has a tick and tick label for every year. How can I change this so that there is a tick and tick label for every 3 months in the form MMM-YY? (see picture below)
The spread was calculated as thirty year yield - ten year yield. Say I want to change the RIGHT y-axis tick labels so that their sign is flipped, but I want to leave both the original data and curves alone (for the sake of argument; bear with me, there is logic underlying this). In other words, the right y-axis tick labels currently go from -0.2 at the bottom to 0.8 at the top. How can I change them so that they go from 0.2 at the bottom to -0.8 at the top without changing anything about the data or curves? This is purely a cosmetic change of the right y-axis tick labels.
I tried doing the following:
'''Change right y-axis tick labels'''
right_yticks = (ax.right_ax).get_yticks().tolist()
#Loop through and multiply each right y-axis tick label by -1
for index, value in enumerate(right_yticks):
right_yticks[index] = value*(-1)
(ax.right_ax).yaxis.set_major_locator(mticker.FixedLocator(right_yticks))
(ax.right_ax).set_yticklabels(right_yticks)
But I got this:
Note how the right y-axis is incomplete.
I'd appreciate any help. Thank you!
Let's create some data:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
days = np.array(["2022-01-01", "2022-07-01", "2023-02-15", "2023-11-15", "2024-03-03"],
dtype = "datetime64")
val = np.array([20, 20, -10, -10, 10])
For the date in the x-axis, we import matplotlib.dates, which provides the month locator and the date formater. The locator sets the ticks each 3 months, and the formater sets the way the labels are displayed (month-00).
For the y-axis data, you require changing the sign of the data (hence the negative sign in ax2.plot(), but you want the curve in the same position, so afterwards you need to invert the axis. And so, the curves in both plots are identical, but the y-axis values have different signs and directions.
fig, (ax1, ax2) = plt.subplots(figsize = (10,5), nrows = 2)
ax1.plot(days, val, marker = "x")
# set the locator to Jan, Apr, Jul, Oct
ax1.xaxis.set_major_locator(mdates.MonthLocator( bymonth = (1, 4, 7, 10) ))
# set the formater for month-year, with lower y to show only two digits
ax1.xaxis.set_major_formatter(mdates.DateFormatter("%b-%y"))
# change the sign of the y data plotted
ax2.plot(days, -val, marker = "x")
#invert the y axis
ax2.invert_yaxis()
# set the locator to Jan, Apr, Jul, Oct
ax2.xaxis.set_major_locator(mdates.MonthLocator( bymonth = (1, 4, 7, 10) ))
# set the formater for month-year, with lower y to show only two digits
ax2.xaxis.set_major_formatter(mdates.DateFormatter("%b-%y"))
plt.show()
I'm trying to plot some data in different plots, and I'm able to do it, but, the labelsize from the x axis don't change in the first plot, but it does in the second one. This is what I'm doing:
import matplotlib.pyplot as plt
from dateutil.relativedelta import relativedelta as rd
from calendar import monthrange
fin_mes = date.today() - rd(days=1)
# Start ploting data A
fig, ax = plt.subplots(1, 1)
# Retrieve dates
mes = fin_mes.strftime("%B")
anio = fin_mes.strftime("%Y")
# Set title
plt.suptitle(f"Data {mes} - {anio}")
# List of days of the last month
num_days = range(1, int(monthrange(fin_mes.day, fin_mes.month)[1]) + 1)
print(num_days)
# Set name from x label
ax.set_xlabel('Dates')
# Set name from y label
ax.set_ylabel('Data')
# Set name from the plot to save
name_a = f"DataA-{mes}-{anio}.png"
plt.title("My Data")
plt.xlim(0, num_days[-1])
plt.ylim((min(num_days)),
(max(num_days)))
plt.xticks(num_days)
# Setting the size to the label
plt.rc('xtick', labelsize=5)
plt.grid(True)
dataa_plot, = plt.plot(num_days, num_days, label="DATA A")
plt.legend(handles=[dataa_plot])
plt.show()
fig.savefig(name_a)
# Start ploting data B
fig, ax = plt.subplots(1, 1)
# Retrieve dates
mes = fin_mes.strftime("%B")
anio = fin_mes.strftime("%Y")
# Set title
plt.suptitle(f"Data {mes} - {anio}")
# List of days of the last month
num_days = range(1, int(monthrange(fin_mes.day, fin_mes.month)[1]) + 1)
print(num_days)
# Set name from x label
ax.set_xlabel('Dates')
# Set name from y label
ax.set_ylabel('Data')
# Set name from the plot to save
name_b = f"DataB-{mes}-{anio}.png"
plt.title("My Data B")
plt.xlim(0, num_days[-1])
plt.ylim((min(num_days)),
(max(num_days)))
plt.xticks(num_days)
# Setting the size to the label
plt.rc('xtick', labelsize=5)
plt.grid(True)
datab_plot, = plt.plot(num_days, num_days, label="DATA B")
plt.legend(handles=[datab_plot])
plt.show()
fig.savefig(name_b)
With that I get this plots:
And plot b has the font size changed, but not plot A. I don't know why this is happening. Hope someone can help me, thanks.
PD: I'm using python 3.8.10 in Lubuntu x64 with matplotlib=3.5.1
You must set the label size before using it in setting the ticks, i.e. plt.rc('xtick', labelsize=5) must come before plt.xticks(num_days) to take effect (the safest way is to move it to the very beginning of the plotting).
As an (easier) alternative you can set the font size directly in xticks without changing the rc parameters:
plt.xticks(num_days, fontsize=5)
I'm new to matplotlib, and trying to plot something quite difficult.
I would like to plot something like (taken from the matplotlib docs):
Except, I want the timeline (x-axis) and stems to have labels in time, like:
timeline = ['0:01:00', '0:02:00', '0:03:00', '0:04:00', ...]
stems1 = ['0:01:45', '0:03:55', '0:04:22', ...]
stems2 = ['0:02:21', '0:06:54', ...
Notes:
Timeline ticks are evenly spaced
stems1 and stems2 don't necesarily have the same number of points, but are in order (like a video timeline)
It would be even better if stems1 and stems2 were different colors.
If anyone could point me in the right direction, or even code a working example, it'd be greatly appreciated! Thank you for reading.
Edit:
Following #r-beginners's answer to this post
I have something like this:
for time, level, label, va in zip(timeline, levels, labels, verticalalignments):
ax.annotate(label, xy=(time, level), xytext=(15, np.sign(level)*15),
textcoords="offset points",
verticalalignment=va,
horizontalalignment="right",
color='blue')
for time, level, pred, va in zip(timeline, levels, preds, verticalalignments):
ax.annotate(pred, xy=(time, level), xytext=(15, np.sign(level)*15),
textcoords="offset points",
verticalalignment=va,
horizontalalignment="right",
color='green')
The issue is that the graphs are overlapping, stem color are both red, and the stems don't align with the timeline.
Edit 2:
With #r-beginners code, I've tried it with 2 new stems, where stem1 isn't being plotted completely:
stem1 = ['0:08:08', '0:08:52', '0:09:42', '0:10:20', '0:10:55', '0:11:24', '0:12:31', '0:13:07', '0:13:45', '0:14:16', '0:14:49', '0:15:20', '0:15:51', '0:16:21', '0:16:53', '0:17:28', '0:19:01', '0:19:22', '0:20:19', '0:20:48', '0:21:19', '0:22:05', '0:23:06', '0:23:34', '0:24:03', '0:24:30', '0:24:51', '0:25:18', '0:25:54', '0:26:25', '0:27:07', '0:28:05', '0:29:04', '0:29:30', '0:30:34', '0:32:57', '0:33:28', '0:33:57', '0:34:35', '0:35:01', '0:35:41', '0:36:06', '0:36:30', '0:37:01', '0:37:33', '0:38:06', '0:38:40', '0:39:21', '0:40:02', '0:40:22', '0:40:42', '0:41:32', '0:41:56', '0:43:20', '0:43:39', '0:44:02', '0:44:26', '0:45:04', '0:45:32', '0:46:02', '0:47:00', '0:47:42', '0:48:05', '0:48:35', '0:49:02', '0:49:25', '0:49:56', '0:50:43', '0:51:25', '0:51:43', '0:52:18', '0:52:49', '0:53:08']
stem2 = ['0:09:49', '0:10:24', '0:14:27', '0:24:31', '0:26:03']
Code afterwards:
# Create figure
fig, ax = plt.subplots(figsize=(100, 10), constrained_layout=True)
ax.set(title='TEST')
# Stem values
names = [e for row in zip(stem1, stem2) for e in row]
# Timeline
timeline = [datetime.datetime.strptime(n, '%H:%M:%S') for n in names]
# Stem levels
levels = np.tile([-5, 5, -3, 3, -1, 1], int(np.ceil(len(names)/6)))[:len(names)]
# Stems
ax.vlines(timeline, 0, levels, color='tab:red')
# Plot timeline
ax.plot(timeline, np.zeros_like(timeline), "-o", color="k", markerfacecolor="w")
# Plot GT labels and predictions
for time, level, name in zip(timeline, levels, names):
ax.annotate(name, xy=(time, level),
xytext=(15, np.sign(level)*3),
textcoords='offset points',
horizontalalignment='right',
verticalalignment='bottom' if level > 0 else 'top',
color='green' if level > 0 else 'blue')
# De-clutter axes
ax.yaxis.set_visible(False)
ax.spines[["left", "top", "right"]].set_visible(False)
# ...
minutes = mdates.MinuteLocator(interval=1)
minutes_fmt = mdates.DateFormatter('%M:%S')
ax.xaxis.set_major_locator(minutes)
ax.xaxis.set_major_formatter(minutes_fmt)
# Rotate x-ticks
plt.setp(ax.get_xticklabels(), rotation=30, ha="right")
# Save figure
plt.savefig('test.png', bbox_inches='tight')
The data was only partially available, so I created it appropriately. I have solved your problem by referring to the official example that you refer to. For overlapping strings, create a list of positive and negative placement heights and draw a vertical line in red. Convert the stem information to time series information and draw a timeline. In the annotation looping process, the placement height value is judged for placement and color to distinguish them. The time series representation of the x-axis is set by MinuteLocator to determine the time format.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.dates as mdates
from datetime import datetime
fig, ax = plt.subplots(figsize=(16, 4), constrained_layout=True)
ax.set(title="Time line demo ")
stem1 = ['0:08:08', '0:08:52', '0:09:42', '0:10:20', '0:10:55', '0:11:24', '0:12:31', '0:13:07', '0:13:45', '0:14:16', '0:14:49', '0:15:20', '0:15:51', '0:16:21', '0:16:53', '0:17:28', '0:19:01', '0:19:22', '0:20:19', '0:20:48', '0:21:19', '0:22:05', '0:23:06', '0:23:34', '0:24:03', '0:24:30', '0:24:51', '0:25:18', '0:25:54', '0:26:25', '0:27:07', '0:28:05', '0:29:04', '0:29:30', '0:30:34', '0:32:57', '0:33:28', '0:33:57', '0:34:35', '0:35:01', '0:35:41', '0:36:06', '0:36:30', '0:37:01', '0:37:33', '0:38:06', '0:38:40', '0:39:21', '0:40:02', '0:40:22', '0:40:42', '0:41:32', '0:41:56', '0:43:20', '0:43:39', '0:44:02', '0:44:26', '0:45:04', '0:45:32', '0:46:02', '0:47:00', '0:47:42', '0:48:05', '0:48:35', '0:49:02', '0:49:25', '0:49:56', '0:50:43', '0:51:25', '0:51:43', '0:52:18', '0:52:49', '0:53:08']
stem2 = ['0:09:49', '0:10:24', '0:14:27', '0:24:31', '0:26:03']
stems = stem1 + stem2
timelines = sorted([datetime.strptime(s, '%H:%M:%S') for s in stem])
labels = [datetime.strftime(t, '%H:%M:%S') for t in timelines]
levels = np.tile([-7, 7, -5, 5, -3, 3, -1, 1], int(np.ceil(len(timelines)/8)))[:len(timelines)]
ax.vlines(timelines, 0, levels, color='tab:red')
ax.plot(timelines, np.zeros_like(timelines), "-o", color="k", markerfacecolor="w")
for t, l, b in zip(timelines, levels, labels):
if datetime.strftime(t, '%H:%M:%S')[1:] in stem2:
color = 'blue'
else:
color = 'green'
ax.annotate(b, xy=(t, l),
xytext=(22, np.sign(l)*3), textcoords='offset points',
horizontalalignment='right',
verticalalignment='bottom' if l > 0 else 'top',
color=color
)
ax.yaxis.set_visible(False)
ax.spines[["left", "top", "right"]].set_visible(False)
ax.spines['bottom'].set_position(('data', -8))
minutes = mdates.MinuteLocator(interval=1)
minutes_fmt = mdates.DateFormatter('%M:%S')
ax.xaxis.set_major_locator(minutes)
ax.xaxis.set_major_formatter(minutes_fmt)
plt.setp(ax.get_xticklabels(), rotation=90, ha='center')
plt.show()
I've got some code used to generate an event timeline, but I'd like to add in an additional line plot showing quantity sold on each day. But when I run the plot, the timeline gets compressed and the Y-axis expands to the new plot. The only way to get around this issue is to normalize the quantity plot values which seems to fit the y-axis limits of the timeline plot, but ideally I'd like to keep the original values from the line plot in the first picture and get it working with the timeline plot from the second picture, so it's the original values and centered/scaled properly. The data is just formatted as a text file like this: Date,Event \n 12 July 2021, Event 1
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
def GenerateTimeLine2(data, data02, title="Timeline", xaxis_format="%d %b", day_interval=5, figsize=(8, 5)):
levels = np.array([-5, 5, -3, 3, -1, 1])
fig, ax = plt.subplots(figsize=figsize)
# Create the base line
start = min(data.index)
stop = max(data.index)
ax.plot((start, stop), (0, 0), 'k', alpha=.5)
# Iterate through data annoting each one
for ii, (idate, iname) in enumerate(data.itertuples()):
level = levels[ii % 6]
vert = 'top' if level < 0 else 'bottom'
ax.scatter(idate, 0, s=100, facecolor='w', edgecolor='k', zorder=9999)
# Plot a line up to the text
ax.plot((idate, idate), (0, level), c='r', alpha=.7)
# Give the text a faint background and align it properly
ax.text(idate, level, iname,ha='right', va=vert, fontsize=14,
backgroundcolor=(1., 1., 1., .3))
ax.set(title=title)
# Set the xticks formatting
# format xaxis with days intervals
ax.get_xaxis().set_major_locator(mdates.DayLocator(interval=day_interval))
ax.get_xaxis().set_major_formatter(mdates.DateFormatter(xaxis_format))
fig.autofmt_xdate()
#add in overlay line plot
#ax.plot(data02)
#print(ax.axis())
#min_c = min(data02.values.tolist())
#max_c = max(data02.values.tolist())
#ax.set_ylabel(min_c,max_c)
norm = lambda x: 5*(x-x.min())/(x.max()-x.min())
ax.set_yticks(norm(data02.values))
#((data02.values-(min(data02.values.tolist())))/(max(data02.values.tolist())-min(data02.values.tolist()))))
ax.yaxis.tick_right()
ax.yaxis.set_major_locator(plt.MaxNLocator(5))
ax.axes.set_ylabel('Quanitity')
ax.yaxis.set_label_position("right")
ax.plot(norm(data02))
#ax.plot((min_c,max_c), (0, 0), 'k', alpha=.5)
#print(ax.yaxis)
# Remove components for a cleaner look
plt.setp((list(ax.spines.values())), visible=False)#ax.get_yticklabels() +
return ax
data = pd.read_csv(r'testdata01.txt', parse_dates=True, index_col=0)
data02 = pd.read_csv(r'rap_a.txt', parse_dates=True, index_col=0)
ax = GenerateTimeLine2(data,data02,title='Event Timeline',
day_interval=1,figsize=(8,5))
#ax.patch.set_facecolor('gray')
#ax.plot(data02)
plt.show()
I'm trying to use matplotlib's pcolormesh function to draw a diagram that shows dots in 2d coordinates, and the color of the dots would be defined by a number.
I have three arrays, one of which has the x-coordinates, another one with the y-coordinates, and the third one has the numbers which should represent colors.
xdata = [ 695422. 695423. 695424. 695425. 695426. 695426.]
ydata = [ 0. -15.4 -15.3 -15.7 -15.5 -19. ]
colordata = [ 0. 121. 74. 42. 8. 0.]
Now, apparently pcolormesh wants its data as three 2d arrays.
In some examples I've seen something like this being done:
newxdata, newydata = np.meshgrid(xdata,ydata)
Okay, but how do I get colordata into a similar format? I tried to it this way:
newcolordata, zz = np.meshgrid(colordata, xdata)
But I'm not exactly sure if it's right. Now, if I try to draw the diagram:
ax.pcolormesh(newxdata, newydata, newcolordata)
I get something that looks like this.
No errors, so I guess that's good. The picture it returns obviously doesn't look like what I want it to. Can someone point me into right direction with this? Is the data array still in wrong format?
This should be all of the important code:
newxdata, newydata = np.meshgrid(xdata,ydata)
newcolordata, zz = np.meshgrid(colordata, xdata)
print newxdata
print newydata
print newcolordata
diagram = plt.figure()
ax = diagram.add_subplot(111)
xformat = DateFormatter('%d/%m/%Y')
ax.xaxis_date()
plot1 = ax.pcolormesh(newxdata, newydata, newcolordata)
ax.set_title("A butterfly diagram of sunspots between dates %s and %s" % (date1, date2))
ax.autoscale(enable=False)
ax.xaxis.set_major_formatter(xformat)
diagram.autofmt_xdate()
if command == "save":
diagram.savefig('diagrams//'+name+'.png')
Edit: I noticed that the colors do correspond to the number. Now I just have to turn those equally sized bars into dots.
If you want dots, use scatter. pcolormesh draws a grid. scatter draws markers colored and/or scaled by size.
For example:
import matplotlib.pyplot as plt
xdata = [695422.,695423.,695424.,695425.,695426.,695426.]
ydata = [0.,-15.4,-15.3,-15.7,-15.5,-19.]
colordata = [0.,121.,74.,42.,8.,0.],
fig, ax = plt.subplots()
ax.scatter(xdata, ydata, c=colordata, marker='o', s=200)
ax.xaxis_date()
fig.autofmt_xdate()
plt.show()
Edit:
It sounds like you want to bin your data and sum the areas inside each bin.
If so, you can just use hist2d to do this. If you specify the areas of the sunspots as the weights to the histogram, the areas inside each bin will be summed.
Here's an example (data from here: http://solarscience.msfc.nasa.gov/greenwch.shtml, specifically, this file, formatted as described here). Most of this is reading the data. Notice that I'm specifying the vmin and then using im.cmap.set_under('none') to display anything under that value as transparent.
It's entirely possible that I'm completely misunderstanding the data here. The units may be completely incorrect (the "raw" areas given are in million-ths of the sun's surface area, I think).
from glob import glob
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
def main():
files = sorted(glob('sunspot_data/*.txt'))
df = pd.concat([read_file(name) for name in files])
date = mdates.date2num(df.date)
fig, ax = plt.subplots(figsize=(10, 4))
data, xbins, ybins, im = ax.hist2d(date, df.latitude, weights=df.area/1e4,
bins=(1000, 50), vmin=1e-6)
ax.xaxis_date()
im.cmap.set_under('none')
cbar = fig.colorbar(im)
ax.set(xlabel='Date', ylabel='Solar Latitude', title='Butterfly Plot')
cbar.set_label("Percentage of the Sun's surface")
fig.tight_layout()
plt.show()
def read_file(filename):
"""This data happens to be in a rather annoying format..."""
def parse_date(year, month, day, time):
year, month, day = [int(item) for item in [year, month, day]]
time = 24 * float(time)
hour = int(time)
minute_frac = 60 * (time % 1)
minute = int(minute_frac)
second = int(60 * (minute_frac % 1))
return dt.datetime(year, month, day, hour, minute, second)
cols = dict(year=(0, 4), month=(4, 6), day=(6, 8), time=(8, 12),
area=(41, 44), latitude=(63, 68), longitude=(57, 62))
df = pd.read_fwf(filename, colspecs=cols.values(), header=None,
names=cols.keys(), date_parser=parse_date,
parse_dates={'date':['year', 'month', 'day', 'time']})
return df
main()