Graph Plot axes scaling / design / time format - python

My current Pandas / python plot looks like this:
What I like to have:
I want to get rid of the 1e7 and 1e9 on both y-axes. The values of the two time series are in the millions and billions, so a delimiter for the number would be a plus for readability.
I like to have a (light) grid in the background and at least normal lines on the axes.
I like to have a monthly scaling, not every 6 months on the x-axis
How can I add the legend below?
The current code is (transactions 1 and 2 are time series of trading volumes):
ax = data.transactions1.plot(figsize=(12, 3.5))
data.transactions2.plot(secondary_y=True)

The following code :
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import datetime
from matplotlib.ticker import ScalarFormatter
base = datetime.datetime.today()
numdays = 365
date_list = [base - datetime.timedelta(days=x) for x in range(0, numdays)]
x = np.arange(0, numdays, 1)
values1 = 0.05 * x**2*1e9
values2 = -1*values1*1e7
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
lns1 = ax1.plot(date_list, values1, 'g-', label='Foo')
lns2 = ax2.plot(date_list, values2, 'b-', label='Bar')
# We set the date format
dareFmt = mdates.DateFormatter('%b %Y')
# We then apply the format
ax1.xaxis.set_major_formatter(dareFmt)
ax1.set_xlabel('Dates')
#used to give the inclination
fig.autofmt_xdate()
# Dsiplay the grid
ax1.grid(True)
# To get rid of the 1eX on top i divide the values of the y axis by the exponent value
y_values = ax1.get_yticks().tolist()
y_values = [x / 1e12 for x in y_values]
ax1.set_yticklabels(y_values)
ax1.set_ylabel('10e12')
y_values = ax2.get_yticks().tolist()
y_values = [x / 1e19 for x in y_values]
ax2.set_yticklabels(y_values)
ax2.set_ylabel('10e19')
lns = lns1 + lns2
labs = [l.get_label() for l in lns]
ax1.legend(lns, labs,bbox_to_anchor=(0., -0.25, 1., .102), loc=3,
ncol=2, mode="expand", borderaxespad=0.)
plt.show()
gives you :

Related

How to modify xtick label of plt in Matplotlib

The objective is to modify the xticklabel upon plotting pcolormesh and scatter.
However, I am having difficulties accessing the existing xtick labels.
Simply
ax = plt.axes()
labels_x = [item.get_text() for item in ax.get_xticklabels()]
which produced:
['', '', '', '', '', '']
or
fig.canvas.draw()
xticks = ax.get_xticklabels()
which produced:
['', '', '', '', '', '']
does not return the corresponding label.
May I know how to properly access axis tick labels for a plt cases.
For readability, I split the code into two section.
The first section to generate the data used for plotting
Second section deal the plotting
Section 1: Generate data used for plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
np.random.seed(0)
increment=120
max_val=172800
aran=np.arange(0,max_val,increment).astype(int)
arr=np.concatenate((aran.reshape(-1,1), np.random.random((aran.shape[0],4))), axis=1)
df=pd.DataFrame(arr,columns=[('lapse',''),('a','i'),('a','j'),('b','k'),('c','')])
ridx=df.index[df[('lapse','')] == 3600].tolist()[0]+1 # minus 1 so to allow 3600 start at new row
df[('event','')]=0
df.loc[[1,2,3,10,20,30],[('event','')]]=1
arr=df[[('a','i'),('event','')]].to_numpy()
col_len=ridx
v=arr[:,0].view()
nrow_size=math.ceil(v.shape[0]/col_len)
X=np.pad(arr[:,0].astype(float), (0, nrow_size*col_len - arr[:,0].size),
mode='constant', constant_values=np.nan).reshape(nrow_size,col_len)
mask_append_val=0 # This value must equal to 1 for masking
arrshape=np.pad(arr[:,1].astype(float), (0, nrow_size*col_len - arr[:,1].size),
mode='constant', constant_values=mask_append_val).reshape(nrow_size,col_len)
Section 2 Plotting
fig = plt.figure(figsize=(8,6))
plt.pcolormesh(X,cmap="plasma")
x,y = X.shape
xs,ys = np.ogrid[:x,:y]
# the non-zero coordinates
u = np.argwhere(arrshape)
plt.scatter(ys[:,u[:,1]].ravel()+.5,xs[u[:,0]].ravel()+0.5,marker='*', color='r', s=55)
plt.gca().invert_yaxis()
xlabels_to_use_this=df.loc[:30,[('lapse','')]].values.tolist()
# ax = plt.axes()
# labels_x = [item.get_text() for item in ax.get_xticklabels()]
# labels_y = [item.get_text() for item in ax.get_yticklabels()]
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title("Plot 2D array")
plt.colorbar()
plt.tight_layout()
plt.show()
Expected output
This is how the plot could be generated using matplotlib's pcolormesh and scatter:
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import pandas as pd
import numpy as np
np.random.seed(0)
increment = 120
max_val = 172800
aran = np.arange(0, max_val, increment).astype(int)
arr_df = np.concatenate((aran.reshape(-1, 1), np.random.random((aran.shape[0], 4))), axis=1)
df = pd.DataFrame(arr_df, columns=[('lapse', ''), ('a', 'i'), ('a', 'j'), ('b', 'k'), ('c', '')])
df[('event', '')] = 0
df.loc[[1, 2, 3, 10, 20, 30], [('event', '')]] = 1
col_len_lapse = 3600
col_len = df[df[('lapse', '')] == col_len_lapse].index[0]
nrow_size = int(np.ceil(v.shape[0] / col_len))
a_i_values = df[('a', 'i')].values
a_i_values_meshed = np.pad(a_i_values.astype(float), (0, nrow_size * col_len - len(a_i_values)),
mode='constant', constant_values=np.nan).reshape(nrow_size, col_len)
fig, ax = plt.subplots(figsize=(8, 6))
# the x_values indicate the mesh borders, subtract one half so the ticks can be at the centers
x_values = df[('lapse', '')][:col_len + 1].values - increment / 2
# divide lapses for y by col_len_lapse to get hours
y_values = df[('lapse', '')][::col_len].values / col_len_lapse - 0.5
y_values = np.append(y_values, 2 * y_values[-1] - y_values[-2]) # add the bottommost border (linear extension)
mesh = ax.pcolormesh(x_values, y_values, a_i_values_meshed, cmap="plasma")
event_lapses = df[('lapse', '')][df[('event', '')] == 1]
ax.scatter(event_lapses % col_len_lapse,
np.floor(event_lapses / col_len_lapse),
marker='*', color='red', edgecolor='white', s=55)
ax.xaxis.set_major_locator(MultipleLocator(increment * 5))
ax.yaxis.set_major_locator(MultipleLocator(5))
ax.invert_yaxis()
ax.set_xlabel('X-axis (s)')
ax.set_ylabel('Y-axis (hours)')
ax.set_title("Plot 2D array")
plt.colorbar(mesh)
plt.tight_layout() # fit the labels nicely into the plot
plt.show()
With Seaborn things can be simplified, adding new columns for hours and seconds, and using pandas' pivot (which automatically fills unavailable data with NaNs). Adding xtick_labels=5 sets the labels every 5 positions. (The star for lapse=3600 is at 1 hour, 0 seconds).
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# df created as before
df['hours'] = (df[('lapse', '')].astype(int) // 3600)
df['seconds'] = (df[('lapse', '')].astype(int) % 3600)
df_heatmap = df.pivot(index='hours', columns='seconds', values=('a', 'i'))
df_heatmap_markers = df.pivot(index='hours', columns='seconds', values=('event', '')).replace(
{0: '', 1: '★', np.nan: ''})
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(df_heatmap, xticklabels=5, yticklabels=5,
annot=df_heatmap_markers, fmt='s', annot_kws={'color': 'lime'}, ax=ax)
ax.tick_params(rotation=0)
plt.tight_layout()
plt.show()
Instead of a 'seconds' column, a 'minutes' column also might be interesting.
Here is an attempt to add time information as suggested in the comments:
from matplotlib import patheffects # to add some outline effect
# df prepared as the other seaborn example
fig, ax = plt.subplots(figsize=(8, 6))
path_effect = patheffects.withStroke(linewidth=2, foreground='yellow')
sns.heatmap(df_heatmap, xticklabels=5, yticklabels=5,
annot=df_heatmap_markers, fmt='s',
annot_kws={'color': 'red', 'path_effects': [path_effect]},
cbar=True, cbar_kws={'pad': 0.16}, ax=ax)
ax.tick_params(rotation=0)
ax2 = ax.twinx()
ax2.set_ylim(ax.get_ylim())
yticks = ax.get_yticks()
ax2.set_yticks(yticks)
ax2.set_yticklabels([str(pd.to_datetime('2019-01-15 7:00:00') + pd.to_timedelta(h, unit='h')).replace(' ', '\n')
for h in yticks])
I end up using Seaborn to address this issue.
Specifically, the following lines able to easily tweak the xticklabel
fig.canvas.draw()
new_ticks = [i.get_text() for i in g.get_xticklabels()]
i=[int(idx) for idx in new_ticks]
newlabel=xlabels_to_use_this[i]
newlabel=[np.array2string(x, precision=0) for x in newlabel]
The full code for plotting is as below
import seaborn as sns
fig, ax = plt.subplots()
sns.heatmap(X,ax=ax)
x,y = X.shape
xs,ys = np.ogrid[:x,:y]
# the non-zero coordinates
u = np.argwhere(arrshape)
g=sns.scatterplot(ys[:,u[:,1]].ravel()+.5,xs[u[:,0]].ravel()+0.5,marker='*', color='r', s=55)
fig.canvas.draw()
new_ticks = [i.get_text() for i in g.get_xticklabels()]
i=[int(idx) for idx in new_ticks]
newlabel=xlabels_to_use_this[i]
newlabel=[np.array2string(x, precision=0) for x in newlabel]
ax.set_xticklabels(newlabel)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)
for ind, label in enumerate(g.get_xticklabels()):
if ind % 2 == 0: # every 10th label is kept
label.set_visible(True)
else:
label.set_visible(False)
for ind, label in enumerate(g.get_yticklabels()):
if ind % 4 == 0: # every 10th label is kept
label.set_visible(True)
else:
label.set_visible(False)
plt.xlabel('Elapsed (s)')
plt.ylabel('Hour (h)')
plt.title("Rastar Plot")
plt.tight_layout()
plt.show()

FFT of resampled pandas Series

I am trying to take the Fast Fourier Transform of a resampled pandas Series:
signal = pd.Series(thick, index = pd.TimedeltaIndex(time_list_thick,unit = 's'))
resampled_signal = signal.resample('1S').mean()
However if I simply try (using scipy) and just do:
SAMPLE_RATE = 1
DURATION = len(resampled_signal)
N = SAMPLE_RATE * DURATION
yf = fft(resampled_signal[:,1])
print(yf)
xf = fftfreq(N, 1 / SAMPLE_RATE)
I obtain an error ValueError: Can only tuple-index with a MultiIndex due to the
way resampled_signal is constructed to include the index. resampled_signal looks like this for reference:
00:00:00.419175 206.080335
00:00:01.419175 206.084340
00:00:02.419175 206.087010
00:00:03.419175 206.089681
00:00:04.419175 206.095021
.
.
.
Is there anyway this can be done? I wish to include the pd.Series form since my final aim is to resample two datasets such that they have the same number of data points, take the FFT of both signals, then subtract one from the other.
My simplified code for 1 data set is given below:
import numpy as np
import pandas as pd
from datetime import datetime
from datetime import timedelta
import matplotlib
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq
datathick = "20210728_rig_thick.csv"
with open(datathick) as f:
lines = f.readlines()
dates = [str(line.split(',')[0]) for line in lines]
thick = [float(line.split(',')[1]) for line in lines]
z = [float(line.split(',')[2]) for line in lines]
date_thick = [datetime.strptime(x,'%Y-%m-%dT%H:%M:%S.%f').time() for x in dates]
time_list_thick = []
for i in np.arange(0, len(date_thick)):
q = date_thick[i]
t = timedelta(hours= q.hour, minutes=q.minute,seconds=q.second, microseconds = q.microsecond).total_seconds()
time_list_thick.append(float(t))
#---RESCALE-----
signal = pd.Series(thick, index = pd.TimedeltaIndex(time_list_thick,unit = 's'))
resampled_signal = signal.resample('1S').mean()
resampled_signal = resampled_signal.interpolate(method='time')
print(resampled_signal.head())
exit()
#----FFT Transform of Output and Noise ----
# Number of samples in normalized_tone
SAMPLE_RATE = 1
DURATION = len(resampled_signal)
N = SAMPLE_RATE * DURATION
yf = fft(resampled_signal[:,1])
print(yf)
xf = fftfreq(N, 1 / SAMPLE_RATE)
#------------------------------------------------
fig=plt.figure(figsize=(7.,7.))
ax=fig.add_subplot(1,1,1)
ax.set_zorder(1)
ax.patch.set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Amplitude (a.u)')
ax.minorticks_on() # enable minor ticks
ax.xaxis.set_ticks_position('bottom')
ax.spines['left'].set_color('black')
ax.yaxis.label.set_color('black')
plt.yscale('log')
ax.tick_params(direction='out', axis='y', which='both', pad=4, colors='black')
ax.grid(b=True, which='major', color='#eeeeee', linestyle='-', zorder=1, linewidth=0.4) # turn on major grid
ax.grid(b=True, which='minor', color='#eeeeee', linestyle='-', zorder=1, linewidth=0.4) # turn on minor grid
ax.plot(np.abs(xf), np.abs(yf))
plt.savefig('fft.pdf', dpi=300, bbox_inches='tight', format='pdf')
plt.savefig('fft.png', dpi=300, bbox_inches='tight', format='png')
#----------------------------------------------

matplotlib date formatter or date locator is not showing the first date index and starting from the next one

I want my x-ticks to show mondays only as Month-Day. I try the solution here I get the the correct tick format however there is something wrong with locator and the first date is not shown correctly. The first tick should be at Feb 03 based on my indexing.
The code to reproduce my results is below:
import seaborn as sns
import matplotlib.dates as mdates
import datetime as dt
import matplotlib.ticker as ticker
import pandas as pd
width, height = plt.figaspect(.30)
fig,ax = plt.subplots(1,1, figsize=(width,height), dpi=300, constrained_layout=False)
day_pal = sns.color_palette("pastel",7)
date_df = pd.DataFrame()
date_df['ts'] = pd.Series(pd.date_range(START_DATE, periods=12*7, freq="D"))
date_df['weekday'] = date_df['ts'].dt.weekday
print(date_df.ts[0])
print(date_df.ts[len(x)-1])
x = list(date_df.ts)
daily_totals = range(len(x)) + np.random.randint(0,10,len(x))
ax.plot(x, daily_totals, lw=3, color='black',alpha=0.5)
plt.axvline(x[42], color="red", lw=5, linestyle="--", alpha = 0.6)
for wkdy in range(0,5):
start = np.array(date_df[date_df.weekday==wkdy]['ts'])
end = start + pd.Timedelta(days=1)
for i in range(len(start)):
ax.axvspan(start[i],end[i],alpha=0.5,color = day_pal[wkdy])
start = np.array(date_df[date_df.weekday==5]['ts'])
end = start + pd.Timedelta(days=2)
for i in range(len(start)):
ax.axvspan(start[i],end[i],alpha=0.5,color = "gray")
ax.set_ylabel("Number of Trips")
ax.set_xlabel("Date")
ax.set_xlim(x[0], x[len(x) -1 ] )
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b-%d"))
ax.xaxis.set_minor_formatter(mdates.DateFormatter("%b-%d"))
plt.xticks(rotation=65)
plt.show()
Resulting Plot:

How to use dates in this code for y axis?

The person who made this had used dates in the second graph. I was wondering how would dates be used with the scipy.signal.argrelextrema function.
With this code it doesn't do anything it prints out an empty array for peak_x and peak_y:
data_y = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25)
Here's the link for the original code:
https://openwritings.net/pg/python/python-find-peaks-and-valleys-chart-using-scipysignalargrelextrema
import matplotlib
matplotlib.use('Agg') # Bypass the need to install Tkinter GUI framework
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
# Generate random data.
data_x = np.arange(start = 0, stop = 25, step = 1, dtype='int')
data_y = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25) #edited part
# Find peaks(max).
peak_indexes = signal.argrelextrema(data_y, np.greater)
peak_indexes = peak_indexes[0]
# Find valleys(min).
valley_indexes = signal.argrelextrema(data_y, np.less)
valley_indexes = valley_indexes[0]
# Plot main graph.
(fig, ax) = plt.subplots()
ax.plot(data_x, data_y)
# Plot peaks.
peak_x = peak_indexes
peak_y = data_y[peak_indexes]
ax.plot(peak_x, peak_y, marker='o', linestyle='dashed', color='green', label="Peaks")
print(peak_x,peak_y)
# Plot valleys.
valley_x = valley_indexes
valley_y = data_y[valley_indexes]
ax.plot(valley_x, valley_y, marker='o', linestyle='dashed', color='red', label="Valleys")
# Save graph to file.
plt.title('Find peaks and valleys using argrelextrema()')
plt.legend(loc='best')
plt.savefig('argrelextrema.png')
Here's the example how it would work:
You're going to want to use the xticks method. See below:
import matplotlib.pyplot as plt
names = [str(i) for i in range(20)]
x_data = [x for x in range(20)]
y_data = [x for x in range(20)]
plt.plot(x_data, y_data)
plt.xticks(x_data, label=names)
plt.show()
What this does is use an integer between 1-19 cast as a string as the label for the axis X.
Except in your case you want to swap out the names for datatime objects cast to strings. For the xticks, the x_data element prescribes where the ticks will be. You may use any interval of points so long as they are within the bounds of the xdata.
In your case, replace:
data_y = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25)
with
data_y_ticks = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25)
data_y = [i for i, _ in enumerate(data_y_ticks.tolist())]
then plot as follows:
plt.plot(data_y, x_data)
plt.xticks(data_y, label=data_y_ticks)
plt.show()
Just a heads-up, your X and Y axis names are flipped in your code. I did not correct this in my example, however did interchange their locations in the plot to make the plot make sense.

Add line to pandas plot

Using pandas I create a plot of a time series like this:
import numpy as np
import pandas as pd
rng = pd.date_range('2016-01-01', periods=60, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ax = ts.plot()
ax.axhline(y=ts.mean(), xmin=-1, xmax=1, color='r', linestyle='--', lw=2)
I would like to add another horizontal line at the level of the mean using only data from February. The mean is just ts.loc['2016-02'], but how do I add a horizontal line at that level that doesn't go across the whole figure, but only for the dates in February?
Or you can create a new time series whose values are the mean and index only spans February.
ts_feb_mean = ts['2016-02'] * 0 + ts['2016-02'].mean()
All together it looks like:
import numpy as np
import pandas as pd
rng = pd.date_range('2016-01-01', periods=60, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
# Feb mean
ts_fm = ts['2016-02'] * 0 + ts['2016-02'].mean()
ts_fm = ts_fm.reindex_like(ts)
# Total mean
ts_mn = ts * 0 + ts.mean()
# better control over ax
fig, ax = plt.subplots(1, 1)
ts.plot(ax=ax)
ts_mn.plot(ax=ax)
ts_fm.plot(ax=ax)
You can use xmin and xmax to control where in the chart the line starts and ends. But this is in percent of the chart.
import numpy as np
import pandas as pd
np.random.seed([3, 1415])
rng = pd.date_range('2016-01-01', periods=60, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts_feb = ts['2016-02']
# used to figure out where to start and stop
ts_len = float(len(ts))
ts_len_feb = float(len(ts_feb))
ratio = ts_len_feb / ts_len
ax = ts.plot()
ax.axhline(y=ts.mean() * 5, xmin=0, xmax=1, color='r', linestyle='--', lw=2)
ax.axhline(y=ts_feb.mean() * 5, xmin=(1. - ratio), xmax=1, color='g', linestyle=':', lw=2)

Categories

Resources