The objective is to modify the xticklabel upon plotting pcolormesh and scatter.
However, I am having difficulties accessing the existing xtick labels.
Simply
ax = plt.axes()
labels_x = [item.get_text() for item in ax.get_xticklabels()]
which produced:
['', '', '', '', '', '']
or
fig.canvas.draw()
xticks = ax.get_xticklabels()
which produced:
['', '', '', '', '', '']
does not return the corresponding label.
May I know how to properly access axis tick labels for a plt cases.
For readability, I split the code into two section.
The first section to generate the data used for plotting
Second section deal the plotting
Section 1: Generate data used for plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
np.random.seed(0)
increment=120
max_val=172800
aran=np.arange(0,max_val,increment).astype(int)
arr=np.concatenate((aran.reshape(-1,1), np.random.random((aran.shape[0],4))), axis=1)
df=pd.DataFrame(arr,columns=[('lapse',''),('a','i'),('a','j'),('b','k'),('c','')])
ridx=df.index[df[('lapse','')] == 3600].tolist()[0]+1 # minus 1 so to allow 3600 start at new row
df[('event','')]=0
df.loc[[1,2,3,10,20,30],[('event','')]]=1
arr=df[[('a','i'),('event','')]].to_numpy()
col_len=ridx
v=arr[:,0].view()
nrow_size=math.ceil(v.shape[0]/col_len)
X=np.pad(arr[:,0].astype(float), (0, nrow_size*col_len - arr[:,0].size),
mode='constant', constant_values=np.nan).reshape(nrow_size,col_len)
mask_append_val=0 # This value must equal to 1 for masking
arrshape=np.pad(arr[:,1].astype(float), (0, nrow_size*col_len - arr[:,1].size),
mode='constant', constant_values=mask_append_val).reshape(nrow_size,col_len)
Section 2 Plotting
fig = plt.figure(figsize=(8,6))
plt.pcolormesh(X,cmap="plasma")
x,y = X.shape
xs,ys = np.ogrid[:x,:y]
# the non-zero coordinates
u = np.argwhere(arrshape)
plt.scatter(ys[:,u[:,1]].ravel()+.5,xs[u[:,0]].ravel()+0.5,marker='*', color='r', s=55)
plt.gca().invert_yaxis()
xlabels_to_use_this=df.loc[:30,[('lapse','')]].values.tolist()
# ax = plt.axes()
# labels_x = [item.get_text() for item in ax.get_xticklabels()]
# labels_y = [item.get_text() for item in ax.get_yticklabels()]
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title("Plot 2D array")
plt.colorbar()
plt.tight_layout()
plt.show()
Expected output
This is how the plot could be generated using matplotlib's pcolormesh and scatter:
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import pandas as pd
import numpy as np
np.random.seed(0)
increment = 120
max_val = 172800
aran = np.arange(0, max_val, increment).astype(int)
arr_df = np.concatenate((aran.reshape(-1, 1), np.random.random((aran.shape[0], 4))), axis=1)
df = pd.DataFrame(arr_df, columns=[('lapse', ''), ('a', 'i'), ('a', 'j'), ('b', 'k'), ('c', '')])
df[('event', '')] = 0
df.loc[[1, 2, 3, 10, 20, 30], [('event', '')]] = 1
col_len_lapse = 3600
col_len = df[df[('lapse', '')] == col_len_lapse].index[0]
nrow_size = int(np.ceil(v.shape[0] / col_len))
a_i_values = df[('a', 'i')].values
a_i_values_meshed = np.pad(a_i_values.astype(float), (0, nrow_size * col_len - len(a_i_values)),
mode='constant', constant_values=np.nan).reshape(nrow_size, col_len)
fig, ax = plt.subplots(figsize=(8, 6))
# the x_values indicate the mesh borders, subtract one half so the ticks can be at the centers
x_values = df[('lapse', '')][:col_len + 1].values - increment / 2
# divide lapses for y by col_len_lapse to get hours
y_values = df[('lapse', '')][::col_len].values / col_len_lapse - 0.5
y_values = np.append(y_values, 2 * y_values[-1] - y_values[-2]) # add the bottommost border (linear extension)
mesh = ax.pcolormesh(x_values, y_values, a_i_values_meshed, cmap="plasma")
event_lapses = df[('lapse', '')][df[('event', '')] == 1]
ax.scatter(event_lapses % col_len_lapse,
np.floor(event_lapses / col_len_lapse),
marker='*', color='red', edgecolor='white', s=55)
ax.xaxis.set_major_locator(MultipleLocator(increment * 5))
ax.yaxis.set_major_locator(MultipleLocator(5))
ax.invert_yaxis()
ax.set_xlabel('X-axis (s)')
ax.set_ylabel('Y-axis (hours)')
ax.set_title("Plot 2D array")
plt.colorbar(mesh)
plt.tight_layout() # fit the labels nicely into the plot
plt.show()
With Seaborn things can be simplified, adding new columns for hours and seconds, and using pandas' pivot (which automatically fills unavailable data with NaNs). Adding xtick_labels=5 sets the labels every 5 positions. (The star for lapse=3600 is at 1 hour, 0 seconds).
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# df created as before
df['hours'] = (df[('lapse', '')].astype(int) // 3600)
df['seconds'] = (df[('lapse', '')].astype(int) % 3600)
df_heatmap = df.pivot(index='hours', columns='seconds', values=('a', 'i'))
df_heatmap_markers = df.pivot(index='hours', columns='seconds', values=('event', '')).replace(
{0: '', 1: '★', np.nan: ''})
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(df_heatmap, xticklabels=5, yticklabels=5,
annot=df_heatmap_markers, fmt='s', annot_kws={'color': 'lime'}, ax=ax)
ax.tick_params(rotation=0)
plt.tight_layout()
plt.show()
Instead of a 'seconds' column, a 'minutes' column also might be interesting.
Here is an attempt to add time information as suggested in the comments:
from matplotlib import patheffects # to add some outline effect
# df prepared as the other seaborn example
fig, ax = plt.subplots(figsize=(8, 6))
path_effect = patheffects.withStroke(linewidth=2, foreground='yellow')
sns.heatmap(df_heatmap, xticklabels=5, yticklabels=5,
annot=df_heatmap_markers, fmt='s',
annot_kws={'color': 'red', 'path_effects': [path_effect]},
cbar=True, cbar_kws={'pad': 0.16}, ax=ax)
ax.tick_params(rotation=0)
ax2 = ax.twinx()
ax2.set_ylim(ax.get_ylim())
yticks = ax.get_yticks()
ax2.set_yticks(yticks)
ax2.set_yticklabels([str(pd.to_datetime('2019-01-15 7:00:00') + pd.to_timedelta(h, unit='h')).replace(' ', '\n')
for h in yticks])
I end up using Seaborn to address this issue.
Specifically, the following lines able to easily tweak the xticklabel
fig.canvas.draw()
new_ticks = [i.get_text() for i in g.get_xticklabels()]
i=[int(idx) for idx in new_ticks]
newlabel=xlabels_to_use_this[i]
newlabel=[np.array2string(x, precision=0) for x in newlabel]
The full code for plotting is as below
import seaborn as sns
fig, ax = plt.subplots()
sns.heatmap(X,ax=ax)
x,y = X.shape
xs,ys = np.ogrid[:x,:y]
# the non-zero coordinates
u = np.argwhere(arrshape)
g=sns.scatterplot(ys[:,u[:,1]].ravel()+.5,xs[u[:,0]].ravel()+0.5,marker='*', color='r', s=55)
fig.canvas.draw()
new_ticks = [i.get_text() for i in g.get_xticklabels()]
i=[int(idx) for idx in new_ticks]
newlabel=xlabels_to_use_this[i]
newlabel=[np.array2string(x, precision=0) for x in newlabel]
ax.set_xticklabels(newlabel)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)
for ind, label in enumerate(g.get_xticklabels()):
if ind % 2 == 0: # every 10th label is kept
label.set_visible(True)
else:
label.set_visible(False)
for ind, label in enumerate(g.get_yticklabels()):
if ind % 4 == 0: # every 10th label is kept
label.set_visible(True)
else:
label.set_visible(False)
plt.xlabel('Elapsed (s)')
plt.ylabel('Hour (h)')
plt.title("Rastar Plot")
plt.tight_layout()
plt.show()
I am trying to take the Fast Fourier Transform of a resampled pandas Series:
signal = pd.Series(thick, index = pd.TimedeltaIndex(time_list_thick,unit = 's'))
resampled_signal = signal.resample('1S').mean()
However if I simply try (using scipy) and just do:
SAMPLE_RATE = 1
DURATION = len(resampled_signal)
N = SAMPLE_RATE * DURATION
yf = fft(resampled_signal[:,1])
print(yf)
xf = fftfreq(N, 1 / SAMPLE_RATE)
I obtain an error ValueError: Can only tuple-index with a MultiIndex due to the
way resampled_signal is constructed to include the index. resampled_signal looks like this for reference:
00:00:00.419175 206.080335
00:00:01.419175 206.084340
00:00:02.419175 206.087010
00:00:03.419175 206.089681
00:00:04.419175 206.095021
.
.
.
Is there anyway this can be done? I wish to include the pd.Series form since my final aim is to resample two datasets such that they have the same number of data points, take the FFT of both signals, then subtract one from the other.
My simplified code for 1 data set is given below:
import numpy as np
import pandas as pd
from datetime import datetime
from datetime import timedelta
import matplotlib
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq
datathick = "20210728_rig_thick.csv"
with open(datathick) as f:
lines = f.readlines()
dates = [str(line.split(',')[0]) for line in lines]
thick = [float(line.split(',')[1]) for line in lines]
z = [float(line.split(',')[2]) for line in lines]
date_thick = [datetime.strptime(x,'%Y-%m-%dT%H:%M:%S.%f').time() for x in dates]
time_list_thick = []
for i in np.arange(0, len(date_thick)):
q = date_thick[i]
t = timedelta(hours= q.hour, minutes=q.minute,seconds=q.second, microseconds = q.microsecond).total_seconds()
time_list_thick.append(float(t))
#---RESCALE-----
signal = pd.Series(thick, index = pd.TimedeltaIndex(time_list_thick,unit = 's'))
resampled_signal = signal.resample('1S').mean()
resampled_signal = resampled_signal.interpolate(method='time')
print(resampled_signal.head())
exit()
#----FFT Transform of Output and Noise ----
# Number of samples in normalized_tone
SAMPLE_RATE = 1
DURATION = len(resampled_signal)
N = SAMPLE_RATE * DURATION
yf = fft(resampled_signal[:,1])
print(yf)
xf = fftfreq(N, 1 / SAMPLE_RATE)
#------------------------------------------------
fig=plt.figure(figsize=(7.,7.))
ax=fig.add_subplot(1,1,1)
ax.set_zorder(1)
ax.patch.set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Amplitude (a.u)')
ax.minorticks_on() # enable minor ticks
ax.xaxis.set_ticks_position('bottom')
ax.spines['left'].set_color('black')
ax.yaxis.label.set_color('black')
plt.yscale('log')
ax.tick_params(direction='out', axis='y', which='both', pad=4, colors='black')
ax.grid(b=True, which='major', color='#eeeeee', linestyle='-', zorder=1, linewidth=0.4) # turn on major grid
ax.grid(b=True, which='minor', color='#eeeeee', linestyle='-', zorder=1, linewidth=0.4) # turn on minor grid
ax.plot(np.abs(xf), np.abs(yf))
plt.savefig('fft.pdf', dpi=300, bbox_inches='tight', format='pdf')
plt.savefig('fft.png', dpi=300, bbox_inches='tight', format='png')
#----------------------------------------------
I want my x-ticks to show mondays only as Month-Day. I try the solution here I get the the correct tick format however there is something wrong with locator and the first date is not shown correctly. The first tick should be at Feb 03 based on my indexing.
The code to reproduce my results is below:
import seaborn as sns
import matplotlib.dates as mdates
import datetime as dt
import matplotlib.ticker as ticker
import pandas as pd
width, height = plt.figaspect(.30)
fig,ax = plt.subplots(1,1, figsize=(width,height), dpi=300, constrained_layout=False)
day_pal = sns.color_palette("pastel",7)
date_df = pd.DataFrame()
date_df['ts'] = pd.Series(pd.date_range(START_DATE, periods=12*7, freq="D"))
date_df['weekday'] = date_df['ts'].dt.weekday
print(date_df.ts[0])
print(date_df.ts[len(x)-1])
x = list(date_df.ts)
daily_totals = range(len(x)) + np.random.randint(0,10,len(x))
ax.plot(x, daily_totals, lw=3, color='black',alpha=0.5)
plt.axvline(x[42], color="red", lw=5, linestyle="--", alpha = 0.6)
for wkdy in range(0,5):
start = np.array(date_df[date_df.weekday==wkdy]['ts'])
end = start + pd.Timedelta(days=1)
for i in range(len(start)):
ax.axvspan(start[i],end[i],alpha=0.5,color = day_pal[wkdy])
start = np.array(date_df[date_df.weekday==5]['ts'])
end = start + pd.Timedelta(days=2)
for i in range(len(start)):
ax.axvspan(start[i],end[i],alpha=0.5,color = "gray")
ax.set_ylabel("Number of Trips")
ax.set_xlabel("Date")
ax.set_xlim(x[0], x[len(x) -1 ] )
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b-%d"))
ax.xaxis.set_minor_formatter(mdates.DateFormatter("%b-%d"))
plt.xticks(rotation=65)
plt.show()
Resulting Plot:
The person who made this had used dates in the second graph. I was wondering how would dates be used with the scipy.signal.argrelextrema function.
With this code it doesn't do anything it prints out an empty array for peak_x and peak_y:
data_y = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25)
Here's the link for the original code:
https://openwritings.net/pg/python/python-find-peaks-and-valleys-chart-using-scipysignalargrelextrema
import matplotlib
matplotlib.use('Agg') # Bypass the need to install Tkinter GUI framework
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
# Generate random data.
data_x = np.arange(start = 0, stop = 25, step = 1, dtype='int')
data_y = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25) #edited part
# Find peaks(max).
peak_indexes = signal.argrelextrema(data_y, np.greater)
peak_indexes = peak_indexes[0]
# Find valleys(min).
valley_indexes = signal.argrelextrema(data_y, np.less)
valley_indexes = valley_indexes[0]
# Plot main graph.
(fig, ax) = plt.subplots()
ax.plot(data_x, data_y)
# Plot peaks.
peak_x = peak_indexes
peak_y = data_y[peak_indexes]
ax.plot(peak_x, peak_y, marker='o', linestyle='dashed', color='green', label="Peaks")
print(peak_x,peak_y)
# Plot valleys.
valley_x = valley_indexes
valley_y = data_y[valley_indexes]
ax.plot(valley_x, valley_y, marker='o', linestyle='dashed', color='red', label="Valleys")
# Save graph to file.
plt.title('Find peaks and valleys using argrelextrema()')
plt.legend(loc='best')
plt.savefig('argrelextrema.png')
Here's the example how it would work:
You're going to want to use the xticks method. See below:
import matplotlib.pyplot as plt
names = [str(i) for i in range(20)]
x_data = [x for x in range(20)]
y_data = [x for x in range(20)]
plt.plot(x_data, y_data)
plt.xticks(x_data, label=names)
plt.show()
What this does is use an integer between 1-19 cast as a string as the label for the axis X.
Except in your case you want to swap out the names for datatime objects cast to strings. For the xticks, the x_data element prescribes where the ticks will be. You may use any interval of points so long as they are within the bounds of the xdata.
In your case, replace:
data_y = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25)
with
data_y_ticks = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25)
data_y = [i for i, _ in enumerate(data_y_ticks.tolist())]
then plot as follows:
plt.plot(data_y, x_data)
plt.xticks(data_y, label=data_y_ticks)
plt.show()
Just a heads-up, your X and Y axis names are flipped in your code. I did not correct this in my example, however did interchange their locations in the plot to make the plot make sense.
Using pandas I create a plot of a time series like this:
import numpy as np
import pandas as pd
rng = pd.date_range('2016-01-01', periods=60, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ax = ts.plot()
ax.axhline(y=ts.mean(), xmin=-1, xmax=1, color='r', linestyle='--', lw=2)
I would like to add another horizontal line at the level of the mean using only data from February. The mean is just ts.loc['2016-02'], but how do I add a horizontal line at that level that doesn't go across the whole figure, but only for the dates in February?
Or you can create a new time series whose values are the mean and index only spans February.
ts_feb_mean = ts['2016-02'] * 0 + ts['2016-02'].mean()
All together it looks like:
import numpy as np
import pandas as pd
rng = pd.date_range('2016-01-01', periods=60, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
# Feb mean
ts_fm = ts['2016-02'] * 0 + ts['2016-02'].mean()
ts_fm = ts_fm.reindex_like(ts)
# Total mean
ts_mn = ts * 0 + ts.mean()
# better control over ax
fig, ax = plt.subplots(1, 1)
ts.plot(ax=ax)
ts_mn.plot(ax=ax)
ts_fm.plot(ax=ax)
You can use xmin and xmax to control where in the chart the line starts and ends. But this is in percent of the chart.
import numpy as np
import pandas as pd
np.random.seed([3, 1415])
rng = pd.date_range('2016-01-01', periods=60, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts_feb = ts['2016-02']
# used to figure out where to start and stop
ts_len = float(len(ts))
ts_len_feb = float(len(ts_feb))
ratio = ts_len_feb / ts_len
ax = ts.plot()
ax.axhline(y=ts.mean() * 5, xmin=0, xmax=1, color='r', linestyle='--', lw=2)
ax.axhline(y=ts_feb.mean() * 5, xmin=(1. - ratio), xmax=1, color='g', linestyle=':', lw=2)