Formatting time series axis in Seaborn - python

I'm learning Seaborn and trying to figure out how I can format an X axis for dates over a yearly period, so that it is readable. Let's assume we have a dataframe which holds weather measurements for each day of an entire year (365 rows).
sns.scatterplot(x = df_weather["DATE"], y = df_weather["MAX_TEMPERATURE_C"], color = 'red')
sns.scatterplot(x = df_weather["DATE"], y = df_weather["MIN_TEMPERATURE_C"], color = 'blue')
plt.show()
How can I ensure that the X axis labels are readable? Ideally, one label per month would be fine.
Thanks!

I'm not sure what your DATE column looks like, so let's first generate some example data. Here the date is stored as a string, which I guess is similar to yours:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
# Daily dates as 'YYYY-MM-DD' strings. '%Y' is the four-digit year; '%y%y'
# only looked right for 2020 because "20" + "20" happens to spell "2020".
DATE = pd.date_range('2020-01-01', periods=365, freq='D').strftime('%Y-%m-%d')
# Size the random arrays from DATE (this snippet defines no `index` variable).
MIN = np.random.uniform(low=10, high=25, size=len(DATE))
# MAX is always 5-10 degrees above MIN so the two series never cross.
MAX = MIN + np.random.uniform(low=5, high=10, size=len(DATE))
df = pd.DataFrame({'DATE': DATE, 'MIN': MIN, 'MAX': MAX})
Plot like you did using sns:
fig, ax = plt.subplots(figsize = (10,4))
ax = sns.scatterplot(x = "DATE", y = "MAX",data=df, color = 'red')
ax = sns.scatterplot(x = "DATE", y = "MIN",data=df, color = 'blue')
Now we compute the first day of each month and use those dates as the tick positions:
# First day of each of the 12 months of 2020, used as tick positions
mths = pd.date_range('2020-01-01', periods=12, freq='MS')
# The x values are date strings, so the tick positions must be the same kind
# of string. Use '%Y' (four-digit year): '%y%y' repeats the two-digit year and
# only yields the right text when the century equals the year (e.g. 2020).
ax.set_xticks(mths.strftime('%Y-%m-%d'))
# Label each tick with the abbreviated month name (Jan, Feb, ...)
ax.set(xticklabels=mths.strftime('%b'))
plt.show()
And it should look ok:

Related

How to specify the years on an axis when using plot() on DateTime objects

The plotting goes like this:
plt.plot(df['Date'], df['Price'])
df['Date'] consists of DateTime objects with several years and df['Price'] are integers.
However on the actual line graph, it automatically selects about 4 years spaced apart in the graph with large intervals:
How do I make it so that I can specify the number of years to show on the X axis? Or perhaps show all the years (year only)?
Example:
import pandas as pd
import datetime
import random
# Build 10,000 random (date, price) observations between 1960 and 2022.
dates = []
prices = []
for count in range(10000):
    prices.append(random.randint(0, 10))
    # Day is capped at 27 so every generated (year, month, day) is a valid date
    dates.append(
        datetime.datetime(
            random.randint(1960, 2022), random.randint(1, 12), random.randint(1, 27)
        ).strftime("%Y-%m-%d")
    )
data = {
    'Date': dates,
    'Price': prices
}
df = pd.DataFrame(data)
# ISO 'YYYY-MM-DD' strings sort chronologically, so sorting the text is enough
df = df.sort_values(by=['Date'], ignore_index=True)
# Replace the string column with a real datetime column named 'Date DT'
df['Date DT'] = pd.to_datetime(df['Date'])
df = df.drop(columns='Date')
df
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
plt.figure(figsize=(15, 5), dpi = 1000)
plt.plot(df['Date DT'], df['Price'])
# Labels
plt.xlabel('Dates', fontsize = 8)
plt.ylabel('Prices', fontsize = 8)
# Save
plt.title('Example', fontsize = 15)
plt.savefig('example.png', bbox_inches = 'tight')
You can use the xticks function to set the tick marks and labels on the x-axis.
Like This:
plt.plot(df['Date'], df['Price'])
years = [date.year for date in df['Date']]
plt.xticks(df['Date'], years, rotation=45)
plt.show()
EDIT: to only display one unique year each out of years:
import numpy as np
plt.plot(df['Date'], df['Price'])
# Extract the unique years from the 'Date' column
years = np.unique([date.year for date in df['Date']])
# Place exactly one tick per year, at 1 January of that year. Putting a tick
# at every row of df['Date'] while supplying only one label per unique year
# gives mismatched tick/label counts.
tick_positions = [np.datetime64(f'{year}-01-01') for year in years]
# Set the tick marks and their year labels together so they stay in sync
plt.xticks(tick_positions, years, rotation=45)
plt.show()

how to add secondary y-axis with plt.plot_date()

I'm trying to plot the date on the x-axis, sales volume on the primary y-axis and price on the secondary y-axis. Since the x-axis is a date type, I have used the plt.plot_date() function from matplotlib.
I tried secondary_y = True which throws
AttributeError: 'Line2D' object has no property 'secondary_y'
Is it possible to add secondary y-axis with plt.plot_date() or any better way to do this?
Code as below:
plt.plot_date(x = df['Date'], y = df['Sales_Volume'], fmt = '-')
plt.plot_date(x = df['Date'], y = df['Price'], fmt = '-', secondary_y = True)
plt.xticks(rotation = 90)
plt.show()
Please note: sales volume and price have different ranges.
You need to use twinx() to add a secondary axis to your plot. I have created a simple example to show this. I have added a few options so that you can see how to add color, a legend, etc. Use the ones you want/like.
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt  # required: plt.subplots() is used below
import matplotlib.dates as mdates
from matplotlib.dates import AutoDateFormatter, AutoDateLocator

# 21 consecutive daily timestamps starting now, used as the shared x-axis
date_today = datetime.now()
days = pd.date_range(date_today, date_today + timedelta(20), freq='D')
np.random.seed(seed=1111)
df = pd.DataFrame({'date': days}) # Your date field
df = df.set_index('date')

fig,ax1 = plt.subplots(figsize=(12,5))
l1, = ax1.plot(df.index,np.random.rand(len(df),1)*100, marker = "o", color='green') #Your Price field
ax1.set_ylabel('Price (y1 axis)')
ax2 = ax1.twinx() ##Using twinx to make ax2 as secondary axis for ax1
l2, = ax2.plot(df.index,np.random.rand(len(df),1)*800,marker = "o", color='red') #Your sales field
ax2.set_ylabel('Sales-volume (y2 axis)')
# One legend for the whole figure, because the two lines live on different axes
fig.legend([l1, l2], ["Price", "Sales Volume"])

# Let matplotlib choose date tick positions and a matching label format
xtick_locator = AutoDateLocator()
xtick_formatter = AutoDateFormatter(xtick_locator)
ax1.xaxis.set_major_locator(xtick_locator)
ax1.xaxis.set_major_formatter(xtick_formatter)
plt.show()

custom xlabel ticks in Seaborn heatmaps

I have plotted a heatmap, which is displayed below. The x-axis shows the time of day and the y-axis shows the date. I want to show an x-axis label at every hour instead of the random x labels it displays here.
I tried the following code, but the resulting heatmap overwrites all the x labels on top of each other:
t = pd.date_range(start='00:00:00', end='23:59:59', freq='60T').time
df = pd.DataFrame(index=t)
df.reset_index(inplace=True)
df['index'] = df['index'].astype('str')
sns_hm = sns.heatmap(data=mat, cbar=True, lw=0,cmap=colormap,xticklabels=df['index'])
The following code supposes mat is a dataframe with columns for some timestamps for each of a number of days. Each of the days, the same timestamps need to appear again.
After drawing the heatmap, the left and right limits of the x-axis are retrieved. Supposing these go from 0 to 24 hour, the range can be subdivided into 25 positions, one for each of the hours.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from pandas.tseries.offsets import DateOffset
from matplotlib.colors import ListedColormap, to_hex
# first, create some test data
df = pd.DataFrame()
df["date"] = pd.date_range('20220304', periods=19000, freq=DateOffset(seconds=54))
df["val"] = (((np.random.rand(len(df)) ** 100).cumsum() / 2).astype(int) % 2) * 100
df['day'] = df['date'].dt.strftime('%d-%m-%Y')
df['time'] = df['date'].dt.strftime('%H:%M:%S')
mat = df.pivot(index='day', columns='time', values='val')
colors = list(plt.cm.Greens(np.linspace(0.2, 0.9, 10)))
ax = sns.heatmap(mat, cmap=colors, cbar_kws={'ticks': range(0, 101, 10)})
xmin, xmax = ax.get_xlim()
tick_pos = np.linspace(xmin, xmax, 25)
tick_labels = [f'{h:02d}:00:00' for h in range(len(tick_pos))]
ax.set_xticks(tick_pos)
ax.set_xticklabels(tick_labels, rotation=90)
ax.set(xlabel='', ylabel='')
plt.tight_layout()
plt.show()
The left plot shows the default tick labels, the right plot the customized labels.

Printing months in the x axis with pyplot

Data I'm working with: https://drive.google.com/file/d/1xb7icmocz-SD2Rkq4ykTZowxW0uFFhBl/view?usp=sharing
Hey everyone,
I am a bit stuck with editing a plot.
Basically, I would like my x value to display the months in the year, but it doesn't seem to work because of the data type (?). Do you have any idea how I could get my plot to have months in the x axis?
If you need more context about the data, please let me know!!!
Thank you!
Here's my code for the plot and the initial data modifications:
import matplotlib.pyplot as plt
import mplleaflet
import pandas as pd
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import numpy as np
df = pd.read_csv("data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv")
df['degrees']=df['Data_Value']/10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date']<'2015-01-01']
df3 = df[df['Date']>='2015-01-01']
max_temp = df2.groupby([(df2.Date.dt.month),(df2.Date.dt.day)])['degrees'].max()
min_temp = df2.groupby([(df2.Date.dt.month),(df2.Date.dt.day)])['degrees'].min()
max_temp2 = df3.groupby([(df3.Date.dt.month),(df3.Date.dt.day)])['degrees'].max()
min_temp2 = df3.groupby([(df3.Date.dt.month),(df3.Date.dt.day)])['degrees'].min()
max_temp.plot(x ='Date', y='degrees', kind = 'line')
min_temp.plot(x ='Date',y='degrees', kind= 'line')
plt.fill_between(range(len(min_temp)),min_temp, max_temp, color='C0', alpha=0.2)
ax = plt.gca()
ax.set(xlabel="Date",
ylabel="Temperature",
title="Extreme Weather in 2015")
plt.legend()
plt.tight_layout()
x = plt.gca().xaxis
for item in x.get_ticklabels():
item.set_rotation(45)
plt.show()
Plot I'm getting:
Option 1 (Most Similar Approach)
Change the index based on month abbreviations using Index.map and calendar
This is just for df2:
import calendar
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv("...")
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date'] < '2015-01-01']
max_temp = df2.groupby([df2.Date.dt.month, df2.Date.dt.day])['degrees'].max()
min_temp = df2.groupby([df2.Date.dt.month, df2.Date.dt.day])['degrees'].min()
# Update the index to be the desired display format for x-axis
max_temp.index = max_temp.index.map(lambda x: f'{calendar.month_abbr[x[0]]}')
min_temp.index = min_temp.index.map(lambda x: f'{calendar.month_abbr[x[0]]}')
max_temp.plot(x='Date', y='degrees', kind='line')
min_temp.plot(x='Date', y='degrees', kind='line')
plt.fill_between(range(len(min_temp)), min_temp, max_temp,
color='C0', alpha=0.2)
ax = plt.gca()
ax.set(xlabel="Date", ylabel="Temperature", title="Extreme Weather 2005-2014")
x = plt.gca().xaxis
for item in x.get_ticklabels():
item.set_rotation(45)
plt.margins(x=0)
plt.legend()
plt.tight_layout()
plt.show()
As an aside: the title "Extreme Weather in 2015" is incorrect because this data includes all years before 2015. This is "Extreme Weather 2005-2014"
The year range can be checked with min and max as well:
print(df2.Date.dt.year.min(), '-', df2.Date.dt.year.max())
# 2005 - 2014
The title could be programmatically generated with:
title=f"Extreme Weather {df2.Date.dt.year.min()}-{df2.Date.dt.year.max()}"
Option 2 (Simplifying groupby step)
Simplify the code using groupby aggregate to create a single DataFrame then convert the index in the same way as above:
import calendar
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv("...")
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date'] < '2015-01-01']
# Get Max and Min Degrees in Single Groupby
df2_temp = (
df2.groupby([df2.Date.dt.month, df2.Date.dt.day])['degrees']
.agg(['max', 'min'])
)
# Convert Index to whatever display format is desired:
df2_temp.index = df2_temp.index.map(lambda x: f'{calendar.month_abbr[x[0]]}')
# Plot
ax = df2_temp.plot(
kind='line', rot=45,
xlabel="Date", ylabel="Temperature",
title=f"Extreme Weather {df2.Date.dt.year.min()}-{df2.Date.dt.year.max()}"
)
# Fill between
plt.fill_between(range(len(df2_temp)), df2_temp['min'], df2_temp['max'],
color='C0', alpha=0.2)
plt.margins(x=0)
plt.tight_layout()
plt.show()
Option 3 (Best overall functionality)
Convert the index to a datetime using pd.to_datetime. Choose any leap year so that all the data shares a single year (it must be a leap year so Feb-29 does not raise an error). Then call set_major_formatter with the format string %b to show the month abbreviation:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv("...")
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date'] < '2015-01-01']
# Get Max and Min Degrees in Single Groupby
df2_temp = (
df2.groupby([df2.Date.dt.month, df2.Date.dt.day])['degrees']
.agg(['max', 'min'])
)
# Convert to DateTime of Same Year
# (Must be a leap year so Feb-29 doesn't raise an error)
df2_temp.index = pd.to_datetime(
'2000-' + df2_temp.index.map(lambda s: '-'.join(map(str, s)))
)
# Plot
ax = df2_temp.plot(
kind='line', rot=45,
xlabel="Date", ylabel="Temperature",
title=f"Extreme Weather {df2.Date.dt.year.min()}-{df2.Date.dt.year.max()}"
)
# Fill between
plt.fill_between(df2_temp.index, df2_temp['min'], df2_temp['max'],
color='C0', alpha=0.2)
# Set xaxis formatter to month abbr with the %b format string
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
plt.tight_layout()
plt.show()
The benefit of this approach is that the index is a datetime and therefore will format better than the string representations of options 1 and 2.

Monthly shaded error/std plot in matplotlib from daily timeseries data

I have a pandas dataframe with 5 years of daily time series data. I want to make a monthly plot from the whole dataset so that the plot shows the variation (std or something else) within the monthly data. I tried to create a figure similar to this one but did not find a way to do it:
For example, I have pseudo daily precipitation data:
date = pd.to_datetime("1st of Dec, 1999")
dates = date+pd.to_timedelta(np.arange(1900), 'D')
ppt = np.random.normal(loc=0.0, scale=1.0, size=1900).cumsum()
df = pd.DataFrame({'pre':ppt},index=dates)
Manually I can do it like:
one = df['pre']['1999-12-01':'2000-11-29'].values
two = df['pre']['2000-12-01':'2001-11-30'].values
three = df['pre']['2001-12-01':'2002-11-30'].values
four = df['pre']['2002-12-01':'2003-11-30'].values
five = df['pre']['2003-12-01':'2004-11-29'].values
df = pd.DataFrame({'2000':one,'2001':two,'2002':three,'2003':four,'2004':five})
std = df.std(axis=1)
lw = df.mean(axis=1)-std
up = df.mean(axis=1)+std
plt.fill_between(np.arange(365), up, lw, alpha=.4)
I am looking for the more pythonic way to do that instead of doing it manually!
Any help will be highly appreciated.
If I'm understanding you correctly you'd like to plot your daily observations against a monthly periodic mean +/- 1 standard deviation. And that's what you get in my screenshot below. Nevermind the lackluster design and color choice. We'll get to that if this is something you can use. And please notice that I've replaced your ppt = np.random.rand(1900) with ppt = np.random.normal(loc=0.0, scale=1.0, size=1900).cumsum() just to make the data look a bit more like your screenshot.
Here I've aggregated the daily data by month, and retrieved mean and standard deviation for each month. Then I've merged that data with the original dataframe so that you're able to plot both the source and the grouped data like this:
# imports
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates
import numpy as np
# Data that matches your setup, but with a random
# seed to make it reproducible
np.random.seed(42)
date = pd.to_datetime("1st of Dec, 1999")
dates = date+pd.to_timedelta(np.arange(1900), 'D')
#ppt = np.random.rand(1900)
ppt = np.random.normal(loc=0.0, scale=1.0, size=1900).cumsum()
df = pd.DataFrame({'ppt':ppt},index=dates)
# A subset
df = df.tail(200)
# Add a yearmonth column
df['YearMonth'] = df.index.map(lambda x: 100*x.year + x.month)
# Create aggregated dataframe
df2 = df.groupby('YearMonth').agg(['mean', 'std']).reset_index()
df2.columns = ['YearMonth', 'mean', 'std']
# Merge original data and aggregated data
df3 = pd.merge(df,df2,how='left',on=['YearMonth'])
df3 = df3.set_index(df.index)
df3 = df3[['ppt', 'mean', 'std']]
# Function to make your plot
def monthplot():
    """Plot the daily series, its monthly mean, and a +/- 1 std band.

    Reads the module-level ``df3`` (columns 'ppt', 'mean' and 'std') and
    draws everything on a single new figure, with one x-axis tick per month
    labelled by its abbreviated name.
    """
    # Define upper and lower bounds for shaded variation
    lower_bound = df3['mean'] - df3['std']
    upper_bound = df3['mean'] + df3['std']

    # Create the figure once; a second plt.subplots() call would leave an
    # empty extra figure behind.
    fig, ax = plt.subplots(1)
    ax.set_facecolor('white')

    # Source data and mean
    ax.plot(df3.index, df3['mean'], lw=0.5, color='red')
    ax.plot(df3.index, df3['ppt'], lw=0.1, color='blue')

    # Variation and shaded area
    ax.fill_between(df3.index, lower_bound, upper_bound, facecolor='grey', alpha=0.5)

    # Assign months to X axis
    locator = mdates.MonthLocator()  # every month
    # Specify the format - %b gives us Jan, Feb...
    fmt = mdates.DateFormatter('%b')
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(fmt)

    fig.show()
monthplot()
Check out this post for more on axis formatting and this post on how to add a YearMonth column.
In your example, you have a few mistakes, but I think it isn't important.
Do you want all years to be on the same graphic (like in your example)? If you do, this may help you:
# Month-day label (e.g. "03-15") so the same calendar day lines up across years
df['month'] = df.index.strftime("%m-%d")
df['year'] = df.index.year
# Pass `columns=` by keyword: the positional axis argument of DataFrame.drop
# was deprecated in pandas 1.3 and removed in pandas 2.0.
df.set_index(['month']).drop(columns=['year']).plot()

Categories

Resources