Python, x-axis title is overlapping the tick labels in matplotlib - python

I'm plotting a graph and the x-axis label is not visible in the graph.
I have tried to solve it by adding the
ax.xaxis.labelpad = -10 # Adjust x-axis label position
Instead the x-label will overlap the ticker label
How can this be adjusted to show both x-axis label and x-ticker labels within the plot figure?
Full Code to replicate graph:
#################################
### Modules imported used ###
#################################
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date
import time
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.dates as mdates
# file_path_setup = 'G:/Stocks/PowerPivotApps/Price download/'
# Performance_History = pd.read_csv(file_path_setup + 'Performance.txt', dtype=str, sep=',')
# Portfolio = Performance_History.loc[Performance_History['ExecutionType'] == 'All Portfolios']
# Portfolio = Performance_History.loc[Performance_History['ExecutionType'] == 'Selected Portfolios'] # remove "# set minimum level for performance time"
#Portfolios_Nr_of_Stocks = Portfolio['NrOfStocks']
#Portfolio_Performance_Time = Portfolio['PerformanceTime']
#Portfolio_Date = Portfolio['Date']
Portfolio_Date = ['2020-08-31','2020-09-01','2020-09-02','2020-09-02','2020-09-03','2020-09-04','2020-09-07','2020-09-08','2020-09-09','2020-09-09','2020-09-10','2020-09-11','2020-09-14','2020-09-15','2020-09-16','2020-09-17','2020-09-18','2020-09-21','2020-09-22','2020-09-22','2020-09-23','2020-09-24','2020-09-25','2020-09-28','2020-09-29','2020-09-30','2020-10-01','2020-10-02','2020-10-05','2020-10-06','2020-10-07','2020-10-08','2020-10-08','2020-10-09','2020-10-12','2020-10-13','2020-10-14','2020-10-15','2020-10-16']
Portfolio_Performance_Time =['00:11:11','00:11:07','00:11:16','00:10:42','00:10:54','00:10:46','00:10:27','00:11:23','00:11:35','00:10:23','00:10:51','00:41:22','00:11:05','00:11:15','00:10:50','00:10:41','00:19:47','00:10:43','00:10:48','00:11:12','00:11:05','00:10:45','00:11:02','00:10:57','00:11:01','00:15:17','00:14:33','00:18:49','00:14:28','00:20:45','00:14:29','00:14:45','00:17:52','00:14:37','00:14:08','00:15:05','00:14:46','00:14:39','00:14:40']
Portfolios_Nr_of_Stocks = ['621','619','617','619','622','622','622','621','622','622','622','613','622','621','621','607','621','622','621','622','620','620','622','620','620','680','679','680','681','488','681','681','680','678','678','676','678','676','676']
# Convert To integer
numberofstocks = [int(stock) for stock in Portfolios_Nr_of_Stocks]
# Convert to time
def get_sec(time_str):
"""Get Seconds from time."""
h, m, s = time_str.split(':')
return int(h) * 3600 + int(m) * 60 + int(s)
PerformanceTime = [get_sec(t) for t in Portfolio_Performance_Time]
# print(type(numberofstocks)) # print type
# convert to date series
date_portfolio = [datetime.strptime(d, '%Y-%m-%d') for d in Portfolio_Date]
# https://matplotlib.org/gallery/api/two_scales.html
# https://cmdlinetips.com/2019/10/how-to-make-a-plot-with-two-different-y-axis-in-python-with-matplotlib/
# create figure and axis objects with subplots()
fig,ax = plt.subplots(figsize=(12, 8)) # figsize -> size of the plot window
# make a plot
ax.plot(date_portfolio, PerformanceTime, color="red", marker="x")
# set x-axis label
ax.set_xlabel("Date", fontsize=14)
# set y-axis label
ax.set_ylabel("Performance Time",color="red",fontsize=14)
# set title
ax.set_title("Execution History",fontsize=20, loc="center", pad=10)
# format y-axis label to hh:mm:ss
formatter_yx1 = matplotlib.ticker.FuncFormatter(lambda s, x: time.strftime('%H:%M:%S', time.gmtime(s)))
ax.yaxis.set_major_formatter(formatter_yx1)
# rotate x-axis lables and adjust size
plt.xticks(rotation=90, ha='right')
# plt.xticks(rotation=90, ha='right', fontsize='x-small') # Small font text
# set minimum level for performance time, y-axis 1
ax.set_ylim([min(PerformanceTime)-100,25*60]) # -100 -> set minimum. 25*60 -> Set maximum
# twin object for two different y-axis on the sample plot
ax2=ax.twinx()
# make a plot with different y-axis using second axis object
ax2.plot(date_portfolio, numberofstocks,color="blue",marker="o")
# ax2.set_ylim([620, 680])
ax2.set_ylabel("Nr Of Stocks",color="blue",fontsize=14)
# set minimum level for performance time, y-axis 2
ax2.set_ylim([600, max(numberofstocks)+10]) # -100 -> set minimum. 25*60 -> Set maximum
# set date interval
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7)) # max interval
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1)) # minimum interval
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) # set date format
ax.xaxis.labelpad = -10 # Adjust x-axis label position
# Plot graph
plt.show()

You could use "Tight Layout" function in matplotlib to solve the issue.
Add the line before you plot the graph, where h_pad will adjust the height, w_pad will adjust the width.
# Adjust x-axis margins
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=5.0)
And remove this part:
ax.xaxis.labelpad = -10 # Adjust x-axis label position
Result:

Related

Scatter data not overlaying properly on radar data... cartopy issue

I'm trying to plot scatter data of storm reports on top of radar gridded data and I seem to be getting strange plotting issues related to mapping using cartopy. See example image attached. It appears that the scatter data plots on a separate axis than the radar data, but I'm not sure why given that the plotting module for the radar data uses the same user input for min/max lat/lon and the chosen projection. Additionally, the lat/lon range on the map is dynamic as I loop through time stamps. I know I can use an ax.set_extent to create fixed coordinates, but this does not solve my issue of the plotting being done on a separate axis. Does anyone have any suggestions? They should overlay on the same axis.
Here is the code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
import cartopy.crs as ccrs
import pyart
import pandas as pd
import nexradaws
import tempfile
import pytz
templocation = tempfile.mkdtemp()
import cartopy.feature as cfeature
from metpy.plots import USCOUNTIES
### Define the radar, start time and end time
radar_id = 'KDVN'
start = pd.Timestamp(2020,8,10,16,30).tz_localize(tz='UTC')
end = pd.Timestamp(2020,8,10,21,0).tz_localize(tz='UTC')
### Bounds of map we want to plot
min_lon = -93.25
max_lon = -88.
min_lat = 40.35
max_lat = 43.35
# ### Bounds of map we want to plot
# min_lon = -80.8
# max_lon = -77.
# min_lat = 34
# max_lat = 37
#### and get the data
conn = nexradaws.NexradAwsInterface()
scans = conn.get_avail_scans_in_range(start, end, radar_id)
print("There are {} scans available between {} and {}\n".format(len(scans), start, end))
print(scans[0:4])
## download these files
#results = conn.download(scans[0:2], templocation)
results = conn.download(scans, templocation)
#%%
#Now get the severe reports from the SPC site. This assumes you're plotting a year far #enough in the past that
# SPC has official records available. If plotting a more recent time period, then the #local storm reports archive
#[![enter image description here][1]][1] at IEM is a good source
### wind reports
wind_rpts = pd.read_csv("https://www.spc.noaa.gov/wcm/data/"+str(start.year)+"_wind.csv")
wind_rpts['datetime'] = pd.to_datetime(wind_rpts.date + ' ' + wind_rpts.time) ## convert to datetime
wind_rpts.set_index("datetime",inplace=True)
### times in the file are given in central standard time (UTC+6). Localize, and convert to UTC
wind_rpts.index = wind_rpts.index.tz_localize("Etc/GMT+6",ambiguous='NaT',nonexistent='shift_forward').tz_convert("UTC")
## subset down to 30 minutes before/after the radar times we're plotting
wind_rpts = wind_rpts[((start-pd.Timedelta(minutes=30)).strftime("%Y-%m-%d %H:%M")):((end+pd.Timedelta(minutes=30)).strftime("%Y-%m-%d %H:%M"))]
wind_rpts
### repeat for tornado reports
tor_rpts = pd.read_csv("https://www.spc.noaa.gov/wcm/data/"+str(start.year)+"_torn.csv")
tor_rpts['datetime'] = pd.to_datetime(tor_rpts.date + ' ' + tor_rpts.time) ## convert to datetime
tor_rpts.set_index("datetime",inplace=True)
### times in the file are given in central standard time (UTC+6). Localize, and convert to UTC
tor_rpts.index = tor_rpts.index.tz_localize("Etc/GMT+6",ambiguous='NaT',nonexistent='shift_forward').tz_convert("UTC")
## subset down to 30 minutes before/after the radar times we're plotting
tor_rpts = tor_rpts[((start-pd.Timedelta(minutes=30)).strftime("%Y-%m-%d %H:%M")):((end+pd.Timedelta(minutes=30)).strftime("%Y-%m-%d %H:%M"))]
tor_rpts
### repeat for hail
hail_rpts = pd.read_csv("https://www.spc.noaa.gov/wcm/data/"+str(start.year)+"_hail.csv")
hail_rpts['datetime'] = pd.to_datetime(hail_rpts.date + ' ' + hail_rpts.time) ## convert to datetime
hail_rpts.set_index("datetime",inplace=True)
### times in the file are given in central standard time (UTC+6). Localize, and convert to UTC
hail_rpts.index = hail_rpts.index.tz_localize("Etc/GMT+6",ambiguous='NaT',nonexistent='shift_forward').tz_convert("UTC")
## subset down to 30 minutes before/after the radar times we're plotting
hail_rpts = hail_rpts[((start-pd.Timedelta(minutes=30)).strftime("%Y-%m-%d %H:%M")):((end+pd.Timedelta(minutes=30)).strftime("%Y-%m-%d %H:%M"))]
hail_rpts
#%%
'''Now we plot the maps and animate'''
### loop over the radar images that have been downloaded
for i,scan in enumerate(results.iter_success(),start=1):
#for i in range(0,1):
## skip the files ending in "MDM"
if scan.filename[-3:] != "MDM":
print(str(i))
print("working on "+scan.filename)
this_time = pd.to_datetime(scan.filename[4:17], format="%Y%m%d_%H%M").tz_localize("UTC")
radar = scan.open_pyart()
#display = pyart.graph.RadarDisplay(radar)
fig = plt.figure(figsize=[15, 7])
map_panel_axes = [0.05, 0.05, .4, .80]
x_cut_panel_axes = [0.55, 0.10, .4, .25]
y_cut_panel_axes = [0.55, 0.50, .4, .25]
projection = ccrs.PlateCarree()
## apply gatefilter (see here: https://arm-doe.github.io/pyart/notebooks/masking_data_with_gatefilters.html)
#gatefilter = pyart.correct.moment_based_gate_filter(radar)
gatefilter = pyart.filters.GateFilter(radar)
# Lets remove reflectivity values below a threshold.
gatefilter.exclude_below('reflectivity', -2.5)
display = pyart.graph.RadarMapDisplay(radar)
### set up plot
ax1 = fig.add_axes(map_panel_axes, projection=projection)
# Add some various map elements to the plot to make it recognizable.
ax1.add_feature(USCOUNTIES.with_scale('500k'), edgecolor="gray", linewidth=0.4)
#ax1.coastlines('50m', edgecolor='black', linewidth=0.75)
ax1.add_feature(cfeature.STATES.with_scale('10m'), linewidth=1.0)
cf = display.plot_ppi_map('reflectivity', 0, vmin=-7.5, vmax=65,
min_lon=min_lon, max_lon=max_lon, min_lat=min_lat, max_lat=max_lat,
title=radar_id+" reflectivity and severe weather reports, "+this_time.strftime("%H%M UTC %d %b %Y"),
projection=projection, resolution='10m',
gatefilter=gatefilter,
cmap='pyart_HomeyerRainbow',
colorbar_flag=False,
lat_lines=[0,0], lon_lines=[0,0]) ## turns off lat/lon grid lines
#display.plot_crosshairs(lon=lon, lat=lat)
## plot horizontal colorbar
display.plot_colorbar(cf,orient='horizontal', pad=0.07)
# Plot range rings if desired
#display.plot_range_ring(25., color='gray', linestyle='dashed')
#display.plot_range_ring(50., color='gray', linestyle='dashed')
#display.plot_range_ring(100., color='gray', linestyle='dashed')
ax1.set_xticks(np.arange(min_lon, max_lon, .5), crs=ccrs.PlateCarree())
ax1.set_yticks(np.arange(min_lat, max_lat, .5), crs=ccrs.PlateCarree())
## add marker points for severe reports
wind_rpts_now = wind_rpts[((start-pd.Timedelta(minutes=30)).strftime("%Y-%m-%d %H:%M")):this_time.strftime("%Y-%m-%d %H:%M")]
ax1.scatter(wind_rpts_now.slon.values.tolist(), wind_rpts_now.slat.values.tolist(), s=20, facecolors='none', edgecolors='mediumblue', linewidths=1.8)
tor_rpts_now = tor_rpts[((start-pd.Timedelta(minutes=30)).strftime("%Y-%m-%d %H:%M")):this_time.strftime("%Y-%m-%d %H:%M")]
ax1.scatter(tor_rpts_now.slon.values.tolist(), tor_rpts_now.slat.values.tolist(), s=20, facecolors='red', edgecolors='black', marker="v",linewidths=1.5)
hail_rpts_now = hail_rpts[((start-pd.Timedelta(minutes=30)).strftime("%Y-%m-%d %H:%M")):this_time.strftime("%Y-%m-%d %H:%M")]
ax1.scatter(hail_rpts_now.slon.values.tolist(), hail_rpts_now.slat.values.tolist(), s=20, facecolors='none', edgecolors='green', linewidths=1.8)
plt.savefig(scan.radar_id+"_"+scan.filename[4:17]+"_dz_rpts.png",bbox_inches='tight',dpi=300,
facecolor='white', transparent=False)
#plt.show()
plt.close('all')

Xlabel fontsize don't change in plot

I'm trying to plot some data in different plots, and I'm able to do it, but, the labelsize from the x axis don't change in the first plot, but it does in the second one. This is what I'm doing:
import matplotlib.pyplot as plt
from dateutil.relativedelta import relativedelta as rd
from calendar import monthrange
fin_mes = date.today() - rd(days=1)
# Start ploting data A
fig, ax = plt.subplots(1, 1)
# Retrieve dates
mes = fin_mes.strftime("%B")
anio = fin_mes.strftime("%Y")
# Set title
plt.suptitle(f"Data {mes} - {anio}")
# List of days of the last month
num_days = range(1, int(monthrange(fin_mes.day, fin_mes.month)[1]) + 1)
print(num_days)
# Set name from x label
ax.set_xlabel('Dates')
# Set name from y label
ax.set_ylabel('Data')
# Set name from the plot to save
name_a = f"DataA-{mes}-{anio}.png"
plt.title("My Data")
plt.xlim(0, num_days[-1])
plt.ylim((min(num_days)),
(max(num_days)))
plt.xticks(num_days)
# Setting the size to the label
plt.rc('xtick', labelsize=5)
plt.grid(True)
dataa_plot, = plt.plot(num_days, num_days, label="DATA A")
plt.legend(handles=[dataa_plot])
plt.show()
fig.savefig(name_a)
# Start ploting data B
fig, ax = plt.subplots(1, 1)
# Retrieve dates
mes = fin_mes.strftime("%B")
anio = fin_mes.strftime("%Y")
# Set title
plt.suptitle(f"Data {mes} - {anio}")
# List of days of the last month
num_days = range(1, int(monthrange(fin_mes.day, fin_mes.month)[1]) + 1)
print(num_days)
# Set name from x label
ax.set_xlabel('Dates')
# Set name from y label
ax.set_ylabel('Data')
# Set name from the plot to save
name_b = f"DataB-{mes}-{anio}.png"
plt.title("My Data B")
plt.xlim(0, num_days[-1])
plt.ylim((min(num_days)),
(max(num_days)))
plt.xticks(num_days)
# Setting the size to the label
plt.rc('xtick', labelsize=5)
plt.grid(True)
datab_plot, = plt.plot(num_days, num_days, label="DATA B")
plt.legend(handles=[datab_plot])
plt.show()
fig.savefig(name_b)
With that I get this plots:
And plot b has the font size changed, but not plot A. I don't know why this is happening. Hope someone can help me, thanks.
PD: I'm using python 3.8.10 in Lubuntu x64 with matplotlib=3.5.1
You must set the label size before using it in setting the ticks, i.e. plt.rc('xtick', labelsize=5) must come before plt.xticks(num_days) to take effect (the safest way is to move it to the very beginning of the plotting).
As an (easier) alternative you can set the font size directly in xticks without changing the rc parameters:
plt.xticks(num_days, fontsize=5)

how to plot horizontal bar plot in loop to change the color on the bar based on the value in another column

I need to plot time(timestamp) vs space(intersectionId) single horizontal bar chart in matplotlib. The color of the bar will be changed at time intervals based on another column which will the currState. The colors will be
red,green,yellow. I have tried to create a dictionary of colors and values but unsure of how to use them in loop to change color based on the value. I have attached a sample csv below along with a code and what I try to achieve and what I have written till now.
category_colors = { 'red' : [2,3] , 'yellow' : [5,6] , 'green' : [7,8]}
date_test = df_sample['timestamp']
y_test = ['123456']
data = np.array(list(df_sample.currState))
fig, ax = plt.subplots(figsize=(10, 1))
ax = plt.barh(y_test,date_test,label="trafficsignal")
data_cum = data.cumsum
plt.xlabel('timestamp')
plt.ylabel('space')
plt.title('TimeSpace')
plt.legend()
plt.show()
timestamp currState IntersectionId
2020-02-26 16:12:13.131484 3 12345
2020-02-26 16:12:14.131484 3 12345
2020-02-26 16:12:15.131484 3 12345
2020-02-26 16:12:16.131484 5 12345
2020-02-26 16:12:17.131484 5 12345
2020-02-26 16:12:18.131484 5 12345
2020-02-26 16:12:19.131484 6 12345
2020-02-26 16:12:20.131484 6 12345
2020-02-26 16:12:21.131484 6 12345
Current plot:
Desired plot:
I am not aware of any plotting package that lets you create this plot in a straightforward way based on how your sample table is structured. One option could be to compute a start and an end variable and then create the plot like in the answers to this question, for example using the Altair Gantt chart like in this answer.
Here, I offer two solutions using matplotlib. By taking a look at the matplotlib gallery, I stumbled on the broken_barh plotting function which provides a way to create a plot like the one you want. There are two main hurdles to overcome when using it:
Deciding what unit to use for the x-axis and computing the xranges argument accordingly;
Creating and formatting the x ticks and tick labels.
Let me first create a sample dataset that resembles yours, note that you will need to adjust the color_dict to your codes:
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import matplotlib.pyplot as plt # v 3.3.2
import matplotlib.dates as mdates
## Create sample dataset
# Light color codes
gre = 1
yel_to_red = 2
red = 3
yel_to_gre = 4
color_dict = {1: 'green', 2: 'yellow', 3: 'red', 4: 'yellow'}
# Light color duration in seconds
sec_g = 45
sec_yr = 3
sec_r = 90
sec_yg = 1
# Light cycle
light_cycle = [gre, yel_to_red, red, yel_to_gre]
sec_cycle = [sec_g, sec_yr, sec_r, sec_yg]
ncycles = 3
sec_total = ncycles*sum(sec_cycle)
# Create variables and store them in a pandas dataframe with the datetime as index
IntersectionId = 12345
currState = np.repeat(ncycles*light_cycle, repeats=ncycles*sec_cycle)
time_sec = pd.date_range(start='2021-01-04 08:00:00', freq='S', periods=sec_total)
df = pd.DataFrame(dict(IntersectionId = np.repeat(12345, repeats=ncycles*sum(sec_cycle)),
currState = currState),
index = time_sec)
The broken_barh function takes the data in the format of tuples where for each colored rectangle that makes up the horizontal bar you need to provide the xy coordinates of the bottom-left corner as well as the length along each axis, like so:
xranges=[(x1_start, x1_length), (x2_start, x2_length), ... ], yranges=(y_all_start, y_all_width)
Note that yranges applies to all rectangles. The unit that is chosen for the x-axis determines how the data must be processed and how the x ticks and tick labels can be created. Here are two alternatives.
Matplotlib broken_barh with matplotlib date number as x-axis scale
In this approach, the timestamps of the rows where the light changes are extracted and then converted to matplotlib date numbers. This makes it possible to use a matplotlib date tick locator and formatter. This approach of using the matplotlib date for the x-axis values to simplify tick formatting was inspired by this answer by ImportanceOfBeingErnest.
For both this solution and the next one, the code for getting the indices of light changes and computing the lengths of the periods is based on this answer by Jaime, thanks to the general idea provided by this Gist by alimanfoo.
## Compute variables needed to define the plotting function arguments
states = np.array(df['currState'])
# Create a list of indices of the rows where the light changes
# (i.e. where a new currState code section starts)
starts_indices = np.where(np.concatenate(([True], states[:-1] != states[1:])))
# Append the last index to be able to compute the duration of the last
# light color period recorded in the dataset
starts_end_indices = np.append(starts_indices, states.size-1)
# Get the timestamps of those rows and convert them to python datetime format
starts_end_pydt = df.index[starts_end_indices].to_pydatetime()
# Convert the python timestamps to matplotlib date number that is used as the
# x-axis unit, this makes it easier to format the tick labels
starts_end_x = mdates.date2num(starts_end_pydt)
# Get the duration of each light color in matplotlib date number units
lengths = np.diff(starts_end_x)
# Add one second (computed in python datetime units) to the duration of
# the last light to make the bar chart left and right inclusive instead
# of just left inclusive
pydt_second = (max(starts_end_x) - min(starts_end_x))/starts_end_indices[-1]
lengths[-1] = lengths[-1] + pydt_second
# Compute the arguments for the broken_barh plotting function
xranges = [(start, length) for start, length in zip(starts_end_x, lengths)]
yranges = (0.75, 0.5)
colors = df['currState'][starts_end_indices[:-1]].map(color_dict)
## Create horizontal bar with colors by using the broken_barh function
## and format ticks and tick labels
fig, ax = plt.subplots(figsize=(10,2))
ax.broken_barh(xranges, yranges, facecolors=colors, zorder=2)
# Create and format x ticks and tick labels
loc = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(loc)
formatter = mdates.AutoDateFormatter(loc)
formatter.scaled[1/(24.*60.)] = '%H:%M:%S' # adjust this according to time range
ax.xaxis.set_major_formatter(formatter)
# Format y-axis and create y tick and tick label
ax.set_ylim(0, 2)
ax.set_yticks([1])
ax.set_yticklabels([df['IntersectionId'][0]])
plt.grid(axis='x', alpha=0.5, zorder=1)
plt.show()
Matplotlib broken_barh with seconds as x-axis scale
This approach takes advantage of the fact that the indices of the table can be used to compute the lights' durations in seconds. The downside is that this time the x ticks and tick labels must be created from scratch. The code is written so that labels automatically have a nice format depending on the total duration covered by the dataset. The only thing that needs adjusting is the number of ticks, as this depends on how wide the figure is.
The code used to automatically select an appropriate time step between ticks is based on this answer by kennytm. The datetime string format codes are listed here.
## Compute the variables needed for the plotting function arguments
## using the currState variable
states = np.array(df['currState'])
# Create list of indices indicating the rows where the currState code
# changes: note the comma to unpack the tuple
starts_indices, = np.where(np.concatenate(([True], states[:-1] != states[1:])))
# Compute durations of each light in seconds
lengths = np.diff(starts_indices, append=states.size)
## Compute the arguments for the plotting function
xranges = [(start, length) for start, length in zip(starts_indices, lengths)]
yranges = (0.75, 0.5)
colors = df['currState'][starts_indices].map(color_dict)
## Create horizontal bar with colors using the broken_barh function
fig, ax = plt.subplots(figsize=(10,2))
ax.broken_barh(xranges, yranges, facecolors=colors, zorder=2)
## Create appropriate x ticks and tick labels
# Define time variable and parameters needed for computations
time = pd.DatetimeIndex(df.index).asi8 // 10**9 # time is in seconds
tmin = min(time)
tmax = max(time)
trange = tmax-tmin
# Choose the approximate number of ticks, the exact number depends on
# the automatically selected time step
approx_nticks = 6 # low number selected because figure width is only 10 inches
round_time_steps = [15, 30, 60, 120, 180, 240, 300, 600, 900, 1800, 3600, 7200, 14400]
time_step = min(round_time_steps, key=lambda x: abs(x - trange//approx_nticks))
# Create list of x ticks including the right boundary of the last time point
# in the dataset regardless of whether not it is aligned with the time step
timestamps = np.append(np.arange(tmin, tmax, time_step), tmax+1)
xticks = timestamps-tmin
ax.set_xticks(xticks)
# Create x tick labels with format depending on time step
fmt_time = '%H:%M:%S' if time_step <= 60 else '%H:%M'
xticklabels = [pd.to_datetime(ts, unit='s').strftime(fmt_time) for ts in timestamps]
ax.set_xticklabels(xticklabels)
## Format y-axis limits, tick and tick label
ax.set_ylim(0, 2)
ax.set_yticks([1])
ax.set_yticklabels([df['IntersectionId'][0]])
plt.grid(axis='x', alpha=0.5, zorder=1)
plt.show()
Further documentation: to_datetime, to_pydatetime, strftime

Setting xaxis values to %.2f on a bar graph

I have a bar graph with multiple data series and i want to set the xaxis values to a significant value of %.2f I already tried using the set_major formatter for the first graph, but it resets the values to 0, while the values should be like the second graph.
How can I fix this?
My code look like this:
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.ticker as mtick
# select the measurement location
MATH = "import/data/place"
SAVE = "save/location"
fig, axes = plt.subplots(figsize=(12,15),nrows=2, ncols=1) # size of the plots and the placing
fig.subplots_adjust(hspace=0.5) # set space between plots
DATA = pd.read_csv(MATH,delimiter=',',usecols = [2,3,4,5,6,7,8,9,10,11,12],names = ['set_t','set_rh',
'type','math','ref','LUFFT','VPL','VPR','VVL','VVR','PRO'], parse_dates=True)
# select the data
temp = DATA.loc[(DATA['type']=='T')&(DATA['math']=='dif')] # dif temperature data
rh = DATA.loc[((DATA['type']=='RH')&(DATA['math']=='dif'))] # dif relative humidity data
# plot temperature
fg = temp.plot.bar(x='set_t',y = ['LUFFT','VPL','VPR','VVL','VVR','PRO'],
color = ['b','firebrick','orange','forestgreen','darkturquoise','indigo'],
ax=axes[0])
fg.grid(True)
fg.set_ylabel('$ΔT$(°C)',fontsize = 12)
fg.set_xlabel('ref $T$ (°C)',fontsize = 12)
fg.set_title('Difference in T from reference at constant relative humidity 50%',fontsize = 15)
fg.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))
fg.xaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))
# plot relative humidity
df = rh.plot.bar(x='set_t',y = ['LUFFT','VPL','VPR','VVL','VVR','PRO'],
color = ['b','firebrick','orange','forestgreen','darkturquoise','indigo'],
ax=axes[1])
df.grid(True)
df.set_ylabel('$ΔU$(%)',fontsize = 12)
df.set_xlabel('ref $T$ (°C)',fontsize = 12)
df.set_title('Difference in U from reference at constant relative humidity 50%',fontsize = 15)
plt.tight_layout()
plt.savefig(SAVE + "_example.jpg")
plt.show()
A sample of my data:
07:40:00,07:50:00,39.85716354999982,51.00504745588235,T,dif,,0.14283645000018197,-0.07502069285698099,-0.15716354999978677,0.0020201234696060055,-0.07111703837193772,-0.0620802166664447,
07:40:00,07:50:00,39.85716354999982,51.00504745588235,RH,dif,,-0.40504745588239643,3.994952544117652,2.994952544117652,4.994952544117652,,6.994952544117652,
08:40:00,08:50:00,34.861160704969016,51.1297401832298,T,dif,,0.22883929503095857,0.2509082605481865,-0.2575243413326831,0.24864321659958222,0.14092262836431502,-0.04441070496899613,
08:40:00,08:50:00,34.861160704969016,51.1297401832298,RH,dif,,-0.32974018322978793,3.8702598167702007,2.8702598167702007,4.870259816770201,,6.870259816770201,
This is due to the fact that with a grouped barplot like this, made by Pandas, the x-axes loses its actual 'range', and the values associated with the tick position become the position itself. That's a bit cryptic, but you can see with fg.get_xlim() that the values have lost 'touch' with the original data, and are simply increasing integers. You can explore/debug the 'values' and 'positions' Matplotlib uses if you provide a FuncFormatter with a function like this:
def check_pos(val, pos):
print(val, pos)
return '%.2f' % val
This basically shows that no formatter is going to work for your case.
Luckily the ticklabels are set correctly (as text), so you could parse these to float, and format them as you wish.
Remove your formatter altogether, and set the xticklabels with:
fg.set_xticklabels(['%.2f' % float(x.get_text()) for x in fg.get_xticklabels()])
Note that Matplotlib itself is perfectly capable of preserving the correct tickvalues in combination with a bar plot, but you would have to do the 'grouping' etc yourself, so that's not very convenient as well.

Plotting and color coding multiple y-axes

This is my first attempt using Matplotlib and I am in need of some guidance. I am trying to generate plot with 4 y-axes, two on the left and two on the right with shared x axis. Here's my dataset on shared dropbox folder
import pandas as pd
%matplotlib inline
url ='http://dropproxy.com/f/D34'
df= pd.read_csv(url, index_col=0, parse_dates=[0])
df.plot()
This is what the simple pandas plot looks like:
I would like to plot this similar to the example below, with TMAX and TMIN on primary y-axis (on same scale).
My attempt:
There's one example I found on the the matplotlib listserv..I am trying to adapt it to my data but something is not working right...Here's the script.
# multiple_yaxes_with_spines.py
# This is a template Python program for creating plots (line graphs) with 2, 3,
# or 4 y-axes. (A template program is one that you can readily modify to meet
# your needs). Almost all user-modifiable code is in Section 2. For most
# purposes, it should not be necessary to modify anything else.
# Dr. Phillip M. Feldman, 27 Oct, 2009
# Acknowledgment: This program is based on code written by Jae-Joon Lee,
# URL= http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/matplotlib/
# examples/pylab_examples/multiple_yaxis_with_spines.py?revision=7908&view=markup
# Section 1: Import modules, define functions, and allocate storage.
import matplotlib.pyplot as plt
from numpy import *
def make_patch_spines_invisible(ax):
ax.set_frame_on(True)
ax.patch.set_visible(False)
for sp in ax.spines.itervalues():
sp.set_visible(False)
def make_spine_invisible(ax, direction):
if direction in ["right", "left"]:
ax.yaxis.set_ticks_position(direction)
ax.yaxis.set_label_position(direction)
elif direction in ["top", "bottom"]:
ax.xaxis.set_ticks_position(direction)
ax.xaxis.set_label_position(direction)
else:
raise ValueError("Unknown Direction : %s" % (direction,))
ax.spines[direction].set_visible(True)
# Create list to store dependent variable data:
y= [0, 0, 0, 0, 0]
# Section 2: Define names of variables and the data to be plotted.
# `labels` stores the names of the independent and dependent variables). The
# first (zeroth) item in the list is the x-axis label; remaining labels are the
# first y-axis label, second y-axis label, and so on. There must be at least
# two dependent variables and not more than four.
labels= ['Date', 'Maximum Temperature', 'Solar Radiation',
'Rainfall', 'Minimum Temperature']
# Plug in your data here, or code equations to generate the data if you wish to
# plot mathematical functions. x stores values of the independent variable;
# y[1], y[2], ... store values of the dependent variable. (y[0] is not used).
# All of these objects should be NumPy arrays.
# If you are plotting mathematical functions, you will probably want an array of
# uniformly spaced values of x; such an array can be created using the
# `linspace` function. For example, to define x as an array of 51 values
# uniformly spaced between 0 and 2, use the following command:
# x= linspace(0., 2., 51)
# Here is an example of 6 experimentally measured y1-values:
# y[1]= array( [3, 2.5, 7.3e4, 4, 8, 3] )
# Note that the above statement requires both parentheses and square brackets.
# With a bit of work, one could make this program read the data from a text file
# or Excel worksheet.
# Independent variable:
x = df.index
# First dependent variable:
y[1]= df['TMAX']
# Second dependent variable:
y[2]= df['RAD']
y[3]= df['RAIN']
y[4]= df['TMIN']
# Set line colors here; each color can be specified using a single-letter color
# identifier ('b'= blue, 'r'= red, 'g'= green, 'k'= black, 'y'= yellow,
# 'm'= magenta, 'y'= yellow), an RGB tuple, or almost any standard English color
# name written without spaces, e.g., 'darkred'. The first element of this list
# is not used.
colors= [' ', '#C82121', '#E48E3C', '#4F88BE', '#CF5ADC']
# Set the line width here. linewidth=2 is recommended.
linewidth= 2
# Section 3: Generate the plot.
N_dependents= len(labels) - 1
if N_dependents > 4: raise Exception, \
'This code currently handles a maximum of four independent variables.'
# Open a new figure window, setting the size to 10-by-7 inches and the facecolor
# to white:
fig= plt.figure(figsize=(16,9), dpi=120, facecolor=[1,1,1])
host= fig.add_subplot(111)
host.set_xlabel(labels[0])
# Use twinx() to create extra axes for all dependent variables except the first
# (we get the first as part of the host axes). The first element of y_axis is
# not used.
y_axis= (N_dependents+2) * [0]
y_axis[1]= host
for i in range(2,len(labels)+1): y_axis[i]= host.twinx()
if N_dependents >= 3:
# The following statement positions the third y-axis to the right of the
# frame, with the space between the frame and the axis controlled by the
# numerical argument to set_position; this value should be between 1.10 and
# 1.2.
y_axis[3].spines["right"].set_position(("axes", 1.15))
make_patch_spines_invisible(y_axis[3])
make_spine_invisible(y_axis[3], "right")
plt.subplots_adjust(left=0.0, right=0.8)
if N_dependents >= 4:
# The following statement positions the fourth y-axis to the left of the
# frame, with the space between the frame and the axis controlled by the
# numerical argument to set_position; this value should be between 1.10 and
# 1.2.
y_axis[4].spines["left"].set_position(("axes", -0.15))
make_patch_spines_invisible(y_axis[4])
make_spine_invisible(y_axis[4], "left")
plt.subplots_adjust(left=0.2, right=0.8)
p= (N_dependents+1) * [0]
# Plot the curves:
for i in range(1,N_dependents+1):
p[i], = y_axis[i].plot(x, y[i], colors[i],
linewidth=linewidth, label=labels[i])
# Set axis limits. Use ceil() to force upper y-axis limits to be round numbers.
host.set_xlim(x.min(), x.max())
host.set_xlabel(labels[0], size=16)
for i in range(1,N_dependents+1):
y_axis[i].set_ylim(0.0, ceil(y[i].max()))
y_axis[i].set_ylabel(labels[i], size=16)
y_axis[i].yaxis.label.set_color(colors[i])
for sp in y_axis[i].spines.itervalues():
sp.set_color(colors[i])
for obj in y_axis[i].yaxis.get_ticklines():
# `obj` is a matplotlib.lines.Line2D instance
obj.set_color(colors[i])
obj.set_markeredgewidth(3)
for obj in y_axis[i].yaxis.get_ticklabels():
obj.set_color(colors[i])
obj.set_size(12)
obj.set_weight(600)
# To enable the legend, uncomment the following two lines:
lines= p[1:]
host.legend(lines, [l.get_label() for l in lines])
plt.draw(); plt.show()
And the output
How can I put the scale on max and min temp on a same scale? Also, how can I get rid of second y-axis with black color, scaled from 0 to 10?
Is there a simpler way to achieve this?
How can I put the scale on max and min temp on a same scale?
Plot them in the same axes.
Also, how can I get rid of second y-axis with black color, scaled from 0 to 10?
Do not create that axes.
You want to plot four variables, two of them can go in the same subplot so you only need three subplots. But you are creating five of them?
Step by step
Keep in mind: different y scales <-> different subplots sharing x-axis.
Two variables with a common scale (left), two variables with independent scales (right).
Create the primary subplot, let's call it ax1. Plot everything you want in it, in this case TMIN and TMAX as stated in your question.
Create a twin subplot sharing x axis twinx(ax=ax1). Plot the third variable, say RAIN.
Create another twin subplot twinx(ax=ax1). Plot the fourth variable 'RAD'.
Adjust colors, labels, spine positions... to your heart's content.
Unsolicited advice: do not try to fix code you don't understand.
Variation of the original plot showing how you can plot variables on multiple axes
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
url ='http://dropproxy.com/f/D34'
df= pd.read_csv(url, index_col=0, parse_dates=[0])
fig = plt.figure()
ax = fig.add_subplot(111) # Primary y
ax2 = ax.twinx() # Secondary y
# Plot variables
ax.plot(df.index, df['TMAX'], color='red')
ax.plot(df.index, df['TMIN'], color='green')
ax2.plot(df.index, df['RAIN'], color='orange')
ax2.plot(df.index, df['RAD'], color='yellow')
# Custom ylimit
ax.set_ylim(0,50)
# Custom x axis date formats
import matplotlib.dates as mdates
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
I modified #bishopo's suggestions to generate what I wanted, however, the plot still needs some tweaking with font sizes for axes label.
Here's what I have done so far.
import pandas as pd
%matplotlib inline
url ='http://dropproxy.com/f/D34'
df= pd.read_csv(url, index_col=0, parse_dates=[0])
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
if 1:
# Set the figure size, dpi, and background color
fig = plt.figure(1, (16,9),dpi =300, facecolor = 'W',edgecolor ='k')
# Update the tick label size to 12
plt.rcParams.update({'font.size': 12})
host = host_subplot(111, axes_class=AA.Axes)
plt.subplots_adjust(right=0.75)
par1 = host.twinx()
par2 = host.twinx()
par3 = host.twinx()
offset = 60
new_fixed_axis = par2.get_grid_helper().new_fixed_axis
new_fixed_axis1 = host.get_grid_helper().new_fixed_axis
par2.axis["right"] = new_fixed_axis(loc="right",
axes=par2,
offset=(offset, 0))
par3.axis["left"] = new_fixed_axis1(loc="left",
axes=par3,
offset=(-offset, 0))
par2.axis["right"].toggle(all=True)
par3.axis["left"].toggle(all=True)
par3.axis["right"].set_visible(False)
# Set limit on both y-axes
host.set_ylim(-30, 50)
par3.set_ylim(-30,50)
host.set_xlabel("Date")
host.set_ylabel("Minimum Temperature ($^\circ$C)")
par1.set_ylabel("Solar Radiation (W$m^{-2}$)")
par2.set_ylabel("Rainfall (mm)")
par3.set_ylabel('Maximum Temperature ($^\circ$C)')
p1, = host.plot(df.index,df['TMIN'], 'm,')
p2, = par1.plot(df.index, df.RAD, color ='#EF9600', linestyle ='--')
p3, = par2.plot(df.index, df.RAIN, '#09BEEF')
p4, = par3.plot(df.index, df['TMAX'], '#FF8284')
par1.set_ylim(0, 36)
par2.set_ylim(0, 360)
host.legend()
host.axis["left"].label.set_color(p1.get_color())
par1.axis["right"].label.set_color(p2.get_color())
par2.axis["right"].label.set_color(p3.get_color())
par3.axis["left"].label.set_color(p4.get_color())
tkw = dict(size=5, width=1.5)
host.tick_params(axis='y', colors=p1.get_color(), **tkw)
par1.tick_params(axis='y', colors=p2.get_color(), **tkw)
par2.tick_params(axis='y', colors=p3.get_color(), **tkw)
par3.tick_params(axis='y', colors=p4.get_color(), **tkw)
host.tick_params(axis='x', **tkw)
par1.axis["right"].label.set_fontsize(16)
par2.axis["right"].label.set_fontsize(16)
par3.axis["left"].label.set_fontsize(16)
host.axis["bottom"].label.set_fontsize(16)
host.axis["left"].label.set_fontsize(16)
plt.figtext(.5,.92,'Weather Data', fontsize=22, ha='center')
plt.draw()
plt.show()
fig.savefig("Test1.png")
The output

Categories

Resources