I created a line graph of the location of a tropical cyclone over time on a 6-hourly basis. I successfully plotted the graph with all the needed labels, except for the gridlines, both major and minor. The major gridlines appear at 12-hour intervals instead of 6-hour intervals. My goal is to put the first major gridline at 0 on the x-axis, not a few mm to the right of it. In addition, I cannot place a minor gridline exactly midway between two major gridlines to represent the 6-hourly data, nor can I create major gridlines at 6-hour intervals.
The image below shows the result of my code.
And this is my code.
import matplotlib.pyplot as plt
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
from matplotlib.dates import HourLocator, MonthLocator, YearLocator
# Line plot of cyclone distance against time with customised grid and ticks.
# `df` (columns `time`, `Distance`) and `np` are used but not defined in this
# snippet; they are assumed to exist already.
fig, ax = plt.subplots()
# Dashed red line with a circle marker at every sample.
ax.plot(df.time,df.Distance, color='r',marker = 'o', linestyle ='--')
ax.set_xlabel('Date and Time')
ax.set_ylabel('Distance (km)')
ax.set_title('The expected distance of Tropical cyclone')
# Switch the grid on, enable minor ticks, then restyle the major y grid
# (solid black) and every minor grid line (dotted black).
plt.grid(True)
ax.minorticks_on()
plt.grid(which='major',axis ='y', linewidth='1', color='black')
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')
# NOTE(review): 'off' is a non-empty string; Matplotlib documents these
# options as bool, so confirm the ticks are really hidden.
ax.tick_params(which='both', # Options for both major and minor ticks
top='off', # turn off top ticks
left='off', # turn off left ticks
right='off', # turn off right ticks
bottom='off') # turn off bottom ticks
# Only the minor x locator is set; no major x locator is set in this snippet,
# so major x ticks are left to Matplotlib's default.
# NOTE(review): the first positional parameter of HourLocator is `byhour`,
# so this presumably asks for ticks at 01:00, not every hour - confirm.
hloc = HourLocator(1)
ax.xaxis.set_minor_locator(hloc)
# Minor y ticks every 50, major y ticks every 100 (set from `m` below).
ax.yaxis.set_minor_locator(MultipleLocator(50))
# NOTE(review): the second argument of round() is a number of decimal
# places, so round(..., 100) does not round to the nearest hundred.
m = np.arange(0,round(max(df.Distance+200),100),100)
ax.set_yticks(m)
plt.xticks(rotation=45)
plt.ylim(0,1500)
plt.show()
and my data for the x-axis is this-
0 2019-09-24 04:00:00
1 2019-09-24 10:00:00
2 2019-09-24 16:00:00
3 2019-09-24 22:00:00
4 2019-09-25 04:00:00
5 2019-09-25 10:00:00
6 2019-09-25 16:00:00
7 2019-09-25 22:00:00
8 2019-09-26 04:00:00
9 2019-09-26 10:00:00
10 2019-09-26 16:00:00
11 2019-09-26 22:00:00
12 2019-09-27 04:00:00
13 2019-09-27 10:00:00
14 2019-09-27 16:00:00
15 2019-09-27 22:00:00
16 2019-09-28 04:00:00
and the y-axis
0 1385
1 1315
2 1245
3 1175
4 1105
5 1050
6 995
7 935
8 880
9 835
10 790
11 745
12 485
13 435
14 390
15 350
16 315
Revised to 6 hour intervals. I wasn't sure of the intent of the grid, so I posted the details and no grid.
# Plot the cyclone distance against time with major x ticks every 6 hours
# and minor x ticks every 3 hours.
# `df` is assumed to exist already with a `time` column and a numeric
# `Distance` column.
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.dates import HourLocator, MonthLocator, YearLocator
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)

# Convert the timestamps to datetimes so the hour locators below apply.
df['time'] = pd.to_datetime(df['time'])

fig, ax = plt.subplots(figsize=(20, 12))
# Dashed red line with a circle marker at every sample.
ax.plot(df.time, df.Distance, color='r', marker='o', linestyle='--')
ax.set_xlabel('Date and Time')
ax.set_ylabel('Distance (km)')
ax.set_title('The expected distance of Tropical cyclone')

# Switch the grid on, enable minor ticks, then restyle the major y grid
# (solid black) and every minor grid line (dotted black).  Line widths are
# given as numbers rather than strings.
plt.grid(True)
ax.minorticks_on()
plt.grid(which='major', axis='y', linewidth=1, color='black')
plt.grid(which='minor', linestyle=':', linewidth=0.5, color='black')

# Hide the tick marks on all four sides, for major and minor ticks alike.
# These options are booleans; the string 'off' is truthy and would leave
# the ticks switched on.
ax.tick_params(which='both', top=False, left=False, right=False, bottom=False)

# Major x ticks every 6 hours, minor x ticks every 3 hours, labelled as
# "month-day hour".
ax.xaxis.set_minor_locator(HourLocator(interval=3))
ax.xaxis.set_major_locator(HourLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d %H"))

# Major y ticks every 100, from 0 up to (but excluding) max distance + 200.
m = np.arange(0, df.Distance.max() + 200, 100)
ax.set_yticks(m)
plt.xticks(rotation=45)
plt.ylim(0, 1500)
# Make the x-axis start at the first timestamp and end at the last one.
plt.xlim(df['time'].min(), df['time'].max())
plt.show()
To view the minor gridlines, you should run
plt.minorticks_on()
To limit the x-axis of the chart, do:
plt.xlim(df.time.min(), df.time.max())
The result is below. As you can see, there's a major x-gridline every 6 hours and a minor one every hour.
Related
I have the following dataset:
trust_id cohort cnt
index_event_datetime
2017-06-01 chel sepsis 216.0
2017-07-01 chel sepsis 191.0
2017-08-01 chel sepsis 184.0
2017-09-01 chel sepsis 186.0
2017-10-01 chel sepsis 173.0
... ... ... ...
2022-02-01 ouh_ sepsis_thrombocytopenia 5.0
2022-03-01 ouh_ sepsis_thrombocytopenia NaN
2022-04-01 ouh_ sepsis_thrombocytopenia NaN
2022-05-01 ouh_ sepsis_thrombocytopenia NaN
2022-06-01 ouh_ sepsis_thrombocytopenia NaN
and I want to produce 4 plots for each trust with the count among three diseases:
# One facet per trust, wrapped two per row; each cohort is a separate hue.
grid = sns.FacetGrid(
    data=incident_cnt_long.reset_index(),
    col="trust_id",
    col_wrap=2,
    hue="cohort",
    palette=["#FF4613", "#00FFAA", "#131E29"],
    legend_out=True,
)
grid.map(sns.lineplot, "index_event_datetime", "cnt")

# Per-facet cosmetics: vertical tick labels and no per-axes axis labels
# (shared labels are added on the figure below).
for facet_ax in grid.axes:
    # facet_ax.xaxis.set_major_locator(mdates.MonthLocator((1, 7)))
    # facet_ax.xaxis.set_major_formatter(mdates.DateFormatter("%b-%Y"))
    facet_ax.xaxis.set_tick_params(rotation=90)
    facet_ax.set_xlabel(None)
    facet_ax.set_ylabel(None)

# Figure-level legend and shared axis labels, then resize to the default
# figure size before laying out and showing.
# grid.add_legend()
fig = grid.fig
fig.legend(["SEP", "STH", "SDI"])
fig.supylabel("Patients (no)")
fig.supxlabel("Date of index event", va="top")
fig.set_size_inches(plt.rcParams["figure.figsize"])
grid.tight_layout()
plt.show()
I get the following figure:
I want to plot the SDI cohort on a secondary y-axis in each plot. So far I have tried defining a twin_lineplot function as follows:
def twin_lineplot(x, y, color, **kwargs):
    """Draw a seaborn line plot on a twin y-axis of the current axes.

    The twin axis is created with ``plt.twinx()``, so it shares the x-axis
    of whichever axes is current when the function is called.  ``x``, ``y``
    and ``color`` are forwarded to ``sns.lineplot`` together with any extra
    keyword arguments.  Nothing is returned.
    """
    secondary_ax = plt.twinx()
    sns.lineplot(x=x, y=y, color=color, ax=secondary_ax, **kwargs)
# One facet per trust, wrapped two per row; each cohort is a separate hue.
grid = sns.FacetGrid(
    data=incident_cnt_long.reset_index(),
    col="trust_id",
    col_wrap=2,
    hue="cohort",
    palette=["#FF4613", "#00FFAA", "#131E29"],
    legend_out=True,
)
# First pass draws on the facet axes; second pass draws the same columns
# again through twin_lineplot, i.e. on a twin y-axis.
grid.map(sns.lineplot, "index_event_datetime", "cnt")
grid.map(twin_lineplot, "index_event_datetime", "cnt")

# Per-facet cosmetics: vertical tick labels and no per-axes axis labels
# (shared labels are added on the figure below).
for facet_ax in grid.axes:
    # facet_ax.xaxis.set_major_locator(mdates.MonthLocator((1, 7)))
    # facet_ax.xaxis.set_major_formatter(mdates.DateFormatter("%b-%Y"))
    facet_ax.xaxis.set_tick_params(rotation=90)
    facet_ax.set_xlabel(None)
    facet_ax.set_ylabel(None)

# Figure-level legend and shared axis labels, then resize to the default
# figure size before laying out and showing.
# grid.add_legend()
fig = grid.fig
fig.legend(["SEP", "STH", "SDI"])
fig.supylabel("Patients (no)")
fig.supxlabel("Date of index event", va="top")
fig.set_size_inches(plt.rcParams["figure.figsize"])
grid.tight_layout()
plt.show()
but I am not getting the desired output:
This is the dataframe:
Data for last 8 months
date close volume change% obv compare close_trend
6 2022-06-30 00:00:00+05:30 18760.40 358433 5.52 1358338 True 18482.242046
7 2022-07-31 00:00:00+05:30 20015.10 252637 6.27 1610975 True 18905.447351
8 2022-08-31 00:00:00+05:30 18739.75 317107 -6.81 1293868 False 19328.826505
9 2022-09-30 00:00:00+05:30 19139.15 561137 2.09 1855005 True 19753.246889
10 2022-10-31 00:00:00+05:30 19246.95 243999 0.56 2099004 True 20179.207712
11 2022-11-30 00:00:00+05:30 20237.80 311138 4.90 2410142 True 20606.824373
12 2022-12-31 00:00:00+05:30 21367.20 386070 5.29 2796212 True 21035.629608
13 2023-01-31 00:00:00+05:30 22250.00 101527 3.97 2897739 True 21464.925515
I am able to plot 2 graphs in a row using matplotlib in jupyter notebook.
# Two panels side by side in a single figure created with figsize=(7, 2),
# drawn through the pyplot interface.
fig = plt.figure(figsize=(7,2))
# Left panel: close against date, in red.
plt.subplot(1,2,1)
plt.plot(df['date'], df['close'], color='red', figure=fig)
# Right panel: close and close_trend passed together as one two-column
# frame, with no explicit x values.
plt.subplot(1,2,2)
plt.plot(df[['close','close_trend']],figure=fig)
plt.tight_layout()
plt.show()
I get:
But when I try to plot 3 graphs like this, I get ValueError: values must be a 1D array
# Three panels side by side in a single figure created with figsize=(7, 2).
# NOTE(review): the accompanying text reports
# "ValueError: values must be a 1D array" for this snippet; which call
# raises it is not shown.
fig = plt.figure(figsize=(7,2))
# Panel 1: close against date, in red.
plt.subplot(1,3,1)
plt.plot(df['date'], df['close'], color='red', figure=fig)
# Panel 2: close and close_trend passed together as one two-column frame,
# with no explicit x values.
plt.subplot(1,3,2)
plt.plot(df[['close','close_trend']],figure=fig)
# Panel 3: obv against the DataFrame index, in blue.
plt.subplot(1,3,3)
plt.plot(df.index, df['obv'],color='blue', figure=fig)
plt.tight_layout()
plt.show()
How do I get 3 plots in a row?
I am trying to plot the results of min() and max() in the same graph. I have already managed it for max(), but how can I combine the two in one graph so that they are displayed correctly?
Example of my code and my output:
# Bar chart of the maximum capacidad_base_firme within each fecha_inicio group.
# NOTE(review): pandas documents `legend` as a bool or the lower-case string
# 'reverse'; confirm what 'Reverse' is meant to do here.
df.groupby('fecha_inicio')['capacidad_base_firme'].max().plot(kind='bar', legend = 'Reverse')
plt.xlabel('Tarifa de Base firme por Zona')
And my output of my dataframe:
zona capacidad_base_firme ... fecha_inicio fecha_fin
0 Sur 1.52306 ... 2016-01-01 2016-03-31
1 Centro 2.84902 ... 2016-01-01 2016-03-31
2 Occidente 1.57302 ... 2016-01-01 2016-03-31
3 Golfo 3.06847 ... 2016-01-01 2016-03-31
4 Norte 4.34706 ... 2016-01-01 2016-03-31
.. ... ... ... ... ...
67 Golfo 5.22776 ... 2017-10-01 2017-12-31
68 Norte 6.99284 ... 2017-10-01 2017-12-31
69 Istmo 7.25957 ... 2017-10-01 2017-12-31
70 Nacional 0.21971 ... 2017-10-01 2017-12-31
71 Nacional con AB -0.72323 ... 2017-10-01 2017-12-31
[72 rows x 10 columns]
The correct way is to aggregate multiple metrics at the same time with .agg, and then plot directly with pandas.DataFrame.plot
There is no need to call .groupby for each metric. For very large datasets, this can be resource intensive.
There is also no need to create a figure and axes with a separate call to matplotlib, as this is taken care of by pandas.DataFrame.plot, which uses matplotlib as the default backend.
Tested in python 3.9.7, pandas 1.3.4, matplotlib 3.5.0
import seaborn as sns # for data
import pandas as pd
import matplotlib.pyplot as plt
# load the test data: the 'penguins' example dataset, fetched through seaborn
df = sns.load_dataset('penguins')
# display(df.head(3))
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 Male
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 Female
2 Adelie Torgersen 40.3 18.0 195.0 3250.0 Female
# One row per species, with the minimum and maximum bill length as columns.
dfg = df.groupby('species')['bill_length_mm'].agg(['min', 'max'])
# display(dfg)
min max
species
Adelie 32.1 46.0
Chinstrap 40.9 58.0
Gentoo 40.9 59.6
# Grouped bars: one cluster per row of dfg, one bar per column (min, max).
ax = dfg.plot.bar(
    figsize=(8, 6),
    title='Bill Length (mm)',
    xlabel='Species',
    ylabel='Length (mm)',
    rot=0,
)
plt.show()
Use stacked=True for stacked bars
# Same chart with the min and max bars stacked on top of each other.
ax = dfg.plot.bar(
    stacked=True,
    figsize=(8, 6),
    title='Bill Length (mm)',
    xlabel='Species',
    ylabel='Length (mm)',
    rot=0,
)
Step 1
Create a subplot to plot the data to
fig, ax = plt.subplots()
Step 2
Plot your DataFrame maximum and minimum to the specific axis
# Group once and reuse the grouped column for both statistics.
capacity_by_start = df.groupby('fecha_inicio')['capacidad_base_firme']
# Both series are drawn on the same axes at the same x positions, so each
# gets an explicit colour; left to the defaults, each separate Series plot
# starts from the first colour of the cycle and the two cannot be told apart.
# The minimum is drawn second so that it sits on top of the maximum.
capacity_by_start.max().plot(ax=ax, kind='bar', color='C0', legend=True, label='Maximum')
capacity_by_start.min().plot(ax=ax, kind='bar', color='C1', legend=True, label='Minimum')
You may need to adjust the zorder to get the effect of a stacked bar plot.
I have the following dataframe
Date_x BNF Chapter_x VTM_NM Occurance_x Date_y BNF Chapter_y Occurance_y
0 2016-12-01 1 Not Specified 2994 2015-12-01 1 3212
1 2016-12-01 1 Mesalazine 2543 2015-12-01 1 2397
2 2016-12-01 1 Omeprazole 2307 2015-12-01 1 2370
3 2016-12-01 1 Esomeprazole 1535 2015-12-01 1 1516
4 2016-12-01 1 Lansoprazole 1511 2015-12-01 1 1547
I have plotted a bar chart with 2 bars one representing 2015 and the other 2016 using this code
# Two sets of bars on a single axes, one per Occurance column.
fig = plt.figure() # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
width = 0.4
# Each column is plotted as a Series with no x column given, so the bars are
# positioned by the DataFrame index.
# NOTE(review): position=1 and position=0 presumably put the two bar sets on
# opposite sides of each tick so they sit side by side - confirm.
df.Occurance_x.plot(kind='bar', color='red', ax=ax, width=width, position=1)
df.Occurance_y.plot(kind='bar', color='blue', ax=ax, width=width, position=0)
ax.set_ylabel('Occurance')
# The legend entries are the literal strings below, not the dates themselves.
plt.legend(['Date_x', 'Date_y'], loc='upper right')
ax.set_title('BNF Chapter 1 Top 5 drugs prescribed')
plt.show()
However, the x-axis shows the index 0 1 2 3 4
- I want it to show the drug names
How would I go about doing this?
I guess that you can start to play from this.
import pandas as pd

# Rebuild the sample from the question.  There the *_x columns belong to
# 2016 (Date_x is 2016-12-01) and the *_y columns to 2015 (Date_y is
# 2015-12-01), so the years are assigned accordingly.
df = pd.DataFrame({
    "date_x": [2016] * 5,
    "Occurance_x": [2994, 2543, 2307, 1535, 1511],
    "VTM_NM": ["Not Specified", "Mesalazine", "Omeprazole",
               "Esomeprazole", "Lansoprazole"],
    "date_y": [2015] * 5,
    "Occurance_y": [3212, 2397, 2370, 1516, 1547],
})

# Using VTM_NM as the x column makes the drug names the tick labels instead
# of the integer index.
ax = df[["VTM_NM", "Occurance_x", "Occurance_y"]].plot(
    x='VTM_NM',
    kind='bar',
    color=["g", "b"],
    rot=45,
)
# Legend entries follow the column order: Occurance_x (2016) first, then
# Occurance_y (2015).
ax.legend(["2016", "2015"])
I have a dataframe called conversionRate like this:
| State| Apps | Loans| conversionratio|
2013-01-01 IL 1165 152 13.047210
2013-01-01 NJ 2210 756 34.208145
2013-01-01 TX 1454 73 5.020633
2013-02-01 CA 2265 400 17.660044
2013-02-01 IL 1073 168 15.657036
2013-02-01 NJ 2036 739 36.296660
2013-02-01 TX 1370 63 4.598540
2013-03-01 CA 2545 548 21.532417
2013-03-01 IL 1108 172 15.523466
I intend to plot the number of apps and number of loans in the primary Y axis and the Conversion Ratio in the secondary axis for each state.
I tried the below code:
import math
# One subplot per state, laid out two per row.
states = conversionRate['State'].unique()
rows = int(math.ceil(len(states) / 2))
fig, axes = plt.subplots(nrows=rows, ncols=2, figsize=(10, 10), sharex=True, sharey=False)
for position, element in enumerate(states):
    # Fill the grid row by row: position 0 -> (0, 0), 1 -> (0, 1), 2 -> (1, 0), ...
    grid_row, grid_col = divmod(position, 2)
    axis = axes[grid_row, grid_col]
    subSample = conversionRate[conversionRate['State'] == element]
    #ax2 = axis.twinx()
    subSample.plot(y=['Loans', 'Apps'], secondary_y=['conversionratio'], ax=axis)
I end up with a figure like the below:
The question is how do I get the secondary axis line to show? If I try the below (per the manual setting secondary_y in plot() should selectively plot those columns in the secondary axis), I see only the line I plot on the secondary axis. There must be something simple and obvious I am missing. I can't figure out what it is! Can any guru please help?
subSample.plot(secondary_y=['conversionratio'],ax=axis)
You need to include conversionratio in y=['Loans', 'Apps','conversionratio'] as well as in secondary_y... or better yet leave that parameter out, since you're plotting all the columns.
# One subplot per state, two per row: Apps and Loans on the primary y-axis
# and conversionratio on a secondary y-axis.
states = conversionRate['State'].unique()
# Number of rows needed for two plots per row: ceil(len(states) / 2).
rows = (len(states) + 1) // 2
# squeeze=False keeps `axes` two-dimensional even when there is only one
# row, so the grid can always be walked the same way.
fig, axes = plt.subplots(nrows=rows, ncols=2, figsize=(10, 10),
                         sharex=True, sharey=False, squeeze=False)
# axes.flat walks the grid row by row, pairing each state with the next
# free subplot.
for element, axis in zip(states, axes.flat):
    subSample = conversionRate[conversionRate['State'] == element]
    # No `y=` argument: the columns named in secondary_y are drawn against
    # the right-hand axis and the remaining plotted columns against the left.
    subSample.plot(secondary_y=['conversionratio'], ax=axis)