matplotlib multiple lines for multiple years on same chart - python

I am analyzing consumption data and want to build graphs that look like this:
E.g. Gas
On the y axis there is the consumption in kWh
On the x axis are the months (Jan-Dec)
Each line represents a year
The data that I have looks like this (it's quite fragmented: for some years I have a lot of data points, for others only one or two):
Datum
Art
Amount
06.03.2022
Wasser
1195
06.03.2022
Strom
8056
06.03.2022
Gas
27079,019
09.02.2022
Wasser
1187
09.02.2022
Strom
7641
09.02.2022
Gas
26845,138
10.01.2022
Strom
6897
You can download from here
My code looks like this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
file_path_out_2021_2022 = "../data/raw_Consumption_data.xlsx"
df = pd.read_excel(file_path_out_2021_2022)
Then some calculations:
# Give the columns upper-case English names.
df = df.rename(columns={'Datum': 'DATE', 'Art': 'CATEGORY', 'Amount': 'AMOUNT'})

# Parse the reading date, then split out its month and year.
df['DATE'] = pd.to_datetime(df['DATE'], format = "%Y-%m-%d", dayfirst=True)
df['MONTH'] = df['DATE'].dt.month
df['YEAR'] = df['DATE'].dt.year

# Keep only the gas rows and replace missing values with 0.
is_gas = df['CATEGORY'] == "Gas"
df_gas = df[is_gas].fillna(0)

# Pull the next row's date and amount up beside each reading.
# NOTE(review): the PREV_ names presumably assume rows ordered newest first - confirm.
df_gas['PREV_DATE'] = df_gas['DATE'].shift(-1)
df_gas['PREV_AMOUNT'] = df_gas['AMOUNT'].shift(-1)

# Difference to the next row: in meter amount, as a timedelta, and in whole days.
df_gas['DIFF_AMOUNT'] = df_gas['AMOUNT'] - df_gas['PREV_AMOUNT']
df_gas['DIFF_DATE'] = df_gas['DATE'] - df_gas['PREV_DATE']
df_gas['DIFF_DAY'] = df_gas['DIFF_DATE'].dt.days

# Average amount per day between the two readings.
df_gas['CONSUM_PER_DAY'] = df_gas['DIFF_AMOUNT'] / df_gas['DIFF_DAY']
# NOTE(review): 10.3 is presumably the meter-unit -> kWh conversion factor - confirm.
df_gas['CONSUM_PER_DAY_KWH'] = df_gas['CONSUM_PER_DAY'] * 10.3
# Scale the daily figure to a 30-day month and then to 12 such months.
df_gas['CONSUM_PER_MONTH_KWH'] = df_gas['CONSUM_PER_DAY_KWH'] * 30
df_gas['CONSUM_PER_YEAR_KWH'] = df_gas['CONSUM_PER_MONTH_KWH'] * 12
And then the chart:
import pandas as pd
from datetime import datetime, timedelta
from matplotlib import pyplot as plt
from matplotlib import dates as mpl_dates
# NOTE(review): newer Matplotlib releases ship this style as 'seaborn-v0_8' -
# confirm the name against the installed version.
plt.style.use('seaborn')

# Discard rows in which every column is 0, then order newest reading first.
has_nonzero = df_gas.ne(0).any(axis=1)
df_gas = df_gas.loc[has_nonzero]
df_gas = df_gas.sort_values(by='DATE', ascending=False)

# Meter amount against reading date: markers joined by a solid line.
dates = df_gas['DATE']
y = df_gas['AMOUNT']
fig, ax = plt.subplots()
ax.plot_date(dates, y, linestyle='solid')
fig.tight_layout()
plt.show()

Related

How to specify the years on an axis when using plot() on DateTime objects

The plotting goes like this:
plt.plot(df['Date'], df['Price'])
df['Date'] consists of DateTime objects with several years and df['Price'] are integers.
However on the actual line graph, it automatically selects about 4 years spaced apart in the graph with large intervals:
How do I make it so that I can specify the number of years to show on the X axis? Or perhaps show all the years (year only)?
Example:
import pandas as pd
import datetime
import random
# Build 10,000 random (date, price) observations with years 1960-2022.
dates = []
prices = []
for _ in range(10000):
    prices.append(random.randint(0, 10))
    # The day is drawn from 1-27, which is valid in every month of every year.
    random_day = datetime.datetime(
        random.randint(1960, 2022),
        random.randint(1, 12),
        random.randint(1, 27),
    )
    dates.append(random_day.strftime("%Y-%m-%d"))

data = {
    'Date': dates,
    'Price': prices,
}
df = pd.DataFrame(data)
# ISO "YYYY-MM-DD" strings sort chronologically, so sorting the text column is enough.
df = df.sort_values(by=['Date'], ignore_index=True)

# Swap the string column for a real datetime column named 'Date DT'.
df['Date DT'] = pd.to_datetime(df['Date'])
df = df.drop(columns='Date')
df
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
# Wide, high-resolution canvas holding the price series.
fig = plt.figure(figsize=(15, 5), dpi=1000)
ax = fig.gca()
ax.plot(df['Date DT'], df['Price'])

# Axis captions and title
ax.set_xlabel('Dates', fontsize=8)
ax.set_ylabel('Prices', fontsize=8)
ax.set_title('Example', fontsize=15)

# Write the figure to example.png with a tight bounding box
fig.savefig('example.png', bbox_inches='tight')
You can use the xticks function to set the tick marks and labels on the x-axis.
Like This:
plt.plot(df['Date'], df['Price'])
# Put one tick on 1 January of each year present in the data. Ticking every
# row instead would stack one (mostly repeated) year label per data point.
years = sorted({date.year for date in df['Date']})
year_starts = [pd.Timestamp(year=year, month=1, day=1) for year in years]
plt.xticks(year_starts, [str(year) for year in years], rotation=45)
plt.show()
EDIT: to only display one unique year each out of years:
import numpy as np
import matplotlib.dates as mdates

plt.plot(df['Date'], df['Price'])
ax = plt.gca()
# Let a year-based locator choose the tick positions and a matching formatter
# print them, so positions and labels always agree. Ticking every row and then
# labelling with only the unique years gives mismatched tick and label counts.
# Raise `base` (e.g. 5 or 10) to show fewer years.
ax.xaxis.set_major_locator(mdates.YearLocator(base=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
ax.tick_params(axis='x', labelrotation=45)
plt.show()

Printing months in the x axis with pyplot

Data I'm working with: https://drive.google.com/file/d/1xb7icmocz-SD2Rkq4ykTZowxW0uFFhBl/view?usp=sharing
Hey everyone,
I am a bit stuck with editing a plot.
Basically, I would like my x value to display the months in the year, but it doesn't seem to work because of the data type (?). Do you have any idea how I could get my plot to have months in the x axis?
If you need more context about the data, please let me know!!!
Thank you!
Here's my code for the plot and the initial data modifications:
import matplotlib.pyplot as plt
import mplleaflet
import pandas as pd
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import numpy as np
df = pd.read_csv("data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv")
# Data_Value is divided by 10 to obtain the plotted 'degrees' values.
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])

# Split the observations at 1 January 2015.
df2 = df[df['Date'] < '2015-01-01']
df3 = df[df['Date'] >= '2015-01-01']

# Highest and lowest value for every (month, day) pair, per split.
max_temp = df2.groupby([df2.Date.dt.month, df2.Date.dt.day])['degrees'].max()
min_temp = df2.groupby([df2.Date.dt.month, df2.Date.dt.day])['degrees'].min()
# The 2015-onwards extremes are computed here but not drawn below.
max_temp2 = df3.groupby([df3.Date.dt.month, df3.Date.dt.day])['degrees'].max()
min_temp2 = df3.groupby([df3.Date.dt.month, df3.Date.dt.day])['degrees'].min()

max_temp.plot(x='Date', y='degrees', kind='line')
min_temp.plot(x='Date', y='degrees', kind='line')
# Shade the band between the two lines, using the row position as x.
plt.fill_between(range(len(min_temp)), min_temp, max_temp, color='C0', alpha=0.2)

ax = plt.gca()
ax.set(xlabel="Date",
       ylabel="Temperature",
       title="Extreme Weather in 2015")
plt.legend()
plt.tight_layout()

# Rotate the x tick labels by 45 degrees.
x = plt.gca().xaxis
for item in x.get_ticklabels():
    item.set_rotation(45)
plt.show()
Plot I'm getting:
Option 1 (Most Similar Approach)
Change the index based on month abbreviations using Index.map and calendar
This is just for df2:
import calendar
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv("...")
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date'] < '2015-01-01']

# Group once by (month, day) and take both extremes from the same grouping.
by_month_day = df2.groupby([df2.Date.dt.month, df2.Date.dt.day])['degrees']
max_temp = by_month_day.max()
min_temp = by_month_day.min()

# Update the index to be the desired display format for x-axis:
# each (month, day) key becomes the month's abbreviation.
max_temp.index = max_temp.index.map(lambda key: calendar.month_abbr[key[0]])
min_temp.index = min_temp.index.map(lambda key: calendar.month_abbr[key[0]])

max_temp.plot(x='Date', y='degrees', kind='line')
min_temp.plot(x='Date', y='degrees', kind='line')
# Shade the band between the two lines, using the row position as x.
plt.fill_between(range(len(min_temp)), min_temp, max_temp,
                 color='C0', alpha=0.2)

ax = plt.gca()
ax.set(xlabel="Date", ylabel="Temperature", title="Extreme Weather 2005-2014")

# Rotate the x tick labels by 45 degrees.
x = plt.gca().xaxis
for item in x.get_ticklabels():
    item.set_rotation(45)

plt.margins(x=0)
plt.legend()
plt.tight_layout()
plt.show()
As an aside: the title "Extreme Weather in 2015" is incorrect because this data includes all years before 2015. This is "Extreme Weather 2005-2014"
The year range can be checked with min and max as well:
print(df2.Date.dt.year.min(), '-', df2.Date.dt.year.max())
# 2005 - 2014
The title could be programmatically generated with:
title=f"Extreme Weather {df2.Date.dt.year.min()}-{df2.Date.dt.year.max()}"
Option 2 (Simplifying groupby step)
Simplify the code using groupby aggregate to create a single DataFrame then convert the index in the same way as above:
import calendar
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv("...")
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date'] < '2015-01-01']

# One groupby over (month, day) yields both the 'max' and the 'min' column.
month_key = df2.Date.dt.month
day_key = df2.Date.dt.day
df2_temp = df2.groupby([month_key, day_key])['degrees'].agg(['max', 'min'])

# Replace each (month, day) key with the month abbreviation used for display.
df2_temp.index = df2_temp.index.map(lambda key: calendar.month_abbr[key[0]])

# Line plot titled with the first and last year found in the data.
first_year = df2.Date.dt.year.min()
last_year = df2.Date.dt.year.max()
ax = df2_temp.plot(
    kind='line',
    rot=45,
    xlabel="Date",
    ylabel="Temperature",
    title=f"Extreme Weather {first_year}-{last_year}",
)

# Shade the band between the min and max lines, using the row position as x.
row_positions = range(len(df2_temp))
ax.fill_between(row_positions, df2_temp['min'], df2_temp['max'],
                color='C0', alpha=0.2)
ax.margins(x=0)
plt.tight_layout()
plt.show()
Option 3 (Best overall functionality)
Convert the index to a datetime using pd.to_datetime. Choose any leap year as a common year for all rows (it must be a leap year so that Feb-29 does not raise an error). Then call set_major_formatter with a DateFormatter using the format string %b to show the month abbreviation:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv("...")
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date'] < '2015-01-01']

# One groupby over (month, day) yields both the 'max' and the 'min' column.
month_key = df2.Date.dt.month
day_key = df2.Date.dt.day
df2_temp = df2.groupby([month_key, day_key])['degrees'].agg(['max', 'min'])

# Rebuild every (month, day) key as a date in the year 2000.
# (2000 is a leap year, so a Feb-29 key still parses.)
same_year_dates = df2_temp.index.map(lambda key: f'2000-{key[0]}-{key[1]}')
df2_temp.index = pd.to_datetime(same_year_dates)

# Line plot titled with the first and last year found in the data.
first_year = df2.Date.dt.year.min()
last_year = df2.Date.dt.year.max()
ax = df2_temp.plot(
    kind='line',
    rot=45,
    xlabel="Date",
    ylabel="Temperature",
    title=f"Extreme Weather {first_year}-{last_year}",
)

# Shade the band between the min and max lines along the datetime index.
ax.fill_between(df2_temp.index, df2_temp['min'], df2_temp['max'],
                color='C0', alpha=0.2)

# Print only the month abbreviation (%b) on the x-axis ticks.
month_formatter = mdates.DateFormatter('%b')
ax.xaxis.set_major_formatter(month_formatter)
plt.tight_layout()
plt.show()
The benefit of this approach is that the index is a datetime and therefore will format better than the string representations of options 1 and 2.

How to get only Adjusted Close Price from Yahoo Finance library

I am using the Yahoo Finance Library in Python to pull data of a stock.
import yahoo_finance
# Request GLD history between begdate and enddate and try to plot the
# 'Adj_Close' values against the 'Date' values.
ticker = 'GLD'
begdate = '2014-11-11'
enddate = '2016-11-11'
# NOTE(review): the symbol is written out again here; `ticker` above is not used.
data = yahoo_finance.Share('GLD')
data1 = data.get_historical(begdate,enddate)
# One DataFrame row per record returned by get_historical.
gld_df = pd.DataFrame(data1)
date_df = (list(gld_df["Date"]))
adj_close_df = list(gld_df["Adj_Close"])
print(adj_close_df)
# NOTE(review): the prices are passed first and the dates second, while
# pyplot.plot takes x before y - confirm against the intended orientation
# (price on the y axis, date on the x axis).
plt.plot(adj_close_df,date_df)
I would like to plot this Adjusted Close Price on Y-Axis and the corresponding Dates on the X Axis, but my above code is giving an error when I try to do that.
I am using Python 3.x, Anaconda
You could generate the list as below:
# Adjusted close (the field the question asks for) of every record, passed
# through float() so the values plot on a numeric axis even if the API hands
# them back as strings.
l = [float(row['Adj_Close']) for row in data1]
And the plot:
import matplotlib.pyplot as plt
plt.plot(l)
plt.show()
I got it.
import yahoo_finance
from pylab import *
import numpy as np
import scipy.signal as sc
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
# Plot the adjusted close of `ticker` against the trading date.
ticker = 'GLD'
begdate = '2014-11-11'
enddate = '2016-11-11'
data = yahoo_finance.Share(ticker)
data1 = data.get_historical(begdate, enddate)
gld_df = pd.DataFrame(data1)
# Dates become real datetimes so the x-axis is a time axis.
date_df = pd.to_datetime(list(gld_df["Date"]))
# float() makes the prices numeric even if the API hands them back as strings,
# so they are drawn on a continuous y-axis.
adj_close_df = [float(price) for price in gld_df["Adj_Close"]]
plt.plot(date_df, adj_close_df)
plt.show()

Heatmap with date on horizontal and hour on vertical axis ("fingerprint plot")

I have a pandas data frame with a datetime index and some variable z and I want to reproduce a plot similar to this:
(Image source: University of Edinburgh, http://www.geos.ed.ac.uk/homes/rclement/micromet/Current/griffin/carbon/)
This is often called a "fingerprint plot" in the CO2 flux community.
A year of sample data:
import pandas as pd
import numpy as np
# One random value per hour for the whole of 2016 (a leap year: 366 days).
n = 366*24
# A Timedelta frequency avoids the 'H' alias, which newer pandas releases
# deprecate, and is accepted by older releases as well.
hourly_index = pd.date_range(start='2016-01-01 00:00',
                             freq=pd.Timedelta(hours=1), periods=n)
df = pd.DataFrame(index=hourly_index,
                  data={'z': np.random.randn(n)})
# x: calendar date of the timestamp, y: hour of day (0-23).
df["x"] = df.index.date
df["y"] = df.index.hour
df.head()
How do I proceed from here? I played around with the solution to this question:
how to plot a heat map for three column data, but I can't get it to work with the datetime data.
Does this get what you are looking for?
from scipy.interpolate import griddata
from jdcal import jd2jcal
from datetime import datetime
# One random value per hour for the whole of 2016 (a leap year: 366 days).
n = 366*24
# A Timedelta frequency avoids the 'H' alias, which newer pandas releases
# deprecate, and is accepted by older releases as well.
hourly_index = pd.date_range(start='2016-01-01 00:00',
                             freq=pd.Timedelta(hours=1), periods=n)
df = pd.DataFrame(index=hourly_index,
                  data={'z': np.random.randn(n)})
df["x"] = df.index.date
df["y"] = df.index.hour
df.head()

# Interpolate z onto a regular 1000 x 1000 grid of (Julian date, hour of day).
julian_day = df.index.to_julian_date()
xi = np.linspace(julian_day.min(), julian_day.max(), 1000)
yi = np.linspace(df.y.min(), df.y.max(), 1000)
zi = griddata((julian_day, df.index.hour), df.z,
              (xi[None, :], yi[:, None]), method='linear')

# Convert the Julian grid back to datetimes, keeping the fractional day.
# Building the dates from year/month/day alone would give several grid
# columns the same x value.
xid = pd.to_datetime(xi, unit='D', origin='julian').to_pydatetime()

plt.contourf(xid, yi, zi)
plt.colorbar()
plt.show()

Plot bar graph using multiple groupby count in panda

I am trying to plot a bar graph using pandas. DateTime is the index column, which I derive from the timestamp. Here is the table structure:
So far I have written this:
import sqlite3
from pylab import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
# Load every row of the "Data" table from the SQLite file.
conn = sqlite3.connect('DEMO2.sqlite')
df = pd.read_sql("SELECT * FROM Data", conn)

# Turn each DATE value into a calendar date and use it both as a column
# and as the index.
df['DateTime'] = df['DATE'].apply(dt.date.fromtimestamp)
df1 = df.set_index('DateTime', drop=False)


def _month_of(day):
    # Grouping key: month number of an index entry.
    return day.month


# Number of entries per month.
grouped = df1['DateTime'].groupby(_month_of)
grouped.count()
I want output like this:
June has 4 entries in total, and one of them starts with "u", so X is 4 and Y is 1. The same applies to July.
I also want to plot a bar graph of the X and Y entries from this output, i.e. a MONTH vs. values bar graph.
I would create the DataFrame with a dict:
result = pd.DataFrame({'X': g.count(),
'Y': g.apply(lambda x: x.str.startswith('u').sum())})
Now you can use the plot method to plot months vs values.
result.plot()
Note: you can create grouped more efficiently:
grouped = df1['DateTime'].groupby(df1['DateTime'].dt.to_period('M'))
grouped = df1['DateTime'].groupby(df1['DateTime'].dt.month) # if you want Jan-2015 == Jan-2014

Categories

Resources