Pandas shows data in a wrong diagram - python

I have two functions that each create a diagram. But when I run both functions, the second diagram also contains the data that should only be in the first one. Here are the diagrams:
This diagram shows the temperature.
And this one should only show the humidity data. Not the humidity and the temperature data.
Here is my source code:
from pandas import DataFrame
import sqlite3
import matplotlib.pyplot as plt
import pandas as pd
from datetime import date, datetime
# Today's date as an ISO string; used as the SQL parameter below.
datum = str(date.today())
con = sqlite3.connect("/home/pi/test2.db")
sql = "SELECT * from data4 WHERE date in (?)"


def daily_hum():
    """Plot today's mean humidity per timestamp and save it as a JPEG."""
    df3 = pd.read_sql_query(sql, con, params=[datum])
    # NOTE(review): the sample dump below labels the time column `zeit`,
    # not `time` -- confirm the real column name.
    df3['datetime'] = pd.to_datetime(df3.date + ' ' + df3.time)
    # No figure/axes is created or passed here, so the curve is drawn on
    # pyplot's current axes; anything plotted earlier in the same process
    # is still on that figure when it is saved.
    df3.groupby([df3.datetime]).hum.mean().plot()
    plt.savefig('/home/pi/flask/static/daily_hum.jpg')


def daily_temp():
    """Plot today's mean temperature per timestamp and save it as a JPEG."""
    df4 = pd.read_sql_query(sql, con, params=[datum])
    df4['datetime'] = pd.to_datetime(df4.date + ' ' + df4.time)
    # Same implicit current-axes behaviour as in daily_hum().
    df4.groupby([df4.datetime]).temp.mean().plot()
    plt.savefig('/home/pi/flask/static/daily_temp.jpg')


daily_temp()
daily_hum()
The database/ the DataFrame looks like this:
id,hum,temp,zeit,date
721,60,21,11:04:23,2020-06-21
722,64,22,11:04:24,2020-06-21
723,68,22,11:04:27,2020-06-21
724,70,22,11:07:20,2020-06-21
725,63,22,11:08:20,2020-06-21
726,63,22,11:09:21,2020-06-21
727,63,22,11:10:22,2020-06-21
728,63,22,11:11:22,2020-06-21
729,69,22,11:12:24,2020-06-21
730,64,22,11:13:29,2020-06-21
731,70,22,11:14:32,2020-06-21
732,64,22,11:15:33,2020-06-21
733,64,22,11:16:34,2020-06-21
734,64,22,11:17:34,2020-06-21
735,64,22,11:18:35,2020-06-21
736,64,22,11:19:35,2020-06-21
737,64,22,11:20:36,2020-06-21
738,64,22,11:21:37,2020-06-21
739,64,22,11:22:37,2020-06-21
740,64,22,11:23:38,2020-06-21
741,65,22,11:24:38,2020-06-21
742,65,22,11:25:39,2020-06-21
743,65,22,11:26:40,2020-06-21
744,65,22,11:27:40,2020-06-21
I hope you can help me

You could try this. Matplotlib needs to be told whether you want a new figure for each plot; otherwise every plot is drawn on the same current figure.
from pandas import DataFrame
import sqlite3
import matplotlib.pyplot as plt
import pandas as pd
from datetime import date, datetime
# Today's date as an ISO string; used as the SQL parameter below.
datum = str(date.today())
con = sqlite3.connect("/home/pi/test2.db")
sql = "SELECT * from data4 WHERE date in (?)"
# Query once and reuse the same frame for both diagrams.
df3 = pd.read_sql_query(sql, con, params=[datum])
df3['datetime'] = pd.to_datetime(df3.date + ' ' + df3.time)

# One (column, output file) pair per diagram.
plots = (
    ('hum', '/home/pi/flask/static/daily_hum.jpg'),
    ('temp', '/home/pi/flask/static/daily_temp.jpg'),
)
for column, target in plots:
    # new figure
    fig, ax = plt.subplots()
    # Some figure modifying code
    fig.suptitle('Titel of Figure')
    ax.set_xlabel('X-Label')
    ax.set_ylabel('Y-Label')
    df3.groupby('datetime')[column].mean().plot(ax=ax)
    # Save through the figure object rather than pyplot's implicit
    # "current figure", so each file is tied to exactly this figure.
    fig.savefig(target)
    # Release the figure; pyplot otherwise keeps every figure alive.
    plt.close(fig)

Related

matplotlib multiple lines for multiple years on same chart

I am analyzing consumptions and want to build graphs which looks like this:
E.g. Gas
On the y-axis there is the consumption in kWh
On the x axis are the months (Jan-Dec)
Each line represents a year
The data that I have looks like this (it is quite fragmented: for some years I have a lot of data points, for others only one or two):
| Datum | Art | Amount |
|---|---|---|
| 06.03.2022 | Wasser | 1195 |
| 06.03.2022 | Strom | 8056 |
| 06.03.2022 | Gas | 27079,019 |
| 09.02.2022 | Wasser | 1187 |
| 09.02.2022 | Strom | 7641 |
| 09.02.2022 | Gas | 26845,138 |
| 10.01.2022 | Strom | 6897 |
You can download from here
My code looks like this:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
file_path_out_2021_2022 = "../data/raw_Consumption_data.xlsx"
df = pd.read_excel(file_path_out_2021_2022)
# then some calculations
# rename columns
df = df.rename(columns={'Datum': 'DATE', 'Art': 'CATEGORY', 'Amount': 'AMOUNT'})
# convert date: the sample rows are written DD.MM.YYYY, so only dayfirst is
# passed; a "%Y-%m-%d" format would contradict both dayfirst and the data.
df['DATE'] = pd.to_datetime(df['DATE'], dayfirst=True)
df['MONTH'] = df['DATE'].dt.month
df['YEAR'] = df['DATE'].dt.year
# .copy() so the derived columns are added to an independent frame and not
# to a filtered view of df.
df_gas = df[df['CATEGORY'] == "Gas"].copy()
df_gas = df_gas.fillna(0)
# shift(-1) pulls the following row's value up one row.
# Assumes rows are ordered newest first, as in the sample -- TODO confirm.
df_gas['PREV_DATE'] = df_gas['DATE'].shift(-1)
df_gas['PREV_AMOUNT'] = df_gas['AMOUNT'].shift(-1)
df_gas['DIFF_AMOUNT'] = df_gas['AMOUNT'] - df_gas['PREV_AMOUNT']
df_gas['DIFF_DATE'] = df_gas['DATE'] - df_gas['PREV_DATE']
df_gas['DIFF_DAY'] = df_gas['DIFF_DATE'].dt.days
df_gas['CONSUM_PER_DAY'] = df_gas['DIFF_AMOUNT'] / df_gas['DIFF_DAY']
# NOTE(review): 10.3 is presumably the meter-unit -> kWh conversion factor;
# confirm against the gas bill.
df_gas['CONSUM_PER_DAY_KWH'] = df_gas['CONSUM_PER_DAY'] * 10.3
# Extrapolations using a flat 30-day month and 12 months per year.
df_gas['CONSUM_PER_MONTH_KWH'] = df_gas['CONSUM_PER_DAY_KWH'] * 30
df_gas['CONSUM_PER_YEAR_KWH'] = df_gas['CONSUM_PER_MONTH_KWH'] * 12
and then the chart:
import pandas as pd
from datetime import datetime, timedelta
from matplotlib import pyplot as plt
from matplotlib import dates as mpl_dates
# Newer matplotlib releases ship the seaborn style sheets under a
# 'seaborn-v0_8' prefix; fall back to the old name where only that exists.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)
# drop all rows which have 0 in all columns
df_gas = df_gas.loc[(df_gas != 0).any(axis=1)]
df_gas = df_gas.sort_values(by='DATE', ascending=False)
dates = df_gas['DATE']
y = df_gas['AMOUNT']
# plot() accepts datetime x-values directly; marker='o' reproduces the
# default marker of the deprecated plot_date().
plt.plot(dates, y, marker='o', linestyle='solid')
plt.tight_layout()
plt.show()

Python mplfinance addplot question. Add Entry Signal

does anyone know how to add an entry signal onto my graph so that I can see it visually? I want to see an entry signal on my graph when the SPY price hits $411.50.
I've read through everything I can find online regarding this topic including the mplfinance documentation and it seems like there's a way to do it by using addplot() scatter plot but I can't quite figure it out.
Here's what my code looks like so far:
# Libraries:
import pandas as pd
import mplfinance as mpf
# Pulling data:
data = pd.read_csv(r'C:\Users\Viktor\Documents\D2DT\Reformatted_Data\SPY_2021-04-12-2021-04-12.csv')
# Pre-processing data for mpf:
data['Date'] = pd.to_datetime(data['Date'])
data = data.set_index('Date')
# Plotting Data:
mpf.plot(data, type='candle', volume=True, style='yahoo', title="SP500 Data")
# No data.close() here: read_csv has already closed the file, and a DataFrame
# is not a handle. With a 'close' price column (as used in the follow-up
# code), `data.close` would be that column, and calling it raises a TypeError.
Inside CSV:
Thank You for your help with this!
@r-beginners, I experimented with the documentation in the link you provided in your last comment and got it to work. Thank you. I'm posting the code below; hopefully it is helpful to somebody in the future!
import pandas as pd
import numpy as np
import mplfinance as mpf
day = '2021-04-12'
data = pd.read_csv(r'C:\Users\Viktor\Documents\D2DT\Reformatted_Data\SPY_' + str(day) + "-" + str(day) + '.csv')
data['Date'] = pd.to_datetime(data['Date'])
data = data.set_index('Date')
# Select the day's rows with .loc (row selection via data[day] was removed
# in pandas 2.0) and build the signal from the same rows that are plotted,
# so both always have the same length.
day_data = data.loc[day]
signal = []
for i in day_data['close']:
    if i > 411.50:
        # Marker is drawn at the close price of the bar.
        signal.append(i)
    else:
        # NaN entries are not drawn.
        signal.append(np.nan)
apd = mpf.make_addplot(signal, type='scatter', markersize=200, marker='v', color='r')
mpf.plot(day_data, type='candle', volume=True, style='yahoo', title="SP500 Data", addplot=apd)
This single line replaces the for loop and is more efficient. It adds the required column (which we called signal) to the dataframe for plotting.
target = 411.50
# Store the close price (not True) on the matching rows so the marker is
# plotted at the price level; all other rows stay NaN.
df.loc[df['close'] > target, 'signal'] = df['close']
For reference, use this site:
https://datagy.io/pandas-conditional-column/
Basically: df.loc[df['column'] condition, 'new column name'] = 'value if condition is met'
So the full solution looks something like this:
import pandas as pd
import numpy as np
import mplfinance as mpf
day = '2021-04-12'
data = pd.read_csv(r'C:\Users\Viktor\Documents\D2DT\Reformatted_Data\SPY_' + str(day) + "-" + str(day) + '.csv')
data['Date'] = pd.to_datetime(data['Date'])
data = data.set_index('Date')
target = 411.50
# The frame is called `data` in this script. Store the close price on the
# matching rows so the marker sits at the price level; other rows stay NaN.
data.loc[data['close'] > target, 'signal'] = data['close']
# Select the day's rows with .loc (row selection via data[day] was removed
# in pandas 2.0) and take the signal from the same slice that is plotted.
day_data = data.loc[day]
apd = mpf.make_addplot(day_data['signal'], type='scatter', markersize=200, marker='v', color='r')
mpf.plot(day_data, type='candle', volume=True, style='yahoo', title="SP500 Data", addplot=apd)

Plotting data from multiple pandas data frames in one plot

I am interested in plotting a time series with data from several different pandas data frames. I know how to plot a data for a single time series and I know how to do subplots, but how would I manage to plot from several different data frames in a single plot? I have my code below. Basically what I am doing is I am scanning through a folder of json files and parsing that json file into a panda so that I can plot. When I run this code it is only plotting from one of the pandas instead of the ten pandas created. I know that 10 pandas are created because I have a print statement to ensure they are all correct.
import sys, re
import numpy as np
import smtplib
import matplotlib.pyplot as plt
from random import randint
import csv
import pylab as pl
import math
import pandas as pd
from pandas.tools.plotting import scatter_matrix
import argparse
import matplotlib.patches as mpatches
import os
import json
parser = argparse.ArgumentParser()
parser.add_argument('-file', '--f', help = 'folder where JSON files are stored')
if len(sys.argv) == 1:
    # No arguments at all: show usage instead of failing later.
    parser.print_help()
    sys.exit(1)
args = parser.parse_args()
direc = args.f

# One shared axes: every file's data is drawn onto it.
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)

columns = ["author_name", "num_deletions",
           "num_insertions", "num_lines_changed",
           "num_files_changed", "author_date"]

for filename in os.listdir(direc):
    if not filename.endswith(".json"):
        continue
    # Open the file from the same folder that was listed above.
    with open(os.path.join(direc, filename), 'r') as data_file:
        data = json.load(data_file)
    # Build the rows per file; a collection shared across files would make
    # every later frame repeat the commits of all earlier files.
    rows = [
        (r["author_name"], r["num_deletions"], r["num_insertions"],
         r["num_lines_changed"], r["num_files_changed"], r["author_date"])
        for r in data["commits"]
    ]
    frame = pd.DataFrame(rows, columns=columns)
    # author_date is converted as seconds since the epoch.
    frame['author_date'] = pd.to_datetime(frame['author_date'].astype(int), unit='s')
    # A random RGB colour per file keeps the files distinguishable.
    ax1.plot(frame['author_date'], frame['num_lines_changed'], '*', c=np.random.rand(3,))
    print(frame)

plt.xticks(rotation=35)
plt.title('Number of Lines Changed vs. Author Date')
plt.show()
Quite straightforward actually. Don't let pandas confuse you. Underneath it every column is just a numpy array.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Two independent frames of random integers in [0, 100), columns A-D.
shape = (100, 4)
labels = list('ABCD')
df1 = pd.DataFrame(np.random.randint(0, 100, size=shape), columns=labels)
df2 = pd.DataFrame(np.random.randint(0, 100, size=shape), columns=labels)
# A single axes receives one column from each frame.
fig1, ax1 = plt.subplots()
ax1.plot(df1['A'])
ax1.plot(df2['B'])
pd.DataFrame.plot method has an argument ax for this:
# Create one axes and hand it to every pandas plot call via `ax`.
fig, ax = plt.subplots()
for frame, column in ((df1, 'Col1'), (df2, 'Col2')):
    frame[column].plot(ax=ax)
If you are using pandas plot, the return value of DataFrame.plot is the axes, so you can pass that axes to the next DataFrame.plot call via ax.
import numpy as np

# Use numpy directly: the `pd.np` alias was deprecated and later removed
# from pandas.
df1 = pd.DataFrame({'Frame 1': np.arange(5) * 2}, index=np.arange(5))
df2 = pd.DataFrame({'Frame 2': np.arange(5) * .5}, index=np.arange(5))
# DataFrame.plot returns the axes it drew on; reuse it for the second frame.
ax = df1.plot(label='df1')
df2.plot(ax=ax)
Output:
Or if your dataframes have the same index, you can use pd.concat:
pd.concat([df1,df2],axis=1).plot()
Trust me, @omdv's answer is the only solution I have found so far. The pandas DataFrame plot function doesn't show the plot at all when you pass ax to it.
# Both CSV files have no header row and share the same three typed columns.
csv_options = dict(
    header=None,
    names=['degree', 'rank', 'hits'],
    dtype={'degree': np.int32, 'rank': np.float32, 'hits': np.float32},
)
df_hdf = pd.read_csv(f_hd, **csv_options)
df_hdf_pt = pd.read_csv(pt_f_hd, **csv_options)
# Draw the 'hits' column of each frame on one shared axes.
ax = plt.subplot()
for frame in (df_hdf_pt, df_hdf):
    ax.plot(frame['hits'])

Sorting and conditional color formatting in matplotlib

to skip the context and get straight to the question, go down to "desired changes"
I wrote the helper function below to
Fetch data
Calculate the YTD return
Plot the results in a bar plot
Here is the function:
def ytd_perf(symb, col_names, source = 'yahoo'):
    """Fetch prices, compute year-to-date returns and draw them as a bar plot.

    Args:
        symb: Ticker symbols passed to the data reader; also used to fix
            the column order of the fetched frame.
        col_names: Labels assigned to the columns, in the same order as
            ``symb``.
        source: Data source name handed to ``DataReader``.

    Returns:
        None. The bar chart is drawn as a side effect.
    """
    # The `%pylab inline` magic is IPython-only syntax and cannot appear in
    # a function body; run it once in the notebook if inline plots are wanted.
    from datetime import date
    import pandas_datareader.data as web
    from matplotlib.ticker import FuncFormatter

    # establish start and end dates (today is read once so both agree)
    end = date.today()
    start = date(end.year, 1, 1)
    # fetch data
    df = web.DataReader(symb, source, start = start, end = end)['Adj Close']
    # make sure column orders don't change
    # (reindex replaces the removed DataFrame.reindex_axis)
    df = df.reindex(columns=symb)
    # rename the columns
    df.columns = col_names
    # calc returns from the first element (.iloc replaces the removed .ix)
    df = (df / df.iloc[0]) - 1
    # Plot the most recent line of data -- this represents the YTD return
    ax = df.iloc[-1].plot(kind = 'bar', title = ('YTD Performance as of '+ str(end)),figsize=(12,9))
    # Format ticks through a formatter so the labels follow the tick
    # positions instead of being frozen text.
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _pos: '{:3.1f}%'.format(x*100)))
So, when I run:
tickers = ['SPY', 'TLT']
names = ['Stocks', 'Bonds']
ytd_perf(tickers, names)
I get the following output:
2 desired changes that I can't quite get to work:
I would like to change the color of the bar such that if the value < 0, it is red.
Sort the bars from highest to lowest (which happens to be the case in this chart because there are only two series, but doesn't work with many series).

How to get only Adjusted Close Price from Yahoo Finance library

I am using the Yahoo Finance Library in Python to pull data of a stock.
import yahoo_finance
# Date range and symbol for the historical request.
ticker = 'GLD'
begdate = '2014-11-11'
enddate = '2016-11-11'
data = yahoo_finance.Share('GLD')
data1 = data.get_historical(begdate, enddate)
gld_df = pd.DataFrame(data1)
# Pull the two columns of interest out as plain Python lists.
date_df, adj_close_df = (list(gld_df[col]) for col in ("Date", "Adj_Close"))
print(adj_close_df)
# plot() takes (x, y): here the prices are passed as x and the
# unconverted dates as y.
plt.plot(adj_close_df, date_df)
I would like to plot this Adjusted Close Price on Y-Axis and the corresponding Dates on the X Axis, but my above code is giving an error when I try to do that.
I am using Python 3.x, Anaconda
You could generate the list as below:
# Read the adjusted close (what the question asks for) rather than the raw
# close, and convert to float so the values plot on a numeric axis.
l = [float(x['Adj_Close']) for x in data1]
And the plot:
import matplotlib.pyplot as plt
plt.plot(l)
plt.show()
I got it.
import yahoo_finance
from pylab import *
import numpy as np
import scipy.signal as sc
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
ticker = 'GLD'
begdate = '2014-11-11'
enddate = '2016-11-11'
# Use the ticker variable so changing it above actually changes the request.
data = yahoo_finance.Share(ticker)
data1 = data.get_historical(begdate, enddate)
gld_df = pd.DataFrame(data1)
# Parse the dates so matplotlib draws a real time axis.
date_df = pd.to_datetime(gld_df["Date"])
# Convert prices to floats so the y-axis is numeric.
# NOTE(review): presumably the API returns them as strings -- confirm.
adj_close_df = gld_df["Adj_Close"].astype(float)
# Dates on x, adjusted close on y.
plt.plot(date_df, adj_close_df)

Categories

Resources