import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader import data as wb
# Symbols requested from the 'yahoo' data source.
tickers = ['^GSPC', '^IXIC', '^GDAXI', '^FTSE']

# Collect one 'Adj Close' column per symbol, starting at 1997-1-1.
ind_data = pd.DataFrame()
for t in tickers:
    ind_data[t] = wb.DataReader(t, data_source='yahoo', start='1997-1-1')['Adj Close']
The following works:
# Download the ^ftse history CSV directly; the first column becomes the
# index and pandas is asked to parse it as dates.
ftse_url = 'https://query1.finance.yahoo.com/v7/finance/download/^ftse?period1=1058907192&period2=9999999999&interval=1d&filter=history&frequency=1d'
ftse = pd.read_csv(ftse_url, index_col=[0], parse_dates=True)
I am working on the code below, which takes data from Yahoo Finance and plots it for technical analysis of the stock.
import pandas as pd
import pandas_datareader.data as web
import datetime as dt
import matplotlib.pyplot as plt
import mpl_finance
from mpl_finance import candlestick_ohlc
import matplotlib.dates as mdates
# Symbol to download and the first date of the requested history.
ticker = 'MCD'
start = dt.date(2014, 1, 1)
# Gather the price data from the 'yahoo' source
data = web.DataReader(ticker, 'yahoo', start)
# Compute the 10-row and 60-row rolling means of the adjusted close
data['MA10'] = data['Adj Close'].rolling(window=10).mean()
data['MA60'] = data['Adj Close'].rolling(window=60).mean()
# Turn the index back into a regular column (used as 'Date' on the next line)
data.reset_index(inplace=True)
# NOTE: this is the line that fails with
# "TypeError: dtype '<class 'datetime.date'>' not understood" --
# dt.date is handed to astype() as the target dtype.
data['Date']=mdates.date2num(data['Date'].astype(dt.date))
I got this error:
TypeError: dtype '<class 'datetime.date'>' not understood
If you convert the date column to datetime and set it as the index (a `DatetimeIndex`), the graph will be displayed; no `date2num` conversion is required. Try it. The library imports have also been partially corrected.
import pandas as pd
import pandas_datareader.data as web
import datetime as dt
import matplotlib.pyplot as plt
import mplfinance as mpf
from mplfinance.original_flavor import candlestick_ohlc
import matplotlib.dates as mdates
ticker = 'MCD'
start = dt.date(2014, 1, 1)

# Download the price history for the ticker from the 'yahoo' source.
data = web.DataReader(ticker, 'yahoo', start)

# Rolling means of the adjusted close over 10 and 60 rows.
adj_close = data['Adj Close']
for window in (10, 60):
    data[f'MA{window}'] = adj_close.rolling(window=window).mean()

# Keep 'Date' as a datetime index; the earlier
# mdates.date2num(data['Date'].astype(dt.date)) conversion is intentionally
# not performed.
data = data.reset_index()
data['Date'] = pd.to_datetime(data['Date'])
data = data.set_index('Date')
I have two functions, each of which creates a diagram. But when I run both functions, the second diagram also contains the data that should only be in the first one. Here are the diagrams:
This diagram shows the temperature.
And this one should show only the humidity data, not both the humidity and the temperature data.
Here is my source code:
from pandas import DataFrame
import sqlite3
import matplotlib.pyplot as plt
import pandas as pd
from datetime import date, datetime
# Today's date rendered as a string; bound as the query parameter below.
datum = str(date.today())
# NOTE(review): this rebinds the imported name `date` to a list, so
# date.today() can no longer be called after this line.
date = [datum]
# Connection to the SQLite database file holding the readings.
con = sqlite3.connect("/home/pi/test2.db")
# Parameterized query: rows of data4 whose `date` matches the bound value.
sql = "SELECT * from data4 WHERE date in (?)"
# Module-level load of today's rows (the functions below run the query again).
df3 = pd.read_sql_query(sql,con, params=[datum])
def daily_hum():
    """Plot today's mean humidity per timestamp and save it as a JPEG.

    Runs the module-level ``sql`` query with ``datum`` as parameter, combines
    the ``date`` and ``time`` columns into a ``datetime`` column, plots the
    mean of ``hum`` per timestamp and saves the result to
    ``/home/pi/flask/static/daily_hum.jpg``.

    No figure or axes is created here, so the plot is drawn on whatever
    pyplot currently treats as the active figure.
    """
    # NOTE(review): the sample data lists a `zeit` column rather than `time`
    # -- confirm the actual column name in data4.
    readings = pd.read_sql_query(sql, con, params=[datum])
    readings['datetime'] = pd.to_datetime(readings.date + ' ' + readings.time)
    hum_by_time = readings.groupby([readings.datetime]).hum.mean()
    hum_by_time.plot()
    plt.savefig('/home/pi/flask/static/daily_hum.jpg')
def daily_temp1():
    """Plot today's mean temperature per timestamp and save it as a JPEG.

    Runs the module-level ``sql`` query with ``datum`` as parameter, combines
    the ``date`` and ``time`` columns into a ``datetime`` column, plots the
    mean of ``temp`` per timestamp and saves the result to
    ``/home/pi/flask/static/daily_temp.jpg``.

    No figure or axes is created here, so the plot is drawn on whatever
    pyplot currently treats as the active figure.
    """
    # NOTE(review): the sample data lists a `zeit` column rather than `time`
    # -- confirm the actual column name in data4.
    readings = pd.read_sql_query(sql, con, params=[datum])
    readings['datetime'] = pd.to_datetime(readings.date + ' ' + readings.time)
    temp_by_time = readings.groupby([readings.datetime]).temp.mean()
    temp_by_time.plot()
    plt.savefig('/home/pi/flask/static/daily_temp.jpg')
# Render and save both daily charts: temperature first, then humidity.
# The temperature function is defined under the name `daily_temp1`.
daily_temp1()
daily_hum()
The database / the DataFrame looks like this:
id,hum,temp,zeit,date
721,60,21,11:04:23,2020-06-21
722,64,22,11:04:24,2020-06-21
723,68,22,11:04:27,2020-06-21
724,70,22,11:07:20,2020-06-21
725,63,22,11:08:20,2020-06-21
726,63,22,11:09:21,2020-06-21
727,63,22,11:10:22,2020-06-21
728,63,22,11:11:22,2020-06-21
729,69,22,11:12:24,2020-06-21
730,64,22,11:13:29,2020-06-21
731,70,22,11:14:32,2020-06-21
732,64,22,11:15:33,2020-06-21
733,64,22,11:16:34,2020-06-21
734,64,22,11:17:34,2020-06-21
735,64,22,11:18:35,2020-06-21
736,64,22,11:19:35,2020-06-21
737,64,22,11:20:36,2020-06-21
738,64,22,11:21:37,2020-06-21
739,64,22,11:22:37,2020-06-21
740,64,22,11:23:38,2020-06-21
741,65,22,11:24:38,2020-06-21
742,65,22,11:25:39,2020-06-21
743,65,22,11:26:40,2020-06-21
744,65,22,11:27:40,2020-06-21
I hope you can help me
You could try this. Matplotlib needs to know whether you want a new figure for each plot or not.
from pandas import DataFrame
import sqlite3
import matplotlib.pyplot as plt
import pandas as pd
from datetime import date, datetime
datum = str(date.today())
date = [datum]
con = sqlite3.connect("/home/pi/test2.db")
sql = "SELECT * from data4 WHERE date in (?)"

# Load today's rows once and build a combined timestamp column.
df3 = pd.read_sql_query(sql, con, params=[datum])
df3['datetime'] = pd.to_datetime(df3.date + ' ' + df3.time)

# One separate figure per measured column, so the plots never share axes.
for column, target in (
    ('hum', '/home/pi/flask/static/daily_hum.jpg'),
    ('temp', '/home/pi/flask/static/daily_temp.jpg'),
):
    # new figure
    fig, ax = plt.subplots()
    # Some figure modifying code
    fig.suptitle('Titel of Figure')
    ax.set_xlabel('X-Label')
    ax.set_ylabel('Y-Label')
    # Draw explicitly on this figure's axes, then save it.
    df3.groupby([df3.datetime])[column].mean().plot(ax=ax)
    plt.savefig(target)
I have a `.txt_` file containing dates and runoff values from 1985 to 2018 for one station in Norway.
I need to make a plot of the runoff for all the July months.
Here is my code:
from pandas import read_csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cartopy
from datetime import date,datetime
dir1 = "mystations/"

# Sorted directory listing, reduced to the names ending in '.txt_'.
files = np.sort(os.listdir(dir1))
files_txt = [name for name in files if name.endswith('.txt_')]

# Read the first matching file: skip the 6 leading rows, no header row,
# space-separated, first column as index, -9999 treated as missing.
df = pd.read_csv(
    dir1 + files_txt[0],
    skiprows=6,
    header=None,
    index_col=0,
    sep=" ",
    na_values=-9999,
    parse_dates=True,
)
# Parse the index with the explicit pattern YYYYMMDD/HHMM.
df.index = pd.to_datetime(df.index, format="%Y%m%d/%H%M")

# Slice the rows from 1985 through 2018 and display the result.
myperiod = df["1985":"2018"]
myperiod
My code prints a table with the dates and the corresponding runoff values for all years and all months.
enter image description here
How do I plot the runoff for the July months only?
I get different results when trying to plot identical data with matplotlib and plotly. Plotly doesn't show me the whole data range.
import plotly.plotly as py
import plotly.graph_objs as go
# Keep only rows with line_item_returned == 0 and the two columns needed
df3 = df[df.line_item_returned==0][['created_at', 'line_item_price']].copy()
# Remove the time part from the datetime (floor to whole days)
df3.created_at = df3.created_at.dt.floor('d')
# Set the datetime column as the index
df3 = df3.set_index('created_at')
# Create traces
trace0 = go.Scatter(
# NOTE: x is the index of the un-resampled frame, while y below is the
# daily-resampled and smoothed series, so x and y do not share one index.
x = df3.index,
# Daily sums, smoothed with a centered 90-row rolling mean
y = df3.line_item_price.resample('d').sum().rolling(90, center=True).mean(),
mode = 'markers',
name = 'markers'
)
data = [trace0]
py.iplot(data, filename='scatter-mode')
The chart shows only the range Oct-Dec 2018.
Plotting the same data with matplotlib shows the whole data range 2016-2018:
import matplotlib.pyplot as plt
%matplotlib inline
# Daily sums smoothed with a centered 90-row rolling mean, drawn with matplotlib.
daily_smoothed = df3.line_item_price.resample('d').sum().rolling(90, center=True).mean()
plt.plot(daily_smoothed)
The index contains all data 2016-2018:
# Show the index of the daily-resampled, smoothed series
# (this expression uses a 31-row window, unlike the 90-row window plotted above).
df3.line_item_price.resample('d').sum().rolling(31, center=True).mean().index
DatetimeIndex(['2015-11-18', '2015-11-19', '2015-11-20', '2015-11-21',
'2015-11-22', '2015-11-23', '2015-11-24', '2015-11-25',
'2015-11-26', '2015-11-27',
...
'2018-12-10', '2018-12-11', '2018-12-12', '2018-12-13',
'2018-12-14', '2018-12-15', '2018-12-16', '2018-12-17',
'2018-12-18', '2018-12-19'],
dtype='datetime64[ns]', name='created_at', length=1128, freq='D')
Why is this happening?
I guess it's a problem with indices.
%matplotlib inline
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np
# Synthetic example data: N random normal values on N consecutive daily dates.
N = 2000
daily_index = pd.date_range(start='2015-01-01', periods=N)
df = pd.DataFrame({"value": np.random.randn(N)}, index=daily_index)
# pandas can draw the smoothed series itself, so `plt` is not really needed here
smoothed = df.resample('d').sum().rolling(90, center=True).mean()
smoothed.plot();
But then if you want to use plotly you should use the index from the resampled Series.
# Resample and smooth once, then take BOTH x and y from the same result so
# the trace's axes share one index.
df_rsmpl = df.resample('d').sum().rolling(90, center=True).mean()
trace0 = go.Scatter(
    x=df_rsmpl.index,
    y=df_rsmpl["value"],
)
data = [trace0]
py.iplot(data)