Hello, I have this code:
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
forecast.tail(365)
ds trend yhat_lower yhat_upper trend_lower trend_upper additive_terms additive_terms_lower additive_terms_upper weekly weekly_lower weekly_upper multiplicative_terms multiplicative_terms_lower multiplicative_terms_upper yhat
307 2022-12-30 01:00:00 8744.804921 4151.683644 19973.732090 8744.804921 8744.804921 3425.715807 3425.715807 3425.715807 3425.715807 3425.715807 3425.715807 0.0 0.0 0.0 12170.520728
308 2022-12-30 02:00:00 8743.882714 3948.935733 20003.308794 8743.882714 8743.882714 3691.081394 3691.081394 3691.081394 ...
So I want to download/export this forecast data as a CSV file.
from google.colab import files
files.download()  # ? what do I need to pass here?
Thanks a lot
forecast.to_csv('forecast.csv')
I found the solution.
forecast = m.predict(future)
forecast_data = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
# forecast.tail(365)
print(forecast_data)
forecast_data.to_csv('myCsv.csv')  # to_csv opens and closes the file itself; no f.close() needed
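To actually get the file out of the Colab VM and onto your machine, I believe you just pass the filename to files.download. A minimal sketch, reusing the forecast_data frame from above:

from google.colab import files

forecast_data.to_csv('myCsv.csv', index=False)  # write the CSV on the Colab VM
files.download('myCsv.csv')                     # triggers the browser download prompt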
I'm trying to keep this as simple as possible while keeping the object-oriented style intact. I'm a beginner, and I'm struggling to figure out why I'm getting the error below and how to correct it.
import json
from datetime import datetime, timedelta
from dateutil import tz
import numpy as np
import pandas as pd
import requests
def run(ticker):
    today_date, data = scrape_data(ticker)
    calculate_gamma_levels(today_date, data)

def scrape_data(ticker):
    # Scrape data from CBOE website
    raw_data = requests.get(f"https://cdn.cboe.com/api/global/delayed_quotes/options/{ticker}.json")
    dict_data = pd.DataFrame.from_dict(raw_data.json())
    data = pd.DataFrame(dict_data.loc["options", "data"])
    data_update_time = dict_data.timestamp[0]
    data["data_update_time"] = data_update_time
    data["expiration_date"] = str(20) + data.option.str.extract(r"[A-Z](\d+)").astype(str)
    expiration_date = pd.to_datetime(data["expiration_date"], format="%Y-%m-%d")
    print(expiration_date)
    today_date = data["data_update_time"][0]
    today_date = datetime.strptime(str(today_date), "%Y-%m-%d %H:%M:%S").strftime("%Y-%m-%d")
    data["today_date"] = today_date
    return data, today_date

def calculate_gamma_levels(today_date, data):
    print(today_date)
    data['days_till_exp'] = [1/262 if (np.busday_count(today_date.date(), x.date())) == 0 \
        else np.busday_count(today_date.date(), x.date())/262 for x in int(data["expiration_date"])]
    next_expire = data['expiration_date'].min()

if __name__ == "__main__":
    ticker = "SPY"
    run(ticker)
The error that I'm getting is the following:
File "c:\python\example option date.py", line 46, in calculate_gamma_levels
else np.busday_count(today_date.date(), x.date())/262 for x in int(data["expiration_date"])]
TypeError: string indices must be integers
I've tried the following, and several other variations on the expiration date, but had no success and continued to get the error above.
# expiration_date = datetime.strptime(str(data["expiration_date"].values), "%Y%m%d").strftime("%Y-%m-%d")
# expiration_date = datetime.strptime(str(data["expiration_date"]), "%Y%m%d").strftime("%Y-%m-%d")
print(expiration_date)
# print(data["expiration_date"])
# data["expiration_date"] = pd.to_datetime(data["expiration_date"], format="%Y%m%d")
#print(data["expiration_date"])
Try the following. The root cause of the error is that scrape_data returns data, today_date, but run unpacks the result as today_date, data; inside calculate_gamma_levels, data is therefore the date string, and indexing a string with "expiration_date" raises TypeError: string indices must be integers. The version below fixes the unpacking order and vectorizes the business-day calculation:
import json
from datetime import datetime, timedelta
from dateutil import tz
import numpy as np
import pandas as pd
import requests
def run(ticker):
    data, today_date = scrape_data(ticker)
    calculate_gamma_levels(today_date, data)

def scrape_data(ticker):
    # Scrape data from CBOE website
    raw_data = requests.get(
        f"https://cdn.cboe.com/api/global/delayed_quotes/options/{ticker}.json"
    )
    dict_data = pd.DataFrame.from_dict(raw_data.json())
    data = pd.DataFrame(dict_data.loc["options", "data"])
    data_update_time = dict_data.timestamp[0]
    data["data_update_time"] = data_update_time
    data["expiration_date"] = str(20) + data.option.str.extract(
        r"[A-Z](\d+)"
    ).astype(str)
    expiration_date = pd.to_datetime(data["expiration_date"], format="%Y-%m-%d")
    today_date = data["data_update_time"][0]
    today_date = datetime.strptime(
        str(today_date), "%Y-%m-%d %H:%M:%S"
    ).strftime("%Y-%m-%d")
    data["today_date"] = today_date
    return data, today_date

def calculate_gamma_levels(today_date, data):
    data["expiration_date"] = pd.to_datetime(data["expiration_date"])
    data["days_till_exp"] = (
        np.busday_count(
            today_date,
            data["expiration_date"].dt.strftime("%Y-%m-%d").to_list(),
        )
        / 262
    )
    data.loc[data["days_till_exp"].eq(0), "days_till_exp"] = 1 / 262
    print(data)
    next_expire = data["expiration_date"].min()
    print(next_expire)

if __name__ == "__main__":
    ticker = "SPY"
    run(ticker)
This calculates the days_till_exp column as the business-day count divided by 262, then substitutes 1/262 for all zeros (same-day expiries), and finally selects the minimal expiration date:
option bid bid_size ask ask_size iv open_interest volume delta gamma theta rho vega theo change open high low tick last_trade_price last_trade_time percent_change prev_day_close data_update_time expiration_date today_date days_till_exp
0 SPY220831C00275000 119.94 2.0 121.13 1.0 25.1486 0.0 3.0 1.0000 0.0000 0.0000 0.0000 0.0000 122.4753 0.050 123.43 123.43 123.43 up 123.43 2022-08-31T12:19:26 4.052430e-02 123.380001 2022-08-31 22:55:42 2022-08-31 2022-08-31 0.003817
1 SPY220831P00275000 0.00 0.0 0.01 1711.0 12.0010 1056.0 1.0 0.0000 0.0000 0.0000 0.0000 0.0000 0.0052 0.005 0.01 0.01 0.01 no_change 0.01 2022-08-31T15:10:17 1.000000e+02 0.005000 2022-08-31 22:55:42 2022-08-31 2022-08-31 0.003817
2 SPY220831C00280000 114.94 2.0 116.13 1.0 24.1168 0.0 0.0 1.0000 0.0000 0.0000 0.0000 0.0000 117.4750 0.000 0.00 0.00 0.00 no_change 0.00 None 0.000000e+00 118.385002 2022-08-31 22:55:42 2022-08-31 2022-08-31 0.003817
...
2022-08-31 00:00:00
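For reference, a minimal self-contained sketch of the np.busday_count pattern used above; the dates here are made up purely for illustration:

import numpy as np
import pandas as pd

today = "2022-08-31"
expirations = pd.to_datetime(pd.Series(["2022-08-31", "2022-09-02", "2022-09-16"]))

# vectorized business-day count from today to each expiry, in 262-trading-day years
days = np.busday_count(today, expirations.dt.strftime("%Y-%m-%d").to_list()) / 262
days = np.where(days == 0, 1 / 262, days)  # same-day expiries count as one trading day
print(days)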
try:
    # For Python 3.0 and later
    from urllib.request import urlopen
except ImportError:
    # Fall back to Python 2's urllib2
    from urllib2 import urlopen

import certifi
import json

def get_jsonparsed_data(url):
    response = urlopen(url, cafile=certifi.where())
    data = response.read().decode("utf-8")
    return json.loads(data)
url = ("https://financialmodelingprep.com/api/v3/ratios/AAPL?apikey=92a1dad5aef4eb31276c19417c31dfeb")
print(get_jsonparsed_data(url))
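Side note: as far as I know, the cafile argument to urlopen was deprecated in Python 3.6 and removed in 3.12, so on newer Pythons the same call would need an explicit ssl context. A sketch, reusing the url defined above:

import ssl
import certifi
from urllib.request import urlopen

# build a context that trusts certifi's CA bundle, then hand it to urlopen
context = ssl.create_default_context(cafile=certifi.where())
response = urlopen(url, context=context)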
import requests
import pandas as pd
url = (
"https://financialmodelingprep.com/api/v3/ratios/AAPL?"
"apikey=92a1dad5aef4eb31276c19417c31dfeb"
)
response = requests.get(url)
data = response.json()
df = pd.DataFrame(data)
df
prints:
symbol date period currentRatio quickRatio cashRatio daysOfSalesOutstanding daysOfInventoryOutstanding operatingCycle daysOfPayablesOutstanding ... priceToSalesRatio priceEarningsRatio priceToFreeCashFlowsRatio priceToOperatingCashFlowsRatio priceCashFlowRatio priceEarningsToGrowthRatio priceSalesRatio dividendYield enterpriseValueMultiple priceFairValue
0 AAPL 2021-09-25 FY 1.074553 0.909660 0.278449 51.390969 11.276593 62.667561 93.851071 ... 6.786117 26.219656 26.706799 23.861253 23.861253 0.367742 6.786117 0.005828 20.889553 39.348186
1 AAPL 2020-09-26 FY 1.363604 1.218195 0.360710 49.787534 8.741883 58.529418 91.048190 ... 7.272322 34.773150 27.211359 24.746031 24.746031 3.277438 7.272322 0.007053 25.558891 30.553901
2 AAPL 2019-09-28 FY 1.540126 1.384447 0.462022 64.258765 9.263639 73.522404 104.314077 ... 4.420394 20.813515 19.527159 16.573786 16.573786 -62.492578 4.420394 0.012277 14.772472 12.709658
3 AAPL 2018-09-29 FY 1.123843 0.986566 0.221733 67.332499 8.817631 76.150130 124.570214 ... 3.959898 17.666917 16.402259 13.582267 13.582267 0.597709 3.959898 0.013038 13.099961 9.815760
4 AAPL 2017-09-30 FY 1.276063 1.089670 0.201252 56.800671 12.563631 69.364302 126.927606 ... 3.794457 17.989671 17.121402 13.676823 13.676823 1.632758 3.794457 0.014680 12.605749 6.488908
import pandas
print(pandas.DataFrame(data))
I guess this is maybe what you are trying to do...
I'm trying to plot a heatmap using this code:
import folium
from folium.plugins import HeatMap

max_Count = (dataM['count'].max())
hmap = folium.Map(location=[53.192838, 8.197006], zoom_start=7,)
hm_wide = HeatMap( list(zip(dataM.latitude.values, dataM.longitude.values, dataM.count.values)),
                   min_opacity=0.2,
                   max_val=max_Count,
                   radius=17, blur=15,
                   max_zoom=1,
                 )
hmap.add_child(hm_wide)
The dataframe looks like this:
station count latitude longitude city
Time
2021-05-01 00:00:00 02-MI-JAN-N 11.0 52.5139 13.41780 Berlin
2021-05-01 00:00:00 24-MH-ALB 0.0 52.4925 13.55850 Berlin
2021-05-01 00:00:00 23-TK-KAI 1.0 52.4573 13.51870 Berlin
... ... ... ... ... ...
2021-09-09 23:45:00 50801_Amalienstr 0.0 53.1390 8.22225 Oldenburg
But I'm getting this error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-23-c1b7a410c325> in <module>
5 hmap = folium.Map(location=[53.192838, 8.197006], zoom_start=7,)
6
----> 7 hm_wide = HeatMap( list(zip(dataM.latitude.values, dataM.longitude.values, dataM.count.values)),
8 min_opacity=0.2,
9 max_val=max_Count,
AttributeError: 'function' object has no attribute 'values'
Any idea about the reason behind it and how it can be solved?
Thank you
UPDATE:
I've used dataM['latitude'], dataM['longitude'], dataM['count'] and it works :)) (count is a built-in DataFrame method, so dataM.count returns that bound method rather than the column, which is why .values raised 'function' object has no attribute 'values'):
import folium
from folium.plugins import HeatMap

max_Count = dataM['count'].max()
hmap = folium.Map(location=[53.192838, 8.197006], zoom_start=7)
hm_wide = HeatMap(
    list(zip(dataM['latitude'], dataM['longitude'], dataM['count'])),
    min_opacity=0.2,
    max_val=max_Count,
    radius=17, blur=15,
    max_zoom=1,
)
hmap.add_child(hm_wide)
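Follow-up note: if you run this outside a notebook, folium maps can be written to a standalone HTML file. A minimal sketch, assuming the hmap object from above (the filename is arbitrary):

hmap.save('heatmap.html')  # writes a self-contained HTML file viewable in any browser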
So I am trying to build trading software, using code from an online YouTuber. I gather the data for all the companies in the S&P 500 in the get_data_from_yahoo() function. When I run that code it says "Already have" followed by the given ticker, which is fine, but when I go to print the data in the following function, complied_data(), it only prints one ticker, ZTS.
Anyone have any ideas?
import bs4 as bs
import datetime as dt
import os
import pandas as pd
from pandas_datareader import data as pdr
import pickle
import requests
import fix_yahoo_finance as yf
def save_sp500_tickers():
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text.replace('.', '-')
        ticker = ticker[:-1]
        tickers.append(ticker)
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    print(tickers)
    return tickers

save_sp500_tickers()

def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)
    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')
    start = dt.datetime(2019, 6, 8)
    end = dt.datetime.now()
    for ticker in tickers:
        print(ticker)
        if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
            df = pdr.get_data_yahoo(ticker, start, end)
            df.reset_index(inplace=True)
            df.set_index("Date", inplace=True)
            df.to_csv('stock_dfs/{}.csv'.format(ticker))
        else:
            print('Already have {}'.format(ticker))

save_sp500_tickers()
get_data_from_yahoo()

def complied_data():
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)
    main_df = pd.DataFrame()
    for count, ticker in enumerate(tickers):
        df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
        df.set_index('Date', inplace=True)
        df.rename(columns={'Adj Close': ticker}, inplace=True)
        df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], 1, inplace=True)
    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df, how='outer')
    if count % 10 == 0:
        print(count)
    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')

complied_data()
When I run this code, this is what it says:
MMM
Already have MMM
ABT
Already have ABT
ABBV
Already have ABBV
ABMD
Already have ABMD
ACN
Already have ACN
ATVI
Already have ATVI
ADBE
Already have ADBE
AMD
Already have AMD
AAP
Already have AAP
AES
Already have AES
AMG
Already have AMG
AFL
Already have AFL
A
Already have A
APD
Already have APD
AKAM
Already have AKAM
ALK
Already have ALK
ALB
Already have ALB
It then continues to say that it already has all 500 companies (I did not show the whole thing because the list is very long). But when I run the complied_data() function it only prints the data for one ticker:
ZTS
Date
2019-01-02 83.945038
2019-01-03 81.043526
2019-01-04 84.223267
2019-01-07 84.730026
2019-01-08 85.991997
The problem is in a for loop, specifically the one in complied_data: the if-else block and the count % 10 check should be inside the for loop:
for count, ticker in enumerate(tickers):
    df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
    df.set_index('Date', inplace=True)
    df.rename(columns={'Adj Close': ticker}, inplace=True)
    df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], 1, inplace=True)
    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df, how='outer')
    if count % 10 == 0:
        print(count)
Otherwise they are evaluated only once, after the loop has finished, and operate on the last element only.
The following is the output when changing to the above indentation:
(... omitted counting from 0)
470
480
490
500
MMM ABT ABBV ABMD ... YUM ZBH ZION ZTS
Date ...
2019-06-10 165.332672 80.643486 74.704918 272.429993 ... 107.794380 121.242027 43.187107 109.920105
2019-06-11 165.941788 80.494644 75.889320 262.029999 ... 106.722885 120.016762 43.758469 109.860268
2019-06-12 166.040024 81.318237 76.277657 254.539993 ... 108.082100 120.225945 43.512192 111.136780
2019-06-13 165.882843 81.655624 76.646561 255.529999 ... 108.121788 119.329407 44.063854 109.730621
2019-06-14 163.760803 81.586166 76.394157 250.960007 ... 108.925407 116.998398 44.211620 110.488556
[5 rows x 505 columns]
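If the scoping is still unclear, here is a tiny standalone illustration of the same indentation pitfall, using a made-up list rather than the real tickers:

items = ['MMM', 'ABT', 'ZTS']

collected = []
for ticker in items:
    pass  # loop body ends here
collected.append(ticker)  # outside the loop: runs once, sees only the last element
print(collected)  # ['ZTS']

collected = []
for ticker in items:
    collected.append(ticker)  # inside the loop: runs once per element
print(collected)  # ['MMM', 'ABT', 'ZTS']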
I have the following set of data:
AC.TO CL=F CNQ.TO CPG.TO ECA.TO IMO.TO SU.TO WJA.TO XEG.TO XFN.TO XGD.TO XOM
Date
2018-03-14 0.016316 0.000493 -0.002574 0.001160 0.012658 -0.008403 -0.003074 0.002465 -0.002755 0.001053 0.001741 -0.012478
2018-03-15 0.002854 0.005089 0.006452 0.010429 -0.019444 -0.003507 0.002135 0.006967 0.004604 0.004208 -0.005213 0.011141
2018-03-16 -0.003557 0.016659 0.019231 0.045872 0.034703 0.009971 0.015621 0.008547 0.023831 0.000000 0.005240 0.009406
2018-03-20 0.043541 0.020270 0.009507 0.035108 0.020935 0.010192 0.024257 -0.001223 0.019178 0.004731 -0.007799 -0.002158
2018-03-21 -0.014483 0.031063 0.032578 0.049234 0.075188 0.024503 0.012306 -0.005714 0.030466 -0.000785 0.013100 0.014191
2018-03-23 -0.011410 0.019383 -0.013548 -0.017279 -0.018277 -0.018224 0.000700 -0.001709 -0.015138 -0.016524 0.027997 -0.008299
2018-03-29 0.032395 0.003711 0.027137 0.024561 0.043446 0.008572 0.014827 0.005516 0.025408 0.009542 0.012987 0.024722
2018-04-03 -0.001140 0.007450 0.043928 -0.002336 0.013991 0.011549 0.010546 0.002114 0.022645 -0.005442 -0.023589 0.024583
2018-04-04 0.002664 0.000787 0.002177 0.012881 -0.012346 0.016101 -0.000227 -0.001688 0.000886 -0.003557 -0.007765 -0.001999
2018-04-09 -0.015498 0.021630 -0.013517 -0.002181 -0.015782 -0.000562 -0.001518 -0.005947 -0.008613 0.005230 -0.010309 0.000000
2018-04-10 -0.001968 0.037763 0.018750 0.036066 0.044461 0.005901 0.006299 -0.016239 0.024327 -0.001095 0.002604 0.029384
2018-04-11 -0.014196 0.017814 0.019585 0.042194 0.027913 0.002793 0.020721 -0.008688 0.017812 -0.008772 0.022511 0.004671
Is there any query to list all the positive rows (i.e., rows containing no negative values)?
Here you go!
temp = df[df >= 0]        # negative values become NaN
result = temp.dropna(axis=0)  # drop every row that contains a NaN
Hope this helps!
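For what it's worth, the same selection can also be written as a single boolean-mask step, assuming all the columns are numeric:

result = df[(df >= 0).all(axis=1)]  # keep only rows where every value is non-negative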