"String indices must be integers" error in pandas & yfinance - Python - python

Here is my code that I am using:
import warnings
import datetime
import numpy as np
import pandas as pd
import pandas_datareader.data as pdr
import matplotlib.pyplot as plt, mpld3
import matplotlib.ticker as mtick
# Date range and tickers used for the comparison below.
date_from = datetime.date(2020, 1, 1)
date_to = datetime.date(2022, 12, 30)
tickerL = 'BTC-USD'
print("Comparing " + tickerL +" to...")
#tickerL2 = ['AAPL']
# The second ticker is read interactively from the user.
tickerL2 = input('Now enter your comparison ticker:\n')
tickerList = [tickerL, tickerL2]
print(tickerList)
#tickerList = ['BTC-USD', 'AMZN', 'AAPL', 'CL=F', '^GSPC', '^DJI', 'GC=F']
#fetch multiple asset data
def getMultiAssetData(tickerList, date_from, date_to):
    """Download Yahoo data for every ticker and stack it into one frame.

    Args:
        tickerList: Ticker symbols to request; one DataReader call is made
            per symbol.
        date_from: Start of the requested date range.
        date_to: End of the requested date range.

    Returns:
        The per-ticker results concatenated with ``pd.concat``, keyed by
        ticker, with the index levels named 'Ticker' and 'Date'.
    """
    def getData(ticker):
        return pdr.DataReader(ticker, "yahoo", date_from, date_to)

    # Materialise the downloads so every request has finished (and any
    # failure has surfaced) before concatenation starts.
    datas = list(map(getData, tickerList))
    return pd.concat(datas, keys=tickerList, names=['Ticker', 'Date'])
# NOTE(review): stand-alone assignment; nothing visible reads `sort`.
# Possibly intended as an argument to pd.concat - confirm with the author.
sort=False
multiData = getMultiAssetData(tickerList, date_from, date_to)
# Work on a copy so the downloaded frame stays untouched.
df = multiData.copy()
#print(df)
# Keep only the rows stored under the first ticker's key.
df = df.loc[tickerL, :]
df.tail()
Now I keep getting this error and I don't know how to move forward:
Traceback (most recent call last):
File "main.py", line 51, in <module>
multiData = getMultiAssetData(tickerList, date_from, date_to)
File "main.py", line 45, in getMultiAssetData
datas = list(map(getData, tickerList))
File "main.py", line 42, in getData
data = pdr.DataReader(ticker, "yahoo", date_from, date_to)
File "/opt/virtualenvs/python3/lib/python3.8/site-packages/pandas/util/_decorators.py", line 207, in wrapper
return func(*args, **kwargs)
File "/opt/virtualenvs/python3/lib/python3.8/site-packages/pandas_datareader/data.py", line 370, in DataReader
return YahooDailyReader(
File "/opt/virtualenvs/python3/lib/python3.8/site-packages/pandas_datareader/base.py", line 253, in read
df = self._read_one_data(self.url, params=self._get_params(self.symbols))
File "/opt/virtualenvs/python3/lib/python3.8/site-packages/pandas_datareader/yahoo/daily.py", line 153, in _read_one_data
data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
TypeError: string indices must be integers
This script worked just fine a couple of months ago, but I assume some of the packages got updated and now require a different format.

Related

Outlook Calendar Export Type Issue

I have the following code meant to extract my Outlook calendar and show me a list of all participants in the meetings that I have scheduled. I am running into the following error related to datatypes. I believe the real issue is retrieving the events, because when I print the appointments list prior to the error, it shows as blank. Thoughts?
Code:
import datetime as dt
import pandas as pd
import win32com.client
def get_calendar(begin,end):
    """Return the default Outlook calendar's items limited to a date window.

    Args:
        begin: Datetime-like lower bound; formatted as MM/DD/YYYY and
            compared against each item's [Start].
        end: Datetime-like upper bound; formatted as MM/DD/YYYY and
            compared against each item's [END].

    Returns:
        The Items collection produced by ``Restrict`` for that window.
    """
    # 9 is the value Outlook's OlDefaultFolders enumeration assigns to the
    # calendar folder (olFolderCalendar).
    calendar_folder_id = 9
    outlook = win32com.client.Dispatch('Outlook.Application').GetNamespace('MAPI')
    calendar = outlook.getDefaultFolder(calendar_folder_id).Items
    calendar.IncludeRecurrences = True
    calendar.Sort('[Start]')
    start_text = begin.strftime('%m/%d/%Y')
    end_text = end.strftime('%m/%d/%Y')
    restriction = "[Start] >= '" + start_text + "' AND [END] <= '" + end_text + "'"
    return calendar.Restrict(restriction)
def get_appointments(calendar,subject_kw = None,exclude_subject_kw = None, body_kw = None):
    """Collect calendar items into a DataFrame, optionally filtered by subject.

    Args:
        calendar: Iterable of items exposing ``subject``, ``start``, ``end``
            and ``body`` attributes.
        subject_kw: When given, keep only items whose subject contains it.
        exclude_subject_kw: When given, drop items whose subject contains it.
        body_kw: Accepted for interface compatibility; currently unused.

    Returns:
        DataFrame with the columns 'subject', 'start', 'end' and 'body',
        one row per retained item.
    """
    appointments = [
        app for app in calendar
        if (subject_kw is None or subject_kw in app.subject)
        and (exclude_subject_kw is None or exclude_subject_kw not in app.subject)
    ]
    return pd.DataFrame({
        'subject': [app.subject for app in appointments],
        'start': [app.start for app in appointments],
        'end': [app.end for app in appointments],
        'body': [app.body for app in appointments],
    })
def make_cpd(appointments):
    """Sum meeting hours per subject.

    The frame passed in is modified in place: 'Date' (a copy of 'start')
    and 'Hours' are added, 'subject' is renamed to 'Meeting Description',
    and 'start' and 'end' are dropped.

    Args:
        appointments: DataFrame with 'subject', 'start' and 'end' columns.

    Returns:
        Series of total hours indexed by 'Meeting Description'.
    """
    # Coerce before subtracting: an empty frame, or values pandas did not
    # infer as datetimes, has no datetime dtype and would make the
    # arithmetic below fail.
    starts = pd.to_datetime(appointments['start'], utc=True)
    ends = pd.to_datetime(appointments['end'], utc=True)
    appointments['Date'] = appointments['start']
    # Divide the whole timedelta by one hour; `.dt.seconds` only holds the
    # sub-day remainder and would drop full days from long events.
    appointments['Hours'] = (ends - starts) / pd.Timedelta(hours=1)
    appointments.rename(columns={'subject':'Meeting Description'}, inplace = True)
    appointments.drop(['start','end'], axis = 1, inplace = True)
    return appointments.groupby('Meeting Description')['Hours'].sum()
# Workbook path that receives the per-meeting summary.
final = r"C:\Users\rcarmody\Desktop\Python\Accelerators\Outlook Output.xlsx"
# Date window passed to get_calendar.
begin = dt.datetime(2021,1,1)
end = dt.datetime(2021,5,12)
print(begin)
print(end)
cal = get_calendar(begin, end)
# Keep subjects containing 'weekly' and drop those containing 'Webcast'.
appointments = get_appointments(cal, subject_kw = 'weekly', exclude_subject_kw = 'Webcast')
result = make_cpd(appointments)
result.to_excel(final)
Error:
Traceback (most recent call last):
File "C:\Users\Desktop\Python\Accelerators\outlook_meetings.py", line 50, in <module>
result = make_cpd(appointments)
File "C:\Users\Desktop\Python\Accelerators\outlook_meetings.py", line 34, in make_cpd
appointments['Hours'] = (appointments['end'] - appointments['start']).dt.seconds/3600
File "C:\Users\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py", line 5461, in __getattr__
return object.__getattribute__(self, name)
File "C:\Users\rcarmody\AppData\Roaming\Python\Python39\site-packages\pandas\core\accessor.py", line 180, in __get__
accessor_obj = self._accessor(obj)
File "C:\Users\AppData\Roaming\Python\Python39\site-packages\pandas\core\indexes\accessors.py", line 494, in __new__
raise AttributeError("Can only use .dt accessor with datetimelike values")
AttributeError: Can only use .dt accessor with datetimelike values
[Finished in 1.2s]
New Error:
Traceback (most recent call last):
File "C:\Users\Desktop\Python\Accelerators\outlook_meetings.py", line 50, in <module>
result = make_cpd(appointments)
File "C:\Users\Desktop\Python\Accelerators\outlook_meetings.py", line 34, in make_cpd
appointments['Hours'] = (appointments['end'] - appointments['start']) / pd.Timedelta(hours=1)
File "C:\Users\\AppData\Roaming\Python\Python39\site-packages\pandas\core\ops\common.py", line 65, in new_method
return method(self, other)
File "C:\Users\AppData\Roaming\Python\Python39\site-packages\pandas\core\arraylike.py", line 113, in __truediv__
return self._arith_method(other, operator.truediv)
File "C:\Users\\AppData\Roaming\Python\Python39\site-packages\pandas\core\series.py", line 4998, in _arith_method
result = ops.arithmetic_op(lvalues, rvalues, op)
File "C:\Users\\AppData\Roaming\Python\Python39\site-packages\pandas\core\ops\array_ops.py", line 185, in arithmetic_op
res_values = op(lvalues, rvalues)
File "pandas\_libs\tslibs\timedeltas.pyx", line 1342, in pandas._libs.tslibs.timedeltas.Timedelta.__rtruediv__
numpy.core._exceptions.UFuncTypeError: ufunc 'true_divide' cannot use operands with types dtype('float64') and dtype('<m8[ns]')
The subtraction of two datetime objects results in a timedelta object. To retrieve hours from timedelta objects you can use:
import numpy as np
hours = timedelta_object / np.timedelta64(1, "h")
Note: it could also be (more pandas-only style)
hours = timedelta_object / pd.Timedelta(hours=1)
So in your case, you would use it as :
appointments['Hours'] = (appointments['end'] - appointments['start']) / pd.Timedelta(hours=1)

pandas_datareader throwing error when requesting multiple cryptocurrency datasets in single request

If I make a call for only one cryptocurrency it works, but for multiple it fails.
import pandas_datareader as pdr
...
crypto_df = pdr.DataReader('BTC-USD', data_source = 'yahoo', start = '2015-01-01')
works fine
crypto_df = pdr.DataReader('ETH-USD', data_source = 'yahoo', start = '2015-01-01')
also works fine
crypto_df = pdr.DataReader(['BTC-USD', 'ETH-USD'], data_source = 'yahoo', start = '2015-01-01')
fails with the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/alex/.local/lib/python3.8/site-packages/pandas/util/_decorators.py", line 199, in wrapper
return func(*args, **kwargs)
File "/home/alex/.local/lib/python3.8/site-packages/pandas_datareader/data.py", line 376, in DataReader
return YahooDailyReader(
File "/home/alex/.local/lib/python3.8/site-packages/pandas_datareader/base.py", line 258, in read
df = self._dl_mult_symbols(self.symbols)
File "/home/alex/.local/lib/python3.8/site-packages/pandas_datareader/base.py", line 285, in _dl_mult_symbols
result = concat(stocks, sort=True).unstack(level=0)
File "/home/alex/.local/lib/python3.8/site-packages/pandas/core/frame.py", line 7349, in unstack
result = unstack(self, level, fill_value)
File "/home/alex/.local/lib/python3.8/site-packages/pandas/core/reshape/reshape.py", line 417, in unstack
return _unstack_frame(obj, level, fill_value=fill_value)
File "/home/alex/.local/lib/python3.8/site-packages/pandas/core/reshape/reshape.py", line 444, in _unstack_frame
return _Unstacker(
File "/home/alex/.local/lib/python3.8/site-packages/pandas/core/reshape/reshape.py", line 118, in __init__
self._make_selectors()
File "/home/alex/.local/lib/python3.8/site-packages/pandas/core/reshape/reshape.py", line 167, in _make_selectors
raise ValueError("Index contains duplicate entries, cannot reshape")
This works as expected with stocks, but fails with cryptocurrency.
I'm confident this is not an issue on my side, but I am hoping someone can confirm. I will open a ticket with the developers if this is an unknown bug.
You need to define the index you want to fetch.
#Trying to fetch crypto data from yahoo
import pandas as pd
from pandas_datareader import data as wb

tickers = ['BTC-USD', 'ETH-USD']
crypto_data = pd.DataFrame()
# Request each ticker separately and keep only its 'Adj Close' column,
# stored under the ticker's name.
for t in tickers:
    crypto_data[t] = wb.DataReader(t, data_source ='yahoo', start= '2020-12-01')['Adj Close']
You are missing ['Adj Close'] in this case.

quandl FXH1 Download Data

I have a subscription to FXCM 1 hour data. I can see the data when I go to https://www.quandl.com/tables/FXH1/FXCM-H1. The below code keeps giving me
File "quandl1.py", line 17, in <module>
data = quandl.get('FXH1/FXCM-H1')
File "/usr/lib64/python3.5/site-packages/quandl/get.py", line 48, in get
data = Dataset(dataset_args['code']).data(params=kwargs, handle_column_not_found=True)
If I go to the site https://www.quandl.com/tables/FXH1/FXCM-H1 I can see limited data. Wouldn't the library be FXH1/FXCM-H1?
import quandl
quandl.ApiConfig.api_key = "My Key"
data = quandl.get('FXH1/FXCM-H1')
data.tail()
[me@localhost fxcm]$ python3 quandl1.py
Traceback (most recent call last):
File "quandl1.py", line 17, in <module>
data = quandl.get('FXH1/FXCM-H1')
File "/usr/lib64/python3.5/site-packages/quandl/get.py", line 48, in get
data = Dataset(dataset_args['code']).data(params=kwargs, handle_column_not_found=True)
File "/usr/lib64/python3.5/site-packages/quandl/model/dataset.py", line 47, in data
return Data.all(**updated_options)
File "/usr/lib64/python3.5/site-packages/quandl/operations/list.py", line 14, in all
r = Connection.request('get', path, **options)
File "/usr/lib64/python3.5/site-packages/quandl/connection.py", line 36, in request
return cls.execute_request(http_verb, abs_url, **options)
File "/usr/lib64/python3.5/site-packages/quandl/connection.py", line 44, in execute_request
cls.handle_api_error(response)
File "/usr/lib64/python3.5/site-packages/quandl/connection.py", line 85, in handle_api_error
raise klass(message, resp.status_code, resp.text, resp.headers, code)
quandl.errors.quandl_error.NotFoundError: (Status 404) (Quandl Error QECx02) You have submitted an incorrect Quandl code. Please check your Quandl codes and try again.
import quandl
import pandas as pd
quandl.ApiConfig.api_key = "My Key"
# Names listed by the original snippet before the call (presumably the
# table's columns - confirm against the returned frame):
# None, symbol, date, hour, openbid, highbid, lowbid, closebid,
# openask, highask, lowask, closeask, totalticks
# Keep the result in a local name rather than attaching it to the pandas
# module as an attribute.
data = quandl.get_table('FXCM/H1', date = "2002-02-01,2002-02-02,2002-02-03,2002-02-04,2002-02-05,2002-02-06,2002-02-07,2002-02-08,2002-02-09", symbol='EUR/USD')
# Print every row as its own one-row frame.
for index, row in data.iterrows():
    print(row.to_frame().T)

Passing a pandas data frame through an R function using rpy2

I am trying to reproduce R results in Python. The following R code works:
library("TTR")
library("zoo")
library("xts")
library("quantmod")
getSymbols("^GSPC",from = "2014-01-01", to = "2015-01-01")
dataf = GSPC[,c("GSPC.High", "GSPC.Low", "GSPC.Close")]
result = CCI(dataf, n=20, c=0.015)
But not the following Python code:
from datetime import datetime
from rpy2.robjects.packages import importr
TTR = importr('TTR')
import pandas_datareader as pdr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
GSPC = pdr.get_data_yahoo(symbols='^GSPC', start=datetime(2014, 1, 1), end=datetime(2015, 1, 1))
dataf = GSPC[['High', 'Low', 'Close']]
result = TTR.CCI(dataf, n=20, c=0.015)
The error I get occurs on the last line when using TTR.CCI. Traceback and error returned is:
Traceback (most recent call last):
File "svm_strat_test_oliver.py", line 30, in <module> result = TTR.CCI(dataf, n=20, c=0.015)
File "/usr/local/lib/python2.7/site-packages/rpy2/robjects/functions.py", line 178, in __call__
return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/rpy2/robjects/functions.py", line 106, in __call__
res = super(Function, self).__call__(*new_args, **new_kwargs)
rpy2.rinterface.RRuntimeError: Error in `[.data.frame`(center, beg:NROW(x)) : undefined columns selected
Your data.frame in the R code is actually an "xts"/"zoo" object; you just need to convert the pandas data frame to one in the Python code:
rzoo = importr('zoo')
# Call the conversion through the handle bound above; the bare name `zoo`
# is never defined in this snippet.
datazoo = rzoo.as_zoo_xts(dataf)
result = TTR.CCI(datazoo, n=20, c=0.015)

text file mdates.strpdate2num error

I keep getting an error using the numpy loadtxt converter.
Your help is greatly appreciated
import numpy as np
import time
import datetime
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
from matplotlib.finance import candlestick
from matplotlib.dates import strpdate2num
import urllib2
## global variables
eachStock = 'AAPL','GOOG','MSFT','AMZN','CMG'
for stock in eachStock:
stockFile = stock+'.txt'
date, closep, highp, lowp, openp, volume = np.loadtxt(eachStock, delimiter=',', unpack=True,
converters={ 0: mdates.strpdate2num('%Y%m%d')})
dFrame = Series(closep)
here is the first line in my text file
20040322,13.5200,13.6800,12.6100,12.6850,15850720
here is the error I keep getting
Traceback (most recent call last):
File "C:\Users\antoniozeus\Desktop\BuyAndHold.py", line 27, in <module>
converters={ 0: mdates.strpdate2num('%Y%m%d')})
File "C:\Python27\lib\site-packages\numpy\lib\npyio.py", line 796, in loadtxt
items = [conv(val) for (conv, val) in zip(converters, vals)]
File "C:\Python27\lib\site-packages\matplotlib\dates.py", line 233, in __call__
return date2num(datetime.datetime(*time.strptime(s, self.fmt)[:6]))
File "C:\Python27\lib\_strptime.py", line 454, in _strptime_time
return _strptime(data_string, format)[0]
File "C:\Python27\lib\_strptime.py", line 325, in _strptime
(data_string, format))
ValueError: time data 'AAPL' does not match format '%Y%m%d'
It seems like you mistyped stockFile (filename) as eachStock.
date, closep, highp, lowp, openp, volume = np.loadtxt(
stockFile, delimiter=',', unpack=True,
converters={ 0: mdates.strpdate2num('%Y%m%d')})

Categories

Resources