How can get ' USDJPY'(currency rates) with pandas and yahoo finance? - python

I am learning and using the pandas and python.
Today, I am trying to make a fx rate table,
but I got a trouble with getting the pricess of 'USDJPY'.
When I get a prices of 'EUR/USD', i code like this.
eur = web.DataReader('EURUSD=X','yahoo')['Adj Close']
it works.
But when I wrote
jpy = web.DataReader('USDJPY=X','yahoo')['Adj Close']
the error message comes like this:
--------------------------------------------------------------------------- IOError Traceback (most recent call
last) in ()
----> 1 jpy = web.DataReader('USDJPY=X','yahoo')['Adj Close']
C:\Anaconda\lib\site-packages\pandas\io\data.pyc in DataReader(name,
data_source, start, end, retry_count, pause)
70 return get_data_yahoo(symbols=name, start=start, end=end,
71 adjust_price=False, chunksize=25,
---> 72 retry_count=retry_count, pause=pause)
73 elif data_source == "google":
74 return get_data_google(symbols=name, start=start, end=end,
C:\Anaconda\lib\site-packages\pandas\io\data.pyc in
get_data_yahoo(symbols, start, end, retry_count, pause, adjust_price,
ret_index, chunksize, name)
388 """
389 return _get_data_from(symbols, start, end, retry_count, pause,
--> 390 adjust_price, ret_index, chunksize, 'yahoo', name)
391
392
C:\Anaconda\lib\site-packages\pandas\io\data.pyc in
_get_data_from(symbols, start, end, retry_count, pause, adjust_price, ret_index, chunksize, source, name)
334 # If a single symbol, (e.g., 'GOOG')
335 if isinstance(symbols, (basestring, int)):
--> 336 hist_data = src_fn(symbols, start, end, retry_count, pause)
337 # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
338 elif isinstance(symbols, DataFrame):
C:\Anaconda\lib\site-packages\pandas\io\data.pyc in
_get_hist_yahoo(sym, start, end, retry_count, pause)
188 '&g=d' +
189 '&ignore=.csv')
--> 190 return _retry_read_url(url, retry_count, pause, 'Yahoo!')
191
192
C:\Anaconda\lib\site-packages\pandas\io\data.pyc in
_retry_read_url(url, retry_count, pause, name)
167
168 raise IOError("after %d tries, %s did not "
--> 169 "return a 200 for url %r" % (retry_count, name, url))
170
171
IOError: after 3 tries, Yahoo! did not return a 200 for url
'http://ichart.yahoo.com/table.csv?s=USDJPY=X&a=0&b=1&c=2010&d=1&e=1&f=2014&g=d&ignore=.csv'
Other currencies like 'GBPUSD' also have same problem.
Can you solve this problem?
Do you have any idea of getting 'USDJPY' from yahoo or google???

Yahoo Finance doesn't provide historical data on exchange rates (i.e. there's no "Historical Prices" link in the top left of the page like there would be for stocks, indices, etc...)
You can use FRED (Federal Reserve of St. Louis data) to get these exchange rates...
import pandas.io.data as web
jpy = web.DataReader('DEXJPUS', 'fred')
UPDATE: hase moved the pandas-datareader
from pandas_datareader import data
jpy = data.DataReader('DEXJPUS', 'fred')
or the more direct way...
jpy = web.get_data_fred('DEXJPUS')
A list of all of the exchange rate that FRED has daily data for can be found here: http://research.stlouisfed.org/fred2/categories/94

Yahoo Finance doesn't provide historical data on exchange rates
Yes it does but not on cross rates. All vs the USD
List of Yahoo USD Exchange Rates
a = web.DataReader("JPY=X", 'yahoo')

The free and easy way is Yahoo:
# get fx rates
# https://finance.yahoo.com/currencies
# example EUR/USD = EURUSD%3DX?p=EURUSD%3DX
import pandas as pd
import pandas_datareader as dr
# change date range here
start_date = '2021-02-26'
end_date = '2021-03-01'
# retrieve market data of current ticker symbol
print('This is the table with HLOC, Volume, Adj Close prices')
eurusd = dr.data.DataReader('EURUSD%3DX', data_source='yahoo', start=start_date, end=end_date)
print(eurusd)
# just get latest adjusted close for further use
print('This is the Adj Close prices only')
print(eurusd['Adj Close'])
and it also works with other crosses, contrary to the above statements:
# EURCHF%3DX
eurchf = dr.data.DataReader('EURCHF%3DX', data_source='yahoo', start=start_date, end=end_date)
print(eurchf)

Get the historical exchange rates from OANDA
http://pandas-datareader.readthedocs.io/en/latest/remote_data.html
In [1]: from pandas_datareader.oanda import get_oanda_currency_historical_rates
In [2]: start, end = "2016-01-01", "2016-06-01"
In [3]: quote_currency = "USD"
In [4]: base_currency = ["EUR", "GBP", "JPY"]
In [5]: df_rates = get_oanda_currency_historical_rates(
start, end,
quote_currency=quote_currency,
base_currency=base_currency
)
In [6]: print(df_rates)
Update: Oanda started charging for this lately
https://www.oanda.com/fx-for-business/exchange-rates-api

#!pip install yfinance
#!pip install mplfinance
from datetime import datetime
import yfinance as yf
import mplfinance as mpf
#import pandas as pd
#import pandas_datareader as dr
# change date range here
start_date = '2021-02-26'
end_date = '2021-03-01'
#This Does NOT WORK#
# retrieve market data of current ticker symbol
print('This is the table with HLOC, Volume, Adj Close prices')
eurusd = dr.data.DataReader('EURUSD%3DX', data_source='yahoo',
start=start_date, end=end_date)
print(eurusd)
#This Does#
data = yf.download('USDCAD=X', start=start_date, end=end_date)
#If someone can figure out how to get the S5,S30, M1, M3 etc. Please share

I think you can use custom intervals by passing it as an argument to the yf.download() function. For example:
data = yf.download('USDCAD=X', start=start_date, end=end_date, interval='1m')

Related

how to solve IndexError : single positional indexer is out-of-bounds

CODE:-
from datetime import date
from datetime import timedelta
from nsepy import get_history
import pandas as pd
import datetime
# import matplotlib.pyplot as mp
end1 = date.today()
start1 = end1 - timedelta(days=365)
stock = [
'RELIANCE','HDFCBANK','INFY','ICICIBANK','HDFC','TCS','KOTAKBANK','LT','SBIN','HINDUNILVR','AXISBANK','ITC','BAJFINANCE','BHARTIARTL','ASIANPAINT','HCLTECH','MARUTI','TITAN','BAJAJFINSV','TATAMOTORS',
'TECHM','SUNPHARMA','TATASTEEL','M&M','WIPRO','ULTRACEMCO','POWERGRID','HINDALCO','NTPC','NESTLEIND','GRASIM','ONGC','JSWSTEEL','HDFCLIFE','INDUSINDBK','SBILIFE','DRREDDY','ADANIPORTS','DIVISLAB','CIPLA',
'BAJAJ-AUTO','TATACONSUM','UPL','BRITANNIA','BPCL','EICHERMOT','HEROMOTOCO','COALINDIA','SHREECEM','IOC','VEDL','ADANIENT', 'APOLLOHOSP', 'TATAPOWER', 'PIDILITIND', 'SRF', 'NAUKRI', 'ICICIGI', 'DABUR',
'GODREJCP', 'HAVELLS', 'PEL', 'VOLTAS', 'AUBANK', 'LTI', 'CHOLAFIN', 'AMBUJACEM', 'MARICO', 'SRTRANSFIN','GAIL', 'MCDOWELL-N', 'MPHASIS', 'MINDTREE', 'PAGEIND', 'ZEEL', 'BEL', 'TRENT', 'CROMPTON', 'JUBLFOOD',
'DLF', 'SBICARD', 'SIEMENS', 'BANDHANBNK', 'IRCTC', 'LAURUSLABS', 'PIIND', 'INDIGO', 'INDUSTOWER','ICICIPRULI', 'MOTHERSON', 'AARTIIND', 'FEDERALBNK', 'BANKBARODA', 'PERSISTENT', 'HINDPETRO', 'ACC',
'AUROPHARMA', 'COLPAL', 'GODREJPROP', 'MFSL', 'LUPIN', 'BIOCON', 'ASHOKLEY', 'BHARATFORG', 'BERGEPAINT','JINDALSTEL', 'ASTRAL', 'IEX', 'NMDC', 'CONCOR', 'INDHOTEL', 'BALKRISIND', 'PETRONET', 'CANBK', 'ALKEM',
'DIXON', 'DEEPAKNTR', 'DALBHARAT', 'TVSMOTOR', 'ATUL', 'HDFCAMC', 'TATACOMM', 'MUTHOOTFIN', 'TATACHEM','SAIL', 'IDFCFIRSTB', 'PFC', 'BOSCHLTD', 'MRF', 'NAVINFLUOR', 'CUMMINSIND', 'IGL', 'IPCALAB', 'COFORGE',
'ESCORTS', 'TORNTPHARM', 'LTTS', 'RECLTD', 'LICHSGFIN', 'BATAINDIA', 'HAL', 'PNB', 'GUJGASLTD', 'UBL','3MINDIA','ABB','AIAENG','APLAPOLLO','AARTIDRUGS','AAVAS','ABBOTINDIA','ADANIGREEN','ATGL','ABCAPITAL',
'ABFRL','ABSLAMC','ADVENZYMES','AEGISCHEM','AFFLE','AJANTPHARM','ALKYLAMINE','ALLCARGO','AMARAJABAT','AMBER','ANGELONE','ANURAS','APTUS','ASAHIINDIA','ASTERDM','ASTRAZEN','AVANTIFEED','DMART','BASF',
'BSE','BAJAJELEC','BAJAJHLDNG','BALAMINES','BALRAMCHIN','BANKINDIA','MAHABANK','BAYERCROP','BDL','BEL','BHEL','BIRLACORPN','BSOFT','BLUEDART','BLUESTARCO','BORORENEW','BOSCHLTD','BRIGADE','BCG','MAPMYINDIA'
]
target_stocks_list = []
target_stocks = pd.DataFrame()
for stock in stock:
vol = get_history(symbol=stock,
start=start1,
end=end1)
d_vol = pd.concat([vol['Deliverable Volume']])
symbol_s = pd.concat([vol['Symbol']])
close = pd.concat([vol['Close']])
df = pd.DataFrame(symbol_s)
df['D_vol'] = d_vol
# print(df)
cond = df['D_vol'].iloc[-1] > max(df['D_vol'].iloc[-91:-1])
if(cond):
target_stocks_list.append(stock)
target_stocks = pd.concat([target_stocks, df])
print(target_stocks_list)
file_name = f'{datetime.datetime.now().day}-{datetime.datetime.now().month}-{datetime.datetime.now().year}.csv'
target_stocks.to_csv(f'D:/HUGE VOLUME SPURTS/first 250/SEP 2022/{file_name}')
pd.set_option('display.max_columns',10)
pd.set_option('display.max_rows',2000)
print(target_stocks)
ERROR:-
C:\python\Python310\python.exe "C:/Users/Yogesh_PC/PycharmProjects/future oi data analysis/trial2.py"
Traceback (most recent call last):
File "C:\Users\Yogesh_PC\PycharmProjects\future oi data analysis\trial2.py", line 64, in <module>
cond = df['D_vol'].iloc[-1] > max(df['D_vol'].iloc[-91:-1])
File "C:\python\Python310\lib\site-packages\pandas\core\indexing.py", line 967, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "C:\python\Python310\lib\site-packages\pandas\core\indexing.py", line 1520, in _getitem_axis
self._validate_integer(key, axis)
File "C:\python\Python310\lib\site-packages\pandas\core\indexing.py", line 1452, in _validate_integer
raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
Process finished with exit code 1
Above code gives the historical stock data of Indian stock market. The data is updated on website after market closed around 8:00PM to 9:00PM daily. Then I run my code. For most of the days my code gives output without any error but frequently it throws an error which showed above.
There are around 150-200 stocks in my code. This error occurs because some time exchange do not update the data of one or two stocks from the above list that is why this error comes.
So please post the code which will skip the particular one or two stocks which are not updated and should give the output for rest all stocks.
for example:- stocks = ['DLF', 'SBICARD', 'SIEMENS', 'BANDHANBNK', 'IRCTC', 'LAURUSLABS', 'PIIND',
'INDIGO', 'INDUSTOWER','ICICIPRULI', 'MOTHERSON']
in above stocks suppose exchange didn't update the data of 'IRCTC' and rest all stocks are up to date then due to 'IRCTC' my code throws error and it is not showing data which is updated.
Thank you.
The "out-of-bounds" error indicates you're trying to access a part of the dataframe series that doesn't exist. It's most likely caused by df['D_vol'] being less than 90 items long when you try to do
df['D_vol'].iloc[-91:-1]
Edit:
add a length check before the offending line:
if df['D_vol'].size > 90:
cond = df['D_vol'].iloc[-1] > max(df['D_vol'].iloc[-91:-1])
if(cond):
target_stocks_list.append(stock)
target_stocks = pd.concat([target_stocks, df])

How can i sort Binance historical candles for multiple pairs across multiple timeframes

I'm downloading historical candlestick data for multiple crypto pairs across different timeframes from the binance api, i would like to know how to sort this data according to pair and timeframe and check which pair on which timeframe executes my code, the following code is what i use to get historical data
import requests
class BinanceFuturesClient:
def __init__(self):
self.base_url = "https://fapi.binance.com"
def make_requests(self, method, endpoint, data):
if method=="GET":
response = requests.get(self.base_url + endpoint, params=data)
return response.json()
def get_symbols(self):
symbols = []
exchange_info = self.make_requests("GET", "/fapi/v1/exchangeInfo", None)
if exchange_info is not None:
for symbol in exchange_info['symbols']:
if symbol['contractType'] == 'PERPETUAL' and symbol['quoteAsset'] == 'USDT':
symbols.append(symbol['pair'])
return symbols
def initial_historical_data(self, symbol, interval):
data = dict()
data['symbol'] = symbol
data['interval'] = interval
data['limit'] = 35
raw_candle = self.make_requests("GET", "/fapi/v1/klines", data)
candles = []
if raw_candle is not None:
for c in raw_candle:
candles.append(float(c[4]))
return candles[:-1]
running this code
print(binance.initial_historical_data("BTCUSDT", "5m"))
will return this as the output
[55673.63, 55568.0, 55567.89, 55646.19, 55555.0, 55514.53, 55572.46, 55663.91, 55792.83, 55649.43,
55749.98, 55680.0, 55540.25, 55470.44, 55422.01, 55350.0, 55486.56, 55452.45, 55507.03, 55390.23,
55401.39, 55478.63, 55466.48, 55584.2, 55690.03, 55760.81, 55515.57, 55698.35, 55709.78, 55760.42,
55719.71, 55887.0, 55950.0, 55980.47]
which is a list of closes
i want to loop through the code in such a manner that i can return all the close prices for the pairs and timeframes i need and sort it accordingly, i did give it a try but am just stuck at this point
period = ["1m", "3m", "5m", "15m"]
binance = BinanceFuturesClient()
symbols = binance.get_symbols()
for symbol in symbols:
for tf in period:
historical_candles = binance.initial_historical_data(symbol, tf)
# store values and run through strategy
You can use my code posted below. It requires python-binance package to be installed on your environment and API key/secret from your Binance account. Method tries to load data by weekly chunks (parameter step) and supports resending requests on failures after timeout. It may helps when you need to fetch huge amount of data.
import pandas as pd
import pytz, time, datetime
from binance.client import Client
from tqdm.notebook import tqdm
def binance_client(api_key, secret_key):
return Client(api_key=api_key, api_secret=secret_key)
def load_binance_data(client, symbol, start='1 Jan 2017 00:00:00', timeframe='1M', step='4W', timeout_sec=5):
tD = pd.Timedelta(timeframe)
now = (pd.Timestamp(datetime.datetime.now(pytz.UTC).replace(second=0)) - tD).strftime('%d %b %Y %H:%M:%S')
tlr = pd.DatetimeIndex([start]).append(pd.date_range(start, now, freq=step).append(pd.DatetimeIndex([now])))
print(f' >> Loading {symbol} {timeframe} for [{start} -> {now}]')
df = pd.DataFrame()
s = tlr[0]
for e in tqdm(tlr[1:]):
if s + tD < e:
_start, _stop = (s + tD).strftime('%d %b %Y %H:%M:%S'), e.strftime('%d %b %Y %H:%M:%S')
nerr = 0
while nerr < 3:
try:
chunk = client.get_historical_klines(symbol, timeframe.lower(), _start, _stop)
nerr = 100
except e as Exception:
nerr +=1
print(red(str(e)))
time.sleep(10)
if chunk:
data = pd.DataFrame(chunk, columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore' ])
data.index = pd.to_datetime(data['timestamp'].rename('time'), unit='ms')
data = data.drop(columns=['timestamp', 'close_time']).astype(float).astype({
'ignore': bool,
'trades': int,
})
df = df.append(data)
s = e
time.sleep(timeout_sec)
return df
How to use
c = binance_client(<your API code>, <your API secret>)
# loading daily data from 1/Mar/21 till now (your can use other timerames like 1m, 5m etc)
data = load_binance_data(c, 'BTCUSDT', '2021-03-01', '1D')
It returns indexed DataFrame with loaded data:
time
open
high
low
close
volume
quote_av
trades
tb_base_av
tb_quote_av
ignore
2021-03-02 00:00:00
49595.8
50200
47047.6
48440.7
64221.1
3.12047e+09
1855583
31377
1.52515e+09
False
2021-03-03 00:00:00
48436.6
52640
48100.7
50349.4
81035.9
4.10952e+09
2242131
40955.4
2.07759e+09
False
2021-03-04 00:00:00
50349.4
51773.9
47500
48374.1
82649.7
4.07984e+09
2291936
40270
1.98796e+09
False
2021-03-05 00:00:00
48374.1
49448.9
46300
48751.7
78192.5
3.72713e+09
2054216
38318.3
1.82703e+09
False
2021-03-06 00:00:00
48746.8
49200
47070
48882.2
44399.2
2.14391e+09
1476474
21500.6
1.03837e+09
False
Next steps are up to you and dependent on how would you like to design your data structure. In simplest case you could store data into dictionaries:
from collections import defaultdict
data = defaultdict(dict)
for symbol in ['BTCUSDT', 'ETHUSDT']:
for tf in ['1d', '1w']:
historical_candles = load_binance_data(c, symbol, '2021-05-01', timeframe=tf)
# store values and run through strategy
data[symbol][tf] = historical_candles
to get access to your OHLC you just need following: data['BTCUSDT']['1d'] etc.

building panda dataframe from cloudant data, error: If using all scalar values, you must pass an index

I'm just starting with pandas. All the answers I found for the error message do not resolve my error. I'm trying to build a dataframe from a dictionary constructed from an IBM cloudant query. I'm using a jupyter notebook. The specific error message is: If using all scalar values, you must pass an index
the section of code where I think my error is, is here:
def read_high_low_temp(location):
USERNAME = "*************"
PASSWORD = "*************"
client = Cloudant(USERNAME,PASSWORD, url = "https://**********" )
client.connect()
my_database = client["temps"]
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'temp': 'desc'}])
temp_dict={}
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(columns = ['Temperature','Time','Date'])
df.set_index('Time', inplace= True)
for row in temp_dict:
value_list.append(row['temp'])
temp_df=pd.DataFrame({'Temperature':row['temp'],'Time':row['t'], 'Date':row['d']}, index=['Time'])
df=df.append(temp_df)
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
return message, df
my data (Output from Jupyter) looks like this:
Temperature Time Date
Time 51.6 05:07:18 12-31-2020
Time 51.6 04:59:00 12-31-2020
Time 51.5 04:50:31 12-31-2020
Time 51.5 05:15:38 12-31-2020
Time 51.5 05:03:09 12-31-2020
... ... ... ...
Time 45.3 11:56:34 12-31-2020
Time 45.3 11:52:22 12-31-2020
Time 45.3 11:14:15 12-31-2020
Time 45.2 10:32:05 12-31-2020
Time 45.2 10:36:22 12-31-2020
[164 rows x 3 columns]
my full code looks like:
import numpy as np
import pandas as pd
import seaborn as sns
import os, shutil, glob, time, subprocess, re, sys, sqlite3, logging
#import RPi.GPIO as GPIO
from datetime import datetime
import datetime as dt
import cloudant
from cloudant.client import Cloudant
from cloudant.query import Query
from cloudant.result import QueryResult
from cloudant.error import ResultException
import seaborn as sns
def read_high_low_temp(location):
USERNAME = "******"
PASSWORD = "******"
client = Cloudant(USERNAME,PASSWORD, url = "********" )
client.connect()
# location='Backyard'
my_database = client["temps"]
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'temp': 'desc'}])
temp_dict={}
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(columns = ['Temperature','Time','Date'])
df.set_index('Time')
for row in temp_dict:
temp_df=pd.DataFrame({'Temperature':row['temp'],'Time':row['t'], 'Date':row['d']}, index=['Time'])
df=df.append(temp_df)
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
return message, df
print ("Cloudant Jupyter Query test\nThe hour = ",dt.datetime.now().hour)
msg1, values=read_high_low_temp("Backyard")
print (msg1)
print(values)
sns.lineplot(values)
The full error message from Jupyter is:
C:\Users\ustl02870\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-2-34956d8dafb0> in <module>
53
54 #df = sns.load_dataset(values)
---> 55 sns.lineplot(values)
56 #print (values)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\relational.py in lineplot(x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, units, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend, ax, **kwargs)
686 data=data, variables=variables,
687 estimator=estimator, ci=ci, n_boot=n_boot, seed=seed,
--> 688 sort=sort, err_style=err_style, err_kws=err_kws, legend=legend,
689 )
690
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\relational.py in __init__(self, data, variables, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend)
365 )
366
--> 367 super().__init__(data=data, variables=variables)
368
369 self.estimator = estimator
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in __init__(self, data, variables)
602 def __init__(self, data=None, variables={}):
603
--> 604 self.assign_variables(data, variables)
605
606 for var, cls in self._semantic_mappings.items():
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in assign_variables(self, data, variables)
666 self.input_format = "long"
667 plot_data, variables = self._assign_variables_longform(
--> 668 data, **variables,
669 )
670
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in _assign_variables_longform(self, data, **kwargs)
924 # Construct a tidy plot DataFrame. This will convert a number of
925 # types automatically, aligning on index in case of pandas objects
--> 926 plot_data = pd.DataFrame(plot_data)
927
928 # Reduce the variables dictionary to fields with valid data
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
527
528 elif isinstance(data, dict):
--> 529 mgr = init_dict(data, index, columns, dtype=dtype)
530 elif isinstance(data, ma.MaskedArray):
531 import numpy.ma.mrecords as mrecords
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in init_dict(data, index, columns, dtype)
285 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
286 ]
--> 287 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
288
289
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity)
78 # figure out the index, if necessary
79 if index is None:
---> 80 index = extract_index(arrays)
81 else:
82 index = ensure_index(index)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in extract_index(data)
389
390 if not indexes and not raw_lengths:
--> 391 raise ValueError("If using all scalar values, you must pass an index")
392
393 if have_series:
ValueError: If using all scalar values, you must pass an index
I resolved my problem with help/direction from #Ena, as it turned out I made several mistake. In layman's terms 1) I was trying to plot a tuple when it should have been a dataframe, 2) My data was in a dictionary, I was iterating through it trying to build a tuple when I should used built in panda tools to build a dataframe right from the dictionary 3) my code should have been written so as to NOT have scalar values so as NOT to need an index, and finally 4) I was trying to use a tuple as data for my seaborn plot when it should have been a dataframe. Here is the code that now works.
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import pandas as pd
import seaborn as sns
import os, shutil, glob, time, subprocess, sys
from datetime import datetime
import datetime as dt
from matplotlib import pyplot as plt
import cloudant
from cloudant.client import Cloudant
from cloudant.query import Query
from cloudant.result import QueryResult
from cloudant.error import ResultException
import seaborn as sns
def read_high_low_temp(location):
USERNAME = "****************"
PASSWORD = "*****************"
client = Cloudant(USERNAME,PASSWORD, url = "**************************" )
client.connect()
my_database = client["temps"]
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'t': 'asc'}])
temp_dict={}
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(temp_dict)
value_list=[]
for row in temp_dict:
value_list.append(row['temp'])
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
return message, df
msg1, values=read_high_low_temp("Backyard")
g=sns.catplot(x='t', y='temp', data=values, kind='bar',color="darkblue",height=8.27, aspect=11.7/8.27)
print("the minimum temp is:", values['temp'].min(), " the maximum temp is:", values['temp'].max())
plt.xticks(rotation=45)
g.set(xlabel='Time', ylabel='Temperature')
plt.ylim(values['temp'].min()-1, values['temp'].max()+1)
plt.savefig("2021-01-01-temperature graph.png")
g.set_xticklabels(step=10)
The problem is that you assigned "Time" as an index everywhere. Look how the data frame looks in seaborn.lineplot documentation: https://seaborn.pydata.org/generated/seaborn.lineplot.html
Can you try without this df.set_index('Time') part?

How to correctly log trades using backtrader in python?

Using data from csv file in 60 minute format. Trying to log all buy/sell decisions using backtrader backtester.
Issue: The time logger doesn't seem to work properly as all hours are outputted as "23:59:59.999989" when this is not the case.
See sample code:
def log(self, txt, dt=None):
dt = dt or self.data.datetime.datetime(0)
print('%s, %s' % (dt, txt))
def next(self):
if self.data.close > self.sma1:
self.buy()
self.log('BUY CREATE, exectype Market, price %.2f' % self.data.close[0])
#Get Data
data = btfeeds.GenericCSVData(dataname='AAPL.csv',fromdate=datetime.datetime(2018, 1, 2),todate=datetime.datetime(2020, 4, 28),nullvalue=0.0,dtformat=('%Y-%m-%d %H:%M:%S'),datetime=0,open=1,low=2,high=3,close=4,volume=5,openinterest=6)
Sample Output:
2019-07-12 23:59:59.999989, BUY CREATE, exectype Market, price 203.52
2019-07-12 23:59:59.999989, BUY CREATE, exectype Market, price 203.30
2019-07-12 23:59:59.999989, BUY CREATE, exectype Market, price 203.24
2019-07-12 23:59:59.999989, BUY CREATE, exectype Market, price 203.24
2019-07-15 23:59:59.999989, BUY CREATE, exectype Market, price 204.11
Data & Format from Csv file:
2018-01-02 9:30:00 AM
This problem also took me few hours. And I find the solution from another web. enter link description here
For minute data tell cerebro that you are using minute data (timeframe) and how many minutes per bar (compression).
#Get Data
data = btfeeds.GenericCSVData(
dataname='AAPL.csv',
fromdate=datetime.datetime(2018, 1, 2),
todate=datetime.datetime(2020, 4, 28),
nullvalue=0.0,
dtformat=('%Y-%m-%d %H:%M:%S'),
**timeframe=bt.TimeFrame.Minutes,**
datetime=0,
open=1,
low=2,
high=3,
close=4,
volume=5,
openinterest=6)

Pandas Yahoo Datareader RemoteDataError when start date or end date is current date

I am running the below program to extract the stock information:
import datetime
import pandas as pd
from pandas import DataFrame
from pandas.io.data import DataReader
symbols_list = ['AAPL', 'TSLA', 'YHOO','GOOG', 'MSFT','ALTR','WDC','KLAC']
symbols=[]
for ticker in symbols_list:
r = DataReader(ticker, "yahoo",
start=datetime.datetime(2015, 4, 17))
# add a symbol column
r['Symbol'] = ticker
symbols.append(r)
# concatenate all the dfs
df = pd.concat(symbols)
#define cell with the columns that i need
cell= df[['Symbol','Open','High','Low','Adj Close','Volume']]
#changing sort of Symbol (ascending) and Date(descending) setting Symbol as first column and changing date format
cell.reset_index().sort(['Symbol', 'Date'], ascending=[1,0]).set_index('Symbol').to_csv('stock.csv', date_format='%d/%m/%Y')
This runs perfectly. But when I change the start date to today i.e (2015, 4, 20), then the program errors out. I have tried giving end date as well but no use. Below is the error that I get:
UnboundLocalError Traceback (most recent call last)
<ipython-input-38-a05c721d551a> in <module>()
8 for ticker in symbols_list:
9 r = DataReader(ticker, "yahoo",
---> 10 start=datetime.datetime(2015, 4, 20))
11 # add a symbol column
12 r['Symbol'] = ticker
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in DataReader(name, data_source, start, end, retry_count, pause)
75 return get_data_yahoo(symbols=name, start=start, end=end,
76 adjust_price=False, chunksize=25,
---> 77 retry_count=retry_count, pause=pause)
78 elif data_source == "google":
79 return get_data_google(symbols=name, start=start, end=end,
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in get_data_yahoo(symbols, start, end, retry_count, pause, adjust_price, ret_index, chunksize, interval)
418 raise ValueError("Invalid interval: valid values are 'd', 'w', 'm' and 'v'")
419 return _get_data_from(symbols, start, end, interval, retry_count, pause,
--> 420 adjust_price, ret_index, chunksize, 'yahoo')
421
422
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in _get_data_from(symbols, start, end, interval, retry_count, pause, adjust_price, ret_index, chunksize, source)
359 # If a single symbol, (e.g., 'GOOG')
360 if isinstance(symbols, (compat.string_types, int)):
--> 361 hist_data = src_fn(symbols, start, end, interval, retry_count, pause)
362 # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
363 elif isinstance(symbols, DataFrame):
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in _get_hist_yahoo(sym, start, end, interval, retry_count, pause)
206 '&g=%s' % interval +
207 '&ignore=.csv')
--> 208 return _retry_read_url(url, retry_count, pause, 'Yahoo!')
209
210
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in _retry_read_url(url, retry_count, pause, name)
175 #Get rid of unicode characters in index name.
176 try:
--> 177 rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore')
178 except AttributeError:
179 #Python 3 string has no decode method.
UnboundLocalError: local variable 'rs' referenced before assignment
Putting together the suggestions by #JohnE, the code below seems to do the job:
import pandas as pd
symbols_list = ['AAPL', 'TSLA', 'YHOO','GOOG', 'MSFT','ALTR','WDC','KLAC']
result = []
for ticker in symbols_list:
url = 'http://chartapi.finance.yahoo.com/instrument/1.0/%s/chartdata;type=quote;range=1d/csv' % ticker.lower()
data = pd.read_csv(url, skiprows=17)
data.columns = ['timestamp', 'close', 'high', 'low', 'open', 'close']
data['ticker'] = ticker
result.append(data)
pd.concat(result)
There result looks like this:
timestamp close high low open close ticker
0 1429536719 125.5500 125.5700 125.4170 125.5100 183600 AAPL
1 1429536772 125.5900 125.6399 125.4600 125.5200 215000 AAPL
2 1429536835 125.7500 125.8000 125.5600 125.5901 348500 AAPL
...
367 1429559941 58.5700 58.5800 58.5400 58.5800 119100 KLAC
368 1429559946 58.5700 58.5700 58.5700 58.5700 0 KLAC
369 1429560000 58.5600 58.5600 58.5600 58.5600 0 KLAC

Categories

Resources