Function output into loop - python

I am trying to extend the following code, which works smoothly: I want to add a date parameter to the function and then run the function in a loop with different dates:
FUNCTION V1:
def getOHLCV(currencies):
    """Request daily OHLCV history in JPY and return the close series per currency.

    ``currencies`` is an iterable of symbol strings; they are sent as one
    comma-separated ``symbol`` parameter together with ``count=91`` and
    ``interval=daily``.

    Returns a list of pandas Series, one per key of the response's ``data``
    mapping, each holding the ``close`` column indexed by the trimmed
    timestamp string and named after that key. If anything raises along the
    way, the most recently parsed payload (``{}`` when nothing was parsed) is
    printed and the series collected so far are returned.
    """
    c_price = []
    data = {}
    try:
        query = {
            'symbol': ",".join(currencies),
            'count': '91',
            'interval': 'daily',
            'convert': 'JPY',
        }
        session = Session()
        session.headers.update({
            'Accepts': 'application/json',
            'X-CMC_PRO_API_KEY': 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
        })
        response = session.get(
            'https://pro-api.coinmarketcap.com/v1/cryptocurrency/ohlcv/historical',
            params=query,
        )
        data = json.loads(response.text)
        for currency, payload in data['data'].items():
            quotes = pd.DataFrame.from_records(
                [entry['quote']['JPY'] for entry in payload['quotes']]
            )
            stamps = pd.to_datetime(quotes['timestamp']).astype(str)
            # Drop the last 15 characters of the string form; presumably this
            # leaves only the YYYY-MM-DD part - confirm against real output.
            quotes['timestamp'] = stamps.str[:-15]
            closes = quotes.set_index('timestamp').close
            c_price.append(closes.rename(currency))
    except Exception:
        print (data)
    return c_price
# Query the symbols in two batches (the first 61, then the remainder) and
# join the returned series into one list.
c_price = getOHLCV(available[:61]) + getOHLCV(available[61:])
# One column per currency, aligned on the sorted union of the date index.
c_price = pd.concat(c_price, axis=1, sort=True)
pd.set_option('display.max_columns', 200)
# Flip to one row per currency and order the rows by currency name.
c_price = c_price.T
c_price.index.name = 'currency'
c_price.sort_index(axis=0, ascending=True, inplace=True)
OUTPUT:
2019-07-25 2019-07-26 2019-07-27 2019-07-28 2019-07-29 \
currency
1WO 2.604104 2.502526 2.392313 2.418967 2.517868
ABX 1.015568 0.957774 0.913224 0.922612 1.037273
ADH 0.244782 0.282976 0.309931 0.287933 0.309613
... ... ... ... ... ...
XTX 0.156103 0.156009 0.156009 0.165103 0.156498
ZCO 0.685255 0.661324 0.703521 0.654763 0.616204
ZPR 0.214395 0.204968 0.181529 0.178460 0.177596
FUNCTION V2:
The V2 function adds a start_dates parameter, and the function is then called in a loop with a different value for that parameter each time. The issue is that I get an empty dataframe from it. I assume there is a problem with the date, but I don't know where. Any help is appreciated.
def getOHLCV(currencies, start_dates):
...
'symbol': ",".join(currencies),
'time_start': ",".join(start_dates),
...
# Two start dates: 91 and 182 days before today (the x = 0 entry is removed by the [1:] slice).
date_list = [(date.today() - timedelta(days= x * 91)) for x in range(3)][1:]
one = []
# NOTE(review): indentation was lost in this paste; presumably the statements
# below up to the concat calls form the loop body - confirm.
for i in date_list:
# The per-date list is re-created on every pass.
c_price = []
# NOTE(review): `i` is a datetime.date here, while the V2 function calls ",".join(start_dates) on it.
c_price.extend(getOHLCV(available[:61], i))
c_price.extend(getOHLCV(available[61:], i))
c_price = pd.concat(c_price, axis=1, sort=True)
# NOTE(review): c_price was rebound to the concatenated result on the previous
# line, so this call no longer receives a list of pandas objects.
one = pd.concat(c_price, axis=1, sort=True)
pd.set_option('display.max_columns', 200)

The list you are extending is cleared at each iteration of the for loop. It can be fixed like so:
# Two start dates: 91 and 182 days before today (the x = 0 entry, today, is sliced off).
date_list = [(date.today() - timedelta(days=x * 91)) for x in range(3)][1:]
pd.set_option('display.max_columns', 200)

# The accumulator lives outside the loop so every date's result is kept.
# It holds one DataFrame per start date and is never rebound inside the loop.
c_price = []
for i in date_list:
    # Series returned for both symbol batches for this start date.
    batch = getOHLCV(available[:61], i) + getOHLCV(available[61:], i)
    # pd.concat raises ValueError on an empty list, so skip dates for which
    # the function returned nothing.
    if batch:
        # Columns = currencies, rows = dates of this window.
        c_price.append(pd.concat(batch, axis=1, sort=True))

# Stack the per-date windows along the date index. pd.concat must receive the
# list of frames, never an already-concatenated DataFrame.
one = pd.concat(c_price, axis=0, sort=True) if c_price else pd.DataFrame()
Hope that works for you
EDIT 1
So we need to fix the error : "time_start" must be a valid ISO 8601 timestamp or unix time value'
This is because the return from this
date_list = [(date.today() - timedelta(days= x * 91)) for x in range(3)][1:]
Is this
[datetime.date(2019, 7, 24), datetime.date(2019, 4, 24)]
So we need to convert the list from datetime objects to something that the API will understand, we can do it the following way
date_list = list(map(date.isoformat, date_list))
And we get the following output
['2019-07-24', '2019-04-24']
Edit 2
The error happens when we call join on something that isn't a list of strings (here, a single date string), so we can fix it by doing
'time_start': start_dates
Instead of doing
'time_start': ",".join(start_dates),

Related

Removing utc info from yfinance dataframe

How can I remove the UTC portion of a DataFrame created from yfinance? Every example and approach I have seen has failed.
eg:
df = yf.download('2022-01-01', '2023-01-06', interval = '60m' )
pd.to_datetime(df['Datetime'])
error: 3806 #If we have a listlike key, _check_indexing_error will raise
KeyError: 'Datetime'
As well as the following approaches
df = df.reset_index()
df = pd.DataFrame(df, columns = ['Datetime', "Close"])
df.rename(columns = {'Date': 'ds'}, inplace = True)
df.rename(columns = {'Close':'y'}, inplace = True)
#df['ds'] = df['ds'].dt.date
#df['ds'] = datetime.fromtimestamp(df['ds'], tz = None)
#df['ds'] = df['ds'].dt.floor("Min")
#df['ds'] = pd.to_datetime(df['ds'].dt.tz_convert(None))
#df['ds'] = pd.to_datetime['ds']
#pd.to_datetime(df['ds'])
df['ds'].dt.tz_localize(None)
print(df)
with similar errors, Any help or pointer will greatly appreciated I have spent the entire morning on this.
Thanks in advance
BTT
Your code interprets '2022-01-01' as the first and required argument tickers.
This date is not a valid ticker, so yf.download() does not return any price and volume data.
Try:
df = yf.download(tickers='AAPL', start='2022-01-01', end='2023-01-06', interval = '60m' )
df.index = df.index.tz_localize(None)

How can I convert .append to .concat pandas python

I have this data entry:
[{'id': 2269396, 'from': 1647086100, 'at': 1647086160000000000, 'to': 1647086160, 'open': 1.072652, 'close': 1.072691, 'min': 1.072641, 'max': 1.072701, 'volume': 0},..]
Applying this indexing with pandas:
# Fragment of a method body (uses `self` and ends with `return`); the
# enclosing def is not part of this paste.
current = self.getAllCandles(self.active_id,start_candle)
main = pd.DataFrame()
useful_frame = pd.DataFrame()
# NOTE(review): indentation was lost; presumably the next three statements are the loop body.
for candle in current:
# Build a one-row frame from the candle's values (keys become columns after .T) and drop 'at'.
useful_frame = pd.DataFrame(list(candle.values()),index = list(candle.keys())).T.drop(columns = ['at'])
# Index by the 'from' values; the 'from' column itself is kept, only 'id' is dropped.
useful_frame = useful_frame.set_index(useful_frame['from']).drop(columns = ['id'])
main = main.append(useful_frame)
# NOTE(review): the result of drop_duplicates() is not assigned, so `main` is unchanged by this line.
main.drop_duplicates()
final_data = main.drop(columns = {'to'})
# Keep only the first row for each repeated index value.
final_data = final_data.loc[~final_data.index.duplicated(keep = 'first')]
return final_data
After that I have the following result:
from open close min max volume
from
1.647086e+09 1.647086e+09 1.072652 1.072691 1.072641 1.072701 0.0
... ... ... ... ... ... ...
Since df.append() will be deprecated, I'm struggling to execute the same instructions using df.concat(). But I'm not getting it, how could I change that?
Thank you all, I made a small modification to the code suggested by our friend Stuart Berg #stuart-berg, and it was perfect:
current = self.getAllCandles(self.active_id, start_candle)
frames = []
useful_frame = pd.DataFrame.from_dict(current, orient='columns')
useful_frame = useful_frame.set_index('from')
useful_frame = useful_frame.drop(columns=['at', 'id'])
frames.append(useful_frame)
main = pd.concat(frames).drop_duplicates()
final_data = main.drop(columns='to')
final_data = final_data.loc[~final_data.index.duplicated()]
return final_data
I think this is what you're looking for:
current = self.getAllCandles(self.active_id, start_candle)

# Collect one single-row frame per candle, then concatenate once at the end
# (this replaces the repeated DataFrame.append calls).
frames = []
for candle in current:
    # A candle is a flat dict of scalars. Wrapping it in a list yields a
    # one-row frame; DataFrame.from_dict(candle) would raise
    # "If using all scalar values, you must pass an index".
    useful_frame = pd.DataFrame([candle])
    useful_frame = useful_frame.set_index('from')
    useful_frame = useful_frame.drop(columns=['at', 'id'])
    frames.append(useful_frame)

# Remove fully duplicated rows, drop the 'to' column, then keep the first row
# for each repeated 'from' index value.
main = pd.concat(frames).drop_duplicates()
final_data = main.drop(columns='to')
final_data = final_data.loc[~final_data.index.duplicated()]
Create an empty python list and then append all the series to the list. Finally call pandas' concat on that list, this will give you that dataframe.

How to make sure that the data in this PyTrends function comes out in YYYY-MM-DD format and not YYYY-MM-DD 00:00:00

I have the following function:
def my_funct(Keyword, Dates, Country, Col_name):
    """Return interest-over-time for one keyword as a two-column DataFrame.

    The first suggestion returned by ``pytrend.suggestions`` for ``Keyword``
    supplies the ``mid`` code that is actually queried. ``Dates`` is passed
    through as the ``timeframe`` and ``Country`` as the ``geo`` of the
    payload. The result has the columns ``'date'`` and ``Col_name``.

    A ``requests.exceptions.Timeout`` during a query is reported with a
    printed message and that query is skipped.
    """
    top_suggestion = pytrend.suggestions(keyword=Keyword)[0]
    exact_keywords = pd.DataFrame([top_suggestion])['mid'].to_list()
    # One single-element keyword list per query.
    keyword_batches = [[code] for code in exact_keywords]

    category = 0      # 0 is passed as `cat`
    search_type = ''  # empty `gprop`; default is 'web searches'

    results = {}
    for geo in [Country]:
        for batch in keyword_batches:
            try:
                pytrend.build_payload(
                    kw_list=batch,
                    timeframe=Dates,
                    geo=geo,
                    cat=category,
                    gprop=search_type,
                )
                # Keys count up from 1, advancing only after a successful fetch.
                results[len(results) + 1] = pytrend.interest_over_time()
                time.sleep(6)
            except requests.exceptions.Timeout:
                print("Timeout occured")

    df_trends = pd.concat(results, axis=1)
    # Remove the outer column level introduced by concatenating a dict.
    df_trends.columns = df_trends.columns.droplevel(0)
    df_trends = df_trends.drop(columns='isPartial').reset_index(level=0)
    df_trends.columns = ['date', Col_name]
    return df_trends
Then I call the function using:
x1 = my_funct('Unemployment', '2004-01-04 2009-01-04', 'DK', 'Unemployment (Denmark)')
Then I put that into a df:
df1 = pd.DataFrame(x1)
Once I convert that df to excel, how do I ensure that it is in YYYY-MM-DD format without the dangling 00:00:00? Anytime I convert it comes out with hours and seconds.
I tried df1 = pd.DataFrame(x1).dt.strftime('%Y-%m-%d') but it says that this cannot be used?
Please help
Thanks
You are trying pass dt.strftime on the entire dataframe, but you need to pass it on the date column:
df1['date'] = df1['date'].dt.strftime('%Y-%m-%d')

Multiple variables loop and append dataframe

I am trying to loop over two lists to get all possible combinations, as in the loop below. I have difficulty understanding why the first part works and the second does not. Basically, it queries the same data but for every pairing of the two lists. Any help would be very much appreciated.
THE CODE:
base = ['BTC', 'ETH']
quoted = ['USDT', 'AUD','USD']
# Fetch rows from the cryptocompare `histoday` endpoint for symbol/comparison_symbol
# and return them as a DataFrame with an added 'timestamp' column.
#   all_data  - when truthy, '&allData=true' is appended to the URL
#   limit     - sent as the `limit` query value (default 90)
#   aggregate - sent as the `aggregate` query value (default 1)
#   exchange  - when non-empty, sent as the `e` query value
def daily_volume_historical(symbol, comparison_symbol, all_data=False, limit=90, aggregate=1, exchange=''):
url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}'\
.format(symbol.upper(), comparison_symbol.upper(), limit, aggregate)
if exchange:
url += '&e={}'.format(exchange)
if all_data:
url += '&allData=true'
page = requests.get(url)
data = page.json()['Data']
df = pd.DataFrame(data)
# Discard the last returned row.
df.drop(df.index[-1], inplace=True)
# Convert the 'time' values with datetime.fromtimestamp (local time of the machine).
df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
# NOTE(review): set_index returns a new frame and the result is not assigned,
# so the returned df still has its original index and a 'timestamp' column.
df.set_index('timestamp')
return df
## THIS CODE GIVES SOME DATA ##
# NOTE(review): 'CCCAGG' is passed positionally here, so it binds to `all_data`, not `exchange`.
volu = daily_volume_historical('BTC', 'USD', 'CCCAGG').set_index('timestamp').volumefrom
## THIS CODE GIVES EMPTY DATA FRAME ##
d_volu = []
# Every (base, quoted) pairing.
for a,b in [(a,b) for a in base for b in quoted]:
volu = daily_volume_historical(a, b, exchange= 'CCCAGG').volumefrom
# NOTE(review): `d_volu.append` is only referenced, not called, so nothing is added to the list.
d_volu.append
d_volu = pd.concat(d_volu, axis=1)
volu output sample:
timestamp
2010-07-17 09:00:00 20.00
2010-07-18 09:00:00 75.01
2010-07-19 09:00:00 574.00
2010-07-20 09:00:00 262.00
2010-07-21 09:00:00 575.00
2010-07-22 09:00:00 2160.00
2010-07-23 09:00:00 2402.50
2010-07-24 09:00:00 496.32
import itertools
base = ['BTC', 'ETH']
quoted = ['USDT', 'AUD','USD']
combinations = list(itertools.product(base, quoted))
def daily_volume_historical(symbol, comparison_symbol, all_data=False, limit=90, aggregate=1, exchange=''):
    """Fetch daily history rows for a pair from the cryptocompare histoday endpoint.

    Args:
        symbol: Base symbol; upper-cased and sent as ``fsym``.
        comparison_symbol: Quote symbol; upper-cased and sent as ``tsym``.
        all_data: When truthy, ``allData=true`` is added to the query.
        limit: Sent as the ``limit`` query value.
        aggregate: Sent as the ``aggregate`` query value.
        exchange: When non-empty, sent as the ``e`` query value.

    Returns:
        A DataFrame built from the response's ``Data`` list without its last
        row, plus a ``timestamp`` column holding ``datetime`` values derived
        from the ``time`` column. The frame keeps its default index, so
        callers that want a time index call ``.set_index('timestamp')``
        themselves.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = 'https://min-api.cryptocompare.com/data/histoday'
    # Let requests build and encode the query string instead of formatting it by hand.
    params = {
        'fsym': symbol.upper(),
        'tsym': comparison_symbol.upper(),
        'limit': limit,
        'aggregate': aggregate,
    }
    if exchange:
        params['e'] = exchange
    if all_data:
        params['allData'] = 'true'

    page = requests.get(url, params=params)
    # Fail loudly on HTTP errors rather than on a confusing JSON/KeyError later.
    page.raise_for_status()
    data = page.json()['Data']

    df = pd.DataFrame(data)
    # Discard the last returned row, as the original code did.
    df.drop(df.index[-1], inplace=True)
    # fromtimestamp converts to the local time zone of the machine.
    df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
    # The former bare `df.set_index('timestamp')` was removed: its result was
    # discarded, so it never changed the returned frame.
    return df
## SINGLE PAIR ##
# 'CCCAGG' is passed by keyword; passed positionally it would bind to
# `all_data` (the third parameter) and the exchange filter would be ignored.
volu = daily_volume_historical('BTC', 'USD', exchange='CCCAGG').set_index('timestamp').volumefrom

## ALL BASE/QUOTE COMBINATIONS ##
d_volu = []
for a, b in combinations:
    # Index by timestamp so the series align on dates when concatenated.
    volu = daily_volume_historical(a, b, exchange='CCCAGG').set_index('timestamp').volumefrom
    # append must be *called*; a bare `d_volu.append` adds nothing. Renaming
    # gives each column a distinct label such as 'BTC/USDT'.
    d_volu.append(volu.rename('{}/{}'.format(a, b)))
# One column per pair, rows aligned on the timestamp index.
d_volu = pd.concat(d_volu, axis=1)

Panda DataFrame Row Items IF Comparison doesnt return correct result

I retrieve data from quandl and load it to a pandas DF object.
Afterwards I calculate SMA values (SMA21, SMA55) based on "Last Price".
I add those SMA values as columns to my DF object.
I iterate through the DF to catch a buy signal.
I know the buy condition holds true for some dates, but my code does not print anything out. I expect it to print the buy condition at the very least.
as below you can see the following condition:
kitem['SMA21'] >= kitem['Last']
My code:
import requests
import pandas as pd
import json
class URL_Params:
    """Hold the pieces of a dataset request URL and assemble them on demand."""

    def __init__(self, endPoint, symboll, startDate, endDate, apiKey):
        """Store the URL components.

        All arguments are strings; they are concatenated verbatim by
        ``createURL`` (no escaping is applied).
        """
        self.endPoint = endPoint
        self.symboll = symboll
        self.startDate = startDate
        self.endDate = endDate
        self.apiKey = apiKey
        # Previously never initialised, so add_url raised AttributeError.
        self.url_list = []

    def createURL(self):
        """Return ``endPoint + symboll`` followed by the date range and API key query."""
        return (
            self.endPoint + self.symboll
            + '?start_date=' + self.startDate
            + '&end_date=' + self.endDate
            + '&api_key=' + self.apiKey
        )

    def add_url(self, _url):
        """Record ``_url`` in ``self.url_list``.

        NOTE(review): the original body only referenced ``self.url_list``;
        appending is the presumed intent - confirm.
        """
        self.url_list.append(_url)
# Holdings per asset; read by the signal loop further down.
my_portfolio = {'BTC':1.0, 'XRP':0, 'DSH':0, 'XMR':0, 'TotalBTCValue':1.0}
# Request configuration: base endpoint, dataset codes and date range.
_endPoint = 'https://www.quandl.com/api/v3/datasets/BITFINEX/'
_symbolls = ['BTCEUR','XRPBTC','DSHBTC','IOTBTC','XMRBTC']
_startDate = '2017-01-01'
_endDate = '2019-03-01'
_apiKey = '' #needs to be set for quandl
# Parsed JSON response per symbol, filled by the loop below.
my_data = {}
# NOTE(review): my_conns, my_col_names and orderbook are not used in the code shown here.
my_conns = {}
my_col_names = ['Date', 'High', 'Low', 'Mid', 'Last', 'Bid', 'Ask', 'Volume']
orderbook = []
#create connection and load data for each pair/market.
#load them in a dict for later use
# NOTE(review): indentation was lost; presumably the three statements below are the loop body.
for idx_symbol in _symbolls:
my_url_params = URL_Params(_endPoint,idx_symbol,_startDate,_endDate,_apiKey)
response = requests.get(my_url_params.createURL())
my_data[idx_symbol] = json.loads(response.text)
# Build the XRPBTC frame from the dataset rows, using the dataset's own column names.
my_raw_data_df_xrpbtc = pd.DataFrame(
    my_data['XRPBTC']['dataset']['data'],
    columns=my_data['XRPBTC']['dataset']['column_names'],
)
# Parse the dates, move them into the index, order chronologically and keep
# only the columns that are not listed in the drop below.
my_raw_data_df_xrpbtc['Date'] = pd.to_datetime(my_raw_data_df_xrpbtc['Date'])
my_raw_data_df_xrpbtc = (
    my_raw_data_df_xrpbtc
    .set_index('Date')
    .sort_index()
    .drop(columns=['Ask', 'Bid', 'Low', 'High', 'Mid'])
)
# Simple moving averages of 'Last' over 21, 55 and 200 rows, used for the buy/sell signal.
my_raw_data_df_xrpbtc['SMA21'] = my_raw_data_df_xrpbtc['Last'].rolling(window=21).mean()
my_raw_data_df_xrpbtc['SMA55'] = my_raw_data_df_xrpbtc['Last'].rolling(window=55).mean()
my_raw_data_df_xrpbtc['SMA200'] = my_raw_data_df_xrpbtc['Last'].rolling(window=200).mean()
# Walk the rows in index order and print a marker whenever a buy or sell condition is met.
# NOTE(review): indentation was lost; the nesting of the statements below is not recoverable from this paste.
for idx,kitem in my_raw_data_df_xrpbtc.iterrows():
# `is True` is an identity test against the built-in True object.
if (kitem['SMA21'] >= kitem['Last']) is True: #buy signal
print("buy0")
# NOTE(review): without parentheses this is a chained comparison,
# i.e. (my_portfolio['BTC'] > 0) and (0 is True).
if my_portfolio['BTC'] > 0 is True:
print("buy1")
# NOTE(review): also a chained comparison: (lhs >= rhs) and (rhs is True).
if (kitem['Last'] * my_portfolio['XRP']) >= (my_portfolio['BTC'] * 1.05) is True: #sell signal
print("sell0")
# NOTE(review): chained comparison again, same form as the 'BTC' check above.
if my_portfolio['XRP'] > 0 is True:
print("sell1")
I know that there are lots of rows that holds true but my code never enters this path of code so it does not print out what I expect.
Could anyone please help/comment what might be wrong?
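The reason is that your comparison is wrong. The result of kitem['SMA21'] >= kitem['Last'] is a numpy.bool_, not the built-in True. The is operator tests object identity, so comparing the result to True with is fails because it is not the same object. Also note that a condition written without parentheses, such as my_portfolio['BTC'] > 0 is True, is a chained comparison (my_portfolio['BTC'] > 0 and 0 is True) and is therefore always False.
If you change the comparison to == (or simply drop it and write if condition:) it will work as expected: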
if (kitem['SMA21'] >= kitem['Last']) == True:

Categories

Resources