Loop all combinations of multiple variables - python

I would like to see all the possible outputs of a function that takes multiple variables. To create all possible combinations I iterate over the inputs, which produces tuples. But I am not sure how to use these tuples inside the function call within the loop. The code below throws an error.
THE FUNCTION:
# Imports required by this snippet (not shown in the original post)
import requests
import pandas as pd
from datetime import datetime

def daily_volume_historical(symbol, comparison_symbol, all_data=False, limit=7, aggregate=1, exchange=''):
    url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}'\
        .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate)
    if exchange:
        url += '&e={}'.format(exchange)
    if all_data:
        url += '&allData=true'
    page = requests.get(url)
    data = page.json()['Data']
    df = pd.DataFrame(data)
    df.drop(df.index[-1], inplace=True)
    df['timestamp'] = [datetime.fromtimestamp(d) for d in df.time]
    df.set_index('timestamp')
    return df
THE CODE:
quoted = 'USD', 'EUR', 'JPY', 'SGD', 'HKD', 'IDR', 'AUD', 'PHP', 'CNY', 'INR', 'BTC', 'ETH', 'QASH', 'BCH', 'GUSD', 'USDC', 'GBP'
liqud_mar = 'Bitbank', 'Bitfinex', 'Bithumb', 'BitMEX', 'Bitstamp', 'Cexio', 'Coinbase', 'Coincheck', 'Gemini', 'itBit', 'Kraken', 'OKCoin'
p = list(itertools.product(quoted, liqud_mar))
d_volu = []
for x in p:
    volu = daily_volume_historical('QTUM', x[0], exchange= x[1]).volumefrom
    d_volu.append
d_volu = pd.concat(d_volu, axis=1)
THE ERROR:
Traceback (most recent call last):
  File "./urlcombin5.py", line 26, in <module>
    volu = daily_volume_historical('QTUM', x[0], exchange= x[1]).volumefrom
  File "./urlcombin5.py", line 15, in daily_volume_historical
    df.drop(df.index[-1], inplace=True)
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py", line 2084, in __getitem__
    return getitem(key)
IndexError: index -1 is out of bounds for axis 0 with size 0

You can try this:
for a, b in [(a, b) for a in quoted for b in liqud_mar]:
    volu = daily_volume_historical('QTUM', a, exchange=b).volumefrom
    d_volu.append(volu)  # append must actually be called with the Series

The answer is as follows:
combinations = list(itertools.product(base, quoted))

d_volu = []
for a, b in combinations:
    try:
        volu = daily_volume_historical(a, b, exchange='CCCAGG').set_index('timestamp').volumefrom
        d_volu.append(volu.rename(a))
    except:
        pass
d_volu = pd.concat(d_volu, axis=1)
d_volu = d_volu.groupby(d_volu.columns.values, axis=1).sum()
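For context, the IndexError in the traceback comes from df.drop(df.index[-1]) on an empty DataFrame: some quote-currency/exchange pairs simply return no 'Data' from the API. The accepted answer swallows those cases with a bare try/except; the sketch below (not the original poster's code; the helper name safe_daily_volume is made up) checks for an empty response explicitly and skips unavailable pairs:

import itertools
import requests
import pandas as pd
from datetime import datetime

quoted = ('USD', 'EUR', 'BTC')        # subset of the question's quote currencies
liqud_mar = ('Bitfinex', 'Kraken')    # subset of the question's exchanges

def safe_daily_volume(symbol, comparison_symbol, limit=7, aggregate=1, exchange=''):
    url = ('https://min-api.cryptocompare.com/data/histoday'
           '?fsym={}&tsym={}&limit={}&aggregate={}'
           .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate))
    if exchange:
        url += '&e={}'.format(exchange)
    data = requests.get(url).json().get('Data', [])
    if not data:                       # empty 'Data' -> pair not offered on that exchange
        return None
    df = pd.DataFrame(data)
    df.drop(df.index[-1], inplace=True)
    df['timestamp'] = [datetime.fromtimestamp(t) for t in df.time]
    return df.set_index('timestamp')

d_volu = []
for q, ex in itertools.product(quoted, liqud_mar):
    df = safe_daily_volume('QTUM', q, exchange=ex)
    if df is not None and not df.empty:
        d_volu.append(df.volumefrom.rename('{}/{}'.format(q, ex)))

volumes = pd.concat(d_volu, axis=1) if d_volu else pd.DataFrame()

Either approach works; the explicit check just makes it easier to see which pairs were skipped.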

Related

Data frame TypeError: 'str' object in for loop

The code below tries to connect to the MS SQL databases one by one using the sql_connect() functions and adds new columns to the df (db number, db type).
The connectivity works, but I get an error.
df = pd.DataFrame()
df_db = pd.DataFrame()
db_num = [30, 33, 39]
db_type = ['ms', 'ms', 'ms']
sql = ['sql_connect_30()', 'sql_connect_33()', 'sql_connect_39()']
for i in range(2):
    print(i)
    print(db_num[i])
    print(db_type[i])
    print(sql[i])
    df_db = sql[i]
    df_db['db_num'] = db_num[i]
    df_db['db_type'] = db_type[i]
    df_db = pd.concat([df, df_db], axis = 0)
Error
  exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-a2c294742ba6>", line 12, in <cell line: 6>
    df_db['db_num'] = db_num[i]
TypeError: 'str' object does not support item assignment
I could do it with the method below, but I want to try and learn how to do it with a loop.
df_30 = sql_connect_30()
df_33 = sql_connect_33()
df_39 = sql_connect_39()
df_30['db_num'] = 30
df_30['db_type'] = 'ms'
df_33['db_num'] = 33
df_33['db_type'] = 'ms'
df_39['db_num'] = 39
df_39['db_type'] = 'ms'
df_db = pd.concat([df_30, df_33, df_39], axis = 0)
del df_30, df_33, df_39
Could you guide me on this or suggest a more efficient way?
For the sake of completeness, I think your code would be quite a bit cleaner as follows:
funcs = [sql_connect_30, sql_connect_33, sql_connect_39]
db_nums = [30, 33, 39]
df = pd.DataFrame()
for i, func in enumerate(funcs):
    df_sub = func()
    df_sub["db_num"] = db_nums[i]
    df_sub["db_type"] = "ms"
    df = pd.concat([df, df_sub], ignore_index=True)
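For reference, the TypeError in the question comes from storing the strings 'sql_connect_30()' in the sql list instead of the functions themselves, so df_db ends up being a str. A minimal sketch of the same idea using zip and a single concat at the end (the sql_connect_* definitions below are hypothetical stand-ins for the poster's real connectors, which each return a DataFrame):

import pandas as pd

# Hypothetical stand-ins for the poster's connectors.
def sql_connect_30(): return pd.DataFrame({'value': [1, 2]})
def sql_connect_33(): return pd.DataFrame({'value': [3]})
def sql_connect_39(): return pd.DataFrame({'value': [4, 5]})

# Store the callables, not strings; zip pairs each function with its db number.
frames = []
for func, num in zip([sql_connect_30, sql_connect_33, sql_connect_39], [30, 33, 39]):
    df_sub = func()              # actually call the connector here
    df_sub['db_num'] = num
    df_sub['db_type'] = 'ms'
    frames.append(df_sub)

# One concat at the end instead of growing the DataFrame on every iteration.
df_db = pd.concat(frames, ignore_index=True)
print(df_db)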

How can I convert .append to .concat pandas python

I have this data entry:
[{'id': 2269396, 'from': 1647086100, 'at': 1647086160000000000, 'to': 1647086160, 'open': 1.072652, 'close': 1.072691, 'min': 1.072641, 'max': 1.072701, 'volume': 0},..]
Applying this indexing in pandas:
current = self.getAllCandles(self.active_id,start_candle)
main = pd.DataFrame()
useful_frame = pd.DataFrame()
for candle in current:
    useful_frame = pd.DataFrame(list(candle.values()), index=list(candle.keys())).T.drop(columns=['at'])
    useful_frame = useful_frame.set_index(useful_frame['from']).drop(columns=['id'])
    main = main.append(useful_frame)
main.drop_duplicates()
final_data = main.drop(columns = {'to'})
final_data = final_data.loc[~final_data.index.duplicated(keep = 'first')]
return final_data
After that I have the following result:
                      from      open     close       min       max  volume
from
1.647086e+09  1.647086e+09  1.072652  1.072691  1.072641  1.072701     0.0
...                    ...       ...       ...       ...       ...     ...
Since df.append() will be deprecated, I'm struggling to execute the same instructions using pd.concat(). But I'm not getting it; how could I change that?
Thank you all. I made a small modification to the code suggested by Stuart Berg, and it works perfectly:
current = self.getAllCandles(self.active_id, start_candle)
frames = []
useful_frame = pd.DataFrame.from_dict(current, orient='columns')
useful_frame = useful_frame.set_index('from')
useful_frame = useful_frame.drop(columns=['at', 'id'])
frames.append(useful_frame)
main = pd.concat(frames).drop_duplicates()
final_data = main.drop(columns='to')
final_data = final_data.loc[~final_data.index.duplicated()]
return final_data
I think this is what you're looking for:
current = self.getAllCandles(self.active_id, start_candle)
frames = []
for candle in current:
    useful_frame = pd.DataFrame.from_dict(candle, orient='columns')
    #useful_frame['from'] = datetime.datetime.fromtimestamp(int(useful_frame['from'])).strftime('%Y-%m-%d %H:%M:%S')
    useful_frame = useful_frame.set_index('from')
    useful_frame = useful_frame.drop(columns=['at', 'id'])
    frames.append(useful_frame)
main = pd.concat(frames).drop_duplicates()
final_data = main.drop(columns='to')
final_data = final_data.loc[~final_data.index.duplicated()]
Create an empty Python list and append all the pieces to it. Finally, call pandas' concat on that list; this will give you the DataFrame.
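As a further simplification (a sketch only, assuming getAllCandles really returns a flat list of dicts like the sample at the top of the question), the whole per-candle loop can be replaced by a single DataFrame construction:

import pandas as pd

candles = [
    {'id': 2269396, 'from': 1647086100, 'at': 1647086160000000000, 'to': 1647086160,
     'open': 1.072652, 'close': 1.072691, 'min': 1.072641, 'max': 1.072701, 'volume': 0},
    # ...
]

final_data = (pd.DataFrame(candles)          # one row per candle dict
                .set_index('from')
                .drop(columns=['at', 'id', 'to'])
                .loc[lambda df: ~df.index.duplicated(keep='first')])
print(final_data)

This relies on pd.DataFrame accepting a list of records directly, which sidesteps both append and a per-row concat.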

np.vectorize giving out nan values in ETS time series forecasting

re_1=np.vectorize(ETStest)(Trend_seasonal['trend'],Trend_seasonal['seasonal'],seasonal_period=12,i = 0)
This is the line that raises the error.
def ets_pred(train):
    trend = ['add', 'add', 'mul', 'mul']
    sea = ['add', 'mul', 'mul', 'add']
    Trend_seasonal = pd.DataFrame()
    Trend_seasonal['trend'] = trend
    Trend_seasonal['seasonal'] = sea
    re_1 = np.vectorize(ETStest)(Trend_seasonal['trend'], Trend_seasonal['seasonal'], seasonal_period=12, i=0)
    result_table_ETS = pd.DataFrame(re_1).transpose()
    result_table_ETS.columns = ['Trend', 'Seasonal', 'seasonal_period', 'error_mae_f', 'error_mae_p',
                                'Accuracy_mae_f', 'Accuracy_mae_p', 'Diff', 'Product', 'mae_pm', 'mae_smly', 'MAE_CV']
    result_table_ETS_1 = result_table_ETS[~result_table_ETS.Trend.str.contains("Fals")].dropna()
    result_table_ETS_1 = result_table_ETS_1.sort_values(by='MAE_CV', ascending=True).reset_index(drop=True)
    return result_table_ETS_1
The following is where I call the function:
run_results = pd.DataFrame()
forecast_results = pd.DataFrame()
model_result=[]
for i in df.columns:
    ts = df[i]
    n = uservalue
    train, test = ts[0:len(ts)-n], ts[len(ts)-n:]
    train1, test1 = ts[0:len(ts)], ts[0:len(ts)]
    re = ets_pred(train)
    re['Target'] = pd.DataFrame({'Target': np.tile([i], len(re))})['Target']
    re['Refresh_month'] = pd.DataFrame({'Target': np.tile([ref_month], len(re))})['Target']
    run_results = run_results.append(re)
    run_results = run_results.fillna(0)
    display(re)
    optimal_row = 0
    model_ETS = ExponentialSmoothing(endog=ts, trend=re.iloc[optimal_row, 0], seasonal=re.iloc[optimal_row, 1],
                                     seasonal_periods=re.iloc[optimal_row, 2], damped=True, dates=None,
                                     freq=None, missing='none').fit()
    res = model_ETS.fittedvalues[-10:]
    print(res)
    predictions_ETS = model_ETS.predict(start=len(train), end=len(train)+n-1)
    predictions = predictions_ETS
    error_ETS_mae_f = re.iloc[optimal_row, 3]
    error_ETS_mae_p = re.iloc[optimal_row, 4]
    print('Summary of Best Model')
    display(model_ETS.summary())
    model_result.append([i,
                         ref_month,
                         re.iloc[optimal_row, 0],
                         re.iloc[optimal_row, 1],
                         re.iloc[optimal_row, 2],
                         model_ETS.params['smoothing_level'],
                         #model_ETS.params['smoothing_trend'],
                         model_ETS.params['smoothing_seasonal'],
                         #model_ETS.params['damping_trend'],
                         model_ETS.params['initial_level'],
                         #model_ETS.params['initial_trend'],
                         re.iloc[optimal_row, 5],
                         re.iloc[optimal_row, 6],
                         re.iloc[optimal_row, 7],
                         re.iloc[optimal_row, 8]])
    forecast = pd.DataFrame()
    forecast['Date'] = model_ETS.predict(start=len(train), end=len(train)+20).index
    forecast['Forecast'] = model_ETS.predict(start=len(train), end=len(train)+20).values
    forecast['Target'] = pd.DataFrame({'Target': np.tile([i], len(forecast))})['Target']
    forecast['Refresh_month'] = pd.DataFrame({'Target': np.tile([ref_month], len(forecast))})['Target']
    forecast_results = forecast_results.append(forecast)
    display(forecast)
I think np.vectorize is producing some NaN values, and that is why I am getting the error. The code above is the function in question.
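No answer is recorded here, but one way to confirm the NaN suspicion is to drop np.vectorize and call the function once per row, so the trend/seasonal combination that produces NaN is visible directly. The ETStest below is a hypothetical stand-in, since the real function is not shown in the question:

import numpy as np
import pandas as pd

def ETStest(trend, seasonal, seasonal_period, i):
    # Hypothetical stand-in that only echoes its inputs; the real ETStest fits an ETS model.
    return (trend, seasonal, seasonal_period, np.nan)

Trend_seasonal = pd.DataFrame({'trend': ['add', 'add', 'mul', 'mul'],
                               'seasonal': ['add', 'mul', 'mul', 'add']})

rows = []
for _, row in Trend_seasonal.iterrows():
    out = ETStest(row['trend'], row['seasonal'], seasonal_period=12, i=0)
    print(row['trend'], row['seasonal'], '->', out)   # spot the combination that yields NaN
    rows.append(out)

result_table_ETS = pd.DataFrame(rows)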

Function output into loop

I am trying to replicate the following code, which works smoothly, add a date parameter to the function, and run the function with different dates in a loop:
FUNCTION V1:
def getOHLCV(currencies):
    c_price = []
    data = {}
    try:
        url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/ohlcv/historical'
        parameters = {
            'symbol': ",".join(currencies),
            #'time_start': ",".join(start_dates),
            'count': '91',
            'interval': 'daily',
            'convert': 'JPY',
        }
        headers = {
            'Accepts': 'application/json',
            'X-CMC_PRO_API_KEY': 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
        }
        session = Session()
        session.headers.update(headers)
        response = session.get(url, params=parameters)
        data = json.loads(response.text)
        for currency in data['data']:
            used_list = [
                item['quote']['JPY']
                for item in data['data'][currency]['quotes']
            ]
            price = pd.DataFrame.from_records(used_list)
            price['timestamp'] = pd.to_datetime(price['timestamp'])
            price['timestamp'] = price['timestamp'].astype(str).str[:-15]
            price_c = price.set_index('timestamp').close
            c_price.append(price_c.rename(currency))
    except Exception as e:
        print(data)
    return c_price
c_price = []
c_price.extend(getOHLCV(available[:61]))
c_price.extend(getOHLCV(available[61:]))
c_price = pd.concat(c_price, axis=1, sort=True)
pd.set_option('display.max_columns', 200)
c_price = c_price.transpose()
c_price.index.name = 'currency'
c_price.sort_index(axis=0, ascending=True, inplace=True)
OUTPUT:
          2019-07-25  2019-07-26  2019-07-27  2019-07-28  2019-07-29  \
currency
1WO         2.604104    2.502526    2.392313    2.418967    2.517868
ABX         1.015568    0.957774    0.913224    0.922612    1.037273
ADH         0.244782    0.282976    0.309931    0.287933    0.309613
...              ...         ...         ...         ...         ...
XTX         0.156103    0.156009    0.156009    0.165103    0.156498
ZCO         0.685255    0.661324    0.703521    0.654763    0.616204
ZPR         0.214395    0.204968    0.181529    0.178460    0.177596
FUNCTION V2:
The V2 function adds a start_dates parameter, and I run the function with this new parameter in a loop. The issue is that I get an empty DataFrame from it. I assume there is an issue with the date, but I don't know where. Any help is appreciated.
def getOHLCV(currencies, start_dates):
    ...
    'symbol': ",".join(currencies),
    'time_start': ",".join(start_dates),
    ...

date_list = [(date.today() - timedelta(days= x * 91)) for x in range(3)][1:]
one = []
for i in date_list:
    c_price = []
    c_price.extend(getOHLCV(available[:61], i))
    c_price.extend(getOHLCV(available[61:], i))
    c_price = pd.concat(c_price, axis=1, sort=True)
    one = pd.concat(c_price, axis=1, sort=True)
pd.set_option('display.max_columns', 200)
The list you are extending is being cleared at each iteration of the for loop; it can be fixed like so:
date_list = [(date.today() - timedelta(days= x * 91)) for x in range(3)][1:]
one = []
c_price = []
for i in date_list:
    c_price.extend(getOHLCV(available[:61], i))
    c_price.extend(getOHLCV(available[61:], i))
c_price = pd.concat(c_price, axis=1, sort=True)
one = pd.concat(c_price, axis=1, sort=True)
pd.set_option('display.max_columns', 200)
Hope that works for you
EDIT 1
So we need to fix the error: '"time_start" must be a valid ISO 8601 timestamp or unix time value'.
This is because the return value of
date_list = [(date.today() - timedelta(days= x * 91)) for x in range(3)][1:]
is this:
[datetime.date(2019, 7, 24), datetime.date(2019, 4, 24)]
So we need to convert the list from date objects to something the API will understand; we can do it the following way:
date_list = list(map(date.isoformat, date_list))
And we get the following output
['2019-07-24', '2019-04-24']
Edit 2
The error happens when we try to call join on something that isn't a list, so we can fix it by doing
'time_start': start_dates
Instead of doing
'time_start': ",".join(start_dates),

Multiple variables loop and append dataframe

I am trying to loop over 2 lists to get all possible combinations in the loop below. I have some difficulty understanding why the first part works and the second does not. Basically it queries the same data but with all patterns from the lists. Any help would be much appreciated.
THE CODE:
base = ['BTC', 'ETH']
quoted = ['USDT', 'AUD','USD']
def daily_volume_historical(symbol, comparison_symbol, all_data=False, limit=90, aggregate=1, exchange=''):
    url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}'\
        .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate)
    if exchange:
        url += '&e={}'.format(exchange)
    if all_data:
        url += '&allData=true'
    page = requests.get(url)
    data = page.json()['Data']
    df = pd.DataFrame(data)
    df.drop(df.index[-1], inplace=True)
    df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
    df.set_index('timestamp')
    return df
## THIS CODE GIVES SOME DATA ##
volu = daily_volume_historical('BTC', 'USD', 'CCCAGG').set_index('timestamp').volumefrom
## THIS CODE GIVES EMPTY DATA FRAME ##
d_volu = []
for a,b in [(a,b) for a in base for b in quoted]:
    volu = daily_volume_historical(a, b, exchange= 'CCCAGG').volumefrom
    d_volu.append
d_volu = pd.concat(d_volu, axis=1)
volu output sample:
timestamp
2010-07-17 09:00:00 20.00
2010-07-18 09:00:00 75.01
2010-07-19 09:00:00 574.00
2010-07-20 09:00:00 262.00
2010-07-21 09:00:00 575.00
2010-07-22 09:00:00 2160.00
2010-07-23 09:00:00 2402.50
2010-07-24 09:00:00 496.32
import itertools
base = ['BTC', 'ETH']
quoted = ['USDT', 'AUD','USD']
combinations = list(itertools.product(base, quoted))
def daily_volume_historical(symbol, comparison_symbol, all_data=False, limit=90, aggregate=1, exchange=''):
    url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}'\
        .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate)
    if exchange:
        url += '&e={}'.format(exchange)
    if all_data:
        url += '&allData=true'
    page = requests.get(url)
    data = page.json()['Data']
    df = pd.DataFrame(data)
    df.drop(df.index[-1], inplace=True)
    df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
    df.set_index('timestamp')
    return df
## THIS CODE GIVES SOME DATA ##
volu = daily_volume_historical('BTC', 'USD', 'CCCAGG').set_index('timestamp').volumefrom
## THIS CODE GIVES EMPTY DATA FRAME ##
d_volu = []
for a,b in combinations:
    volu = daily_volume_historical(a, b, exchange= 'CCCAGG').volumefrom
    d_volu.append
d_volu = pd.concat(d_volu, axis=1)
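For what it's worth, the empty result in both versions appears to come from d_volu.append being referenced but never called, so nothing is ever collected. Reusing the combinations list and the function defined just above, a corrected loop would be:

d_volu = []
for a, b in combinations:
    volu = daily_volume_historical(a, b, exchange='CCCAGG').set_index('timestamp').volumefrom
    d_volu.append(volu.rename('{}/{}'.format(a, b)))  # call append with the Series
d_volu = pd.concat(d_volu, axis=1)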
