Runtime error - Forward Rates Calculation - python

I am trying to build an annual EONIA forward curve from input tenors ranging from 1 week to 50 years.
I have managed to code thus far:
data
maturity spot rate
0 1 -0.529
1 2 -0.529
2 3 -0.529
3 1 -0.504
4 2 -0.505
5 3 -0.506
6 4 -0.508
7 5 -0.509
8 6 -0.510
9 7 -0.512
10 8 -0.514
11 9 -0.515
12 10 -0.517
13 11 -0.518
14 1 -0.520
15 15 -0.524
16 18 -0.526
17 21 -0.527
18 2 -0.528
19 3 -0.519
20 4 -0.501
21 5 -0.476
22 6 -0.441
23 7 -0.402
24 8 -0.358
25 9 -0.313
26 10 -0.265
27 11 -0.219
28 12 -0.174
29 15 -0.062
30 20 0.034
31 25 0.054
32 30 0.039
33 40 -0.001
34 50 -0.037
# Question's script: bootstraps an EONIA discount curve from one deposit quote
# plus one OIS helper per row of `data`, then reads forward rates off the curve.
# NOTE(review): quotes are divided by 100 below, so they are presumably in percent.
terms= data["maturity"].tolist()
rates= data['spot rate'].tolist()
calendar = ql.TARGET()
# NOTE(review): business_convention is assigned but not used anywhere in this snippet.
business_convention = ql.ModifiedFollowing
day_count = ql.Actual360()
settlement_days_EONIA = 2
EONIA = ql.OvernightIndex("EONIA", settlement_days_EONIA, ql.EURCurrency(), calendar, day_count)
# Deposit Helper
depo_facility = -0.50
depo_helper = [ql.DepositRateHelper(ql.QuoteHandle(ql.SimpleQuote(depo_facility/100)), ql.Period(1,ql.Days), 1, calendar, ql.Unadjusted, False, day_count)]
# OIS Helper
OIS_helpers = []
# Rows 0-2 are treated as week tenors, rows 3-11 as month tenors, the rest as year tenors.
for i in range(len(terms)):
if i < 3:
# NOTE(review): the Period is built from the unit only; terms[i] is never used,
# so every helper created in this branch receives the same tenor.
tenor = ql.Period(ql.Weeks)
eon_rate = rates[i]
OIS_helpers.append(ql.OISRateHelper(settlement_days_EONIA, tenor, ql.QuoteHandle(ql.SimpleQuote(eon_rate/100)), EONIA))
elif i < 12:
# NOTE(review): same issue as above - no count is passed to ql.Period.
tenor = ql.Period(ql.Months)
eon_rate = rates[i]
OIS_helpers.append(ql.OISRateHelper(settlement_days_EONIA, tenor, ql.QuoteHandle(ql.SimpleQuote(eon_rate/100)), EONIA))
else:
# NOTE(review): same issue as above - no count is passed to ql.Period.
tenor = ql.Period(ql.Years)
eon_rate = rates[i]
OIS_helpers.append(ql.OISRateHelper(settlement_days_EONIA, tenor, ql.QuoteHandle(ql.SimpleQuote(eon_rate/100)), EONIA))
rate_helpers = depo_helper + OIS_helpers
eonia_curve_c = ql.PiecewiseLogCubicDiscount(0, ql.TARGET(), rate_helpers, day_count)
#This doesn't give me a daily grid of rates, but only the rates at the tenors of my input
eonia_curve_c.enableExtrapolation()
# One schedule date per year between the curve's reference date and its max date.
days = ql.MakeSchedule(eonia_curve_c.referenceDate(), eonia_curve_c.maxDate(), ql.Period('1Y'))
# Simple-compounded forward rate starting at each schedule date, scaled by 100.
rates_fwd = [
eonia_curve_c.forwardRate(d, calendar.advance(d,365,ql.Days), day_count, ql.Simple).rate()*100
for d in days
]
The problem is that when I run the code, I get the following error:
RuntimeError: more than one instrument with pillar June 18th, 2021
There is probably an error somewhere in the code for the OIS helper, where there is an overlap but I am not sure what I have done wrong. Anyone know what the problem is?

First off, apologies for any inelegant Python, as I am coming from C++:
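The main issue with the original question was that ql.Period() takes two parameters when used with an integer number of periods, e.g. ql.Period(3,ql.Years). The question's code calls ql.Period(ql.Weeks), ql.Period(ql.Months) and ql.Period(ql.Years) without the number of periods, so every helper built in the same branch gets an identical tenor, which is what triggers the "more than one instrument with pillar" error. If instead you construct the input array with string representations of the tenors, e.g. '3y', you can just pass this string to ql.Period(). So ql.Period(3,ql.Years) and ql.Period('3y') give the same result.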
import QuantLib as ql
import numpy as np
import pandas as pd
# EONIA OIS quotes as [tenor string, rate] pairs. The rates are divided by 100
# before being handed to QuantLib, i.e. they are quoted in percent.
curve = [
    ['1w', -0.529],
    ['2w', -0.529],
    ['3w', -0.529],
    ['1m', -0.504],
    ['2m', -0.505],
    ['3m', -0.506],
    ['4m', -0.508],
    ['5m', -0.509],
    ['6m', -0.510],
    ['7m', -0.512],
    ['8m', -0.514],
    ['9m', -0.515],
    ['10m', -0.517],
    ['11m', -0.518],
    ['1y', -0.520],
    ['15m', -0.524],
    ['18m', -0.526],
    ['21m', -0.527],
    ['2y', -0.528],
    ['3y', -0.519],
    ['4y', -0.501],
    ['5y', -0.476],
    ['6y', -0.441],
    ['7y', -0.402],
    ['8y', -0.358],
    ['9y', -0.313],
    ['10y', -0.265],
    ['11y', -0.219],
    ['12y', -0.174],
    ['15y', -0.062],
    ['20y', 0.034],
    ['25y', 0.054],
    ['30y', 0.039],
    ['40y', -0.001],
    ['50y', -0.037],
]
data = pd.DataFrame(curve, columns=['maturity', 'spot rate'])
print('Input curve\n', data)
terms = data["maturity"].tolist()
rates = data['spot rate'].tolist()

calendar = ql.TARGET()
day_count = ql.Actual360()
settlement_days_EONIA = 2
EONIA = ql.OvernightIndex("EONIA", settlement_days_EONIA, ql.EURCurrency(), calendar, day_count)

# Deposit Helper
depo_facility = -0.50
depo_helper = [ql.DepositRateHelper(ql.QuoteHandle(ql.SimpleQuote(depo_facility/100)), ql.Period(1, ql.Days), 1, calendar, ql.Unadjusted, False, day_count)]

# OIS Helper: one per quote. ql.Period parses the tenor string ('3m', '2y', ...)
# directly, so each helper gets its own distinct tenor.
OIS_helpers = [
    ql.OISRateHelper(settlement_days_EONIA, ql.Period(term), ql.QuoteHandle(ql.SimpleQuote(rate/100)), EONIA)
    for term, rate in zip(terms, rates)
]
rate_helpers = depo_helper + OIS_helpers
eonia_curve_c = ql.PiecewiseLogCubicDiscount(0, ql.TARGET(), rate_helpers, day_count)
# The forward period starting at the last schedule date ends after maxDate(),
# so extrapolation has to be enabled before querying the curve.
eonia_curve_c.enableExtrapolation()
# One start date per year from the reference date out to the last pillar.
days = ql.MakeSchedule(eonia_curve_c.referenceDate(), eonia_curve_c.maxDate(), ql.Period('1Y'))
# Annual simple forward rates in percent. Calendar.advance with ql.Days counts
# *business* days, so advancing 365 Days would overshoot one year; advance by
# one calendar year instead.
rates_fwd = [
    eonia_curve_c.forwardRate(d, calendar.advance(d, 1, ql.Years), day_count, ql.Simple).rate()*100
    for d in days
]
print('Output\n', pd.DataFrame(rates_fwd, columns=['Fwd rate']))

Related

python: cumulative density plot

I have the following dataframe:
df =
Time_to_event event
0 0 days 443
1 1 days 226
2 2 days 162
3 3 days 72
4 4 days 55
5 5 days 30
6 6 days 36
7 7 days 18
8 8 days 15
9 9 days 14
10 10 days 21
11 11 days 13
12 12 days 10
13 13 days 10
14 14 days 8
I want to produce a cumulative density plot of the running sum of events per day. For example: 0 days = 443, 1 day = 443 + 226, etc.
I am currently trying this code:
stat = "count" # or proportion
sns.histplot(df, stat=stat, cumulative=True, alpha=.4)
but I come up with a pretty terrible plot:
If I could also come up with a line instead of bars that would be awesome!
You can try a combo of pandas.Series.cumsum and seaborn.lineplot :
df["cumsum"] = df["event"].cumsum()
plt.figure(figsize=(6,4))
sns.lineplot(x="Time_to_event", y="cumsum", data=df);
Output :
I think what you are looking for your plot values is:
xvalues=df["Time_to_event"]
yvalues=df["event"].cumsum()
The code could look like this:
import pandas as pd
import matplotlib.pyplot as plt
df=pd.read_csv("test.txt")
print(df.columns)
print(df)
plt.bar(df["Time_to_event"],df["event"].cumsum())
# replace plt.bar with plt.plot for a plotted diagram
plt.show()

How to use fastai unet_learner?

Summary
Details
Use the fastai (v2) unet_learner function with resnet34 for transfer learning.
Expected Results
A learner that gets through the building phase (.fine_tune(n)).
Errors
IndexError: Target 20 is out of bounds.
Attempted Remedy(s)
Ran the same processes as shown here without issue. The process ran smoothly, with the learner completing the fine_tuning and predictions without issue (on the camvid_tiny dataset).
Checked my processed data, including 'labels', 'images', and 'codes', against those in the camvid process, and they are nearly identical (my 21 classes vs camvid's ~30; my images & labels are 256x256 vs camvid's 96, 128).
Confirmed label values within dls are not 0/255 (as noted here & here)
for i in range(len(lnames)):
y = Image.open(lnames[i])
y_array = np.array(y)
print(np.unique(y_array))
[20]
[20]
[20]
[ 5 7 8 11 12 13 14 17 20]
[ 5 12 13 14 15 16 17 20]
...
[14 17 20]
[14 17 20]
[ 8 9 10 11 12 13 14 16 17 19 20]
[ 1 2 3 4 7 8 9 10 11 12 13 14 16 17 18 19 20]
[ 2 3 9 10 12 13 14 16 17 19 20]
[ 1 2 3 4 7 8 9 10 11 12 13 14 16 17 18 19 20]
Code
import os
import json
import numpy as np
from pathlib import Path
from fastai.vision.all import *
>>path = Path(r"D:\EuroSATDS")
>>json_file = r"D:\EuroSATDS\esri_accumulated_stats.json"
>>>with open(json_file, 'r') as f:
data = json.load(f)
>>>classes = data['Classes']
>>>classes_list = []
>>>classes_value = []
>>>for i in classes:
x = i['ClassName']
y = i['ClassValue']
classes_list.append(x)
classes_value.append(y)
>>>classes_list[20]
'Palustrine Aquatic Bed'
>>>codes = np.asarray(classes_list, dtype='<U17')
>>>codes
array(['High Intensity De', 'Medium Intensity ', 'Low Intensity Dev',
'Developed Open Sp', 'Cultivated', 'Pasture/Hay', 'Grassland',
'Deciduous Forest', 'Evergreen Forest', 'Mixed Forest',
'Scrub/Shrub', 'Palustrine Forest', 'Palustrine Scrub/',
'Palustrine Emerge', 'Estuarine Foreste', 'Estuarine Scrub/S',
'Estuarine Emergen', 'Unconsolidated Sh', 'Bare Land', 'Water'],
dtype='<U17')
>>fnames = get_image_files(path/"images")
>>fnames[0]
Path('../000000000.jpg')
>>def label_func(fn): return pathB/"labels"/f"{fn.stem}_P.png"
>>dls = SegmentationDataLoaders.from_label_func(pathB, bs=8, fnames = fnames, label_func = label_func, codes = codes)
>>dls.show_batch(max_n=6)
>> learn = unet_learner(dls, resnet34)
>> learn.fine_tune(1)
IndexError: Target 20 is out of bounds.
IndexError Traceback (most recent call last)
~TEMP/ipykernel_14508/3714593663.py in <module>
2 import time
3 start = time.time()
----> 4 learn.fine_tune(1)
5 end = time.time()
6 print("The time of execution of above program is :", end-start)

How to add values in a data frame with specific conditions

I have a code where it outputs the amount of times a product is bought in a specific month in all stores; however, I was wondering how I would be able to have the sum of 3 conditions, where python would add the products from a specific month and a specific store.
This is my code so far:
df = df.groupby(['Month_Bought'])['Amount_Bought'].sum()
print(df)
Output:
01-2020 27
02-2020 26
03-2020 24
04-2020 23
05-2020 31
06-2020 33
07-2020 26
08-2020 30
09-2020 33
10-2020 26
11-2020 30
12-2020 30
Need to separate the data to make the dataframe look like this:
Store1 Store2
01-2020 3 24
02-2020 4 22
03-2020 8 16
04-2020 4 19
05-2020 10 21
06-2020 11 21
07-2020 12 14
08-2020 10 20
09-2020 3 30
10-2020 14 12
11-2020 21 9
12-2020 9 21
Assuming your data is long (a column contains values for which store a product was purchased), you could group by store and month:
import pandas as pd
records = [
{'Month_Bought':'01-2020', 'Amount_Bought':1, 'Store': 'Store1'},
{'Month_Bought':'01-2020', 'Amount_Bought':2, 'Store': 'Store2'},
{'Month_Bought':'02-2020', 'Amount_Bought':2, 'Store': 'Store1'},
{'Month_Bought':'02-2020', 'Amount_Bought':4, 'Store': 'Store2'}
]
df = pd.DataFrame.from_records(records)
# Initial dataframe
Month_Bought Amount_Bought Store
0 01-2020 1 Store1
1 01-2020 2 Store2
2 02-2020 2 Store1
3 02-2020 4 Store2
# Now groupby store and month
df_agg = df.groupby(['Store', 'Month_Bought'], as_index=False)['Amount_Bought'].sum()
# Convert from long to wide:
df_agg_pivot = df_agg.pivot(index='Month_Bought', columns='Store', values='Amount_Bought')
# Reformat
df_agg_pivot = df_agg_pivot.reset_index()
df_agg_pivot.columns.name = None
# Final result:
Month_Bought Store1 Store2
0 01-2020 1 2
1 02-2020 2 4

Scale values of a particular column of python dataframe between 1-10

I have a dataframe which contains youtube videos views, I want to scale these values in the range of 1-10.
Below is a sample of what the values look like. How do I normalize them to the range 1-10, or is there a more efficient way to do this?
rating
4394029
274358
473691
282858
703750
255967
3298456
136643
796896
2932
220661
48688
4661584
2526119
332176
7189818
322896
188162
157437
1153128
788310
1307902
One possibility is performing a scaling with max.
1 + df / df.max() * 9
rating
0 6.500315
1 1.343433
2 1.592952
3 1.354073
4 1.880933
5 1.320412
6 5.128909
7 1.171046
8 1.997531
9 1.003670
10 1.276217
11 1.060946
12 6.835232
13 4.162121
14 1.415808
15 10.000000
16 1.404192
17 1.235536
18 1.197075
19 2.443451
20 1.986783
21 2.637193
Similar solution by Wen (now deleted):
1 + (df - df.min()) * 9 / (df.max() - df.min())
rating
0 6.498887
1 1.339902
2 1.589522
3 1.350546
4 1.877621
5 1.316871
6 5.126922
7 1.167444
8 1.994266
9 1.000000
10 1.272658
11 1.057299
12 6.833941
13 4.159739
14 1.412306
15 10.000000
16 1.400685
17 1.231960
18 1.193484
19 2.440368
20 1.983514
21 2.634189

Python: How to code an exponential moving average?

I want to do calculations on three columns of a dataframe df. To do that, I want to feed a list of asset (cryptocurrency) prices into a three-column table and, once there is enough data, calculate the exponential moving average of each column.
# Builds a BTC/ETH/DASH DataFrame from values_array, prints it, and tries to
# compute a 22-period exponential moving average per column, returning mean_exp.
# NOTE(review): mean_exp is never created in this snippet - presumably it is
# defined elsewhere; confirm before relying on the return value.
def calculateAllEMA(self,values_array):
df = pd.DataFrame(values_array, columns=['BTC', 'ETH', 'DASH'])
column_by_search = ["BTC", "ETH", "DASH"]
print(df)
# NOTE(review): `column` is a column-name string from column_by_search, not the
# column data, so len(column) below is the length of the name and `.iloc` is
# being called on a str. df[column] is presumably what was intended.
for i,column in enumerate(column_by_search):
ema=[]
# over and over for each day that follows day 23 to get the full range of EMA
for j in range(0, len(column)-24):
# Add the closing prices for the first 22 days together and divide them by 22.
EMA_yesterday = column.iloc[1+j:22+j].mean()
# Smoothing factor 2 / (22 + 1).
k = float(2)/(22+1)
# getting the first EMA day by taking the following day’s (day 23) closing price multiplied by k, then multiply the previous day’s moving average by (1-k) and add the two.
ema.append(column.iloc[23 + j]*k+EMA_yesterday*(1-k))
print("ema")
print(ema)
# NOTE(review): ema[-1] raises IndexError when the inner loop never ran (empty list).
mean_exp[i] = ema[-1]
return mean_exp
Yet when I print len(column)-24 I get -21 (-24 + 3?), so the loop body never runs. How can I fix this error to get the exponential moving average of the assets?
I tried to apply this link from iexplain.com for the pseudo code of the exponential moving average.
If you have any easier idea, I'm open to hear it.
Here is the data that I use to calculate it when it bugs :
BTC ETH DASH
0 4044.59 294.40 196.97
1 4045.25 294.31 196.97
2 4044.59 294.40 196.97
3 4045.25 294.31 196.97
4 4044.59 294.40 196.97
5 4045.25 294.31 196.97
6 4044.59 294.40 196.97
7 4045.25 294.31 196.97
8 4045.25 294.31 196.97
9 4044.59 294.40 196.97
10 4045.25 294.31 196.97
11 4044.59 294.40 196.97
12 4045.25 294.31 196.97
13 4045.25 294.32 197.07
14 4045.25 294.31 196.97
15 4045.41 294.46 197.07
16 4045.25 294.41 197.07
17 4045.41 294.41 197.07
18 4045.41 294.47 197.07
19 4045.25 294.41 197.07
20 4045.25 294.32 197.07
21 4045.43 294.35 197.07
22 4045.41 294.46 197.07
23 4045.25 294.41 197.07
pandas.stats.moments.ewma from the original answer has been deprecated.
Instead you can use pandas.DataFrame.ewm as documented here.
Below is a complete snippet with random data that builds a dataframe with calculated ewmas from specified columns.
Code:
# imports
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so the random sample data is reproducible.
np.random.seed(123)
rows = 50
# Random integer "prices" drawn from [90, 110) for the three assets.
df = pd.DataFrame(np.random.randint(90,110,size=(rows, 3)), columns=['BTC', 'ETH', 'DASH'])
# pd.datetime was removed in pandas 2.0; date_range accepts the start date as a
# string, so no datetime object is needed.
datelist = pd.date_range('2017-01-01', periods=rows).tolist()
df['dates'] = datelist
# Use the daily dates as a DatetimeIndex named 'dates'.
df = df.set_index(['dates'])
df.index = pd.to_datetime(df.index)
def ewmas(df, win, keepSource):
    """Add exponentially weighted moving averages for all columns in a dataframe.

    For every column ``c`` of ``df`` a new column ``'<c>_ewma_<win>'`` is added,
    computed with ``df[c].ewm(span=win, adjust=True).mean()``. The input frame
    is not modified.

    Arguments:
    df -- pandas dataframe
    win -- length of ewma estimation window (used as the ``span``)
    keepSource -- True or False for keep or drop source data in output dataframe

    Returns:
    A new dataframe without its first ``win`` rows, containing the ewma columns
    and, if ``keepSource`` is true, the original columns as well.
    """
    df_temp = df.copy()

    # Snapshot the source column names before any ewma columns are appended.
    source_names = list(df_temp.columns)

    for col in source_names:
        df_temp[f"{col}_ewma_{win}"] = df[col].ewm(span=win, adjust=True).mean()

    # Remove estimates with insufficient window length
    df_temp = df_temp.iloc[win:]

    # Drop by keyword: the positional ``axis`` argument (``drop(names, 1)``)
    # is rejected by pandas 2.x.
    if not keepSource:
        df_temp = df_temp.drop(columns=source_names)

    return df_temp
# Test run
df_new = ewmas(df = df, win = 22, keepSource = True)
print(df_new.tail())
Output:
BTC ETH DASH BTC_ewma_22 ETH_ewma_22 DASH_ewma_22
dates
2017-02-15 91 96 98 98.752431 100.081052 97.926787
2017-02-16 100 102 102 98.862445 100.250270 98.285973
2017-02-17 100 107 97 98.962634 100.844749 98.172712
2017-02-18 103 102 91 99.317826 100.946384 97.541684
2017-02-19 99 104 91 99.289894 101.214755 96.966758
Plot using df_new[['BTC', 'BTC_ewma_22']].plot():
In your loop for i,column in enumerate(column_by_search): you iterate over the elements in your column_by_search list, that is column takes on the values "BTC", "ETH", "DASH" in turn. Thus, len(column) will give you the length of the string "BTC", which is 3 in fact.
Try df[column] instead: that returns the desired column as a pandas Series, which has the length of the data and supports .iloc, so you can iterate over it.

Categories

Resources