Python recursive function failing - python

The issue that I am having is a really strange issue.
What I am trying to accomplish is the following: I am training a neural network using pytorch, and I want to restart my training function if the training loss doesn't decrease, so as to re-initialize the neural network with a different set of weights. The training function is presented below:
# Train the network selected by `net` for client `i` / fold `j`, print the mean training loss of
# each epoch, call itself again when the loss at epoch 10 is still above 0.2, then plot the loss
# curve and save the model weights.
# NOTE(review): the indentation of this listing was lost; the block structure described in the
# comments below is inferred from the statements and from the printed log — confirm.
# NOTE(review): the `epoch` parameter is rebound by `for epoch in epochs` below, so the value
# passed by the caller is never read.
def __train__(dp, i, j, net, restarts, epoch=0):
# Build a new model instance for the requested architecture and call .cuda() on it.
if net == '2CH': model = TwoChannelCNN().cuda()
elif net == 'Siam' : model = SiameseCNN().cuda()
elif net == 'Trad' : model = TraditionalCNN().cuda()
# NOTE(review): `reduce` is a deprecated MSELoss argument in recent PyTorch releases — confirm
# against the installed version.
ls_fn = torch.nn.MSELoss(reduce=True)
optim = torch.optim.SGD(model.parameters(), lr=1e-6, momentum=0.9)
# Epoch indices 0..99
epochs = np.arange(100)
# Mean training loss of every epoch, used for the plot at the end
eloss = []
for epoch in epochs:
model.train()
# Per-step losses of the current epoch
train_loss = []
# Split the training set into int(len/8) chunks (roughly 8 entries each)
tr_batches = np.array_split(dp.train_set, int(len(dp.train_set)/8))
for tr_batch in tr_batches:
# Pick the loader that matches the architecture
if net == '2CH': loaded_batch = dp.__load2CH__(tr_batch)
elif net == 'Siam': loaded_batch = dp.__loadSiam__(tr_batch)
elif net == 'Trad' : loaded_batch = dp.__load__(tr_batch, i)
for x_batch, y_batch in loaded_batch:
x_var, y_var = Variable(x_batch.cuda()), Variable(y_batch.cuda())
# Predictions are clamped to the range [0, 1] before the loss is computed
y_pred = torch.clamp(model(x_var), 0, 1)
loss = ls_fn(y_pred, y_var)
train_loss.append(abs(loss.item()))
# Standard optimisation step: clear gradients, backpropagate, update weights
optim.zero_grad()
loss.backward()
optim.step()
# Presumably executed once per epoch (the log shows one line per epoch)
eloss.append(np.mean(train_loss))
print(epoch, np.mean(train_loss))
# Restart check: at epoch 10, a mean loss above 0.2 triggers a nested call to __train__
if epoch == 10 and np.mean(train_loss) > 0.2:
restarts += 1
print('Number of restarts for client {} and fold {}: {}'.format(i,j,restarts))
# NOTE(review): no break/return follows this call, so once the nested call returns this
# invocation carries on with its own loop (epoch 11, 12, ...) and its own model.
__train__(dp, i, j, net, restarts, epoch=0)
# Presumably executed after the epoch loop: plot the per-epoch losses and save the weights.
# NOTE(review): every invocation writes to the same file name for a given (i, j).
__plotLoss__(epochs, eloss, 'train', str(i), str(j))
torch.save(model.state_dict(), "Output/client_{}_fold_{}.pt".format(i, j))
So the restarting based on if epoch == 10 and np.mean(train_loss) > 0.2: works, but only sometimes, which is beyond my comprehension. Here is an example of the output:
0 0.5000133737921715
1 0.4999906486272812
2 0.464298670232296
3 0.2727506290078163
4 0.2628978116512299
5 0.2588871221542358
6 0.25728522151708605
7 0.25630473804473874
8 0.2556223524808884
9 0.25522999209165576
10 0.25467908215522767
Number of restarts for client 5 and fold 1: 3
0 0.10957609283713009
1 0.02840371729924134
2 0.021477583368030594
3 0.017759160268232682
4 0.015173796122947827
5 0.013349939693290782
6 0.011949078906879265
7 0.010810676779671655
8 0.00987362345259362
9 0.009110640348696108
10 0.008239036202623808
11 0.007680381585537574
12 0.007171026876221333
13 0.006765962297888837
14 0.006428168776848068
15 0.006133011780953467
16 0.005819878347673745
17 0.005572605537395361
18 0.00535818950227004
19 0.005159409143814457
20 0.0049763926251294235
21 0.004738794513338235
22 0.004578812885309958
23 0.004428663117960554
24 0.004282198464788351
25 0.004145324644400691
26 0.004018862769889626
27 0.0039044404603504573
28 0.0037960831121495744
29 0.0036947361258523586
30 0.0035982220717533267
31 0.0035018146670104723
32 0.0034150678806059887
33 0.0033372560733512698
34 0.003261332974241583
35 0.00318166259540763
36 0.003108531899014735
37 0.0030385089141125848
38 0.002977990984523103
39 0.0029195284016142937
40 0.002870084639441188
41 0.0028180573325994373
42 0.0027717544270049643
43 0.002719321814503495
44 0.0026704726860933194
45 0.0026204266263459316
46 0.002570544072460258
47 0.0025225681523167224
48 0.0024814611543610746
49 0.0024358948737413116
50 0.002398673941639636
51 0.0023606415423654587
52 0.002330436484101057
53 0.0022891738560574027
54 0.002260655496376241
55 0.002227568955708719
56 0.002191826719741698
57 0.0021609061182290058
58 0.0021279943092100666
59 0.0020966088490456513
60 0.002066195117003474
61 0.0020381672924407895
62 0.002009863329306995
63 0.001986304977759602
64 0.0019564831849032487
65 0.0019351609173580756
66 0.0019077356409993626
67 0.0018875047204855945
68 0.0018617453310780547
69 0.001839518720600381
70 0.001815563331498197
71 0.0017149778925132932
72 0.0016894878409248121
73 0.0016652211918212743
74 0.0016422999463582074
75 0.0016183732903472788
76 0.0015962369183098418
77 0.0015757764620279887
78 0.0015542267022799728
79 0.0015323152910759318
80 0.0014337954093957706
81 0.001410489170542867
82 0.0013871921329466962
83 0.0013641994057461773
84 0.001345829172682187
85 0.001322142209181493
86 0.00130379223035348
87 0.001282231878045458
88 0.001263879886683956
89 0.001243419097817167
90 0.0012279346547037929
91 0.001206978429649382
92 0.0011871445969959496
93 0.001172510546330841
94 0.0011529557384797045
95 0.0011350733004023273
96 0.001118382818282214
97 0.001103347793609089
98 0.0010848538354748599
99 0.0010698940242660911
11 0.2542190085053444
12 0.2538975296020508
So here you can see that the restarting is correct from the 3rd restart, but then, since the network converges, the training should be complete, but the function restarts AGAIN after the 99th epoch (for an unknown reason), and somehow starts at the 11th epoch, which also makes no sense as I am explicitly specifying epoch = 0 whenever the function starts or restarts. I should also add that, SOMETIMES, the function completes correctly after the epoch 99, when convergence has been achieved, and does not restart.
So my question is, why does this piece of code produce inconsistent results and outcomes? What am I missing here? Thanks in advance for any suggestions.

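You are restarting the training by calling __train__ a second time when if epoch == 10 and np.mean(train_loss) > 0.2: is true, but you never terminate the first loop.
So, after the second training has converged and its call has returned, the outer loop simply continues at epoch 11.
What you need is a break statement after the inner call to __train__. Note that break only leaves the loop: if the __plotLoss__ and torch.save calls sit after the loop, the outer call will still run them and overwrite the file saved by the restarted run with its own unconverged model, so a return after the inner call is the safer choice.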

Related

How to use numpy random choice to get progressively longer sequences with the same numbers?

What I tried was this:
import numpy as np
# Draw `nr_selections` values from range(n) without replacement, weighted by `prob`,
# and print them prefixed with the number of selections.
def test_random(nr_selections, n, prob):
# replace=False: no value can be drawn twice within one call
selected = np.random.choice(n, size=nr_selections, replace= False, p = prob)
print(str(nr_selections) + ': ' + str(selected))
# Population size: values are drawn from 0..n-1
n = 100
# n random integer weights from range(100), normalised on the next line so they sum to 1.
# NOTE(review): this draw happens before any seed is set, so `prob` differs between program runs.
prob = np.random.choice(100, n)
prob = prob / np.sum(prob) #only for demonstration purpose
# Request 10, 20, ..., 90 selections, resetting the global seed before every call
for i in np.arange(10, 100, 10):
np.random.seed(123)
test_random(i, n, prob)
The result was:
10: [68 32 25 54 72 45 96 67 49 40]
20: [68 32 25 54 72 45 96 67 49 40 36 74 46 7 21 20 53 65 89 77]
30: [68 32 25 54 72 45 96 67 49 40 36 74 46 7 21 20 53 62 86 60 35 37 8 48
52 47 31 92 95 56]
40: ...
Contrary to my expectation and hope, the 30 numbers selected do not contain all of the 20 numbers. I also tried using numpy.random.default_rng, but only strayed further away from my desired output. I also simplified the original problem somewhat in the above example. Any help would be greatly appreciated. Thank you!
Edit for clarification: I do not want to generate all the sequences in one loop (like in the example above) but rather use the related sequences in different runs of the same program. (Ideally, without storing them somewhere)

Python, trying to calculate RSI but I am getting unusually high numbers

I am trying to calculate the RSI formula in python. I am getting the closing price data from the AlphaVantage TimeSeries API.
# Compute an RSI-style value from a window of `period` entries of `data` and return it.
# NOTE(review): the indentation of this listing was lost; the loop body is presumably the
# lines up to `previous_price = data[i]` — confirm.
def rsi(data,period):
# Index of the last entry of `data`
length = len(data) - 1
current_price = 0
# NOTE(review): because this starts at 0, the first loop iteration compares the first window
# price against 0, so that entire price is added to avg_up.
previous_price = 0
# Running sums of gains and losses; both are divided by `period` after the loop
avg_up = 0
avg_down = 0
# Visits indices length-period .. length-1; index `length` (the last entry) is not visited
for i in range(length-period,length):
current_price = data[i]
if current_price > previous_price:
avg_up += current_price - previous_price
else:
avg_down += previous_price - current_price
previous_price = data[i]
# Calculate average gain and loss
avg_up = avg_up/period
avg_down = avg_down/period
# Calculate relative strength
# NOTE(review): raises ZeroDivisionError when the window contains no losses (avg_down == 0)
rs = avg_up/avg_down
# Calculate rsi
rsi = 100 - (100/(1+rs))
return rsi
print(rsi(data=closing_price,period=14))
In this case, this will output a really high number along the lines of RSI: 99.824. But according to TradingView, the current RSI is actually 62.68.
Any feedback on what I am doing wrong would be very much appreciated!
Here is some data; it is 100 minutes of AAPL data
0
0 118.3900
1 118.4200
2 118.3500
3 118.3000
4 118.2800
5 118.4000
6 118.3400
7 118.4500
8 118.3900
9 118.4100
10 118.4700
11 118.4000
12 118.4000
13 118.3400
14 118.4100
15 118.2850
16 118.2900
17 118.1700
18 118.2600
19 118.2800
20 118.2600
21 118.2400
22 118.2950
23 118.2800
24 118.2900
25 118.2850
26 118.3000
27 118.2150
28 118.2300
29 118.1450
30 118.1200
31 118.0800
32 118.1300
33 118.1100
34 118.1300
35 118.2300
36 118.1000
37 118.1900
38 118.2800
39 118.2400
40 118.2300
41 118.3300
42 118.3200
43 118.3500
44 118.3600
45 118.3650
46 118.3800
47 118.4500
48 118.5000
49 118.5100
50 118.5400
51 118.5100
52 118.5063
53 118.5200
54 118.5400
55 118.4700
56 118.4700
57 118.4300
58 118.4400
59 118.4300
60 118.3800
61 118.4000
62 118.3600
63 118.3700
64 118.3400
65 118.3200
66 118.3000
67 118.3210
68 118.3714
69 118.4000
70 118.4100
71 118.3500
72 118.3300
73 118.3200
74 118.3250
75 118.3200
76 118.3900
77 118.5000
78 118.4800
79 118.5300
80 118.5300
81 118.4800
82 118.5000
83 118.4400
84 118.5400
85 118.5550
86 118.5200
87 118.4600
88 118.4500
89 118.4400
90 118.4300
91 118.4019
92 118.4400
93 118.4400
94 118.4100
95 118.4000
96 118.4400
97 118.4400
98 118.4600
99 118.5050
I've managed to compute 59.4 with the code below, which is close to the value you are looking for. Here is what I've changed:
- The averages are divided by the n_up and n_down counters, not by period.
- The previous_price and current_price variables are no longer used; the loop reads the current price data[i] and the previous price data[i-1] directly.
Note that the code still has to be checked with other data.
close_AAPL = [118.4200, 118.3500, 118.3000, 118.2800, 118.4000,
118.3400, 118.4500, 118.3900, 118.4100, 118.4700,
118.4000, 118.4000, 118.3400, 118.4100, 118.2850,
118.2900, 118.1700, 118.2600, 118.2800, 118.2600,
118.2400, 118.2950, 118.2800, 118.2900, 118.2850,
118.3000, 118.2150, 118.2300, 118.1450, 118.1200,
118.0800, 118.1300, 118.1100, 118.1300, 118.2300,
118.1000, 118.1900, 118.2800, 118.2400, 118.2300,
118.3300, 118.3200, 118.3500, 118.3600, 118.3650,
118.3800, 118.4500, 118.5000, 118.5100, 118.5400,
118.5100, 118.5063, 118.5200, 118.5400, 118.4700,
118.4700, 118.4300, 118.4400, 118.4300, 118.3800,
118.4000, 118.3600, 118.3700, 118.3400, 118.3200,
118.3000, 118.3210, 118.3714, 118.4000, 118.4100,
118.3500, 118.3300, 118.3200, 118.3250, 118.3200,
118.3900, 118.5000, 118.4800, 118.5300, 118.5300,
118.4800, 118.5000, 118.4400, 118.5400, 118.5550,
118.5200, 118.4600, 118.4500, 118.4400, 118.4300,
118.4019, 118.4400, 118.4400, 118.4100, 118.4000,
118.4400, 118.4400, 118.4600, 118.5050]
def rsi(data, period):
    """Return a Relative Strength Index (0-100) over a window of price changes.

    The window consists of the ``period`` changes ``data[i] - data[i-1]`` for
    ``i`` in ``range(len(data) - 1 - period, len(data) - 1)``. Rises and
    non-rises are summed separately and each sum is averaged over its own
    count (``n_up`` / ``n_down``), not over ``period``.

    NOTE(review): the window stops one element before the end of ``data``
    (the final price is never read). This is kept unchanged so that existing
    results stay the same; confirm whether it is intended.

    Args:
        data: Indexable sequence of prices, oldest first.
        period: Number of price changes in the window (>= 1).

    Returns:
        The RSI as a float. 100.0 is returned when the window contains no
        losses (relative strength is unbounded) and 0.0 when it contains no
        gains, instead of dividing by zero.

    Raises:
        ValueError: If ``period`` is smaller than 1 or ``data`` has fewer than
            ``period + 2`` entries (the loop would otherwise wrap around to
            negative indices).
    """
    length = len(data) - 1
    start = length - period
    if period < 1 or start < 1:
        raise ValueError(
            "rsi needs period >= 1 and at least period + 2 data points"
        )

    total_up = 0
    n_up = 0
    total_down = 0
    n_down = 0
    for i in range(start, length):
        change = data[i] - data[i - 1]
        if data[i] > data[i - 1]:
            total_up += change
            n_up += 1
        else:
            # Unchanged prices are counted here too, contributing a loss of 0.
            total_down += data[i - 1] - data[i]
            n_down += 1

    # Guard the two divisions below: no losses -> RS is unbounded -> RSI 100;
    # no gains -> RS is 0 -> RSI 0.
    if total_down == 0:
        return 100.0
    if total_up == 0:
        return 0.0

    # Calculate average gain and loss
    avg_up = total_up / n_up
    avg_down = total_down / n_down
    # Calculate relative strength
    rs = avg_up / avg_down
    # Calculate rsi
    return 100. - 100. / (1 + rs)
print(rsi(data=close_AAPL, period=14))

how to print unicode number series in python?

I am just trying to print the Unicode number ranging from 1 to 100 in python. I have searched a lot in StackOverflow but no question answers my queries.
So basically I want to print Bengali numbers from ১ to ১০০. The corresponding English number is 1 to 100.
What I have tried is to get the Unicode number of ১ which is '\u09E7'. Then I have tried to increase this number by 1 as depicted in the following code:
x = '\u09E7'
print(x+1)
But the above code says to me the following output.
TypeError: can only concatenate str (not "int") to str
So what I want is to get a number series as following:
১, ২, ৩, ৪, ৫, ৬, ৭, ৮, ৯, ১০, ১১, ১২, ১৩, ............, ১০০
TypeError: can only concatenate str (not "int") to str1
I wish if there is any solution to this. Thank you.
Make a translation table. The function str.maketrans() takes a string of characters and a string of replacements and builds a translation dictionary of Unicode ordinals to Unicode ordinals. Then, convert a counter variable to a string and use the translate() function on the result to convert the string:
#coding:utf8
# Print the numbers 1..100 side by side in ASCII and in Bengali digits.
ascii_digits = '0123456789'
bengali_digits = '০১২৩৪৫৬৭৮৯'
# Translation table mapping every ASCII digit to the Bengali digit at the same position.
xlat = str.maketrans(ascii_digits, bengali_digits)
for i in range(1, 101):
    # Render the counter in ASCII, then swap each digit through the table.
    bengali = str(i).translate(xlat)
    print(f'{i:3d} {bengali}', end=' ')
Output:
1 ১ 2 ২ 3 ৩ 4 ৪ 5 ৫ 6 ৬ 7 ৭ 8 ৮ 9 ৯ 10 ১০ 11 ১১ 12 ১২ 13 ১৩ 14 ১৪ 15 ১৫ 16 ১৬ 17 ১৭ 18 ১৮ 19 ১৯ 20 ২০ 21 ২১ 22 ২২ 23 ২৩ 24 ২৪ 25 ২৫ 26 ২৬ 27 ২৭ 28 ২৮ 29 ২৯ 30 ৩০ 31 ৩১ 32 ৩২ 33 ৩৩ 34 ৩৪ 35 ৩৫ 36 ৩৬ 37 ৩৭ 38 ৩৮ 39 ৩৯ 40 ৪০ 41 ৪১ 42 ৪২ 43 ৪৩ 44 ৪৪ 45 ৪৫ 46 ৪৬ 47 ৪৭ 48 ৪৮ 49 ৪৯ 50 ৫০ 51 ৫১ 52 ৫২ 53 ৫৩ 54 ৫৪ 55 ৫৫ 56 ৫৬ 57 ৫৭ 58 ৫৮ 59 ৫৯ 60 ৬০ 61 ৬১ 62 ৬২ 63 ৬৩ 64 ৬৪ 65 ৬৫ 66 ৬৬ 67 ৬৭ 68 ৬৮ 69 ৬৯ 70 ৭০ 71 ৭১ 72 ৭২ 73 ৭৩ 74 ৭৪ 75 ৭৫ 76 ৭৬ 77 ৭৭ 78 ৭৮ 79 ৭৯ 80 ৮০ 81 ৮১ 82 ৮২ 83 ৮৩ 84 ৮৪ 85 ৮৫ 86 ৮৬ 87 ৮৭ 88 ৮৮ 89 ৮৯ 90 ৯০ 91 ৯১ 92 ৯২ 93 ৯৩ 94 ৯৪ 95 ৯৫ 96 ৯৬ 97 ৯৭ 98 ৯৮ 99 ৯৯ 100 ১০০
You can try this. Convert the character to an integer, do the addition, and then convert it back to a character. If the number is 10 or greater, you have to convert each digit to a character separately; that's why we are using modulo %.
# Print `num` (expected: a non-negative integer defined before this snippet)
# using Bengali digits.
# The original branches covered 0-9, 10-99 and printed a fixed "১০০" for anything
# larger; the loop below yields the same output for 0-100 and also handles
# larger numbers.
# Code point of the Bengali digit zero (০); adding 0-9 to it gives the matching digit.
zero = ord('\u09E6')
# Peel off decimal digits from the right with modulo and prepend each converted digit.
digits = chr(zero + num % 10)
rest = num // 10
while rest > 0:
    digits = chr(zero + rest % 10) + digits
    rest //= 10
print(digits)
You can try running it here
https://repl.it/repls/GloomyBewitchedMultitasking
EDIT:
Providing also javascript code as asked in comments.
/**
 * Convert a non-negative integer to a string of Bengali digits.
 *
 * Digits are peeled off from the right with modulo and each one is mapped to
 * the character at the same offset from the Bengali zero (০). Unlike the
 * earlier version, this declares its variables locally (no implicit globals),
 * does not reassign the parameter, and handles numbers above 100 instead of
 * always returning "১০০" for them.
 *
 * @param {number} num - Non-negative integer to convert.
 * @returns {string} `num` written with Bengali digits.
 */
function getAsciiNum(num){
    const zero = "০".charCodeAt(0)
    let digits = String.fromCharCode(zero + num % 10)
    for (let rest = Math.floor(num / 10); rest > 0; rest = Math.floor(rest / 10)) {
        digits = String.fromCharCode(zero + rest % 10) + digits
    }
    return digits
}
console.log(getAsciiNum(88))

Python: Predicting series of numbers without INPUT to a NN

I have a random list of series (integers) along with dates in a csv like:
1/1/2019,34 44 57 62 70
12/28/2018,09 10 25 37 38
12/25/2018,02 08 42 43 50
12/21/2018,10 13 61 62 70
12/18/2018,13 22 32 60 69
12/14/2018,05 22 26 43 49
12/11/2018,04 38 39 54 59
12/7/2018,04 10 20 33 57
12/4/2018,28 31 41 42 50
The list goes all the way back to the year 1997. What I am trying to do is predict the next series (or get as close as possible) based on these data:
The size of the list (2336)
What have I tried?
The approach that I've used so far is (e.g. for 1/1/2019,34 44 57 62 70):
1) Get the occurrence of each number in the list, i.e. the number 34 has occurred 170 times out of the total list (2336).
2) Find the percentage of each number that has occurred. i.e.
Perc/Chances(34) = Occurrence/TotalNo.
Chances(34) = 170/2336
Chances(34) = 0.072 ~ 07
One way to get the list would be to just find the 5 numbers from the list with the lowest percentages, but that wouldn't be very effective.
On the other hand, Now I have a data which has each number, its percentage and its occurrence. Is there any way I can somehow train a neural network that predicts the next series? or closest.
Hierarchy:
Where comp_data.csv contains data like:
1/1/2019,34 44 57 62 70
12/28/2018,09 10 25 37 38
12/25/2018,02 08 42 43 50
12/21/2018,10 13 61 62 70
12/18/2018,13 22 32 60 69
12/14/2018,05 22 26 43 49
12/11/2018,04 38 39 54 59
12/7/2018,04 10 20 33 57
12/4/2018,28 31 41 42 50
and occurrence.csv contains:
34,170
44,197
57,36
62,38
70,37
09,186
10,210
25,197
37,185
38,206
02,217
08,185
and report.csv contains the number, occurrence and its percentage:
34,3,11
44,1,03
57,5,19
62,5,19
70,5,19
09,1,03
10,5,19
25,2,07
37,3,11
38,2,07
02,1,03
08,2,07
So I have the list of series, its occurrences over a period of time, and the percentages. Is there any way I can create a NN that expects some INPUTS, trains over the data and predicts the OUTPUT (a series in this case)?
The Problem:
Which ones would be the Input? As it is a pure random problem. PS. I cannot provide any Input since I need a series without INPUT. Perhaps, a LSTM Network for Regression?

Theano dimensionality error - target dimensions

I am using lasagne's Conv3DDNNLayer, and have input dimensions of (N x 1 x 9 x 9 x 9), where each 9x9x9 cube is my sample to be classified.
Therefore I have a target dimension of (N x 1), with each entry corresponding to a cube. This is raising the error:
Bad input argument to theano function with name "Conv_Net_1.py:45" at index 1(0-based)', 'Wrong number of dimensions: expected 1,
got 2 with shape (324640, 1).')´
Which dimensions should my targets have in this case?
11 dtensor5 = TensorType('float32', (False,)*5)
12 input_var = dtensor5('X_Train')
13 target_var = T.ivector('Y_train')
14
15 X_train, Y_train = DP.data_gen( '/home/Upload/Smalls', 9)
16 print X_train.shape
17 print Y_train.shape
18 # Build Neural Network:
19 input = lasagne.layers.InputLayer((None, 1, 9, 9, 9), input_var=input_var)
20
21 l_conv_1 = lasagne.layers.dnn.Conv3DDNNLayer(input, 20, (2,2,2))
22
29 l_hidden1 = lasagne.layers.DenseLayer(l_conv_1, num_units=256,nonlinearity=lasagne.nonlinearities.rectify,W=lasagne.init.HeNormal(gain='relu'))
30
31 l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, p=0.5)
32
33 output = lasagne.layers.DenseLayer(l_hidden1_dropout, num_units=2, nonlinearity = lasagne.nonlinearities.softmax)
34
35 ##
36 prediction = lasagne.layers.get_output(output)
37 loss = T.mean(lasagne.objectives.categorical_crossentropy(prediction, target_var)
39
40 # Get list of all trainable parameters in the network.
41 params = lasagne.layers.get_all_params(output, trainable=True)
42 updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.3)
43
44 ##
45 train_fn = theano.function([input_var, target_var], loss, updates=updates)
46
47 ##
48 for epoch in range(500):
49 print('training')
50 loss = train_fn(X_train, Y_train)
51 print(loss.type)
52 print("Epoch %d: Loss %g" % (epoch + 1, loss))
53
54
55 ##
56 test_prediction = lasagne.layers.get_output(output, deterministic=True)
57 predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))
edit - added code
Thanks!
In case anyone is interested, it was because the target data had shape (N, 1) rather than (N,).
Changing the targets to shape (N,) seemed to solve the problem! On to the next one...

Categories

Resources