I'm just attempting to return values from a defined function. When I call the function first and then try to print the returned variables, I get "[variable] not defined". However, if I run "print(qb_stat_filler())" it prints the results as a tuple. I need the individual variables returned so I can use them in a separate function.
For example:
print(qb_stat_filler())
outputs: (0, 11, 24, 24.2024, 39.1143, 293.0, 1.9143000000000001, 0.2262, 97.84333355313255)
but when trying
qb_stat_filler()
print(cmp_avg)
print(att_avg)
outputs: NameError: name 'cmp_avg' is not defined
Process finished with exit code 1
I've tried establishing the variables outside of the function, then passing them in and returning them, and that did not work either. Any thoughts?
def qb_stat_filler():
n_input = input('Player name: ')
t_input = input('Players team: ')
loc_input = input('H or #: ')
o_input = input('Opponent: ')
# convert index csv to dictionary of player values
q = pd.read_csv('Models\\QB Indexes\\QBname.csv')
q = q[['Player', 'Num']]
qb_dict = dict(q.values)
name = qb_dict.get('{}'.format(n_input))
t = pd.read_csv('Models\\QB Indexes\\Tmname.csv')
t = t[['Tm', 'Num']]
tm_dict = dict(t.values)
team = tm_dict.get('{}'.format(t_input))
loc = 0
if loc_input == '#':
loc = 0
elif loc_input == 'H':
loc = 1
z = pd.read_csv('Models\\QB Indexes\\Oppname.csv')
z = z[['Opp', 'Num']]
opp_dict = dict(z.values)
opp = opp_dict.get('{}'.format(o_input))
    # ... several lines of code here that involve SQL queries and data cleansing ...
cmp_avg = (cmp_match + cmpL4) / 2
att_avg = (patt_match + pattL4) / 2
pyds_avg = (py_match + pydsL4) / 2
ptd_avg = (ptdL4 + ptd_match) / 2
int_avg = (intL4 + int_match) / 2
qbr_avg = (qbr_match + qbrL4) / 2
    return name, team, opp, cmp_avg, att_avg, pyds_avg, ptd_avg, int_avg, qbr_avg
qb_stat_filler()
You might consider:
def qb_stat_filler():
stats = {}
...
stats['name'] = name
z = z[['Opp', 'Num']]
opp_dict = dict(z.values)
stats['opp'] = opp_dict.get('{}'.format(o_input))
...
stats['cmp_avg'] = (cmp_match + cmpL4) / 2
stats['att_avg'] = (patt_match + pattL4) / 2
stats['pyds_avg'] = (py_match + pydsL4) / 2
stats['ptd_avg'] = (ptdL4 + ptd_match) / 2
stats['int_avg'] = (intL4 + int_match) / 2
stats['qbr_avg'] = (qbr_match + qbrL4) / 2
return stats
...
stats = qb_stat_filler()
print(stats['cmp_avg'])
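Alternatively, since qb_stat_filler() already returns a tuple, you could unpack it directly at the call site. A minimal sketch, assuming the return order shown in the question:

name, team, opp, cmp_avg, att_avg, pyds_avg, ptd_avg, int_avg, qbr_avg = qb_stat_filler()
print(cmp_avg)
print(att_avg)

The variables only exist inside the function, so they have to be captured from the return value in the calling scope.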
I have a dataframe of OHLCV data. Does anyone know of a tutorial, or any way of finding the ADX (average directional movement index), using pandas?
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
start = dt.datetime.today() - dt.timedelta(59)
end = dt.datetime.today()
df = pd.DataFrame(yf.download("MSFT", start=start, end=end))
The average directional index, or ADX, is the primary technical indicator among the five indicators that make up a technical trading system developed by J. Welles Wilder, Jr. and is calculated using the other indicators that make up the trading system. The ADX is primarily used as an indicator of momentum, or trend strength, but the total ADX system is also used as a directional indicator.
Directional movement is calculated by comparing the difference between two consecutive lows with the difference between their respective highs.
For the Excel calculation of ADX, this is a really good video:
https://www.youtube.com/watch?v=LKDJQLrXedg&t=387s
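As a minimal pandas sketch of that definition (assuming an OHLCV dataframe like the one in the question, with 'High' and 'Low' columns):

import numpy as np
up_move = df['High'].diff()            # current high minus previous high
down_move = -df['Low'].diff()          # previous low minus current low
plus_dm = np.where((up_move > down_move) & (up_move > 0), up_move, 0.0)
minus_dm = np.where((down_move > up_move) & (down_move > 0), down_move, 0.0)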
I was playing with this a little bit and found something that can help you with the issue:
def ADX(data: pd.DataFrame, period: int):
"""
Computes the ADX indicator.
"""
df = data.copy()
alpha = 1/period
# TR
df['H-L'] = df['High'] - df['Low']
df['H-C'] = np.abs(df['High'] - df['Close'].shift(1))
df['L-C'] = np.abs(df['Low'] - df['Close'].shift(1))
df['TR'] = df[['H-L', 'H-C', 'L-C']].max(axis=1)
del df['H-L'], df['H-C'], df['L-C']
# ATR
df['ATR'] = df['TR'].ewm(alpha=alpha, adjust=False).mean()
# +-DX
df['H-pH'] = df['High'] - df['High'].shift(1)
df['pL-L'] = df['Low'].shift(1) - df['Low']
df['+DX'] = np.where(
(df['H-pH'] > df['pL-L']) & (df['H-pH']>0),
df['H-pH'],
0.0
)
df['-DX'] = np.where(
(df['H-pH'] < df['pL-L']) & (df['pL-L']>0),
df['pL-L'],
0.0
)
del df['H-pH'], df['pL-L']
# +- DMI
df['S+DM'] = df['+DX'].ewm(alpha=alpha, adjust=False).mean()
df['S-DM'] = df['-DX'].ewm(alpha=alpha, adjust=False).mean()
df['+DMI'] = (df['S+DM']/df['ATR'])*100
df['-DMI'] = (df['S-DM']/df['ATR'])*100
del df['S+DM'], df['S-DM']
# ADX
df['DX'] = (np.abs(df['+DMI'] - df['-DMI'])/(df['+DMI'] + df['-DMI']))*100
df['ADX'] = df['DX'].ewm(alpha=alpha, adjust=False).mean()
del df['DX'], df['ATR'], df['TR'], df['-DX'], df['+DX'], df['+DMI'], df['-DMI']
return df
At the beginning the values aren't correct (as is always the case with the EWM approach), but after several periods they converge to the correct values.
Math was taken from here.
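A usage sketch with the MSFT dataframe from the question, assuming the standard 14-period setting:

adx_df = ADX(df, 14)
print(adx_df[['Close', 'ADX']].tail())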
def ADX(df):
def getCDM(df):
dmpos = df["High"][-1] - df["High"][-2]
dmneg = df["Low"][-2] - df["Low"][-1]
if dmpos > dmneg:
return dmpos
else:
return dmneg
def getDMnTR(df):
DMpos = []
DMneg = []
TRarr = []
n = round(len(df)/14)
idx = n
while n <= (len(df)):
dmpos = df["High"][n-1] - df["High"][n-2]
dmneg = df["Low"][n-2] - df["Low"][n-1]
DMpos.append(dmpos)
DMneg.append(dmneg)
            a1 = df["High"][n-1] - df["Low"][n-1]
a2 = df["High"][n-1] - df["Close"][n-2]
a3 = df["Low"][n-1] - df["Close"][n-2]
TRarr.append(max(a1,a2,a3))
n = idx + n
return DMpos, DMneg, TRarr
def getDI(df):
DMpos, DMneg, TR = getDMnTR(df)
CDM = getCDM(df)
POSsmooth = (sum(DMpos) - sum(DMpos)/len(DMpos) + CDM)
NEGsmooth = (sum(DMneg) - sum(DMneg)/len(DMneg) + CDM)
DIpos = (POSsmooth / (sum(TR)/len(TR))) *100
DIneg = (NEGsmooth / (sum(TR)/len(TR))) *100
return DIpos, DIneg
def getADX(df):
DIpos, DIneg = getDI(df)
dx = (abs(DIpos- DIneg) / abs(DIpos + DIneg)) * 100
ADX = dx/14
return ADX
return(getADX(df))
print(ADX(df))
This gives you the exact same numbers as TradingView and thinkorswim.
import numpy as np
def ema(arr, periods=14, weight=1, init=None):
leading_na = np.where(~np.isnan(arr))[0][0]
arr = arr[leading_na:]
alpha = weight / (periods + (weight-1))
alpha_rev = 1 - alpha
n = arr.shape[0]
pows = alpha_rev**(np.arange(n+1))
out1 = np.array([])
if 0 in pows:
out1 = ema(arr[:int(len(arr)/2)], periods)
arr = arr[int(len(arr)/2) - 1:]
init = out1[-1]
n = arr.shape[0]
pows = alpha_rev**(np.arange(n+1))
scale_arr = 1/pows[:-1]
if init:
offset = init * pows[1:]
else:
offset = arr[0]*pows[1:]
pw0 = alpha*alpha_rev**(n-1)
mult = arr*pw0*scale_arr
cumsums = mult.cumsum()
out = offset + cumsums*scale_arr[::-1]
out = out[1:] if len(out1) > 0 else out
out = np.concatenate([out1, out])
out[:periods] = np.nan
out = np.concatenate(([np.nan]*leading_na, out))
return out
def atr(highs, lows, closes, periods=14, ema_weight=1):
hi = np.array(highs)
lo = np.array(lows)
c = np.array(closes)
tr = np.vstack([np.abs(hi[1:]-c[:-1]),
np.abs(lo[1:]-c[:-1]),
(hi-lo)[1:]]).max(axis=0)
atr = ema(tr, periods=periods, weight=ema_weight)
atr = np.concatenate([[np.nan], atr])
return atr
def adx(highs, lows, closes, periods=14):
highs = np.array(highs)
lows = np.array(lows)
closes = np.array(closes)
up = highs[1:] - highs[:-1]
down = lows[:-1] - lows[1:]
up_idx = up > down
down_idx = down > up
updm = np.zeros(len(up))
updm[up_idx] = up[up_idx]
updm[updm < 0] = 0
downdm = np.zeros(len(down))
downdm[down_idx] = down[down_idx]
downdm[downdm < 0] = 0
_atr = atr(highs, lows, closes, periods)[1:]
updi = 100 * ema(updm, periods) / _atr
downdi = 100 * ema(downdm, periods) / _atr
zeros = (updi + downdi == 0)
downdi[zeros] = .0000001
adx = 100 * np.abs(updi - downdi) / (updi + downdi)
adx = ema(np.concatenate([[np.nan], adx]), periods)
return adx
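A usage sketch, again assuming the OHLCV dataframe from the question:

df['ADX'] = adx(df['High'], df['Low'], df['Close'], periods=14)
print(df[['Close', 'ADX']].tail())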
I'm trying to program a neural network to play noughts and crosses (also known as tic tac toe). It works well enough to play against and decreases the loss function when I train it, but only up to a point, after which it plateaus. I have tried playing it and it is still not much better than a random bot.
I have already tried adjusting the learning rate and the size of the hidden layer. I have also previously tried training it on my games against it, as well as training it only based upon the victorious side (interestingly, this minimised the loss function better than my current version, but only tried to attack, and had no clue that it had to prevent me from trying to win). In addition to this, I have tried changing the learning rate so that it decreases by 5 or 10% every 100 self-play games.
I have had a look online but haven't found any python neural networks for tic tac toe that I can compare mine to in order to debug.
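For reference, the learning-rate decay described above is just a multiplicative update applied between blocks of self-play games. A sketch with assumed starting values:

learning_rate = 0.5   # assumed starting value
decay = 0.95          # 5% reduction after every 100 self-play games
for block in range(100):
    # ... play 100 self-play games and train on them ...
    learning_rate *= decay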
import random
def bot_go(player_to_move, game_over, board):
played = False
iteration = 0
while played is False:
move, input_layer, hidden_layer, output_layer = neural_net_move(iteration, board)
if board[int(move[0])][int(move[1])] == "-":
played = True
board[int(move[0])][int(move[1])] = player_to_move
if check_for_win(player_to_move, board) is True:
game_over = True
elif check_for_draw(board) is True:
game_over = True
if player_to_move == "X":
player_to_move = "O"
else:
player_to_move = "X"
iteration += 1
return game_over, player_to_move, move, input_layer, hidden_layer, output_layer
def neural_net_move(iteration, board):
neural_network_input = {}
neural_network_hidden_layer = {}
neural_network_output = []
layer_1_weights, layer_2_weights, bias = get_neural_network()
# initialises the input layer
for i in range(9):
if board[i // 3][i % 3] == "X":
neural_network_input[i] = 0
elif board[i // 3][i % 3] == "O":
neural_network_input[i] = 1
else:
neural_network_input[i] = 0.5
# calculates the hidden layer neuron values
for i in range(Global_variables.hidden_layer_size):
net_total = 0
for j in range(9):
net_total += neural_network_input[j]*layer_1_weights[str(j) + str(i)]
neural_network_hidden_layer[i] = (1/(1 + 2.718**(-net_total)))
# calculates neural network output
for i in range(9):
net_total = 0
for j in range(Global_variables.hidden_layer_size):
net_total += neural_network_hidden_layer[j] * layer_2_weights[str(j) + str(i)]
net_total += bias * layer_2_weights[str(Global_variables.hidden_layer_size) + str(i)]
neural_network_output.append(1 / (1 + (2.718 ** (-net_total))))
# finds output value by ordering the outputs in terms of size
order_of_size = [0]
for i in range(1, len(neural_network_output)):
inserted = False
for j in range(len(order_of_size)):
if neural_network_output[i] > neural_network_output[order_of_size[j]] and inserted is False:
order_of_size.insert(j, i)
inserted = True
elif j == len(order_of_size) - 1 and inserted is False:
order_of_size.append(i)
move = [order_of_size[iteration] // 3, order_of_size[iteration] % 3]
return move, neural_network_input, neural_network_hidden_layer, neural_network_output
def train_neural_network(input_layer, hidden_layer, output_layer, actual_move):
layer_1_weights, layer_2_weights, bias = get_neural_network()
new_l1_weights = {}
new_l2_weights = {}
# calculates total error
total_error = 0
for i in range(len(output_layer)):
if actual_move[0] * 3 + actual_move[1] == i:
total_error += ((1 - output_layer[i])**2)/2
else:
total_error += 0.5*((output_layer[i])**2)
# adjusts second layer weights
for i in range((hidden_layer_size + 1)*9):
if actual_move[0] * 3 + actual_move[1] == i % 9:
d_error_by_d_output_node = output_layer[i % 9] - 1
else:
d_error_by_d_output_node = output_layer[i % 9]
d_output_node_by_d_node_net_value = output_layer[i % 9]*(1 - output_layer[i % 9])
if i // 9 != hidden_layer_size:
d_node_net_value_by_d_weight = hidden_layer[i // 9]
else:
d_node_net_value_by_d_weight = bias
d_error_by_d_weight = d_error_by_d_output_node*d_output_node_by_d_node_net_value*d_node_net_value_by_d_weight
new_l2_weights[str(i // 9) + str(i % 9)] = \
layer_2_weights[str(i // 9) + str(i % 9)] - learning_rate*d_error_by_d_weight
# adjusts bias
d_error_by_d_bias = 0
for i in range(9):
d_node_net_value_by_d_bias = layer_2_weights[str(hidden_layer_size) + str(i)]
d_output_node_by_d_node_net_value = output_layer[i]*(1 - output_layer[i])
if actual_move[0] * 3 + actual_move[1] == i:
d_error_by_d_output_node = output_layer[i] - 1
else:
d_error_by_d_output_node = output_layer[i]
d_error_by_d_bias += d_node_net_value_by_d_bias * d_output_node_by_d_node_net_value * d_error_by_d_output_node
bias = bias - learning_rate * d_error_by_d_bias
# adjusts first layer weights
for i in range(hidden_layer_size*9):
d_error_by_d_weight = 0
if i // 9 != hidden_layer_size:
d_output_of_node_by_d_node_net_value = \
hidden_layer[i % hidden_layer_size]*(1 - hidden_layer[i % hidden_layer_size])
else:
d_output_of_node_by_d_node_net_value = \
bias * (1 - bias)
d_node_net_value_by_d_weight = input_layer[i // Global_variables.hidden_layer_size]
for j in range(9):
d_output_node_net_value_by_d_output_of_node = layer_2_weights[str(i // 9) + str(j)]
d_output_node_by_d_output_node_net_value = output_layer[j]*(1 - output_layer[j])
if actual_move[0] * 3 + actual_move[1] == i:
d_error_by_d_output_node = output_layer[j] - 1
else:
d_error_by_d_output_node = output_layer[j]
d_error_by_d_weight += d_output_of_node_by_d_node_net_value * d_node_net_value_by_d_weight * \
d_output_node_net_value_by_d_output_of_node * d_output_node_by_d_output_node_net_value * \
d_error_by_d_output_node
new_l1_weights[str(i // hidden_layer_size) + str(i % hidden_layer_size)] = \
layer_1_weights[str(i // hidden_layer_size) + str(i % hidden_layer_size)] - \
d_error_by_d_weight * learning_rate
network_file = open("neural network", "w")
line = ""
for i in range(9 * hidden_layer_size):
line += str(new_l1_weights[str(i // hidden_layer_size) + str(i % hidden_layer_size)]) + " "
network_file.write(line + "\n")
line = ""
for i in range(9 * (hidden_layer_size + 1)):
line += str(new_l2_weights[str(i // 9) + str(i % 9)]) + " "
network_file.write(line + "\n")
network_file.write(str(bias))
network_file.close()
return total_error
def get_neural_network():
layer_1_weights = {}
layer_2_weights = {}
# opens text file holding neural network
network_file = open("neural network", "r")
network = network_file.readlines()
network_file.close()
# gets the neural network weights from the text file
weight_list = network[0].split()
for i in range(len(weight_list)):
layer_1_weights[str(i // Global_variables.hidden_layer_size) + str(i % Global_variables.hidden_layer_size)] = float(weight_list[i])
weight_list = network[1].split()
for i in range(len(weight_list)):
layer_2_weights[str(i // 9) + str(i % 9)] = float(weight_list[i])
bias = 1
return layer_1_weights, layer_2_weights, bias
def make_up_neural_net():
network_file = open("neural network", "w")
line = ""
for i in range(9*Global_variables.hidden_layer_size):
line += str(random.random()) + " "
network_file.write(line + "\n")
line = ""
for i in range(9*(Global_variables.hidden_layer_size + 1)):
line += str(random.random()) + " "
network_file.write(line + "\n")
network_file.write(str(random.random()))
network_file.close()
def main():
error = 0
make_up_neural_net()
for i in range(100):
for j in range(100):
game_over = False
winner = ""
player_to_move = "X"
board = set_up_board()
o_moves = []
x_moves = []
while game_over is False:
if player_to_move == "X":
game_over, player_to_move, move, input_layer, hidden_layer, output_layer = bot_go(player_to_move, game_over, board)
x_moves.append([move, input_layer, hidden_layer, output_layer])
if game_over is True:
winner = "X"
else:
game_over, player_to_move, move, input_layer, hidden_layer, output_layer = bot_go(player_to_move, game_over, board)
o_moves.append([move, input_layer, hidden_layer, output_layer])
if game_over is True:
winner = "O"
if winner == "X":
for move in x_moves:
error = train_neural_network(move[1], move[2], move[3], move[0])
for move in o_moves:
error = un_train_neural_network(move[1], move[2], move[3], move[0])
else:
for move in o_moves:
error = train_neural_network(move[1], move[2], move[3], move[0])
for move in x_moves:
error = un_train_neural_network(move[1], move[2], move[3], move[0])
print(error)
main()
I would expect this code to print the result of the loss function after every 100 self-play games, and for this value to decrease over time.
However, it tends to flatten off at a value of at least 0.45, whereas I believe it should be several orders of magnitude smaller (I was getting loss values on the order of 1e-5 when I was training it on my own games against it).
I think I'm justified in my view because it is also crap at noughts and crosses when it plays.
I was wondering if this is because there's a problem with my code or because the neural network is not complex enough to model the problem and requires another layer.
NOTE: Sorry about the quantity of code, but I couldn't really find a way to shorten it. I have removed the win/draw checks to shorten it, as well as the "untrain" function, which is just the train function but adding the learning rate multiplied by the derivative instead of subtracting. I can add them back if anyone wants to test the code without the inconvenience of writing the functions themselves.
I am trying to use scikit-learn to build a machine learning model for the lunar lander. I use grid search to tune the model and joblib to persist it.
here is the code:
from sklearn.externals import joblib
joblib.dump(my_tuned_model, 'player_state.pkl')
I then copy player_state.pkl into the lunar lander folder. Below is the code of lunar_lander:
import sys, math, csv
import numpy as np
from sklearn.externals import joblib
import cv2
# MOD Extra imports for image handling
from PIL import Image
import os
import time
import datetime
import keras
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
import gym
from gym import spaces
from gym.utils import seeding
# Rocket trajectory optimization is a classic topic in Optimal Control.
#
# According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
# turn it off. That's the reason this environment is OK to have discrete actions (engine on or off).
#
# Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
# Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
# If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
# comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
# engine is -0.3 points each frame. Solved is 200 points.
#
# Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
# on its first attempt. Please see source code for details.
#
# To see heuristic landing, run:
#
# python gym/envs/box2d/lunar_lander_mod.py
#
# To play yourself, run:
#
# python examples/agents/keyboard_agent.py LunarLander-v0
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
LANDER_POLY = [
(-14, +17), (-17, 0), (-17, -10),
(+17, -10), (+17, 0), (+14, +17)
]
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
LEG_SPRING_TORQUE = 40
SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0
VIEWPORT_W = 600
VIEWPORT_H = 400
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.lander == contact.fixtureA.body or self.env.lander == contact.fixtureB.body:
self.env.game_over = True
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = True
def EndContact(self, contact):
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = False
class LunarLander(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': FPS
}
continuous = False
def __init__(self):
self.seed()
self.viewer = None
self.world = Box2D.b2World()
self.moon = None
self.lander = None
self.particles = []
self.prev_reward = None
high = np.array([np.inf] * 8) # useful range is -1 .. +1, but spikes can be higher
self.observation_space = spaces.Box(-high, high)
if self.continuous:
# Action is two floats [main engine, left-right engines].
# Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
# Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
self.action_space = spaces.Box(-1, +1, (2,))
else:
# Nop, fire left engine, main engine, right engine
self.action_space = spaces.Discrete(4)
self.reset()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _destroy(self):
if not self.moon: return
self.world.contactListener = None
self._clean_particles(True)
self.world.DestroyBody(self.moon)
self.moon = None
self.world.DestroyBody(self.lander)
self.lander = None
self.world.DestroyBody(self.legs[0])
self.world.DestroyBody(self.legs[1])
def reset(self):
self._destroy()
self.world.contactListener_keepref = ContactDetector(self)
self.world.contactListener = self.world.contactListener_keepref
self.game_over = False
self.prev_shaping = None
W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE
# terrain
CHUNKS = 11
height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1,))
chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS // 2 - 1]
self.helipad_x2 = chunk_x[CHUNKS // 2 + 1]
self.helipad_y = H / 4
height[CHUNKS // 2 - 2] = self.helipad_y
height[CHUNKS // 2 - 1] = self.helipad_y
height[CHUNKS // 2 + 0] = self.helipad_y
height[CHUNKS // 2 + 1] = self.helipad_y
height[CHUNKS // 2 + 2] = self.helipad_y
smooth_y = [0.33 * (height[i - 1] + height[i + 0] + height[i + 1]) for i in range(CHUNKS)]
self.moon = self.world.CreateStaticBody(shapes=edgeShape(vertices=[(0, 0), (W, 0)]))
self.sky_polys = []
for i in range(CHUNKS - 1):
p1 = (chunk_x[i], smooth_y[i])
p2 = (chunk_x[i + 1], smooth_y[i + 1])
self.moon.CreateEdgeFixture(
vertices=[p1, p2],
density=0,
friction=0.1)
self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])
self.moon.color1 = (0.0, 0.0, 0.0)
self.moon.color2 = (0.0, 0.0, 0.0)
initial_y = VIEWPORT_H / SCALE
self.lander = self.world.CreateDynamicBody(
position=(VIEWPORT_W / SCALE / 2, initial_y),
angle=0.0,
fixtures=fixtureDef(
shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in LANDER_POLY]),
density=5.0,
friction=0.1,
categoryBits=0x0010,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
)
self.lander.color1 = (0.5, 0.4, 0.9)
self.lander.color2 = (0.3, 0.3, 0.5)
self.lander.ApplyForceToCenter((
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
), True)
self.legs = []
for i in [-1, +1]:
leg = self.world.CreateDynamicBody(
position=(VIEWPORT_W / SCALE / 2 - i * LEG_AWAY / SCALE, initial_y),
angle=(i * 0.05),
fixtures=fixtureDef(
shape=polygonShape(box=(LEG_W / SCALE, LEG_H / SCALE)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
)
leg.ground_contact = False
leg.color1 = (0.5, 0.4, 0.9)
leg.color2 = (0.3, 0.3, 0.5)
rjd = revoluteJointDef(
bodyA=self.lander,
bodyB=leg,
localAnchorA=(0, 0),
localAnchorB=(i * LEG_AWAY / SCALE, LEG_DOWN / SCALE),
enableMotor=True,
enableLimit=True,
maxMotorTorque=LEG_SPRING_TORQUE,
motorSpeed=+0.3 * i # low enough not to jump back into the sky
)
if i == -1:
rjd.lowerAngle = +0.9 - 0.5 # Yes, the most esoteric numbers here, angles legs have freedom to travel within
rjd.upperAngle = +0.9
else:
rjd.lowerAngle = -0.9
rjd.upperAngle = -0.9 + 0.5
leg.joint = self.world.CreateJoint(rjd)
self.legs.append(leg)
self.drawlist = [self.lander] + self.legs
return self.step(np.array([0, 0]) if self.continuous else 0)[0]
def _create_particle(self, mass, x, y, ttl):
p = self.world.CreateDynamicBody(
position=(x, y),
angle=0.0,
fixtures=fixtureDef(
shape=circleShape(radius=2 / SCALE, pos=(0, 0)),
density=mass,
friction=0.1,
categoryBits=0x0100,
maskBits=0x001, # collide only with ground
restitution=0.3)
)
p.ttl = ttl
self.particles.append(p)
self._clean_particles(False)
return p
def _clean_particles(self, all):
while self.particles and (all or self.particles[0].ttl < 0):
self.world.DestroyBody(self.particles.pop(0))
def step(self, action):
assert self.action_space.contains(action), "%r (%s) invalid " % (action, type(action))
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0]);
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
if (self.continuous and action[0] > 0.0) or (not self.continuous and action == 2):
# Main engine
if self.continuous:
m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 # 0.5..1.0
assert m_power >= 0.5 and m_power <= 1.0
else:
m_power = 1.0
ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[
1] # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1],
m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate
p.ApplyLinearImpulse((ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power), impulse_pos,
True)
self.lander.ApplyLinearImpulse((-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos, True)
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
# Orientation engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
assert s_power >= 0.5 and s_power <= 1.0
else:
direction = action - 2
s_power = 1.0
ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE)
oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE)
impulse_pos = (self.lander.position[0] + ox - tip[0] * 17 / SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE)
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse((ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power), impulse_pos,
True)
self.lander.ApplyLinearImpulse((-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos, True)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
pos = self.lander.position
vel = self.lander.linearVelocity
state = [
(pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
(pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_W / SCALE / 2),
vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
self.lander.angle,
20.0 * self.lander.angularVelocity / FPS,
1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0
]
assert len(state) == 8
reward = 0
shaping = \
- 100 * np.sqrt(state[0] * state[0] + state[1] * state[1]) \
- 100 * np.sqrt(state[2] * state[2] + state[3] * state[3]) \
- 100 * abs(state[4]) + 10 * state[6] + 10 * state[7] # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
        reward -= m_power * 0.30  # less fuel spent is better, about -30 for heuristic landing
reward -= s_power * 0.03
done = False
if self.game_over or abs(state[0]) >= 1.0:
done = True
reward = -100
if not self.lander.awake:
done = True
reward = +100
return np.array(state), reward, done, {}
def render(self, mode='human'):
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(0, VIEWPORT_W / SCALE, 0, VIEWPORT_H / SCALE)
for obj in self.particles:
obj.ttl -= 0.15
obj.color1 = (max(0.2, 0.2 + obj.ttl), max(0.2, 0.5 * obj.ttl), max(0.2, 0.5 * obj.ttl))
obj.color2 = (max(0.2, 0.2 + obj.ttl), max(0.2, 0.5 * obj.ttl), max(0.2, 0.5 * obj.ttl))
self._clean_particles(False)
for p in self.sky_polys:
self.viewer.draw_polygon(p, color=(0, 0, 0))
for obj in self.particles + self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans * f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
else:
path = [trans * v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
for x in [self.helipad_x1, self.helipad_x2]:
flagy1 = self.helipad_y
flagy2 = flagy1 + 50 / SCALE
self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1))
self.viewer.draw_polygon([(x, flagy2), (x, flagy2 - 10 / SCALE), (x + 25 / SCALE, flagy2 - 5 / SCALE)],
color=(0.8, 0.8, 0))
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
class LunarLanderContinuous(LunarLander):
continuous = True
if __name__ == "__main__":
# Load the Lunar Lander environment
env = LunarLander()
total_rewards = list()
for i in range(0, 10):
s = env.reset()
        # Load and initialise the control model
ROWS = 64
COLS = 64
CHANNELS = 1
model = joblib.load('player_state.pkl')
# Run the game loop
total_reward = 0
steps = 0
while True:
# Get the model to make a prediction
a = model.predict_classes(s)
a = a[0]
# Step on the game
s, r, done, info = env.step(a)
env.render()
total_reward += r
if steps % 20 == 0 or done:
print(["{:+0.2f}".format(x) for x in s])
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
steps += 1
if done:
total_rewards.append(total_reward)
break
print("total rewards", total_rewards)
print("average total reward", np.mean(total_rewards))
# Write total rewards to file
f = open("lunarlander_ml_states_rewards.csv", 'w')
wr = csv.writer(f)
for r in total_rewards:
wr.writerow([r, ])
f.close()
When I run the code, the following error occurs:
Traceback (most recent call last):
File "/Users/leejoonsung/PycharmProjects/lunar_lander/lunar_lander_ml_states_player.py", line 406, in <module>
a = model.predict_classes(s)
AttributeError: 'GridSearchCV' object has no attribute 'predict_classes'
Could anyone help me solve this problem?
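For what it's worth, predict_classes is a Keras Sequential method, not a scikit-learn one. A fitted GridSearchCV (like most scikit-learn classifiers) exposes predict instead, and the refitted estimator is also available as best_estimator_. A minimal sketch of the call, assuming the underlying estimator is a scikit-learn classifier and s is the 8-element state vector:

a = model.predict(np.array(s).reshape(1, -1))[0]
# or, equivalently, using the refitted best estimator:
# a = model.best_estimator_.predict(np.array(s).reshape(1, -1))[0]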