I am trying to create an agent-based model of a city fire, based on the wildfire example: https://github.com/projectmesa/mesa/tree/master/examples/forest_fire/forest_fire
The code below is not displaying any results and the model is not stepping.
Option 2 (commented out in the model) runs an infinite loop.
I believe I am missing something pretty fundamental ...
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import box
import random
from mesa import Model, Agent
from mesa.time import RandomActivation
from mesa.space import Grid
from mesa.datacollection import DataCollector
from mesa.batchrunner import BatchRunner
from mesa_geo import GeoSpace, GeoAgent, AgentCreator
from mesa.visualization.modules import CanvasGrid
from mesa.visualization.ModularVisualization import ModularServer
path = "G:/Sync/FFE/Mesa"
# crop data
minx, miny = 1748570, 5426959
maxx, maxy = 1748841, 5427115
bbox = box(minx, miny, maxx, maxy)
gdf_buildings = gpd.read_file(os.path.join(path, "buildings_raw.shp"), bbox=bbox)
# gdf_buildings.plot()
gdf_buildings['IgnProb_bl'] = 0.1
# plot map of agents
fig, ax = plt.subplots(1, 1)
gdf_buildings.plot(column='IgnProb_bl', ax=ax, legend=True)
# wind scenario
wind = pd.read_csv(os.path.join(path, 'GD_wind.csv'))
def wind_scenario(wind_data=wind):
i = np.random.randint(0, wind_data.shape[0])
w = wind_data.iloc[i, 2]
d = wind_data.iloc[i, 1]
return w, d
class Buildings(GeoAgent):
"""
building footprint.
Conditions: "Fine", "On Fire", "Burned Out"
"""
def __init__(self, unique_id, model, shape):
super().__init__(unique_id, model, shape)
self.condition = "Fine"
wind_direction, critical_distance = wind_scenario()
self.direction = wind_direction
self.distance = critical_distance
def step(self):
'''
if building is on fire, spread it to buildings according to wind conditions
'''
# option 1
# print("STEP AGENT")
neighbors = self.model.grid.get_neighbors_within_distance(self, center=False, distance=self.distance)
if self.condition == "On Fire":
for n in neighbors:
if n.condition == "Fine":
n.condition = "On Fire"
self.condition = "Burned Out"
# option 2 (display but no model step either)
# other_agents = self.model.schedule.agents
# if self.condition == "Fine":
# for agent in other_agents:
# if self.distance < self.model.grid.distance(self, agent):
# if agent.condition == "On Fire":
# self.condition = "On Fire"
class Fire(Model):
def __init__(self):
self.grid = GeoSpace()
self.schedule = RandomActivation(self)
# agent located from shapefile
buildings_agent_kwargs = dict(model=self)
ac = AgentCreator(agent_class=Buildings, agent_kwargs=buildings_agent_kwargs)
agents = ac.from_GeoDataFrame(gdf_buildings, unique_id="TARGET_FID")
self.grid.add_agents(agents)
self.dc = DataCollector({"Fine": lambda m: self.count_type(m, "Fine"),
"On Fire": lambda m: self.count_type(m, "On Fire"),
"Burned Out": lambda m: self.count_type(m, "Burned Out")})
self.running = True
# Set up agents
print("{} agents set up in the Fire model".format(len(agents)))
for agent in agents:
agent.condition = "Fine"
if random.random() < agent.IgnProb_bl:
agent.condition = "On Fire"
print("building on fire: {}".format(agent.unique_id))
self.schedule.add(agent)
def step(self):
"""
Advance the model by one step.
if no building on Fire, stop the model
"""
# collect data
self.dc.collect(self)
# step in time
print("STEP MODEL")
self.schedule.step()
# Halt if no more fire
if self.count_type(self, "On Fire") == 0:
self.running = False
@staticmethod
def count_type(model, agent_condition):
'''
Helper method to count agents in a given condition in a given model.
'''
count = 0
for agent in model.schedule.agents:
if agent.condition == agent_condition:
count += 1
return count
# Run model
fire = Fire()
fire.run_model()
# plot output
results = fire.dc.get_model_vars_dataframe()
results.head()
results.plot()
I got this result from plotting the shapefile:
building shapefile with ignition probability
But this is the output I got from the model:
empty output with no model step
Here are the results I get when I run the model:
58 agents set up in the Fire model
building on fire: 30450
building on fire: 30453
building on fire: 30455
building on fire: 30472
building on fire: 30791
building on fire: 30793
STEP MODEL
results
Fine On Fire Burned Out
0 52 6 0
I am new to ABM and scratching my head about this one ...
It seems that self.model.grid.get_neighbors_within_distance() returns an empty list and/or no neighbors that can be ignited. Therefore, after the first step the burning buildings are all burned out, your stop condition is met (no buildings on fire) and the model stops running (model.running is set to False).
This might be caused by a wrong "critical distance", a wrong CRS (try setting it explicitly for the grid), or a bug in mesa-geo.
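As a quick check, here is a minimal diagnostic sketch (assuming the shapefile carries a projected CRS and that your mesa-geo version accepts a crs argument for GeoSpace); it compares the wind "critical distance" against the actual building separations, in the units of the CRS the grid uses:
import geopandas as gpd
from mesa_geo import GeoSpace
gdf = gpd.read_file("buildings_raw.shp")  # same file as in the question
print(gdf.crs)  # distances are measured in the units of this CRS
centroids = gdf.geometry.centroid
print(centroids.distance(centroids.iloc[0]).describe())  # typical separations between footprints
# If the critical distance from GD_wind.csv is in metres but the grid measures
# degrees (or vice versa), get_neighbors_within_distance() will find nothing.
grid = GeoSpace(crs=gdf.crs)  # keep the grid's CRS consistent with the data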
I'm struggling with OpenMaya here.
I want to be able to take transform information from a list of locators and plug these values into particle shapes.
The goal is to use this on over 25000 locators, so I can't create a particle system for each instance; I really need to store the position and rotation values on the particles themselves.
To do that I started to dive into OpenMaya... (╯°□°)╯︵ ┻━┻
Anyway, the problem I'm facing now is that my scene crashes every time I launch this script and I can't figure out what I did wrong. I think I'm pretty close, but crashing Maya is not considered a victory.
import pymel.core as pm
import maya.OpenMaya as om
import maya.OpenMayaFX as omfx
import random
### A short script to create the scene with bunch of locators with random pos rot
numOfLoc = 5 # this number will eventually be set 25000 when the script will work.
# create locators with random position location(for test)
def create_gazillion_locators(numOfLoc):
for i in range(0, numOfLoc):
# to create variation
tx = random.uniform(-10, 10)
ty = random.uniform(0, 5)
tz = random.uniform(-10, 10)
rx = random.uniform(0, 360)
ry = random.uniform(0, 360)
rz = random.uniform(0, 360)
pm.spaceLocator()
pm.move(tx, ty, tz)
pm.rotate(rx, ry, rz, ws=True)
# Select locators
def select_locators():
pm.select(cl=True)
loc_selection = pm.listRelatives(pm.ls(type = 'locator'), p=True)
pm.select(loc_selection, r=True)
return loc_selection
# delete the locators
def clean_the_scene():
#del locators (for testing purpose)
sel = select_locators()
if sel is not None:
pm.delete(sel)
clean_the_scene()
create_gazillion_locators(numOfLoc)
### Actual script
# Found this on the internet. it seems to be more neat
class Vector(om.MVector):
def __str__(self):
return '{0}, {1}, {2}'.format(self.x, self.y, self.z)
def __repr__(self):
return '{0}, {1}, {2}'.format(self.x, self.y, self.z)
# OpenMaya treatment
sel = select_locators()
mSel = om.MSelectionList()
om.MGlobal.getActiveSelectionList(mSel)
mSel_iter = om.MItSelectionList(mSel)
mSel_DagPath = om.MDagPath()
# variables to store the transforms in
pos_array = om.MVectorArray()
rot_array = om.MVectorArray()
mLoc = om.MObject()
# Main loop of selection iterator.
while not mSel_iter.isDone():
# Get list of selected
mSel_iter.getDagPath(mSel_DagPath)
mSel_iter.getDependNode(mLoc)
dep_node_name = om.MFnDependencyNode(mLoc).name()
transl = pm.getAttr('{}.translate'.format(dep_node_name))
rotate = pm.getAttr('{}.rotate'.format(dep_node_name))
print(dep_node_name)
print(Vector(transl[0], transl[1], transl[2]))
print(Vector(rotate[0], rotate[1], rotate[2]))
pos_array.append(Vector(transl[0], transl[1], transl[2]))
rot_array.append(Vector(rotate[0], rotate[1], rotate[2]))
mSel_iter.next()
# Up until here it seems to work ok.
nparticles_transform, nparticles_shape = pm.nParticle(position = pos_array)
pm.setAttr('nucleus1.gravity', 0.0)
nparticles_shape.computeRotation.set(True)
pm.addAttr(nparticles_shape, ln = 'rotationPP', dt = 'vectorArray')
pm.addAttr(nparticles_shape, ln = 'rotationPP0', dt = 'vectorArray')
pm.particleInstancer(nparticles_shape, name = p_instancer, edit = True, rotation = "rotationPP")
particle_fn = omfx.MFnParticleSystem(nparticles_shape.__apimobject__())
particle_fn.setPerParticleAttribute('rotationPP', rot_array)
particle_fn.setPerParticleAttribute('rotationPP0', rot_array)
I read lots of things, went through Stack Overflow and Google, and based my script on several other things I found/learnt (I followed the OpenMaya course on YouTube by Chayan Vinayak), but I've had a hard time understanding the OpenMaya documentation.
I had a look, and there is no need to use any OpenMaya in this case if you use PyMEL anyway. I used cmds for the creation of the locators because it is a bit faster, so if execution speed is a problem, try switching everything to cmds.
And I think there is no need to set computeRotation because it is only used during simulation.
import pymel.core as pm
import maya.cmds as cmds
import random
numOfLoc = 5000
def c_create_gazillion_locators(num_of_loc):
for i in range(num_of_loc):
tx = random.uniform(-10, 10)
ty = random.uniform(0, 5)
tz = random.uniform(-10, 10)
rx = random.uniform(0, 360)
ry = random.uniform(0, 360)
rz = random.uniform(0, 360)
cmds.spaceLocator()
cmds.move(tx, ty, tz)
cmds.rotate(rx, ry, rz, ws=True)
c_create_gazillion_locators(numOfLoc)
locs = pm.ls(type="locator")
locs = pm.listRelatives(locs, p=True)
pos = []
rot = []
for loc in locs:
pos.append(loc.translate.get())
rot.append(loc.rotate.get())
nparticles_transform, nparticles_shape = pm.nParticle(position=pos)
pm.setAttr("nucleus1.gravity", 0.0)
pm.addAttr(nparticles_shape, ln="rotationPP", dt="vectorArray")
pm.addAttr(nparticles_shape, ln="rotationPP0", dt="vectorArray")
rpp= pm.Attribute(nparticles_shape+".rotationPP")
rpp0= pm.Attribute(nparticles_shape+".rotationPP0")
rpp.set(rot)
rpp0.set(rot)
I've made a couple of changes to make it work. I didn't check the particle setup though. In fact, the main problem is mixing two different APIs. Either stick to OpenMaya (or even OpenMaya v2.0) or PyMEL.
import pymel.core as pm
import maya.OpenMaya as om
import maya.OpenMayaFX as omfx
import random
### A short script to create the scene with bunch of locators with random pos rot
numOfLoc = 5 # this number will eventually be set 25000 when the script will work.
# create locators with random position location(for test)
def create_gazillion_locators(num_of_loc):
for i in range(num_of_loc):
# to create variation
tx = random.uniform(-10, 10)
ty = random.uniform(0, 5)
tz = random.uniform(-10, 10)
rx = random.uniform(0, 360)
ry = random.uniform(0, 360)
rz = random.uniform(0, 360)
pm.spaceLocator()
pm.move(tx, ty, tz)
pm.rotate(rx, ry, rz, ws=True)
# Select locators
def select_locators():
pm.select(cl=True)
loc_selection = pm.listRelatives(pm.ls(type="locator"), p=True)
pm.select(loc_selection, r=True)
return loc_selection
# delete the locators
def clean_the_scene():
# del locators (for testing purpose)
sel = select_locators()
if sel is not None:
pm.delete(sel)
clean_the_scene()
create_gazillion_locators(numOfLoc)
### Actual script
# Found this on the internet. it seems to be more neat
class Vector(om.MVector):
def __str__(self):
return "{0}, {1}, {2}".format(self.x, self.y, self.z)
def __repr__(self):
return "{0}, {1}, {2}".format(self.x, self.y, self.z)
# OpenMaya treatment
sel = select_locators()
mSel = om.MSelectionList()
om.MGlobal.getActiveSelectionList(mSel)
mSel_iter = om.MItSelectionList(mSel)
mSel_DagPath = om.MDagPath()
# variables to store the transforms in
pos_array = []
rot_array = om.MVectorArray()
mLoc = om.MObject()
# Main loop of selection iterator.
while not mSel_iter.isDone():
# Get list of selected
mSel_iter.getDagPath(mSel_DagPath)
mSel_iter.getDependNode(mLoc)
dep_node_name = om.MFnDependencyNode(mLoc).name()
transl = pm.getAttr("{}.translate".format(dep_node_name))
rotate = pm.getAttr("{}.rotate".format(dep_node_name))
print(dep_node_name)
print(Vector(transl[0], transl[1], transl[2]))
print(Vector(rotate[0], rotate[1], rotate[2]))
pos_array.append((transl[0], transl[1], transl[2]))
rot_array.append(Vector(rotate[0], rotate[1], rotate[2]))
mSel_iter.next()
# Up until here it seems to work ok.
nparticles_transform, nparticles_shape = pm.nParticle(position=pos_array)
pm.setAttr("nucleus1.gravity", 0.0)
nparticles_shape.computeRotation.set(True)
pm.addAttr(nparticles_shape, ln="rotationPP", dt="vectorArray")
pm.addAttr(nparticles_shape, ln="rotationPP0", dt="vectorArray")
# Create an instancer before trying to edit
instancer_node = pm.particleInstancer(nparticles_shape, name="p_instancer")
pm.particleInstancer(
nparticles_shape, name=instancer_node, edit=True, rotation="rotationPP"
)
particle_fn = omfx.MFnParticleSystem(nparticles_shape.__apimobject__())
particle_fn.setPerParticleAttribute("rotationPP", rot_array)
particle_fn.setPerParticleAttribute("rotationPP0", rot_array)
I tried to check the length of my training data before training the model, but I got this error. I am implementing this in PyTorch. I have three main functions: dataset, extract beat and extract signal. Can someone help me fix this issue, please?
This is my dataset class
class MyDataset(Dataset):
def __init__(self, patient_ids,bih2aami=True):#This method runs once when we call this class, and we pass the data or its references here with the label data.
self.patient_ids = patient_ids # list of patients ID
self.directory="C:\\Users\\User\\Downloads\\list\\mit-bih-arrhythmia-database-1.0.0\\" # path
self.nb_qrs = 99 #number of beats extracted for each patient, found that each recording had at least 99 normal beats
self.idx_tuples = flatten([[(patient_idx, rpeak_idx) for rpeak_idx in range(self.nb_qrs)]
for patient_idx in range(len(patient_ids))])
self.bih2aami=bih2aami
#if bih2aami==True:
# self.y = self.bih2aami(self.y)
def __len__(self):#returns the size of the data set.
return len(self.idx_tuples) # length of the dataset
def __getitem__(self, idx): # get one sample from the dataset
patient_idx, rpeak_idx = self.idx_tuples[idx]
patient_id = self.patient_ids[patient_idx]
file = self.directory + patient_id
signal, normal_qrs_pos = get_signal(file)
qrs_pos = normal_qrs_pos[rpeak_idx]
beat, label = extract_beat(signal, qrs_pos) # note: extract_beat (defined below) also expects the qrs_positions list
#sample = {'signal': torch.tensor(beat).float(),
# 'label': torch.tensor(label).float()}
print(patient_id, patient_idx, beat.shape,label.shape) # bug : what if label null ??
X, y = torch.tensor(beat).float(), torch.tensor(label).float()
return X,y
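For reference, a hedged sketch of how this dataset would typically be instantiated and its length checked (the patient IDs below are placeholders; it assumes the flatten helper and the get_signal/extract_beat functions shown below are importable and that the MIT-BIH records exist under self.directory):
from torch.utils.data import DataLoader
dataset = MyDataset(patient_ids=["100", "101"])  # hypothetical MIT-BIH record names
print(len(dataset))  # 2 patients * 99 beats = 198
loader = DataLoader(dataset, batch_size=16, shuffle=True)
X, y = next(iter(loader))  # calls __getitem__, where shape/label errors usually surface
print(X.shape, y.shape)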
Get signal function
def get_signal(file):
record = wfdb.rdrecord(file, channels=[0])
df = pd.DataFrame(record.p_signal, columns=record.sig_name)
lead = df.columns[0]
signal = df[lead] #getting the 1D signal
annotation = wfdb.rdann(file, 'atr') #getting the annotation
relabeled_ann = bih2lamedo(annotation.symbol)
annotations = pd.DataFrame(relabeled_ann,annotation.sample)
normal_qrs_pos = list(annotations[annotations[0]=='N'].index) #normal beats
#normal_qrs_pos = list(annotations[annotations[0]!='O'].index) #beats
#normal_qrs_pos = list(annotations.index) #normal beats
return signal, normal_qrs_pos
Get beat function
def extract_beat(signal, win_pos, qrs_positions, win_msec=40, fs=360, start_beat=36, end_beat=108):
"""
win_pos position at which you place the window of your beat
qrs_positions (list) the qrs indices from the annotations (read them from the atr file)-->obtained from annotation.sample
win_msec in milliseconds
"""
#extract signal
signal = np.array(signal)
#print(signal.shape)
#beat_array = np.zeros(start_beat+end_beat)#number of channels
start = int(max(win_pos-start_beat,0))
stop=start+start_beat+end_beat
#print(beat_array.shape,signal.shape)
beat = signal[start:stop]
#compute the nearest neighbor of win_pos among qrs_positions
tolerance = fs*win_msec//1000 # samples at a distance < tolerance are matched
nbr = NearestNeighbors(n_neighbors=1).fit(qrs_positions)
distances, indices = nbr.kneighbors(np.array([[win_pos]]).reshape(-1,1))
#label
if distances[0][0] <= tolerance:
label = 1
else:
label = 0
print(distances[0],tolerance,label)
return beat, label
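And a hedged usage sketch of extract_beat with its full signature (the record path is a placeholder; qrs_positions is reshaped to the 2-D array that NearestNeighbors expects):
import numpy as np
signal, normal_qrs_pos = get_signal("mit-bih/100")  # hypothetical record path
qrs_positions = np.array(normal_qrs_pos).reshape(-1, 1)  # 2-D, as NearestNeighbors requires
beat, label = extract_beat(signal, normal_qrs_pos[0], qrs_positions)
print(beat.shape, label)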
I am implementing an RL agent based on A2C of stable-baselines3 on a gym environment with MultiDiscrete observation and action spaces.
I get the following error when learning
RuntimeError: Class values must be smaller than num_classes.
This is a typical PyTorch error, but I do not get its origin. I attach my code.
Before the code, I explain the idea. We train on a custom environment with several machines (we first train with only two machines), where we need to decide the production rate of each machine before it breaks. The action space also includes the decision of scheduling maintenance at some time distance, and for each machine it decides whether that machine is to be maintained.
Hence, the observation space is the consumption state of each machine plus the time distance of the scheduled maintenance (which can also be "not scheduled"), whereas the action space is the production rate for each machine, a maintenance decision for each machine, and a call-to-schedule.
The reward is given when the total production exceeds a threshold, and negative rewards are the costs of maintenance and scheduling.
Now, I know this is a big problem and we need to reduce these spaces, but the actual issue is this error from PyTorch; I do not see where it comes from. A2C handles MultiDiscrete spaces for both observations and actions, but I do not know the origin of the error. We set up an A2C algorithm with MlpPolicy and try to train the policy on this environment.
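To make the space layout concrete, here is a small illustration of the two MultiDiscrete spaces as they are built in the code below, using the two-machine constants from the __main__ block (this snippet is only an illustration, not part of the original program):
from gym.spaces import MultiDiscrete
production_rates_vec = [5, 5]  # MAXIMUM_PRODUCTION_RATE per machine
max_machine_states_vec = [8, 8]  # FAILURE_STATE_LIMIT per machine
scheduling_horizon = 4  # SCHEDULING_HORIZON
# production rate per machine + maintenance flag per machine + call-to-schedule
action_space = MultiDiscrete(production_rates_vec + 2 * [2] + [2])
# consumption state per machine + scheduling state (including "not scheduled")
observation_space = MultiDiscrete(max_machine_states_vec + [scheduling_horizon + 2])
print(action_space.nvec, observation_space.nvec)  # [5 5 2 2 2] [8 8 6]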
I attach the code.
from gym import Env
from gym.spaces import MultiDiscrete
import numpy as np
from numpy.random import poisson
import random
from functools import reduce
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Flatten
# from tensorflow.keras.optimizers import Adam
from stable_baselines3 import A2C
from stable_baselines3.common.env_checker import check_env
class MaintenanceEnv(Env):
def __init__(self, max_machine_states_vec, production_rates_vec, production_threshold, scheduling_horizon, operations_horizon = 100):
"""
Returns:
self.action_space is a vector with the maximum production rate for each machine, a binary call-to-maintenance and a binary call-to-schedule
"""
num_machines = len(max_machine_states_vec)
assert len(max_machine_states_vec) == len(production_rates_vec), "Machine states and production rates have different cardinality"
# Action space: production rate per machine, maintenance flag per machine, call-to-schedule
self.action_space = MultiDiscrete(production_rates_vec + num_machines*[2] + [2]) ### Action space is the production rate from 0 to N and the choice of scheduling
# Observation space: consumption state per machine + scheduling state
self.observation_space = MultiDiscrete(max_machine_states_vec + [scheduling_horizon+2]) ### Observation space is the 0,...,L for each machine + the scheduling state including "ns" (None = "ns")
# Initial state
self.state = num_machines*[0] + [0]
# Episode length
self.operations_horizon = operations_horizon
self.time_to_finish = operations_horizon
self.scheduling_horizon = scheduling_horizon
self.max_states = max_machine_states_vec
self.production_threshold = production_threshold
def step(self, action):
"""
Notes: Schedule state
"""
num_machines = len(self.max_states)
maintenance_distance_index = -1
reward = 0
done = False
info = {}
### Cost parameters
cost_setup_schedule = 5
cost_preventive_maintenance = 10
cost_corrective_maintenance = 50
reward_excess_on_production = 5
cost_production_deficit = 10
cost_fixed_penalty = 10
failure_reward = -10**6
amount_produced = 0
### Errors
if action[maintenance_distance_index] == 1 and self.state[-1] != self.scheduling_horizon + 1: # Case when you set a reparation scheduled, but it is already scheduled. Not possible.
reward = failure_reward ###It should not be possible
done = True
return self.state, reward, done, info
if self.state[-1] == 0:
for pos in range(num_machines):
if action[num_machines + pos] == 1 and self.state[maintenance_distance_index] > 0: ### Case when maintenance is applied, but schedule is not involved yet. Not possible.
reward = failure_reward ### It should not be possible
done = True
return self.state, reward, done, info
for pos in range(num_machines):
if self.state[pos] == self.max_states[pos] and action[pos] > 0: # Case when machine is broken, but it is producing
reward = failure_reward ### It should not be possible
done = True
return self.state, reward, done, info
if self.state[maintenance_distance_index] == 0:
for pos in range(num_machines):
if action[num_machines+pos] == 1 and action[pos] > 0 : ### Case when it is maintenance time but the machines to be maintained keeps working. Not possible
reward = failure_reward ### It should not be possible
done = True
return self.state, reward, done, info
### State update
for pos in range(num_machines):
if self.state[pos] < self.max_states[pos] and self.state[maintenance_distance_index] > 0: ### The machine is in production, state update includes product amount
# self.state[pos] = min(self.max_states[pos] , self.state[pos] + poisson(action[pos] / self.action_space[pos])) ### Temporary: for I delete from the state the result of a poisson distribution depending on the production rate, Poisson is temporary
self.state[pos] = min(self.max_states[pos] , self.state[pos] + action[pos]) ### Temporary: Consumption rate is deterministic
amount_produced += action[pos]
if amount_produced >= self.production_threshold:
reward += reward_excess_on_production * (amount_produced - self.production_threshold)
else:
reward -= cost_production_deficit * (self.production_threshold - amount_produced)
reward -= cost_fixed_penalty
if action[maintenance_distance_index] == 1 and self.state[maintenance_distance_index] == self.scheduling_horizon + 1: ### You call a schedule when the state is not scheduled
self.state[maintenance_distance_index] = self.scheduling_horizon
reward -= cost_setup_schedule
elif self.state[maintenance_distance_index] > 0 and self.state[maintenance_distance_index] <= self.scheduling_horizon: ### You reduced the distance from scheduled maintenance
self.state[maintenance_distance_index] -= 1
for pos in range(num_machines): ### Case when we are repairing the machines and we need to pay the costs of repairment, and set them as new
if action[num_machines+pos] == 1 :
if self.state[pos] < self.max_states[pos]:
reward -= cost_preventive_maintenance
elif self.state[pos] == self.max_states[pos]:
reward -= cost_corrective_maintenance
self.state[pos] = 0
if self.state[maintenance_distance_index] == 0: ### when maintenance have been performed, reset the scheduling state to "not scheduled"
self.state[maintenance_distance_index] = self.scheduling_horizon + 1
### Time threshold
if self.time_to_finish > 0:
self.time_to_finish -= 1
else:
done = True
# Return step information
return self.state, reward, done, info
def render(self):
# Implement viz
pass
def reset(self):
# Reset machine states and the episode timer
num_machines = len(self.max_states)
self.state = np.array(num_machines*[0] + [0])
self.time_to_finish = self.operations_horizon
return self.state
def build_model(states, actions):
model = Sequential()
model.add(Dense(24, activation='relu', input_shape=states)) #
model.add(Dense(24, activation='relu'))
model.add(Dense(actions, activation='linear'))
return model
if __name__ == "__main__":
###GLOBAL COSTANTS AND PARAMETERS
NUMBER_MACHINES = 2
FAILURE_STATE_LIMIT = 8
MAXIMUM_PRODUCTION_RATE = 5
SCHEDULING_HORIZON = 4
PRODUCTION_THRESHOLD = 20
machine_states = NUMBER_MACHINES * [4]
failure_states = NUMBER_MACHINES * [FAILURE_STATE_LIMIT]
production_rates = NUMBER_MACHINES * [MAXIMUM_PRODUCTION_RATE]
### Setting environment
env = MaintenanceEnv(failure_states, production_rates, PRODUCTION_THRESHOLD, SCHEDULING_HORIZON)
model = A2C("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)
obs = env.reset()
for i in range(1000):
action, _state = model.predict(obs, deterministic=True)
obs, reward, done, info = env.step(action)
# env.render()
if done:
obs = env.reset()
I have the feeling it is due to the MultiDiscrete spaces, but I am asking for help. Thanks :)
As per the docs (https://www.gymlibrary.dev/api/spaces/#discrete), Discrete(n) has values from 0 to n-1 by default; note that it does not go up to n.
The MultiDiscrete space is built on top of Discrete and has the same behaviour. Hence, if a step produces a value of n, we get this error. A simple way to solve it is to ensure that the values never leave the bounds (0 up to n-1, inclusive) while taking a step.
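For illustration, a minimal sketch of clamping a state vector into a MultiDiscrete observation space before returning it from step() (the component sizes here are made up, not taken from the environment above):
import numpy as np
from gym.spaces import MultiDiscrete
obs_space = MultiDiscrete([9, 9, 6])  # valid values: 0..8, 0..8, 0..5
state = np.array([9, 3, 6])  # deliberately out of bounds
clipped = np.minimum(state, obs_space.nvec - 1)  # clamp each component to its maximum
assert obs_space.contains(clipped)
print(clipped)  # [8 3 5]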
After experimenting with Cart Pole from OpenAI Gym, I wanted to try something a bit more advanced in reinforcement learning and went on to Super Mario Bros NES. Using a similar setup doesn't seem to yield the results I was expecting, though, and Mario doesn't seem to learn anything.
I have been using retro gym to emulate the environment and writing some custom wrappers for actions, observations and reward (the code for these is at the bottom), and these seem to yield what I was expecting. For reinforcement learning I have been using TensorFlow for the network model and the DQN agent from Keras-RL with an epsilon-greedy Q policy, and after many hours of training Mario is still unable to jump high enough to get over the initial obstacles.
Code for reinforcement learning:
import retro
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
from wrapper import wrapper
env = retro.make('SuperMarioBros-Nes', use_restricted_actions=retro.Actions.DISCRETE)
env = wrapper(env)
states = 70
actions = 4
train_size = 5000000
def build_model(states, actions):
model = Sequential()
model.add(Flatten(input_shape=(1,10,7)))
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(actions, activation='linear'))
return model
def build_agent(model, actions):
policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=train_size, window_length=1)
dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=100000, target_model_update=1e-2)
return dqn
model = build_model(states, actions)
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=train_size, visualize=True, verbose=1)
dqn.save_weights('weights/MarioWeights.h5f', overwrite=True)
I have tried tuning the network architecture, the target update rate, different policies, exploration vs. exploitation, the observation space and the reward, but little seems to change and the reward converges to very low values after a while.
Code for wrappers:
class ObservationWrapper(gym.ObservationWrapper):
def __init__(self, env):
super().__init__(env)
self.env = env
def observation(self, obs):
### STATE MODIFICATION ###
ram = self.env.get_ram() # Grab ram data
tiles = SMB.get_tiles(ram) # Grab dict with all tiles on the screen
t_list = list(tiles.values()) # Convert to list
ct_list = np.asarray(self.chunkify(t_list)) # Convert to array with image dimensions
try:
m_x = int(np.where(ct_list == 170)[1])
except:
m_x = 3
return ct_list[4:14, m_x:m_x+7] # Crop to ROI
def chunkify(self, list, n=16): # Function to extract rows, or chunks, of 16 values from the image
c_list = []
row = []
count = 0
for i in list:
row.append(i.value)
count += 1
if count%16 == 0:
c_list.append(row)
row = []
return c_list
class ActionWrapper(gym.ActionWrapper):
def __init__(self, env):
super().__init__(env)
self.action_list = [0, 6, 18, 24]#, 3] # NOOP, RIGHT, JUMP, RIGHT + JUMP, LEFT
def action(self, act):
return self.action_list[act]
class BasicWrapper(gym.Wrapper):
def __init__(self, env):
super().__init__(env)
self.env = env
self.total_reward = 0
self.max_reward = 0
self.frame = 0
self.mario_old_pos = [40,176] # Starting position of Mario on env.reset()
self.t_old = 400 # Counting down from 400
def reset_init(self):
self.total_reward = 0
self.max_reward = 0
self.frame = 0
self.mario_old_pos = [40,176]
self.t_old = 400
def check_alive(self, ram): # Check if dying or dead
if SMB._is_alive(ram):
return 0
else:
return -15
def step(self, action):
next_state, reward, done, info = self.env.step(action)
ram = self.env.get_ram()
done = SMB._got_flag(ram) # Check if Mario finished level and got flag
mario_pos = SMB.get_mario_location_in_level(ram) # Grab Mario location in level
vx = (int(mario_pos[0]) - int(self.mario_old_pos[0])) * 2
if vx > 10:
vx = 0
self.mario_old_pos = mario_pos
d = self.check_alive(ram)
timer = info.get('time')
c = timer - self.t_old
self.t_old = timer
reward = vx + vy + c + d # note: vy is not defined anywhere in the snippet shown here
self.total_reward += reward
if self.total_reward > self.max_reward:
self.max_reward = self.total_reward
# Reset if Mario is stuck
if self.frame > 100:
if d < 0 or (self.total_reward < self.max_reward - 0.1*self.total_reward):
self.reset_init()
self.env.reset()
self.frame += 1
return next_state, reward, done, info
The reward here is stolen from https://github.com/Kautenja/gym-super-mario-bros, while the observations are inspired by https://chrispresso.io/AI_Learns_To_Play_SMB_Using_GA_And_NN.
I feel like I am missing something essential. Any suggestions for how to proceed?
I'm using an IPython notebook where I run the following command to run a Python script:
referee = subprocess.Popen("/Jupyter/drone_cat_mouse/referee/referee.py /Jupyter/drone_cat_mouse/referee/referee.yml", shell=True)
The python script is the following:
#!/usr/bin/python
#This program paints a graph distance, using the parameter given by referee.cfg
#VisorPainter class re-paints on a pyplot plot and updates new data.
#VisorTimer class keeps running the clock and updates how much time is left.
#Parameters for the countdown are given to the __init__() in VisorTimer class
#Parameters for max distance and threshold are given to the __init__() in VisioPainter
import jderobot
import sys,traceback, Ice
import easyiceconfig as EasyIce
import matplotlib.pyplot as plt
import numpy as np
import random
import threading
import math
import config
import comm
from datetime import timedelta,datetime,time,date
#Install matplotlib with apt-get install python-matplotlib
import matplotlib as mpl
#Turns off the default toolbar
mpl.rcParams['toolbar'] = 'None'
class Pose:
def __init__(self,argv=sys.argv):
self.lock = threading.Lock()
self.dist=0
self.ic = None
try:
cfg = config.load(sys.argv[1])
jdrc = comm.init(cfg, 'Referee')
self.ic = jdrc.getIc()
self.properties = self.ic.getProperties()
proxyStr = jdrc.getConfig().getProperty("Referee.CatPose3D.Proxy")
self.basePoseAr = self.ic.stringToProxy(proxyStr)
if not self.basePoseAr:
raise Runtime("Cat Pose3D -> Invalid proxy")
self.poseProxy = jderobot.Pose3DPrx.checkedCast(self.basePoseAr)
print self.poseProxy
proxyStr = jdrc.getConfig().getProperty("Referee.MousePose3D.Proxy")
self.baseRedPoseAr = self.ic.stringToProxy(proxyStr)
self.poseRedProxy = jderobot.Pose3DPrx.checkedCast(self.baseRedPoseAr)
print self.poseRedProxy
if not self.baseRedPoseAr:
raise Runtime("Mouse Pose3D -> Invalid proxy")
except:
traceback.print_exc()
status = 1
def update(self):
self.lock.acquire()
self.poseAr=self.poseProxy.getPose3DData()
self.poseRed=self.poseRedProxy.getPose3DData()
self.lock.release()
return self.getDistance()
def getDistance(self):
v_d=pow(self.poseRed.x-self.poseAr.x,2)+pow(self.poseRed.y-self.poseAr.y,2)+pow(self.poseRed.z-self.poseAr.z,2)
self.dist=round(abs(math.sqrt(v_d)),4)
return self.dist
def finish(self):
if self.ic:
#Clean up
try:
self.ic.destroy()
except:
traceback.print_exc()
status = 1
class VisorPainter:
#Threshold is the line where points have different colours
def __init__(self, threshold=7.0, max_d=20):
self.fig, self.ax = plt.subplots()
self.d = []
self.t = []
self.score=0.0
self.th = threshold
self.max_dist = max_d
self.suptitle = self.fig.suptitle('Timer is ready',fontsize=20)
self.fig.subplots_adjust(top=0.8)
self.score_text = self.ax.text((120.95), self.max_dist+1.5, 'Score: '+ str(self.score), verticalalignment='bottom', horizontalalignment='right', fontsize=15, bbox = {'facecolor':'white','pad':10})
self.drawThreshold()
self.ax.xaxis.tick_top()
self.ax.set_xlabel('Time')
self.ax.xaxis.set_label_position('top')
self.ax.set_ylabel('Distance')
# Sets time and distance axes.
def setAxes(self, xaxis=120, yaxis=None):
if (yaxis == None):
yaxis=self.max_dist
if (xaxis!=120):
self.score_text.set_x((xaxis+2.95))
self.ax.set_xlim(0.0,xaxis)
self.ax.set_ylim(yaxis,0)
# Draws the threshold line
def drawThreshold(self):
plt.axhline(y=self.th)
# Draws points. Green ones add 1 to score.
# Not in use.
def drawPoint(self,t_list,d_list):
if d<=self.th:
self.score+=1
plt.plot([t],[d], 'go', animated=True)
else:
plt.plot([t],[d], 'ro', animated=True)
# Decides if it's a Green or Red line. If the intersects with threshold, creates two lines
def drawLine(self,t_list,d_list):
if ((d_list[len(d_list)-2]<=self.th) and (d_list[len(d_list)-1]<=self.th)):
self.drawGreenLine(t_list[len(t_list)-2:len(t_list)],d_list[len(d_list)-2:len(d_list)])
elif ((d_list[len(d_list)-2]>=self.th) and (d_list[len(d_list)-1]>=self.th)):
self.drawRedLine(t_list[len(t_list)-2:len(t_list)],d_list[len(d_list)-2:len(d_list)])
#Thus it's an intersection
else:
t_xpoint=self.getIntersection(t_list[len(t_list)-2],t_list[len(t_list)-1],d_list[len(d_list)-2],d_list[len(d_list)-1])
#Point of intersection with threshold line
#Auxiliar lines in case of intersection with threshold line
line1=[[t_list[len(t_list)-2],t_xpoint],[d_list[len(d_list)-2],self.th]]
line2=[[t_xpoint,t_list[len(t_list)-1]],[self.th,d_list[len(d_list)-1]]]
self.drawLine(line1[0],line1[1])
self.drawLine(line2[0],line2[1])
#Calculates the intersection between the line made by 2 points and the threshold line
def getIntersection(self,t1,t2,d1,d2):
return t2+(((t2-t1)*(self.th-d2))/(d2-d1))
def drawGreenLine(self,t_line,d_line):
self.score+=(t_line[1]-t_line[0])
plt.plot(t_line,d_line,'g-')
def drawRedLine(self,t_line,d_line):
plt.plot(t_line,d_line,'r-')
# Updates score
def update_score(self):
if self.score <= vt.delta_t.total_seconds():
self.score_text.set_text(str('Score: %.2f secs' % self.score))
else:
self.score_text.set_text('Score: ' + str(vt.delta_t.total_seconds())+ ' secs')
#Updates timer
def update_title(self):
#self.update_score()
if vt.timeLeft() <= vt.zero_t:
vt.stopClkTimer()
self.suptitle.set_text(
str(vt.zero_t.total_seconds()))
self.ax.figure.canvas.draw()
else:
self.suptitle.set_text(str(vt.timeLeft())[:-4])
self.ax.figure.canvas.draw()
#Updates data for drawing into the graph
#The first data belongs to 0.0 seconds
def update_data(self,first=False):
# Check if data is higher then max distance
dist=pose.update()
if first:
self.t.insert(len(self.t),0.0)
else:
self.t.insert(len(self.t),(vt.delta_t-vt.diff).total_seconds())
if dist > self.max_dist :
self.d.insert(len(self.d),self.max_dist)
else:
self.d.insert(len(self.d),dist)
# self.drawPoint(self.t[len(self.t)-1],self.d[len(self.d)-1])
if len(self.t)>=2 and len(self.d)>=2:
self.drawLine(self.t,self.d)
self.update_score()
if vt.timeLeft() <= vt.zero_t:
vt.stopDataTimer()
self.update_score()
self.ax.figure.canvas.draw()
self.fig.savefig('Result_'+str(datetime.now())+'.png', bbox_inches='tight')
#https://github.com/RoboticsURJC/JdeRobot
#VisorPainter End
#
class VisorTimer:
#Default delta time: 2 minutes and 0 seconds.
#Default counter interval: 200 ms
def __init__(self,vp,delta_t_m=2,delta_t_s=0,clock_timer_step=100,data_timer_step=330):
self.delta_t = timedelta(minutes=delta_t_m,seconds=delta_t_s)
self.zero_t = timedelta(minutes=0,seconds=0,milliseconds=0)
self.final_t = datetime.now()+self.delta_t
self.diff = self.final_t-datetime.now()
vp.setAxes(xaxis=self.delta_t.seconds)
# Creates a new clock_timer object.
self.clock_timer = vp.fig.canvas.new_timer(interval=clock_timer_step)
self.data_timer = vp.fig.canvas.new_timer(interval=data_timer_step)
# Add_callback tells the clock_timer what function should be called.
self.clock_timer.add_callback(vp.update_title)
self.data_timer.add_callback(vp.update_data)
def startTimer(self):
self.clock_timer.start()
vp.update_data(first=True)
self.data_timer.start()
def stopClkTimer(self,):
self.clock_timer.remove_callback(vp.update_title)
self.clock_timer.stop()
def stopDataTimer(self):
self.data_timer.remove_callback(vp.update_data)
self.data_timer.stop()
def timeLeft(self):
self.diff=self.final_t-datetime.now()
return self.diff
#
#VisorTimer End
#
# Main
status = 0
try:
pose = Pose(sys.argv)
pose.update()
vp = VisorPainter()
vt = VisorTimer(vp)
vp.suptitle.set_text(str(vt.delta_t))
vt.startTimer()
plt.show()
pose.finish()
except:
traceback.print_exc()
status = 1
sys.exit(status)
The result should be an image shown by plt.show(), but the image does not appear in the IPython notebook; instead, this appears in the terminal:
Figure(640x480)
When I use the %run command in the IPython notebook:
import matplotlib
%run /Jupyter/drone_cat_mouse/referee/referee.py /Jupyter/drone_cat_mouse/referee/referee.yml
The image displays correctly that way, but not recursively, so I don't know how to do it.
Thanks for the help.
I'm really unsure what your problem is. I wrote a script that looks like this:
#! /usr/bin/env python3
# plotter.py
import sys
import matplotlib.pyplot as plt
def main(x):
plt.plot(x)
plt.show()
if __name__ == '__main__':
main([float(v) for v in sys.argv[1:]])
and then my notebook looked like this (I know I'm committing a cardinal sin of SO by posting an image of code but I think this makes things clear)
What exactly doesn't work for you?
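For what it's worth, here is a hedged guess at the kind of notebook cell that screenshot would show (an assumption, not the answerer's actual cell): running the script with %run keeps the figure inside the notebook's own matplotlib backend, whereas subprocess.Popen starts a separate Python process whose plt.show() has nowhere inline to draw.
# hypothetical notebook cell, reconstructing the missing screenshot
%matplotlib inline
%run plotter.py 3 1 4 1 5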