Module `modules.env.gridworld`

Expand source code

import numpy as np

class VanillaGridWorld:
    """Rectangular grid world with four moves and a constant step reward.

    Coordinates are ``(row, col)`` pairs. Action 0 decreases the row ("up"),
    1 increases the column ("right"), 2 increases the row ("down") and
    3 decreases the column ("left"). A move that would leave the grid keeps
    the agent where it is.

    NOTE(review): coordinates are compared with ``==``, so they are presumably
    plain tuples rather than NumPy arrays -- confirm against callers.
    """

    def __init__(self, num_rows, num_cols, start_coord, end_coord):
        """Store the grid dimensions and place the agent on ``start_coord``."""
        self.num_rows = num_rows
        self.num_cols = num_cols
        self.start_coord = start_coord
        self.end_coord = end_coord
        self.current_coord = start_coord

        self.num_actions = 4

        # Checked by ``step``; nothing in this class sets it to True.
        self.episode_terminated = False

    @property
    def state_space_shape(self):
        """Return ``(num_rows, num_cols)``."""
        return self.num_rows, self.num_cols

    @property
    def action_space_shape(self):
        """Return ``(num_rows, num_cols, num_actions)``."""
        return self.num_rows, self.num_cols, self.num_actions

    def get_actions(self, s):
        """Return the action indices ``0 .. num_actions - 1`` as an array.

        ``s`` is accepted but not used: every state offers the same actions.
        """
        return np.arange(self.num_actions)

    def is_further_action_possible(self, s):
        """Return False when ``s`` is the end coordinate, True otherwise."""
        # The original read ``self.env.end_coord``; this class has no ``env``
        # attribute, so that lookup always raised AttributeError.
        return s != self.end_coord

    def propose_next_coord(self, coord, action):
        """Return the coordinate reached from ``coord`` by taking ``action``.

        No bounds check is done here; see ``is_coord_valid``.

        Raises:
            ValueError: if ``action`` is not one of 0, 1, 2, 3.
        """
        if action == 0:  # up
            d_row, d_col = -1, 0
        elif action == 1:  # right
            d_row, d_col = 0, 1
        elif action == 2:  # down
            d_row, d_col = 1, 0
        elif action == 3:  # left
            d_row, d_col = 0, -1
        else:
            raise ValueError(
                "Unknown action {!r}; expected 0 (up), 1 (right), 2 (down) or 3 (left).".format(action)
            )
        # Start from the ``coord`` argument, not from ``self.current_coord``.
        return coord[0] + d_row, coord[1] + d_col

    def is_out_of_world(self, coord):
        """Return True when ``coord`` lies outside the ``num_rows`` x ``num_cols`` grid."""
        return coord[0] < 0 or coord[0] > self.num_rows - 1 or coord[1] < 0 or coord[1] > self.num_cols - 1

    def is_coord_valid(self, coord):
        """Return True when the agent may occupy ``coord`` (i.e. it is inside the grid)."""
        return not self.is_out_of_world(coord)

    def get_reward(self, coord):
        """Return the reward for being on ``coord``; always -1 in this world."""
        return -1

    def step(self, action):
        """Apply ``action`` and return ``(current_coord, reward)``.

        The agent moves only if the proposed coordinate is valid; otherwise it
        stays put. The reward is evaluated at the resulting position.
        """
        assert not self.episode_terminated

        proposed_next_coord = self.propose_next_coord(self.current_coord, action)

        if self.is_coord_valid(proposed_next_coord):
            self.current_coord = proposed_next_coord

        return self.current_coord, self.get_reward(self.current_coord)

    def is_episode_terminated(self):
        """Return True when the agent is on the end coordinate."""
        return bool(self.current_coord == self.end_coord)

    def reset(self):
        """Put the agent back on the start coordinate and clear the terminated flag."""
        self.current_coord = self.start_coord
        # ``step`` asserts on this flag, so a reset must restore its initial value.
        self.episode_terminated = False

class GridWorldWithWallsAndTraps(VanillaGridWorld):

    """
    Grid world whose layout is read from an integer-coded 2-D array.

    Traps are cells with a large negative reward (``trap_reward``). Walls are
    cells that cannot be entered: if the agent tries to move onto a wall cell
    it stays on its current cell and receives the reward given by
    self.reward_array[self.current_coord].
    """

    def __init__(self, 
        grid_type_array, 
        trap_reward=-100,
        grid_type_to_int_dict={'generic_walkable':0, 'start':1, 'wall':2, 'trap':3, 'end':4}
        ):
        """Build the reward table and locate the start, end and wall cells.

        Args:
            grid_type_array: 2-D NumPy array of cell-type codes; it must
                contain exactly one start cell and exactly one end cell.
            trap_reward: reward assigned to every trap cell.
            grid_type_to_int_dict: maps the names 'generic_walkable', 'start',
                'wall', 'trap' and 'end' to the codes used in
                ``grid_type_array``. The default dict is only read, never
                modified.
        """

        self.grid_type_array = grid_type_array

        # Same shape and dtype as the grid; wall cells keep the initial 0.
        self.reward_array = np.zeros_like(self.grid_type_array)
        for k, v in grid_type_to_int_dict.items():
            if k in ['generic_walkable', 'start', 'end']:
                self.reward_array[self.grid_type_array == v] = -1
            elif k == 'trap':
                self.reward_array[self.grid_type_array == v] = trap_reward

        # Look each marker up once and reuse the result for check and value.
        start_coords = np.argwhere(self.grid_type_array == grid_type_to_int_dict['start'])
        end_coords = np.argwhere(self.grid_type_array == grid_type_to_int_dict['end'])

        assert len(start_coords) == 1, "There can only be one start state."
        assert len(end_coords) == 1, "There can only be one end state."

        self.wall_coords = [tuple(ls) for ls in np.argwhere(self.grid_type_array == grid_type_to_int_dict['wall']).tolist()]

        num_rows, num_cols = self.grid_type_array.shape

        # The parent initialiser sets num_rows, num_cols, start_coord,
        # end_coord, num_actions and episode_terminated exactly as before, and
        # additionally sets current_coord, which was previously left undefined
        # until reset() was called (so step() raised AttributeError).
        # ``tolist()`` yields plain Python ints, matching ``wall_coords``.
        super().__init__(
            num_rows,
            num_cols,
            tuple(start_coords[0].tolist()),
            tuple(end_coords[0].tolist()),
        )

    def is_actionable(self, s):
        """Return False for the end cell and for wall cells, True otherwise."""
        if s == self.end_coord or s in self.wall_coords:
            return False
        else:
            return True

    def is_coord_valid(self, coord):
        """Return True when ``coord`` is inside the grid and is not a wall."""
        if not self.is_out_of_world(coord) and not coord in self.wall_coords:
            return True
        else:
            return False

    def get_reward(self, coord):
        """Return the entry of ``reward_array`` at ``coord``."""
        return self.reward_array[coord]

Classes

class GridWorldWithWallsAndTraps (grid_type_array, trap_reward=-100, grid_type_to_int_dict={'generic_walkable': 0, 'start': 1, 'wall': 2, 'trap': 3, 'end': 4})

Traps are cells with a large negative reward (`trap_reward`, -100 by default). Walls are cells that cannot be entered: if the agent tries to move onto a wall cell, it stays on its current cell and receives the reward given by self.reward_array[self.current_coord].

Expand source code

class GridWorldWithWallsAndTraps(VanillaGridWorld):

    """
    Grid world whose layout is read from an integer-coded 2-D array.

    Traps are cells with a large negative reward (``trap_reward``). Walls are
    cells that cannot be entered: if the agent tries to move onto a wall cell
    it stays on its current cell and receives the reward given by
    self.reward_array[self.current_coord].
    """

    def __init__(self, 
        grid_type_array, 
        trap_reward=-100,
        grid_type_to_int_dict={'generic_walkable':0, 'start':1, 'wall':2, 'trap':3, 'end':4}
        ):
        """Build the reward table and locate the start, end and wall cells.

        Args:
            grid_type_array: 2-D NumPy array of cell-type codes; it must
                contain exactly one start cell and exactly one end cell.
            trap_reward: reward assigned to every trap cell.
            grid_type_to_int_dict: maps the names 'generic_walkable', 'start',
                'wall', 'trap' and 'end' to the codes used in
                ``grid_type_array``. The default dict is only read, never
                modified.
        """

        self.grid_type_array = grid_type_array

        # Same shape and dtype as the grid; wall cells keep the initial 0.
        self.reward_array = np.zeros_like(self.grid_type_array)
        for k, v in grid_type_to_int_dict.items():
            if k in ['generic_walkable', 'start', 'end']:
                self.reward_array[self.grid_type_array == v] = -1
            elif k == 'trap':
                self.reward_array[self.grid_type_array == v] = trap_reward

        # Look each marker up once and reuse the result for check and value.
        start_coords = np.argwhere(self.grid_type_array == grid_type_to_int_dict['start'])
        end_coords = np.argwhere(self.grid_type_array == grid_type_to_int_dict['end'])

        assert len(start_coords) == 1, "There can only be one start state."
        assert len(end_coords) == 1, "There can only be one end state."

        self.wall_coords = [tuple(ls) for ls in np.argwhere(self.grid_type_array == grid_type_to_int_dict['wall']).tolist()]

        num_rows, num_cols = self.grid_type_array.shape

        # The parent initialiser sets num_rows, num_cols, start_coord,
        # end_coord, num_actions and episode_terminated exactly as before, and
        # additionally sets current_coord, which was previously left undefined
        # until reset() was called (so step() raised AttributeError).
        # ``tolist()`` yields plain Python ints, matching ``wall_coords``.
        super().__init__(
            num_rows,
            num_cols,
            tuple(start_coords[0].tolist()),
            tuple(end_coords[0].tolist()),
        )

    def is_actionable(self, s):
        """Return False for the end cell and for wall cells, True otherwise."""
        if s == self.end_coord or s in self.wall_coords:
            return False
        else:
            return True

    def is_coord_valid(self, coord):
        """Return True when ``coord`` is inside the grid and is not a wall."""
        if not self.is_out_of_world(coord) and not coord in self.wall_coords:
            return True
        else:
            return False

    def get_reward(self, coord):
        """Return the entry of ``reward_array`` at ``coord``."""
        return self.reward_array[coord]

Ancestors

VanillaGridWorld

Methods

def get_reward(self, coord)

Expand source code

def get_reward(self, coord):
    """Look up and return the entry of ``self.reward_array`` at ``coord``."""
    reward = self.reward_array[coord]
    return reward

def is_actionable(self, s)

Expand source code

def is_actionable(self, s):
    """Return False for the end coordinate and for wall coordinates, True otherwise."""
    blocked = s == self.end_coord or s in self.wall_coords
    return not blocked

def is_coord_valid(self, coord)

Expand source code

def is_coord_valid(self, coord):
    """Return True when ``coord`` is inside the grid and is not a wall coordinate."""
    # Guard clause: anything outside the grid is invalid regardless of walls.
    if self.is_out_of_world(coord):
        return False
    return coord not in self.wall_coords

class VanillaGridWorld (num_rows, num_cols, start_coord, end_coord)

Expand source code

class VanillaGridWorld:
    """Rectangular grid world with four moves and a constant step reward.

    Coordinates are ``(row, col)`` pairs. Action 0 decreases the row ("up"),
    1 increases the column ("right"), 2 increases the row ("down") and
    3 decreases the column ("left"). A move that would leave the grid keeps
    the agent where it is.

    NOTE(review): coordinates are compared with ``==``, so they are presumably
    plain tuples rather than NumPy arrays -- confirm against callers.
    """

    def __init__(self, num_rows, num_cols, start_coord, end_coord):
        """Store the grid dimensions and place the agent on ``start_coord``."""
        self.num_rows = num_rows
        self.num_cols = num_cols
        self.start_coord = start_coord
        self.end_coord = end_coord
        self.current_coord = start_coord

        self.num_actions = 4

        # Checked by ``step``; nothing in this class sets it to True.
        self.episode_terminated = False

    @property
    def state_space_shape(self):
        """Return ``(num_rows, num_cols)``."""
        return self.num_rows, self.num_cols

    @property
    def action_space_shape(self):
        """Return ``(num_rows, num_cols, num_actions)``."""
        return self.num_rows, self.num_cols, self.num_actions

    def get_actions(self, s):
        """Return the action indices ``0 .. num_actions - 1`` as an array.

        ``s`` is accepted but not used: every state offers the same actions.
        """
        return np.arange(self.num_actions)

    def is_further_action_possible(self, s):
        """Return False when ``s`` is the end coordinate, True otherwise."""
        # The original read ``self.env.end_coord``; this class has no ``env``
        # attribute, so that lookup always raised AttributeError.
        return s != self.end_coord

    def propose_next_coord(self, coord, action):
        """Return the coordinate reached from ``coord`` by taking ``action``.

        No bounds check is done here; see ``is_coord_valid``.

        Raises:
            ValueError: if ``action`` is not one of 0, 1, 2, 3.
        """
        if action == 0:  # up
            d_row, d_col = -1, 0
        elif action == 1:  # right
            d_row, d_col = 0, 1
        elif action == 2:  # down
            d_row, d_col = 1, 0
        elif action == 3:  # left
            d_row, d_col = 0, -1
        else:
            raise ValueError(
                "Unknown action {!r}; expected 0 (up), 1 (right), 2 (down) or 3 (left).".format(action)
            )
        # Start from the ``coord`` argument, not from ``self.current_coord``.
        return coord[0] + d_row, coord[1] + d_col

    def is_out_of_world(self, coord):
        """Return True when ``coord`` lies outside the ``num_rows`` x ``num_cols`` grid."""
        return coord[0] < 0 or coord[0] > self.num_rows - 1 or coord[1] < 0 or coord[1] > self.num_cols - 1

    def is_coord_valid(self, coord):
        """Return True when the agent may occupy ``coord`` (i.e. it is inside the grid)."""
        return not self.is_out_of_world(coord)

    def get_reward(self, coord):
        """Return the reward for being on ``coord``; always -1 in this world."""
        return -1

    def step(self, action):
        """Apply ``action`` and return ``(current_coord, reward)``.

        The agent moves only if the proposed coordinate is valid; otherwise it
        stays put. The reward is evaluated at the resulting position.
        """
        assert not self.episode_terminated

        proposed_next_coord = self.propose_next_coord(self.current_coord, action)

        if self.is_coord_valid(proposed_next_coord):
            self.current_coord = proposed_next_coord

        return self.current_coord, self.get_reward(self.current_coord)

    def is_episode_terminated(self):
        """Return True when the agent is on the end coordinate."""
        return bool(self.current_coord == self.end_coord)

    def reset(self):
        """Put the agent back on the start coordinate and clear the terminated flag."""
        self.current_coord = self.start_coord
        # ``step`` asserts on this flag, so a reset must restore its initial value.
        self.episode_terminated = False

Subclasses

GridWorldWithWallsAndTraps

Instance variables

var action_space_shape

Expand source code

@property
def action_space_shape(self):
    """Return the tuple ``(num_rows, num_cols, num_actions)``."""
    rows, cols = self.num_rows, self.num_cols
    return rows, cols, self.num_actions

var state_space_shape

Expand source code

@property
def state_space_shape(self):
    """Return the tuple ``(num_rows, num_cols)``."""
    shape = (self.num_rows, self.num_cols)
    return shape

Methods

def get_actions(self, s)

Expand source code

def get_actions(self, s):
    """Return the action indices ``0 .. num_actions - 1`` as a NumPy array.

    ``s`` is accepted but not used.
    """
    action_count = self.num_actions
    return np.arange(action_count)

def get_reward(self, coord)

Expand source code

def get_reward(self, coord):
    """Return the reward for ``coord``: a constant -1, whatever the coordinate."""
    step_reward = -1
    return step_reward

def is_coord_valid(self, coord)

Expand source code

def is_coord_valid(self, coord):
    """Return True when ``coord`` is not outside the world, False otherwise."""
    return not self.is_out_of_world(coord)

def is_episode_terminated(self)

Expand source code

def is_episode_terminated(self):
    """Return True when ``current_coord`` equals ``end_coord``, False otherwise."""
    # bool() applies the same truth test the if-statement form would.
    return bool(self.current_coord == self.end_coord)

def is_further_action_possible(self, s)

Expand source code

def is_further_action_possible(self, s):
    """Return False when ``s`` is the end coordinate, True otherwise."""
    # The end coordinate is stored on the environment itself; the original
    # ``self.env.end_coord`` lookup failed because there is no ``env`` attribute.
    return s != self.end_coord

def is_out_of_world(self, coord)

Expand source code

def is_out_of_world(self, coord):
    """Return a truthy value when ``coord`` lies outside the grid.

    The row (``coord[0]``) is checked against ``0 .. num_rows - 1`` first; the
    column (``coord[1]``) is only examined when the row is inside.
    """
    row = coord[0]
    row_outside = row < 0 or row > self.num_rows - 1
    if row_outside:
        return row_outside
    col = coord[1]
    return col < 0 or col > self.num_cols - 1

def propose_next_coord(self, coord, action)

Expand source code

def propose_next_coord(self, coord, action):
    """Return the coordinate reached from ``coord`` by taking ``action``.

    Actions: 0 = up (row - 1), 1 = right (col + 1), 2 = down (row + 1),
    3 = left (col - 1). No bounds or wall check is performed here.

    Raises:
        ValueError: if ``action`` is not one of 0, 1, 2, 3 (previously this
            surfaced as an UnboundLocalError).
    """
    if action == 0:  # up
        d_row, d_col = -1, 0
    elif action == 1:  # right
        d_row, d_col = 0, 1
    elif action == 2:  # down
        d_row, d_col = 1, 0
    elif action == 3:  # left
        d_row, d_col = 0, -1
    else:
        raise ValueError(
            "Unknown action {!r}; expected 0 (up), 1 (right), 2 (down) or 3 (left).".format(action)
        )
    # Offset the ``coord`` argument; the original ignored it and always
    # started from ``self.current_coord``.
    return coord[0] + d_row, coord[1] + d_col

def reset(self)

Expand source code

def reset(self):
    """Put the agent back on the start coordinate and clear the terminated flag."""
    self.current_coord = self.start_coord
    # ``step`` asserts that ``episode_terminated`` is False, so a reset must
    # restore it; otherwise a flag set during a previous episode would block
    # every later step.
    self.episode_terminated = False

def step(self, action)

Expand source code

def step(self, action):
    """Apply ``action`` and return ``(current_coord, reward)``.

    The move is taken only when ``is_coord_valid`` accepts the proposed
    coordinate; otherwise the agent stays where it is. The reward is
    evaluated at the position the agent ends up on.
    """
    assert not self.episode_terminated

    candidate = self.propose_next_coord(self.current_coord, action)
    if self.is_coord_valid(candidate):
        self.current_coord = candidate

    position = self.current_coord
    reward = self.get_reward(position)
    return position, reward