[1]:
%matplotlib inline

import matplotlib.pyplot as plt

import gym
from gym.envs.registration import register

ACS2 in Frozen Lake

About the environment > The agent controls the movement of a character in a grid world. Some tiles of the grid are walkable, and others lead to the agent falling into the water. Additionally, the movement direction of the agent is uncertain and only partially depends on the chosen direction. The agent is rewarded for finding a walkable path to a goal tile.

[2]:
# Create the stock FrozenLake environment (used below as the baseline)
fl_env = gym.make('FrozenLake-v0')

# Reset the state and keep the initial observation returned by the environment
state = fl_env.reset()

# Render the environment (the grid map is shown in the cell output)
fl_env.render()

SFFF
FHFH
FFFH
HFFG

Each state takes one of the following values: {S, F, H, G}, which refer to:

SFFF       (S: starting point, safe)
FHFH       (F: frozen surface, safe)
FFFH       (H: hole, fall to your doom)
HFFG       (G: goal, where the frisbee is located)

When interacting with the environment the agent can perform 4 actions, which map as follows: 0 - left, 1 - down, 2 - right, 3 - up

FrozenLake-v0 defines “solving” as getting average reward of 0.78 over 100 consecutive trials.

We will also define a second version of the same environment, but with the is_slippery=False parameter, which makes it more deterministic.

[3]:
# Register a variant of FrozenLake on the same 4x4 map with slipping disabled.
# NOTE(review): re-running this cell in the same kernel may fail or warn if the
# id is already registered -- confirm against the installed gym version.
register(
    id='FrozenLakeNotSlippery-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4', 'is_slippery': False},
    max_episode_steps=100,
    reward_threshold=0.78,  # optimum = .8196
)

# Instantiate the variant registered above
fl_ns_env = gym.make('FrozenLakeNotSlippery-v0')

# Reset the state (this rebinds `state`, which previously held the baseline
# environment's initial observation)
state = fl_ns_env.reset()

# Render the environment (the grid map is shown in the cell output)
fl_ns_env.render()

SFFF
FHFH
FFFH
HFFG

ACS2

[4]:
# Import PyALCS code from local path
import sys, os
sys.path.append(os.path.abspath('../'))

from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Length of the perception string: one position per tile of the grid
CLASSIFIER_LENGTH = 16  # Because we are operating in 4x4 grid
# Number of discrete actions, read from the baseline environment's action space
POSSIBLE_ACTIONS = fl_env.action_space.n  # 4

Encoding perception

The only information returned by the environment is the agent's current position (not its perception). Therefore the agent's task will be to predict where it will land after executing each action.

To do so we will represent state as a one-hot encoded vector.

[5]:
class FrozenLakeAdapter(EnvironmentAdapter):
    """Adapter turning a FrozenLake position index into a perception string."""

    @classmethod
    def to_genotype(cls, phenotype):
        """Return the one-hot encoding of ``phenotype``.

        The result is a string of ``CLASSIFIER_LENGTH`` characters made of
        ``'0'`` everywhere except at index ``phenotype``, which holds ``'X'``.
        """
        cells = ['0'] * CLASSIFIER_LENGTH
        cells[phenotype] = 'X'  # mark the agent's current position
        return ''.join(cells)

X corresponds to current agent position. State 4 is encoded as follows:

[6]:
FrozenLakeAdapter().to_genotype(4)
[6]:
'0000X00000000000'

Environment metrics

We will also need a function for evaluating whether the agent finished a trial successfully.

[7]:
from lcs.metrics import population_metrics


# A trial counts as successful when the environment ends up in state number 15;
# any other final state is treated as "reward not found".
def fl_metrics(pop, env):
    """Build the metrics dictionary for the given population and environment.

    The ``'found_reward'`` entry is the result of comparing ``env.env.s``
    with 15; the remaining entries are whatever ``population_metrics``
    returns for the same ``pop`` and ``env``.
    """
    reached_goal = env.env.s == 15

    # Custom entry first, then the basic population metrics on top of it
    return {'found_reward': reached_goal, **population_metrics(pop, env)}

Performance evaluation

[8]:
def print_performance(population, metrics):
    """Print summary statistics and the ten fittest classifiers.

    Args:
        population: iterable of classifiers; each one must expose
            ``fitness``, ``is_reliable()``, ``q``, ``r``, ``ir`` and ``exp``.
        metrics: sequence of per-trial metric dicts, each holding a
            ``'found_reward'`` entry.

    The success percentage is computed against the number of trials present
    in ``metrics`` (0.00% when ``metrics`` is empty), so the function reports
    a correct value for runs of any length. The caller's ``population`` is
    left untouched; ranking is done on a sorted copy.
    """
    # Highest fitness first; sort a copy instead of reordering the argument
    ranked = sorted(population, key=lambda cl: -cl.fitness)
    population_count = len(ranked)
    reliable_count = sum(1 for cl in ranked if cl.is_reliable())

    trial_count = len(metrics)
    successful_trials = sum(m['found_reward'] for m in metrics)
    # Normalise by the trials actually recorded, not by a global constant
    success_rate = successful_trials / trial_count * 100 if trial_count else 0.0

    print("Number of classifiers: {}".format(population_count))
    print("Number of reliable classifiers: {}".format(reliable_count))
    print("Percentage of successul trials: {:.2f}%".format(success_rate))
    print("\nTop 10 classifiers:")
    for cl in ranked[:10]:
        print("{!r} \tq: {:.2f} \tr: {:.2f} \tir: {:.2f} \texp: {}".format(cl, cl.q, cl.r, cl.ir, cl.exp))
[9]:
def plot_success_trials(metrics, ax=None):
    """Plot the ``'found_reward'`` value of every metrics entry against its trial.

    Args:
        metrics: sequence of dicts with ``'trial'`` and ``'found_reward'`` keys.
        ax: axes to draw on; the current pyplot axes are used when omitted.
    """
    axes = plt.gca() if ax is None else ax

    trial_numbers = [entry['trial'] for entry in metrics]
    reward_flags = [entry['found_reward'] for entry in metrics]

    axes.plot(trial_numbers, reward_flags)
    axes.set_title("Successful Trials")
    axes.set_xlabel("Trial")
    axes.set_ylabel("Agent found reward")
[10]:
def plot_population(metrics, ax=None):
    """Plot the ``'numerosity'`` and ``'reliable'`` metrics against the trial number.

    Args:
        metrics: sequence of dicts with ``'trial'``, ``'numerosity'`` and
            ``'reliable'`` keys.
        ax: axes to draw on; the current pyplot axes are used when omitted.
    """
    axes = ax if ax is not None else plt.gca()

    trial_numbers = [entry['trial'] for entry in metrics]

    # (metric key, line style, legend label) for every curve on the chart
    series = (
        ('numerosity', 'b', 'all'),
        ('reliable', 'r', 'reliable'),
    )
    # Extract all the data first, then draw
    curves = [
        ([entry[key] for entry in metrics], style, label)
        for key, style, label in series
    ]
    for values, style, label in curves:
        axes.plot(trial_numbers, values, style, label=label)

    axes.set_title("Population size")
    axes.set_xlabel("Trial")
    axes.set_ylabel("Number of macroclassifiers")
    axes.legend(loc='best')
[11]:
def plot_performance(metrics):
    """Show a figure with the success chart and the population-size chart.

    The two charts occupy the top-left and top-right cells of a 2x2 subplot
    grid and are drawn by ``plot_success_trials`` and ``plot_population``.
    """
    plt.figure(figsize=(13, 10), dpi=100)
    plt.suptitle('Performance Visualization')

    # (subplot position, drawing function) for each panel, in drawing order
    panels = (
        (221, plot_success_trials),
        (222, plot_population),
    )
    for position, draw in panels:
        draw(metrics, plt.subplot(position))

    plt.show()

Default ACS2 configuration

We are now ready to configure the ACS2 agent, providing some defaults.

[12]:
# Shared ACS2 configuration used for both environments in the experiments below.
# Perception length and action count come from the constants defined earlier;
# observations are translated by FrozenLakeAdapter and fl_metrics is plugged in
# as the user metrics collector.
# NOTE(review): metrics_trial_frequency=1 presumably collects metrics on every
# trial; the meaning of theta_i and epsilon is defined by the lcs library --
# confirm against its documentation.
cfg = Configuration(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=POSSIBLE_ACTIONS,
    environment_adapter=FrozenLakeAdapter(),
    metrics_trial_frequency=1,
    user_metrics_collector_fcn=fl_metrics,
    theta_i=0.3,
    epsilon=0.7)

# Show the resulting configuration, including values that were not set here
print(cfg)
ACS2Configuration:
        - Classifier length: [16]
        - Number of possible actions: [4]
        - Classifier wildcard: [#]
        - Environment adapter function: [<__main__.FrozenLakeAdapter object at 0x117bd0b00>]
        - Fitness function: [None]
        - Do GA: [False]
        - Do subsumption: [True]
        - Do Action Planning: [False]
        - Beta: [0.05]
        - ...
        - Epsilon: [0.7]
        - U_max: [100000]

Experiments

[13]:
EXPLORE_TRIALS = 2000
EXPLOIT_TRIALS = 100


def perform_experiment(cfg, env):
    """Run an explore phase followed by an exploit phase on ``env``.

    A fresh ``ACS2`` agent explores for ``EXPLORE_TRIALS`` trials; a second
    agent, seeded with the population produced by exploration, then exploits
    for ``EXPLOIT_TRIALS`` trials.

    Returns:
        A pair ``((population_explore, metrics_explore),
        (population_exploit, metrics_exploit))``.
    """
    # Phase 1: exploration with a brand-new agent
    explorer = ACS2(cfg)
    explored_population, explore_metrics = explorer.explore(env, EXPLORE_TRIALS)

    # Phase 2: exploitation by a new agent that starts from the explored population
    exploiter = ACS2(cfg, population=explored_population)
    exploited_population, exploit_metrics = exploiter.exploit(env, EXPLOIT_TRIALS)

    explore_outcome = (explored_population, explore_metrics)
    exploit_outcome = (exploited_population, exploit_metrics)
    return explore_outcome, exploit_outcome

FrozenLake-v0 environment (baseline)

[14]:
%%time
explore_results, exploit_results = perform_experiment(cfg, fl_env)
CPU times: user 46.1 s, sys: 138 ms, total: 46.2 s
Wall time: 46.3 s

Learn some behaviour during exploration phase

[15]:
# exploration
print_performance(explore_results[0], explore_results[1])
Number of classifiers: 441
Number of reliable classifiers: 0
Percentage of successul trials: 42.00%

Top 10 classifiers:
##############X0 1 ##############0X (00000000000000##)    q: 0.602 r: 0.3693 ir: 0.2825 f: 0.2224 exp: 41  tga: 1105  talp: 15265 tav: 2.99e+02 num: 1  q: 0.60         r: 0.37         ir: 0.28        exp: 41
#0############X0 1 ##############0X (0#000000000000##)    q: 0.577 r: 0.3633 ir: 0.2765 f: 0.2096 exp: 32  tga: 3961  talp: 15265 tav: 3.26e+02 num: 1  q: 0.58         r: 0.36         ir: 0.28        exp: 32
#0###########0X# 3 #############X0# (0#00000000000##0)    q: 0.471 r: 0.3723 ir: 0.3615 f: 0.1753 exp: 43  tga: 3660  talp: 14982 tav: 2.96e+02 num: 1  q: 0.47         r: 0.37         ir: 0.36        exp: 43
##############X0 3 ##############0X (00000000000000##)    q: 0.442 r: 0.3876 ir: 0.377  f: 0.1715 exp: 45  tga: 2040  talp: 14982 tav: 3.13e+02 num: 1  q: 0.44         r: 0.39         ir: 0.38        exp: 45
#############0X# 3 #############X0# (0000000000000##0)    q: 0.446 r: 0.3829 ir: 0.3723 f: 0.1709 exp: 45  tga: 3117  talp: 14982 tav: 2.98e+02 num: 1  q: 0.45         r: 0.38         ir: 0.37        exp: 45
#############0X# 1 #############X0# (0000000000000##0)    q: 0.452 r: 0.3676 ir: 0.2807 f: 0.1661 exp: 33  tga: 3251  talp: 15265 tav: 3.33e+02 num: 1  q: 0.45         r: 0.37         ir: 0.28        exp: 33
#0###########0X# 1 #############X0# (0#00000000000##0)    q: 0.451 r: 0.3633 ir: 0.2765 f: 0.1639 exp: 33  tga: 3251  talp: 15265 tav: 3.26e+02 num: 1  q: 0.45         r: 0.36         ir: 0.28        exp: 33
##############X# 1 ################ (00000000000000#0)    q: 0.368 r: 0.3676 ir: 0.2807 f: 0.1351 exp: 40  tga: 1748  talp: 15265 tav: 2.93e+02 num: 1  q: 0.37         r: 0.37         ir: 0.28        exp: 40
#0############X# 1 ################ (0#000000000000#0)    q: 0.355 r: 0.3633 ir: 0.2765 f: 0.129  exp: 36  tga: 3119  talp: 15265 tav: 2.97e+02 num: 1  q: 0.36         r: 0.36         ir: 0.28        exp: 36
##########0###X# 2 ##########X###0# (0000000000#000#0)    q: 0.412 r: 0.2099 ir: 0.1545 f: 0.0865 exp: 18  tga: 631   talp: 14255 tav: 7.17e+02 num: 1  q: 0.41         r: 0.21         ir: 0.15        exp: 18
[16]:
plot_performance(explore_results[1])
../_images/notebooks_FrozenLake_26_0.png

Metrics from exploitation

[17]:
# exploitation
print_performance(exploit_results[0], exploit_results[1])
Number of classifiers: 441
Number of reliable classifiers: 0
Percentage of successul trials: 6.00%

Top 10 classifiers:
##############X0 1 ##############0X (00000000000000##)    q: 0.602 r: 0.3693 ir: 0.2825 f: 0.2224 exp: 41  tga: 1105  talp: 15265 tav: 2.99e+02 num: 1  q: 0.60         r: 0.37         ir: 0.28        exp: 41
#0############X0 1 ##############0X (0#000000000000##)    q: 0.577 r: 0.3633 ir: 0.2765 f: 0.2096 exp: 32  tga: 3961  talp: 15265 tav: 3.26e+02 num: 1  q: 0.58         r: 0.36         ir: 0.28        exp: 32
#0###########0X# 3 #############X0# (0#00000000000##0)    q: 0.471 r: 0.3723 ir: 0.3615 f: 0.1753 exp: 43  tga: 3660  talp: 14982 tav: 2.96e+02 num: 1  q: 0.47         r: 0.37         ir: 0.36        exp: 43
##############X0 3 ##############0X (00000000000000##)    q: 0.442 r: 0.3876 ir: 0.377  f: 0.1715 exp: 45  tga: 2040  talp: 14982 tav: 3.13e+02 num: 1  q: 0.44         r: 0.39         ir: 0.38        exp: 45
#############0X# 3 #############X0# (0000000000000##0)    q: 0.446 r: 0.3829 ir: 0.3723 f: 0.1709 exp: 45  tga: 3117  talp: 14982 tav: 2.98e+02 num: 1  q: 0.45         r: 0.38         ir: 0.37        exp: 45
#############0X# 1 #############X0# (0000000000000##0)    q: 0.452 r: 0.3676 ir: 0.2807 f: 0.1661 exp: 33  tga: 3251  talp: 15265 tav: 3.33e+02 num: 1  q: 0.45         r: 0.37         ir: 0.28        exp: 33
#0###########0X# 1 #############X0# (0#00000000000##0)    q: 0.451 r: 0.3633 ir: 0.2765 f: 0.1639 exp: 33  tga: 3251  talp: 15265 tav: 3.26e+02 num: 1  q: 0.45         r: 0.36         ir: 0.28        exp: 33
##############X# 1 ################ (00000000000000#0)    q: 0.368 r: 0.3676 ir: 0.2807 f: 0.1351 exp: 40  tga: 1748  talp: 15265 tav: 2.93e+02 num: 1  q: 0.37         r: 0.37         ir: 0.28        exp: 40
#0############X# 1 ################ (0#000000000000#0)    q: 0.355 r: 0.3633 ir: 0.2765 f: 0.129  exp: 36  tga: 3119  talp: 15265 tav: 2.97e+02 num: 1  q: 0.36         r: 0.36         ir: 0.28        exp: 36
##########0###X# 2 ##########X###0# (0000000000#000#0)    q: 0.412 r: 0.2099 ir: 0.1545 f: 0.0865 exp: 18  tga: 631   talp: 14255 tav: 7.17e+02 num: 1  q: 0.41         r: 0.21         ir: 0.15        exp: 18

FrozenLakeNotSlippery-v0 environment

[18]:
%%time
explore_results_2, exploit_results_2 = perform_experiment(cfg, fl_ns_env)
CPU times: user 9.99 s, sys: 136 ms, total: 10.1 s
Wall time: 10.2 s
[19]:
# exploration
print_performance(explore_results_2[0], explore_results_2[1])
Number of classifiers: 89
Number of reliable classifiers: 89
Percentage of successul trials: 192.00%

Top 10 classifiers:
##############X0 2 ##############0X (empty)               q: 1.0   r: 1.0    ir: 1.0    f: 1.0    exp: 191 tga: 237   talp: 14842 tav: 79.9   num: 1    q: 1.00         r: 1.00         ir: 1.00        exp: 191
##########X###0# 1 ##########0###X# (empty)               q: 1.0   r: 0.95   ir: 0.0    f: 0.95   exp: 245 tga: 324   talp: 15005 tav: 73.1   num: 1    q: 1.00         r: 0.95         ir: 0.00        exp: 245
#############X0# 2 #############0X# (empty)               q: 0.999 r: 0.9459 ir: 0.0    f: 0.9453 exp: 130 tga: 236   talp: 14842 tav: 1.38e+02 num: 1  q: 1.00         r: 0.95         ir: 0.00        exp: 130
##############X# 1 ################ (empty)               q: 0.997 r: 0.9274 ir: 0.0    f: 0.9245 exp: 87  tga: 572   talp: 15006 tav: 2.14e+02 num: 1  q: 1.00         r: 0.93         ir: 0.00        exp: 87
#########X0##### 2 #########0X##### (empty)               q: 1.0   r: 0.9025 ir: 0.0    f: 0.9025 exp: 399 tga: 117   talp: 15035 tav: 50.2   num: 1    q: 1.00         r: 0.90         ir: 0.00        exp: 399
######X###0##### 1 ######0###X##### (empty)               q: 1.0   r: 0.8974 ir: 0.0    f: 0.8971 exp: 137 tga: 26    talp: 14979 tav: 89.2   num: 1    q: 1.00         r: 0.90         ir: 0.00        exp: 137
#########X###0## 1 #########0###X## (empty)               q: 1.0   r: 0.8892 ir: 0.0    f: 0.8892 exp: 168 tga: 115   talp: 14994 tav: 93.6   num: 1    q: 1.00         r: 0.89         ir: 0.00        exp: 168
####0#####0###X# 3 ##########X###0# (empty)               q: 0.997 r: 0.8828 ir: 0.0    f: 0.8797 exp: 98  tga: 485   talp: 14543 tav: 1.62e+02 num: 1  q: 1.00         r: 0.88         ir: 0.00        exp: 98
##########0###X# 3 ##########X###0# (empty)               q: 0.997 r: 0.8828 ir: 0.0    f: 0.8797 exp: 97  tga: 485   talp: 14543 tav: 1.62e+02 num: 1  q: 1.00         r: 0.88         ir: 0.00        exp: 97
########X0###### 2 ########0X###### (empty)               q: 1.0   r: 0.8573 ir: 0.0    f: 0.8573 exp: 740 tga: 114   talp: 15034 tav: 28.7   num: 1    q: 1.00         r: 0.86         ir: 0.00        exp: 740
[20]:
plot_performance(explore_results_2[1])
../_images/notebooks_FrozenLake_32_0.png
[21]:
# exploitation
print_performance(exploit_results_2[0], exploit_results_2[1])
Number of classifiers: 89
Number of reliable classifiers: 89
Percentage of successul trials: 100.00%

Top 10 classifiers:
##############X0 2 ##############0X (empty)               q: 1.0   r: 1.0    ir: 1.0    f: 1.0    exp: 191 tga: 237   talp: 14842 tav: 79.9   num: 1    q: 1.00         r: 1.00         ir: 1.00        exp: 191
##########X###0# 1 ##########0###X# (empty)               q: 1.0   r: 0.95   ir: 0.0    f: 0.95   exp: 245 tga: 324   talp: 15005 tav: 73.1   num: 1    q: 1.00         r: 0.95         ir: 0.00        exp: 245
#############X0# 2 #############0X# (empty)               q: 0.999 r: 0.9459 ir: 0.0    f: 0.9453 exp: 130 tga: 236   talp: 14842 tav: 1.38e+02 num: 1  q: 1.00         r: 0.95         ir: 0.00        exp: 130
##############X# 1 ################ (empty)               q: 0.997 r: 0.9274 ir: 0.0    f: 0.9245 exp: 87  tga: 572   talp: 15006 tav: 2.14e+02 num: 1  q: 1.00         r: 0.93         ir: 0.00        exp: 87
#########X0##### 2 #########0X##### (empty)               q: 1.0   r: 0.9025 ir: 0.0    f: 0.9025 exp: 399 tga: 117   talp: 15035 tav: 50.2   num: 1    q: 1.00         r: 0.90         ir: 0.00        exp: 399
######X###0##### 1 ######0###X##### (empty)               q: 1.0   r: 0.8974 ir: 0.0    f: 0.8971 exp: 137 tga: 26    talp: 14979 tav: 89.2   num: 1    q: 1.00         r: 0.90         ir: 0.00        exp: 137
#########X###0## 1 #########0###X## (empty)               q: 1.0   r: 0.8892 ir: 0.0    f: 0.8892 exp: 168 tga: 115   talp: 14994 tav: 93.6   num: 1    q: 1.00         r: 0.89         ir: 0.00        exp: 168
####0#####0###X# 3 ##########X###0# (empty)               q: 0.997 r: 0.8828 ir: 0.0    f: 0.8797 exp: 98  tga: 485   talp: 14543 tav: 1.62e+02 num: 1  q: 1.00         r: 0.88         ir: 0.00        exp: 98
##########0###X# 3 ##########X###0# (empty)               q: 0.997 r: 0.8828 ir: 0.0    f: 0.8797 exp: 97  tga: 485   talp: 14543 tav: 1.62e+02 num: 1  q: 1.00         r: 0.88         ir: 0.00        exp: 97
########X0###### 2 ########0X###### (empty)               q: 1.0   r: 0.8573 ir: 0.0    f: 0.8573 exp: 740 tga: 114   talp: 15034 tav: 28.7   num: 1    q: 1.00         r: 0.86         ir: 0.00        exp: 740

Comparison

[22]:
# NOTE: this re-declares plot_population with the same behaviour as the
# definition earlier in this notebook; the later definition shadows the earlier one.
def plot_population(metrics, ax=None):
    """Draw total and reliable classifier counts per trial.

    Args:
        metrics: sequence of dicts providing ``'trial'``, ``'numerosity'``
            and ``'reliable'`` entries.
        ax: target axes; falls back to the current pyplot axes when ``None``.
    """
    target = plt.gca() if ax is None else ax

    xs = [row['trial'] for row in metrics]
    all_counts = [row['numerosity'] for row in metrics]
    reliable_counts = [row['reliable'] for row in metrics]

    # Blue line: whole population, red line: reliable classifiers only
    for ys, colour, name in ((all_counts, 'b', 'all'), (reliable_counts, 'r', 'reliable')):
        target.plot(xs, ys, colour, label=name)

    target.set_title("Population size")
    target.set_xlabel("Trial")
    target.set_ylabel("Number of macroclassifiers")
    target.legend(loc='best')
[23]:
# Compare classifier numerosity during exploration in both environment variants.
original = explore_results[1]
modified = explore_results_2[1]

ax = plt.gca()

# Each series is plotted against its own trial numbers, so the chart stays
# valid even if the two runs recorded a different number of trials.
trials = [m['trial'] for m in original]
modified_trials = [m['trial'] for m in modified]

original_numerosity = [m['numerosity'] for m in original]
modified_numerosity = [m['numerosity'] for m in modified]

# Text positions below are hand-picked data coordinates for these two runs
ax.plot(trials, original_numerosity, 'r')
ax.text(1000, 350, "Original environment", color='r')

ax.plot(modified_trials, modified_numerosity, 'b')
ax.text(1000, 40, 'No-slippery setting', color='b')


ax.set_title('Classifier numerosity in FrozenLake environment')
ax.set_xlabel('Trial')
ax.set_ylabel('Number of macroclassifiers')

plt.show()
../_images/notebooks_FrozenLake_36_0.png