{
"cells": [
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"import math\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"sns.reset_defaults()\n",
"sns.set_theme(rc={'figure.dpi': 72, 'savefig.dpi': 300,\n",
" 'figure.autolayout': True})\n",
"sns.set_style('ticks')\n",
"sns.set_context('paper')\n",
"\n",
"np.set_printoptions(precision=4)\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of actions: 4\n",
"Number of observations: 16\n"
]
}
],
"source": [
"env = gym.make(\"FrozenLake-v1\", is_slippery=False)\n",
"print(\"Number of actions:\", env.action_space.n)\n",
"print(\"Number of observations:\", env.observation_space.n)"
]
},
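{
"cell_type": "markdown",
"metadata": {},
"source": [
"The 16 observations index the tiles of a 4x4 grid. As a quick sketch (assuming the default 4x4 map created above), the layout can be printed from `env.unwrapped.desc`, where `S` is the start, `F` frozen ice, `H` a hole and `G` the goal:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print the underlying 4x4 map; desc is an array of single-byte characters\n",
"# (assumes the default FrozenLake-v1 map created above).\n",
"for row in env.unwrapped.desc:\n",
"    print(''.join(c.decode() for c in row))\n"
]
},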
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Win rate: 0.009\n",
"Number of wins: 9\n"
]
}
],
"source": [
"# Part b\n",
"\n",
"def QEval(episodes: int, Q: np.ndarray = None):\n",
" win_cnt = 0\n",
" for _ in range(episodes):\n",
" observation = env.reset()[0]\n",
" terminated = False\n",
" while not terminated:\n",
" if Q is None:\n",
" action = env.action_space.sample()\n",
" else:\n",
" action = np.argmax(Q[observation])\n",
" observation, reward, terminated, *_ = env.step(action)\n",
" win_cnt += reward\n",
" win_cnt = int(win_cnt)\n",
" print(\"Win rate:\", round(win_cnt/episodes, 4))\n",
" print(\"Number of wins:\", win_cnt)\n",
"\n",
"\n",
"QEval(1000)\n"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# Part c\n",
"\n",
"def QTrain(\n",
" episodes: int = 100_000,\n",
" alpha: float = 0.01,\n",
" gamma: float = 0.9,\n",
" epsilon: float = 1.0,\n",
" epsilon_decay: float = 0.0001\n",
"):\n",
" \"\"\"Q-learning through Epsilon-Greedy algorithm\n",
"\n",
" Args:\n",
" episodes (int): Total number of episodes.\n",
" alpha (float): Learning rate.\n",
" gamma (float): Discount factor.\n",
" epsilon (float): Randomness probability in action selection.\n",
" epsilon_decay (float): Fixed amount to decrease.\n",
"\n",
" Returns:\n",
" NDArray: Q-table\n",
" \"\"\"\n",
" Q = np.zeros((env.observation_space.n, env.action_space.n))\n",
"\n",
" for _ in range(episodes):\n",
" state = env.reset()[0]\n",
" terminated = False\n",
"\n",
" # Train the agent until it gets stuck in a hole or reaches the goal\n",
" while not terminated:\n",
" rnd = np.random.random()\n",
" if rnd < epsilon:\n",
" # Take a random action (exploration)\n",
" action = env.action_space.sample()\n",
" else:\n",
" # Take the action with the highest value in the current state (exploitation)\n",
" action = np.argmax(Q[state])\n",
"\n",
" # Implement this action and move the agent in the desired direction\n",
" new_state, reward, terminated, *_ = env.step(action)\n",
"\n",
" Q[state, action] = Q[state, action] + alpha * \\\n",
" (reward + gamma * np.max(Q[new_state]) - Q[state, action])\n",
"\n",
" state = new_state\n",
"\n",
" epsilon = max(epsilon - epsilon_decay, 0)\n",
"\n",
" return Q\n"
]
},
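{
"cell_type": "markdown",
"metadata": {},
"source": [
"The update inside the training loop is the standard tabular Q-learning rule, written out with learning rate $\\alpha$ and discount factor $\\gamma$:\n",
"\n",
"$$Q(s, a) \\leftarrow Q(s, a) + \\alpha \\left( r + \\gamma \\max_{a'} Q(s', a') - Q(s, a) \\right)$$\n",
"\n",
"Actions are chosen epsilon-greedily, and epsilon is reduced by a fixed `epsilon_decay` after every episode (down to a minimum of 0), so the agent gradually shifts from exploration to exploitation."
]
},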
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Q-table:\n",
"[[0.5303 0.5905 0.5453 0.5295]\n",
" [0.3349 0. 0.6333 0.2903]\n",
" [0.2797 0.7225 0.141 0.3965]\n",
" [0.3313 0. 0.0453 0.0395]\n",
" [0.5885 0.6561 0. 0.5298]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0.8096 0. 0.3792]\n",
" [0. 0. 0. 0. ]\n",
" [0.6513 0. 0.729 0.5857]\n",
" [0.6437 0.7935 0.81 0. ]\n",
" [0.7153 0.9 0. 0.7094]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0. 0. 0. ]\n",
" [0. 0.5566 0.8995 0.446 ]\n",
" [0.7902 0.891 1. 0.7967]\n",
" [0. 0. 0. 0. ]]\n"
]
}
],
"source": [
"# Part d\n",
"\n",
"qtable = QTrain()\n",
"print('Q-table:')\n",
"print(qtable)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Win rate: 1.0\n",
"Number of wins: 1000\n"
]
}
],
"source": [
"# Part e\n",
"\n",
"QEval(1000, qtable)"
]
},
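{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a small illustrative sketch (using the `qtable` learned above and the 4x4 layout), the greedy policy can be read off the table by taking the arg-max action in every state:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Greedy action per state, shown as arrows on the 4x4 grid.\n",
"# FrozenLake action encoding: 0 = left, 1 = down, 2 = right, 3 = up.\n",
"# States whose row is all zeros (holes and the goal) default to '<'.\n",
"arrows = np.array(['<', 'v', '>', '^'])\n",
"greedy_policy = arrows[np.argmax(qtable, axis=1)]\n",
"print(greedy_policy.reshape(4, 4))\n"
]
},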
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Win rate: 1.0\n",
"Number of wins: 1\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Part f\n",
"\n",
"def save_frames(frames, file_path):\n",
" from matplotlib import animation\n",
" from IPython.display import HTML, display\n",
"\n",
" f = 72\n",
" w = frames[0].shape[1] / f\n",
" h = frames[0].shape[0] / f\n",
" plt.figure(figsize=(w, h))\n",
"\n",
" patch = plt.imshow(frames[0])\n",
" plt.axis('off')\n",
"\n",
" def animate(i):\n",
" patch.set_data(frames[i])\n",
"\n",
" anim = animation.FuncAnimation(\n",
" plt.gcf(), animate, frames=len(frames), interval=300)\n",
" anim.save(file_path, writer='pillow')\n",
" plt.close()\n",
"\n",
" rnd = np.random.randint(0, 2e9)\n",
" display(HTML(f''))\n",
"\n",
"\n",
"env = gym.make(\"FrozenLake-v1\", is_slippery=False,\n",
" render_mode=\"rgb_array_list\")\n",
"QEval(1, qtable)\n",
"save_frames(env.render(), 'P4_f.gif')\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.2 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.2"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "2b324498af64d22b4773901be112d66dec816013b7f64fed368c8550f7daba2d"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}