{ "cells": [ { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "import gym\n", "import math\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "sns.reset_defaults()\n", "sns.set_theme(rc={'figure.dpi': 72, 'savefig.dpi': 300,\n", " 'figure.autolayout': True})\n", "sns.set_style('ticks')\n", "sns.set_context('paper')\n", "\n", "np.set_printoptions(precision=4)\n" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of actions: 4\n", "Number of observations: 16\n" ] } ], "source": [ "env = gym.make(\"FrozenLake-v1\", is_slippery=False)\n", "print(\"Number of actions:\", env.action_space.n)\n", "print(\"Number of observations:\", env.observation_space.n)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Win rate: 0.009\n", "Number of wins: 9\n" ] } ], "source": [ "# Part b\n", "\n", "def QEval(episodes: int, Q: np.ndarray = None):\n", " win_cnt = 0\n", " for _ in range(episodes):\n", " observation = env.reset()[0]\n", " terminated = False\n", " while not terminated:\n", " if Q is None:\n", " action = env.action_space.sample()\n", " else:\n", " action = np.argmax(Q[observation])\n", " observation, reward, terminated, *_ = env.step(action)\n", " win_cnt += reward\n", " win_cnt = int(win_cnt)\n", " print(\"Win rate:\", round(win_cnt/episodes, 4))\n", " print(\"Number of wins:\", win_cnt)\n", "\n", "\n", "QEval(1000)\n" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "# Part c\n", "\n", "def QTrain(\n", " episodes: int = 100_000,\n", " alpha: float = 0.01,\n", " gamma: float = 0.9,\n", " epsilon: float = 1.0,\n", " epsilon_decay: float = 0.0001\n", "):\n", " \"\"\"Q-learning through Epsilon-Greedy algorithm\n", "\n", " Args:\n", " episodes (int): Total number of episodes.\n", " alpha (float): Learning rate.\n", " gamma (float): Discount factor.\n", " epsilon (float): Randomness probability in action selection.\n", " epsilon_decay (float): Fixed amount to decrease.\n", "\n", " Returns:\n", " NDArray: Q-table\n", " \"\"\"\n", " Q = np.zeros((env.observation_space.n, env.action_space.n))\n", "\n", " for _ in range(episodes):\n", " state = env.reset()[0]\n", " terminated = False\n", "\n", " # Train the agent until it gets stuck in a hole or reaches the goal\n", " while not terminated:\n", " rnd = np.random.random()\n", " if rnd < epsilon:\n", " # Take a random action (exploration)\n", " action = env.action_space.sample()\n", " else:\n", " # Take the action with the highest value in the current state (exploitation)\n", " action = np.argmax(Q[state])\n", "\n", " # Implement this action and move the agent in the desired direction\n", " new_state, reward, terminated, *_ = env.step(action)\n", "\n", " Q[state, action] = Q[state, action] + alpha * \\\n", " (reward + gamma * np.max(Q[new_state]) - Q[state, action])\n", "\n", " state = new_state\n", "\n", " epsilon = max(epsilon - epsilon_decay, 0)\n", "\n", " return Q\n" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Q-table:\n", "[[0.5303 0.5905 0.5453 0.5295]\n", " [0.3349 0. 0.6333 0.2903]\n", " [0.2797 0.7225 0.141 0.3965]\n", " [0.3313 0. 
0.0453 0.0395]\n", " [0.5885 0.6561 0. 0.5298]\n", " [0. 0. 0. 0. ]\n", " [0. 0.8096 0. 0.3792]\n", " [0. 0. 0. 0. ]\n", " [0.6513 0. 0.729 0.5857]\n", " [0.6437 0.7935 0.81 0. ]\n", " [0.7153 0.9 0. 0.7094]\n", " [0. 0. 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0.5566 0.8995 0.446 ]\n", " [0.7902 0.891 1. 0.7967]\n", " [0. 0. 0. 0. ]]\n" ] } ], "source": [ "# Part d\n", "\n", "qtable = QTrain()\n", "print('Q-table:')\n", "print(qtable)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Win rate: 1.0\n", "Number of wins: 1000\n" ] } ], "source": [ "# Part e\n", "\n", "QEval(1000, qtable)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Win rate: 1.0\n", "Number of wins: 1\n" ] }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Part f\n", "\n", "def save_frames(frames, file_path):\n", " from matplotlib import animation\n", " from IPython.display import HTML, display\n", "\n", " f = 72\n", " w = frames[0].shape[1] / f\n", " h = frames[0].shape[0] / f\n", " plt.figure(figsize=(w, h))\n", "\n", " patch = plt.imshow(frames[0])\n", " plt.axis('off')\n", "\n", " def animate(i):\n", " patch.set_data(frames[i])\n", "\n", " anim = animation.FuncAnimation(\n", " plt.gcf(), animate, frames=len(frames), interval=300)\n", " anim.save(file_path, writer='pillow')\n", " plt.close()\n", "\n", " rnd = np.random.randint(0, 2e9)\n", " display(HTML(f''))\n", "\n", "\n", "env = gym.make(\"FrozenLake-v1\", is_slippery=False,\n", " render_mode=\"rgb_array_list\")\n", "QEval(1, qtable)\n", "save_frames(env.render(), 'P4_f.gif')\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10.2 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.2" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "2b324498af64d22b4773901be112d66dec816013b7f64fed368c8550f7daba2d" } } }, "nbformat": 4, "nbformat_minor": 2 }