└── Q-Learning.ipynb


/Q-Learning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "id": "9b758a89",
  7 |    "metadata": {},
  8 |    "outputs": [
  9 |     {
 10 |      "name": "stdout",
 11 |      "output_type": "stream",
 12 |      "text": [
 13 |       "Learned Q-values:\n",
 14 |       "[[ 0.1810089   0.18678651  0.01518705  0.71509699]\n",
 15 |       " [ 0.08713604 -0.03859681  0.16118307  0.24448167]\n",
 16 |       " [ 0.83979455 -0.33560994 -0.17688869 -0.22256006]\n",
 17 |       " [ 0.08635554 -0.21298734 -0.10405171 -0.13272356]\n",
 18 |       " [-0.05129619 -0.46529288  0.01210843 -0.00455259]\n",
 19 |       " [-0.08920353 -0.20301238 -0.03509546  0.10448408]\n",
 20 |       " [-0.32868787 -0.07318781 -0.11406954  0.15414881]\n",
 21 |       " [ 0.4202393   0.02789978 -0.06801074 -0.03380731]\n",
 22 |       " [ 0.0909128   0.3207324  -0.34631363 -0.08375554]\n",
 23 |       " [-0.05594635  0.14014766 -0.17209741 -0.31746499]\n",
 24 |       " [ 0.32978008 -0.0674569   0.05198254  0.02667061]\n",
 25 |       " [-0.12227229  0.24600829 -0.38031165 -0.3328942 ]\n",
 26 |       " [ 0.78052375 -0.20177803  0.04631618 -0.12350052]\n",
 27 |       " [-0.16932811  0.3893554  -0.26882549  0.07006955]\n",
 28 |       " [ 0.18826156 -0.29526626  0.04192198 -0.0572808 ]\n",
 29 |       " [-0.08379133  0.66749963 -0.04932762  0.07023948]]\n",
 30 |       "6\n"
 31 |      ]
 32 |     }
 33 |    ],
 34 |    "source": [
 35 |     "import numpy as np\n",
 36 |     "\n",
 37 |     "class QLearningAgent: # class creation\n",
 38 |     "    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):# creating a constructor for a class\n",
 39 |     "        self.num_states = num_states\n",
 40 |     "        self.num_actions = num_actions\n",
 41 |     "        self.learning_rate = learning_rate\n",
 42 |     "        self.discount_factor = discount_factor\n",
 43 |     "        self.epsilon = epsilon\n",
 44 |     "        self.q_table = np.zeros((num_states, num_actions))# Creating numpy array for storing q values in table\n",
 45 |     "\n",
 46 |     "    def choose_action(self, state): # selects an action\n",
 47 |     "        if np.random.uniform(0, 1) < self.epsilon:\n",
 48 |     "            return np.random.choice(self.num_actions)  # Exploration\n",
 49 |     "        else:\n",
 50 |     "            return np.argmax(self.q_table[state])  # Exploitation\n",
 51 |     "\n",
 52 |     "    def update_q_table(self, state, action, reward, next_state): # update the q values\n",
 53 |     "        best_next_action = np.argmax(self.q_table[next_state])\n",
 54 |     "        td_target = reward + self.discount_factor * self.q_table[next_state, best_next_action]\n",
 55 |     "        td_error = td_target - self.q_table[state, action]\n",
 56 |     "        self.q_table[state, action] += self.learning_rate * td_error\n",
 57 |     "\n",
 58 |     "# Example usage:\n",
 59 |     "if __name__ == \"__main__\": # main\n",
 60 |     "    # Define environment\n",
 61 |     "    # Example: Grid world environment\n",
 62 |     "    num_states = 16  # Example: 4x4 grid world\n",
 63 |     "    num_actions = 4  # Example: up, down, left, right\n",
 64 |     "\n",
 65 |     "    # Create Q-learning agent\n",
 66 |     "    agent = QLearningAgent(num_states, num_actions)\n",
 67 |     "\n",
 68 |     "    # Q-learning\n",
 69 |     "    num_episodes = 1000\n",
 70 |     "    max_steps = 100\n",
 71 |     "    for episode in range(num_episodes):\n",
 72 |     "        state = np.random.randint(0, num_states)  # Start from a random state\n",
 73 |     "        for step in range(max_steps):\n",
 74 |     "            action = agent.choose_action(state)\n",
 75 |     "            next_state = np.random.choice(num_states)  # Transition to a random next state\n",
 76 |     "            reward = np.random.normal(0, 1)  # Example: Reward from a normal distribution\n",
 77 |     "            agent.update_q_table(state, action, reward, next_state)\n",
 78 |     "            state = next_state\n",
 79 |     "\n",
 80 |     "    # Print learned Q-values\n",
 81 |     "    print(\"Learned Q-values:\")\n",
 82 |     "    print(agent.q_table)\n",
 83 |     "    print(next_state)\n"
 84 |    ]
 85 |   }
 86 |  ],
 87 |  "metadata": {
 88 |   "kernelspec": {
 89 |    "display_name": "Python 3 (ipykernel)",
 90 |    "language": "python",
 91 |    "name": "python3"
 92 |   },
 93 |   "language_info": {
 94 |    "codemirror_mode": {
 95 |     "name": "ipython",
 96 |     "version": 3
 97 |    },
 98 |    "file_extension": ".py",
 99 |    "mimetype": "text/x-python",
100 |    "name": "python",
101 |    "nbconvert_exporter": "python",
102 |    "pygments_lexer": "ipython3",
103 |    "version": "3.9.12"
104 |   }
105 |  },
106 |  "nbformat": 4,
107 |  "nbformat_minor": 5
108 | }
109 | 


--------------------------------------------------------------------------------