import numpy as np


class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    The agent keeps a ``(num_states, num_actions)`` table of action values,
    initialised to zero, and updates one entry per observed transition.

    Args:
        num_states: Number of discrete states (rows of the Q-table).
        num_actions: Number of discrete actions (columns of the Q-table).
        learning_rate: Step size applied to the temporal-difference error.
        discount_factor: Weight given to the bootstrapped next-state value.
        epsilon: Probability of picking a uniformly random action in
            ``choose_action`` instead of the greedy one.

    Raises:
        ValueError: If ``num_states`` or ``num_actions`` is not positive.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1,
                 discount_factor=0.9, epsilon=0.1):
        # Fail here rather than later inside np.argmax on an empty row.
        if num_states <= 0 or num_actions <= 0:
            raise ValueError("num_states and num_actions must be positive")
        self.num_states = num_states
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_table = np.zeros((num_states, num_actions))

    def choose_action(self, state):
        """Return an action index for ``state`` using epsilon-greedy selection.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the largest Q-value in the row for
        ``state``. ``np.argmax`` returns the first maximum, so ties resolve
        to the lowest action index.

        Returns:
            int: An action index in ``[0, num_actions)``.
        """
        if np.random.uniform(0, 1) < self.epsilon:
            return int(np.random.randint(self.num_actions))  # explore
        return int(np.argmax(self.q_table[state]))  # exploit

    def update_q_table(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for an observed transition.

        Args:
            state: Index of the state the action was taken in.
            action: Index of the action that was taken.
            reward: Scalar reward received for the transition.
            next_state: Index of the state reached.
            done: When true, ``next_state`` is treated as terminal and its
                value is not bootstrapped (the target is just ``reward``).
                Defaults to ``False``, which always bootstraps.
        """
        future_value = 0.0 if done else np.max(self.q_table[next_state])
        td_target = reward + self.discount_factor * future_value
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error


def run_random_demo(num_states=16, num_actions=4, num_episodes=1000,
                    max_steps=100, seed=None):
    """Train a ``QLearningAgent`` on a purely random toy process.

    Next states are drawn uniformly and rewards from a standard normal,
    both independently of the chosen action, so the process carries no
    learnable signal; this only demonstrates the agent's call sequence.

    Args:
        num_states: Number of states (16 corresponds to a 4x4 grid).
        num_actions: Number of actions (e.g. up, down, left, right).
        num_episodes: Number of episodes to run.
        max_steps: Number of transitions per episode.
        seed: If not ``None``, seeds NumPy's global random state so the
            run is reproducible.

    Returns:
        QLearningAgent: The agent after training.
    """
    if seed is not None:
        np.random.seed(seed)

    agent = QLearningAgent(num_states, num_actions)
    for _ in range(num_episodes):
        state = np.random.randint(num_states)  # start from a random state
        for _ in range(max_steps):
            action = agent.choose_action(state)
            next_state = np.random.randint(num_states)  # random transition
            reward = np.random.normal(0, 1)  # random reward
            agent.update_q_table(state, action, reward, next_state)
            state = next_state
    return agent


# Example usage:
if __name__ == "__main__":
    SEED = 0  # fixed so Restart & Run All reproduces the printed table

    agent = run_random_demo(
        num_states=16,   # 4x4 grid world
        num_actions=4,   # up, down, left, right
        num_episodes=1000,
        max_steps=100,
        seed=SEED,
    )

    print("Learned Q-values:")
    print(agent.q_table)
], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "Python 3 (ipykernel)", 103 | "language": "python", 104 | "name": "python3" 105 | }, 106 | "language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 3 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython3", 116 | "version": "3.9.12" 117 | }, 118 | "colab": { 119 | "provenance": [], 120 | "include_colab_link": true 121 | } 122 | }, 123 | "nbformat": 4, 124 | "nbformat_minor": 5 125 | } --------------------------------------------------------------------------------