import numpy as np


class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    The agent stores one value per (state, action) pair in ``q_table``, a
    ``(num_states, num_actions)`` float array initialised to zeros. States
    and actions are integer indices into that table.

    Args:
        num_states: Number of discrete states (rows of the Q-table).
        num_actions: Number of discrete actions (columns of the Q-table).
        learning_rate: Step size applied to each temporal-difference error.
        discount_factor: Weight given to the best next-state value.
        epsilon: Probability of picking a uniformly random action.
        seed: Optional seed for a private random generator. When omitted,
            the agent draws from the global ``np.random`` state, so code
            that calls ``np.random.seed`` keeps working as before.

    Raises:
        ValueError: If ``num_states`` or ``num_actions`` is smaller than 1.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1,
                 discount_factor=0.9, epsilon=0.1, seed=None):
        # Fail early: an empty table only surfaces later as an obscure
        # argmax/indexing error.
        if num_states < 1 or num_actions < 1:
            raise ValueError("num_states and num_actions must both be at least 1")

        self.num_states = num_states
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_table = np.zeros((num_states, num_actions))

        # RandomState exposes the same uniform/choice methods as the
        # np.random module, so both can be used interchangeably below.
        self._rng = np.random if seed is None else np.random.RandomState(seed)

    def choose_action(self, state):
        """Return an action index for ``state`` using epsilon-greedy selection.

        With probability ``epsilon`` a uniformly random action is returned
        (exploration); otherwise the action with the largest Q-value for
        ``state`` is returned (exploitation). ``np.argmax`` resolves ties in
        favour of the lowest action index.

        Returns:
            The chosen action as a plain Python ``int``.
        """
        if self._rng.uniform(0, 1) < self.epsilon:
            return int(self._rng.choice(self.num_actions))
        return int(np.argmax(self.q_table[state]))

    def update_q_table(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for an observed transition.

        Computes ``reward + discount_factor * max_a Q[next_state, a]`` as the
        target and moves ``Q[state, action]`` towards it by ``learning_rate``.

        Args:
            state: Index of the state the action was taken in.
            action: Index of the action that was taken.
            reward: Reward received for the transition.
            next_state: Index of the state that was reached.
            done: Set to True when ``next_state`` ends the episode; the
                target is then the reward alone, with no bootstrapped value.
        """
        if done:
            future_value = 0.0
        else:
            future_value = self.discount_factor * np.max(self.q_table[next_state])
        td_error = reward + future_value - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error


def train_on_random_transitions(num_states=16, num_actions=4,
                                num_episodes=1000, max_steps=100, seed=0):
    """Train a QLearningAgent on a placeholder environment and return it.

    Each episode starts in a uniformly random state. At every step the next
    state is drawn uniformly at random and the reward from a standard normal
    distribution; neither depends on the chosen action, so the resulting
    Q-values only exercise the update rule and carry no real signal.

    Args:
        num_states: Number of states in the placeholder environment.
        num_actions: Number of actions available to the agent.
        num_episodes: Number of episodes to run.
        max_steps: Number of transitions per episode.
        seed: Seed that makes the whole run reproducible.

    Returns:
        The trained QLearningAgent.
    """
    agent = QLearningAgent(num_states, num_actions, seed=seed)
    # Offset the environment seed so the agent and the environment do not
    # consume two copies of the same random stream.
    env_rng = np.random.RandomState(seed + 1)

    for _ in range(num_episodes):
        state = env_rng.randint(0, num_states)
        for _ in range(max_steps):
            action = agent.choose_action(state)
            next_state = env_rng.choice(num_states)
            reward = env_rng.normal(0, 1)
            agent.update_q_table(state, action, reward, next_state)
            state = next_state

    return agent


# Example usage:
if __name__ == "__main__":
    num_states = 16  # Example: 4x4 grid world
    num_actions = 4  # Example: up, down, left, right
    num_episodes = 1000
    max_steps = 100

    agent = train_on_random_transitions(
        num_states, num_actions, num_episodes, max_steps, seed=0
    )

    print("Learned Q-values:")
    print(agent.q_table)
".py", 99 | "mimetype": "text/x-python", 100 | "name": "python", 101 | "nbconvert_exporter": "python", 102 | "pygments_lexer": "ipython3", 103 | "version": "3.9.12" 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 5 108 | } 109 | --------------------------------------------------------------------------------