└── Active Learning.ipynb

/Active Learning.ipynb:
--------------------------------------------------------------------------------
Jupyter notebook (Python 3 kernel, 3.9.12) containing a single code cell; the cell source and its printed output are reproduced below.

import numpy as np

class QLearningAgent:
    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        self.num_states = num_states
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_table = np.zeros((num_states, num_actions))

    def choose_action(self, state):
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.num_actions)  # Exploration
        else:
            return np.argmax(self.q_table[state])  # Exploitation

    def update_q_table(self, state, action, reward, next_state):
        best_next_action = np.argmax(self.q_table[next_state])
        td_target = reward + self.discount_factor * self.q_table[next_state, best_next_action]
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error

# Example usage:
if __name__ == "__main__":
    # Define environment
    num_states = 5   # Example: 5 states
    num_actions = 2  # Example: 2 actions

    # Create Q-learning agent
    agent = QLearningAgent(num_states, num_actions)

    # Active learning loop
    num_episodes = 1000
    max_steps = 100
    for episode in range(num_episodes):
        state = np.random.randint(0, num_states)  # Start from a random state
        for step in range(max_steps):
            action = agent.choose_action(state)
            next_state = np.random.choice(num_states)  # Transition to a random next state
            reward = np.random.normal(0, 1)  # Example: reward drawn from a standard normal distribution
            agent.update_q_table(state, action, reward, next_state)
            state = next_state

    # Print learned Q-values
    print("Learned Q-values:")
    print(agent.q_table)

Output:

Learned Q-values:
[[-0.21224518  0.15234751]
 [-0.21076172  0.19145646]
 [-0.17297433  0.12071937]
 [-0.1008035  -0.20366559]
 [ 0.62717776 -0.03176365]]
--------------------------------------------------------------------------------
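The training loop in the notebook samples both the next state and the reward at random, so it only exercises the Q-update mechanics and the learned values carry no recoverable structure. Below is a minimal sketch of driving the same QLearningAgent on a concrete environment; the 5-state chain dynamics and the chain_step helper are assumptions introduced here purely for illustration and are not part of the original notebook.

import numpy as np

# Hypothetical 5-state chain environment (not in the original notebook):
# action 1 moves right, action 0 moves left; reaching the last state pays +1
# and ends the episode.
def chain_step(state, action, num_states=5):
    next_state = min(state + 1, num_states - 1) if action == 1 else max(state - 1, 0)
    reward = 1.0 if next_state == num_states - 1 else 0.0
    done = next_state == num_states - 1
    return next_state, reward, done

# Reuses the QLearningAgent class defined in the notebook cell above.
agent = QLearningAgent(num_states=5, num_actions=2)
for episode in range(500):
    state = 0
    for step in range(50):
        action = agent.choose_action(state)
        next_state, reward, done = chain_step(state, action)
        agent.update_q_table(state, action, reward, next_state)
        state = next_state
        if done:
            break

# Greedy policy read off the learned Q-table; with these dynamics it should
# favour action 1 ("move right") in every non-terminal state.
print("Greedy policy:", np.argmax(agent.q_table, axis=1))

Because episodes end on reaching the last state, its row of the Q-table is never updated and stays at zero, which is the standard treatment of a terminal state when bootstrapping the TD target.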