└── Active Learning.ipynb

/Active Learning.ipynb:
--------------------------------------------------------------------------------
Jupyter notebook (Python 3 kernel, 3.9.12) containing a single code cell; the cell source and its printed output are reproduced below.

import numpy as np

class QLearningAgent:
    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        self.num_states = num_states
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_table = np.zeros((num_states, num_actions))

    def choose_action(self, state):
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.choice(self.num_actions)  # Exploration
        else:
            return np.argmax(self.q_table[state])  # Exploitation

    def update_q_table(self, state, action, reward, next_state):
        best_next_action = np.argmax(self.q_table[next_state])
        td_target = reward + self.discount_factor * self.q_table[next_state, best_next_action]
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error

# Example usage:
if __name__ == "__main__":
    # Define environment
    num_states = 5   # Example: 5 states
    num_actions = 2  # Example: 2 actions

    # Create Q-learning agent
    agent = QLearningAgent(num_states, num_actions)

    # Active learning loop
    num_episodes = 1000
    max_steps = 100
    for episode in range(num_episodes):
        state = np.random.randint(0, num_states)  # Start from a random state
        for step in range(max_steps):
            action = agent.choose_action(state)
            next_state = np.random.choice(num_states)  # Transition to a random next state
            reward = np.random.normal(0, 1)  # Example: reward drawn from a standard normal distribution
            agent.update_q_table(state, action, reward, next_state)
            state = next_state

    # Print learned Q-values
    print("Learned Q-values:")
    print(agent.q_table)

Output:

Learned Q-values:
[[-0.21224518  0.15234751]
 [-0.21076172  0.19145646]
 [-0.17297433  0.12071937]
 [-0.1008035  -0.20366559]
 [ 0.62717776 -0.03176365]]
--------------------------------------------------------------------------------
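The training loop in the notebook samples both the next state and the reward at random, so it only exercises the Q-update mechanics and the learned values carry no recoverable structure. Below is a minimal sketch of driving the same QLearningAgent on a concrete environment; the 5-state chain dynamics and the chain_step helper are assumptions introduced here purely for illustration and are not part of the original notebook.

import numpy as np

# Hypothetical 5-state chain environment (not in the original notebook):
# action 1 moves right, action 0 moves left; reaching the last state pays +1
# and ends the episode.
def chain_step(state, action, num_states=5):
    next_state = min(state + 1, num_states - 1) if action == 1 else max(state - 1, 0)
    reward = 1.0 if next_state == num_states - 1 else 0.0
    done = next_state == num_states - 1
    return next_state, reward, done

# Reuses the QLearningAgent class defined in the notebook cell above.
agent = QLearningAgent(num_states=5, num_actions=2)
for episode in range(500):
    state = 0
    for step in range(50):
        action = agent.choose_action(state)
        next_state, reward, done = chain_step(state, action)
        agent.update_q_table(state, action, reward, next_state)
        state = next_state
        if done:
            break

# Greedy policy read off the learned Q-table; with these dynamics it should
# favour action 1 ("move right") in every non-terminal state.
print("Greedy policy:", np.argmax(agent.q_table, axis=1))

Because episodes end on reaching the last state, its row of the Q-table is never updated and stays at zero, which is the standard treatment of a terminal state when bootstrapping the TD target.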