└── passive_learning.ipynb

/passive_learning.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "view-in-github",
    "colab_type": "text"
   },
   "source": [
    "\"Open"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d951c11",
   "metadata": {
    "id": "5d951c11",
    "outputId": "e5141ded-ebb5-438b-93fc-722638c987ea"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Learned Q-values:\n",
      "[[0.  0.1]\n",
      " [0.1 0. ]\n",
      " [0.  0.1]\n",
      " [0.1 0. ]\n",
      " [0.  0. ]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "class QLearningAgent:\n",
    "    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9):\n",
    "        self.num_states = num_states\n",
    "        self.num_actions = num_actions\n",
    "        self.learning_rate = learning_rate\n",
    "        self.discount_factor = discount_factor\n",
    "        # Q-table of shape (num_states, num_actions), initialized to zeros\n",
    "        self.q_table = np.zeros((num_states, num_actions))\n",
    "\n",
    "    def update_q_table(self, state, action, reward, next_state):\n",
    "        # One-step temporal-difference (TD) update toward the greedy bootstrap target\n",
    "        best_next_action = np.argmax(self.q_table[next_state])\n",
    "        td_target = reward + self.discount_factor * self.q_table[next_state, best_next_action]\n",
    "        td_error = td_target - self.q_table[state, action]\n",
    "        self.q_table[state, action] += self.learning_rate * td_error\n",
    "\n",
    "# A fixed dataset of experiences (state, action, reward, next_state):\n",
    "# the agent learns from these logged transitions rather than by acting in an environment\n",
    "dataset = [(0, 1, 1, 1), (1, 0, 1, 2), (2, 1, 1, 3), (3, 0, 1, 4)]\n",
    "\n",
    "# Example usage:\n",
    "if __name__ == \"__main__\":\n",
    "    # Create the Q-learning agent\n",
    "    agent = QLearningAgent(num_states=5, num_actions=2)\n",
    "\n",
    "    # Passive learning: replay each recorded transition once\n",
    "    for state, action, reward, next_state in dataset:\n",
    "        agent.update_q_table(state, action, reward, next_state)\n",
    "\n",
    "    # Print the learned Q-values\n",
    "    print(\"Learned Q-values:\")\n",
    "    print(agent.q_table)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "colab": {
   "provenance": [],
   "include_colab_link": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
--------------------------------------------------------------------------------
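
A note on the run above: because each logged transition is visited exactly once, the bootstrap term discount_factor * max(Q[next_state]) is still zero at every update, so each visited Q-value lands at learning_rate * reward = 0.1, which is exactly what the printed output shows. The sketch below is not part of the notebook; it reuses the same update rule, hyperparameters, and dataset, and the epoch count is an arbitrary choice for illustration. Replaying the fixed experience many times lets reward from later transitions propagate backward through the Q-table, which a single pass cannot do.

import numpy as np

# Same hyperparameters and logged transitions as in the notebook above
learning_rate, discount_factor = 0.1, 0.9
dataset = [(0, 1, 1, 1), (1, 0, 1, 2), (2, 1, 1, 3), (3, 0, 1, 4)]

q_table = np.zeros((5, 2))

# Replay the fixed dataset repeatedly: each pass lets the TD target
# bootstrap from values learned on earlier passes, so discounted reward
# flows backward along the logged trajectory.
for _ in range(2000):
    for state, action, reward, next_state in dataset:
        best_next_action = np.argmax(q_table[next_state])
        td_target = reward + discount_factor * q_table[next_state, best_next_action]
        q_table[state, action] += learning_rate * (td_target - q_table[state, action])

print(np.round(q_table, 3))
# The Q-values now approach the discounted returns along the trajectory:
# Q[3,0] -> 1.0, Q[2,1] -> 1 + 0.9*1.0 = 1.9, Q[1,0] -> 2.71, Q[0,1] -> 3.439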