└── passive_learning.ipynb

/passive_learning.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "view-in-github",
    "colab_type": "text"
   },
   "source": [
    "\"Open"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d951c11",
   "metadata": {
    "id": "5d951c11",
    "outputId": "e5141ded-ebb5-438b-93fc-722638c987ea"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Learned Q-values:\n",
      "[[0.  0.1]\n",
      " [0.1 0. ]\n",
      " [0.  0.1]\n",
      " [0.1 0. ]\n",
      " [0.  0. ]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "class QLearningAgent:\n",
    "    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9):\n",
    "        self.num_states = num_states\n",
    "        self.num_actions = num_actions\n",
    "        self.learning_rate = learning_rate\n",
    "        self.discount_factor = discount_factor\n",
    "        # Q-table of shape (num_states, num_actions), initialized to zeros\n",
    "        self.q_table = np.zeros((num_states, num_actions))\n",
    "\n",
    "    def update_q_table(self, state, action, reward, next_state):\n",
    "        # One-step temporal-difference (TD) update toward the greedy bootstrap target\n",
    "        best_next_action = np.argmax(self.q_table[next_state])\n",
    "        td_target = reward + self.discount_factor * self.q_table[next_state, best_next_action]\n",
    "        td_error = td_target - self.q_table[state, action]\n",
    "        self.q_table[state, action] += self.learning_rate * td_error\n",
    "\n",
    "# A fixed dataset of experiences (state, action, reward, next_state):\n",
    "# the agent learns from these logged transitions rather than by acting in an environment\n",
    "dataset = [(0, 1, 1, 1), (1, 0, 1, 2), (2, 1, 1, 3), (3, 0, 1, 4)]\n",
    "\n",
    "# Example usage:\n",
    "if __name__ == \"__main__\":\n",
    "    # Create the Q-learning agent\n",
    "    agent = QLearningAgent(num_states=5, num_actions=2)\n",
    "\n",
    "    # Passive learning: replay each recorded transition once\n",
    "    for state, action, reward, next_state in dataset:\n",
    "        agent.update_q_table(state, action, reward, next_state)\n",
    "\n",
    "    # Print the learned Q-values\n",
    "    print(\"Learned Q-values:\")\n",
    "    print(agent.q_table)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "colab": {
   "provenance": [],
   "include_colab_link": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
--------------------------------------------------------------------------------
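
A note on the run above: because each logged transition is visited exactly once, the bootstrap term discount_factor * max(Q[next_state]) is still zero at every update, so each visited Q-value lands at learning_rate * reward = 0.1, which is exactly what the printed output shows. The sketch below is not part of the notebook; it reuses the same update rule, hyperparameters, and dataset, and the epoch count is an arbitrary choice for illustration. Replaying the fixed experience many times lets reward from later transitions propagate backward through the Q-table, which a single pass cannot do.

import numpy as np

# Same hyperparameters and logged transitions as in the notebook above
learning_rate, discount_factor = 0.1, 0.9
dataset = [(0, 1, 1, 1), (1, 0, 1, 2), (2, 1, 1, 3), (3, 0, 1, 4)]

q_table = np.zeros((5, 2))

# Replay the fixed dataset repeatedly: each pass lets the TD target
# bootstrap from values learned on earlier passes, so discounted reward
# flows backward along the logged trajectory.
for _ in range(2000):
    for state, action, reward, next_state in dataset:
        best_next_action = np.argmax(q_table[next_state])
        td_target = reward + discount_factor * q_table[next_state, best_next_action]
        q_table[state, action] += learning_rate * (td_target - q_table[state, action])

print(np.round(q_table, 3))
# The Q-values now approach the discounted returns along the trajectory:
# Q[3,0] -> 1.0, Q[2,1] -> 1 + 0.9*1.0 = 1.9, Q[1,0] -> 2.71, Q[0,1] -> 3.439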