import numpy as np


class QLearningAgent:
    """Tabular Q-learning agent backed by a ``(num_states, num_actions)`` table.

    The table starts at zero and is refined one transition at a time through
    :meth:`update_q_table`.
    """

    def __init__(
        self,
        num_states: int,
        num_actions: int,
        learning_rate: float = 0.1,
        discount_factor: float = 0.9,
    ) -> None:
        """Store the hyper-parameters and allocate a zero-initialised Q-table.

        Args:
            num_states: Number of rows in the Q-table.
            num_actions: Number of columns in the Q-table.
            learning_rate: Step size applied to each temporal-difference error.
            discount_factor: Weight given to the bootstrapped next-state value.
        """
        self.num_states = num_states
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.q_table: np.ndarray = np.zeros((num_states, num_actions))

    def update_q_table(
        self,
        state: int,
        action: int,
        reward: float,
        next_state: int,
        *,
        done: bool = False,
    ) -> None:
        """Apply one Q-learning update for a single observed transition.

        The entry ``q_table[state, action]`` is moved towards
        ``reward + discount_factor * max(q_table[next_state])`` by a fraction
        ``learning_rate`` of the difference.

        Args:
            state: Row index of the state the action was taken in.
            action: Column index of the action taken.
            reward: Reward observed for the transition.
            next_state: Row index of the resulting state.
            done: Set to True when the transition ended the episode. The
                target is then the reward alone, because a terminal state has
                no successor value to bootstrap from. Defaults to False,
                which keeps the original always-bootstrap behaviour.
        """
        # The greedy next-state value is the row maximum; looking up the
        # argmax first and indexing again would yield the same number.
        future_value = 0.0 if done else np.max(self.q_table[next_state])
        td_target = reward + self.discount_factor * future_value
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error


# Assuming we have a dataset of experiences (state, action, reward, next_state)
dataset = [(0, 1, 1, 1), (1, 0, 1, 2), (2, 1, 1, 3), (3, 0, 1, 4)]

# Example usage:
if __name__ == "__main__":
    # Create Q-learning agent
    agent = QLearningAgent(num_states=5, num_actions=2)

    # Passive learning: replay the recorded transitions once, in order,
    # without the agent choosing any actions itself.
    for state, action, reward, next_state in dataset:
        agent.update_q_table(state, action, reward, next_state)

    # Print learned Q-values
    print("Learned Q-values:")
    print(agent.q_table)
"kernelspec": { 59 | "display_name": "Python 3 (ipykernel)", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.9.12" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 5 78 | } 79 | --------------------------------------------------------------------------------