├── .gitignore ├── LICENSE.md ├── README.md ├── assets ├── DQL_NN_architecture.png ├── DQL_results.jpg ├── DQL_sim_quant │ ├── entropies.jpg │ ├── trained_strategy.jpg │ ├── training_history.jpg │ └── training_history_moving_average.jpg ├── DQL_sim_quant_results.jpg ├── QL_results.jpg ├── agent_walk.gif ├── bellman_equation.png ├── circuit.jpg ├── encoding.jpg ├── layer_1.jpg ├── number_of_steps.png └── percentage_of_succesfull_walks.png ├── requirements.txt ├── results ├── auto_hp_tuning │ ├── results.csv │ ├── results_scatter_plot.gif │ └── results_triangular_surface.gif ├── classical_DQL │ ├── cl_entropies.txt │ ├── entropies.jpg │ ├── entropies.txt │ ├── hyperparameters.txt │ ├── trained_strategy.jpg │ ├── training_history.jpg │ └── training_history_moving_average.jpg ├── classical_DQL_sim_quantum │ └── _BEST_1_layers_sigmoid_activation_longer │ │ ├── cl_entropies.txt │ │ ├── entropies.jpg │ │ ├── entropies.txt │ │ ├── hyperparameters.txt │ │ ├── trained_strategy.jpg │ │ ├── training_history.jpg │ │ └── training_history_moving_average.jpg ├── classical_QL │ ├── hyperparameters.txt │ ├── trained_strategy.jpg │ ├── training_history.jpg │ └── training_history_moving_average.jpg └── quantum │ ├── Quantum_DQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp_REWARD_NO20220426183607.png │ ├── earlystopping_Quantum_DQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp_REWARD_NO20220425223058.png │ ├── earlystopping_entropies.png │ └── entropies.png ├── scripts ├── 1._Classical_QL.py ├── 2._Classical_DQL.py ├── 3._Classical_DQL_sim_quant.py ├── 3b._Classical_DQL_sim_quant_grid_search.py ├── 3c._Classical_DQL_sim_quant_finetuning.ipynb ├── QML │ ├── QML_DQN_FROZEN_LAKE.py │ ├── README_ENTANGLEMENT.md │ ├── ShortestPathFrozenLake.py │ └── run_job.sh ├── auto_hp_tuning_visuals.py └── src │ ├── DQL │ └── quant_sim.py │ ├── entropies.py │ ├── utils.py │ └── visualizations.py └── tutorials ├── training_example.py └── tutorial_state_preparation.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | **/__pycache__ 3 | _archive 4 | 5 | /scripts/_external_sv_save.py 6 | /scripts/3z_entropy-calculation.ipynb 7 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 "Qubit" Science Club , Wrocław University of Science and Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quantum Reinforcement Learning 2 | 3 | ## Requirements: 4 | 5 | The code was developed and run on `Python` version `3.9.11`. All main requirements are listed in the [`requirements`](./requirements.txt) file. 6 | 7 | ## Structure: 8 | 9 | ``` 10 | ├───assets # assets for readme 11 | ├───results # results from training 12 | │ ├───auto_hp_tuning # automatic hyperparameter finetuning results directory 13 | │ ├───classical_QL # classical Q-learning results directory 14 | │ ├───classical_DQL # classical DQL results directory 15 | │ ├───classical_DQL_sim_quantum # classical DQL simulating quantum model results directory 16 | │ └───quantum # quantum model results directory 17 | │ 18 | ├───scripts # scripts for generating results 19 | │ ├───src # source code directory 20 | │ ├───QML # quantum model directory 21 | │ │ 1._Classical_QL.py # classical Q-learning 22 | │ │ 2._Classical_DQL.py # classical Deep Q-learning 23 | │ │ 3._Classical_DQL_sim_quant.py # classical Deep Q-learning simulating a quantum circuit 24 | │ │ 3b._Classical_DQL_sim_quant_grid_search.py # script no. 3 for grid-search finetuning 25 | │ │ 3c._Classical_DQL_sim_quant_finetuning.ipynb # script no. 3 with automatic hyperparameter finetuning 26 | │ │ auto_hp_tuning_visuals.py # automatic hyperparameter finetuning results plotting script 27 | │ 28 | └───tutorials # supplementary tutorials to start with 29 | 30 | ``` 31 | 32 | 33 | ## Introduction 34 | 35 | The concept of entropy is widely used in various machine learning methods. In this project we wanted to see whether this quantity behaves in the same way as an analogous quantity used in quantum physics - the entanglement entropy. To investigate this, we used both classical deep Q-learning and its quantum counterpart to train an agent to move in a simple Frozen Lake environment. 36 | 37 | The output of a quantum circuit is of course a quantum state, for which the concept of entanglement entropy is straightforward. It is not as obvious in the case of classical RL; there, we treated the output generated by the neural network as a quantum state (which, after all, is just a vector). 38 | 39 | ## Classical method 40 | 41 | ### Reinforcement learning 42 | 43 | Q-learning is a machine learning algorithm of the "Reinforcement Learning" type. This family of algorithms differs from supervised and unsupervised learning in that the information for training is collected not from data, but from the interaction of the **agent** (the trained algorithm) with the **environment**. The function that guides the agent is called the **policy**; it takes an **observation** as input (e.g. the position of a player on the board, or the car's speed on a track) and returns an **action** (e.g. move right, accelerate). 44 | 45 | Further reading: https://en.wikipedia.org/wiki/Reinforcement_learning 46 |
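For concreteness, the interaction loop between an agent and a `gym` environment looks roughly like the sketch below. This is a minimal illustration with a purely random policy, not one of the repository's training scripts (those are linked in the sections that follow):

```Python
import gym

env = gym.make('FrozenLake-v1', is_slippery=False)
observation = env.reset()              # initial observation: the agent's position on the board

for _ in range(20):
    action = env.action_space.sample() # a (here: random) policy maps the observation to an action
    observation, reward, done, info = env.step(action)
    if done:                           # the agent fell into a hole or reached the goal
        observation = env.reset()
```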
47 | ### Classical Q-Learning 48 | 49 | 50 | 51 | >Run in [this script](./scripts/1._Classical_QL.py) 52 | 🚀 53 | 54 |
55 | 56 | We will explain the concept of Q-learning using the example of an agent moving around the "FrozenLake" environment. Let's take a 4x4 board: 57 | 58 | ```Python 59 | import gym 60 | 61 | lake = gym.make('FrozenLake-v1', is_slippery=False) 62 | lake.reset() 63 | lake.render() 64 | ``` 65 | 66 | SFFF 67 | 68 | FHFH 69 | 70 | FFFH 71 | 72 | HFFG 73 | 74 | **Legend:** 75 | - S: starting point, one per board, 76 | - F: frozen surface - a safe field, 77 | - H: hole - an ice hole, a field that gives a large penalty or ends the game, 78 | - G: goal, the end point. 79 | 80 | The agent is always on one of the fields and, based on its policy, decides to move in one direction (left, right, up, down). The entire walk revolves around the reward, which the agent increases or decreases by entering a specific field. 81 | Scoring example: 82 | 83 | - Frozen surface: 0 or -0.01 to the reward (an optional penalty to discourage detours or walking in circles), 84 | - Starting point: 0 or -0.01 to the reward, 85 | - Hole: -1 to the reward, 86 | - End point: +1 to the reward. 87 | 88 | The policy is given by a Q function that takes an action (e.g. move left) and a state (e.g. field 5 - second row from the top, second column from the left) as input, and returns the corresponding expected reward. In the most basic case (without the use of deep learning), the update of the policy (after each step) is given by the Bellman equation: 89 | 90 | 91 | ![](assets/bellman_equation.png "Bellman equation") 92 | 93 | 94 | Further reading: https://en.wikipedia.org/wiki/Q-learning 95 | 96 | 97 |
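In code, this update rule is a single line. The sketch below mirrors the update used in [`1._Classical_QL.py`](./scripts/1._Classical_QL.py); the tensor `Q` holds one row per state and one column per action, and the hyperparameter values are the ones used in that script:

```Python
import torch

number_of_states, number_of_actions = 16, 4
learning_rate, gamma = 0.001, 0.05

# Q-table: estimated return for taking each action in each state
Q = torch.zeros(number_of_states, number_of_actions)

def bellman_update(state, action, reward, new_state):
    # Q(s,a) <- Q(s,a) + lr * (r + gamma * max_a' Q(s',a') - Q(s,a))
    td_target = reward + gamma * torch.max(Q[new_state])
    Q[state, action] += learning_rate * (td_target - Q[state, action])
```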
98 | ### Deep Q-Learning 99 | 100 | 101 | >Run in [this script](./scripts/2._Classical_DQL.py) 102 | 🚀 103 | 104 |
105 | 106 | In our case, however, we use so-called Deep Q-Learning (DQL). To make decisions, we use a neural network with a 16-element input (the number of fields on the board) and a 4-element output, corresponding to the weights assigned to each movement: 107 | 108 | - 0 - move left, 109 | - 1 - move down, 110 | - 2 - move right, 111 | - 3 - move up. 112 | 113 | For example, if we would like to check which way is best to go from field number 5, we activate the neuron with index 4 114 | 115 | ![](assets/DQL_NN_architecture.png "DQL") 116 | 117 | 118 | We can access the exact values returned by the neural network as follows 119 | 120 | ```Python 121 | agent.Qstate(4) 122 | ``` 123 | tensor([0.4165, 0.6063, 0.5308, 0.4209]) 124 | 125 | 126 | We can see that the second element has the greatest value, so the agent is going to move downwards. 127 | 128 | 129 | ### Implementation 130 | 131 | The whole thing was implemented in PyTorch. The architecture of our model is a network consisting of linear layers, followed by a sigmoid activation function: 132 | - https://pytorch.org/docs/stable/generated/torch.nn.Sigmoid.html 133 | - https://pytorch.org/docs/stable/generated/torch.nn.Linear.html 134 | 135 | The loss function used was SmoothL1Loss: https://pytorch.org/docs/stable/generated/torch.nn.SmoothL1Loss.html 136 | 137 | Several hundred epochs are enough to train a model. During training we track the percentage of cases in which the walk ended successfully. For example, if the ratio of walks (over the last 50 epochs) during which the agent reached the goal field reaches 90%, we finish the training process. 138 | 139 | 140 | Below we present the average success rate over the last 50 epochs of training, during a total of 1000 epochs: 141 | 142 | ```Python 143 | plt.figure(figsize=(12,5)) 144 | plt.plot(bin_stat.statistic) 145 | plt.title(label=f'Percentage of succesfully finised walks (over the last {epoch_bin} epochs)') 146 | plt.ylim(-1, 101) 147 | plt.xlim(-1, (epochs+1)/epoch_bin) 148 | plt.yticks(ticks=list(range(0, 110, 10))) 149 | plt.xticks(ticks=bin_stat.bin_edges/epoch_bin-1, labels=bin_stat.bin_edges.astype(int)) 150 | plt.grid() 151 | plt.show() 152 | ``` 153 | 154 | ![](assets/percentage_of_succesfull_walks.png "Percentage of succesfull walks") 155 | 156 | 157 | Additionally, we check whether the resulting walk is optimal. We aim for an algorithm that covers the route in 6 steps; if it takes more, the algorithm is walking in circles. We look for the moment when the number of steps starts to oscillate around the optimal value. Therefore, after each epoch we track how many steps the agent takes when following its policy: 158 | 159 | 160 | ```Python 161 | plt.figure(figsize=(12,5)) 162 | plt.plot(t.jList) 163 | plt.yticks(ticks=list(range(0, 110, 10))) 164 | plt.title(label="Number of states done by the agent") 165 | plt.grid() 166 | plt.show() 167 | ``` 168 | 169 | ![](assets/number_of_steps.png "Number of steps") 170 | 171 | Let's see how the trained agent works: 172 | 173 | ![](assets/agent_walk.gif "Agent walk") 174 | 175 |
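For reference, a stripped-down sketch of the network described above is shown below. It follows the 16-input / 4-output description and the SmoothL1Loss choice; layer sizes and names are illustrative, and the full agent and training loop live in [`2._Classical_DQL.py`](./scripts/2._Classical_DQL.py):

```Python
import torch

class QNetwork(torch.nn.Module):
    """Minimal sketch of the 16 -> hidden -> 4 architecture described above."""
    def __init__(self, n_states=16, n_hidden=16, n_actions=4):
        super().__init__()
        self.l1 = torch.nn.Linear(n_states, n_hidden)
        self.l2 = torch.nn.Linear(n_hidden, n_actions)

    def forward(self, state_index):
        one_hot = torch.zeros(self.l1.in_features)
        one_hot[state_index] = 1.0                      # one-hot encoding of the board position
        return self.l2(torch.sigmoid(self.l1(one_hot))) # 4 values, one per move

q_net = QNetwork()
criterion = torch.nn.SmoothL1Loss()
optimizer = torch.optim.Adam(q_net.parameters(), lr=0.002)
```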
176 | ## Variational Quantum Circuit 177 | 178 | 179 | >Run from [this folder](./scripts/QML/) 180 | 🚀 181 | 182 |
183 | 184 | In the quantum approach we replace the neural network with a so-called Variational Quantum Circuit (VQC). This is a type of quantum circuit with tunable (classical) parameters. Like neural networks, VQCs can approximate arbitrary functions or classifiers. The following implementation is taken from 185 | 186 | [S. Y.-C. Chen, C.-H. H. Yang, J. Qi, P.-Y. Chen, X. Ma and H.-S. Goan, *"Variational Quantum Circuits for Deep Reinforcement Learning"*, 2019](https://arxiv.org/abs/1907.00397) 187 | 188 | ### Encoding of the state 189 | 190 | Recall that our task is, given a state (the place on the board where our agent is), to find the best corresponding move. The board contains 16 possible states (0-15), which can be encoded using 4 bits (0000-1111) 191 | 192 | 193 | ![equation](https://latex.codecogs.com/svg.image?b_1&space;b_2&space;b_3&space;b_4,) 194 | 195 | or, equivalently, 4 qubits 196 | 197 | 198 | ![equation](https://latex.codecogs.com/svg.image?|b_1\rangle&space;\otimes&space;|b_2\rangle&space;\otimes&space;|b_3\rangle&space;\otimes&space;|b_4\rangle.) 199 | 200 | For example, if we are in state 13, its bitwise representation is 1101, which can also be written using qubit states as ![equation](https://latex.codecogs.com/svg.image?|1\rangle&space;\otimes&space;|1\rangle&space;\otimes&space;|0\rangle&space;\otimes&space;|1\rangle). 201 | 202 | Finally, the explicit gates which must be applied to consecutive qubits to encode the agent's state are shown below: 203 | 204 | ![](assets/encoding.jpg "Encoding of agent's state") 205 | 206 | Note that we are using the ![equation](https://latex.codecogs.com/svg.image?\theta_i) and ![equation](https://latex.codecogs.com/svg.image?\phi_i) parameters. These parameters are not trained in the later part of the procedure; they are only used to encode the state properly. 207 | 208 | ### Layers 209 | 210 | On the encoded state we act with the following gates: 211 | 212 | ![](assets/layer_1.jpg) 213 | 214 | At the beginning we entangle all of the qubits using *CNOT* gates. Then we rotate each qubit along the *X*, *Y* and *Z* axes according to the following formula 215 | 216 | 217 | ![equation](https://latex.codecogs.com/svg.image?R(\alpha_i,&space;\beta_i,&space;\gamma_i)&space;=&space;R_x(\alpha_i)&space;R_y(\beta_i)&space;R_z(\gamma_i)) 218 | 219 | ![equation](https://latex.codecogs.com/svg.image?\alpha), ![equation](https://latex.codecogs.com/svg.image?\beta) and ![equation](https://latex.codecogs.com/svg.image?\gamma) are the parameters that will be optimized in each training iteration (which also means that the gradient is calculated exactly with respect to these variables). 220 | 221 | At the very end we measure each qubit and, based on the output, make the corresponding move. The whole circuit looks as follows: 222 | 223 | ![](assets/circuit.jpg "The whole VQC") 224 | 225 | ### Early stopping 226 | 227 | Because the training process of a VQC is very unstable, we use early stopping (just as in the classical approach). We terminate the procedure if, during the last 20 epochs, the reward did not change and was positive. 228 |
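A minimal PennyLane sketch of the circuit pieces described above is given below: preparing the 4-bit computational-basis state and applying one entangling/rotation layer followed by Pauli-Z measurements. This is our reading of the figures, not the exact code from [`QML_DQN_FROZEN_LAKE.py`](./scripts/QML/QML_DQN_FROZEN_LAKE.py); in particular, `qml.BasisState` is used here as a shorthand for the θ/φ encoding rotations, and the weight shapes and names are illustrative:

```Python
import pennylane as qml
from pennylane import numpy as np

n_qubits = 4
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev)
def vqc(state_bits, weights):
    # encode the agent's state, e.g. state 13 -> |1101>
    qml.BasisState(np.array(state_bits), wires=range(n_qubits))
    # entangle neighbouring qubits with CNOTs
    for i in range(n_qubits - 1):
        qml.CNOT(wires=[i, i + 1])
    # trainable rotations R(alpha, beta, gamma) = Rx(alpha) Ry(beta) Rz(gamma)
    for i in range(n_qubits):
        qml.RX(weights[i, 0], wires=i)
        qml.RY(weights[i, 1], wires=i)
        qml.RZ(weights[i, 2], wires=i)
    # one expectation value per qubit -> 4 numbers, one per possible move
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

weights = np.random.uniform(0, np.pi, (n_qubits, 3))  # initial rotation angles
print(vqc([1, 1, 0, 1], weights))                     # outputs for state 13
```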
229 | ## Extended classical model (NN simulating VQC) 230 | 231 | 232 | >Run in [this script](./scripts/3._Classical_DQL_sim_quant.py) 233 | 🚀 234 | 235 |
236 | We have also tested a classical neural network which mimics the behaviour of the quantum model. 237 |
238 | 239 | It is important to note that the state vector produced by the VQC consists of ![equation](https://latex.codecogs.com/svg.image?2^n) complex variables, where *n* is the number of qubits, which in our case gives 16 numbers. To resemble this situation as closely as possible, we extend the classical model by increasing the number of neurons in each layer (including the output one) to 32: we use 32 real numbers to encode 16 complex ones. 240 | 241 | It should be noted that, unfortunately, due to this extension the agent's training becomes more unstable and converges more slowly. 242 | 243 | 244 | ## Results 245 | ### Quantum DQN 246 | 247 | With earlystopping | WithOUT earlystopping 248 | :---: | :---: 249 | ![earlystopping_reward](results/quantum/earlystopping_Quantum_DQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp_REWARD_NO20220425223058.png) | ![without-earlystopping_reward](/results/quantum/Quantum_DQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp_REWARD_NO20220426183607.png) 250 | ![earlystopping_entropies](results/quantum/earlystopping_entropies.png) | ![without-earlystopping_entropies](/results/quantum/entropies.png) 251 | 252 | 253 | As we can see, the two kinds of entropy behave differently and there is no obvious correlation between them (nor between the entropies and the obtained reward). 254 | 255 | ### Classical Q-learning: 256 | 257 | The classical Q-learning model learns for a wide range of parameters and has very predictable behaviour: 258 | 1. the agent wanders around until it spots the reward, 259 | 2. then it stays on that path, usually optimizing it to the shortest one within a few epochs. 260 | The only fun is in setting the hyperparameters so that it converges as fast as possible. 261 | 262 | ![image](./assets/QL_results.jpg) 263 | 264 | We did not calculate entanglement entropies here. This model is just a complementary proof of concept. 265 | 266 | ### Deep Q-learning: 267 | 268 | It learns more slowly than the non-deep version and is slightly more sensitive to hyperparameters. It almost always converges to the optimal number of steps, but can vary a lot along the way: 269 | 270 | ![image](./assets/DQL_results.jpg) 271 | 272 | ### Deep Q-learning simulating a quantum circuit (extended classical model): 273 | 274 | This model was much harder to train. We searched for the best architecture and set of hyperparameters in 3 stages: 275 | 276 | ### **Finetuning stages:** 277 | ----- 278 | 279 | #### **1. Manual finetuning of the baseline model:** 280 | 281 | * *Model*: one/two hidden layers and the sigmoid activation function 282 | * *Goal*: We performed a few experiments to get a sense of how and which hyperparameters influence the training of our model. This also gave us baseline ranges to start a more systematic search. 283 | * *Results*: It turns out the model is very hard to train; in fact, only for some very narrow ranges did we obtain a ~40% mean win ratio over the last few epochs. It occurred rarely, and only for some seeds. 284 | 285 | You can run our [script](./scripts/3._Classical_DQL_sim_quant.py); all the parameters there are the result of this finetuning. 286 | 287 | #### **2. Grid search for the best architecture:** 288 | 289 | * *Model*: 290 | * **Hidden layers**: from 1 to 6 incl. 291 | * **Activation functions**: sigmoid, hyperbolic tangent and leaky ReLU with slope 0.1 292 | * *Goal*: Here we slightly lowered the hyperparameters that set the training 'pace' (learning rate, random-parameter scaling, etc.) and ran every combination of the tested architectures for `20'000` epochs, in order to choose the best architecture for hyperparameter finetuning.
* *Results*: 294 | * None of the models trained to a win. 295 | * The most promising results were shown by leaky ReLU, but we dropped it in the next stage since it 'favors' positive values. 296 | * The hyperbolic tangent performed the worst. 297 | * The model starts to train for 1 and 2 hidden layers; for 3 or more hidden layers the architecture seems to be too complicated. 298 | 299 | However, the one-hidden-layer architecture with the sigmoid activation function almost trained (it had around a 90% win ratio at the end of training). Naturally, we put it into **training for 30'000 epochs and it trained, triggering the early stop at epoch 22'800**. The model converged to the optimal number of steps, but the training history is very chaotic in comparison to the previous methods. 300 | The information and history of this experiment are in [this directory](./results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/). 301 | 302 | All the other results are in the [release with full results](./results/classical_DQL_sim_quantum/) in the `results` directory. They were too big to include in the main repository (weighing around 0.5 GB). 303 | The script used for training is [here](./scripts/3b._Classical_DQL_sim_quant_grid_search.py). 304 | 305 | #### **3. Automatic hyperparameter finetuning:** 306 | 307 | For this stage we used the `PyTorch` [finetuning tutorial](https://pytorch.org/tutorials/beginner/hyperparameter_tuning_tutorial.html) with `ray`. 308 | 309 | * *Model*: 310 | * **Hidden layers**: from 1 to 2 incl. 311 | * **Activation functions**: sigmoid 312 | * *Details*: 313 | * 150 experiments, run 12 at a time, with the scheduler keeping the next 16 in the queue 314 | * 30'000 epochs per experiment, with a grace period for the scheduler's early stopper of 15'000 epochs. The early stopper was enabled to avoid wasting resources on models that do not train. 315 | * We used [ray's ASHA scheduler](https://docs.ray.io/en/latest/tune/api_docs/schedulers.html) (a minimal configuration sketch is shown after this list) 316 | 317 | 318 | * *Goal*: 319 | * Final, automatic, full-scale finetuning. 320 | * *Results*: 321 | * Only one of the experiments trained, with the win-ratio threshold lowered to 70% over the last 100 epochs (gamma: ~`0.92`, learning rate: ~`0.008`, random scaling: ~`0.9998`, `1` hidden layer). 322 | * The only winning architecture is very similar to our parameters from the previous stages: 323 | * Best model from the 2nd stage: gamma: `0.9`, learning rate: ~`0.0002`, random scaling: `0.9998`, `1` hidden layer. 324 | * The 1-hidden-layer models dominate in the higher win-ratio region. 325 | 326 |
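Wiring such a search up with `ray` and the ASHA scheduler looks roughly like the sketch below. The `train_agent` trainable and the exact search-space ranges are illustrative assumptions (the real runs live in the notebook linked below); the budget numbers follow the description above:

```Python
from ray import tune
from ray.tune.schedulers import ASHAScheduler

def train_agent(config):
    # stand-in for the real trainable: it would train the DQL-sim-quant agent
    # with these hyperparameters and report the achieved win ratio
    tune.report(win_ratio=0.0)

config = {
    "gamma": tune.uniform(0.1, 2.0),
    "lr": tune.loguniform(1e-5, 4e-1),
    "n_hidden_layers": tune.choice([1, 2]),
    "random_scaling": tune.uniform(0.9995, 1.0),
}

scheduler = ASHAScheduler(
    metric="win_ratio", mode="max",
    max_t=30_000,        # epochs per experiment
    grace_period=15_000, # let every trial run at least this long
)

analysis = tune.run(train_agent, config=config, num_samples=150, scheduler=scheduler)
```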
327 | | Results on scatter plot | Results on triangular surface | 328 | | ------------- | ------------- | 329 | | ![results_scatter_plot](./results/auto_hp_tuning/results_scatter_plot.gif) | ![results_triangular_surface](./results/auto_hp_tuning/results_triangular_surface.gif) | 330 | | Violet dots are models with 1 hidden layer. Yellow dots are models with 2 hidden layers. | | 331 | 332 | All the details are in the [results csv file](./results/auto_hp_tuning/results.csv). 333 | 334 | The notebook used for training is [here](./scripts/3c._Classical_DQL_sim_quant_finetuning.ipynb). Finetuning was performed on a desktop with an i5-8600K 3.6 GHz CPU (6 cores) and an Nvidia RTX 2060 GPU. 335 | 336 | ### **Results of the classical model simulating the quantum model**: 337 | 338 | For the best classical model simulating the quantum model, we can see both entropies converge to zero exactly in the epochs where the model started to reach the goal: 339 | 340 | ![image](./assets/DQL_sim_quant_results.jpg) 341 | ![image](./assets/DQL_sim_quant/entropies.jpg) 342 | 343 | All parameters are in the [results folder](./results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/). 344 | 345 | However, with this method it is incomparably harder to obtain an effective model for the environment - not only in terms of hyperparameter sensitivity, but also because of the training duration and the small number of 'successful' experiments, i.e. models with a mean win ratio over 40%. Moreover, to obtain a 'successful' model we need to stop at the right moment, i.e. use a lower win-ratio early-stop condition, which does not guarantee an optimal path. 346 | 347 | Interestingly, models with 'real' quantum circuits (not simulated by a neural network) were able to train, even if rarely. 348 | This shows that simulating quantum distributions with classical neural networks can be tough - in our case particularly with: 349 | * classical data encoded with basis embedding, 350 | * classical data decoded with expectation values of the Pauli Z operator (with flipped sign). 351 | 352 | Similar conclusions - that quantum distributions can be hard to simulate - can be found in the literature, e.g. *Learning hard quantum distributions with variational autoencoders* [Rocchetto et al. 2018](https://www.nature.com/articles/s41534-018-0077-z.pdf). 354 | 355 | # Conclusions 356 | 357 | We examined two kinds of entropy - the von Neumann (entanglement) entropy and the Shannon entropy of the output vector (treated as a quantum state vector) returned by different ML models. It is worth noticing that these two values are the same when we express the (quantum) state vector in the Schmidt basis (which can be obtained by means of the SVD). See [here](https://physics.stackexchange.com/questions/600120/von-neumann-entropy-vs-shannon-entropy-for-a-quantum-state-vector/608071#608071) for more details. However, the SVD is never performed while training ML models, which explains the different behaviour of the curves presented in the plots above. In any case, we were not able to find any obvious relations between these two entropies, or between them and the reward obtained by the agent. 358 | 359 | We can see that in the case of a classical model trying to reproduce the behavior of a quantum model, the two analyzed entropies differ from those obtained from a truly quantum model. The two values begin their oscillations at a similar level, but for the classical model the entanglement entropy seems to drop faster. Also, the classical model seems to be "more stable". However, these conclusions should not be regarded as general rules, because in theory other agents might behave differently. 360 | 361 | Finally, **we suggest that the von Neumann entropy can also be used during the training of any (classical) ML model** which outputs a vector of length $4^n$, for some integer $n$. In that case we would just treat the output as the state vector of some quantum system. **The maximization of entropy is widely used in RL by adding it as a bonus term to the loss function (as described [here](https://awjuliani.medium.com/maximum-entropy-policies-in-reinforcement-learning-everyday-life-f5a1cc18d32d) and [here](https://towardsdatascience.com/entropy-regularization-in-reinforcement-learning-a6fa6d7598df)), so it would be interesting to see whether we could obtain different agent behaviour by utilizing the entanglement entropy in a similar way**. It should be possible, because the von Neumann entropy is differentiable (see [here](https://math.stackexchange.com/questions/3123031/derivative-of-the-von-neumann-entropy), [here](https://math.stackexchange.com/questions/2877997/derivative-of-von-neumann-entropy) and [here](https://quantumcomputing.stackexchange.com/questions/22263/how-to-compute-derivatives-of-partial-traces-of-the-form-frac-partial-operat)). 362 |
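As a minimal illustration of this suggestion, the sketch below computes both entropies from a length-16 output vector by treating it as a 4-qubit state and splitting the qubits 2+2. This is only an assumption-laden sketch for the README - the routines actually used in this repository live in `scripts/src/entropies.py`:

```Python
import numpy as np

def entropies_from_output(q_values, eps=1e-12):
    """Return (entanglement entropy of the 2+2 qubit bipartition, Shannon entropy)
    for a length-16 output vector treated as a 4-qubit state vector."""
    psi = np.asarray(q_values, dtype=float)
    psi = psi / np.linalg.norm(psi)          # normalise like a state vector
    probs = psi**2                           # Born-rule probabilities
    shannon = -np.sum(probs * np.log(probs + eps))
    # Schmidt coefficients of the bipartition = singular values of the 4x4 reshaped state
    schmidt = np.linalg.svd(psi.reshape(4, 4), compute_uv=False)
    lam = schmidt**2
    von_neumann = -np.sum(lam * np.log(lam + eps))
    return von_neumann, shannon

print(entropies_from_output(np.random.rand(16)))
```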
363 | ## Authors: 364 | 365 | M.Sc. Bartosz Rzepkowski, Janusz Twardak, Michał Łukomski, Marek Kowalik.
366 | The project was developed as part of the work of the [science club "Qubit"](http://qubit.pwr.edu.pl/) at the [Wrocław University of Science and Technology](https://pwr.edu.pl/en/). 367 | 368 | ## License: 369 | 370 | The code and other materials are under the [MIT license](LICENSE.md). 371 | 372 | > **Disclaimer**:
Our project uses fragments of code from other sources, sometimes with different licenses. All these fragments are marked properly. MIT license refers only to our original code. 373 | -------------------------------------------------------------------------------- /assets/DQL_NN_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/DQL_NN_architecture.png -------------------------------------------------------------------------------- /assets/DQL_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/DQL_results.jpg -------------------------------------------------------------------------------- /assets/DQL_sim_quant/entropies.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/DQL_sim_quant/entropies.jpg -------------------------------------------------------------------------------- /assets/DQL_sim_quant/trained_strategy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/DQL_sim_quant/trained_strategy.jpg -------------------------------------------------------------------------------- /assets/DQL_sim_quant/training_history.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/DQL_sim_quant/training_history.jpg -------------------------------------------------------------------------------- /assets/DQL_sim_quant/training_history_moving_average.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/DQL_sim_quant/training_history_moving_average.jpg -------------------------------------------------------------------------------- /assets/DQL_sim_quant_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/DQL_sim_quant_results.jpg -------------------------------------------------------------------------------- /assets/QL_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/QL_results.jpg -------------------------------------------------------------------------------- /assets/agent_walk.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/agent_walk.gif -------------------------------------------------------------------------------- /assets/bellman_equation.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/bellman_equation.png -------------------------------------------------------------------------------- /assets/circuit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/circuit.jpg -------------------------------------------------------------------------------- /assets/encoding.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/encoding.jpg -------------------------------------------------------------------------------- /assets/layer_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/layer_1.jpg -------------------------------------------------------------------------------- /assets/number_of_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/number_of_steps.png -------------------------------------------------------------------------------- /assets/percentage_of_succesfull_walks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/assets/percentage_of_succesfull_walks.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.23.1 2 | matplotlib==3.5.1 3 | numpy==1.21.5 4 | pandas==1.4.2 5 | PennyLane==0.22.2 6 | scipy==1.8.0 7 | torch==1.11.0 8 | torchsummary==1.5.1 9 | tqdm==4.63.1 10 | -------------------------------------------------------------------------------- /results/auto_hp_tuning/results.csv: -------------------------------------------------------------------------------- 1 | ,if_trained,win_ratio,episode_reward_mean,steps,config/gamma,config/lr,config/n_hidden_layers,config/random_scaling 2 | 0,False,0.0,-0.01,30000,0.6429321044920745,0.3381538660004886,2,0.9996758915938184 3 | 1,False,0.0,-0.01,30000,0.9634394487957104,0.2477646826964672,1,0.9995080556877116 4 | 2,False,0.0,-0.0297999999999999,30000,0.1278502176306413,0.0038077620563105,2,0.9998001486092865 5 | 3,False,0.0,-0.01,30000,1.0896413637009497,0.0017159593249697,2,0.9995951030113496 6 | 4,False,0.0,-0.01,30000,1.0712069399496895,0.085835916834808,1,0.9996429880689348 7 | 5,False,0.0,-0.01,30000,0.8661881600527119,2.554632307572298e-05,1,0.9996932883674374 8 | 6,False,0.0,-0.01,30000,0.1225708589257898,0.0139979971854847,2,0.9997758306755856 9 | 7,False,0.0,-0.9604,30000,0.2754528346538609,0.0076829647096619,1,0.9999600857569494 10 | 8,False,0.0,-0.01,30000,1.7615002274484195,0.0552439197921312,1,0.999598278069634 11 | 9,False,0.0,-0.01,30000,1.5133222203725931,0.1273945273279201,2,0.9997853193656548 12 | 
10,False,0.0,-0.01,30000,1.5002468807136646,2.6380295940306665e-05,1,0.9997212188109448 13 | 11,False,0.0,-0.0396999999999999,30000,0.1633806918537651,0.0013508943695215,1,0.999795513082905 14 | 12,False,0.0,-0.01,30000,1.043793449468295,0.0004852534818499,2,0.9997452502646372 15 | 13,False,0.0,-0.01,30000,0.1867059025212574,0.0022757296997554,2,0.9997575758039736 16 | 14,False,0.0,-0.01,30000,0.1651360985918654,0.0482472028034841,2,0.9996526573436372 17 | 15,False,0.0,-0.01,30000,0.2070144579998142,0.0070213709546484,1,0.9996215729338886 18 | 16,False,0.0,-0.0990999999999998,30000,0.9923896837900787,0.1056199344999185,1,0.9998951477891126 19 | 17,False,0.0,-0.01,30000,1.4201523331746466,0.0015458123147767,2,0.9997230074833044 20 | 18,False,0.0,-0.01,30000,1.7480348463032849,2.014815576209728e-05,1,0.9995371364283584 21 | 19,False,0.0,-0.0199,30000,1.8417113562031533,0.2154447653216558,1,0.99985706158186 22 | 20,False,0.0,-0.5445999999999995,30000,0.5970746888952899,0.0001218713769029,1,0.9998234467457447 23 | 21,False,0.0,-0.3070000000000005,30000,1.1827803259001586,0.0247895655778716,1,0.9999165844503128 24 | 22,False,0.0,-0.01,30000,0.1282933336489403,0.0466998624759277,1,0.9995931449082964 25 | 23,False,0.01,-0.2672000000000004,30000,0.4622827099556696,0.0021432397052196,1,0.9998740751853912 26 | 24,False,0.0,-0.01,30000,1.2977867224977024,0.0561111162867689,1,0.9996255510058668 27 | 25,False,0.0,-0.01,30000,0.1567895264753695,0.0165690023921189,1,0.9996237193594274 28 | 26,False,0.0,-0.01,30000,1.4727594079969806,0.0004150972159699,1,0.9997270378517816 29 | 27,False,0.0,-0.0199,30000,0.9182373810574538,0.0015362350728754,2,0.999739198852663 30 | 28,False,0.0,-0.01,30000,0.7434210257967419,0.001170155304365,2,0.9996202599603984 31 | 29,False,0.04,-0.92,30000,0.247895277202211,0.0002437194807502,1,0.9996984813681832 32 | 30,False,0.0,-1.0,30000,1.6122508191725604,0.0329602790122621,2,0.9998265615906032 33 | 31,False,0.0,-0.01,30000,1.173721736028401,1.5129672327953871e-05,2,0.9995861852433636 34 | 32,False,0.0,-1.0,30000,0.6992660103232805,0.0453870875743961,1,0.9995416645930334 35 | 33,False,0.0,-0.01,30000,0.1653453980074516,0.0707238056454616,1,0.9997029239321734 36 | 34,False,0.0,-0.01,30000,1.027998697712554,0.122835059742065,1,0.9996781856116186 37 | 35,True,0.71,0.42,5081,0.9290491174416302,0.008324696975339,1,0.9997782891020208 38 | 36,False,0.0,-0.0693999999999998,30000,0.4197324795305008,0.0001720748361111,2,0.9998019298510372 39 | 37,False,0.0,-0.0495999999999998,30000,0.3733165908582321,0.1212374949291871,2,0.9998862590476916 40 | 38,False,0.0,-0.6039999999999998,30000,0.1573622973702977,0.0035960750603891,1,0.999765686982886 41 | 39,False,0.0,-1.0,30000,0.2473455049724662,0.0378887863351928,2,0.99956675528091 42 | 40,False,0.0,-1.0,30000,0.1764780350453074,0.0001031075100616,2,0.9997205847528896 43 | 41,False,0.0,-0.0396999999999999,30000,0.4072720256706442,0.0070840569297821,1,0.9997780684078488 44 | 42,False,0.0,-0.0594999999999999,30000,1.7268542051153504,0.0048827754968105,2,0.9998067154411232 45 | 43,False,0.0,-0.01,30000,0.8495164604325529,0.0053360962329389,2,0.9996787880072384 46 | 44,False,0.0,-0.8713000000000001,30000,0.8646900133433162,1.4006101524620536e-05,1,0.9996289920450506 47 | 45,False,0.0,-0.01,30000,0.3793606189997634,0.0054999946824404,2,0.9997672713132628 48 | 46,False,0.02,-0.96,30000,0.5929944913180015,0.0003991362367752,2,0.9998398280483788 49 | 47,False,0.0,-0.0495999999999998,30000,0.1363438069180158,0.253873180398043,1,0.9998645676206388 50 | 
48,False,0.0,-0.0297999999999999,30000,1.7455507839170006,0.0001459711892257,1,0.9998564577565828 51 | 49,False,0.0,-0.01,30000,0.1764179871049245,0.0025394200819503,1,0.9998398574939236 52 | 50,False,0.0,-0.01,30000,0.2872822760506955,0.1146244473995302,2,0.9995671909060307 53 | 51,False,0.03,-0.94,30000,0.1529637490250278,0.0044881029490943,2,0.9995889919222896 54 | 52,False,0.0,-0.01,30000,0.8781078384803666,0.0015079445240741,1,0.9995379728085784 55 | 53,False,0.0,-1.0,30000,0.2435036737900988,0.0434386006754544,1,0.9996608279719646 56 | 54,False,0.0,-0.01,30000,1.7624853324164442,0.0001049000522652,1,0.9997320302874484 57 | 55,False,0.0,-0.01,30000,0.1029824728427666,0.0645324560288929,1,0.9996948118334364 58 | 56,False,0.0,-0.01,30000,0.8249080134344877,0.000173328586113,1,0.999707189826582 59 | 57,False,0.0,-0.2476000000000003,30000,0.114258424353069,0.1783563851894506,1,0.9998624876682436 60 | 58,False,0.0,-0.01,30000,1.76851907345061,0.1733522664878826,2,0.9995974556033712 61 | 59,False,0.0,-0.0693999999999998,30000,0.4136452613713096,0.0882459180925758,1,0.9998796528202928 62 | 60,False,0.0,-0.3070000000000004,30000,1.6984180942346725,0.0013904745347936,2,0.999917056000489 63 | 61,False,0.0,-0.2080000000000002,30000,0.3581003809357341,0.048697114879218,1,0.999910488465492 64 | 62,False,0.0,-0.3961,30000,0.934022277197529,1.6417061742262208e-05,1,0.9996838403225784 65 | 63,False,0.0,-0.01,30000,0.8728713457173364,0.0905910978154328,2,0.9996910151888726 66 | 64,False,0.0,-0.01,30000,0.1916298481672728,0.0008895571978989,1,0.9997006918247698 67 | 65,False,0.0,-1.0,30000,0.4632149915967479,1.763914837333703e-05,2,0.9996287493268948 68 | 66,False,0.41,-0.18,30000,0.2677309847165141,3.583438994283214e-05,1,0.9996196260689872 69 | 67,False,0.0,-1.0,30000,0.8688559293028582,0.0139797099016574,1,0.9999844191947254 70 | 68,False,0.15,-0.7,30000,0.1931046440223698,0.0004632822261573,1,0.9999008309179324 71 | 69,False,0.16,-0.68,30000,0.1563756248922679,6.048453533896794e-05,1,0.999633302277556 72 | 70,False,0.0,-1.0,30000,0.348985690523884,9.973450560320763e-05,1,0.9995430566861742 73 | 71,False,0.0,-1.0,30000,0.1056654675880671,0.0027974320434799,2,0.9999176548313498 74 | 72,False,0.0,-1.0,30000,0.3074087589238471,0.0155020707632973,2,0.9999842516561064 75 | 73,False,0.03,-0.94,30000,0.1546564267346814,9.93342422253282e-05,2,0.9999203463049876 76 | 74,False,0.28,-0.44,30000,0.3162738638752472,0.0004800338315775,1,0.9999277422458096 77 | 75,False,0.02,-0.96,30000,0.2828673746129952,9.048801037284429e-05,2,0.9997972993675318 78 | 76,False,0.0,-0.6039999999999996,30000,1.5915177412269603,0.0778440917382973,1,0.9999446009038824 79 | 77,False,0.0,-1.0,30000,0.1242618394065656,0.00063302531668,2,0.9995621965349032 80 | 78,False,0.0,-0.01,30000,0.4608206303735836,0.2295263739992638,2,0.9997569438594714 81 | 79,False,0.0,-1.0,30000,0.2749294471106079,0.0016065331863113,2,0.9997794731736648 82 | 80,False,0.0,-0.01,30000,0.6212225918930153,0.1130091047209987,2,0.9995550533248264 83 | 81,False,0.0,-0.01,30000,0.3602010991674411,0.0851414730181986,2,0.999574528562404 84 | 82,False,0.0,-1.0,30000,0.6421513501812322,0.0001621933069186,1,0.9995803002618714 85 | 83,False,0.4,-0.2,30000,0.2850173581529486,0.0010002090231379,1,0.9997445099676456 86 | 84,False,0.08,-0.84,30000,0.100983557622482,0.0001067076275481,2,0.99953552724926 87 | 85,False,0.02,-0.96,30000,0.2435556513266287,0.002661728342529,1,0.999758479138454 88 | 
86,False,0.0,-0.7525000000000002,30000,1.5030567251135971,0.1990498973597343,1,0.9999516952647388 89 | 87,False,0.0,-0.01,30000,0.1732110622001226,0.0037424042043013,2,0.9995978807984024 90 | 88,False,0.0,-0.9208000000000002,30000,0.2210277762652791,0.0283417056156158,2,0.9999631431740428 91 | 89,False,0.0,-0.01,30000,1.2363435124406372,0.0015610712409781,2,0.9995204402150571 92 | 90,False,0.0,-1.0,30000,0.4271359114067996,0.0151022327985865,1,0.999636335719432 93 | 91,False,0.0,-0.01,30000,0.7583286273947663,0.1222865247561305,2,0.9998012814765712 94 | 92,False,0.0,-0.01,30000,1.3652593949268037,0.1254053091325861,1,0.9997548352547312 95 | 93,False,0.0,-0.01,30000,1.246076004080192,0.1230026378197825,2,0.9995093891848044 96 | 94,False,0.0,-0.4555,30000,0.1403656232842004,0.0086860096010341,1,0.9997717049183344 97 | 95,False,0.0,-0.01,30000,1.5345245808230117,0.0015850033704645,2,0.9996743577736196 98 | 96,False,0.0,-1.0,30000,0.3688726659296873,0.0017085093430574,1,0.9995032685069192 99 | 97,False,0.0,-1.0,30000,0.2927250366610833,1.7712898673717144e-05,1,0.9997372345427918 100 | 98,False,0.0,-0.4555,30000,0.9641072950932572,0.0652305759095847,1,0.9998942222237154 101 | 99,False,0.0,-0.0693999999999998,30000,0.827293829484058,4.821232610393972e-05,1,0.999838770674412 102 | 100,False,0.0,-0.0990999999999998,30000,1.8489630306643132,0.0006665007713949,2,0.9998132139934904 103 | 101,False,0.0,-1.0,30000,0.8131687154441953,0.0015490470772516,2,0.9998404814641412 104 | 102,False,0.01,-0.98,30000,0.4175501508386886,0.0037450660080778,1,0.9999853290628032 105 | 103,False,0.01,-0.8513,30000,0.1224406285594123,3.23995734435952e-05,1,0.99989190354388 106 | 104,False,0.0,-1.0,30000,0.1989093004579079,0.0030819439728224,2,0.9999238417961404 107 | 105,False,0.03,-0.8212000000000002,30000,0.2698351090053247,0.0093102520489009,2,0.9997683109625122 108 | 106,False,0.0,-0.7822000000000001,30000,0.7679231502364715,0.0087134494030332,2,0.9999353081836212 109 | 107,False,0.0,-0.01,30000,0.5357687861652993,0.2494288305441379,1,0.9997191378956938 110 | 108,False,0.0,-1.0,30000,0.4216947617566411,0.0001468907882574,1,0.9997246670100448 111 | 109,False,0.11,-0.78,30000,0.253585884640042,2.815797771621376e-05,1,0.999790471858894 112 | 110,False,0.0,-0.1882000000000001,30000,0.2742842856642405,0.0054770047753832,1,0.9998626205440888 113 | 111,False,0.01,-0.98,30000,0.5011013684041216,0.0090731514832493,2,0.9997913341343893 114 | 112,False,0.01,-0.98,30000,0.1410562465371877,1.6969948901266786e-05,1,0.999972028450545 115 | 113,False,0.0,-0.01,30000,0.2390276056834814,0.3785721790510388,2,0.999704442437822 116 | 114,False,0.0,-0.0297999999999999,30000,1.1767288593959575,0.0066718731062661,2,0.9998579557261104 117 | 115,False,0.0,-0.01,30000,1.049595836497032,1.641136402320201e-05,1,0.9995394434056774 118 | 116,False,0.0,-0.0199,30000,0.1528049548842212,0.0002432643624448,1,0.999666318898786 119 | 117,False,0.0,-1.0,30000,1.004942815700538,0.0001036668288347,1,0.999723273467624 120 | 118,False,0.06,-0.88,30000,0.1113152183052778,0.0001489935272995,1,0.9996457099944768 121 | 119,False,0.0,-0.01,30000,0.1236944359712828,0.0533697887578857,2,0.9996897949919288 122 | 120,False,0.0,-1.0,30000,0.2542219582858407,5.9475469355802686e-05,2,0.9999738288631536 123 | 121,False,0.0,-1.0,30000,0.4812530835598186,0.0095977348930231,2,0.999763361263128 124 | 122,False,0.0,-0.7326999999999999,30000,0.1555210030751397,0.0022717248366247,1,0.9996506750393652 125 | 
123,False,0.01,-0.9602,30000,0.3814656542401731,0.0001539739756366,2,0.9998189959803054 126 | 124,False,0.0,-0.1188999999999998,30000,0.6086110436806602,0.0017970177993131,1,0.9998442728255352 127 | 125,False,0.0,-0.01,30000,1.1974470449356451,1.806034163988675e-05,2,0.9996287681454716 128 | 126,False,0.04,-0.92,30000,0.1998974154718145,0.000625094829695,2,0.9995303782986686 129 | 127,False,0.11,-0.78,30000,0.2415105677606703,0.0144280109251387,2,0.9997104788474996 130 | 128,False,0.0,-0.01,30000,1.3706071726274904,0.0003946668555946,2,0.9995924294159184 131 | 129,False,0.0,-0.01,30000,1.7255441606182815,0.000864726021344,2,0.9996879766505772 132 | 130,False,0.0,-0.01,30000,1.3853807348232268,0.0155724131678131,1,0.9995470768132348 133 | 131,False,0.0,-0.01,30000,1.983272393937628,3.0549862970746466e-05,2,0.9997320274701303 134 | 132,False,0.0,-0.01,30000,0.2082208474114526,0.1139820713316224,2,0.9997805821600446 135 | 133,False,0.0,-0.01,30000,0.9261263514700856,0.0130307419924446,1,0.9996430823206288 136 | 134,False,0.0,-0.2674000000000002,30000,0.5132629697454023,0.0003982877290684,2,0.9998700352439036 137 | 135,False,0.0,-0.9109,15000,0.5444327848035303,1.8578367335631687e-05,2,0.9999266494882394 138 | 136,False,0.01,-0.98,30000,0.3487198566880853,0.002602537112272,1,0.9997628222091388 139 | 137,False,0.0,-1.0,30000,0.695916596406484,0.0548471321697416,2,0.9996909205314796 140 | 138,False,0.05,-0.9,30000,0.3631532553272026,5.12493179570414e-05,1,0.9997638600282388 141 | 139,False,0.36,-0.2602,30000,0.6411061541320067,1.331602334783476e-05,1,0.9996297518563424 142 | 140,False,0.03,-0.94,30000,0.178833972659598,0.0018197303708018,1,0.9999411358269298 143 | 141,False,0.0,-0.9802,15000,0.2855286451736493,0.0010119331361557,1,0.999964704489208 144 | 142,False,0.0,-1.0,15000,0.2228139045304387,0.0003225958253618,2,0.999571484968628 145 | 143,False,0.0,-0.4951,15000,0.6505560909197262,0.0544989023448025,1,0.9998728620721252 146 | 144,False,0.05,-0.9,30000,0.1176609532497377,1.9708523079728263e-05,2,0.9997699760339828 147 | 145,False,0.0,-0.7326999999999998,15000,0.2364637942701128,0.0017079191690217,2,0.9995315884365752 148 | 146,False,0.01,-0.98,15000,0.1228124108551648,0.0026631816126636,2,0.999565769986434 149 | 147,False,0.09,-0.82,30000,0.1563081523121499,0.0180635340699864,2,0.999750519585998 150 | 148,False,0.0,-0.9901,15000,1.7890956973019545,0.0002644387570891,1,0.999956651391072 151 | 149,False,0.0,-1.0,15000,1.8453549162305485,0.155827858415406,1,0.9999884512123388 152 | -------------------------------------------------------------------------------- /results/auto_hp_tuning/results_scatter_plot.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/auto_hp_tuning/results_scatter_plot.gif -------------------------------------------------------------------------------- /results/auto_hp_tuning/results_triangular_surface.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/auto_hp_tuning/results_triangular_surface.gif -------------------------------------------------------------------------------- /results/classical_DQL/entropies.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_DQL/entropies.jpg -------------------------------------------------------------------------------- /results/classical_DQL/hyperparameters.txt: -------------------------------------------------------------------------------- 1 | gamma;0.8 2 | epochs;500 3 | max_steps;60 4 | learning_rate;0.002 5 | random_chance;0.99 6 | random_scaling;0.99 7 | window;40 8 | target_win_ratio;0.98 9 | min_steps_num;6 10 | -------------------------------------------------------------------------------- /results/classical_DQL/trained_strategy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_DQL/trained_strategy.jpg -------------------------------------------------------------------------------- /results/classical_DQL/training_history.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_DQL/training_history.jpg -------------------------------------------------------------------------------- /results/classical_DQL/training_history_moving_average.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_DQL/training_history_moving_average.jpg -------------------------------------------------------------------------------- /results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/entropies.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/entropies.jpg -------------------------------------------------------------------------------- /results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/hyperparameters.txt: -------------------------------------------------------------------------------- 1 | gamma;0.9 2 | epochs;30000 3 | max_steps;60 4 | learning_rate;0.0002 5 | non_random_chance;0.99 6 | random_scaling;0.9998 7 | window;40 8 | target_win_ratio;0.98 9 | min_steps_num;6 10 | n_hidden_layers;1 11 | activation_function;sigmoid 12 | global_seed;123456 13 | -------------------------------------------------------------------------------- /results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/trained_strategy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/trained_strategy.jpg -------------------------------------------------------------------------------- /results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/training_history.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/training_history.jpg -------------------------------------------------------------------------------- /results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/training_history_moving_average.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_DQL_sim_quantum/_BEST_1_layers_sigmoid_activation_longer/training_history_moving_average.jpg -------------------------------------------------------------------------------- /results/classical_QL/hyperparameters.txt: -------------------------------------------------------------------------------- 1 | gamma;0.05 2 | epochs;200 3 | max_steps;400 4 | learning_rate;0.001 5 | random_chance;0.99 6 | random_scaling;0.95 7 | window;40 8 | target_win_ratio;0.98 9 | min_steps_num;6 10 | -------------------------------------------------------------------------------- /results/classical_QL/trained_strategy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_QL/trained_strategy.jpg -------------------------------------------------------------------------------- /results/classical_QL/training_history.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_QL/training_history.jpg -------------------------------------------------------------------------------- /results/classical_QL/training_history_moving_average.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/classical_QL/training_history_moving_average.jpg -------------------------------------------------------------------------------- /results/quantum/Quantum_DQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp_REWARD_NO20220426183607.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/quantum/Quantum_DQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp_REWARD_NO20220426183607.png -------------------------------------------------------------------------------- /results/quantum/earlystopping_Quantum_DQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp_REWARD_NO20220425223058.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/quantum/earlystopping_Quantum_DQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp_REWARD_NO20220425223058.png -------------------------------------------------------------------------------- /results/quantum/earlystopping_entropies.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/quantum/earlystopping_entropies.png -------------------------------------------------------------------------------- /results/quantum/entropies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qubit-science-club/quantum_reinforcement_learning/31f2daf7a2ce8493b8fd9fe85aaea0ebd754448f/results/quantum/entropies.png -------------------------------------------------------------------------------- /scripts/1._Classical_QL.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.envs.registration import register 3 | import torch 4 | 5 | from tqdm import tqdm 6 | import numpy as np 7 | from src.visualizations import * 8 | import os 9 | 10 | # Parameters 11 | gamma = 0.05 12 | epochs = 200 13 | max_steps = 400 14 | learning_rate = 0.001 15 | random_chance = 0.99 16 | random_scaling = 0.95 17 | window = 40 18 | target_win_ratio = 0.98 19 | min_steps_num = 6 20 | global_seed = 42 21 | 22 | np.random.seed(global_seed) 23 | torch.manual_seed(global_seed) 24 | 25 | # register( 26 | # id='FrozenLake-v1', 27 | # entry_point='gym.envs.toy_text:FrozenLakeEnv', 28 | # kwargs={'map_name' : '4x4', 29 | # 'is_slippery': False}) 30 | env = gym.make('FrozenLake-v1', is_slippery=False) 31 | #print(env.render(mode='ansi')) 32 | 33 | 34 | #Initilize Q 35 | number_of_states = env.observation_space.n 36 | number_of_actions = env.action_space.n 37 | 38 | print('number_of_states:', number_of_states,'\nnumber_of_actions' ,number_of_actions) 39 | 40 | # At first Q is a zero tensor with action and observation space 41 | Q = torch.zeros([number_of_states, number_of_actions]) 42 | 43 | steps_total = [] 44 | rewards_total = [] 45 | win_history = [] 46 | random_params = [] 47 | epoch_random_chance = random_chance 48 | for i_episode in tqdm(range(epochs)): 49 | state = env.reset() 50 | reward_all = 0 51 | epoch_random_chance*=random_scaling 52 | 53 | for step in range(max_steps): 54 | # action 55 | if torch.rand(1) < epoch_random_chance: 56 | Q_state = torch.rand(number_of_actions) 57 | else: 58 | Q_state = Q[state] 59 | 60 | action = torch.argmax(Q_state) 61 | 62 | #Take the best action 63 | new_state, reward, done, info = env.step(action.item()) 64 | if reward==0: 65 | if done==True: 66 | reward=-1 67 | # else: 68 | # reward=-0.01 69 | 70 | #Update Q and state 71 | Q[state,action] = Q[state,action]+learning_rate*(reward + gamma * torch.max(Q[new_state])-Q[state,action]) 72 | state = new_state 73 | reward_all += reward 74 | 75 | #env.render() 76 | if done or step==max_steps-1: 77 | steps_total.append(step+1) 78 | rewards_total.append(reward_all) 79 | win_history.append(1 if reward==1. 
else 0) 80 | random_params.append(epoch_random_chance) 81 | break 82 | 83 | if sum(win_history[-window:])/window>=target_win_ratio: 84 | break 85 | 86 | results_path = "../results/classical_QL" 87 | 88 | strategy = np.array([torch.argmax(Q_state).item() for Q_state in Q]).reshape((4,4)) 89 | holes_indexes = np.array([5,7,11,12]) 90 | strategy_save_path = os.path.join(results_path, "trained_strategy.jpg") 91 | 92 | plot_strategy(strategy, holes_indexes, strategy_save_path) 93 | 94 | 95 | moving_average_history_save_path = os.path.join(results_path, "training_history_moving_average.jpg") 96 | plot_rolling_window_history(steps_total, rewards_total, win_history, random_params, target_win_ratio, min_steps_num, moving_average_history_save_path, window=window) 97 | history_save_path = os.path.join(results_path, "training_history.jpg") 98 | plot_history(steps_total, rewards_total, win_history, random_params, target_win_ratio, min_steps_num, history_save_path) 99 | 100 | 101 | with open(os.path.join(results_path, "hyperparameters.txt"), "w+") as f: 102 | f.write(f'gamma;{gamma}\n') 103 | f.write(f'epochs;{epochs}\n') 104 | f.write(f'max_steps;{max_steps}\n') 105 | f.write(f'learning_rate;{learning_rate}\n') 106 | f.write(f'random_chance;{random_chance}\n') 107 | f.write(f'random_scaling;{random_scaling}\n') 108 | f.write(f'window;{window}\n') 109 | f.write(f'target_win_ratio;{target_win_ratio}\n') 110 | f.write(f'min_steps_num;{min_steps_num}\n') 111 | -------------------------------------------------------------------------------- /scripts/2._Classical_DQL.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import linalg as LA 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import matplotlib.pyplot as plt 8 | import os 9 | from tqdm import tqdm 10 | 11 | from src.entropies import entanglement_entropy, classical_entropy 12 | from src.visualizations import * 13 | 14 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 15 | 16 | gamma = 0.8 17 | epochs = 500 18 | max_steps = 60 19 | learning_rate = 0.002 20 | random_chance = 0.99 21 | random_scaling = 0.99 22 | window = 40 23 | target_win_ratio = 0.98 24 | min_steps_num = 6 25 | global_seed = 42 26 | 27 | np.random.seed(global_seed) 28 | torch.manual_seed(global_seed) 29 | 30 | class Agent(torch.nn.Module): 31 | def __init__(self, location_space_size, action_space_size, hidden_layer_size): 32 | super(Agent, self).__init__() 33 | self.location_space_size = location_space_size 34 | self.action_space_size = action_space_size 35 | 36 | self.l1 = torch.nn.Linear(in_features=location_space_size, out_features=hidden_layer_size) 37 | self.l2 = torch.nn.Linear(in_features=hidden_layer_size, out_features=action_space_size) #action_space_size 38 | self.l1.weight.data.uniform_() 39 | self.l1.bias.data.fill_(-0.02) 40 | self.l2.weight.data.uniform_() 41 | self.l2.bias.data.fill_(-0.02) 42 | 43 | print("Set the neural network with \ 44 | \n\tInput size: \t{inp}, \ 45 | \n\tHidden layer size: \t{hidden} \ 46 | \n\tOutput size: \t{outp}"\ 47 | .format(inp=self.location_space_size, hidden=self.location_space_size, outp=self.action_space_size)) 48 | 49 | def forward(self, state): 50 | state_one_hot = torch.zeros(self.location_space_size) 51 | state_one_hot.scatter_(0,torch.tensor([int(state)]), 1.) 
52 | out1 = torch.sigmoid(self.l1(state_one_hot)) 53 | return self.l2(out1).view((-1)) # 1 x ACTION_SPACE_SIZE == 1 x 4 => 4 54 | 55 | class Trainer: 56 | def __init__(self): 57 | self.action_space_size = 4 58 | self.location_space_size = 16 59 | self.holes = 2 60 | self.agent = Agent(self.location_space_size, self.action_space_size, 16) 61 | self.optimizer = torch.optim.Adam(params=self.agent.parameters(), lr=learning_rate) 62 | self.location = 0 63 | temp_lake = torch.zeros(self.location_space_size) 64 | 65 | holes_indexes = np.random.randint(1, self.location_space_size-1, (self.holes,)) 66 | while np.unique(holes_indexes).size11: 105 | return self.location, self.lake[self.location] 106 | else: 107 | return self.location+4, self.lake[self.location+4] 108 | if step==3: 109 | if (self.location%4)==0: 110 | return self.location, self.lake[self.location] 111 | else: 112 | return self.location-1, self.lake[self.location-1] 113 | 114 | def choose_action(self): 115 | if np.random.rand(1) > (self.epsilon): 116 | action = torch.argmax(self.agent(self.location)) #wybor najwiekszej wartosci z tablicy 117 | else: 118 | action = torch.tensor(np.random.randint(0, 4)) 119 | return action 120 | 121 | def Qtable(self): 122 | return torch.stack([self.agent(i) for i in range(self.location_space_size)], dim=0) 123 | 124 | def Qstrategy(self): 125 | return [torch.argmax(self.agent(i)).item() for i in range(self.location_space_size)] 126 | 127 | def train(self, epochs): 128 | for x in (pbar := tqdm(range(epochs))): 129 | pbar.set_description(f'Success rate: {sum(self.success[-window:])/window:.2%} | Random chance: {self.epsilon:.2%}') 130 | j=0 131 | self.location = 0 132 | while j100: 166 | if sum(self.success[-window:])/window>target_win_ratio: 167 | print("Network trained before epoch limit on {x} epoch".format(x=x)) 168 | break 169 | 170 | 171 | if __name__ == "__main__": 172 | fl = Trainer() 173 | print("Setting deep Q-learning in FrozenLake environment",\ 174 | "\nFrozenlake:") 175 | #print(fl.render()) 176 | 177 | print("Train through {epochs} epochs". 
format(epochs=epochs)) 178 | fl.train(epochs) 179 | 180 | plot_success_steps_history(fl.jInEpoch, fl.success) 181 | 182 | results_path = "../results/classical_DQL" 183 | 184 | strategy = np.array(fl.Qstrategy()).reshape((4,4)) 185 | strategy_save_path = os.path.join(results_path, "trained_strategy.jpg") 186 | strategy_angles = ((strategy+3)%4)*90 187 | plot_strategy(strategy, fl.holes_indexes, strategy_save_path, custom_angles=strategy_angles) 188 | 189 | entropies = np.array(fl.entropies) 190 | cl_entropies = np.array(fl.cl_entropies) 191 | entropies_save_path = os.path.join(results_path, "entropies.jpg") 192 | plot_entropies(entropies, cl_entropies, entropies_save_path) 193 | 194 | moving_average_history_save_path = os.path.join(results_path, "training_history_moving_average.jpg") 195 | plot_rolling_window_history(fl.jInEpoch, fl.reward_list, fl.success, fl.epsilon_list, target_win_ratio, min_steps_num, moving_average_history_save_path, window=window) 196 | history_save_path = os.path.join(results_path, "training_history.jpg") 197 | plot_history(fl.jInEpoch, fl.reward_list, fl.success, fl.epsilon_list, target_win_ratio, min_steps_num, history_save_path) 198 | 199 | 200 | with open(os.path.join(results_path, "hyperparameters.txt"), "w+") as f: 201 | f.write(f'gamma;{gamma}\n') 202 | f.write(f'epochs;{epochs}\n') 203 | f.write(f'max_steps;{max_steps}\n') 204 | f.write(f'learning_rate;{learning_rate}\n') 205 | f.write(f'random_chance;{random_chance}\n') 206 | f.write(f'random_scaling;{random_scaling}\n') 207 | f.write(f'window;{window}\n') 208 | f.write(f'target_win_ratio;{target_win_ratio}\n') 209 | f.write(f'min_steps_num;{min_steps_num}\n') 210 | 211 | with open(os.path.join(results_path, "entropies.txt"), "w") as f: 212 | for ent in fl.entropies: 213 | f.write(str(ent)+";") 214 | 215 | with open(os.path.join(results_path, "cl_entropies.txt"), "w") as f: 216 | for ent in fl.cl_entropies: 217 | f.write(str(ent)+";") -------------------------------------------------------------------------------- /scripts/3._Classical_DQL_sim_quant.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Variable 5 | from torch import optim 6 | from torch.nn import functional as F 7 | from torch import linalg as LA 8 | 9 | import numpy as np 10 | from tqdm import tqdm 11 | 12 | from src.entropies import entanglement_entropy, classical_entropy 13 | from src.visualizations import * 14 | 15 | import os 16 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 17 | 18 | 19 | global_seed = 123456 20 | 21 | torch.manual_seed(global_seed) 22 | np.random.seed(global_seed) 23 | 24 | 25 | gamma = 0.9 26 | epochs = 30000 27 | max_steps = 60 28 | learning_rate = 0.0002 29 | non_random_chance = 0.99 30 | random_scaling = 0.9998 31 | window = 40 32 | target_win_ratio = 0.98 33 | min_steps_num = 6 34 | activation_function = 'sigmoid' 35 | n_hidden_layers = 1 36 | results_folder = f'_BEST_{n_hidden_layers}_layers_{activation_function}_activation_longer' 37 | results_path = os.path.join('../results', 'classical_DQL_sim_quantum', results_folder) 38 | 39 | if not os.path.exists(results_path): 40 | os.mkdir(results_path) 41 | 42 | 43 | # ## 1. Create a FrozenLake environment 44 | lake = gym.make('FrozenLake-v1', is_slippery=False) 45 | lake.reset() 46 | print(lake.render(mode='ansi')) 47 | 48 | 49 | # ## 1. 
Define one_hot encoding function, and uniform initializer for linear layer 50 | def one_hot(ids, nb_digits): 51 | """ 52 | ids: (list, ndarray) shape:[batch_size] 53 | """ 54 | if not isinstance(ids, (list, np.ndarray)): 55 | raise ValueError("ids must be 1-D list or array") 56 | batch_size = len(ids) 57 | ids = torch.LongTensor(ids).view(batch_size, 1) 58 | out_tensor = Variable(torch.FloatTensor(batch_size, nb_digits)) 59 | out_tensor.data.zero_() 60 | out_tensor.data.scatter_(dim=1, index=ids, value=1.) 61 | return out_tensor 62 | 63 | def uniform_linear_layer(linear_layer): 64 | linear_layer.weight.data.uniform_() 65 | linear_layer.bias.data.fill_(-0.02) 66 | 67 | 68 | # ## 3. Define Agent model, basically for Q values 69 | class Agent(nn.Module): 70 | def __init__(self, observation_space_size, action_space_size, n_hidden_layers): 71 | super(Agent, self).__init__() 72 | self.observation_space_size = observation_space_size 73 | self.hidden_size = 2*self.observation_space_size 74 | 75 | self.l1 = nn.Linear(in_features=2*self.observation_space_size, out_features=self.hidden_size) 76 | self.hidden_layers = [ 77 | nn.Linear(in_features=self.hidden_size, out_features=self.hidden_size) \ 78 | for i in range(n_hidden_layers) 79 | ] 80 | self.l2 = nn.Linear(in_features=self.hidden_size, out_features=32) 81 | self.activation = None 82 | if activation_function=='lrelu': 83 | self.activation = F.leaky_relu 84 | if activation_function=='sigmoid': 85 | self.activation = F.sigmoid 86 | if activation_function=='tanh': 87 | self.activation = F.tanh 88 | 89 | uniform_linear_layer(self.l1) 90 | for l in self.hidden_layers: 91 | uniform_linear_layer(l) 92 | 93 | uniform_linear_layer(self.l2) 94 | 95 | print('Set the neural network with:') 96 | print(f'\tInput size: \t{2*self.observation_space_size}') 97 | for i, l in enumerate(range(n_hidden_layers)): 98 | print(f'\tHidden {i+1}. layer size: \t{self.hidden_size}') 99 | print(f'\tOutput size: \t{32}') 100 | 101 | def forward(self, state): 102 | obs_emb = one_hot([int(2*state)], 2*self.observation_space_size) 103 | # first layer: 104 | out1 = self.activation(self.l1(obs_emb)) 105 | 106 | # hidden layers: 107 | for l in self.hidden_layers: 108 | out1 = self.activation(l(out1)) 109 | 110 | # output layers: 111 | out2 = self.activation(self.l2(out1)) 112 | 113 | return out2.view((-1)) 114 | 115 | 116 | # ## 4. 
Define the Trainer to optimize Agent model 117 | class Trainer: 118 | def __init__(self, n_hidden_layers): 119 | self.holes_indexes = np.array([5,7,11,12]) 120 | 121 | self.agent = Agent(lake.observation_space.n, lake.action_space.n, n_hidden_layers) 122 | self.optimizer = optim.Adam(params=self.agent.parameters(), lr=learning_rate) 123 | 124 | self.epsilon = non_random_chance 125 | self.epsilon_growth_rate = random_scaling 126 | self.gamma = gamma 127 | 128 | self.epsilon_list = [] 129 | self.success = [] 130 | self.jList = [] 131 | self.reward_list = [] 132 | 133 | self.compute_entropy = True 134 | self.entropies = [] 135 | self.cl_entropies = [] 136 | self.entropies_episodes = [0] 137 | 138 | self.print = False 139 | 140 | 141 | def train(self, epoch): 142 | # entropies_episodes = [0] * (epoch+1) 143 | for i in (pbar := tqdm(range(epoch))): 144 | pbar.set_description(f'Success rate: {sum(self.success[-window:])/window:.2%} | Random chance: {self.epsilon:.2%}') 145 | 146 | s = lake.reset() #stan na jeziorze 0-16, dla resetu 0 147 | j = 0 148 | self.entropies_episodes.append(0) 149 | while j < max_steps: 150 | j += 1 151 | # perform chosen action 152 | a = self.choose_action(s) 153 | s1, r, d, _ = lake.step(int(a)) 154 | if d == True and r == 0: r = -1 155 | elif d== True: r == 1 156 | elif r==0: r = -0.01 157 | 158 | # if self.print==False: 159 | # print(self.agent(s)[a]) 160 | # self.print=True 161 | 162 | # calculate target and loss 163 | target_q = r + self.gamma * torch.max(self.calc_probabilities(s1).detach()) 164 | 165 | loss = F.smooth_l1_loss(self.calc_probability(s, a), target_q) 166 | # update model to optimize Q 167 | self.optimizer.zero_grad() 168 | loss.backward() 169 | self.optimizer.step() 170 | 171 | # update state 172 | s = s1 173 | if(self.compute_entropy): 174 | with torch.inference_mode(): 175 | self.entropies.append(entanglement_entropy(self.calc_statevector(s))) 176 | self.cl_entropies.append(classical_entropy(self.calc_statevector(s))) 177 | self.entropies_episodes[i] += 1 178 | 179 | if d == True: break 180 | 181 | # append results onto report lists 182 | if d == True and r > 0: 183 | self.success.append(1) 184 | else: 185 | self.success.append(0) 186 | 187 | self.reward_list.append(r) 188 | self.jList.append(j) 189 | 190 | if self.epsilon < 1.: 191 | self.epsilon *= self.epsilon_growth_rate 192 | self.epsilon_list.append(self.epsilon) 193 | 194 | if i>100: 195 | if sum(self.success[-window:])/window>target_win_ratio: 196 | print("Network trained before epoch limit on {i} epoch".format(i=i)) 197 | break 198 | 199 | #print("last 100 epoches success rate: " + str(sum(self.success[-100:])/100) + "%") 200 | 201 | def choose_action(self, s): 202 | self.calc_probabilities(s) 203 | if np.random.rand(1) > self.epsilon : 204 | action = torch.argmax(self.calc_probabilities(s)) #wybor najwiekszej wartosci z tablicy 205 | else: 206 | action = torch.tensor(np.random.randint(0, 4)) 207 | return action 208 | 209 | def calc_statevector(self, s): 210 | return torch.complex(self.agent(s)[0::2], self.agent(s)[1::2]) 211 | 212 | def calc_probability(self, s, a): #liczenie prawdopodobieństwa obsadzenia kubitu (0-3) z danego stanu planszy (0-15) 213 | statevector = torch.complex(self.agent(s)[0::2], self.agent(s)[1::2]) 214 | probabilities = (statevector.abs()**2) 215 | probabilities = probabilities/probabilities.sum() #normowanie 216 | prob_indexes = [ 217 | [0,1,2,3,4,5,6,7], 218 | [0,1,2,3,8,9,10,11], 219 | [0,1,4,5,8,9,12,13], 220 | [0,2,4,6,8,10,12,14] 221 | ] 222 | return 
probabilities[prob_indexes[a]].sum() 223 | 224 | def calc_probabilities(self, s): #liczenie prawdopodobieństw każdego z kubitów z danego stanu planszy (0-15) do tensora o kształcie (4) 225 | raw_wavefunction = torch.complex(self.agent(s)[0::2], self.agent(s)[1::2]) 226 | probabilities = (raw_wavefunction.abs()**2) 227 | probabilities = probabilities/probabilities.sum() #normowanie 228 | probs_of_qubits = torch.tensor([ 229 | probabilities[[0,1,2,3,4,5,6,7]].sum(), 230 | probabilities[[0,1,2,3,8,9,10,11]].sum(), 231 | probabilities[[0,1,4,5,8,9,12,13]].sum(), 232 | probabilities[[0,2,4,6,8,10,12,14]].sum() 233 | ]) 234 | return probs_of_qubits 235 | 236 | 237 | def Q(self): 238 | Q = [] 239 | for x in range(lake.observation_space.n): 240 | Qstate = self.agent(x).detach() 241 | Qstate /= LA.norm(Qstate) 242 | Q.append(Qstate) 243 | Q_out = torch.Tensor(lake.observation_space.n, lake.action_space.n) 244 | torch.cat(Q, out=Q_out) 245 | return Q_out 246 | 247 | def Qstate(self, state): 248 | Qstate = self.agent(state).detach() 249 | Qstate /= LA.norm(Qstate) 250 | return Qstate 251 | 252 | def Qstrategy(self): 253 | return [torch.argmax(self.calc_probabilities(state)).item() for state in range(lake.observation_space.n)] 254 | 255 | 256 | # ## 5. Initialize a trainer, and perform training by 2k epoches 257 | fl = Trainer(n_hidden_layers) 258 | 259 | t = torch.Tensor(np.array([32], dtype=int)) 260 | t = t.to(torch.int32) 261 | 262 | print("Train through {epochs} epochs". format(epochs=epochs)) 263 | fl.train(epochs) 264 | 265 | plot_success_steps_history(fl.jList, fl.success) 266 | 267 | strategy = np.array(fl.Qstrategy()).reshape((4,4)) 268 | strategy_save_path = os.path.join(results_path, "trained_strategy.jpg") 269 | plot_strategy(strategy, fl.holes_indexes, strategy_save_path) 270 | 271 | entropies = np.array(fl.entropies) 272 | cl_entropies = np.array(fl.cl_entropies) 273 | entropies_save_path = os.path.join(results_path, "entropies.jpg") 274 | plot_entropies(entropies, cl_entropies, entropies_save_path) 275 | 276 | moving_average_history_save_path = os.path.join(results_path, "training_history_moving_average.jpg") 277 | plot_rolling_window_history(fl.jList, fl.reward_list, fl.success, np.array(fl.epsilon_list), target_win_ratio, min_steps_num, moving_average_history_save_path, window=window) 278 | history_save_path = os.path.join(results_path, "training_history.jpg") 279 | plot_history(fl.jList, fl.reward_list, fl.success, np.array(fl.epsilon_list), target_win_ratio, min_steps_num, history_save_path) 280 | 281 | 282 | with open(os.path.join(results_path, "hyperparameters.txt"), "w+") as f: 283 | f.write(f'gamma;{gamma}\n') 284 | f.write(f'epochs;{epochs}\n') 285 | f.write(f'max_steps;{max_steps}\n') 286 | f.write(f'learning_rate;{learning_rate}\n') 287 | f.write(f'non_random_chance;{non_random_chance}\n') 288 | f.write(f'random_scaling;{random_scaling}\n') 289 | f.write(f'window;{window}\n') 290 | f.write(f'target_win_ratio;{target_win_ratio}\n') 291 | f.write(f'min_steps_num;{min_steps_num}\n') 292 | f.write(f'n_hidden_layers;{n_hidden_layers}\n') 293 | f.write(f'activation_function;{activation_function}\n') 294 | f.write(f'global_seed;{global_seed}\n') 295 | 296 | with open(os.path.join(results_path, "entropies.txt"), "w") as f: 297 | for ent in fl.entropies: 298 | f.write(str(ent)+";") 299 | 300 | with open(os.path.join(results_path, "cl_entropies.txt"), "w") as f: 301 | for ent in fl.cl_entropies: 302 | f.write(str(ent)+";") 303 | 304 | 305 | 306 | 307 | 308 | #na poczatku zamiast one 
hota ma wziąć cos 32 liczby 309 | #na razie jest funkcja zmianiajace liczbe na zwykły binarny zapis 310 | #następnie sluży nam to do w obwodzie kwantowym do obrotu kubitów, żeby je włączyć 311 | #i tak np jak stoimy na polu 13 (1,1,0,1) to włączymy wszystkie kubity poza 3cim 312 | #to jest wejscie sieci kwantowej 313 | #u nas musi to być 32 314 | 315 | #u nas trzeba z lokalizacji wybrać jedną z 16tu kombinacji kubitów i nadać jej amplitudę jakby została włączona (po zobaczeniu w kodzie kwantowym jak odbywa się preparation state wrzucić to do symulatora obwodu kwantowego i uzyskać amplitudę, czyli (-1+0i)) 316 | #stąd wszędzie bd one-hoty postaci para [-1, 0] a reszta zera 317 | #zmieni się to, jeżeli będziemy mieli do czynienia z innym przygotowaniem 318 | 319 | def decimalToBinaryFixLength(_length, _decimal): 320 | binNum = bin(int(_decimal))[2:] 321 | outputNum = [int(item) for item in binNum] 322 | if len(outputNum) < _length: 323 | outputNum = np.concatenate((np.zeros((_length-len(outputNum),)),np.array(outputNum))) 324 | else: 325 | outputNum = np.array(outputNum) 326 | return outputNum 327 | 328 | 329 | 330 | 331 | 332 | -------------------------------------------------------------------------------- /scripts/3b._Classical_DQL_sim_quant_grid_search.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | 3 | activations = ['lrelu', 'sigmoid', 'tanh'] 4 | 5 | n_layers = list(range(1,7)) 6 | global_seed = 123456 7 | 8 | 9 | for i, (activation_function, n_hidden_layers) in enumerate(product(activations, n_layers)): 10 | print(f'Experiment: {i+1}/{len(activations)*len(n_layers)}') 11 | print("Activation function: ", activation_function) 12 | print("Number of hidden layers: ", n_hidden_layers) 13 | 14 | import gym 15 | import torch 16 | from torch import nn 17 | from torch.autograd import Variable 18 | from torch import optim 19 | from torch.nn import functional as F 20 | from torch import linalg as LA 21 | 22 | import numpy as np 23 | from tqdm import tqdm 24 | 25 | from src.entropies import entanglement_entropy, classical_entropy 26 | from src.visualizations import * 27 | 28 | import os 29 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 30 | 31 | torch.manual_seed(global_seed) 32 | np.random.seed(global_seed) 33 | 34 | 35 | gamma = 0.9 36 | epochs = 20000 37 | max_steps = 60 38 | learning_rate = 0.0002 39 | non_random_chance = 0.99 40 | random_scaling = 0.9998 41 | window = 40 42 | target_win_ratio = 0.98 43 | min_steps_num = 6 44 | #activation_function = 'sigmoid' 45 | #n_hidden_layers = 1 46 | results_folder = f'{n_hidden_layers}_layers_{activation_function}_activation' 47 | results_path = os.path.join('../results', 'classical_DQL_sim_quantum', results_folder) 48 | 49 | if not os.path.exists(results_path): 50 | os.mkdir(results_path) 51 | 52 | 53 | # ## 1. Create a FrozenLake environment 54 | lake = gym.make('FrozenLake-v1', is_slippery=False) 55 | lake.reset() 56 | print(lake.render(mode='ansi')) 57 | 58 | 59 | # ## 1. Define one_hot encoding function, and uniform initializer for linear layer 60 | def one_hot(ids, nb_digits): 61 | """ 62 | ids: (list, ndarray) shape:[batch_size] 63 | """ 64 | if not isinstance(ids, (list, np.ndarray)): 65 | raise ValueError("ids must be 1-D list or array") 66 | batch_size = len(ids) 67 | ids = torch.LongTensor(ids).view(batch_size, 1) 68 | out_tensor = Variable(torch.FloatTensor(batch_size, nb_digits)) 69 | out_tensor.data.zero_() 70 | out_tensor.data.scatter_(dim=1, index=ids, value=1.) 
71 | return out_tensor 72 | 73 | def uniform_linear_layer(linear_layer): 74 | linear_layer.weight.data.uniform_() 75 | linear_layer.bias.data.fill_(-0.02) 76 | 77 | 78 | # ## 3. Define Agent model, basically for Q values 79 | class Agent(nn.Module): 80 | def __init__(self, observation_space_size, action_space_size, n_hidden_layers): 81 | super(Agent, self).__init__() 82 | self.observation_space_size = observation_space_size 83 | self.hidden_size = 2*self.observation_space_size 84 | 85 | self.l1 = nn.Linear(in_features=2*self.observation_space_size, out_features=self.hidden_size) 86 | self.hidden_layers = [ 87 | nn.Linear(in_features=self.hidden_size, out_features=self.hidden_size) \ 88 | for i in range(n_hidden_layers) 89 | ] 90 | self.l2 = nn.Linear(in_features=self.hidden_size, out_features=32) 91 | self.activation = None 92 | if activation_function=='lrelu': 93 | self.activation = F.leaky_relu 94 | if activation_function=='sigmoid': 95 | self.activation = F.sigmoid 96 | if activation_function=='tanh': 97 | self.activation = F.tanh 98 | 99 | uniform_linear_layer(self.l1) 100 | for l in self.hidden_layers: 101 | uniform_linear_layer(l) 102 | 103 | uniform_linear_layer(self.l2) 104 | 105 | print('Set the neural network with:') 106 | print(f'\tInput size: \t{2*self.observation_space_size}') 107 | for i, l in enumerate(range(n_hidden_layers)): 108 | print(f'\tHidden {i+1}. layer size: \t{self.hidden_size}') 109 | print(f'\tOutput size: \t{32}') 110 | 111 | def forward(self, state): 112 | obs_emb = one_hot([int(2*state)], 2*self.observation_space_size) 113 | # first layer: 114 | out1 = self.activation(self.l1(obs_emb)) 115 | 116 | # hidden layers: 117 | for l in self.hidden_layers: 118 | out1 = self.activation(l(out1)) 119 | 120 | # output layers: 121 | out2 = self.activation(self.l2(out1)) 122 | 123 | return out2.view((-1)) 124 | 125 | 126 | # ## 4. 
Define the Trainer to optimize Agent model 127 | class Trainer: 128 | def __init__(self, n_hidden_layers): 129 | self.holes_indexes = np.array([5,7,11,12]) 130 | 131 | self.agent = Agent(lake.observation_space.n, lake.action_space.n, n_hidden_layers) 132 | self.optimizer = optim.Adam(params=self.agent.parameters(), lr=learning_rate) 133 | 134 | self.epsilon = non_random_chance 135 | self.epsilon_growth_rate = random_scaling 136 | self.gamma = gamma 137 | 138 | self.epsilon_list = [] 139 | self.success = [] 140 | self.jList = [] 141 | self.reward_list = [] 142 | 143 | self.compute_entropy = True 144 | self.entropies = [] 145 | self.cl_entropies = [] 146 | self.entropies_episodes = [0] 147 | 148 | self.print = False 149 | 150 | 151 | def train(self, epoch): 152 | # entropies_episodes = [0] * (epoch+1) 153 | for i in (pbar := tqdm(range(epoch))): 154 | pbar.set_description(f'Success rate: {sum(self.success[-window:])/window:.2%} | Random chance: {self.epsilon:.2%}') 155 | 156 | s = lake.reset() #stan na jeziorze 0-16, dla resetu 0 157 | j = 0 158 | self.entropies_episodes.append(0) 159 | while j < max_steps: 160 | j += 1 161 | # perform chosen action 162 | a = self.choose_action(s) 163 | s1, r, d, _ = lake.step(int(a)) 164 | if d == True and r == 0: r = -1 165 | elif d== True: r == 1 166 | elif r==0: r = -0.01 167 | 168 | # if self.print==False: 169 | # print(self.agent(s)[a]) 170 | # self.print=True 171 | 172 | # calculate target and loss 173 | target_q = r + self.gamma * torch.max(self.calc_probabilities(s1).detach()) 174 | 175 | loss = F.smooth_l1_loss(self.calc_probability(s, a), target_q) 176 | # update model to optimize Q 177 | self.optimizer.zero_grad() 178 | loss.backward() 179 | self.optimizer.step() 180 | 181 | # update state 182 | s = s1 183 | if(self.compute_entropy): 184 | with torch.inference_mode(): 185 | self.entropies.append(entanglement_entropy(self.calc_statevector(s))) 186 | self.cl_entropies.append(classical_entropy(self.calc_statevector(s))) 187 | self.entropies_episodes[i] += 1 188 | 189 | if d == True: break 190 | 191 | # append results onto report lists 192 | if d == True and r > 0: 193 | self.success.append(1) 194 | else: 195 | self.success.append(0) 196 | 197 | self.reward_list.append(r) 198 | self.jList.append(j) 199 | 200 | if self.epsilon < 1.: 201 | self.epsilon *= self.epsilon_growth_rate 202 | self.epsilon_list.append(self.epsilon) 203 | 204 | if i>100: 205 | if sum(self.success[-window:])/window>target_win_ratio: 206 | print("Network trained before epoch limit on {i} epoch".format(i=i)) 207 | break 208 | 209 | #print("last 100 epoches success rate: " + str(sum(self.success[-100:])/100) + "%") 210 | 211 | def choose_action(self, s): 212 | self.calc_probabilities(s) 213 | if np.random.rand(1) > self.epsilon : 214 | action = torch.argmax(self.calc_probabilities(s)) #wybor najwiekszej wartosci z tablicy 215 | else: 216 | action = torch.tensor(np.random.randint(0, 4)) 217 | return action 218 | 219 | def calc_statevector(self, s): 220 | return torch.complex(self.agent(s)[0::2], self.agent(s)[1::2]) 221 | 222 | def calc_probability(self, s, a): #liczenie prawdopodobieństwa obsadzenia kubitu (0-3) z danego stanu planszy (0-15) 223 | statevector = torch.complex(self.agent(s)[0::2], self.agent(s)[1::2]) 224 | probabilities = (statevector.abs()**2) 225 | probabilities = probabilities/probabilities.sum() #normowanie 226 | prob_indexes = [ 227 | [0,1,2,3,4,5,6,7], 228 | [0,1,2,3,8,9,10,11], 229 | [0,1,4,5,8,9,12,13], 230 | [0,2,4,6,8,10,12,14] 231 | ] 232 | return 
probabilities[prob_indexes[a]].sum() 233 | 234 | def calc_probabilities(self, s): #liczenie prawdopodobieństw każdego z kubitów z danego stanu planszy (0-15) do tensora o kształcie (4) 235 | raw_wavefunction = torch.complex(self.agent(s)[0::2], self.agent(s)[1::2]) 236 | probabilities = (raw_wavefunction.abs()**2) 237 | probabilities = probabilities/probabilities.sum() #normowanie 238 | probs_of_qubits = torch.tensor([ 239 | probabilities[[0,1,2,3,4,5,6,7]].sum(), 240 | probabilities[[0,1,2,3,8,9,10,11]].sum(), 241 | probabilities[[0,1,4,5,8,9,12,13]].sum(), 242 | probabilities[[0,2,4,6,8,10,12,14]].sum() 243 | ]) 244 | return probs_of_qubits 245 | 246 | 247 | def Q(self): 248 | Q = [] 249 | for x in range(lake.observation_space.n): 250 | Qstate = self.agent(x).detach() 251 | Qstate /= LA.norm(Qstate) 252 | Q.append(Qstate) 253 | Q_out = torch.Tensor(lake.observation_space.n, lake.action_space.n) 254 | torch.cat(Q, out=Q_out) 255 | return Q_out 256 | 257 | def Qstate(self, state): 258 | Qstate = self.agent(state).detach() 259 | Qstate /= LA.norm(Qstate) 260 | return Qstate 261 | 262 | def Qstrategy(self): 263 | return [torch.argmax(self.calc_probabilities(state)).item() for state in range(lake.observation_space.n)] 264 | 265 | 266 | # ## 5. Initialize a trainer, and perform training by 2k epoches 267 | fl = Trainer(n_hidden_layers) 268 | 269 | t = torch.Tensor(np.array([32], dtype=int)) 270 | t = t.to(torch.int32) 271 | 272 | print("Train through {epochs} epochs". format(epochs=epochs)) 273 | fl.train(epochs) 274 | 275 | plot_success_steps_history(fl.jList, fl.success) 276 | 277 | strategy = np.array(fl.Qstrategy()).reshape((4,4)) 278 | strategy_save_path = os.path.join(results_path, "trained_strategy.jpg") 279 | plot_strategy(strategy, fl.holes_indexes, strategy_save_path) 280 | 281 | entropies = np.array(fl.entropies) 282 | cl_entropies = np.array(fl.cl_entropies) 283 | entropies_save_path = os.path.join(results_path, "entropies.jpg") 284 | plot_entropies(entropies, cl_entropies, entropies_save_path) 285 | 286 | moving_average_history_save_path = os.path.join(results_path, "training_history_moving_average.jpg") 287 | plot_rolling_window_history(fl.jList, fl.reward_list, fl.success, np.array(fl.epsilon_list), target_win_ratio, min_steps_num, moving_average_history_save_path, window=window) 288 | history_save_path = os.path.join(results_path, "training_history.jpg") 289 | plot_history(fl.jList, fl.reward_list, fl.success, np.array(fl.epsilon_list), target_win_ratio, min_steps_num, history_save_path) 290 | 291 | 292 | with open(os.path.join(results_path, "hyperparameters.txt"), "w+") as f: 293 | f.write(f'gamma;{gamma}\n') 294 | f.write(f'epochs;{epochs}\n') 295 | f.write(f'max_steps;{max_steps}\n') 296 | f.write(f'learning_rate;{learning_rate}\n') 297 | f.write(f'non_random_chance;{non_random_chance}\n') 298 | f.write(f'random_scaling;{random_scaling}\n') 299 | f.write(f'window;{window}\n') 300 | f.write(f'target_win_ratio;{target_win_ratio}\n') 301 | f.write(f'min_steps_num;{min_steps_num}\n') 302 | f.write(f'n_hidden_layers;{n_hidden_layers}\n') 303 | f.write(f'activation_function;{activation_function}\n') 304 | f.write(f'global_seed;{global_seed}\n') 305 | 306 | with open(os.path.join(results_path, "entropies.txt"), "w") as f: 307 | for ent in fl.entropies: 308 | f.write(str(ent)+";") 309 | 310 | with open(os.path.join(results_path, "cl_entropies.txt"), "w") as f: 311 | for ent in fl.cl_entropies: 312 | f.write(str(ent)+";") 313 | 
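# ---------------------------------------------------------------------------
# Added illustrative sketch (not part of the original grid-search script).
# It shows how the 32 real outputs of Agent.forward() are reinterpreted by
# calc_statevector() / calc_probability() / calc_probabilities() above:
# paired into 16 complex amplitudes and reduced to per-qubit marginals via
# the hard-coded index lists. The helper names (outputs_to_statevector,
# qubit_marginals) are invented for this demo and do not exist in the repo;
# reading the result as "P(qubit_k = |0>)" assumes qubit 0 is the most
# significant bit of the basis-state index.
# ---------------------------------------------------------------------------
import torch

def outputs_to_statevector(raw_outputs):
    """Pair 32 real outputs into 16 complex amplitudes and L2-normalize."""
    state = torch.complex(raw_outputs[0::2], raw_outputs[1::2])
    return state / state.abs().pow(2).sum().sqrt()

def qubit_marginals(state):
    """Marginal probability that each of the 4 qubits is |0>."""
    probs = state.abs() ** 2
    probs = probs / probs.sum()  # same normalization as calc_probabilities()
    marginals = []
    for k in range(4):
        # basis indexes whose k-th bit (most significant first) is 0;
        # k = 0 reproduces the hard-coded list [0, 1, 2, 3, 4, 5, 6, 7]
        idx = [i for i in range(16) if ((i >> (3 - k)) & 1) == 0]
        marginals.append(probs[idx].sum())
    return torch.stack(marginals)

# Example: a random 32-dimensional output vector gives four per-qubit probabilities.
print(qubit_marginals(outputs_to_statevector(torch.rand(32))))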
-------------------------------------------------------------------------------- /scripts/QML/QML_DQN_FROZEN_LAKE.py: -------------------------------------------------------------------------------- 1 | # QML as Q Learning function approximator 2 | # Need to specify STATE input format 3 | # Computational Basis Encoding 4 | # action output is still softmax [a_0, a_1, a_2, a_3, a_4, a_5] 5 | # Deep Q-Learning DQN 6 | # Experimence Replay (For i.i.d sampling) 7 | # Target Network (Updata every C episodes) ==> Another Circuit Parameter Set 8 | 9 | # This version is enhanced with PyTorch 10 | # Adapt some code from 11 | # PyTorch tutorial on deep reinforcement learning 12 | # and 13 | # Xanadu AI github repository 14 | # Environment: OpenAI gym FrozenLake 15 | 16 | ## 17 | import pennylane as qml 18 | from pennylane import numpy as np 19 | from pennylane.optimize import NesterovMomentumOptimizer 20 | 21 | import torch 22 | import torch.nn as nn 23 | from torch.autograd import Variable 24 | 25 | import matplotlib.pyplot as plt 26 | from datetime import datetime 27 | import pickle 28 | 29 | import gym 30 | import time 31 | import random 32 | from collections import namedtuple 33 | from copy import deepcopy 34 | 35 | from scipy.linalg import logm 36 | 37 | from ShortestPathFrozenLake import ShortestPathFrozenLake 38 | 39 | # ENTANGLEMENT ENTROPY 40 | def entanglement_entropy(state): 41 | state = np.array(state.detach(), ndmin=2) 42 | ket = state.T 43 | bra = state.conj() 44 | rho_final = np.outer(ket, bra) 45 | num_wires = int(np.log2(state.size)) 46 | S = [] 47 | for d in range(1, num_wires): 48 | Ia = np.identity(2 ** d) 49 | Ib = np.identity(2 ** (num_wires - d)) 50 | Tr_a = np.empty( 51 | [2 ** d, 2 ** (num_wires - d), 2 ** (num_wires - d)], dtype=complex 52 | ) 53 | for i in range(2 ** d): 54 | ai = np.array(Ia[i], ndmin=2).T 55 | Tr_a[i] = np.kron(ai.conj().T, Ib).dot(rho_final).dot(np.kron(ai, Ib)) 56 | rho_b = Tr_a.sum(axis=0) 57 | rho_b_l2 = logm(rho_b) / np.log(2.0) 58 | S_rho_b = -rho_b.dot(rho_b_l2).trace() 59 | S.append(S_rho_b) 60 | return np.array(S).numpy().mean() 61 | 62 | 63 | # ENTROPY 64 | def classical_entropy(state): 65 | state_temp = state.detach() 66 | ket_2 = np.abs(state_temp) ** 2 67 | return -torch.sum(ket_2 * np.log2(ket_2)) # zwraca jednoelementowy tensor torch'a 68 | 69 | 70 | ## Definition of Replay Memory 71 | ## If next_state == None 72 | ## it is in the terminal state 73 | 74 | 75 | Transition = namedtuple( 76 | "Transition", ("state", "action", "reward", "next_state", "done") 77 | ) 78 | 79 | 80 | class ReplayMemory(object): 81 | def __init__(self, capacity): 82 | self.capacity = capacity 83 | self.memory = [] 84 | self.position = 0 85 | 86 | def push(self, *args): 87 | """Saves a transition.""" 88 | if len(self.memory) < self.capacity: 89 | self.memory.append(None) 90 | self.memory[self.position] = Transition(*args) 91 | self.position = (self.position + 1) % self.capacity 92 | 93 | def sample(self, batch_size): 94 | return random.sample(self.memory, batch_size) 95 | 96 | def output_all(self): 97 | return self.memory 98 | 99 | def __len__(self): 100 | return len(self.memory) 101 | 102 | 103 | #### 104 | 105 | 106 | ## Plotting Function ## 107 | """ 108 | Note: the plotting code is origin from Yang, Chao-Han Huck, et al. "Enhanced Adversarial Strategically-Timed Attacks Against Deep Reinforcement Learning." 109 | ## ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP). IEEE, 2020. 
110 | If you use the code in your research, please cite the original reference. 111 | """ 112 | 113 | 114 | def plotTrainingResultCombined( 115 | _iter_index, _iter_reward, _iter_total_steps, _fileTitle 116 | ): 117 | fig, ax = plt.subplots() 118 | # plt.yscale('log') 119 | ax.plot(_iter_index, _iter_reward, "-b", label="Reward") 120 | ax.plot(_iter_index, _iter_total_steps, "-r", label="Total Steps") 121 | leg = ax.legend() 122 | 123 | ax.set(xlabel="Iteration Index", title=_fileTitle) 124 | fig.savefig(_fileTitle + "_" + datetime.now().strftime("NO%Y%m%d%H%M%S") + ".png") 125 | 126 | 127 | def plotTrainingResultReward(_iter_index, _iter_reward, _iter_total_steps, _fileTitle): 128 | fig, ax = plt.subplots() 129 | # plt.yscale('log') 130 | ax.plot(_iter_index, _iter_reward, "-b", label="Reward") 131 | # ax.plot(_iter_index, _iter_total_steps, '-r', label='Total Steps') 132 | leg = ax.legend() 133 | 134 | ax.set(xlabel="Iteration Index", title=_fileTitle) 135 | fig.savefig( 136 | _fileTitle 137 | + "_REWARD" 138 | + "_" 139 | + datetime.now().strftime("NO%Y%m%d%H%M%S") 140 | + ".png" 141 | ) 142 | 143 | 144 | ######################################## 145 | 146 | 147 | def decimalToBinaryFixLength(_length, _decimal): 148 | binNum = bin(int(_decimal))[2:] 149 | outputNum = [int(item) for item in binNum] 150 | if len(outputNum) < _length: 151 | outputNum = np.concatenate( 152 | (np.zeros((_length - len(outputNum),)), np.array(outputNum)) 153 | ) 154 | else: 155 | outputNum = np.array(outputNum) 156 | return outputNum 157 | 158 | 159 | ## PennyLane Part ## 160 | 161 | # Specify the datatype of the Totch tensor 162 | dtype = torch.DoubleTensor 163 | 164 | ## Define a FOUR qubit system 165 | dev = qml.device("default.qubit", wires=4) 166 | # dev = qml.device('qiskit.basicaer', wires=4) 167 | def statepreparation(a): 168 | 169 | """Quantum circuit to encode the input vector into variational params 170 | 171 | Args: 172 | a: feature vector of rad and rad_square => np.array([rad_X_0, rad_X_1, rad_square_X_0, rad_square_X_1]) 173 | """ 174 | 175 | # Rot to computational basis encoding 176 | # a = [a_0, a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8] 177 | 178 | for ind in range(len(a)): 179 | qml.RX(np.pi * a[ind], wires=ind) 180 | qml.RZ(np.pi * a[ind], wires=ind) 181 | 182 | 183 | def layer(W): 184 | """ Single layer of the variational classifier. 185 | 186 | Args: 187 | W (array[float]): 2-d array of variables for one layer 188 | """ 189 | 190 | qml.CNOT(wires=[0, 1]) 191 | qml.CNOT(wires=[1, 2]) 192 | qml.CNOT(wires=[2, 3]) 193 | 194 | qml.Rot(W[0, 0], W[0, 1], W[0, 2], wires=0) 195 | qml.Rot(W[1, 0], W[1, 1], W[1, 2], wires=1) 196 | qml.Rot(W[2, 0], W[2, 1], W[2, 2], wires=2) 197 | qml.Rot(W[3, 0], W[3, 1], W[3, 2], wires=3) 198 | 199 | 200 | @qml.qnode(dev, interface="torch") 201 | def circuit(weights, angles=None): 202 | """The circuit of the variational classifier.""" 203 | # Can consider different expectation value 204 | # PauliX , PauliY , PauliZ , Identity 205 | 206 | statepreparation(angles) 207 | 208 | for W in weights: 209 | layer(W) 210 | 211 | return [qml.expval(qml.PauliZ(ind)) for ind in range(4)] 212 | 213 | 214 | def variational_classifier(var_Q_circuit, var_Q_bias, angles=None): 215 | """The variational classifier.""" 216 | 217 | # Change to SoftMax??? 
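# (added note) Keeping the raw output here is intentional for Q-learning:
# the four Pauli-Z expectation values returned by circuit(), each in the
# range [-1, 1], plus the trainable var_Q_bias are used directly as Q-value
# estimates, and epsilon_greedy() below just takes an argmax over them, so
# no softmax normalization is required.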
218 | 219 | weights = var_Q_circuit 220 | # bias_1 = var_Q_bias[0] 221 | # bias_2 = var_Q_bias[1] 222 | # bias_3 = var_Q_bias[2] 223 | # bias_4 = var_Q_bias[3] 224 | # bias_5 = var_Q_bias[4] 225 | # bias_6 = var_Q_bias[5] 226 | 227 | # raw_output = circuit(weights, angles=angles) + np.array([bias_1,bias_2,bias_3,bias_4,bias_5,bias_6]) 228 | raw_output = circuit(weights, angles=angles) + var_Q_bias 229 | 230 | # We are approximating Q Value 231 | # Maybe softmax is no need 232 | # softMaxOutPut = np.exp(raw_output) / np.exp(raw_output).sum() 233 | 234 | return raw_output 235 | 236 | 237 | def square_loss(labels, predictions): 238 | """ Square loss function 239 | 240 | Args: 241 | labels (array[float]): 1-d array of labels 242 | predictions (array[float]): 1-d array of predictions 243 | Returns: 244 | float: square loss 245 | """ 246 | loss = 0 247 | for l, p in zip(labels, predictions): 248 | loss = loss + (l - p) ** 2 249 | loss = loss / len(labels) 250 | # print("LOSS") 251 | 252 | # print(loss) 253 | 254 | # output = torch.abs(predictions - labels)**2 255 | # output = torch.sum(output) / len(labels) 256 | 257 | # loss = nn.MSELoss() 258 | # output = loss(labels.double(), predictions.double()) 259 | 260 | return loss 261 | 262 | 263 | # def square_loss(labels, predictions): 264 | # """ Square loss function 265 | 266 | # Args: 267 | # labels (array[float]): 1-d array of labels 268 | # predictions (array[float]): 1-d array of predictions 269 | # Returns: 270 | # float: square loss 271 | # """ 272 | # # In Deep Q Learning 273 | # # labels = target_action_value_Q 274 | # # predictions = action_value_Q 275 | 276 | # # loss = 0 277 | # # for l, p in zip(labels, predictions): 278 | # # loss = loss + (l - p) ** 2 279 | # # loss = loss / len(labels) 280 | 281 | # # loss = nn.MSELoss() 282 | # output = torch.abs(predictions - labels)**2 283 | # output = torch.sum(output) / len(labels) 284 | # # output = loss(torch.tensor(predictions), torch.tensor(labels)) 285 | # # print("LOSS OUTPUT") 286 | # # print(output) 287 | 288 | # return output 289 | 290 | 291 | def abs_loss(labels, predictions): 292 | """ Square loss function 293 | 294 | Args: 295 | labels (array[float]): 1-d array of labels 296 | predictions (array[float]): 1-d array of predictions 297 | Returns: 298 | float: square loss 299 | """ 300 | # In Deep Q Learning 301 | # labels = target_action_value_Q 302 | # predictions = action_value_Q 303 | 304 | # loss = 0 305 | # for l, p in zip(labels, predictions): 306 | # loss = loss + (l - p) ** 2 307 | # loss = loss / len(labels) 308 | 309 | # loss = nn.MSELoss() 310 | output = torch.abs(predictions - labels) 311 | output = torch.sum(output) / len(labels) 312 | # output = loss(torch.tensor(predictions), torch.tensor(labels)) 313 | # print("LOSS OUTPUT") 314 | # print(output) 315 | 316 | return output 317 | 318 | 319 | def huber_loss(labels, predictions): 320 | """ Square loss function 321 | 322 | Args: 323 | labels (array[float]): 1-d array of labels 324 | predictions (array[float]): 1-d array of predictions 325 | Returns: 326 | float: square loss 327 | """ 328 | # In Deep Q Learning 329 | # labels = target_action_value_Q 330 | # predictions = action_value_Q 331 | 332 | # loss = 0 333 | # for l, p in zip(labels, predictions): 334 | # loss = loss + (l - p) ** 2 335 | # loss = loss / len(labels) 336 | 337 | # loss = nn.MSELoss() 338 | loss = nn.SmoothL1Loss() 339 | # output = loss(torch.tensor(predictions), torch.tensor(labels)) 340 | # print("LOSS OUTPUT") 341 | # print(output) 342 | 343 | return 
loss(labels, predictions) 344 | 345 | 346 | def cost(var_Q_circuit, var_Q_bias, features, labels): 347 | """Cost (error) function to be minimized.""" 348 | 349 | # predictions = [variational_classifier(weights, angles=f) for f in features] 350 | # Torch data type?? 351 | 352 | predictions = [ 353 | variational_classifier( 354 | var_Q_circuit=var_Q_circuit, 355 | var_Q_bias=var_Q_bias, 356 | angles=decimalToBinaryFixLength(4, item.state), 357 | )[item.action] 358 | for item in features 359 | ] 360 | # predictions = torch.tensor(predictions,requires_grad=True) 361 | # labels = torch.tensor(labels) 362 | # print("PRIDICTIONS:") 363 | # print(predictions) 364 | # print("LABELS:") 365 | # print(labels) 366 | 367 | return square_loss(labels, predictions) 368 | 369 | 370 | ############################# 371 | 372 | 373 | def epsilon_greedy(var_Q_circuit, var_Q_bias, epsilon, n_actions, s, train=False): 374 | """ 375 | @param Q Q values state x action -> value 376 | @param epsilon for exploration 377 | @param s number of states 378 | @param train if true then no random actions selected 379 | """ 380 | 381 | # Modify to incorporate with Variational Quantum Classifier 382 | # epsilon should change along training 383 | # In the beginning => More Exploration 384 | # In the end => More Exploitation 385 | 386 | # More Random 387 | # np.random.seed(int(datetime.now().strftime("%S%f"))) 388 | 389 | if train or np.random.rand() < ((epsilon / n_actions) + (1 - epsilon)): 390 | # action = np.argmax(Q[s, :]) 391 | # variational classifier output is torch tensor 392 | # action = np.argmax(variational_classifier(var_Q_circuit = var_Q_circuit, var_Q_bias = var_Q_bias, angles = decimalToBinaryFixLength(9,s))) 393 | action = torch.argmax( 394 | variational_classifier( 395 | var_Q_circuit=var_Q_circuit, 396 | var_Q_bias=var_Q_bias, 397 | angles=decimalToBinaryFixLength(4, s), 398 | ) 399 | ) 400 | # after circuit() dev.state changes so we can compute entropy 401 | # if(compute_entropy): 402 | # S = entanglement_entropy(dev.state) #dev is global variable 403 | # entropies.append(S) 404 | else: 405 | # need to be torch tensor 406 | action = torch.tensor(np.random.randint(0, n_actions)) 407 | return action 408 | 409 | 410 | def deep_Q_Learning( 411 | alpha, 412 | gamma, 413 | epsilon, 414 | episodes, 415 | max_steps, 416 | n_tests, 417 | render=False, 418 | test=False, 419 | early_stopping_threshold=100, 420 | ): 421 | """ 422 | @param alpha learning rate 423 | @param gamma decay factor 424 | @param epsilon for exploration 425 | @param max_steps for max step in each episode 426 | @param n_tests number of test episodes 427 | """ 428 | 429 | env = ShortestPathFrozenLake(is_slippery=False) 430 | 431 | n_states, n_actions = env.observation_space.n, env.action_space.n 432 | print("NUMBER OF STATES:" + str(n_states)) 433 | print("NUMBER OF ACTIONS:" + str(n_actions)) 434 | 435 | # Initialize Q function approximator variational quantum circuit 436 | # initialize weight layers 437 | 438 | num_qubits = 4 439 | num_layers = 2 440 | # var_init = (0.01 * np.random.randn(num_layers, num_qubits, 3), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) 441 | 442 | var_init_circuit = Variable( 443 | torch.tensor( 444 | 0.01 * np.random.randn(num_layers, num_qubits, 3), device="cpu" 445 | ).type(dtype), 446 | requires_grad=True, 447 | ) 448 | var_init_bias = Variable( 449 | torch.tensor([0.0, 0.0, 0.0, 0.0], device="cpu").type(dtype), requires_grad=True 450 | ) 451 | 452 | # Define the two Q value function initial parameters 453 | # Use np copy() function to 
DEEP COPY the numpy array 454 | var_Q_circuit = var_init_circuit 455 | var_Q_bias = var_init_bias 456 | # print("INIT PARAMS") 457 | # print(var_Q_circuit) 458 | 459 | var_target_Q_circuit = var_Q_circuit.clone().detach() 460 | var_target_Q_bias = var_Q_bias.clone().detach() 461 | 462 | ########################## 463 | # Optimization method => random select train batch from replay memory 464 | # and opt 465 | 466 | # opt = NesterovMomentumOptimizer(0.01) 467 | 468 | # opt = torch.optim.Adam([var_Q_circuit, var_Q_bias], lr = 0.1) 469 | # opt = torch.optim.SGD([var_Q_circuit, var_Q_bias], lr=0.1, momentum=0.9) 470 | opt = torch.optim.RMSprop( 471 | [var_Q_circuit, var_Q_bias], 472 | lr=0.01, 473 | alpha=0.99, 474 | eps=1e-08, 475 | weight_decay=0, 476 | momentum=0, 477 | centered=False, 478 | ) 479 | 480 | ## NEed to move out of the function 481 | TARGET_UPDATE = 20 482 | batch_size = 5 483 | OPTIMIZE_STEPS = 5 484 | ## 485 | 486 | target_update_counter = 0 487 | 488 | iter_index = [] 489 | iter_reward = [] 490 | iter_total_steps = [] 491 | 492 | cost_list = [] 493 | 494 | timestep_reward = [] 495 | 496 | # Demo of generating an ACTION 497 | # Output a numpy array of value for each action 498 | 499 | # Define the replay memory 500 | # Each transition: 501 | # (s_t_0, a_t_0, r_t, s_t_1, 'DONE') 502 | 503 | memory = ReplayMemory(80) 504 | 505 | # Input Angle = decimalToBinaryFixLength(9, stateInd) 506 | # Input Angle is a numpy array 507 | 508 | # stateVector = decimalToBinaryFixLength(9, stateInd) 509 | 510 | # q_val_s_t = variational_classifier(var_Q, angles=stateVector) 511 | # # action_t = q_val_s_t.argmax() 512 | # action_t = epsilon_greedy(var_Q, epsilon, n_actions, s) 513 | # q_val_target_s_t = variational_classifier(var_target_Q, angles=stateVector) 514 | 515 | # train the variational classifier 516 | 517 | episode = 0 518 | episodes_since_last_reward_change = 0 519 | last_total_reward = -np.inf # infinity 520 | 521 | # for episode in range(episodes): 522 | while ( 523 | episode < episodes 524 | and episodes_since_last_reward_change <= early_stopping_threshold 525 | ): 526 | print(f"Episode: {episode}") 527 | # Output s in decimal format 528 | s = env.reset() 529 | # Doing epsilog greedy action selection 530 | # With var_Q 531 | a = epsilon_greedy( 532 | var_Q_circuit=var_Q_circuit, 533 | var_Q_bias=var_Q_bias, 534 | epsilon=epsilon, 535 | n_actions=n_actions, 536 | s=s, 537 | ).item() 538 | t = 0 539 | total_reward = 0 540 | done = False 541 | 542 | while t < max_steps: 543 | if render: 544 | print("###RENDER###") 545 | env.render() 546 | print("###RENDER###") 547 | t += 1 548 | 549 | target_update_counter += 1 550 | 551 | # Execute the action 552 | s_, reward, done, info = env.step(a) 553 | # print("Reward : " + str(reward)) 554 | # print("Done : " + str(done)) 555 | total_reward += reward 556 | # a_ = np.argmax(Q[s_, :]) 557 | a_ = epsilon_greedy( 558 | var_Q_circuit=var_Q_circuit, 559 | var_Q_bias=var_Q_bias, 560 | epsilon=epsilon, 561 | n_actions=n_actions, 562 | s=s_, 563 | ).item() 564 | 565 | # print("ACTION:") 566 | # print(a_) 567 | 568 | memory.push(s, a, reward, s_, done) 569 | 570 | if len(memory) > batch_size: 571 | 572 | # Sampling Mini_Batch from Replay Memory 573 | 574 | batch_sampled = memory.sample(batch_size=batch_size) 575 | 576 | # Transition = (s_t, a_t, r_t, s_t+1, done(True / False)) 577 | 578 | # item.state => state 579 | # item.action => action taken at state s 580 | # item.reward => reward given based on (s,a) 581 | # item.next_state => state arrived based 
on (s,a) 582 | 583 | Q_target = [ 584 | item.reward 585 | + (1 - int(item.done)) 586 | * gamma 587 | * torch.max( 588 | variational_classifier( 589 | var_Q_circuit=var_target_Q_circuit, 590 | var_Q_bias=var_target_Q_bias, 591 | angles=decimalToBinaryFixLength(4, item.next_state), 592 | ) 593 | ) 594 | for item in batch_sampled 595 | ] 596 | # Q_prediction = [variational_classifier(var_Q, angles=decimalToBinaryFixLength(9,item.state))[item.action] for item in batch_sampled ] 597 | 598 | # Gradient Descent 599 | # cost(weights, features, labels) 600 | # square_loss_training = square_loss(labels = Q_target, Q_predictions) 601 | # print("UPDATING PARAMS...") 602 | 603 | # CHANGE TO TORCH OPTIMIZER 604 | 605 | # var_Q = opt.step(lambda v: cost(v, batch_sampled, Q_target), var_Q) 606 | # opt.zero_grad() 607 | # loss = cost(var_Q_circuit = var_Q_circuit, var_Q_bias = var_Q_bias, features = batch_sampled, labels = Q_target) 608 | # print(loss) 609 | # FIX this gradient error 610 | # loss.backward() 611 | # opt.step(loss) 612 | 613 | def closure(): 614 | opt.zero_grad() 615 | entropies[0].append(entanglement_entropy(dev.state)) 616 | loss = cost( 617 | var_Q_circuit=var_Q_circuit, 618 | var_Q_bias=var_Q_bias, 619 | features=batch_sampled, 620 | labels=Q_target, 621 | ) 622 | entropies[1].append(entanglement_entropy(dev.state)) 623 | # print(loss) 624 | loss.backward() 625 | entropies[2].append(entanglement_entropy(dev.state)) 626 | entropies[3].append(classical_entropy(dev.state)) 627 | return loss 628 | 629 | opt.step(closure) 630 | 631 | # print("UPDATING PARAMS COMPLETED") 632 | current_replay_memory = memory.output_all() 633 | current_target_for_replay_memory = [ 634 | item.reward 635 | + (1 - int(item.done)) 636 | * gamma 637 | * torch.max( 638 | variational_classifier( 639 | var_Q_circuit=var_target_Q_circuit, 640 | var_Q_bias=var_target_Q_bias, 641 | angles=decimalToBinaryFixLength(4, item.next_state), 642 | ) 643 | ) 644 | for item in current_replay_memory 645 | ] 646 | # current_target_for_replay_memory = [item.reward + (1 - int(item.done)) * gamma * np.max(variational_classifier(var_target_Q, angles=decimalToBinaryFixLength(9,item.next_state))) for item in current_replay_memory] 647 | 648 | # if t%5 == 0: 649 | # cost_ = cost(var_Q_circuit = var_Q_circuit, var_Q_bias = var_Q_bias, features = current_replay_memory, labels = current_target_for_replay_memory) 650 | # print("Cost: ") 651 | # print(cost_.item()) 652 | # cost_list.append(cost_) 653 | 654 | if target_update_counter > TARGET_UPDATE: 655 | print("UPDATING TARGET CIRCUIT...") 656 | 657 | var_target_Q_circuit = var_Q_circuit.clone().detach() 658 | var_target_Q_bias = var_Q_bias.clone().detach() 659 | 660 | target_update_counter = 0 661 | 662 | s, a = s_, a_ 663 | 664 | if done: 665 | if render: 666 | print("###FINAL RENDER###") 667 | env.render() 668 | print("###FINAL RENDER###") 669 | print(f"This episode took {t} timesteps and reward: {total_reward}") 670 | epsilon = epsilon / ((episode / 100) + 1) 671 | # print("Q Circuit Params:") 672 | # print(var_Q_circuit) 673 | print(f"This episode took {t} timesteps and reward: {total_reward}") 674 | timestep_reward.append(total_reward) 675 | iter_index.append(episode) 676 | iter_reward.append(total_reward) 677 | iter_total_steps.append(t) 678 | break 679 | 680 | ######################################################################## 681 | # EARLY STOPPING 682 | ######################################################################## 683 | if total_reward != last_total_reward: 684 | 
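# (added note) Early-stopping bookkeeping: any change in the episode reward
# resets the counter in this branch, while an unchanged positive reward
# increments it in the elif below; the outer while-loop stops once the
# counter exceeds early_stopping_threshold.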
last_total_reward = total_reward 685 | episodes_since_last_reward_change = 0 686 | elif ( 687 | total_reward == last_total_reward and total_reward > 0 688 | ): # Increment the counter only if the reward was positive 689 | episodes_since_last_reward_change += 1 690 | ######################################################################## 691 | 692 | # Increment the episode's index 693 | episode += 1 694 | 695 | if render: 696 | print(f"Here are the Q values:\n{var_Q_circuit}\nTesting now:") 697 | if test: 698 | test_agent(var_Q_circuit, var_Q_bias, env, n_tests, n_actions) 699 | return ( 700 | timestep_reward, 701 | iter_index, 702 | iter_reward, 703 | iter_total_steps, 704 | var_Q_circuit, 705 | var_Q_bias, 706 | ) 707 | 708 | 709 | def test_agent(var_Q_circuit, var_Q_bias, env, n_tests, n_actions, delay=1): 710 | for test in range(n_tests): 711 | print(f"Test #{test}") 712 | s = env.reset() 713 | done = False 714 | epsilon = 0 715 | while True: 716 | time.sleep(delay) 717 | env.render() 718 | a = epsilon_greedy( 719 | var_Q_circuit, var_Q_bias, epsilon, n_actions, s, train=False 720 | ).item() 721 | print(f"Chose action {a} for state {s}") 722 | s, reward, done, info = env.step(a) 723 | if done: 724 | if reward > 0: 725 | print("Reached goal!") 726 | else: 727 | print("Shit! dead x_x") 728 | time.sleep(3) 729 | break 730 | 731 | 732 | # Should add plotting function and KeyboardInterrupt Handler 733 | 734 | # run circuit with trained params 735 | def run(n_tests): 736 | env = gym.make("Deterministic-ShortestPath-4x4-FrozenLake-v0") 737 | # env = gym.make('Deterministic-4x4-FrozenLake-v0') 738 | n_states, n_actions = env.observation_space.n, env.action_space.n 739 | print("NUMBER OF STATES:" + str(n_states)) 740 | print("NUMBER OF ACTIONS:" + str(n_actions)) 741 | 742 | # Initialize Q function approximator variational quantum circuit 743 | # initialize weight layers 744 | 745 | with open( 746 | "VQDQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSPropNO20190628142021_var_Q_circuit.txt", 747 | "rb", 748 | ) as fp: 749 | var_Q_circuit = pickle.load(fp) 750 | 751 | with open( 752 | "VQDQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSPropNO20190628142021_var_Q_bias" 753 | + ".txt", 754 | "rb", 755 | ) as fp: 756 | var_Q_bias = pickle.load(fp) 757 | 758 | # with open("_iter_reward" + ".txt", "rb") as fp: 759 | # iter_reward = pickle.load(fp) 760 | 761 | test_agent(var_Q_circuit, var_Q_bias, env, n_tests, n_actions) 762 | 763 | 764 | if __name__ == "__main__": 765 | alpha = 0.4 766 | gamma = 0.999 767 | epsilon = 0.999 768 | episodes = 10000 769 | max_steps = 2500 770 | n_tests = 10 771 | early_stopping_threshold = 20 772 | 773 | compute_entropy = True 774 | if compute_entropy: 775 | entropies = [[], [], [], []] 776 | train = True # Training from scratch 777 | evaluate = False # Evaluation 778 | if train: 779 | # timestep_reward, iter_index, iter_reward, iter_total_steps , var_Q_circuit, var_Q_bias = deep_Q_Learning(alpha, gamma, epsilon, episodes, max_steps, n_tests, render=False, test=False) 780 | ( 781 | timestep_reward, 782 | iter_index, 783 | iter_reward, 784 | iter_total_steps, 785 | var_Q_circuit, 786 | var_Q_bias, 787 | ) = deep_Q_Learning( 788 | alpha, 789 | gamma, 790 | epsilon, 791 | episodes, 792 | max_steps, 793 | n_tests, 794 | render=False, 795 | test=False, 796 | early_stopping_threshold=early_stopping_threshold, 797 | ) 798 | 799 | print(timestep_reward) 800 | 801 | ## Drawing Training Result ## 802 | file_title = "VQDQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp" + datetime.now().strftime( 
803 | "NO%Y%m%d%H%M%S" 804 | ) 805 | 806 | plotTrainingResultReward( 807 | _iter_index=iter_index, 808 | _iter_reward=iter_reward, 809 | _iter_total_steps=iter_total_steps, 810 | _fileTitle="Quantum_DQN_Frozen_Lake_NonSlip_Dynamic_Epsilon_RMSProp", 811 | ) 812 | # plt.show() 813 | ## Saving the model 814 | with open(file_title + "_var_Q_circuit" + ".txt", "wb") as fp: 815 | pickle.dump(var_Q_circuit, fp) 816 | 817 | with open(file_title + "_var_Q_bias" + ".txt", "wb") as fp: 818 | pickle.dump(var_Q_bias, fp) 819 | 820 | with open(file_title + "_iter_reward" + ".txt", "wb") as fp: 821 | pickle.dump(iter_reward, fp) 822 | if compute_entropy: 823 | with open("entropies.txt", "wb") as fp: 824 | entropies = np.array(entropies) 825 | np.savetxt(fp, entropies) 826 | 827 | fig, ax = plt.subplots() 828 | ax.plot(entropies[0].real, label="before loss") 829 | ax.plot(entropies[1].real, label="after loss") 830 | ax.plot(entropies[2].real, ":", label="after .backward") 831 | ax.plot(entropies[3].real, ":", label="classical") 832 | ax.set_title("Entropies") 833 | fig.legend() 834 | fig.savefig("entropies.png") 835 | # plt.show() 836 | 837 | if evaluate: 838 | run(n_tests) 839 | -------------------------------------------------------------------------------- /scripts/QML/README_ENTANGLEMENT.md: -------------------------------------------------------------------------------- 1 | The files needed to run this are "QML_DQN_FROZEN_LAKE.py" and "ShortestPathFrozenLake.py" (they live in the "RUN_QML" folder; it is easiest to copy the whole folder and work from inside it). 2 | 3 | The number of episodes can be changed in QML_DQN_... 4 | 5 | Run the script "QML_DQN_FROZEN_LAKE.py". 6 | 7 | Collect all of the generated output files. 8 | 9 | Required libraries: 10 | gym 11 | numpy 12 | pennylane 13 | pytorch 14 | scipy -------------------------------------------------------------------------------- /scripts/QML/ShortestPathFrozenLake.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.envs import toy_text 3 | import sys 4 | 5 | class ShortestPathFrozenLake(toy_text.frozen_lake.FrozenLakeEnv): 6 | def __init__(self, **kwargs): 7 | super(ShortestPathFrozenLake, self).__init__(**kwargs) 8 | 9 | for state in range(self.observation_space.start, self.observation_space.n): # for all states 10 | for action in range(self.action_space.start, self.action_space.n): # for all actions 11 | my_transitions = [] 12 | for (prob, next_state, _, is_terminal) in self.P[state][action]: 13 | row = next_state // self.ncol 14 | col = next_state - row * self.ncol 15 | tile_type = self.desc[row, col] 16 | if tile_type == b'H': 17 | reward = -0.2 18 | elif tile_type == b'G': 19 | reward = 1. 
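# (added note) every remaining non-terminal tile falls through to the small
# step penalty below, so shorter successful walks collect more total reward;
# together with the -0.2 hole penalty and +1 goal reward this is what makes
# the subclass a "shortest path" variant of FrozenLake.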
20 | else: 21 | reward = -0.01 22 | 23 | my_transitions.append((prob, next_state, reward, is_terminal)) 24 | self.P[state][action] = my_transitions 25 | -------------------------------------------------------------------------------- /scripts/QML/run_job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -N1 3 | #SBATCH -c8 4 | #SBATCH --mem=4gb 5 | #SBATCH --time=168:00:00 6 | 7 | #SBATCH --job-name=QML 8 | 9 | python3 QML_DQN_FROZEN_LAKE.py 10 | -------------------------------------------------------------------------------- /scripts/auto_hp_tuning_visuals.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | from mpl_toolkits import mplot3d 4 | import numpy as np 5 | import os 6 | import matplotlib.ticker as mticker 7 | 8 | 9 | def log_tick_formatter(val, pos=None): 10 | return f"$10^{{{int(val)}}}$" 11 | 12 | 13 | base_results_df_path = os.path.join("..", "results", "auto_hp_tuning")+'/results.csv' 14 | 15 | res_df = pd.read_csv(base_results_df_path, index_col=0) 16 | res_df = res_df[['if_trained', 'win_ratio', 'episode_reward_mean', 'steps','config/gamma', 17 | 'config/lr', 'config/n_hidden_layers', 'config/random_scaling']] 18 | 19 | 20 | ax = plt.axes(projection='3d') 21 | 22 | # Data for three-dimensional scattered points 23 | zdata = res_df['win_ratio'] 24 | xdata = np.log10(res_df['config/gamma']) 25 | ydata = np.log10(res_df['config/lr']) 26 | 27 | ax.plot_trisurf(xdata, ydata, zdata, cmap='viridis') 28 | 29 | ax.set_xlabel("gamma") 30 | ax.set_ylabel("learning rate") 31 | ax.set_zlabel("win ratio") 32 | 33 | ax.yaxis.set_major_formatter(mticker.FuncFormatter(log_tick_formatter)) 34 | ax.yaxis.set_major_locator(mticker.MaxNLocator(integer=True)) 35 | 36 | ax.xaxis.set_major_formatter(mticker.FuncFormatter(log_tick_formatter)) 37 | ax.xaxis.set_major_locator(mticker.MaxNLocator(integer=True)) 38 | 39 | plt.suptitle('Win ratio distribution in experiments:') 40 | plt.show() 41 | 42 | 43 | ax = plt.axes(projection='3d') 44 | 45 | ax.scatter3D(xdata, ydata, zdata, c=res_df['config/n_hidden_layers'], cmap='viridis') 46 | 47 | ax.set_xlabel("gamma") 48 | ax.set_ylabel("learning rate") 49 | ax.set_zlabel("win ratio") 50 | 51 | ax.yaxis.set_major_formatter(mticker.FuncFormatter(log_tick_formatter)) 52 | ax.yaxis.set_major_locator(mticker.MaxNLocator(integer=True)) 53 | 54 | ax.xaxis.set_major_formatter(mticker.FuncFormatter(log_tick_formatter)) 55 | ax.xaxis.set_major_locator(mticker.MaxNLocator(integer=True)) 56 | 57 | plt.suptitle('Win ratio distribution in experiments:') 58 | plt.show() -------------------------------------------------------------------------------- /scripts/src/DQL/quant_sim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch import optim 4 | from torch.nn import functional as F 5 | from torch import linalg as LA 6 | 7 | import numpy as np 8 | from tqdm import tqdm 9 | 10 | from src.entropies import entanglement_entropy, classical_entropy 11 | from src.visualizations import * 12 | from src.utils import one_hot, uniform_linear_layer 13 | 14 | 15 | class Agent(nn.Module): 16 | def __init__(self, observation_space_size, n_hidden_layers, activation_function): 17 | super(Agent, self).__init__() 18 | self.observation_space_size = observation_space_size 19 | self.hidden_size = 2*self.observation_space_size 20 | 21 | self.l1 = 
nn.Linear(in_features=2*self.observation_space_size, out_features=self.hidden_size)
22 |         self.hidden_layers = nn.ModuleList([
23 |             nn.Linear(in_features=self.hidden_size, out_features=self.hidden_size)
24 |             for i in range(n_hidden_layers)
25 |         ])
26 |         self.l2 = nn.Linear(in_features=self.hidden_size, out_features=32)
27 |         self.activation = None
28 |         if activation_function=='lrelu':
29 |             self.activation = F.leaky_relu
30 |         if activation_function=='sigmoid':
31 |             self.activation = F.sigmoid
32 |         if activation_function=='tanh':
33 |             self.activation = F.tanh
34 | 
35 |         uniform_linear_layer(self.l1)
36 |         for l in self.hidden_layers:
37 |             uniform_linear_layer(l)
38 | 
39 |         uniform_linear_layer(self.l2)
40 | 
41 |         print('Set up the neural network with:')
42 |         print(f'\tInput size: \t{2*self.observation_space_size}')
43 |         for i in range(n_hidden_layers):
44 |             print(f'\tHidden layer {i+1} size: \t{self.hidden_size}')
45 |         print(f'\tOutput size: \t{32}')
46 | 
47 |     def forward(self, state):
48 |         obs_emb = one_hot([int(2*state)], 2*self.observation_space_size)
49 |         # first layer:
50 |         out1 = self.activation(self.l1(obs_emb))
51 | 
52 |         # hidden layers:
53 |         for l in self.hidden_layers:
54 |             out1 = self.activation(l(out1))
55 | 
56 |         # output layer:
57 |         out2 = self.activation(self.l2(out1))
58 | 
59 |         return out2.view((-1))
60 | 
61 | 
62 | class Trainer:
63 |     def __init__(self, n_hidden_layers, lake, learning_rate, non_random_chance, random_scaling, gamma, activation_function):
64 |         self.holes_indexes = np.array([5,7,11,12])
65 | 
66 |         self.lake = lake
67 |         self.agent = Agent(self.lake.observation_space.n, n_hidden_layers, activation_function)
68 |         self.optimizer = optim.Adam(params=self.agent.parameters(), lr=learning_rate)
69 | 
70 |         self.epsilon = non_random_chance
71 |         self.epsilon_growth_rate = random_scaling
72 |         self.gamma = gamma
73 | 
74 |         self.epsilon_list = []
75 |         self.success = []
76 |         self.jList = []
77 |         self.reward_list = []
78 | 
79 |         self.compute_entropy = True
80 |         self.entropies = []
81 |         self.cl_entropies = []
82 |         self.entropies_episodes = [0]
83 | 
84 |         self.print = False
85 | 
86 | 
87 |     def train(self, epoch, max_steps, window, target_win_ratio):
88 |         # entropies_episodes = [0] * (epoch+1)
89 |         for i in (pbar := tqdm(range(epoch))):
90 |             pbar.set_description(f'Success rate: {sum(self.success[-window:])/window:.2%} | Random chance: {self.epsilon:.2%}')
91 | 
92 |             s = self.lake.reset()  # state on the lake (0-15); reset returns state 0
93 |             j = 0
94 |             self.entropies_episodes.append(0)
95 |             while j < max_steps:
96 |                 j += 1
97 |                 # perform chosen action
98 |                 a = self.choose_action(s)
99 |                 s1, r, d, _ = self.lake.step(int(a))
100 |                 if d == True and r == 0: r = -1
101 |                 elif d == True: r = 1
102 |                 elif r == 0: r = -0.01
103 | 
104 |                 # if self.print==False:
105 |                 #     print(self.agent(s)[a])
106 |                 #     self.print=True
107 | 
108 |                 # calculate target and loss
109 |                 target_q = r + self.gamma * torch.max(self.calc_probabilities(s1).detach())
110 | 
111 |                 loss = F.smooth_l1_loss(self.calc_probability(s, a), target_q)
112 |                 # update model to optimize Q
113 |                 self.optimizer.zero_grad()
114 |                 loss.backward()
115 |                 self.optimizer.step()
116 | 
117 |                 # update state
118 |                 s = s1
119 |                 if(self.compute_entropy):
120 |                     self.entropies.append(entanglement_entropy(self.calc_probabilities(s)))
121 |                     self.cl_entropies.append(classical_entropy(self.calc_probabilities(s)))
122 |                     self.entropies_episodes[i] += 1
123 | 
124 |                 if d == True: break
125 | 
126 |             # append results onto report lists
127 |             if d == True and r > 0:
128 | 
                self.success.append(1)
129 |             else:
130 |                 self.success.append(0)
131 | 
132 |             self.reward_list.append(r)
133 |             self.jList.append(j)
134 | 
135 |             if self.epsilon < 1.:
136 |                 self.epsilon *= self.epsilon_growth_rate
137 |             self.epsilon_list.append(self.epsilon)
138 | 
139 |             if i%10==0 and i>100:
140 |                 if sum(self.success[-window:])/window>target_win_ratio:
141 |                     print("Network trained before the epoch limit, at epoch {i}".format(i=i))
142 |                     break
143 | 
144 |         #print("last 100 epochs success rate: " + str(sum(self.success[-100:])/100) + "%")
145 | 
146 |     def choose_action(self, s):
147 |         self.calc_probabilities(s)
148 |         if np.random.rand(1) > self.epsilon :
149 |             action = torch.argmax(self.calc_probabilities(s))  # choose the action with the highest probability
150 |         else:
151 |             action = torch.tensor(np.random.randint(0, 4))
152 |         return action
153 | 
154 |     def calc_probability(self, s, a):  # occupation probability of qubit a (0-3) for a given board state (0-15)
155 |         raw_wavefunction = torch.complex(self.agent(s)[0::2], self.agent(s)[1::2])
156 |         probabilities = (raw_wavefunction.abs()**2)
157 |         probabilities = probabilities/probabilities.sum()  # normalization
158 |         prob_indexes = [
159 |             [0,1,2,3,4,5,6,7],
160 |             [0,1,2,3,8,9,10,11],
161 |             [0,1,4,5,8,9,12,13],
162 |             [0,2,4,6,8,10,12,14]
163 |         ]
164 |         return probabilities[prob_indexes[a]].sum()
165 | 
166 |     def calc_probabilities(self, s):  # occupation probabilities of each qubit for a given board state (0-15), as a tensor of shape (4)
167 |         raw_wavefunction = torch.complex(self.agent(s)[0::2], self.agent(s)[1::2])
168 |         probabilities = (raw_wavefunction.abs()**2)
169 |         probabilities = probabilities/probabilities.sum()  # normalization
170 |         probs_of_qubits = torch.tensor([
171 |             probabilities[[0,1,2,3,4,5,6,7]].sum(),
172 |             probabilities[[0,1,2,3,8,9,10,11]].sum(),
173 |             probabilities[[0,1,4,5,8,9,12,13]].sum(),
174 |             probabilities[[0,2,4,6,8,10,12,14]].sum()
175 |         ])
176 |         return probs_of_qubits
177 | 
178 | 
179 |     def Q(self):
180 |         Q = []
181 |         for x in range(self.lake.observation_space.n):
182 |             Qstate = self.agent(x).detach()
183 |             Qstate /= LA.norm(Qstate)
184 |             Q.append(Qstate)
185 |         Q_out = torch.Tensor(self.lake.observation_space.n, self.lake.action_space.n)
186 |         torch.cat(Q, out=Q_out)
187 |         return Q_out
188 | 
189 |     def Qstate(self, state):
190 |         Qstate = self.agent(state).detach()
191 |         Qstate /= LA.norm(Qstate)
192 |         return Qstate
193 | 
194 |     def Qstrategy(self):
195 |         return [torch.argmax(self.calc_probabilities(state)).item() for state in range(self.lake.observation_space.n)]
--------------------------------------------------------------------------------
/scripts/src/entropies.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.linalg import logm
3 | from torch import sum as t_sum
4 | 
5 | 
6 | def entanglement_entropy(state):
7 |     #state = np.array(state, ndmin=2)
8 |     state = np.array(state.detach().numpy(), ndmin=2)
9 | 
10 |     ket = state.T
11 |     bra = state.conj()
12 |     rho_final = np.outer(ket,bra)
13 |     num_wires = int(np.log2(state.size))
14 |     S = []
15 |     for d in range(1, num_wires):
16 |         Ia = np.identity(2**d)
17 |         Ib = np.identity(2**(num_wires-d))
18 |         Tr_a = np.empty([2**d, 2**(num_wires-d), 2**(num_wires-d)], dtype=complex)
19 |         for i in range(2**d):
20 |             ai = np.array(Ia[i], ndmin=2).T
21 |             Tr_a[i] = np.kron(ai.conj().T, Ib).dot(rho_final).dot(np.kron(ai,Ib))
22 |         rho_b = Tr_a.sum(axis=0)
23 |         rho_b_l2 = logm(rho_b+0.000001)/np.log(2.0)
24 |         S_rho_b = - 
rho_b.dot(rho_b_l2).trace() 25 | S.append(S_rho_b) 26 | return np.array(S).mean() 27 | 28 | def classical_entropy(state): 29 | state_temp = state.detach() 30 | ket_2 = np.abs(state_temp)**2 31 | return - t_sum(ket_2 * np.log2(ket_2)) 32 | -------------------------------------------------------------------------------- /scripts/src/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import numpy as np 4 | 5 | def one_hot(ids, nb_digits): 6 | """ 7 | ids: (list, ndarray) shape:[batch_size] 8 | """ 9 | if not isinstance(ids, (list, np.ndarray)): 10 | raise ValueError("ids must be 1-D list or array") 11 | batch_size = len(ids) 12 | ids = torch.LongTensor(ids).view(batch_size, 1) 13 | out_tensor = Variable(torch.FloatTensor(batch_size, nb_digits)) 14 | out_tensor.data.zero_() 15 | out_tensor.data.scatter_(dim=1, index=ids, value=1.) 16 | return out_tensor 17 | 18 | def uniform_linear_layer(linear_layer): 19 | linear_layer.weight.data.uniform_() 20 | linear_layer.bias.data.fill_(-0.02) -------------------------------------------------------------------------------- /scripts/src/visualizations.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pandas as pd 4 | 5 | def plot_success_steps_history(steps_list, success_list): 6 | plt.plot(steps_list, label="Steps in epoch") 7 | plt.plot(success_list, label="If success") 8 | plt.legend() 9 | plt.title("Steps from epochs with success indicator") 10 | plt.show() 11 | 12 | 13 | def plot_strategy(strategy, holes_indexes, save_path, custom_angles=None): 14 | #just for the plot purposes 15 | strategy_angles = ((2-strategy)%4)*90 16 | if custom_angles is not None: 17 | strategy_angles = custom_angles 18 | fig, axs = plt.subplots(1, 1, figsize=(3.5, 3.5), sharex=True, sharey=True, tight_layout=True) 19 | axs.set_aspect(1) 20 | x,y = np.meshgrid(np.linspace(0,3,4), np.linspace(3,0,4)) 21 | axs.quiver(x, y, np.ones((x.shape))*1.5,np.ones((x.shape))*1.5,angles=np.flip(strategy_angles, axis=0), pivot='middle', units='xy') 22 | axs.scatter( [0], [0], c="cornflowerblue", s=150, alpha=0.6, label="start") 23 | axs.scatter( holes_indexes%4, holes_indexes//4, c="firebrick", s=150, alpha=0.6, label="hole") 24 | axs.scatter( [3], [3], c="mediumseagreen", s=150, alpha=0.6, label="goal") 25 | major_ticks = np.arange(0, 4, 1) 26 | axs.set_xticks(major_ticks) 27 | axs.set_yticks(major_ticks) 28 | axs.set_title("Move strategy from Qtable") 29 | axs.grid(which="major", alpha=0.4) 30 | axs.legend() 31 | plt.savefig(save_path, dpi=900) 32 | plt.show() 33 | 34 | 35 | def plot_entropies(entropies, cl_entropies, save_path): 36 | fig, ax = plt.subplots() 37 | ax.plot(entropies, label="entglmt_entr Lax") 38 | ax.plot(cl_entropies, color='red', label="cl_entropy Rax", alpha=0.4) 39 | ax.legend() 40 | plt.savefig(save_path, dpi=900) 41 | plt.show() 42 | 43 | 44 | def plot_rolling_window_history(steps_list, reward_list, success_list, epsilon_list, target_win_ratio, min_steps_num, save_path, window=40): 45 | plt.figure(figsize=[9,16]) 46 | plt.subplot(411) 47 | plt.plot(pd.Series(steps_list).rolling(window).mean()) 48 | plt.title('Step Moving Average ({}-episode window)'.format(window)) 49 | plt.ylabel('Moves') 50 | plt.xlabel('Episode') 51 | plt.axhline(y=min_steps_num, color='g', linestyle='-', label=f'Optimal number of steps: {min_steps_num}') 52 | plt.ylim(bottom=0) 53 | plt.legend() 
54 | plt.grid() 55 | 56 | plt.subplot(412) 57 | plt.plot(pd.Series(reward_list).rolling(window).mean()) 58 | plt.title('Reward Moving Average ({}-episode window)'.format(window)) 59 | plt.ylabel('Reward') 60 | plt.xlabel('Episode') 61 | plt.ylim(-1.1, 1.1) 62 | plt.grid() 63 | 64 | plt.subplot(413) 65 | plt.plot(pd.Series(success_list).rolling(window).mean()) 66 | plt.title('Wins Moving Average ({}-episode window)'.format(window)) 67 | plt.ylabel('If won') 68 | plt.axhline(y=target_win_ratio, color='r', linestyle='-', label=f'Early stop condition: {target_win_ratio*100:.2f}%') 69 | plt.legend() 70 | plt.xlabel('Episode') 71 | plt.ylim(-0.1, 1.1) 72 | plt.grid() 73 | 74 | plt.subplot(414) 75 | plt.plot(np.array(epsilon_list)) 76 | plt.title('Random Action Parameter') 77 | plt.ylabel('Chance Random Action') 78 | plt.xlabel('Episode') 79 | plt.ylim(-0.1, 1.1) 80 | plt.grid() 81 | 82 | plt.tight_layout(pad=2) 83 | plt.savefig(save_path, dpi=450) 84 | plt.show() 85 | 86 | 87 | def plot_history(steps_list, reward_list, success_list, epsilon_list, target_win_ratio, min_steps_num, save_path): 88 | plt.figure(figsize=[9,16]) 89 | plt.subplot(411) 90 | plt.plot(pd.Series(steps_list)) 91 | plt.title('Steps per training episode') 92 | plt.ylabel('Moves') 93 | plt.xlabel('Episode') 94 | plt.axhline(y=min_steps_num, color='g', linestyle='-', label=f'Optimal number of steps: {min_steps_num}') 95 | plt.ylim(bottom=0) 96 | plt.legend() 97 | plt.grid() 98 | 99 | plt.subplot(412) 100 | plt.plot(pd.Series(reward_list)) 101 | plt.title('Reward per training episode') 102 | plt.ylabel('Reward') 103 | plt.xlabel('Episode') 104 | plt.ylim(-1.1, 1.1) 105 | plt.grid() 106 | 107 | plt.subplot(413) 108 | plt.plot(pd.Series(success_list)) 109 | plt.title('Wins per training episode') 110 | plt.ylabel('If won') 111 | plt.axhline(y=target_win_ratio, color='r', linestyle='-', label=f'Early stop condition: {target_win_ratio*100:.2f}%') 112 | plt.legend() 113 | plt.xlabel('Episode') 114 | plt.ylim(-0.1, 1.1) 115 | plt.grid() 116 | 117 | plt.subplot(414) 118 | plt.plot(np.array(epsilon_list)) 119 | plt.title('Random Action Parameter') 120 | plt.ylabel('Chance Random Action') 121 | plt.xlabel('Episode') 122 | plt.ylim(-0.1, 1.1) 123 | plt.grid() 124 | 125 | plt.tight_layout(pad=2) 126 | plt.savefig(save_path, dpi=450) 127 | plt.show() -------------------------------------------------------------------------------- /tutorials/training_example.py: -------------------------------------------------------------------------------- 1 | import pennylane as qml 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | 6 | np.random.seed(42) 7 | 8 | # we generate a three-dimensional random vector by sampling 9 | # each entry from a standard normal distribution 10 | v = np.random.normal(0, 1, 3) 11 | 12 | # purity of the target state 13 | purity = 0.66 14 | 15 | # create a random Bloch vector with the specified purity 16 | bloch_v = np.sqrt(2 * purity - 1) * v / np.sqrt(np.sum(v ** 2)) 17 | # bloch_v = np.array([0.0, 0.0, 1.0]) 18 | 19 | # array of Pauli matrices (will be useful later) 20 | Paulis = np.zeros((3, 2, 2), dtype=complex) 21 | Paulis[0] = [[0, 1], [1, 0]] 22 | Paulis[1] = [[0, -1j], [1j, 0]] 23 | Paulis[2] = [[1, 0], [0, -1]] 24 | 25 | # number of qubits in the circuit 26 | nr_qubits = 3 27 | # number of layers in the circuit 28 | nr_layers = 2 29 | 30 | # randomly initialize parameters from a normal distribution. 
Below 31 | # - first parameter (equal to 0) is the “centre” of the distribution, 32 | # - second parameter (equal to pi) is the standard deviation of the distribution 33 | params = np.random.normal(0, np.pi, (nr_qubits, nr_layers, 3)) 34 | # params = np.zeros((nr_qubits, nr_layers, 3)) 35 | params = Variable(torch.tensor(params), requires_grad=True) 36 | 37 | # a layer of the circuit ansatz 38 | def layer(params, j): 39 | for i in range(nr_qubits): 40 | qml.RX(params[i, j, 0], wires=i) 41 | qml.RY(params[i, j, 1], wires=i) 42 | qml.RZ(params[i, j, 2], wires=i) 43 | 44 | qml.CNOT(wires=[0, 1]) 45 | qml.CNOT(wires=[0, 2]) 46 | qml.CNOT(wires=[1, 2]) 47 | 48 | dev = qml.device("default.qubit", wires=3) 49 | 50 | @qml.qnode(dev, interface="torch") 51 | def circuit(params, A=None): 52 | 53 | # repeatedly apply each layer in the circuit 54 | for j in range(nr_layers): 55 | layer(params, j) 56 | 57 | # returns the expectation of the input matrix A on the first qubit 58 | return qml.expval(qml.Hermitian(A, wires=0)) 59 | 60 | # cost function 61 | def cost_fn(params): 62 | cost = 0 63 | for k in range(3): 64 | cost += torch.abs(circuit(params, A=Paulis[k]) - bloch_v[k]) 65 | 66 | return cost 67 | 68 | # set up the optimizer 69 | opt = torch.optim.Adam([params], lr=0.1) 70 | 71 | # number of steps in the optimization routine 72 | steps = 200 73 | 74 | # the final stage of optimization isn't always the best, so we keep track of 75 | # the best parameters along the way 76 | best_cost = cost_fn(params) 77 | best_params = np.zeros((nr_qubits, nr_layers, 3)) 78 | 79 | print("Cost after 0 steps is {:.4f}".format(cost_fn(params))) 80 | 81 | # optimization begins 82 | for n in range(steps): 83 | opt.zero_grad() 84 | loss = cost_fn(params) 85 | loss.backward() 86 | opt.step() 87 | 88 | # keeps track of best parameters 89 | if loss < best_cost: 90 | best_params = params 91 | 92 | # Keep track of progress every 10 steps 93 | if n % 10 == 9 or n == steps - 1: 94 | print("Cost after {} steps is {:.4f}".format(n + 1, loss)) 95 | 96 | # calculate the Bloch vector of the output state 97 | output_bloch_v = np.zeros(3) 98 | for l in range(3): 99 | output_bloch_v[l] = circuit(best_params, A=Paulis[l]) 100 | 101 | print("Target Bloch vector = ", bloch_v) 102 | print("Output Bloch vector = ", output_bloch_v) 103 | print(circuit.draw()) 104 | 105 | # This code would give such an output: 106 | # Cost after 0 steps is 1.0179 107 | # Cost after 10 steps is 0.1467 108 | # Cost after 20 steps is 0.0768 109 | # ... 110 | # Cost after 190 steps is 0.0502 111 | # Cost after 200 steps is 0.0573 112 | # Target Bloch vector = [ 0.33941241 -0.09447812 0.44257553] 113 | # Output Bloch vector = [ 0.3070773 -0.07421859 0.47392787] 114 | # Found circuit: 115 | # 0: ──RX(4.974)───RY(-0.739)──RZ(-0.358)──╭C──╭C───RX(4.6)──RY(2.739)───RZ(-1.297)──────────────╭C──╭C──────┤ ⟨H0⟩ 116 | # 1: ──RX(1.927)───RY(-1.859)──RZ(-1.008)──╰X──│───╭C────────RX(0.375)───RY(-6.204)──RZ(-5.583)──╰X──│───╭C──┤ 117 | # 2: ──RX(-2.027)──RY(-3.447)──RZ(1.425)───────╰X──╰X────────RX(-2.378)──RY(-4.139)──RZ(4.284)───────╰X──╰X──┤ 118 | # H0 = 119 | # [[ 1.+0.j 0.+0.j] 120 | # [ 0.+0.j -1.+0.j]] 121 | -------------------------------------------------------------------------------- /tutorials/tutorial_state_preparation.py: -------------------------------------------------------------------------------- 1 | r""" 2 | .. _state_preparation: 3 | 4 | Training a quantum circuit with PyTorch 5 | ======================================= 6 | 7 | .. 
meta:: 8 | :property="og:description": Build and optimize a circuit to prepare 9 | arbitrary single-qubit states, including mixed states, with PyTorch 10 | and PennyLane. 11 | :property="og:image": https://pennylane.ai/qml/_images/NOON.png 12 | 13 | .. related:: 14 | 15 | tutorial_qubit_rotation Basic tutorial: qubit rotation 16 | pytorch_noise PyTorch and noisy devices 17 | tutorial_isingmodel_PyTorch 3-qubit Ising model in PyTorch 18 | 19 | In this notebook, we build and optimize a circuit to prepare arbitrary 20 | single-qubit states, including mixed states. Along the way, we also show 21 | how to: 22 | 23 | 1. Construct compact expressions for circuits composed of many layers. 24 | 2. Succinctly evaluate expectation values of many observables. 25 | 3. Estimate expectation values from repeated measurements, as in real 26 | hardware. 27 | 28 | """ 29 | 30 | ############################################################################## 31 | # The most general state of a qubit is represented in terms of a positive 32 | # semi-definite density matrix :math:`\rho` with unit trace. The density 33 | # matrix can be uniquely described in terms of its three-dimensional 34 | # *Bloch vector* :math:`\vec{a}=(a_x, a_y, a_z)` as: 35 | # 36 | # .. math:: \rho=\frac{1}{2}(\mathbb{1}+a_x\sigma_x+a_y\sigma_y+a_z\sigma_z), 37 | # 38 | # where :math:`\sigma_x, \sigma_y, \sigma_z` are the Pauli matrices. Any 39 | # Bloch vector corresponds to a valid density matrix as long as 40 | # :math:`\|\vec{a}\|\leq 1`. 41 | # 42 | # The *purity* of a state is defined as :math:`p=\text{Tr}(\rho^2)`, which 43 | # for a qubit is bounded as :math:`1/2\leq p\leq 1`. The state is pure if 44 | # :math:`p=1` and maximally mixed if :math:`p=1/2`. In this example, we 45 | # select the target state by choosing a random Bloch vector and 46 | # renormalizing it to have a specified purity. 47 | # 48 | # To start, we import PennyLane, NumPy, and PyTorch for the optimization: 49 | 50 | import pennylane as qml 51 | import numpy as np 52 | import torch 53 | from torch._C import dtype 54 | from torch.autograd import Variable 55 | 56 | from scipy.linalg import logm 57 | import matplotlib.pyplot as plt 58 | 59 | np.random.seed(42) 60 | 61 | # we generate a three-dimensional random vector by sampling 62 | # each entry from a standard normal distribution 63 | v = np.random.normal(0, 1, 3) 64 | 65 | # purity of the target state 66 | purity = 0.66 67 | 68 | # create a random Bloch vector with the specified purity 69 | bloch_v = np.sqrt(2 * purity - 1) * v / np.sqrt(np.sum(v ** 2)) 70 | 71 | # array of Pauli matrices (will be useful later) 72 | Paulis = np.zeros((3, 2, 2), dtype=complex) 73 | Paulis[0] = [[0, 1], [1, 0]] 74 | Paulis[1] = [[0, -1j], [1j, 0]] 75 | Paulis[2] = [[1, 0], [0, -1]] 76 | 77 | ############################################################################## 78 | # Unitary operations map pure states to pure states. So how can we prepare 79 | # mixed states using unitary circuits? The trick is to introduce 80 | # additional qubits and perform a unitary transformation on this larger 81 | # system. By "tracing out" the ancilla qubits, we can prepare mixed states 82 | # in the target register. In this example, we introduce two additional 83 | # qubits, which suffices to prepare arbitrary states. 84 | # 85 | # The ansatz circuit is composed of repeated layers, each of which 86 | # consists of single-qubit rotations along the :math:`x, y,` and :math:`z` 87 | # axes, followed by three CNOT gates entangling all qubits. 
Initial gate 88 | # parameters are chosen at random from a normal distribution. Importantly, 89 | # when declaring the layer function, we introduce an input parameter 90 | # :math:`j`, which allows us to later call each layer individually. 91 | 92 | # number of qubits in the circuit 93 | nr_qubits = 3 94 | # number of layers in the circuit 95 | nr_layers = 2 96 | 97 | # randomly initialize parameters from a normal distribution 98 | params = np.random.normal(0, np.pi, (nr_qubits, nr_layers, 3)) 99 | params = Variable(torch.tensor(params), requires_grad=True) 100 | 101 | # a layer of the circuit ansatz 102 | def layer(params, j): 103 | for i in range(nr_qubits): 104 | qml.RX(params[i, j, 0], wires=i) 105 | qml.RY(params[i, j, 1], wires=i) 106 | qml.RZ(params[i, j, 2], wires=i) 107 | 108 | qml.CNOT(wires=[0, 1]) 109 | qml.CNOT(wires=[0, 2]) 110 | qml.CNOT(wires=[1, 2]) 111 | 112 | 113 | ############################################################################## 114 | # Here, we use the ``default.qubit`` device to perform the optimization, but this can be changed to 115 | # any other supported device. 116 | 117 | dev = qml.device("default.qubit", wires=3) 118 | 119 | 120 | # entanglement entropy 121 | def entanglement_entropy(state): 122 | state = np.array(state, ndmin=2) 123 | ket = state.T 124 | bra = state.conj() 125 | rho_final = np.outer(ket,bra) 126 | num_wires = int(np.log2(state.size)) 127 | S = [] 128 | for d in range(1, num_wires): 129 | Ia = np.identity(2**d) 130 | Ib = np.identity(2**(num_wires-d)) 131 | Tr_a = np.empty([2**d, 2**(num_wires-d), 2**(num_wires-d)], dtype=complex) 132 | for i in range(2**d): 133 | ai = np.array(Ia[i], ndmin=2).T 134 | Tr_a[i] = np.kron(ai.conj().T, Ib).dot(rho_final).dot(np.kron(ai,Ib)) 135 | rho_b = Tr_a.sum(axis=0) 136 | rho_b_l2 = logm(rho_b)/np.log(2.0) 137 | S_rho_b = - rho_b.dot(rho_b_l2).trace() 138 | S.append(S_rho_b) 139 | return np.array(S).mean() 140 | 141 | ############################################################################## 142 | # When defining the QNode, we introduce as input a Hermitian operator 143 | # :math:`A` that specifies the expectation value being evaluated. This 144 | # choice later allows us to easily evaluate several expectation values 145 | # without having to define a new QNode each time. 146 | # 147 | # Since we will be optimizing using PyTorch, we configure the QNode 148 | # to use the PyTorch interface: 149 | 150 | 151 | @qml.qnode(dev, interface="torch") 152 | def circuit(params, A=None): 153 | 154 | # repeatedly apply each layer in the circuit 155 | for j in range(nr_layers): 156 | layer(params, j) 157 | 158 | # returns the expectation of the input matrix A on the first qubit 159 | return qml.expval(qml.Hermitian(A, wires=0)) 160 | 161 | 162 | ############################################################################## 163 | # Our goal is to prepare a state with the same Bloch vector as the target 164 | # state. Therefore, we define a simple cost function 165 | # 166 | # .. math:: C = \sum_{i=1}^3 \left|a_i-a'_i\right|, 167 | # 168 | # where :math:`\vec{a}=(a_1, a_2, a_3)` is the target vector and 169 | # :math:`\vec{a}'=(a'_1, a'_2, a'_3)` is the vector of the state prepared 170 | # by the circuit. Optimization is carried out using the Adam optimizer. 171 | # Finally, we compare the Bloch vectors of the target and output state. 
172 | 173 | # cost function 174 | def cost_fn(params): 175 | cost = 0 176 | for k in range(3): 177 | cost += torch.abs(circuit(params, A=Paulis[k]) - bloch_v[k]) 178 | 179 | return cost 180 | 181 | 182 | 183 | # set up the optimizer 184 | opt = torch.optim.Adam([params], lr=0.1) 185 | 186 | # number of steps in the optimization routine 187 | steps = 200 188 | 189 | # the final stage of optimization isn't always the best, so we keep track of 190 | # the best parameters along the way 191 | best_cost = cost_fn(params) 192 | best_params = np.zeros((nr_qubits, nr_layers, 3)) 193 | 194 | print("Cost after 0 steps is {:.4f}".format(cost_fn(params))) 195 | 196 | entropy_list_cost = [] 197 | entropy_list_backward = [] 198 | 199 | # optimization begins 200 | for n in range(steps): 201 | opt.zero_grad() 202 | loss = cost_fn(params) 203 | entropy_list_cost.append(entanglement_entropy(dev.state)) 204 | loss.backward() 205 | entropy_list_backward.append(entanglement_entropy(dev.state)) 206 | opt.step() 207 | 208 | # keeps track of best parameters 209 | if loss < best_cost: 210 | best_params = params 211 | 212 | # Keep track of progress every 10 steps 213 | if n % 10 == 9 or n == steps - 1: 214 | print("Cost after {} steps is {:.4f}".format(n + 1, loss)) 215 | 216 | # calculate the Bloch vector of the output state 217 | output_bloch_v = np.zeros(3) 218 | for l in range(3): 219 | output_bloch_v[l] = circuit(best_params, A=Paulis[l]) 220 | 221 | # print results 222 | print("Target Bloch vector = ", bloch_v) 223 | print("Output Bloch vector = ", output_bloch_v) 224 | 225 | entropy_list_cost = np.array(entropy_list_cost) 226 | entropy_list_backward = np.array(entropy_list_backward) 227 | 228 | plt.plot(entropy_list_cost.real) 229 | plt.plot(entropy_list_backward.real) 230 | plt.show() 231 | # plt.plot(entropy_list_cost.imag) 232 | # plt.plot(entropy_list_backward.imag) 233 | # plt.show() 234 | print(entropy_list_cost) 235 | print(entropy_list_backward) 236 | --------------------------------------------------------------------------------