├── LICENSE ├── README.md ├── requirements.txt ├── train.py └── tvmc ├── hamiltonians ├── hamiltonian.py └── rydberg.py ├── models ├── BaseModel.py ├── LPTF.py ├── ModelBuilder.py ├── PTF.py ├── RNN.py └── training.py └── util.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 
180 |    To apply the Apache License to your work, attach the following
181 |    boilerplate notice, with the fields enclosed by brackets "[]"
182 |    replaced with your own identifying information. (Don't include
183 |    the brackets!)  The text should be enclosed in the appropriate
184 |    comment syntax for the file format. We also recommend that a
185 |    file or class name and description of purpose be included on the
186 |    same "printed page" as the copyright notice for easier
187 |    identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Code accompanying the paper "Variational Monte Carlo with Large Patched Transformers"
2 | 
3 | ## Requirements
4 | A suitable [conda](https://conda.io/) environment named `qsr` can be created
5 | and activated with:
6 | 
7 | ```
8 | conda create --name qsr
9 | conda install -n qsr pip
10 | conda activate qsr
11 | pip install -r requirements.txt
12 | ```
13 | 
14 | ## Model builder
15 | 
16 | ### TRAINING
17 | 
18 | The train.py script is used to train new models from scratch. For example, the following command trains
19 | a model for an $8\times 8$ Rydberg lattice with $V=7$ and $\delta=\Omega=1$, using a $2\times 2$ patched transformer:
20 | ```
21 | python train.py --train L=64 NLOOPS=16 K=1024 sub_directory=2x2 --ptf patch=2x2 --rydberg V=7 delta=1 Omega=1
22 | ```
23 | Training parameters are shown when running:
24 | 
25 | ```
26 | python train.py --help --train
27 | ```
28 | 
29 | These are all possible training arguments:
30 | ```
31 | 
32 | Training Arguments:
33 | 
34 |     L             (int)   -- Total lattice size (an 8x8 lattice is L=64).
35 | 
36 |     Q             (int)   -- Number of minibatches per batch.
37 | 
38 |     K             (int)   -- Size of each minibatch.
39 | 
40 |     B             (int)   -- Total batch size (should equal Q*K).
41 | 
42 |     NLOOPS        (int)   -- Number of loops within the off_diag_labels function. Higher values save RAM and
43 |                              generally make the code run faster (up to 2x). Note that this can only be set
44 |                              as high as the effective sequence length (L divided by the patch size).
45 | 
46 |     steps         (int)   -- Number of training steps.
47 | 
48 |     dir           (str)   -- Output directory; set to <NONE> for no output.
49 | 
50 |     lr            (float) -- Learning rate.
51 | 
52 |     seed          (int)   -- Random seed for the run.
53 | 
54 |     sgrad         (bool)  -- Whether or not to sample with gradients; otherwise gradients are created in an
55 |                              extra network run (uses less RAM but is slightly slower).
56 | 
57 |     true_grad     (bool)  -- Set to False to approximate the gradients (more efficient but less exact).
58 | 
59 |     sub_directory (str)   -- String to add to the end of the output directory (inside a subfolder).
60 | ```
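The batch and loop settings above are coupled: B should equal Q*K, and NLOOPS is bounded by the effective sequence length. A minimal sanity-check sketch in Python for the $8\times 8$ example command above; the variable names here are illustrative only and are not part of train.py:

```
L = 64                                # 8x8 lattice -> L=64
patch = (2, 2)                        # --ptf patch=2x2
Q, K = 1, 1024                        # Q is not set in the example command; assume 1 for illustration
B = Q * K                             # total batch size should equal Q*K
seq_len = L // (patch[0] * patch[1])  # effective sequence length: 64 // 4 = 16
NLOOPS = 16                           # can be set at most as high as seq_len
assert B == Q * K and 1 <= NLOOPS <= seq_len
```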
61 | 
62 | ### RNN
63 | 
64 | All optional RNN parameters can be viewed by running
65 | 
66 | ```
67 | python train.py --help --rnn
68 | ```
69 | 
70 | These are the RNN parameters:
71 | 
72 | ```
73 | 
74 | RNN Optional arguments:
75 | 
76 |     L       (int) -- The total number of atoms in your lattice.
77 | 
78 |     Nh      (int) -- RNN hidden size.
79 | 
80 |     patch   (str) -- Number of atoms input/predicted at once (patch size).
81 |                      The input sequence will have an effective length of L/prod(patch).
82 |                      Example values: 2x2, 2x3, 2, 4
83 | 
84 |     rnntype (str) -- Which type of RNN cell to use. Only ELMAN and GRU are valid options at the moment.
85 | 
86 | ```
87 | 
88 | ### Patched Transformer (PTF)
89 | 
90 | All optional PTF parameters can be viewed by running
91 | 
92 | ```
93 | python train.py --help --ptf
94 | ```
95 | 
96 | These are the PTF parameters:
97 | ```
98 | 
99 | PTF Optional arguments:
100 | 
101 |     L          (int)   -- The total number of atoms in your lattice.
102 | 
103 |     Nh         (int)   -- Transformer token size. Input patches are projected to match the token size.
104 | 
105 |     patch      (str)   -- Number of atoms input/predicted at once (patch size).
106 |                           The input sequence will have an effective length of L/prod(patch).
107 |                           Example values: 2x2, 2x3, 2, 4
108 | 
109 |     dropout    (float) -- The amount of dropout to use in the transformer layers.
110 | 
111 |     num_layers (int)   -- The number of transformer layers to use.
112 | 
113 |     nhead      (int)   -- The number of heads to use in multi-headed self-attention. This should divide Nh.
114 | 
115 |     repeat_pre (bool)  -- Repeat the precondition (input) instead of projecting it to match the token size.
116 | 
117 | ```
118 | 
119 | ### Large-Patched Transformer (LPTF)
120 | 
121 | All optional LPTF parameters can be viewed by running
122 | 
123 | ```
124 | python train.py --help --lptf
125 | ```
126 | LPTF parameters must be followed by the sub-model flag (e.g. --rnn) and its parameters, where the sub-model's
127 | L parameter needs to match the LPTF patch size (e.g. --lptf patch=2x3 --rnn L=6).
128 | 
129 | These are the LPTF parameters:
130 | ```
131 | 
132 | LPTF Optional arguments:
133 | 
134 |     L          (int)     -- The total number of atoms in your lattice.
135 | 
136 |     Nh         (int)     -- Transformer token size. Input patches are projected to match the token size.
137 |                             Note: when using an RNN subsampler, this Nh MUST match the RNN's Nh.
138 | 
139 |     patch      (str)     -- Number of atoms input/predicted at once (patch size).
140 |                             The input sequence will have an effective length of L divided by the patch size.
141 | 
142 |     dropout    (float)   -- The amount of dropout to use in the transformer layers.
143 | 
144 |     num_layers (int)     -- The number of transformer layers to use.
145 | 
146 |     nhead      (int)     -- The number of heads to use in multi-headed self-attention. This should divide Nh.
147 | 
148 |     subsampler (Sampler) -- The inner model to use for probability factorization. This is set implicitly
149 |                             by including --rnn or --ptf arguments.
150 | 
151 | ```
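For reference, this is the LPTF runtime example printed by `python train.py --help --lptf`: a $12\times 12$ lattice (L=144) split into $3\times 3$ patches by the transformer, with an RNN subsampler handling the 9 atoms inside each patch:

```
python train.py --rydberg --train L=144 --lptf patch=3x3 --rnn L=9 patch=3 Nh=128
```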
152 | 
153 | ## Rydberg Hamiltonian
154 | 
155 | The following parameters can be chosen for the Rydberg Hamiltonian (default values shown):
156 | 
157 | ```
158 | Lx      4
159 | Ly      4
160 | V       7.0
161 | Omega   1.0
162 | delta   1.0
163 | ```
164 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | filelock==3.12.0
2 | Jinja2==3.1.2
3 | MarkupSafe==2.1.2
4 | mpmath==1.3.0
5 | networkx==3.1
6 | numpy==1.24.3
7 | sympy==1.12
8 | torch==2.0.1
9 | typing_extensions==4.6.0
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from tvmc.models.ModelBuilder import *
2 | 
3 | import os
4 | import sys
5 | 
6 | def helper(args):
7 |     """Print help text for the model/options requested on the command line."""
8 |     help(build_model)
9 | 
10 |     example = "Runtime Example:\n>>>python train.py --rydberg --train L=144"
11 |     while True:
12 |         if "--lptf" in args:
13 |             print(LPTF.INFO)
14 |             print(example+" --lptf patch=3x3 --rnn L=9 patch=3 Nh=128")
15 |             break
16 |         if "--rnn" in args:
17 |             print(PRNN.INFO)
18 |             print(example+" NLOOPS=36 --rnn patch=4")
19 |             break
20 |         if "--ptf" in args:
21 |             print(PTF.INFO)
22 |             print(example+" NLOOPS=24 --ptf patch=2x3")
23 |             break
24 |         if "--train" in args:
25 |             print(TrainOpt.__doc__)
26 |             print(example+" NLOOPS=36 sgrad=False steps=4000 --ptf patch=2x2")
27 |             break
28 |         # No recognised flag yet: ask the user (lower-case the answer, not the prompt)
29 |         args = ["--"+input("What model do you need help with?\nOptions are rnn, lptf, ptf, and train:\n").lower()]
30 | 
31 | 
32 | if "--help" in sys.argv:
33 |     print()
34 |     helper(sys.argv)
35 | else:
36 |     print(sys.argv[1:])
37 | 
38 |     model,full_opt,opt_dict = build_model(sys.argv[1:])
39 |     train_opt = opt_dict["TRAIN"]
40 | 
41 |     # Initialize the Adam optimizer
42 |     beta1 = 0.9; beta2 = 0.999
43 |     optimizer = torch.optim.Adam(
44 |         model.parameters(),
45 |         lr=train_opt.lr,
46 |         betas=(beta1,beta2)
47 |     )
48 | 
49 |     print(full_opt)
50 |     mydir = setup_dir(opt_dict)
51 |     orig_stdout = sys.stdout
52 | 
53 |     full_opt.save(os.path.join(mydir,"settings.json"))
54 | 
55 |     # Redirect stdout to an output file for the duration of training
56 |     f = open(os.path.join(mydir,"output.txt"), 'w')
57 |     sys.stdout = f
58 |     try:
59 |         reg_train(opt_dict,(model,optimizer),printf=True,mydir=mydir)
60 |     except Exception as e:
61 |         print(e)
62 |         sys.stdout = orig_stdout
63 |         f.close()
64 |         raise   # re-raise so the failure is visible after stdout is restored
65 |     sys.stdout = orig_stdout
66 |     f.close()
--------------------------------------------------------------------------------
/tvmc/hamiltonians/hamiltonian.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | ngpu=1
5 | device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")
6 | 
7 | class Hamiltonian():
8 |     def __init__(self,L,offDiag,device=device):
9 |         self.offDiag = offDiag      # Off-diagonal interaction
10 |         self.L = L                  # Number of spins
11 |         self.device = device
12 |         self.Vij = nn.Linear(self.L,self.L).to(device)   # diagonal interaction matrix (filled by buildlattice)
13 |         self.buildlattice()
14 | 
15 |     def buildlattice(self):
16 |         """Creates the matrix representation of the on-diagonal part of the hamiltonian
17 |         - This should fill Vij with values"""
18 |         raise NotImplementedError
19 | 
20 |     # def localenergy(self,samples,logp,logppj):
21 |     #     """
22 |     #     Takes in s, ln[p(s)] and ln[p(s')] (for all s'), then computes Hloc(s) for N samples s.
23 | # 24 | # Inputs: 25 | # samples - [B,L,1] matrix of zeros and ones for ground/excited states 26 | # logp - size B vector of logscale probabilities ln[p(s)] 27 | # logppj - [B,L] matrix of logscale probabilities ln[p(s')] where s'[i][j] had one state flipped at position j 28 | # relative to s[i] 29 | # Returns: 30 | # size B vector of energies Hloc(s) 31 | # 32 | # """ 33 | # # Going to calculate Eloc for each sample in a separate spot 34 | # # so eloc will have shape [B] 35 | # # recall samples has shape [B,L,1] 36 | # B=samples.shape[0] 37 | # eloc = torch.zeros(B,device=self.device) 38 | # # Chemical potential 39 | # with torch.no_grad(): 40 | # tmp=self.Vij(samples.squeeze(2)) 41 | # eloc += torch.sum(tmp*samples.squeeze(2),axis=1) 42 | # # Off-diagonal part 43 | # #logppj is shape [B,L] 44 | # #logppj[:,j] has one state flipped at position j 45 | # for j in range(self.L): 46 | # #make sure torch.exp is a thing 47 | # eloc += self.offDiag * torch.exp((logppj[:,j]-logp)/2) 48 | # 49 | # return eloc 50 | 51 | def localenergyALT(self,samples,logp,sumsqrtp,logsqrtp): 52 | """ 53 | Takes in s, ln[p(s)] and exp(-logsqrtp)*sum(sqrt[p(s')]), then computes Hloc(s) for N samples s. 54 | 55 | Inputs: 56 | samples - [B,L,1] matrix of zeros and ones for ground/excited states 57 | logp - size B vector of logscale probabilities ln[p(s)] 58 | logsqrtp - size B vector of average (log p)/2 values used for numerical stability 59 | when calculating sum_s'(sqrt[p(s')/p(s)]) 60 | sumsqrtp - size B vector of exp(-logsqrtp)*sum(sqrt[p(s')]). 61 | Returns: 62 | size B vector of energies Hloc(s) 63 | 64 | """ 65 | # Going to calculate Eloc for each sample in a separate spot 66 | # so eloc will have shape [B] 67 | # recall samples has shape [B,L,1] 68 | B=samples.shape[0] 69 | eloc = torch.zeros(B,device=self.device) 70 | # Chemical potential 71 | with torch.no_grad(): 72 | tmp=self.Vij(samples.squeeze(2)) 73 | eloc += torch.sum(tmp*samples.squeeze(2),axis=1) 74 | # Off-diagonal part 75 | 76 | #in this function the entire sum is precomputed and it was premultiplied by exp(-logsqrtp) for stability 77 | eloc += self.offDiag *sumsqrtp* torch.exp(logsqrtp-logp/2) 78 | 79 | return eloc 80 | 81 | def magnetizations(self, samples): 82 | B = samples.shape[0] 83 | L = samples.shape[1] 84 | mag = torch.zeros(B, device=self.device) 85 | abs_mag = torch.zeros(B, device=self.device) 86 | sq_mag = torch.zeros(B, device=self.device) 87 | stag_mag = torch.zeros(B, device=self.device) 88 | 89 | with torch.no_grad(): 90 | samples_pm = 2 * samples - 1 91 | mag += torch.sum(samples_pm.squeeze(2), axis=1) 92 | abs_mag += torch.abs(torch.sum(samples_pm.squeeze(2), axis=1)) 93 | sq_mag += torch.abs(torch.sum(samples_pm.squeeze(2), axis=1))**2 94 | 95 | samples_reshape = torch.reshape(samples.squeeze(2), (B, int(np.sqrt(L)), int(np.sqrt(L)))) 96 | for i in range(int(np.sqrt(L))): 97 | for j in range(int(np.sqrt(L))): 98 | stag_mag += (-1)**(i+j) * (samples_reshape[:,i,j] - 0.5) 99 | 100 | return mag, abs_mag, sq_mag, stag_mag / L 101 | 102 | def ground(self): 103 | """Returns the ground state energy E/L""" 104 | raise NotImplementedError 105 | -------------------------------------------------------------------------------- /tvmc/hamiltonians/rydberg.py: -------------------------------------------------------------------------------- 1 | from tvmc.util import Options,OptionManager 2 | from tvmc.hamiltonians.hamiltonian import * 3 | 4 | class Rydberg(Hamiltonian): 5 | 6 | DEFAULTS = Options(Lx=4,Ly=4,V=7.0,Omega=1.0,delta=1.0) 7 | 
def __init__(self,Lx,Ly,V,Omega,delta,device=device,**kwargs): 8 | self.Lx = Lx # Size along x 9 | self.Ly = Ly # Size along y 10 | self.V = V # Van der Waals potential 11 | self.delta = delta # Detuning 12 | # off diagonal part is -0.5*Omega 13 | super(Rydberg,self).__init__(Lx*Ly,-0.5*Omega,device) 14 | 15 | @staticmethod 16 | def Vij(Ly,Lx,V,matrix): 17 | #matrix will be size [Lx*Ly,Lx*Ly] 18 | for i in range(Ly): 19 | for j in range(Lx): 20 | #flatten two indices into one 21 | idx = Ly*j+i 22 | # only fill in the upper diagonal 23 | for k in range(idx+1,Lx*Ly): 24 | #expand one index into two 25 | i2 = k%Ly 26 | j2=k//Ly 27 | div = ((i2-i)**2+(j2-j)**2)**3 28 | #if div<=R: 29 | matrix[idx][k]=V/div 30 | 31 | def buildlattice(self): 32 | Lx,Ly=self.Lx,self.Ly 33 | 34 | #diagonal hamiltonian portion can be written as a matrix multiplication then a dot product 35 | mat=np.zeros([self.L,self.L]) 36 | Rydberg.Vij(Lx,Ly,self.V,mat) 37 | 38 | with torch.no_grad(): 39 | self.Vij.weight[:,:]=torch.Tensor(mat) 40 | self.Vij.bias.fill_(-self.delta) 41 | 42 | def ground(self): 43 | return Rydberg.E[self.Lx*self.Ly] 44 | 45 | OptionManager.register("rydberg",Rydberg.DEFAULTS) 46 | -------------------------------------------------------------------------------- /tvmc/models/BaseModel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math,time,json 3 | import torch 4 | from torch import nn 5 | ngpu=1 6 | device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") 7 | 8 | 9 | class Sampler(nn.Module): 10 | 11 | def __init__(self,device=device): 12 | self.device=device 13 | super(Sampler, self).__init__() 14 | 15 | def save(self,fn): 16 | torch.save(self,fn) 17 | 18 | def logprobability(self,input): 19 | # type: (Tensor) -> Tensor 20 | """Compute the logscale probability of a given state 21 | Inputs: 22 | input - [B,L,1] matrix of zeros and ones for ground/excited states 23 | Returns: 24 | logp - [B] size vector of logscale probability labels 25 | """ 26 | raise NotImplementedError 27 | 28 | @torch.jit.export 29 | def sample(self,B,L): 30 | # type: (int,int) -> Tensor 31 | """ Generates a set states 32 | Inputs: 33 | B (int) - The number of states to generate in parallel 34 | L (int) - The length of generated vectors 35 | Returns: 36 | samples - [B,L,1] matrix of zeros and ones for ground/excited states 37 | logprobs - [B] matrix of logscale probabilities (float Tensor) 38 | """ 39 | raise NotImplementedError 40 | 41 | @torch.jit.export 42 | def off_diag_labels(self,sample,nloops=1): 43 | # type: (Tensor,int) -> Tensor 44 | """ 45 | Inputs: 46 | samples - [B,L,1] matrix of zeros and ones for ground/excited states 47 | 48 | Returns: 49 | probs - size [B,L] tensor of probabilities of the excitation-flipped states 50 | """ 51 | D=nloops 52 | B,L,_=sample.shape 53 | sflip = torch.zeros([B,L,L,1],device=self.device) 54 | #collect all of the flipped states into one array 55 | for j in range(L): 56 | #get all of the states with one spin flipped 57 | sflip[:,j] = sample*1.0 58 | sflip[:,j,j] = 1-sflip[:,j,j] 59 | #compute all of their logscale probabilities 60 | with torch.no_grad(): 61 | probs=torch.zeros([B*L],device=self.device) 62 | tmp=sflip.view([B*L,L,1]) 63 | for k in range(D): 64 | probs[k*B*L//D:(k+1)*B*L//D] = self.logprobability(tmp[k*B*L//D:(k+1)*B*L//D]) 65 | 66 | return probs.reshape([B,L]) 67 | 68 | @torch.jit.export 69 | def off_diag_labels_summed(self,sample,nloops=1): 70 | # type: (Tensor,int) -> 
Tuple[Tensor,Tensor] 71 | """ 72 | Inputs: 73 | samples - [B,L,1] matrix of zeros and ones for ground/excited states 74 | 75 | Returns: 76 | logsqrtp - size B vector of average (log p)/2 values used for numerical stability 77 | when calculating sum_s'(sqrt[p(s')/p(s)]) 78 | sumsqrtp - size B vector of exp(-logsqrtp)*sum(sqrt[p(s')]). 79 | """ 80 | probs = self.off_diag_labels(sample,nloops) 81 | #get the average of our logprobabilities and divide by 2 82 | logsqrtp=probs.mean(dim=1)/2 83 | #compute the sum with a constant multiplied to keep the sum close to 1 84 | sumsqrtp = torch.exp(probs/2-logsqrtp.unsqueeze(1)).sum(dim=1) 85 | return sumsqrtp,logsqrtp 86 | 87 | # Functions for making Patches & doing probability traces 88 | class Patch2D(nn.Module): 89 | def __init__(self,nx,ny,Lx,Ly,device=device): 90 | super().__init__() 91 | self.nx=nx 92 | self.ny=ny 93 | self.Ly=Ly 94 | self.Lx=Lx 95 | 96 | #construct an index tensor for the reverse operation 97 | indices = torch.arange(Lx*Ly,device=device).unsqueeze(0) 98 | self.mixed = self.forward(indices).reshape([Lx*Ly]) 99 | #inverse 100 | self.mixed=torch.argsort(self.mixed) 101 | 102 | def forward(self,x): 103 | # type: (Tensor) -> Tensor 104 | nx,ny,Lx,Ly=self.nx,self.ny,self.Lx,self.Ly 105 | """Unflatten a tensor back to 2D, break it into nxn chunks, then flatten the sequence and the chunks 106 | Input: 107 | Tensor of shape [B,L] 108 | Output: 109 | Tensor of shape [B,L//n^2,n^2] 110 | """ 111 | #make the input 2D then break it into 2x2 chunks 112 | #afterwards reshape the 2x2 chunks to vectors of size 4 and flatten the 2d bit 113 | return x.view([x.shape[0],Lx,Ly]).unfold(-2,nx,nx).unfold(-2,ny,ny).reshape([x.shape[0],int(Lx*Ly//(nx*ny)),nx*ny]) 114 | 115 | def reverse(self,x): 116 | # type: (Tensor) -> Tensor 117 | """Inverse function of forward 118 | Input: 119 | Tensor of shape [B,L//n^2,n^2] 120 | Output: 121 | Tensor of shape [B,L] 122 | """ 123 | Ly,Lx=self.Ly,self.Lx 124 | # Reversing is done with an index tensor because torch doesn't have an inverse method for unfold 125 | return x.reshape([x.shape[0],Ly*Lx])[:,self.mixed] 126 | 127 | class Patch1D(nn.Module): 128 | def __init__(self,n,L): 129 | super().__init__() 130 | self.n=n 131 | self.L = L 132 | 133 | def forward(self,x): 134 | # type: (Tensor) -> Tensor 135 | """Break a tensor into chunks, essentially a wrapper of reshape 136 | Input: 137 | Tensor of shape [B,L] 138 | Output: 139 | Tensor of shape [B,L/n,n] 140 | """ 141 | #make the input 2D then break it into 2x2 chunks 142 | #afterwards reshape the 2x2 chunks to vectors of size 4 and flatten the 2d bit 143 | return x.reshape([x.shape[0],self.L//self.n,self.n]) 144 | 145 | def reverse(self,x): 146 | # type: (Tensor) -> Tensor 147 | """Inverse function of forward 148 | Input: 149 | Tensor of shape [B,L/n,n] 150 | Output: 151 | Tensor of shape [B,L] 152 | """ 153 | # original sequence order can be retrieved by chunking twice more 154 | #in the x-direction you should have chunks of size 2, but in y it should 155 | #be chunks of size Ly//2 156 | return x.reshape([x.shape[0],self.L]) 157 | 158 | @torch.jit.script 159 | def genpatch2onehot(patch,p): 160 | # type: (Tensor,int) -> Tensor 161 | """ Turn a sequence of size p patches into a onehot vector 162 | Inputs: 163 | patch - Tensor of shape [?,p] 164 | p (int) - the patch size 165 | 166 | """ 167 | #moving the last dimension to the front 168 | patch=patch.unsqueeze(0).transpose(-1,0).squeeze(-1).to(torch.int64) 169 | out=torch.zeros(patch.shape[1:],device=patch.device) 170 | 
for i in range(p): 171 | out+=patch[i]<
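The dump is truncated here, inside genpatch2onehot. As an added usage illustration (not part of BaseModel.py), the sketch below round-trips a batch of spin configurations through the Patch2D class shown in full above; it assumes only that class and standard PyTorch:

```
# Usage sketch: Patch2D round-trip on a 4x4 lattice split into 2x2 patches.
import torch

cpu = torch.device("cpu")
patcher = Patch2D(nx=2, ny=2, Lx=4, Ly=4, device=cpu)

x = torch.randint(0, 2, (8, 16), device=cpu).float()  # batch of 8 flattened spin configurations
chunks = patcher.forward(x)                            # -> [8, 4, 4]: four 2x2 patches per sample
restored = patcher.reverse(chunks)                     # undoes forward() via the precomputed index
assert torch.equal(restored, x)
```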