├── H266.png
├── LICENSE
├── README.org
├── embed.py
├── environment.yml
├── hype
    ├── Euclidean.py
    ├── HTiling_rsgd.py
    ├── LTiling_rsgd.py
    ├── LTiling_sgd.py
    ├── Lorentz.py
    ├── NLTiling_rsgd.py
    ├── NLorentz.py
    ├── Poincare.py
    ├── __init__.py
    ├── adjacency_matrix_dataset.pyx
    ├── checkpoint.py
    ├── common.py
    ├── graph.py
    ├── graph_dataset.pyx
    ├── manifold.py
    ├── rsgd.py
    ├── sn.py
    └── train.py
├── requirements.txt
├── setup.py
├── train-grqc.sh
├── train-mammals.sh
├── train-nouns.sh
├── train-verbs.sh
└── wordnet
    ├── grqc.csv
    ├── mammals_filter.txt
    ├── transitive_closure.py
    └── verb_closure.csv


/H266.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ydtydr/HyperbolicTiling_Learning/c77f0d1a1b32ed5437a59d7cdeb8426ff03ea70b/H266.png


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Attribution-NonCommercial 4.0 International
  2 | 
  3 | =======================================================================
  4 | 
  5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
  6 | does not provide legal services or legal advice. Distribution of
  7 | Creative Commons public licenses does not create a lawyer-client or
  8 | other relationship. Creative Commons makes its licenses and related
  9 | information available on an "as-is" basis. Creative Commons gives no
 10 | warranties regarding its licenses, any material licensed under their
 11 | terms and conditions, or any related information. Creative Commons
 12 | disclaims all liability for damages resulting from their use to the
 13 | fullest extent possible.
 14 | 
 15 | Using Creative Commons Public Licenses
 16 | 
 17 | Creative Commons public licenses provide a standard set of terms and
 18 | conditions that creators and other rights holders may use to share
 19 | original works of authorship and other material subject to copyright
 20 | and certain other rights specified in the public license below. The
 21 | following considerations are for informational purposes only, are not
 22 | exhaustive, and do not form part of our licenses.
 23 | 
 24 |      Considerations for licensors: Our public licenses are
 25 |      intended for use by those authorized to give the public
 26 |      permission to use material in ways otherwise restricted by
 27 |      copyright and certain other rights. Our licenses are
 28 |      irrevocable. Licensors should read and understand the terms
 29 |      and conditions of the license they choose before applying it.
 30 |      Licensors should also secure all rights necessary before
 31 |      applying our licenses so that the public can reuse the
 32 |      material as expected. Licensors should clearly mark any
 33 |      material not subject to the license. This includes other CC-
 34 |      licensed material, or material used under an exception or
 35 |      limitation to copyright. More considerations for licensors:
 36 |      wiki.creativecommons.org/Considerations_for_licensors
 37 | 
 38 |      Considerations for the public: By using one of our public
 39 |      licenses, a licensor grants the public permission to use the
 40 |      licensed material under specified terms and conditions. If
 41 |      the licensor's permission is not necessary for any reason--for
 42 |      example, because of any applicable exception or limitation to
 43 |      copyright--then that use is not regulated by the license. Our
 44 |      licenses grant only permissions under copyright and certain
 45 |      other rights that a licensor has authority to grant. Use of
 46 |      the licensed material may still be restricted for other
 47 |      reasons, including because others have copyright or other
 48 |      rights in the material. A licensor may make special requests,
 49 |      such as asking that all changes be marked or described.
 50 |      Although not required by our licenses, you are encouraged to
 51 |      respect those requests where reasonable. More_considerations
 52 |      for the public:
 53 | 	wiki.creativecommons.org/Considerations_for_licensees
 54 | 
 55 | =======================================================================
 56 | 
 57 | Creative Commons Attribution-NonCommercial 4.0 International Public
 58 | License
 59 | 
 60 | By exercising the Licensed Rights (defined below), You accept and agree
 61 | to be bound by the terms and conditions of this Creative Commons
 62 | Attribution-NonCommercial 4.0 International Public License ("Public
 63 | License"). To the extent this Public License may be interpreted as a
 64 | contract, You are granted the Licensed Rights in consideration of Your
 65 | acceptance of these terms and conditions, and the Licensor grants You
 66 | such rights in consideration of benefits the Licensor receives from
 67 | making the Licensed Material available under these terms and
 68 | conditions.
 69 | 
 70 | Section 1 -- Definitions.
 71 | 
 72 |   a. Adapted Material means material subject to Copyright and Similar
 73 |      Rights that is derived from or based upon the Licensed Material
 74 |      and in which the Licensed Material is translated, altered,
 75 |      arranged, transformed, or otherwise modified in a manner requiring
 76 |      permission under the Copyright and Similar Rights held by the
 77 |      Licensor. For purposes of this Public License, where the Licensed
 78 |      Material is a musical work, performance, or sound recording,
 79 |      Adapted Material is always produced where the Licensed Material is
 80 |      synched in timed relation with a moving image.
 81 | 
 82 |   b. Adapter's License means the license You apply to Your Copyright
 83 |      and Similar Rights in Your contributions to Adapted Material in
 84 |      accordance with the terms and conditions of this Public License.
 85 | 
 86 |   c. Copyright and Similar Rights means copyright and/or similar rights
 87 |      closely related to copyright including, without limitation,
 88 |      performance, broadcast, sound recording, and Sui Generis Database
 89 |      Rights, without regard to how the rights are labeled or
 90 |      categorized. For purposes of this Public License, the rights
 91 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
 92 |      Rights.
 93 |   d. Effective Technological Measures means those measures that, in the
 94 |      absence of proper authority, may not be circumvented under laws
 95 |      fulfilling obligations under Article 11 of the WIPO Copyright
 96 |      Treaty adopted on December 20, 1996, and/or similar international
 97 |      agreements.
 98 | 
 99 |   e. Exceptions and Limitations means fair use, fair dealing, and/or
100 |      any other exception or limitation to Copyright and Similar Rights
101 |      that applies to Your use of the Licensed Material.
102 | 
103 |   f. Licensed Material means the artistic or literary work, database,
104 |      or other material to which the Licensor applied this Public
105 |      License.
106 | 
107 |   g. Licensed Rights means the rights granted to You subject to the
108 |      terms and conditions of this Public License, which are limited to
109 |      all Copyright and Similar Rights that apply to Your use of the
110 |      Licensed Material and that the Licensor has authority to license.
111 | 
112 |   h. Licensor means the individual(s) or entity(ies) granting rights
113 |      under this Public License.
114 | 
115 |   i. NonCommercial means not primarily intended for or directed towards
116 |      commercial advantage or monetary compensation. For purposes of
117 |      this Public License, the exchange of the Licensed Material for
118 |      other material subject to Copyright and Similar Rights by digital
119 |      file-sharing or similar means is NonCommercial provided there is
120 |      no payment of monetary compensation in connection with the
121 |      exchange.
122 | 
123 |   j. Share means to provide material to the public by any means or
124 |      process that requires permission under the Licensed Rights, such
125 |      as reproduction, public display, public performance, distribution,
126 |      dissemination, communication, or importation, and to make material
127 |      available to the public including in ways that members of the
128 |      public may access the material from a place and at a time
129 |      individually chosen by them.
130 | 
131 |   k. Sui Generis Database Rights means rights other than copyright
132 |      resulting from Directive 96/9/EC of the European Parliament and of
133 |      the Council of 11 March 1996 on the legal protection of databases,
134 |      as amended and/or succeeded, as well as other essentially
135 |      equivalent rights anywhere in the world.
136 | 
137 |   l. You means the individual or entity exercising the Licensed Rights
138 |      under this Public License. Your has a corresponding meaning.
139 | 
140 | Section 2 -- Scope.
141 | 
142 |   a. License grant.
143 | 
144 |        1. Subject to the terms and conditions of this Public License,
145 |           the Licensor hereby grants You a worldwide, royalty-free,
146 |           non-sublicensable, non-exclusive, irrevocable license to
147 |           exercise the Licensed Rights in the Licensed Material to:
148 | 
149 |             a. reproduce and Share the Licensed Material, in whole or
150 |                in part, for NonCommercial purposes only; and
151 | 
152 |             b. produce, reproduce, and Share Adapted Material for
153 |                NonCommercial purposes only.
154 | 
155 |        2. Exceptions and Limitations. For the avoidance of doubt, where
156 |           Exceptions and Limitations apply to Your use, this Public
157 |           License does not apply, and You do not need to comply with
158 |           its terms and conditions.
159 | 
160 |        3. Term. The term of this Public License is specified in Section
161 |           6(a).
162 | 
163 |        4. Media and formats; technical modifications allowed. The
164 |           Licensor authorizes You to exercise the Licensed Rights in
165 |           all media and formats whether now known or hereafter created,
166 |           and to make technical modifications necessary to do so. The
167 |           Licensor waives and/or agrees not to assert any right or
168 |           authority to forbid You from making technical modifications
169 |           necessary to exercise the Licensed Rights, including
170 |           technical modifications necessary to circumvent Effective
171 |           Technological Measures. For purposes of this Public License,
172 |           simply making modifications authorized by this Section 2(a)
173 |           (4) never produces Adapted Material.
174 | 
175 |        5. Downstream recipients.
176 | 
177 |             a. Offer from the Licensor -- Licensed Material. Every
178 |                recipient of the Licensed Material automatically
179 |                receives an offer from the Licensor to exercise the
180 |                Licensed Rights under the terms and conditions of this
181 |                Public License.
182 | 
183 |             b. No downstream restrictions. You may not offer or impose
184 |                any additional or different terms or conditions on, or
185 |                apply any Effective Technological Measures to, the
186 |                Licensed Material if doing so restricts exercise of the
187 |                Licensed Rights by any recipient of the Licensed
188 |                Material.
189 | 
190 |        6. No endorsement. Nothing in this Public License constitutes or
191 |           may be construed as permission to assert or imply that You
192 |           are, or that Your use of the Licensed Material is, connected
193 |           with, or sponsored, endorsed, or granted official status by,
194 |           the Licensor or others designated to receive attribution as
195 |           provided in Section 3(a)(1)(A)(i).
196 | 
197 |   b. Other rights.
198 | 
199 |        1. Moral rights, such as the right of integrity, are not
200 |           licensed under this Public License, nor are publicity,
201 |           privacy, and/or other similar personality rights; however, to
202 |           the extent possible, the Licensor waives and/or agrees not to
203 |           assert any such rights held by the Licensor to the limited
204 |           extent necessary to allow You to exercise the Licensed
205 |           Rights, but not otherwise.
206 | 
207 |        2. Patent and trademark rights are not licensed under this
208 |           Public License.
209 | 
210 |        3. To the extent possible, the Licensor waives any right to
211 |           collect royalties from You for the exercise of the Licensed
212 |           Rights, whether directly or through a collecting society
213 |           under any voluntary or waivable statutory or compulsory
214 |           licensing scheme. In all other cases the Licensor expressly
215 |           reserves any right to collect such royalties, including when
216 |           the Licensed Material is used other than for NonCommercial
217 |           purposes.
218 | 
219 | Section 3 -- License Conditions.
220 | 
221 | Your exercise of the Licensed Rights is expressly made subject to the
222 | following conditions.
223 | 
224 |   a. Attribution.
225 | 
226 |        1. If You Share the Licensed Material (including in modified
227 |           form), You must:
228 | 
229 |             a. retain the following if it is supplied by the Licensor
230 |                with the Licensed Material:
231 | 
232 |                  i. identification of the creator(s) of the Licensed
233 |                     Material and any others designated to receive
234 |                     attribution, in any reasonable manner requested by
235 |                     the Licensor (including by pseudonym if
236 |                     designated);
237 | 
238 |                 ii. a copyright notice;
239 | 
240 |                iii. a notice that refers to this Public License;
241 | 
242 |                 iv. a notice that refers to the disclaimer of
243 |                     warranties;
244 | 
245 |                  v. a URI or hyperlink to the Licensed Material to the
246 |                     extent reasonably practicable;
247 | 
248 |             b. indicate if You modified the Licensed Material and
249 |                retain an indication of any previous modifications; and
250 | 
251 |             c. indicate the Licensed Material is licensed under this
252 |                Public License, and include the text of, or the URI or
253 |                hyperlink to, this Public License.
254 | 
255 |        2. You may satisfy the conditions in Section 3(a)(1) in any
256 |           reasonable manner based on the medium, means, and context in
257 |           which You Share the Licensed Material. For example, it may be
258 |           reasonable to satisfy the conditions by providing a URI or
259 |           hyperlink to a resource that includes the required
260 |           information.
261 | 
262 |        3. If requested by the Licensor, You must remove any of the
263 |           information required by Section 3(a)(1)(A) to the extent
264 |           reasonably practicable.
265 | 
266 |        4. If You Share Adapted Material You produce, the Adapter's
267 |           License You apply must not prevent recipients of the Adapted
268 |           Material from complying with this Public License.
269 | 
270 | Section 4 -- Sui Generis Database Rights.
271 | 
272 | Where the Licensed Rights include Sui Generis Database Rights that
273 | apply to Your use of the Licensed Material:
274 | 
275 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
276 |      to extract, reuse, reproduce, and Share all or a substantial
277 |      portion of the contents of the database for NonCommercial purposes
278 |      only;
279 | 
280 |   b. if You include all or a substantial portion of the database
281 |      contents in a database in which You have Sui Generis Database
282 |      Rights, then the database in which You have Sui Generis Database
283 |      Rights (but not its individual contents) is Adapted Material; and
284 | 
285 |   c. You must comply with the conditions in Section 3(a) if You Share
286 |      all or a substantial portion of the contents of the database.
287 | 
288 | For the avoidance of doubt, this Section 4 supplements and does not
289 | replace Your obligations under this Public License where the Licensed
290 | Rights include other Copyright and Similar Rights.
291 | 
292 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
293 | 
294 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
295 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
296 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
297 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
298 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
299 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
300 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
301 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
302 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
303 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
304 | 
305 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
306 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
307 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
308 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
309 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
310 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
311 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
312 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
313 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
314 | 
315 |   c. The disclaimer of warranties and limitation of liability provided
316 |      above shall be interpreted in a manner that, to the extent
317 |      possible, most closely approximates an absolute disclaimer and
318 |      waiver of all liability.
319 | 
320 | Section 6 -- Term and Termination.
321 | 
322 |   a. This Public License applies for the term of the Copyright and
323 |      Similar Rights licensed here. However, if You fail to comply with
324 |      this Public License, then Your rights under this Public License
325 |      terminate automatically.
326 | 
327 |   b. Where Your right to use the Licensed Material has terminated under
328 |      Section 6(a), it reinstates:
329 | 
330 |        1. automatically as of the date the violation is cured, provided
331 |           it is cured within 30 days of Your discovery of the
332 |           violation; or
333 | 
334 |        2. upon express reinstatement by the Licensor.
335 | 
336 |      For the avoidance of doubt, this Section 6(b) does not affect any
337 |      right the Licensor may have to seek remedies for Your violations
338 |      of this Public License.
339 | 
340 |   c. For the avoidance of doubt, the Licensor may also offer the
341 |      Licensed Material under separate terms or conditions or stop
342 |      distributing the Licensed Material at any time; however, doing so
343 |      will not terminate this Public License.
344 | 
345 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
346 |      License.
347 | 
348 | Section 7 -- Other Terms and Conditions.
349 | 
350 |   a. The Licensor shall not be bound by any additional or different
351 |      terms or conditions communicated by You unless expressly agreed.
352 | 
353 |   b. Any arrangements, understandings, or agreements regarding the
354 |      Licensed Material not stated herein are separate from and
355 |      independent of the terms and conditions of this Public License.
356 | 
357 | Section 8 -- Interpretation.
358 | 
359 |   a. For the avoidance of doubt, this Public License does not, and
360 |      shall not be interpreted to, reduce, limit, restrict, or impose
361 |      conditions on any use of the Licensed Material that could lawfully
362 |      be made without permission under this Public License.
363 | 
364 |   b. To the extent possible, if any provision of this Public License is
365 |      deemed unenforceable, it shall be automatically reformed to the
366 |      minimum extent necessary to make it enforceable. If the provision
367 |      cannot be reformed, it shall be severed from this Public License
368 |      without affecting the enforceability of the remaining terms and
369 |      conditions.
370 | 
371 |   c. No term or condition of this Public License will be waived and no
372 |      failure to comply consented to unless expressly agreed to by the
373 |      Licensor.
374 | 
375 |   d. Nothing in this Public License constitutes or may be interpreted
376 |      as a limitation upon, or waiver of, any privileges and immunities
377 |      that apply to the Licensor or You, including from the legal
378 |      processes of any jurisdiction or authority.
379 | 
380 | =======================================================================
381 | 
382 | Creative Commons is not a party to its public
383 | licenses. Notwithstanding, Creative Commons may elect to apply one of
384 | its public licenses to material it publishes and in those instances
385 | will be considered the “Licensor.” The text of the Creative Commons
386 | public licenses is dedicated to the public domain under the CC0 Public
387 | Domain Dedication. Except for the limited purpose of indicating that
388 | material is shared under a Creative Commons public license or as
389 | otherwise permitted by the Creative Commons policies published at
390 | creativecommons.org/policies, Creative Commons does not authorize the
391 | use of the trademark "Creative Commons" or any other trademark or logo
392 | of Creative Commons without its prior written consent including,
393 | without limitation, in connection with any unauthorized modifications
394 | to any of its public licenses or any other arrangements,
395 | understandings, or agreements concerning use of licensed material. For
396 | the avoidance of doubt, this paragraph does not form part of the
397 | public licenses.
398 | 
399 | Creative Commons may be contacted at creativecommons.org.
400 | 
401 | 


--------------------------------------------------------------------------------
/README.org:
--------------------------------------------------------------------------------
 1 | ** Numerically Accurate Hyperbolic Embeddings Using Tiling-Based Models
 2 | 
 3 | **** Authors:
 4 | - [[http://www.cs.cornell.edu/~tyu/][Tao Yu]]
 5 | - [[http://www.cs.cornell.edu/~cdesa/][Christopher De Sa]]
 6 | [[file:H266.png]]
 7 | 
 8 | *** Introduction
 9 | This repo contains official code (learning part in PyTorch) and models for the NeurIPS 2019 paper,
10 | [[http://papers.neurips.cc/paper/8476-numerically-accurate-hyperbolic-embeddings-using-tiling-based-models.pdf][Numerically Accurate Hyperbolic Embeddings Using Tiling-Based Models]].
11 | We implemented our models under the same frame of [[https://github.com/facebookresearch/poincare-embeddings][Poincaré Embeddings for Learning Hierarchical Representations]],
12 | which is licensed under =LICENSE= and [[https://creativecommons.org/licenses/by-nc/4.0/][CC-BY-NC 4.0]]. However, we preserve the
13 | rights for commerical purpose of our tiling-based models.
14 | 
15 | ** Installation
16 | Clone this repository and run following commands
17 | #+BEGIN_SRC sh
18 |   git clone https://github.com/ydtydr/HyperbolicTiling_Learning.git
19 |   cd HyperbolicTiling_Learning
20 |   conda env create -f environment.yml
21 |   source activate tiling
22 |   python setup.py build_ext --inplace
23 | #+END_SRC
24 | 
25 | ** Dependencies
26 | - Python 3 with NumPy
27 | - PyTorch
28 | - Scikit-Learn
29 | - NLTK (to generate the WordNet data)
30 | 
31 | ** Example: Embedding WordNet Mammals
32 | First generate the transitive closure of data via
33 | #+BEGIN_SRC sh
34 |   cd wordnet
35 |   python transitive_closure.py
36 | #+END_SRC
37 | This will generate the transitive closure of the full noun, verb hierarchy as well as of the mammals subtree of WordNet.
38 | We also include the Gr-QC dataset in /wordnet/ folder.
39 | 
40 | To embed the mammals subtree in the reconstruction setting, go to the /root directory/ of
41 | the project and run
42 | #+BEGIN_SRC sh
43 |   ./train-mammals.sh
44 | #+END_SRC
45 | This shell script includes the appropriate parameter settings for the mammals subtree. Similar scripts to learn embeddings
46 | of Word nouns, Verbs and Gr-QC are located at =train-nouns.sh=, =train-verbs.sh=,
47 | =train-grqc.sh=. These scripts contain the hyperparameters to reproduce the embeddings results in the paper with available
48 | models: =Poincare=, =Lorentz=, =NLorentz=, =LTiling_rsgd=, =NLTiling_rsgd=, =LTiling_sgd= and =HTiling_rsgd=. Multithreaded async SGD training is also supported.
49 | 
50 | ** References
51 | If you use our code or wish to refer to our results, please use the following BibTex entry:
52 | #+BEGIN_SRC bibtex
53 | @incollection{yu2019numerically,
54 |   title = {Numerically Accurate Hyperbolic Embeddings Using Tiling-Based Models},
55 |   author = {Yu, Tao and De Sa, Christopher M},
56 |   booktitle = {Proceedings of the 33rd Conference on Neural Information Processing Systems (NeurIPS 2019)},
57 |   month = {Oct.},
58 |   year = {2019}
59 | }
60 | #+END_SRC


--------------------------------------------------------------------------------
/embed.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import torch as th
  4 | import numpy as np
  5 | import logging
  6 | import argparse
  7 | from hype.sn import Embedding, initialize
  8 | from hype.adjacency_matrix_dataset import AdjacencyDataset
  9 | from hype import train
 10 | from hype.graph import load_adjacency_matrix, load_edge_list, eval_reconstruction
 11 | from hype.rsgd import RiemannianSGD
 12 | from hype.Euclidean import EuclideanManifold
 13 | from hype.Poincare import PoincareManifold
 14 | from hype.Lorentz import LorentzManifold
 15 | # from hype.Halfspace import HalfspaceManifold
 16 | from hype.NLorentz import NLorentzManifold
 17 | from hype.LTiling_rsgd import LTilingRSGDManifold
 18 | from hype.NLTiling_rsgd import NLTilingRSGDManifold
 19 | from hype.LTiling_sgd import LTilingSGDManifold
 20 | from hype.HTiling_rsgd import HTilingRSGDManifold
 21 | import sys
 22 | import json
 23 | import torch.multiprocessing as mp
 24 | 
 25 | 
 26 | 
 27 | th.manual_seed(42)
 28 | np.random.seed(42)
 29 | 
 30 | 
 31 | MANIFOLDS = {
 32 |     'Euclidean': EuclideanManifold,
 33 |     'Poincare': PoincareManifold,
 34 |     'Lorentz': LorentzManifold,
 35 |     'Halfspace': HalfspaceManifold,
 36 |     'NLorentz': NLorentzManifold,
 37 |     'LTiling_rsgd': LTilingRSGDManifold,
 38 |     'NLTiling_rsgd': NLTilingRSGDManifold,
 39 |     'LTiling_sgd': LTilingSGDManifold,
 40 |     'HTiling_rsgd': HTilingRSGDManifold
 41 | }
 42 | 
 43 | 
 44 | # Adapated from:
 45 | # https://thisdataguy.com/2017/07/03/no-options-with-argparse-and-python/
 46 | class Unsettable(argparse.Action):
 47 |     def __init__(self, option_strings, dest, nargs=None, **kwargs):
 48 |         super(Unsettable, self).__init__(option_strings, dest, nargs='?', **kwargs)
 49 | 
 50 |     def __call__(self, parser, namespace, values, option_string=None):
 51 |         val = None if option_string.startswith('-no') else values
 52 |         setattr(namespace, self.dest, val)
 53 | 
 54 | 
 55 | def main():    
 56 |     parser = argparse.ArgumentParser(description='Train Hyperbolic Embeddings')
 57 |     parser.add_argument('-dset', type=str, required=True,
 58 |                         help='Dataset identifier')
 59 |     parser.add_argument('-dim', type=int, default=20,
 60 |                         help='Embedding dimension')
 61 |     parser.add_argument('-com_n', type=int, default=2,
 62 |                         help='Embedding components number')
 63 |     parser.add_argument('-manifold', type=str, default='lorentz',
 64 |                         choices=MANIFOLDS.keys(), help='Embedding manifold')
 65 |     parser.add_argument('-lr', type=float, default=1000,
 66 |                         help='Learning rate')
 67 |     parser.add_argument('-epochs', type=int, default=100,
 68 |                         help='Number of epochs')
 69 |     parser.add_argument('-batchsize', type=int, default=12800,
 70 |                         help='Batchsize')
 71 |     parser.add_argument('-negs', type=int, default=50,
 72 |                         help='Number of negatives')
 73 |     parser.add_argument('-burnin', type=int, default=20,
 74 |                         help='Epochs of burn in')
 75 |     parser.add_argument('-dampening', type=float, default=0.75,
 76 |                         help='Sample dampening during burnin')
 77 |     parser.add_argument('-ndproc', type=int, default=8,
 78 |                         help='Number of data loading processes')
 79 |     parser.add_argument('-eval_each', type=int, default=1,
 80 |                         help='Run evaluation every n-th epoch')
 81 |     parser.add_argument('-debug', action='store_true', default=False,
 82 |                         help='Print debuggin output')
 83 |     parser.add_argument('-gpu', default=-1, type=int,
 84 |                         help='Which GPU to run on (-1 for no gpu)')
 85 |     parser.add_argument('-sym', action='store_true', default=False,
 86 |                         help='Symmetrize dataset')
 87 |     parser.add_argument('-maxnorm', '-no-maxnorm', default='500000',
 88 |                         action=Unsettable, type=int)
 89 |     parser.add_argument('-sparse', default=False, action='store_true',
 90 |                         help='Use sparse gradients for embedding table')
 91 |     parser.add_argument('-burnin_multiplier', default=0.01, type=float)
 92 |     parser.add_argument('-neg_multiplier', default=1.0, type=float)
 93 |     parser.add_argument('-quiet', action='store_true', default=True)
 94 |     parser.add_argument('-lr_type', choices=['scale', 'constant'], default='constant')
 95 |     parser.add_argument('-train_threads', type=int, default=1,
 96 |                         help='Number of threads to use in training')
 97 |     parser.add_argument('-eval_embedding', default=False, help='path for the embedding to be evaluated')
 98 |     opt = parser.parse_args()
 99 |     
100 |     if 'LTiling' in opt.manifold:
101 |         opt.nor = 'LTiling'
102 |         opt.norevery = 20
103 |         opt.stre = 50
104 |     elif 'HTiling' in opt.manifold:
105 |         opt.nor = 'HTiling'
106 |         opt.norevery = 1
107 |         opt.stre = 0
108 |     else:
109 |         opt.nor = 'none'
110 | 
111 |     # setup debugging and logigng
112 |     log_level = logging.DEBUG if opt.debug else logging.INFO
113 |     log = logging.getLogger('tiling model')
114 |     logging.basicConfig(level=log_level, format='%(message)s', stream=sys.stdout)
115 | 
116 |     # set default tensor type
117 |     th.set_default_tensor_type('torch.DoubleTensor')####FloatTensor DoubleTensor
118 |     # set device
119 |     # device = th.device(f'cuda:{opt.gpu}' if opt.gpu >= 0 else 'cpu')
120 |     device = th.device('cpu')
121 | 
122 |     # select manifold to optimize on
123 |     manifold = MANIFOLDS[opt.manifold](debug=opt.debug, max_norm=opt.maxnorm, com_n=opt.com_n)
124 |     if 'Halfspace' not in opt.manifold:
125 |         opt.dim = manifold.dim(opt.dim)
126 | 
127 |     if 'csv' in opt.dset:
128 |         log.info('Using edge list dataloader')
129 |         idx, objects, weights = load_edge_list(opt.dset, opt.sym)
130 |         model, data, model_name, conf = initialize(
131 |             manifold, opt, idx, objects, weights, sparse=opt.sparse
132 |         )
133 |     else:
134 |         log.info('Using adjacency matrix dataloader')
135 |         dset = load_adjacency_matrix(opt.dset, 'hdf5')
136 |         log.info('Setting up dataset...')
137 |         data = AdjacencyDataset(dset, opt.negs, opt.batchsize, opt.ndproc,
138 |             opt.burnin > 0, sample_dampening=opt.dampening)
139 |         model = Embedding(data.N, opt.dim, manifold, sparse=opt.sparse, com_n=opt.com_n)
140 |         objects = dset['objects']
141 |     print('the total dimension', model.lt.weight.data.size(-1), 'com_n', opt.com_n)
142 |     # set burnin parameters
143 |     data.neg_multiplier = opt.neg_multiplier
144 |     train._lr_multiplier = opt.burnin_multiplier
145 |     # Build config string for log
146 |     log.info(f'json_conf: {json.dumps(vars(opt))}')
147 |     if opt.lr_type == 'scale':
148 |         opt.lr = opt.lr * opt.batchsize
149 | 
150 |     # setup optimizer
151 |     optimizer = RiemannianSGD(model.optim_params(manifold), lr=opt.lr)
152 |     opt.epoch_start = 0
153 |     adj = {}
154 |     for inputs, _ in data:
155 |         for row in inputs:
156 |             x = row[0].item()
157 |             y = row[1].item()
158 |             if x in adj:
159 |                 adj[x].add(y)
160 |             else:
161 |                 adj[x] = {y}
162 |     if not opt.eval_embedding:
163 |         opt.adj = adj
164 |         model = model.to(device)
165 |         if hasattr(model, 'w_avg'):
166 |             model.w_avg = model.w_avg.to(device)
167 |         if opt.train_threads > 1:
168 |             threads = []
169 |             model = model.share_memory()
170 |             if 'LTiling' in opt.manifold:
171 |                 model.int_matrix.share_memory_()
172 |             kwargs = {'progress' : not opt.quiet}
173 |             for i in range(opt.train_threads):
174 |                 args = (i, device, model, data, optimizer, opt, log)
175 |                 threads.append(mp.Process(target=train.train, args=args, kwargs=kwargs))
176 |                 threads[-1].start()
177 |             [t.join() for t in threads]
178 |         else:
179 |             train.train(device, model, data, optimizer, opt, log, progress=not opt.quiet)
180 |     else:
181 |         model = th.load(opt.eval_embedding, map_location='cpu')['embeddings']
182 | 
183 |     if 'LTiling' in opt.manifold:
184 |         meanrank, maprank = eval_reconstruction(adj, model.lt.weight.data.clone(), manifold.distance, lt_int_matrix = model.int_matrix.data.clone(), workers = opt.ndproc)
185 |         sqnorms = manifold.pnorm(model.lt.weight.data.clone(), model.int_matrix.data.clone())
186 |     else:
187 |         meanrank, maprank = eval_reconstruction(adj, model.lt.weight.data.clone(), manifold.distance, workers = opt.ndproc)
188 |         sqnorms = manifold.pnorm(model.lt.weight.data.clone())
189 |     
190 |     log.info(
191 |         'json_stats final test: {'
192 |         f'"sqnorm_min": {sqnorms.min().item()}, '
193 |         f'"sqnorm_avg": {sqnorms.mean().item()}, '
194 |         f'"sqnorm_max": {sqnorms.max().item()}, '
195 |         f'"mean_rank": {meanrank}, '
196 |         f'"map": {maprank}, '
197 |         '}'
198 |     )
199 |     print(model.lt.weight.data[0])
200 | 
201 | 
202 | if __name__ == '__main__':
203 |     main()
204 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: tiling
 2 | channels:
 3 | - anaconda
 4 | - pytorch
 5 | - defaults
 6 | dependencies:
 7 | - python=3.6
 8 | - nltk
 9 | - scikit-learn
10 | - pytorch=1.0.0
11 | - pandas
12 | - h5py
13 | - cython
14 | - tqdm
15 | 


--------------------------------------------------------------------------------
/hype/Euclidean.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) 2018-present, Facebook, Inc.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | import torch as th
 9 | from .manifold import Manifold
10 | 
11 | 
12 | class EuclideanManifold(Manifold):
13 |     __slots__ = ["max_norm"]
14 | 
15 |     def __init__(self, max_norm=1, **kwargs):
16 |         self.max_norm = max_norm
17 | 
18 |     def normalize(self, u):
19 |         d = u.size(-1)
20 |         u.view(-1, d).renorm_(2, 0, self.max_norm)
21 |         return u
22 | 
23 |     def distance(self, u, v):
24 |         return (u - v).pow(2).sum(dim=-1)
25 | 
26 |     def pnorm(self, u, dim=-1):
27 |         return (u * u).sum(dim=dim).sqrt()
28 | 
29 |     def rgrad(self, p, d_p):
30 |         return d_p
31 | 
32 |     def expm(self, p, d_p, normalize=False, lr=None, out=None):
33 |         if lr is not None:
34 |             d_p.mul_(-lr)
35 |         if out is None:
36 |             out = p
37 |         out.add_(d_p)
38 |         if normalize:
39 |             self.normalize(out)
40 |         return out
41 | 
42 |     def logm(self, p, d_p, out=None):
43 |         return p - d_p
44 | 
45 |     def ptransp(self, p, x, y, v):
46 |         ix, v_ = v._indices().squeeze(), v._values()
47 |         return p.index_copy_(0, ix, v_)
48 | 
49 | 
50 | class TranseManifold(EuclideanManifold):
51 |     def __init__(self, dim, *args, **kwargs):
52 |         super(TranseManifold, self).__init__(*args, **kwargs)
53 |         self.r = th.nn.Parameter(th.randn(dim).view(1, dim))
54 | 
55 |     def distance(self, u, v):
56 |         # batch mode
57 |         if u.dim() == 3:
58 |             r = self.r.unsqueeze(0).expand(v.size(0), v.size(1), self.r.size(1))
59 |         # non batch
60 |         else:
61 |             r = self.r.expand(v.size(0), self.r.size(1))
62 |         return (u - v + r).pow(2).sum(dim=-1)
63 | 


--------------------------------------------------------------------------------
/hype/HTiling_rsgd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (c) 2018-present, Facebook, Inc.
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under the license found in the
  6 | # LICENSE file in the root directory of this source tree.
  7 | 
  8 | import torch as th
  9 | from torch.autograd import Function
 10 | from .common import acosh
 11 | from .manifold import Manifold
 12 | 
 13 | class HTilingRSGDManifold(Manifold):
 14 |     __slots__ = ["eps", "_eps", "norm_clip", "max_norm", "debug"]
 15 | 
 16 |     @staticmethod
 17 |     def dim(dim):
 18 |         return 2*dim+1
 19 | 
 20 |     def __init__(self, eps=1e-12, _eps=1e-5, norm_clip=1, max_norm=1e6,
 21 |             debug=False, **kwargs):
 22 |         self.eps = eps
 23 |         self._eps = _eps
 24 |         self.norm_clip = norm_clip
 25 |         self.max_norm = max_norm
 26 |         self.debug = debug
 27 | 
 28 |     @staticmethod
 29 |     def ldot(u, v, keepdim=False):
 30 |         """Lorentzian Scalar Product"""
 31 |         uv = u * v
 32 |         uv.narrow(-1, 0, 1).mul_(-1)
 33 |         return th.sum(uv, dim=-1, keepdim=keepdim)
 34 |     
 35 |     def sinhc(self, u):
 36 |         return th.div(th.sinh(u),u)
 37 | 
 38 |     def to_poincare_ball(self, uu):
 39 |         d = (uu.size(-1) - 1) // 2
 40 |         j = uu[..., -1]  # n
 41 |         k = uu[..., d:-1]  # n*d
 42 |         u = 2 ** j.unsqueeze(-1).expand_as(k) * (uu[..., :d] + k)
 43 |         uu = th.zeros(u.size(0), d + 1)
 44 |         squnom = th.sum(th.pow(u, 2), dim=-1)  # n
 45 |         uu[..., 0] = th.div(th.ones_like(u[..., -1]), u[..., -1]) + th.div(squnom, 4 * u[..., -1])  # n
 46 |         uu[..., 1] = th.div(th.ones_like(u[..., -1]), u[..., -1]) - th.div(squnom, 4 * u[..., -1])  # n
 47 |         uu[..., 2:] = th.div(u[..., :d - 1], u[..., -1].unsqueeze(-1).expand_as(u[..., :d - 1]))
 48 |         return uu.narrow(-1, 1, d) / (uu.narrow(-1, 0, 1) + 1)
 49 | 
 50 |     def distance(self, uu, vv):
 51 |         dis = HalfspaceRieDistance.apply(uu, vv)
 52 |         return dis
 53 | 
 54 |     def pnorm(self, u):
 55 |         return th.sqrt(th.sum(th.pow(self.to_poincare_ball(u), 2), dim=-1))
 56 | 
 57 |     def normalize(self, w):
 58 |         """Normalize vector such that it is located on the hyperboloid"""
 59 |         d = (w.size(-1) - 1)//2
 60 |         narrowed = w.narrow(-1, d-1, 1)
 61 |         narrowed.clamp_(min=1e-8)
 62 |         return w
 63 | 
 64 |     def normalize_tan(self, x_all, v_all):
 65 |         d = v_all.size(1) - 1
 66 |         x = x_all.narrow(1, 1, d)
 67 |         xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
 68 |         tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
 69 |         tmp.sqrt_().clamp_(min=self._eps)
 70 |         v_all.narrow(1, 0, 1).copy_(xv / tmp)
 71 |         return v_all
 72 | 
 73 |     def init_weights(self, w, irange=1e-5):
 74 |         d = (w.size(-1)-1)//2
 75 |         w.data[...,:d-1].uniform_(-irange, irange)
 76 |         w.data[...,d-1] = irange * th.rand_like(w[...,d-1])
 77 |         w.data[...,d-1].add_(1)
 78 |         # ID
 79 |         w.data[...,d:].zero_()
 80 | 
 81 |     def rgrad(self, p, d_p):
 82 |         d = (p.size(-1)-1)//2
 83 |         """Euclidean gradient for hyperboloid"""
 84 |         if d_p.is_sparse:
 85 |             u = d_p._values()
 86 |             x = p.index_select(0, d_p._indices().squeeze())
 87 |         else:
 88 |             u = d_p
 89 |             x = p
 90 |         u.mul_((x[...,d-1]).unsqueeze(-1))### transform from Euclidean grad to Riemannian grad
 91 |         return d_p
 92 | 
 93 |     def expm(self, p, d_p, lr=None, out=None, normalize=False):
 94 |         """Exponential map for halfspace model"""
 95 |         d = (p.size(-1)-1)//2
 96 |         if out is None:
 97 |             out = p
 98 |         if d_p.is_sparse:
 99 |             ix, d_val = d_p._indices().squeeze(), d_p._values()
100 |             p_val = self.normalize(p.index_select(0, ix))
101 |             newp_val = p_val.clone()
102 |             s = th.norm(d_val[...,:d],dim=-1)#n
103 |             newp_val[...,:d-1] = p_val[...,:d-1] + th.div(p_val[...,d-1], th.div(th.cosh(s), self.sinhc(s))
104 |                                                           -d_val[...,d-1]).unsqueeze(-1).expand_as(d_val[...,:d-1]) * d_val[...,:d-1]#n*(d-1)
105 |             newp_val[...,d-1] = th.div(p_val[...,d-1], th.cosh(s)-d_val[...,d-1]*self.sinhc(s))#n
106 |             newp_val = self.normalize(newp_val)
107 |             p.index_copy_(0, ix, newp_val)
108 |         else:
109 |             raise NotImplementedError
110 | 
111 | 
112 |     def logm(self, x, y):
113 |         """Logarithmic map on the Lorenz Manifold"""
114 |         xy = th.clamp(self.ldot(x, y).unsqueeze(-1), max=-1)
115 |         v = acosh(-xy, self.eps).div_(
116 |             th.clamp(th.sqrt(xy * xy - 1), min=self._eps)
117 |         ) * th.addcmul(y, xy, x)
118 |         return self.normalize_tan(x, v)
119 | 
120 |     def ptransp(self, x, y, v, ix=None, out=None):
121 |         """Parallel transport for hyperboloid"""
122 |         if ix is not None:
123 |             v_ = v
124 |             x_ = x.index_select(0, ix)
125 |             y_ = y.index_select(0, ix)
126 |         elif v.is_sparse:
127 |             ix, v_ = v._indices().squeeze(), v._values()
128 |             x_ = x.index_select(0, ix)
129 |             y_ = y.index_select(0, ix)
130 |         else:
131 |             raise NotImplementedError
132 |         xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
133 |         vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
134 |         vnew = v_ + vy / (1 - xy) * (x_ + y_)
135 |         if out is None:
136 |             return vnew
137 |         else:
138 |             out.index_copy_(0, ix, vnew)
139 | 
140 | class HalfspaceRieDistance(Function):
141 |     @staticmethod
142 |     def forward(self, preu, prev, AvOverflow = False, myeps = 1e-16):
143 |         self.myeps = myeps
144 |         self.AvOverflow = AvOverflow
145 |         assert th.isnan(preu).max()==0, "u includes NaNs"
146 |         assert th.isnan(prev).max()==0, "v includes NaNs"
147 |         if len(preu)<len(prev):
148 |             preu = preu.expand_as(prev)
149 |         elif len(preu)>len(prev):
150 |             prev = prev.expand_as(preu)
151 |         d = (preu.size(-1) - 1) // 2
152 |         preu[..., d - 1].clamp_(min=myeps)
153 |         prev[..., d - 1].clamp_(min=myeps)
154 |         if preu.dtype == th.float64:
155 |             self.ones = (preu[...,-1]<prev[...,-1]).double().unsqueeze(-1).expand_as(preu) #### Swap to make sure j2>j1 as paper mentioned to avoid overflow
156 |         elif preu.dtype == th.float32:
157 |             self.ones = (preu[...,-1]<prev[...,-1]).float().unsqueeze(-1).expand_as(preu)
158 |         u = preu*self.ones+prev*(1-self.ones)
159 |         v = prev*self.ones+preu*(1-self.ones)
160 |         self.save_for_backward(u, v)
161 |         self.j1 = u[...,-1]#m*n
162 |         self.j2 = v[...,-1]#m*n
163 |         k1 = u[...,d:-1]#m*n*d
164 |         k2 = v[...,d:-1]#m*n*d
165 |         if not AvOverflow:
166 |             self.upp = k1 - (2**(self.j2-self.j1)).unsqueeze(-1).expand_as(k2)*k2 + u[...,:d] \
167 |                        - (2**(self.j2-self.j1)).unsqueeze(-1).expand_as(k2)*v[...,:d]#m*n*d
168 |             self.Xprime = th.div(th.sum(th.pow(self.upp, 2), dim=-1), u[...,d-1] * v[...,d-1])#m*n
169 |             self.inside_log_1 = 1+2**(self.j1-self.j2-1)*self.Xprime#m*n
170 |             self.inside_log_2 = th.sqrt(2**(2*(self.j1-self.j2-1))*self.Xprime*self.Xprime+2**(self.j1-self.j2)*self.Xprime)#m*n
171 |             return th.log(self.inside_log_1+self.inside_log_2)
172 |         else:
173 |             # method described in the paper to avoid overflow
174 |             twosR = k1 - (2**(self.j2-self.j1)).unsqueeze(-1).expand_as(k2)*k2#m*n*d
175 |             norm_twosR = th.sqrt(th.sum(th.pow(twosR, 2), dim=-1))
176 |             self.zero_mask = (norm_twosR != 0.0)  ##m*n, to aviod the case norm_twosR==0, which causes NaN when compute log() to get s
177 |             self.s = th.zeros_like(norm_twosR)  ###m*n
178 |             self.s[self.zero_mask] = th.ceil(th.log2(norm_twosR))[self.zero_mask]  # m*n
179 |             R = 2**(-1*self.s.unsqueeze(-1).expand_as(twosR)) * twosR#m*n*d
180 |             self.upp = R + 2**(-1*self.s.unsqueeze(-1).expand_as(R)) * u[...,:d] \
181 |                        - 2**(self.j2-self.j1-self.s).unsqueeze(-1).expand_as(R) * v[...,:d]#m*n*d
182 |             # ################### alternative method to avoid overflow
183 |             # twos_upp = k1 - (2 ** (self.j2 - self.j1)).unsqueeze(-1).expand_as(k2) * k2 + u[..., :d] - (
184 |             #             2 ** (self.j2 - self.j1)).unsqueeze(-1).expand_as(k2) * v[..., :d]  ##m*n*d
185 |             # norm_twos_upp = th.sqrt(th.sum(th.pow(twos_upp, 2), dim=-1))
186 |             # self.zero_mask = (norm_twos_upp != 0.0)  ##m*n, to aviod the case norm_twosR==0, which causes NaN when compute log() to get s
187 |             # self.s = th.zeros_like(norm_twos_upp)  ###m*n
188 |             # self.s[self.zero_mask] = th.ceil(th.log2(norm_twos_upp))[self.zero_mask]  # m*n
189 |             # self.upp = 2 ** (-1 * self.s.unsqueeze(-1).expand_as(twos_upp)) * twos_upp  # m*n*d
190 |             # ###################
191 |             self.X = th.div(th.sum(th.pow(self.upp, 2), dim=-1),2 * u[...,d-1] * v[...,d-1])#m*n
192 |             self.nomdis = th.sqrt(th.clamp(self.X * self.X + 2 * self.X * 2**(-2*self.s-self.j1+self.j2),min=0))#m*n sqrt
193 |             log1t = (2*self.s+self.j1-self.j2)*th.log(th.Tensor([2]))  # m*n
194 |             self.log2t = 2**(-2*self.s-self.j1+self.j2)+self.X+self.nomdis#m*n
195 |             return log1t + th.log(self.log2t)#m*n
196 | 
197 |     @staticmethod
198 |     def backward(self, g):
199 |         u, v = self.saved_tensors
200 |         d = (u.size(-1) - 1) // 2
201 |         u[..., d - 1].clamp_(min=self.myeps)
202 |         v[..., d - 1].clamp_(min=self.myeps)
203 |         g = g.unsqueeze(-1).expand_as(u).clone()
204 |         gu = th.zeros_like(u)  # m*n*(2d+1)
205 |         gv = th.zeros_like(v)  # m*n*(2d+1)
206 |         if not self.AvOverflow:
207 |             auxli_term1 = th.div(th.ones_like(self.inside_log_1), self.inside_log_1+self.inside_log_2)\
208 |                           *(2**(self.j1-self.j2-1)+th.div(2**(2*(self.j1-self.j2-1))*self.Xprime
209 |                                                           + 2**(self.j1-self.j2-1), self.inside_log_2)) #m*n
210 |             auxli_term2 = th.div(2*self.upp, (u[...,d-1] * v[...,d-1]).unsqueeze(-1).expand_as(self.upp))#m*n*d
211 |             gu[..., :d - 1] = auxli_term1.unsqueeze(-1).expand_as(u[...,:d-1]) * auxli_term2[...,:d-1]#m*n*(d-1)
212 |             gu[..., d - 1] = auxli_term1 * (auxli_term2[...,d-1]-th.div(self.Xprime,u[...,d-1]))#m*n
213 |             gv[..., :d - 1] = -1 * 2 ** (self.j2 - self.j1).unsqueeze(-1).expand_as(u[...,:d-1]) * gu[..., :d - 1]  # m*n*(d-1)
214 |             gv[..., d - 1] = auxli_term1 * (-1 * 2 ** (self.j2 - self.j1) * auxli_term2[...,d-1]-th.div(self.Xprime,v[...,d-1]))#m*n
215 |             guu = gu*self.ones+gv*(1-self.ones) #### Swap back accordingly to backpropagate gradient correctly
216 |             gvv = gv*self.ones+gu*(1-self.ones)
217 |         else:
218 |             auxli_term1 = th.div(th.ones_like(self.log2t), self.log2t)*(1+th.div(self.X + 2**(-2*self.s-self.j1+self.j2), self.nomdis)) #m*n
219 |             auxli_term2 = th.div(self.upp, (2**(self.s) * u[...,d-1] * v[...,d-1]).unsqueeze(-1).expand_as(self.upp))#m*n*d
220 |             gu[..., :d - 1] = auxli_term1.unsqueeze(-1).expand_as(u[...,:d-1]) * auxli_term2[...,:d-1]#m*n*(d-1)
221 |             gu[..., d - 1] = auxli_term1 * (auxli_term2[...,d-1]-th.div(self.X,u[...,d-1]))#m*n
222 |             gv[..., :d - 1] = -1 * 2 ** (self.j2 - self.j1).unsqueeze(-1).expand_as(u[...,:d-1]) * gu[..., :d - 1]  # m*n*(d-1)
223 |             gv[..., d - 1] = auxli_term1 * (-1 * 2 ** (self.j2 - self.j1) * auxli_term2[...,d-1]-th.div(self.X,v[...,d-1]))#m*n
224 |             zero_mask = self.zero_mask.unsqueeze(-1).expand_as(gu)
225 |             guu = th.zeros_like(u)
226 |             gvv = th.zeros_like(v)
227 |             guu[zero_mask] = (gu*self.ones+gv*(1-self.ones))[zero_mask]
228 |             gvv[zero_mask] = (gv*self.ones+gu*(1-self.ones))[zero_mask]
229 |         assert th.isnan(gu).max() == 0, "gu includes NaNs"
230 |         assert th.isnan(gv).max() == 0, "gv includes NaNs"
231 |         return g * guu, g * gvv


--------------------------------------------------------------------------------
/hype/LTiling_rsgd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (c) 2018-present, Facebook, Inc.
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under the license found in the
  6 | # LICENSE file in the root directory of this source tree.
  7 | 
  8 | import torch as th
  9 | from torch.autograd import Function
 10 | from .common import acosh
 11 | from .manifold import Manifold
 12 | 
 13 | class LTilingRSGDManifold(Manifold):
 14 |     __slots__ = ["eps", "_eps", "norm_clip", "max_norm", "debug"]
 15 | 
 16 |     @staticmethod
 17 |     def dim(dim):
 18 |         return 3
 19 | 
 20 |     def __init__(self, eps=1e-12, _eps=1e-5, norm_clip=1, max_norm=1e6,
 21 |             debug=False, **kwargs):
 22 |         self.eps = eps
 23 |         self._eps = _eps
 24 |         self.norm_clip = norm_clip
 25 |         self.max_norm = max_norm
 26 |         self.debug = debug
 27 | 
 28 |     @staticmethod
 29 |     def ldot(u, v, keepdim=False):
 30 |         """Lorentzian Scalar Product"""
 31 |         uv = u * v
 32 |         uv.narrow(-1, 0, 1).mul_(-1)
 33 |         return th.sum(uv, dim=-1, keepdim=keepdim)
 34 | 
 35 |     def to_poincare_ball(self, u, u_int_matrix):
 36 |         L = th.sqrt(th.Tensor([[3, 0, 0], [0, 1, 0], [0, 0, 1]]))
 37 |         R = th.sqrt(th.Tensor([[1.0 / 3.0, 0, 0], [0, 1, 0], [0, 0, 1]]))
 38 |         u = th.matmul(L,th.matmul(u_int_matrix, th.matmul(R, u.unsqueeze(-1)))).squeeze(-1)
 39 |         d = u.size(-1) - 1
 40 |         return u.narrow(-1, 1, d) / (u.narrow(-1, 0, 1) + 1)
 41 | 
 42 |     def distance(self, uu, uu_int_matrix, vv, vv_int_matrix):
 43 |         dis = GroupRieDistance.apply(uu, uu_int_matrix, vv, vv_int_matrix)
 44 |         return dis
 45 | 
 46 |     def pnorm(self, u, u_int_matrix):
 47 |         return th.sqrt(th.sum(th.pow(self.to_poincare_ball(u, u_int_matrix), 2), dim=-1))
 48 | 
 49 |     def normalize(self, w):
 50 |         """Normalize vector such that it is located on the hyperboloid"""
 51 |         d = w.size(-1) - 1
 52 |         narrowed = w.narrow(-1, 1, d)
 53 |         if self.max_norm:
 54 |             narrowed.view(-1, d).renorm_(p=2, dim=0, maxnorm=self.max_norm)
 55 |         tmp = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
 56 |         tmp.sqrt_()
 57 |         w.narrow(-1, 0, 1).copy_(tmp)
 58 |         return w
 59 | 
 60 |     def normalize_tan(self, x_all, v_all):
 61 |         d = v_all.size(1) - 1
 62 |         x = x_all.narrow(1, 1, d)
 63 |         xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
 64 |         tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
 65 |         tmp.sqrt_().clamp_(min=self._eps)
 66 |         v_all.narrow(1, 0, 1).copy_(xv / tmp)
 67 |         return v_all
 68 | 
 69 |     def init_weights(self, w, irange=1e-5):
 70 |         w.data.uniform_(-irange, irange)
 71 |         w.data[...,0] = th.sqrt(th.clamp(th.sum(w[...,1:] * w[...,1:], dim=-1),min=0) + 1)
 72 | 
 73 |     def init_weights_int_matrix(self, w):
 74 |         ID = th.eye(3,3)
 75 |         w.data.zero_()
 76 |         w.data.add_(ID)
 77 |     
 78 |     def rgrad(self, p, d_p):
 79 |         """Riemannian gradient for hyperboloid"""
 80 |         if d_p.is_sparse:
 81 |             u = d_p._values()
 82 |             x = p.index_select(0, d_p._indices().squeeze())
 83 |         else:
 84 |             u = d_p
 85 |             x = p
 86 |         u.narrow(-1, 0, 1).mul_(-1)
 87 |         u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x)
 88 |         return d_p
 89 | 
 90 | 
 91 |     def expm(self, p, d_p, lr=None, out=None, normalize=False):
 92 |         """Exponential map for hyperboloid"""
 93 |         if out is None:
 94 |             out = p
 95 |         if d_p.is_sparse:
 96 |             ix, d_val = d_p._indices().squeeze(), d_p._values()
 97 |             p_val = self.normalize(p.index_select(0, ix))
 98 |             ldv = self.ldot(d_val, d_val, keepdim=True)
 99 |             if self.debug:
100 |                 assert all(ldv > 0), "Tangent norm must be greater 0"
101 |                 assert all(ldv == ldv), "Tangent norm includes NaNs"
102 |             nd_p = ldv.clamp_(min=0).sqrt_()
103 |             t = th.clamp(nd_p, max=self.norm_clip)
104 |             nd_p.clamp_(min=self.eps)
105 |             newp = (th.cosh(t) * p_val).addcdiv_(th.sinh(t) * d_val, nd_p)
106 |             if normalize:
107 |                 newp = self.normalize(newp)
108 |             p.index_copy_(0, ix, newp)
109 |         else:
110 |             if lr is not None:
111 |                 d_p.narrow(-1, 0, 1).mul_(-1)
112 |                 d_p.addcmul_((self.ldot(p, d_p, keepdim=True)).expand_as(p), p)
113 |                 d_p.mul_(-lr)
114 |             ldv = self.ldot(d_p, d_p, keepdim=True)
115 |             if self.debug:
116 |                 assert all(ldv > 0), "Tangent norm must be greater 0"
117 |                 assert all(ldv == ldv), "Tangent norm includes NaNs"
118 |             nd_p = ldv.clamp_(min=0).sqrt_()
119 |             t = th.clamp(nd_p, max=self.norm_clip)
120 |             nd_p.clamp_(min=self.eps)
121 |             newp = (th.cosh(t) * p).addcdiv_(th.sinh(t) * d_p, nd_p)
122 |             if normalize:
123 |                 newp = self.normalize(newp)
124 |             p.copy_(newp)
125 | 
126 |     def logm(self, x, y):
127 |         """Logarithmic map on the Lorenz Manifold"""
128 |         xy = th.clamp(self.ldot(x, y).unsqueeze(-1), max=-1)
129 |         v = acosh(-xy, self.eps).div_(
130 |             th.clamp(th.sqrt(xy * xy - 1), min=self._eps)
131 |         ) * th.addcmul(y, xy, x)
132 |         return self.normalize_tan(x, v)
133 | 
134 |     def ptransp(self, x, y, v, ix=None, out=None):
135 |         """Parallel transport for hyperboloid"""
136 |         if ix is not None:
137 |             v_ = v
138 |             x_ = x.index_select(0, ix)
139 |             y_ = y.index_select(0, ix)
140 |         elif v.is_sparse:
141 |             ix, v_ = v._indices().squeeze(), v._values()
142 |             x_ = x.index_select(0, ix)
143 |             y_ = y.index_select(0, ix)
144 |         else:
145 |             raise NotImplementedError
146 |         xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
147 |         vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
148 |         vnew = v_ + vy / (1 - xy) * (x_ + y_)
149 |         if out is None:
150 |             return vnew
151 |         else:
152 |             out.index_copy_(0, ix, vnew)
153 | 
154 | class GroupRieDistance(Function):
155 |     @staticmethod
156 |     def forward(self, u, u_int_matrix, v, v_int_matrix, AvOverflow = False, myeps1 = 1e-8 ,myeps2 = 1e-16, decompose_factor = 25):
157 |         # decompose_factor = 11 for float32; decompose_factor = 25 for float64.
158 |         assert th.isnan(u_int_matrix).max()==0, "u includes NaNs"
159 |         assert th.isnan(v_int_matrix).max()==0, "v includes NaNs"
160 |         if len(u)<len(v):
161 |             u = u.expand_as(v)
162 |             u_int_matrix = u_int_matrix.expand_as(v_int_matrix)
163 |         elif len(u)>len(v):
164 |             v = v.expand_as(u)
165 |             v_int_matrix = v_int_matrix.expand_as(u_int_matrix)
166 |         self.save_for_backward(u, v)
167 |         M3 = th.Tensor([[3, 0, 0], [0, -1, 0], [0, 0, -1]])
168 |         R = th.sqrt(th.Tensor([[1.0 / 3.0, 0, 0], [0, 1, 0], [0, 0, 1]]))
169 |         ############# use U = U1+U2 version, we separate U^TM3V into (U1+U2)^TM3(V1+V2)=U1^TM3V1+U1^TM3V2+U2^TM3V1+U2^TM3V2,
170 |         ############# in order to avoid numerical inprecision of storing
171 |         ############# integers in float, and multiply them to get the other intergers, which may be incorrect due to inprecision.
172 |         u_int_matrix2 = th.fmod(u_int_matrix, 2 ** decompose_factor)
173 |         u_int_matrix1 = u_int_matrix - u_int_matrix2
174 |         v_int_matrix2 = th.fmod(v_int_matrix, 2 ** decompose_factor)
175 |         v_int_matrix1 = v_int_matrix - v_int_matrix2
176 |         Q = th.matmul(u_int_matrix1.transpose(-2,-1), th.matmul(M3, v_int_matrix1))\
177 |                     +(th.matmul(u_int_matrix1.transpose(-2,-1), th.matmul(M3, v_int_matrix2))
178 |                       +th.matmul(u_int_matrix2.transpose(-2,-1), th.matmul(M3, v_int_matrix1)))\
179 |                     +th.matmul(u_int_matrix2.transpose(-2,-1), th.matmul(M3, v_int_matrix2))
180 |         Q11 = th.clamp(Q.narrow(-2,0,1).narrow(-1,0,1),min=myeps1)# divide Q by Q11 to avoid overflow
181 |         if not AvOverflow:#### if the dataset is not complex, and there is overflow concern, we set Q11=1, then Q=hatQ, if AvOverflow is false
182 |             Q11 = th.clamp(Q11,max=1)
183 |         self.hatQ = th.div(Q, Q11.expand_as(Q))#divided by Q11
184 |         RThatQR = th.matmul(R,th.matmul(self.hatQ, R))#cpu float
185 |         d_c = th.matmul(u.unsqueeze(-1).transpose(-2,-1), th.matmul(RThatQR, v.unsqueeze(-1))).squeeze(-1).squeeze(-1)#cpu float
186 |         invQ11 = th.div(th.ones_like(Q11.squeeze(-1).squeeze(-1)),Q11.squeeze(-1).squeeze(-1))#cpu float
187 |         self.nomdis = th.sqrt(th.clamp(d_c*d_c-invQ11*invQ11,min=myeps2))#cpu float
188 |         outp = th.log(Q11.squeeze(-1).squeeze(-1)) + th.log(th.clamp(d_c + self.nomdis,min=myeps1))#cpu float
189 |         return outp
190 | 
191 |     @staticmethod
192 |     def backward(self, g):
193 |         R = th.sqrt(th.Tensor([[1.0 / 3.0, 0, 0], [0, 1, 0], [0, 0, 1]]))
194 |         u, v = self.saved_tensors
195 |         g = g.unsqueeze(-1).expand_as(u)
196 |         uupfrac = th.matmul(R,th.matmul(self.hatQ, th.matmul(R,v.unsqueeze(-1)))).squeeze(-1)
197 |         vupfrac = th.matmul(R,th.matmul(self.hatQ.transpose(-2,-1), th.matmul(R,u.unsqueeze(-1)))).squeeze(-1)
198 |         gu = th.div(uupfrac, self.nomdis.unsqueeze(-1).expand_as(uupfrac))
199 |         gv = th.div(vupfrac, self.nomdis.unsqueeze(-1).expand_as(vupfrac))
200 |         assert th.isnan(gu).max() == 0, "gu includes NaNs"
201 |         assert th.isnan(gv).max() == 0, "gv includes NaNs"
202 |         return g * gu, None, g * gv, None


--------------------------------------------------------------------------------
/hype/LTiling_sgd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (c) 2018-present, Facebook, Inc.
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under the license found in the
  6 | # LICENSE file in the root directory of this source tree.
  7 | 
  8 | import torch as th
  9 | from torch.autograd import Function
 10 | from .common import acosh
 11 | from .manifold import Manifold
 12 | 
 13 | class LTilingSGDManifold(Manifold):
 14 |     __slots__ = ["eps", "_eps", "norm_clip", "max_norm", "debug"]
 15 | 
 16 |     @staticmethod
 17 |     def dim(dim):
 18 |         return 3
 19 | 
 20 |     def __init__(self, eps=1e-12, _eps=1e-5, norm_clip=1, max_norm=1e6,
 21 |             debug=False, **kwargs):
 22 |         self.eps = eps
 23 |         self._eps = _eps
 24 |         self.norm_clip = norm_clip
 25 |         self.max_norm = max_norm
 26 |         self.debug = debug
 27 | 
 28 |     @staticmethod
 29 |     def ldot(u, v, keepdim=False):
 30 |         """Lorentzian Scalar Product"""
 31 |         uv = u * v
 32 |         uv.narrow(-1, 0, 1).mul_(-1)
 33 |         return th.sum(uv, dim=-1, keepdim=keepdim)
 34 | 
 35 |     def to_poincare_ball(self, u, u_int_matrix):
 36 |         L = th.sqrt(th.Tensor([[3, 0, 0], [0, 1, 0], [0, 0, 1]]))
 37 |         R = th.sqrt(th.Tensor([[1.0 / 3.0, 0, 0], [0, 1, 0], [0, 0, 1]]))
 38 |         u = th.matmul(L, th.matmul(u_int_matrix, th.matmul(R, u.unsqueeze(-1)))).squeeze(-1)
 39 |         d = u.size(-1) - 1
 40 |         return u.narrow(-1, 1, d) / (u.narrow(-1, 0, 1) + 1)
 41 | 
 42 |     def distance(self, uu, uu_int_matrix, vv, vv_int_matrix):
 43 |         dis = GroupEucDistance.apply(uu, uu_int_matrix, vv, vv_int_matrix)
 44 |         return dis
 45 | 
 46 |     def pnorm(self, u, u_int_matrix):
 47 |         return th.sqrt(th.sum(th.pow(self.to_poincare_ball(u, u_int_matrix), 2), dim=-1))
 48 | 
 49 |     def normalize(self, w):
 50 |         """Normalize vector such that it is located on the hyperboloid"""
 51 |         d = w.size(-1) - 1
 52 |         narrowed = w.narrow(-1, 1, d)
 53 |         if self.max_norm:
 54 |             narrowed.view(-1, d).renorm_(p=2, dim=0, maxnorm=self.max_norm)
 55 |         tmp = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
 56 |         tmp.sqrt_()
 57 |         w.narrow(-1, 0, 1).copy_(tmp)
 58 |         return w
 59 | 
 60 |     def normalize_tan(self, x_all, v_all):
 61 |         d = v_all.size(1) - 1
 62 |         x = x_all.narrow(1, 1, d)
 63 |         xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
 64 |         tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
 65 |         tmp.sqrt_().clamp_(min=self._eps)
 66 |         v_all.narrow(1, 0, 1).copy_(xv / tmp)
 67 |         return v_all
 68 | 
 69 |     def init_weights(self, w, irange=1e-5):
 70 |         w.data.uniform_(-irange, irange)
 71 |         w.data[..., 0] = th.sqrt(th.clamp(th.sum(w[..., 1:] * w[..., 1:], dim=-1), min=0) + 1)
 72 | 
 73 |     def init_weights_int_matrix(self, w):
 74 |         ID = th.eye(3,3)
 75 |         w.data.zero_()
 76 |         w.data.add_(ID)
 77 |     
 78 |     def rgrad(self, p, d_p):
 79 |         """Riemannian gradient for hyperboloid"""
 80 |         if d_p.is_sparse:
 81 |             u = d_p._values()
 82 |             x = p.index_select(0, d_p._indices().squeeze())
 83 |         else:
 84 |             u = d_p
 85 |             x = p
 86 |         u.narrow(-1, 0, 1).mul_(0)
 87 |         return d_p
 88 | 
 89 | 
 90 |     def expm(self, p, d_p, lr=None, out=None, normalize=False):
 91 |         """Exponential map for hyperboloid"""
 92 |         if out is None:
 93 |             out = p
 94 |         if d_p.is_sparse:
 95 |             ix, d_val = d_p._indices().squeeze(), d_p._values()
 96 |             p_val = self.normalize(p.index_select(0, ix))
 97 |             newp = p_val + d_val
 98 |             newp = self.normalize(newp)
 99 |             p.index_copy_(0, ix, newp)
100 |         else:
101 |             if lr is not None:
102 |                 d_p.narrow(-1, 0, 1).mul_(0)
103 |                 d_p.mul_(-lr)
104 |             newp = p + d_p
105 |             newp = self.normalize(newp)
106 |             if normalize:
107 |                 newp = self.normalize(newp)
108 |             p.copy_(newp)
109 | 
110 |     def logm(self, x, y):
111 |         """Logarithmic map on the Lorenz Manifold"""
112 |         xy = th.clamp(self.ldot(x, y).unsqueeze(-1), max=-1)
113 |         v = acosh(-xy, self.eps).div_(
114 |             th.clamp(th.sqrt(xy * xy - 1), min=self._eps)
115 |         ) * th.addcmul(y, xy, x)
116 |         return self.normalize_tan(x, v)
117 | 
118 |     def ptransp(self, x, y, v, ix=None, out=None):
119 |         """Parallel transport for hyperboloid"""
120 |         if ix is not None:
121 |             v_ = v
122 |             x_ = x.index_select(0, ix)
123 |             y_ = y.index_select(0, ix)
124 |         elif v.is_sparse:
125 |             ix, v_ = v._indices().squeeze(), v._values()
126 |             x_ = x.index_select(0, ix)
127 |             y_ = y.index_select(0, ix)
128 |         else:
129 |             raise NotImplementedError
130 |         xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
131 |         vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
132 |         vnew = v_ + vy / (1 - xy) * (x_ + y_)
133 |         if out is None:
134 |             return vnew
135 |         else:
136 |             out.index_copy_(0, ix, vnew)
137 | 
138 | myeps1 = 1e-8
139 | myeps2 = 1e-16
140 | myn = 25##for float64
141 | # myn = 11##for float32
142 | 
143 | def nabl(u):
144 |     nablaut = th.zeros(u.size(0),u.size(1),3,3)
145 |     uu = th.div(u, u[..., 0:1].expand_as(u))
146 |     nablaut[..., 0, :].copy_(uu)
147 |     nablaut[..., 1, :].copy_(th.Tensor([0, 1, 0]).unsqueeze(0).unsqueeze(0).expand_as(uu))
148 |     nablaut[..., 2, :].copy_(th.Tensor([0, 0, 1]).unsqueeze(0).unsqueeze(0).expand_as(uu))
149 |     return nablaut.transpose(-2, -1)
150 | 
151 | class GroupEucDistance(Function):
152 |     @staticmethod
153 |     def forward(self, u, u_int_matrix, v, v_int_matrix, AvOverflow=False, myeps1=1e-8, myeps2=1e-16,
154 |                 decompose_factor=25):
155 |         # decompose_factor = 11 for float32; decompose_factor = 25 for float64.
156 |         assert th.isnan(u_int_matrix).max() == 0, "u includes NaNs"
157 |         assert th.isnan(v_int_matrix).max() == 0, "v includes NaNs"
158 |         if len(u) < len(v):
159 |             u = u.expand_as(v)
160 |             u_int_matrix = u_int_matrix.expand_as(v_int_matrix)
161 |         elif len(u) > len(v):
162 |             v = v.expand_as(u)
163 |             v_int_matrix = v_int_matrix.expand_as(u_int_matrix)
164 |         self.save_for_backward(u, v)
165 |         M3 = th.Tensor([[3, 0, 0], [0, -1, 0], [0, 0, -1]])
166 |         R = th.sqrt(th.Tensor([[1.0 / 3.0, 0, 0], [0, 1, 0], [0, 0, 1]]))
167 |         ############# use U = U1+U2 version, we separate U^TM3V into (U1+U2)^TM3(V1+V2)=U1^TM3V1+U1^TM3V2+U2^TM3V1+U2^TM3V2,
168 |         ############# in order to avoid numerical inprecision of storing
169 |         ############# integers in float, and multiply them to get the other intergers, which may be incorrect due to inprecision.
170 |         u_int_matrix2 = th.fmod(u_int_matrix, 2 ** decompose_factor)
171 |         u_int_matrix1 = u_int_matrix - u_int_matrix2
172 |         v_int_matrix2 = th.fmod(v_int_matrix, 2 ** decompose_factor)
173 |         v_int_matrix1 = v_int_matrix - v_int_matrix2
174 |         Q = th.matmul(u_int_matrix1.transpose(-2, -1), th.matmul(M3, v_int_matrix1)) \
175 |             + (th.matmul(u_int_matrix1.transpose(-2, -1), th.matmul(M3, v_int_matrix2))
176 |                + th.matmul(u_int_matrix2.transpose(-2, -1), th.matmul(M3, v_int_matrix1))) \
177 |             + th.matmul(u_int_matrix2.transpose(-2, -1), th.matmul(M3, v_int_matrix2))
178 |         Q11 = th.clamp(Q.narrow(-2, 0, 1).narrow(-1, 0, 1), min=myeps1)  # divide Q by Q11 to avoid overflow
179 |         if not AvOverflow:  #### if the dataset is not complex, and there is overflow concern, we set Q11=1, then Q=hatQ, if AvOverflow is false
180 |             Q11 = th.clamp(Q11, max=1)
181 |         self.hatQ = th.div(Q, Q11.expand_as(Q))  # divided by Q11
182 |         RThatQR = th.matmul(R, th.matmul(self.hatQ, R))  # cpu float
183 |         d_c = th.matmul(u.unsqueeze(-1).transpose(-2, -1), th.matmul(RThatQR, v.unsqueeze(-1))).squeeze(-1).squeeze(
184 |             -1)  # cpu float
185 |         invQ11 = th.div(th.ones_like(Q11.squeeze(-1).squeeze(-1)), Q11.squeeze(-1).squeeze(-1))  # cpu float
186 |         self.nomdis = th.sqrt(th.clamp(d_c * d_c - invQ11 * invQ11, min=myeps2))  # cpu float
187 |         outp = th.log(Q11.squeeze(-1).squeeze(-1)) + th.log(th.clamp(d_c + self.nomdis, min=myeps1))  # cpu float
188 |         return outp
189 | 
190 |     @staticmethod
191 |     def backward(self, g):
192 |         R = th.sqrt(th.Tensor([[1.0 / 3.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]]))
193 |         u, v = self.saved_tensors
194 |         g = g.unsqueeze(-1).expand_as(u)
195 |         nablaut = nabl(u)
196 |         nablavt = nabl(v)
197 |         uupfrac = th.matmul(nablaut, th.matmul(R,th.matmul(self.hatQ, th.matmul(R,v.unsqueeze(-1))))).squeeze(-1)
198 |         vupfrac = th.matmul(nablavt, th.matmul(R,th.matmul(self.hatQ.transpose(-2,-1), th.matmul(R,u.unsqueeze(-1))))).squeeze(-1)
199 |         gu = th.div(uupfrac,self.nomdis.unsqueeze(-1).expand_as(uupfrac))
200 |         gv = th.div(vupfrac,self.nomdis.unsqueeze(-1).expand_as(vupfrac))
201 |         assert th.isnan(gu).max() == 0, "gu includes NaNs"
202 |         assert th.isnan(gv).max() == 0, "gv includes NaNs"
203 |         return g * gu, None, g * gv, None


--------------------------------------------------------------------------------
/hype/Lorentz.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (c) 2018-present, Facebook, Inc.
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under the license found in the
  6 | # LICENSE file in the root directory of this source tree.
  7 | 
  8 | import torch as th
  9 | from torch.autograd import Function
 10 | from .common import acosh
 11 | from .manifold import Manifold
 12 | 
 13 | 
 14 | class LorentzManifold(Manifold):
 15 |     __slots__ = ["eps", "_eps", "norm_clip", "max_norm", "debug"]
 16 | 
 17 |     @staticmethod
 18 |     def dim(dim):
 19 |         return dim + 1
 20 | 
 21 |     def __init__(self, eps=1e-12, _eps=1e-5, norm_clip=1, max_norm=1e6,
 22 |             debug=False, **kwargs):
 23 |         self.eps = eps
 24 |         self._eps = _eps
 25 |         self.norm_clip = norm_clip
 26 |         self.max_norm = max_norm
 27 |         self.debug = debug
 28 | 
 29 |     @staticmethod
 30 |     def ldot(u, v, keepdim=False):
 31 |         """Lorentzian Scalar Product"""
 32 |         uv = u * v
 33 |         uv.narrow(-1, 0, 1).mul_(-1)
 34 |         return th.sum(uv, dim=-1, keepdim=keepdim)
 35 | 
 36 |     def to_poincare_ball(self, u):
 37 |         x = u.clone()
 38 |         d = x.size(-1) - 1
 39 |         return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1)
 40 | 
 41 |     def distance(self, u, v):
 42 |         d = -LorentzDot.apply(u, v)
 43 |         d.data.clamp_(min=1)
 44 |         return acosh(d, self._eps)
 45 | 
 46 |     def pnorm(self, u):
 47 |         return th.sqrt(th.sum(th.pow(self.to_poincare_ball(u), 2), dim=-1))
 48 | 
 49 |     def normalize(self, w):
 50 |         """Normalize vector such that it is located on the hyperboloid"""
 51 |         d = w.size(-1) - 1
 52 |         narrowed = w.narrow(-1, 1, d)
 53 |         if self.max_norm:
 54 |             narrowed.view(-1, d).renorm_(p=2, dim=0, maxnorm=self.max_norm)
 55 |         tmp = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
 56 |         tmp.sqrt_()
 57 |         w.narrow(-1, 0, 1).copy_(tmp)
 58 |         return w
 59 | 
 60 |     def normalize_tan(self, x_all, v_all):
 61 |         d = v_all.size(1) - 1
 62 |         x = x_all.narrow(1, 1, d)
 63 |         xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
 64 |         tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
 65 |         tmp.sqrt_().clamp_(min=self._eps)
 66 |         v_all.narrow(1, 0, 1).copy_(xv / tmp)
 67 |         return v_all
 68 | 
 69 |     def init_weights(self, w, irange=1e-5):
 70 |         w.data.uniform_(-irange, irange)
 71 |         w.data.copy_(self.normalize(w.data))
 72 | 
 73 |     def rgrad(self, p, d_p):
 74 |         """Riemannian gradient for hyperboloid"""
 75 |         if d_p.is_sparse:
 76 |             u = d_p._values()
 77 |             x = p.index_select(0, d_p._indices().squeeze())
 78 |         else:
 79 |             u = d_p
 80 |             x = p
 81 |         u.narrow(-1, 0, 1).mul_(-1)
 82 |         u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x)
 83 |         return d_p
 84 | 
 85 |     def expm(self, p, d_p, lr=None, out=None, normalize=False):
 86 |         """Exponential map for hyperboloid"""
 87 |         if out is None:
 88 |             out = p
 89 |         if d_p.is_sparse:
 90 |             ix, d_val = d_p._indices().squeeze(), d_p._values()
 91 |             # This pulls `ix` out of the original embedding table, which could
 92 |             # be in a corrupted state.  normalize it to fix it back to the
 93 |             # surface of the hyperboloid...
 94 |             # TODO: we should only do the normalize if we know that we are
 95 |             # training with multiple threads, otherwise this is a bit wasteful
 96 |             p_val = self.normalize(p.index_select(0, ix))
 97 |             ldv = self.ldot(d_val, d_val, keepdim=True)
 98 |             if self.debug:
 99 |                 assert all(ldv > 0), "Tangent norm must be greater 0"
100 |                 assert all(ldv == ldv), "Tangent norm includes NaNs"
101 |             nd_p = ldv.clamp_(min=0).sqrt_()
102 |             t = th.clamp(nd_p, max=self.norm_clip)
103 |             nd_p.clamp_(min=self.eps)
104 |             newp = (th.cosh(t) * p_val).addcdiv_(th.sinh(t) * d_val, nd_p)
105 |             if normalize:
106 |                 newp = self.normalize(newp)
107 |             p.index_copy_(0, ix, newp)
108 |         else:
109 |             if lr is not None:
110 |                 d_p.narrow(-1, 0, 1).mul_(-1)
111 |                 d_p.addcmul_((self.ldot(p, d_p, keepdim=True)).expand_as(p), p)
112 |                 d_p.mul_(-lr)
113 |             ldv = self.ldot(d_p, d_p, keepdim=True)
114 |             if self.debug:
115 |                 assert all(ldv > 0), "Tangent norm must be greater 0"
116 |                 assert all(ldv == ldv), "Tangent norm includes NaNs"
117 |             nd_p = ldv.clamp_(min=0).sqrt_()
118 |             t = th.clamp(nd_p, max=self.norm_clip)
119 |             nd_p.clamp_(min=self.eps)
120 |             newp = (th.cosh(t) * p).addcdiv_(th.sinh(t) * d_p, nd_p)
121 |             if normalize:
122 |                 newp = self.normalize(newp)
123 |             p.copy_(newp)
124 | 
125 |     def logm(self, x, y):
126 |         """Logarithmic map on the Lorenz Manifold"""
127 |         xy = th.clamp(self.ldot(x, y).unsqueeze(-1), max=-1)
128 |         v = acosh(-xy, self.eps).div_(
129 |             th.clamp(th.sqrt(xy * xy - 1), min=self._eps)
130 |         ) * th.addcmul(y, xy, x)
131 |         return self.normalize_tan(x, v)
132 | 
133 |     def ptransp(self, x, y, v, ix=None, out=None):
134 |         """Parallel transport for hyperboloid"""
135 |         if ix is not None:
136 |             v_ = v
137 |             x_ = x.index_select(0, ix)
138 |             y_ = y.index_select(0, ix)
139 |         elif v.is_sparse:
140 |             ix, v_ = v._indices().squeeze(), v._values()
141 |             x_ = x.index_select(0, ix)
142 |             y_ = y.index_select(0, ix)
143 |         else:
144 |             raise NotImplementedError
145 |         xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
146 |         vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
147 |         vnew = v_ + vy / (1 - xy) * (x_ + y_)
148 |         if out is None:
149 |             return vnew
150 |         else:
151 |             out.index_copy_(0, ix, vnew)
152 | 
153 | 
154 | class LorentzDot(Function):
155 |     @staticmethod
156 |     def forward(ctx, u, v):
157 |         ctx.save_for_backward(u, v)
158 |         return LorentzManifold.ldot(u, v)
159 | 
160 |     @staticmethod
161 |     def backward(ctx, g):
162 |         u, v = ctx.saved_tensors
163 |         g = g.unsqueeze(-1).expand_as(u).clone()
164 |         g.narrow(-1, 0, 1).mul_(-1)
165 |         return g * v, g * u
166 | 


--------------------------------------------------------------------------------
/hype/NLTiling_rsgd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (c) 2018-present, Facebook, Inc.
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under the license found in the
  6 | # LICENSE file in the root directory of this source tree.
  7 | 
  8 | import torch as th
  9 | from torch.autograd import Function
 10 | from .common import acosh
 11 | from .manifold import Manifold
 12 | 
 13 | class NLTilingRSGDManifold(Manifold):
 14 |     __slots__ = ["eps", "_eps", "norm_clip", "max_norm", "debug"]
 15 | 
 16 |     @staticmethod
 17 |     def dim(dim):
 18 |         return dim*3
 19 | 
 20 |     def __init__(self, eps=1e-12, _eps=1e-5, norm_clip=1, max_norm=1e6,
 21 |             debug=False, **kwargs):
 22 |         self.eps = eps
 23 |         self._eps = _eps
 24 |         self.norm_clip = norm_clip
 25 |         self.max_norm = max_norm
 26 |         self.debug = debug
 27 | 
 28 |     @staticmethod
 29 |     def ldot(u, v, keepdim=False):
 30 |         """Lorentzian Scalar Product"""
 31 |         uv = u * v
 32 |         uv.narrow(-1, 0, 1).mul_(-1)
 33 |         return th.sum(uv, dim=-1, keepdim=keepdim)
 34 | 
 35 |     def to_poincare_ball(self, u, u_int_matrix):
 36 |         L = th.sqrt(th.Tensor([[3, 0, 0], [0, 1, 0], [0, 0, 1]]))
 37 |         R = th.sqrt(th.Tensor([[1.0 / 3.0, 0, 0], [0, 1, 0], [0, 0, 1]]))
 38 |         u = th.matmul(L, th.matmul(u_int_matrix, th.matmul(R, u.unsqueeze(-1)))).squeeze(-1)
 39 |         d = u.size(-1) - 1
 40 |         return u.narrow(-1, 1, d) / (u.narrow(-1, 0, 1) + 1)
 41 | 
 42 |     def distance(self, uu, uu_int_matrix, vv, vv_int_matrix):
 43 |         dimension = uu.size(-1)//3
 44 |         d_all = 0
 45 |         for i in range(dimension):
 46 |             d_all += GroupRiehighDistance.apply(uu[...,3*i:3*(i+1)], uu_int_matrix[...,i,:,:], vv[...,3*i:3*(i+1)], vv_int_matrix[...,i,:,:])
 47 |         return d_all
 48 |     
 49 |     def pnorm(self, u, u_int_matrix):
 50 |         dimension = u.size(-1)//3
 51 |         all_norm = 0
 52 |         for i in range(dimension):
 53 |             all_norm += th.sqrt(th.sum(th.pow(self.to_poincare_ball(u[...,3*i:3*(i+1)],u_int_matrix[...,i,:,:]), 2), dim=-1))
 54 |         return all_norm/dimension
 55 |     
 56 |     def normalize(self, ww, gra=True):
 57 |         """Normalize vector such that it is located on the hyperboloid"""
 58 |         if gra:
 59 |             dimension = ww.size(-1)//3
 60 |             for i in range(dimension):
 61 |                 w = ww[...,3*i:3*(i+1)]
 62 |                 d = w.size(-1) - 1
 63 |                 narrowed = w.narrow(-1, 1, d)
 64 |                 if self.max_norm:
 65 |                     narrowed.view(-1, d).renorm_(p=2, dim=0, maxnorm=self.max_norm)
 66 |                 tmp = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
 67 |                 tmp.sqrt_()
 68 |                 w.narrow(-1, 0, 1).copy_(tmp)
 69 |         else:
 70 |             w = ww
 71 |             d = w.size(-1) - 1
 72 |             narrowed = w.narrow(-1, 1, d)
 73 |             if self.max_norm:
 74 |                 narrowed.view(-1, d).renorm_(p=2, dim=0, maxnorm=self.max_norm)
 75 |             tmp = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
 76 |             tmp.sqrt_()
 77 |             w.narrow(-1, 0, 1).copy_(tmp)
 78 |         return ww
 79 |     
 80 | 
 81 |     def normalize_tan(self, x_all, v_all):
 82 |         d = v_all.size(1) - 1
 83 |         x = x_all.narrow(1, 1, d)
 84 |         xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
 85 |         tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
 86 |         tmp.sqrt_().clamp_(min=self._eps)
 87 |         v_all.narrow(1, 0, 1).copy_(xv / tmp)
 88 |         return v_all
 89 |     
 90 |     def init_weights(self, w, irange=1e-5):
 91 |         w.data.uniform_(-irange, irange)
 92 |         w.data.copy_(self.normalize(w.data))
 93 | 
 94 |     def init_weights_int_matrix(self, w):
 95 |         ID = th.eye(3,3)
 96 |         w.data.zero_()
 97 |         w.data.add_(ID)
 98 |     
 99 |     def rgrad(self, p, d_p):
100 |         """Riemannian gradient for hyperboloid"""
101 |         if d_p.is_sparse:
102 |             uu = d_p._values()
103 |             xx = p.index_select(0, d_p._indices().squeeze())
104 |         else:
105 |             uu = d_p
106 |             xx = p
107 |         dimension = p.size(-1)//3
108 |         for i in range(dimension):
109 |             u = uu[...,3*i:3*(i+1)]
110 |             x = xx[...,3*i:3*(i+1)]
111 |             u.narrow(-1, 0, 1).mul_(-1)
112 |             u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x)
113 |         return d_p
114 | 
115 |     def expm(self, pp, d_pp, lr=None, out=None, normalize=False):
116 |         dimension = pp.size(-1)//3
117 |         ix, d_val_p = d_pp._indices().squeeze(), d_pp._values()
118 |         p_val_p = self.normalize(pp.index_select(0, ix))
119 |         for i in range(dimension):
120 |             p = pp[...,3*i:3*(i+1)]
121 |             d_val = d_val_p[...,3*i:3*(i+1)]
122 |             p_val = p_val_p[...,3*i:3*(i+1)]
123 |             ldv = self.ldot(d_val, d_val, keepdim=True)
124 |             if self.debug:
125 |                 assert all(ldv > 0), "Tangent norm must be greater 0"
126 |                 assert all(ldv == ldv), "Tangent norm includes NaNs"
127 |             nd_p = ldv.clamp_(min=0).sqrt_()
128 |             t = th.clamp(nd_p, max=self.norm_clip)
129 |             nd_p.clamp_(min=self.eps)
130 |             newp = (th.cosh(t) * p_val).addcdiv_(th.sinh(t) * d_val, nd_p)
131 |             if normalize:
132 |                 newp = self.normalize(newp,gra=False)
133 |             p.index_copy_(0, ix, newp)
134 | 
135 |     def logm(self, x, y):
136 |         """Logarithmic map on the Lorenz Manifold"""
137 |         xy = th.clamp(self.ldot(x, y).unsqueeze(-1), max=-1)
138 |         v = acosh(-xy, self.eps).div_(
139 |             th.clamp(th.sqrt(xy * xy - 1), min=self._eps)
140 |         ) * th.addcmul(y, xy, x)
141 |         return self.normalize_tan(x, v)
142 | 
143 |     def ptransp(self, x, y, v, ix=None, out=None):
144 |         """Parallel transport for hyperboloid"""
145 |         if ix is not None:
146 |             v_ = v
147 |             x_ = x.index_select(0, ix)
148 |             y_ = y.index_select(0, ix)
149 |         elif v.is_sparse:
150 |             ix, v_ = v._indices().squeeze(), v._values()
151 |             x_ = x.index_select(0, ix)
152 |             y_ = y.index_select(0, ix)
153 |         else:
154 |             raise NotImplementedError
155 |         xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
156 |         vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
157 |         vnew = v_ + vy / (1 - xy) * (x_ + y_)
158 |         if out is None:
159 |             return vnew
160 |         else:
161 |             out.index_copy_(0, ix, vnew)
162 | 
163 | class GroupRiehighDistance(Function):
164 |     @staticmethod
165 |     def forward(self, u, u_int_matrix, v, v_int_matrix, AvOverflow=False, myeps1=1e-8, myeps2=1e-16,
166 |                 decompose_factor=25):
167 |         # decompose_factor = 11 for float32; decompose_factor = 25 for float64.
168 |         assert th.isnan(u_int_matrix).max() == 0, "u includes NaNs"
169 |         assert th.isnan(v_int_matrix).max() == 0, "v includes NaNs"
170 |         if len(u) < len(v):
171 |             u = u.expand_as(v)
172 |             u_int_matrix = u_int_matrix.expand_as(v_int_matrix)
173 |         elif len(u) > len(v):
174 |             v = v.expand_as(u)
175 |             v_int_matrix = v_int_matrix.expand_as(u_int_matrix)
176 |         self.save_for_backward(u, v)
177 |         M3 = th.Tensor([[3, 0, 0], [0, -1, 0], [0, 0, -1]])
178 |         R = th.sqrt(th.Tensor([[1.0 / 3.0, 0, 0], [0, 1, 0], [0, 0, 1]]))
179 |         ############# use U = U1+U2 version, we separate U^TM3V into (U1+U2)^TM3(V1+V2)=U1^TM3V1+U1^TM3V2+U2^TM3V1+U2^TM3V2,
180 |         ############# in order to avoid numerical inprecision of storing
181 |         ############# integers in float, and multiply them to get the other intergers, which may be incorrect due to inprecision.
182 |         u_int_matrix2 = th.fmod(u_int_matrix, 2 ** decompose_factor)
183 |         u_int_matrix1 = u_int_matrix - u_int_matrix2
184 |         v_int_matrix2 = th.fmod(v_int_matrix, 2 ** decompose_factor)
185 |         v_int_matrix1 = v_int_matrix - v_int_matrix2
186 |         Q = th.matmul(u_int_matrix1.transpose(-2, -1), th.matmul(M3, v_int_matrix1)) \
187 |             + (th.matmul(u_int_matrix1.transpose(-2, -1), th.matmul(M3, v_int_matrix2))
188 |                + th.matmul(u_int_matrix2.transpose(-2, -1), th.matmul(M3, v_int_matrix1))) \
189 |             + th.matmul(u_int_matrix2.transpose(-2, -1), th.matmul(M3, v_int_matrix2))
190 |         Q11 = th.clamp(Q.narrow(-2, 0, 1).narrow(-1, 0, 1), min=myeps1)  # divide Q by Q11 to avoid overflow
191 |         if not AvOverflow:  #### if the dataset is not complex, and there is overflow concern, we set Q11=1, then Q=hatQ, if AvOverflow is false
192 |             Q11 = th.clamp(Q11, max=1)
193 |         self.hatQ = th.div(Q, Q11.expand_as(Q))  # divided by Q11
194 |         RThatQR = th.matmul(R, th.matmul(self.hatQ, R))  # cpu float
195 |         d_c = th.matmul(u.unsqueeze(-1).transpose(-2, -1), th.matmul(RThatQR, v.unsqueeze(-1))).squeeze(-1).squeeze(
196 |             -1)  # cpu float
197 |         invQ11 = th.div(th.ones_like(Q11.squeeze(-1).squeeze(-1)), Q11.squeeze(-1).squeeze(-1))  # cpu float
198 |         self.nomdis = th.sqrt(th.clamp(d_c * d_c - invQ11 * invQ11, min=myeps2))  # cpu float
199 |         outp = th.log(Q11.squeeze(-1).squeeze(-1)) + th.log(th.clamp(d_c + self.nomdis, min=myeps1))  # cpu float
200 |         return outp
201 | 
202 |     @staticmethod
203 |     def backward(self, g):
204 |         R = th.sqrt(th.Tensor([[1.0 / 3.0, 0, 0], [0, 1, 0], [0, 0, 1]]))
205 |         u, v = self.saved_tensors
206 |         g = g.unsqueeze(-1).expand_as(u)
207 |         uupfrac = th.matmul(R, th.matmul(self.hatQ, th.matmul(R, v.unsqueeze(-1)))).squeeze(-1)
208 |         vupfrac = th.matmul(R, th.matmul(self.hatQ.transpose(-2, -1), th.matmul(R, u.unsqueeze(-1)))).squeeze(-1)
209 |         gu = th.div(uupfrac, self.nomdis.unsqueeze(-1).expand_as(uupfrac))
210 |         gv = th.div(vupfrac, self.nomdis.unsqueeze(-1).expand_as(vupfrac))
211 |         assert th.isnan(gu).max() == 0, "gu includes NaNs"
212 |         assert th.isnan(gv).max() == 0, "gv includes NaNs"
213 |         return g * gu, None, g * gv, None


--------------------------------------------------------------------------------
/hype/NLorentz.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (c) 2018-present, Facebook, Inc.
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under the license found in the
  6 | # LICENSE file in the root directory of this source tree.
  7 | 
  8 | import torch as th
  9 | from torch.autograd import Function
 10 | from .common import acosh
 11 | from .manifold import Manifold
 12 | 
 13 | 
 14 | class NLorentzManifold(Manifold):
 15 |     __slots__ = ["eps", "_eps", "norm_clip", "max_norm", "debug"]
 16 | 
 17 |     @staticmethod
 18 |     def dim(dim):
 19 |         return dim*3
 20 | 
 21 |     def __init__(self, eps=1e-12, _eps=1e-5, norm_clip=1, max_norm=1e6,
 22 |             debug=False, **kwargs):
 23 |         self.eps = eps
 24 |         self._eps = _eps
 25 |         self.norm_clip = norm_clip
 26 |         self.max_norm = max_norm
 27 |         self.debug = debug
 28 | 
 29 |     @staticmethod
 30 |     def ldot(u, v, keepdim=False):
 31 |         """Lorentzian Scalar Product"""
 32 |         uv = u * v
 33 |         uv.narrow(-1, 0, 1).mul_(-1)
 34 |         return th.sum(uv, dim=-1, keepdim=keepdim)
 35 | 
 36 |     def to_poincare_ball(self, u):
 37 |         x = u.clone()
 38 |         d = x.size(-1) - 1
 39 |         return x.narrow(-1, 1, d) / (x.narrow(-1, 0, 1) + 1)
 40 | 
 41 |     def distance(self, u, v):
 42 |         dimension = u.size(-1)//3
 43 |         for i in range(dimension):
 44 |             d = -LorentzDot.apply(u[...,3*i:3*(i+1)], v[...,3*i:3*(i+1)])
 45 |             d.data.clamp_(min=1)
 46 |             if i==0:
 47 |                 d_all = acosh(d, self._eps)
 48 |             else:
 49 |                 d_all += acosh(d, self._eps)
 50 |         return d_all
 51 | 
 52 |     def pnorm(self, u):
 53 |         dimension = u.size(-1)//3
 54 |         for i in range(dimension):
 55 |             if i==0:
 56 |                 all_norm = th.sqrt(th.sum(th.pow(self.to_poincare_ball(u[...,3*i:3*(i+1)]), 2), dim=-1))
 57 |             else:
 58 |                 all_norm += th.sqrt(th.sum(th.pow(self.to_poincare_ball(u[...,3*i:3*(i+1)]), 2), dim=-1))
 59 |         return all_norm/dimension
 60 | 
 61 |     def normalize(self, ww, gra=True):
 62 |         """Normalize vector such that it is located on the hyperboloid"""
 63 |         if gra:
 64 |             dimension = ww.size(-1)//3
 65 |             for i in range(dimension):
 66 |                 w = ww[...,3*i:3*(i+1)]
 67 |                 d = w.size(-1) - 1
 68 |                 narrowed = w.narrow(-1, 1, d)
 69 |                 if self.max_norm:
 70 |                     narrowed.view(-1, d).renorm_(p=2, dim=0, maxnorm=self.max_norm)
 71 |                 tmp = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
 72 |                 tmp.sqrt_()
 73 |                 w.narrow(-1, 0, 1).copy_(tmp)
 74 |         else:
 75 |             w = ww
 76 |             d = w.size(-1) - 1
 77 |             narrowed = w.narrow(-1, 1, d)
 78 |             if self.max_norm:
 79 |                 narrowed.view(-1, d).renorm_(p=2, dim=0, maxnorm=self.max_norm)
 80 |             tmp = 1 + th.sum(th.pow(narrowed, 2), dim=-1, keepdim=True)
 81 |             tmp.sqrt_()
 82 |             w.narrow(-1, 0, 1).copy_(tmp)
 83 |         return ww
 84 | 
 85 |     def normalize_tan(self, x_all, v_all):
 86 |         d = v_all.size(1) - 1
 87 |         x = x_all.narrow(1, 1, d)
 88 |         xv = th.sum(x * v_all.narrow(1, 1, d), dim=1, keepdim=True)
 89 |         tmp = 1 + th.sum(th.pow(x_all.narrow(1, 1, d), 2), dim=1, keepdim=True)
 90 |         tmp.sqrt_().clamp_(min=self._eps)
 91 |         v_all.narrow(1, 0, 1).copy_(xv / tmp)
 92 |         return v_all
 93 | 
 94 |     def init_weights(self, w, irange=1e-5):
 95 |         w.data.uniform_(-irange, irange)
 96 |         w.data.copy_(self.normalize(w.data))
 97 | 
 98 |     def rgrad(self, p, d_p):
 99 |         """Riemannian gradient for hyperboloid"""
100 |         if d_p.is_sparse:
101 |             uu = d_p._values()
102 |             xx = p.index_select(0, d_p._indices().squeeze())
103 |         else:
104 |             uu = d_p
105 |             xx = p
106 |         dimension = p.size(-1)//3
107 |         for i in range(dimension):
108 |             u = uu[...,3*i:3*(i+1)]
109 |             x = xx[...,3*i:3*(i+1)]
110 |             u.narrow(-1, 0, 1).mul_(-1)
111 |             u.addcmul_(self.ldot(x, u, keepdim=True).expand_as(x), x)
112 |         return d_p
113 | 
114 |     def expm(self, pp, d_pp, lr=None, out=None, normalize=False):
115 |         dimension = pp.size(-1)//3
116 |         ix, d_val_p = d_pp._indices().squeeze(), d_pp._values()
117 |         p_val_p = self.normalize(pp.index_select(0, ix))
118 |         for i in range(dimension):
119 |             p = pp[...,3*i:3*(i+1)]
120 |             d_val = d_val_p[...,3*i:3*(i+1)]
121 |             p_val = p_val_p[...,3*i:3*(i+1)]
122 |             ldv = self.ldot(d_val, d_val, keepdim=True)
123 |             if self.debug:
124 |                 assert all(ldv > 0), "Tangent norm must be greater 0"
125 |                 assert all(ldv == ldv), "Tangent norm includes NaNs"
126 |             nd_p = ldv.clamp_(min=0).sqrt_()
127 |             t = th.clamp(nd_p, max=self.norm_clip)
128 |             nd_p.clamp_(min=self.eps)
129 |             newp = (th.cosh(t) * p_val).addcdiv_(th.sinh(t) * d_val, nd_p)
130 |             if normalize:
131 |                 newp = self.normalize(newp,gra=False)
132 |             p.index_copy_(0, ix, newp)
133 | 
134 |     def logm(self, x, y):
135 |         """Logarithmic map on the Lorenz Manifold"""
136 |         xy = th.clamp(self.ldot(x, y).unsqueeze(-1), max=-1)
137 |         v = acosh(-xy, self.eps).div_(
138 |             th.clamp(th.sqrt(xy * xy - 1), min=self._eps)
139 |         ) * th.addcmul(y, xy, x)
140 |         return self.normalize_tan(x, v)
141 | 
142 |     def ptransp(self, x, y, v, ix=None, out=None):
143 |         """Parallel transport for hyperboloid"""
144 |         if ix is not None:
145 |             v_ = v
146 |             x_ = x.index_select(0, ix)
147 |             y_ = y.index_select(0, ix)
148 |         elif v.is_sparse:
149 |             ix, v_ = v._indices().squeeze(), v._values()
150 |             x_ = x.index_select(0, ix)
151 |             y_ = y.index_select(0, ix)
152 |         else:
153 |             raise NotImplementedError
154 |         xy = self.ldot(x_, y_, keepdim=True).expand_as(x_)
155 |         vy = self.ldot(v_, y_, keepdim=True).expand_as(x_)
156 |         vnew = v_ + vy / (1 - xy) * (x_ + y_)
157 |         if out is None:
158 |             return vnew
159 |         else:
160 |             out.index_copy_(0, ix, vnew)
161 | 
162 | 
163 | class LorentzDot(Function):
164 |     @staticmethod
165 |     def forward(ctx, u, v):
166 |         ctx.save_for_backward(u, v)
167 |         return NLorentzManifold.ldot(u, v)
168 | 
169 |     @staticmethod
170 |     def backward(ctx, g):
171 |         u, v = ctx.saved_tensors
172 |         g = g.unsqueeze(-1).expand_as(u).clone()
173 |         g.narrow(-1, 0, 1).mul_(-1)
174 |         return g * v, g * u
175 | 


--------------------------------------------------------------------------------
/hype/Poincare.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) 2018-present, Facebook, Inc.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | import torch as th
 9 | from torch.autograd import Function
10 | from .Euclidean import EuclideanManifold
11 | 
12 | 
13 | class PoincareManifold(EuclideanManifold):
14 |     def __init__(self, eps=1e-5, **kwargs):
15 |         super(PoincareManifold, self).__init__(**kwargs)
16 |         self.eps = eps
17 |         self.boundary = 1 - eps
18 |         self.max_norm = self.boundary
19 | 
20 |     def distance(self, u, v):
21 |         return Distance.apply(u, v, self.eps)
22 | 
23 |     def rgrad(self, p, d_p):
24 |         if d_p.is_sparse:
25 |             p_sqnorm = th.sum(
26 |                 p[d_p._indices()[0].squeeze()] ** 2, dim=1,
27 |                 keepdim=True
28 |             ).expand_as(d_p._values())
29 |             n_vals = d_p._values() * ((1 - p_sqnorm) ** 2) / 4
30 |             n_vals.renorm_(2, 0, 5)
31 |             d_p = th.sparse.DoubleTensor(d_p._indices(), n_vals, d_p.size())
32 |         else:
33 |             p_sqnorm = th.sum(p ** 2, dim=-1, keepdim=True)
34 |             d_p = d_p * ((1 - p_sqnorm) ** 2 / 4).expand_as(d_p)
35 |         return d_p
36 | 
37 | 
38 | class Distance(Function):
39 |     @staticmethod
40 |     def grad(x, v, sqnormx, sqnormv, sqdist, eps):
41 |         alpha = (1 - sqnormx)
42 |         beta = (1 - sqnormv)
43 |         z = 1 + 2 * sqdist / (alpha * beta)
44 |         a = ((sqnormv - 2 * th.sum(x * v, dim=-1) + 1) / th.pow(alpha, 2))\
45 |             .unsqueeze(-1).expand_as(x)
46 |         a = a * x - v / alpha.unsqueeze(-1).expand_as(v)
47 |         z = th.sqrt(th.pow(z, 2) - 1)
48 |         z = th.clamp(z * beta, min=eps).unsqueeze(-1)
49 |         return 4 * a / z.expand_as(x)
50 | 
51 |     @staticmethod
52 |     def forward(ctx, u, v, eps):
53 |         squnorm = th.clamp(th.sum(u * u, dim=-1), 0, 1 - eps)
54 |         sqvnorm = th.clamp(th.sum(v * v, dim=-1), 0, 1 - eps)
55 |         sqdist = th.sum(th.pow(u - v, 2), dim=-1)
56 |         ctx.eps = eps
57 |         ctx.save_for_backward(u, v, squnorm, sqvnorm, sqdist)
58 |         x = sqdist / ((1 - squnorm) * (1 - sqvnorm)) * 2 + 1
59 |         # arcosh
60 |         z = th.sqrt(th.pow(x, 2) - 1)
61 |         return th.log(x + z)
62 | 
63 |     @staticmethod
64 |     def backward(ctx, g):
65 |         u, v, squnorm, sqvnorm, sqdist = ctx.saved_tensors
66 |         g = g.unsqueeze(-1)
67 |         gu = Distance.grad(u, v, squnorm, sqvnorm, sqdist, ctx.eps)
68 |         gv = Distance.grad(v, u, sqvnorm, squnorm, sqdist, ctx.eps)
69 |         return g.expand_as(gu) * gu, g.expand_as(gv) * gv, None
70 | 


--------------------------------------------------------------------------------
/hype/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) 2018-present, Facebook, Inc.
3 | # All rights reserved.
4 | #
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 | 


--------------------------------------------------------------------------------
/hype/adjacency_matrix_dataset.pyx:
--------------------------------------------------------------------------------
  1 | # cython: nonecheck=False
  2 | # cython: boundscheck=False
  3 | # cython: wraparound=False
  4 | # cython: cdivision=True
  5 | #
  6 | # Copyright (c) 2018-present, Facebook, Inc.
  7 | # All rights reserved.
  8 | #
  9 | # This source code is licensed under the license found in the
 10 | # LICENSE file in the root directory of this source tree.
 11 | 
 12 | cimport numpy as npc
 13 | cimport cython
 14 | 
 15 | import numpy as np
 16 | import torch
 17 | from libcpp cimport bool
 18 | from libcpp.unordered_set cimport unordered_set
 19 | from libc.math cimport pow
 20 | from libc.stdlib cimport RAND_MAX
 21 | import threading
 22 | import queue
 23 | 
 24 | # Thread safe random number generation.  libcpp doesn't expose rand_r...
 25 | cdef unsigned long rand_r(unsigned long* seed) nogil:
 26 |     seed[0] = (seed[0] * 1103515245) % <unsigned long>pow(2, 32) + 12345
 27 |     return seed[0] % RAND_MAX
 28 | 
 29 | cdef class AdjacencyDataset:
 30 |     cdef public bool burnin
 31 |     cdef public int N, qsize, qsamples, qmisses
 32 |     cdef public npc.ndarray objects, counts
 33 |     cdef public double neg_multiplier, _sample_dampening
 34 | 
 35 |     cdef int nnegs, max_tries, batch_size, num_workers, join_count
 36 |     cdef long current
 37 |     cdef object queue
 38 |     cdef list threads
 39 | 
 40 |     cdef double [:] S, weights
 41 |     cdef long [:] A, ids, neighbors, offsets, perm
 42 | 
 43 |     def __cinit__(self, adj, nnegs, batch_size, num_workers, burnin = False,
 44 |             sample_dampening=0.75):
 45 |         self.burnin = burnin
 46 |         self.num_workers = num_workers
 47 |         self.batch_size = batch_size
 48 |         self.max_tries = 10 * nnegs
 49 |         self.neg_multiplier = 1
 50 |         self.queue = queue.Queue(maxsize=num_workers)
 51 |         self.nnegs = nnegs
 52 |         self._sample_dampening = sample_dampening
 53 | 
 54 |         self.ids = adj['ids']
 55 |         self.neighbors = adj['neighbors']
 56 |         self.offsets = adj['offsets']
 57 |         self.weights = adj['weights']
 58 |         self.objects = adj['objects']
 59 |         self.N = len(self.objects)
 60 |         self._setup_alias_tables()
 61 | 
 62 |     cdef _setup_alias_tables(self):
 63 |         # Setup the necessary data structures for "Alias Method"
 64 |         # See Lua Torch impl: https://github.com/torch/torch7/blob/master/lib/TH/generic/THTensorRandom.c
 65 |         # Alias method: https://en.wikipedia.org/wiki/Alias_method
 66 | 
 67 |         cdef long j, k, i, start, end
 68 |         cdef set Tl, Th
 69 |         cdef npc.ndarray[npc.long_t, ndim=1] A
 70 |         cdef npc.ndarray[npc.double_t, ndim=1] S
 71 | 
 72 |         self.counts = np.bincount(self.neighbors, weights=self.weights, minlength=self.N)
 73 |         self.counts = self.counts ** self._sample_dampening
 74 | 
 75 |         S = (self.counts / np.sum(self.counts)) * self.counts.shape[0]
 76 |         A = np.arange(0, self.counts.shape[0], dtype=np.long)
 77 |         Tl = set(list((S < 1).nonzero()[0]))
 78 |         Th = set(list((S > 1).nonzero()[0]))
 79 | 
 80 |         while len(Tl) > 0 and len(Th) > 0:
 81 |             j = Tl.pop()
 82 |             k = Th.pop()
 83 |             S[k] = S[k] - 1 + S[j]
 84 |             A[j] = k
 85 |             if S[k] < 1:
 86 |                 Tl.add(k)
 87 |             elif S[k] > 1:
 88 |                 Th.add(k)
 89 |         self.S = S
 90 |         self.A = A
 91 | 
 92 |     def iter(self):
 93 |         return self.__iter__()
 94 | 
 95 |     def __iter__(self):
 96 |         self.perm = np.random.permutation(self.neighbors.shape[0])
 97 |         self.qsize = self.qsamples = self.current = self.join_count = self.qmisses = 0
 98 |         self.threads = []
 99 |         for i in range(self.num_workers):
100 |             t = threading.Thread(target=self._worker, args=(i,))
101 |             t.start()
102 |             self.threads.append(t)
103 |         return self
104 | 
105 |     def _worker(self, tid):
106 |         cdef long [:,:] memview
107 |         cdef int count
108 |         cdef double [:] weights
109 |         cdef unsigned long seed
110 | 
111 |         seed = tid
112 |         while self.current < self.neighbors.shape[0]:
113 |             start = self.current
114 |             self.current += self.batch_size
115 | 
116 |             batch = torch.LongTensor(self.batch_size, self.nnegatives() + 2)
117 |             memview = batch.numpy()
118 |             with nogil:
119 |                 count = self._getbatch(start, memview, &seed)
120 |             if count < self.batch_size:
121 |                 batch = batch.narrow(0, 0, count)
122 |             self.queue.put((batch, torch.zeros(count).long()))
123 |         self.queue.put(tid)
124 | 
125 |     def __len__(self):
126 |         return int(np.ceil(float(self.neighbors.shape[0]) / self.batch_size))
127 | 
128 |     def avg_queue_size(self):
129 |         return float(self.qsize) / self.qsamples
130 | 
131 |     def queue_misses(self):
132 |         return self.qmisses
133 | 
134 |     def __next__(self):
135 |         return self.next()
136 | 
137 |     def next(self):
138 |         '''
139 |         Python visible function for indexing the dataset.  This first
140 |         allocates a tensor, and then modifies it in place with `_getitem`
141 | 
142 |         Args:
143 |             idx (int): index into the dataset
144 |         '''
145 |         size = self.queue.qsize()
146 |         if size == 0 and self.join_count == len(self.threads):
147 |             # No more items in queue and we've joined with all worker threads
148 |             raise StopIteration
149 | 
150 |         item = self.queue.get()
151 |         if isinstance(item, int):
152 |             self.join_count += 1
153 |             self.threads[item].join()  # Thread `item` is finished, join with it...
154 |             return self.next()  # try again...
155 |         self.qsize += size
156 |         self.qsamples += 1
157 |         prevmisses = self.qmisses
158 |         self.qmisses += 1 if size == 0 else 0
159 |         if self.qmisses == 20 and prevmisses == 19:
160 |             print('Warning: not enough threads to keep up with training loop!')
161 |         return item
162 | 
163 |     cdef long random_node(self, unsigned long* seed) nogil:
164 |         cdef long fu, n
165 |         cdef double u
166 | 
167 |         if self.burnin:
168 |             u = <double>rand_r(seed) / <double>RAND_MAX * self.N
169 |             fu = <long>u
170 |             if self.S[fu] <= u - fu:
171 |                 return self.A[fu]
172 |             else:
173 |                 return fu
174 |         else:
175 |             return <long>(<double>rand_r(seed) / <double>RAND_MAX * self.N)
176 | 
177 |     cdef long binary_search(self, long target, long[:] arr, long l, long r, bool approx) nogil:
178 |         '''
179 |         Binary search.  If the `approx` flag is `True`, then we find the position
180 |         in the array that `target` belongs.  If False, then we return `-1` if
181 |         `target` does not exist
182 |         '''
183 |         cdef long mid, N
184 |         N = r
185 |         while l <= r:
186 |             mid = <long>((l + r) / 2)
187 |             if (approx and arr[mid] <= target and (mid+1 > N or arr[mid+1] > target)) \
188 |                     or arr[mid] == target:
189 |                 return mid
190 |             if arr[mid] < target:
191 |                 l = mid + 1
192 |             else:
193 |                 r = mid - 1
194 |         return 0 if approx else -1
195 | 
196 |     cdef long _getbatch(self, long idx, long[:,:] batch, unsigned long* seed) nogil:
197 |         cdef long i, nnodes, ixptr, t, h, l, r, nodeidx, ntries, neighbor_idx
198 |         cdef long rnodeidx, rand_node
199 |         cdef unordered_set[long] negs
200 |         nnodes = self.ids.shape[0]
201 |         i = 0
202 | 
203 |         while idx < len(self.neighbors) and i < self.batch_size:
204 |             ntries = 0
205 |             neighbor_idx = self.perm[idx]
206 |             nodeidx = self.binary_search(neighbor_idx, self.offsets, 0, nnodes-1, True)
207 | 
208 |             # nodes for positive sample
209 |             t = self.ids[nodeidx]
210 |             h = self.neighbors[neighbor_idx]
211 | 
212 |             # left and right boundaries for this node's neighbors
213 |             l = self.offsets[nodeidx]
214 |             r = self.offsets[nodeidx + 1] - 1 if nodeidx + 1 < nnodes else len(self.neighbors) - 1
215 | 
216 |             batch[i, 0] = t
217 |             batch[i, 1] = h
218 |             ixptr = 2
219 |             negs = unordered_set[long]()
220 | 
221 |             while ntries < self.max_tries and ixptr < self._nnegatives() + 2:
222 |                 rand_node = self.random_node(seed)
223 |                 rnodeidx = self.binary_search(rand_node, self.neighbors, l, r, False)
224 |                 if rand_node != t and (rnodeidx == -1 or self.weights[rnodeidx] < self.weights[neighbor_idx]):
225 |                     if negs.find(rand_node) == negs.end():
226 |                         batch[i, ixptr] = rand_node
227 |                         ixptr = ixptr + 1
228 |                         negs.insert(rand_node)
229 |                 ntries = ntries + 1
230 | 
231 |             if ixptr == 2:
232 |                 batch[i, ixptr] = t
233 |                 ixptr += 1
234 | 
235 |             while ixptr < self._nnegatives() + 2:
236 |                 batch[i, ixptr] = batch[i, 2 + <long>(<double>rand_r(seed)/RAND_MAX*(ixptr-2))]
237 |                 ixptr = ixptr + 1
238 | 
239 |             idx = idx + 1
240 |             i = i + 1
241 |         return i
242 | 
243 |     def nnegatives(self):
244 |         return self._nnegatives()
245 | 
246 |     cdef int _nnegatives(self) nogil:
247 |         if self.burnin:
248 |             return int(self.neg_multiplier * self.nnegs)
249 |         else:
250 |             return self.nnegs
251 | 


--------------------------------------------------------------------------------
/hype/checkpoint.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) 2018-present, Facebook, Inc.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | import os
 9 | from os.path import join as pjoin
10 | import time
11 | import torch
12 | 
13 | 
14 | class LocalCheckpoint(object):
15 |     def __init__(self, path, include_in_all=None, start_fresh=False):
16 |         self.path = path
17 |         self.start_fresh = start_fresh
18 |         self.include_in_all = {} if include_in_all is None else include_in_all
19 | 
20 |     def initialize(self, params):
21 |         if not self.start_fresh and os.path.isfile(self.path):
22 |             print(f'Loading checkpoint from {self.path}')
23 |             return torch.load(self.path)
24 |         else:
25 |             return params
26 | 
27 |     def save(self, params, tries=10):
28 |         try:
29 |             torch.save({**self.include_in_all, **params}, self.path)
30 |         except Exception as err:
31 |             if tries > 0:
32 |                 print(f'Exception while saving ({err})\nRetrying ({tries})')
33 |                 time.sleep(60)
34 |                 self.save(params, tries=(tries - 1))
35 |             else:
36 |                 print("Giving up on saving...")
37 | 


--------------------------------------------------------------------------------
/hype/common.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) 2018-present, Facebook, Inc.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | import torch as th
 9 | from torch.autograd import Function
10 | 
11 | 
12 | class Acosh(Function):
13 |     @staticmethod
14 |     def forward(ctx, x, eps):
15 |         z = th.sqrt(x * x - 1)
16 |         ctx.save_for_backward(z)
17 |         ctx.eps = eps
18 |         return th.log(x + z)
19 | 
20 |     @staticmethod
21 |     def backward(ctx, g):
22 |         z, = ctx.saved_tensors
23 |         z = th.clamp(z, min=ctx.eps)
24 |         z = g / z
25 |         return z, None
26 | 
27 | 
28 | acosh = Acosh.apply
29 | 


--------------------------------------------------------------------------------
/hype/graph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (c) 2018-present, Facebook, Inc.
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under the license found in the
  6 | # LICENSE file in the root directory of this source tree.
  7 | 
  8 | from collections import defaultdict as ddict
  9 | import pandas
 10 | import numpy as np
 11 | from numpy.random import choice
 12 | import torch as th
 13 | from torch import nn
 14 | from torch.utils.data import Dataset as DS
 15 | from sklearn.metrics import average_precision_score
 16 | from multiprocessing.pool import ThreadPool
 17 | from functools import partial
 18 | import h5py
 19 | from tqdm import tqdm
 20 | 
 21 | 
 22 | def load_adjacency_matrix(path, format='hdf5', symmetrize=False):
 23 |     if format == 'hdf5':
 24 |         with h5py.File(path, 'r') as hf:
 25 |             return {
 26 |                 'ids': hf['ids'].value.astype('int'),
 27 |                 'neighbors': hf['neighbors'].value.astype('int'),
 28 |                 'offsets': hf['offsets'].value.astype('int'),
 29 |                 'weights': hf['weights'].value.astype('float'),
 30 |                 'objects': hf['objects'].value
 31 |             }
 32 |     elif format == 'csv':
 33 |         df = pandas.read_csv(path, usecols=['id1', 'id2', 'weight'], engine='c')
 34 | 
 35 |         if symmetrize:
 36 |             rev = df.copy().rename(columns={'id1' : 'id2', 'id2' : 'id1'})
 37 |             df = pandas.concat([df, rev])
 38 | 
 39 |         idmap = {}
 40 |         idlist = []
 41 | 
 42 |         def convert(id):
 43 |             if id not in idmap:
 44 |                 idmap[id] = len(idlist)
 45 |                 idlist.append(id)
 46 |             return idmap[id]
 47 |         df.loc[:, 'id1'] = df['id1'].apply(convert)
 48 |         df.loc[:, 'id2'] = df['id2'].apply(convert)
 49 | 
 50 |         groups = df.groupby('id1').apply(lambda x: x.sort_values(by='id2'))
 51 |         counts = df.groupby('id1').id2.size()
 52 | 
 53 |         ids = groups.index.levels[0].values
 54 |         offsets = counts.loc[ids].values
 55 |         offsets[1:] = np.cumsum(offsets)[:-1]
 56 |         offsets[0] = 0
 57 |         neighbors = groups['id2'].values
 58 |         weights = groups['weight'].values
 59 |         return {
 60 |             'ids' : ids.astype('int'),
 61 |             'offsets' : offsets.astype('int'),
 62 |             'neighbors': neighbors.astype('int'),
 63 |             'weights': weights.astype('float'),
 64 |             'objects': np.array(idlist)
 65 |         }
 66 |     else:
 67 |         raise RuntimeError(f'Unsupported file format {format}')
 68 | 
 69 | 
 70 | def load_edge_list(path, symmetrize=False):
 71 |     df = pandas.read_csv(path, usecols=['id1', 'id2', 'weight'], engine='c')
 72 |     df.dropna(inplace=True)
 73 |     if symmetrize:
 74 |         rev = df.copy().rename(columns={'id1' : 'id2', 'id2' : 'id1'})
 75 |         df = pandas.concat([df, rev])
 76 |     idx, objects = pandas.factorize(df[['id1', 'id2']].values.reshape(-1))
 77 |     idx = idx.reshape(-1, 2).astype('int')
 78 |     weights = df.weight.values.astype('float')
 79 |     return idx, objects.tolist(), weights
 80 | 
 81 | 
 82 | class Embedding(nn.Module):
 83 |     def __init__(self, size, dim, manifold, sparse=True, com_n=1):
 84 |         super(Embedding, self).__init__()
 85 |         self.dim = dim
 86 |         self.nobjects = size
 87 |         self.manifold = manifold
 88 |         self.lt = nn.Embedding(size, com_n*dim, sparse=sparse)
 89 |         ############ add this line to store integer matrix
 90 |         if 'LTiling' in str(manifold):
 91 |             if 'N' in str(manifold):
 92 |                 self.int_matrix = th.Tensor(size, dim//3, 3, 3)
 93 |             else:
 94 |                 self.int_matrix = th.Tensor(size, 3, 3)
 95 |         ############
 96 |         self.dist = manifold.distance
 97 |         self.pre_hook = None
 98 |         self.post_hook = None
 99 |         self.init_weights(manifold)
100 | 
101 |     def init_weights(self, manifold, scale=1e-4):
102 |         manifold.init_weights(self.lt.weight, scale)
103 |         if 'LTiling' in str(self.manifold):
104 |             self.int_matrix.zero_()
105 |             manifold.init_weights_int_matrix(self.int_matrix)
106 | 
107 |     def forward(self, inputs):
108 |         e = self.lt(inputs)
109 |         with th.no_grad():
110 |             e = self.manifold.normalize(e)
111 |         if self.pre_hook is not None:
112 |             e = self.pre_hook(e)
113 |         if 'LTiling' in str(self.manifold):
114 |             fval = self._forward(e, self.int_matrix[inputs])
115 |         else:
116 |             fval = self._forward(e)
117 |         return fval
118 | 
119 |     def embedding(self):
120 |         return list(self.lt.parameters())[0].data.cpu().numpy()
121 | 
122 |     def optim_params(self, manifold):
123 |         return [{
124 |             'params': self.lt.parameters(),
125 |             'rgrad': manifold.rgrad,
126 |             'expm': manifold.expm,
127 |             'logm': manifold.logm,
128 |             'ptransp': manifold.ptransp,
129 |         }, ]
130 | 
131 | 
132 | # This class is now deprecated in favor of BatchedDataset (graph_dataset.pyx)
133 | class Dataset(DS):
134 |     _neg_multiplier = 1
135 |     _ntries = 10
136 |     _sample_dampening = 0.75
137 | 
138 |     def __init__(self, idx, objects, weights, nnegs, unigram_size=1e8):
139 |         assert idx.ndim == 2 and idx.shape[1] == 2
140 |         assert weights.ndim == 1
141 |         assert len(idx) == len(weights)
142 |         assert nnegs >= 0
143 |         assert unigram_size >= 0
144 | 
145 |         print('Indexing data')
146 |         self.idx = idx
147 |         self.nnegs = nnegs
148 |         self.burnin = False
149 |         self.objects = objects
150 | 
151 |         self._weights = ddict(lambda: ddict(int))
152 |         self._counts = np.ones(len(objects), dtype=np.float)
153 |         self.max_tries = self.nnegs * self._ntries
154 |         for i in range(idx.shape[0]):
155 |             t, h = self.idx[i]
156 |             self._counts[h] += weights[i]
157 |             self._weights[t][h] += weights[i]
158 |         self._weights = dict(self._weights)
159 |         nents = int(np.array(list(self._weights.keys())).max())
160 |         assert len(objects) > nents, f'Number of objects do no match'
161 | 
162 |         if unigram_size > 0:
163 |             c = self._counts ** self._sample_dampening
164 |             self.unigram_table = choice(
165 |                 len(objects),
166 |                 size=int(unigram_size),
167 |                 p=(c / c.sum())
168 |             )
169 | 
170 |     def __len__(self):
171 |         return self.idx.shape[0]
172 | 
173 |     def weights(self, inputs, targets):
174 |         return self.fweights(self, inputs, targets)
175 | 
176 |     def nnegatives(self):
177 |         if self.burnin:
178 |             return self._neg_multiplier * self.nnegs
179 |         else:
180 |             return self.nnegs
181 | 
182 |     @classmethod
183 |     def collate(cls, batch):
184 |         inputs, targets = zip(*batch)
185 |         return th.cat(inputs, 0), th.cat(targets, 0)
186 | 
187 | 
188 | # This function is now deprecated in favor of eval_reconstruction
189 | def eval_reconstruction_slow(adj, lt, lt_int_matrix, distfn):
190 |     ranks = []
191 |     ap_scores = []
192 | 
193 |     for s, s_types in adj.items():
194 |         s_e = lt[s].expand_as(lt)
195 |         s_e_int_matrix = lt_int_matrix[s].expand_as(lt_int_matrix)
196 |         _dists = distfn(s_e, s_e_int_matrix, lt, lt_int_matrix).data.cpu().numpy().flatten()
197 |         _dists[s] = 1e+12
198 |         _labels = np.zeros(lt.size(0))
199 |         _dists_masked = _dists.copy()
200 |         _ranks = []
201 |         for o in s_types:
202 |             _dists_masked[o] = np.Inf
203 |             _labels[o] = 1
204 |         for o in s_types:
205 |             d = _dists_masked.copy()
206 |             d[o] = _dists[o]
207 |             r = np.argsort(d)
208 |             _ranks.append(np.where(r == o)[0][0] + 1)
209 |         ranks += _ranks
210 |         ap_scores.append(
211 |             average_precision_score(_labels, -_dists)
212 |         )
213 |     return np.mean(ranks), np.mean(ap_scores)
214 | 
215 | 
216 | def reconstruction_worker(adj, lt, distfn, objects, progress=False, lt_int_matrix=None):
217 |     ranksum = nranks = ap_scores = iters = 0
218 |     labels = np.empty(lt.size(0))
219 |     for object in tqdm(objects) if progress else objects:
220 |         labels.fill(0)
221 |         neighbors = np.array(list(adj[object]))
222 |         if 'LTiling' in str(distfn):
223 |             dists = distfn(lt[None, object], lt_int_matrix[None, object], lt, lt_int_matrix)
224 |         else:
225 |             dists = distfn(lt[None, object], lt)
226 |         dists[object] = 1e12
227 |         sorted_dists, sorted_idx = dists.sort()
228 |         ranks, = np.where(np.in1d(sorted_idx.detach().cpu().numpy(), neighbors))
229 |         # The above gives us the position of the neighbors in sorted order.  We
230 |         # want to count the number of non-neighbors that occur before each neighbor
231 |         ranks += 1
232 |         N = ranks.shape[0]
233 | 
234 |         # To account for other positive nearer neighbors, we subtract (N*(N+1)/2)
235 |         # As an example, assume the ranks of the neighbors are:
236 |         # 0, 1, 4, 5, 6, 8
237 |         # For each neighbor, we'd like to return the number of non-neighbors
238 |         # that ranked higher than it.  In this case, we'd return 0+0+2+2+2+3=14
239 |         # Another way of thinking about it is to return
240 |         # 0 + 1 + 4 + 5 + 6 + 8 - (0 + 1 + 2 + 3 + 4 + 5)
241 |         # (0 + 1 + 2 + ... + N) == (N * (N + 1) / 2)
242 |         # Note that we include `N` to account for the source embedding itself
243 |         # always being the nearest neighbor
244 |         ranksum += ranks.sum() - (N * (N - 1) / 2)
245 |         nranks += ranks.shape[0]
246 |         labels[neighbors] = 1
247 |         # print(dists.detach().cpu().numpy().max())
248 |         # assert 1==2
249 |         # distss = th.clamp(dists,max=1e12)
250 |         # print(object,dists)
251 |         # print(dists !=dists)
252 |         # print(lt[object])
253 |         # print(lt[0])
254 |         # assert 1 == 2
255 |         ap_scores += average_precision_score(labels, -dists.detach().cpu().numpy())
256 |         iters += 1
257 |     return float(ranksum), nranks, ap_scores, iters
258 | 
259 | 
260 | def eval_reconstruction(adj, lt, distfn, workers=1, progress=False, lt_int_matrix=None):
261 |     '''
262 |     Reconstruction evaluation.  For each object, rank its neighbors by distance
263 |     Args:
264 |         adj (dict[int, set[int]]): Adjacency list mapping objects to its neighbors
265 |         lt (torch.Tensor[N, dim]): Embedding table with `N` embeddings and `dim`
266 |             dimensionality
267 |         distfn ((torch.Tensor, torch.Tensor) -> torch.Tensor): distance function.
268 |         workers (int): number of workers to use
269 |     '''
270 |     objects = np.array(list(adj.keys()))
271 |     if workers > 1:
272 |         with ThreadPool(workers) as pool:
273 |             if 'LTiling' in str(distfn):
274 |                 f = partial(reconstruction_worker, adj, lt, distfn, lt_int_matrix=lt_int_matrix)
275 |             else:
276 |                 f = partial(reconstruction_worker, adj, lt, distfn)
277 |             results = pool.map(f, np.array_split(objects, workers))
278 |             results = np.array(results).sum(axis=0).astype(float)
279 |     else:
280 |         if 'LTiling' in str(distfn):
281 |             results = reconstruction_worker(adj, lt, distfn, objects, progress, lt_int_matrix=lt_int_matrix)
282 |         else:
283 |             results = reconstruction_worker(adj, lt, distfn, objects, progress)
284 |     return float(results[0]) / results[1], float(results[2]) / results[3]


--------------------------------------------------------------------------------
/hype/graph_dataset.pyx:
--------------------------------------------------------------------------------
  1 | # cython: nonecheck=False
  2 | # cython: boundscheck=False
  3 | # cython: wraparound=False
  4 | # cython: cdivision=True
  5 | #
  6 | # Copyright (c) 2018-present, Facebook, Inc.
  7 | # All rights reserved.
  8 | #
  9 | # This source code is licensed under the license found in the
 10 | # LICENSE file in the root directory of this source tree.
 11 | 
 12 | cimport numpy as npc
 13 | cimport cython
 14 | 
 15 | import numpy as np
 16 | import torch
 17 | from libcpp cimport bool
 18 | from libcpp.vector cimport vector
 19 | from libcpp.unordered_set cimport unordered_set
 20 | from libcpp.unordered_map cimport unordered_map
 21 | from libc.math cimport pow
 22 | from libc.stdlib cimport rand, RAND_MAX
 23 | import threading
 24 | import queue
 25 | 
 26 | # Thread safe random number generation.  libcpp doesn't expose rand_r...
 27 | cdef unsigned long rand_r(unsigned long* seed) nogil:
 28 |     seed[0] = (seed[0] * 1103515245) % <unsigned long>pow(2, 32) + 12345
 29 |     return seed[0] % RAND_MAX
 30 | 
 31 | cdef class BatchedDataset:
 32 |     cdef public list objects
 33 |     cdef public bool burnin
 34 |     cdef public double neg_multiplier
 35 |     cdef public npc.ndarray counts
 36 | 
 37 |     cdef long [:, :] idx
 38 |     cdef int nnegs, max_tries, N, batch_size, current, num_workers
 39 |     cdef double sample_dampening
 40 |     cdef vector[unordered_map[long, double]] _weights
 41 |     cdef double [:] S
 42 |     cdef long [:] A, perm
 43 |     cdef object queue
 44 |     cdef list threads
 45 | 
 46 |     def __cinit__(self, idx, objects, weights, nnegs, batch_size, num_workers,
 47 |                   burnin=False, sample_dampening=0.75):
 48 |         '''
 49 |         Create a dataset for training Hyperbolic embeddings.  Rather than
 50 |         allocating many tensors for individual dataset items, we instead
 51 |         produce a single batch in each iteration.  This allows us to do a single
 52 |         Tensor allocation for the entire batch and filling it out in place.
 53 | 
 54 |         Args:
 55 |             idx (ndarray[ndims=2]):  Indexes of objects corresponding to co-occurrence.
 56 |                 I.E. if `idx[0, :] == [4, 19]`, then item 4 co-occurs with item 19
 57 |             weights (ndarray[ndims=1]): Weights for each co-occurence.  Corresponds
 58 |                 to the number of times a pair co-occurred.  (Equal length to `idx`)
 59 |             nnegs (int): Number of negative samples to produce with each positive
 60 |             objects (list[str]): Mapping from integer ID to hashtag string
 61 |             nnegs (int): Number of negatives to produce with each positive
 62 |             batch_size (int): Size of each minibatch
 63 |             num_workers (int): Number of threads to use to produce each batch
 64 |             burnin (bool): ???
 65 |         '''
 66 |         self.idx = idx
 67 |         self.objects = objects
 68 |         self.nnegs = nnegs
 69 |         self.burnin = burnin
 70 |         self.N = len(objects)
 71 |         self.counts = np.zeros((self.N), dtype=np.double)
 72 |         self.num_workers = num_workers
 73 |         self.batch_size = batch_size
 74 |         self.sample_dampening = sample_dampening
 75 |         self._mk_weights(idx, weights)
 76 |         self.max_tries = 10 * nnegs
 77 |         self.neg_multiplier = 1
 78 |         self.queue = queue.Queue(maxsize=num_workers)
 79 | 
 80 |     # Setup the weights datastructure and sampling tables
 81 |     def _mk_weights(self, npc.ndarray[npc.long_t, ndim=2] idx, npc.ndarray[npc.double_t, ndim=1] weights):
 82 |         cdef int i
 83 |         cdef long t, h
 84 |         cdef set Tl, Th
 85 |         cdef npc.ndarray[npc.long_t, ndim=1] A
 86 |         cdef npc.ndarray[npc.double_t, ndim=1] S
 87 | 
 88 |         self._weights.resize(self.N)
 89 | 
 90 |         for i in range(idx.shape[0]):
 91 |             t = idx[i, 0]
 92 |             h = idx[i, 1]
 93 |             self.counts[h] += weights[i]
 94 |             self._weights[t][h] = weights[i]
 95 | 
 96 |         self.counts = self.counts ** self.sample_dampening
 97 | 
 98 |         if self.burnin:
 99 |             # Setup the necessary data structures for "Alias Method"
100 |             # See Lua Torch impl: https://github.com/torch/torch7/blob/master/lib/TH/generic/THTensorRandom.c
101 |             # Alias method: https://en.wikipedia.org/wiki/Alias_method
102 |             S = (self.counts / np.sum(self.counts)) * self.counts.shape[0]
103 |             A = np.arange(0, self.counts.shape[0], dtype=np.long)
104 |             Tl = set(list((S < 1).nonzero()[0]))
105 |             Th = set(list((S > 1).nonzero()[0]))
106 | 
107 |             while len(Tl) > 0 and len(Th) > 0:
108 |                 j = Tl.pop()
109 |                 k = Th.pop()
110 |                 S[k] = S[k] - 1 + S[j]
111 |                 A[j] = k
112 |                 if S[k] < 1:
113 |                     Tl.add(k)
114 |                 elif S[k] > 1:
115 |                     Th.add(k)
116 |             self.S = S
117 |             self.A = A
118 | 
119 |     def __iter__(self):
120 |         self.perm = np.random.permutation(len(self.idx))
121 |         self.current = 0
122 |         self.threads = []
123 |         for i in range(self.num_workers):
124 |             t = threading.Thread(target=self._worker, args=(i,))
125 |             t.start()
126 |             self.threads.append(t)
127 |         return self
128 | 
129 |     cpdef _worker(self, i):
130 |         cdef long [:,:] memview
131 |         cdef long count
132 | 
133 |         while self.current < self.idx.shape[0]:
134 |             current = self.current
135 |             self.current += self.batch_size
136 |             ix = torch.LongTensor(self.batch_size, self.nnegatives() + 2)
137 |             memview = ix.numpy()
138 |             with nogil:
139 |                 count = self._getbatch(current, memview)
140 |             if count < self.batch_size:
141 |                 ix = ix.narrow(0, 0, count)
142 |             self.queue.put((ix, torch.zeros(ix.size(0)).long()))
143 |         self.queue.put(i)
144 | 
145 |     def iter(self):
146 |         return self.__iter__()
147 | 
148 |     def __len__(self):
149 |         return int(np.ceil(float(self.idx.shape[0]) / self.batch_size))
150 | 
151 |     def __next__(self):
152 |         return self.next()
153 | 
154 |     def next(self):
155 |         '''
156 |         Python visible function for indexing the dataset.  This first
157 |         allocates a tensor, and then modifies it in place with `_getitem`
158 | 
159 |         Args:
160 |             idx (int): index into the dataset
161 |         '''
162 |         size = self.queue.qsize()
163 |         if size == 0 and all([not(t.is_alive()) for t in self.threads]):
164 |             # No more items in queue and we've joined with all worker threads
165 |             raise StopIteration
166 |         item = self.queue.get()
167 |         if isinstance(item, int):
168 |             self.threads[item].join()  # Thread `item` is finished, join with it...
169 |             return self.next()  # try again...
170 |         return item
171 | 
172 |     cdef public long _getbatch(self, int i, long[:,:] ix) nogil:
173 |         '''
174 |         Fast internal C method for indexing the dataset/negative sampling
175 | 
176 |         Args:
177 |             i (int): Index into the dataset
178 |             ix (long [:]) - A C memoryview of the result tensor that we will
179 |                 return to Python
180 |             N (int): Total number of unique objects in the dataset (convert to raw C)
181 |         '''
182 |         cdef long t, h, n, fu
183 |         cdef int ntries, ixptr, idx, j
184 |         cdef unordered_set[long] negs
185 |         cdef double weight_th, u
186 |         cdef unsigned long seed
187 | 
188 |         seed = i
189 |         j = 0
190 | 
191 |         while j < self.batch_size and i + j < self.perm.shape[0]:
192 |             ntries = 0
193 | 
194 |             idx = self.perm[i + j]
195 |             t = self.idx[idx, 0]
196 |             h = self.idx[idx, 1]
197 | 
198 |             ix[j, 0] = t
199 |             ix[j, 1] = h
200 |             ixptr = 2
201 | 
202 |             weight_th = self._weights[t][h]
203 | 
204 |             negs = unordered_set[long]()
205 | 
206 |             while ntries < self.max_tries and negs.size() < self._nnegatives():
207 |                 if self.burnin:
208 |                     u = <double>rand_r(&seed) / <double>RAND_MAX * self.N
209 |                     fu = <int>u
210 |                     if self.S[fu] <= u - fu:
211 |                         n = self.A[fu]
212 |                     else:
213 |                         n = fu
214 |                 else:
215 |                     n = <long>(<double>rand_r(&seed) / <double>RAND_MAX * self.N)
216 |                 if n != t and (self._weights[t].find(n) == self._weights[t].end() or (self._weights[t][n] < weight_th)):
217 |                     if negs.find(n) == negs.end():
218 |                         ix[j, ixptr] = n
219 |                         ixptr = ixptr + 1
220 |                         negs.insert(n)
221 |                 ntries = ntries + 1
222 | 
223 |             if negs.size() == 0:
224 |                 ix[j, ixptr] = t
225 |                 ixptr = ixptr + 1
226 | 
227 |             while ixptr < self._nnegatives() + 2:
228 |                 ix[j, ixptr] = ix[j, 2 + <long>(<double>rand_r(&seed)/RAND_MAX*(ixptr-2))]
229 |                 ixptr = ixptr + 1
230 |             j = j + 1
231 |         return j
232 | 
233 |     def nnegatives(self):
234 |         return self._nnegatives()
235 | 
236 |     cdef int _nnegatives(self) nogil:
237 |         if self.burnin:
238 |             return int(self.neg_multiplier * self.nnegs)
239 |         else:
240 |             return self.nnegs
241 | 


--------------------------------------------------------------------------------
/hype/manifold.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) 2018-present, Facebook, Inc.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | from abc import abstractmethod
 9 | 
10 | 
11 | class Manifold(object):
12 |     def __init__(self, *args, **kwargs):
13 |         pass
14 | 
15 |     def init_weights(self, w, scale=1e-4):
16 |         w.data.uniform_(-scale, scale)
17 | 
18 |     @staticmethod
19 |     def dim(dim):
20 |         return dim
21 | 
22 |     def normalize(self, u):
23 |         return u
24 | 
25 |     @abstractmethod
26 |     def distance(self, u, v):
27 |         """
28 |         Distance function
29 |         """
30 |         raise NotImplementedError
31 | 
32 |     @abstractmethod
33 |     def expm(self, p, d_p, lr=None, out=None):
34 |         """
35 |         Exponential map
36 |         """
37 |         raise NotImplementedError
38 | 
39 |     @abstractmethod
40 |     def logm(self, x, y):
41 |         """
42 |         Logarithmic map
43 |         """
44 |         raise NotImplementedError
45 | 
46 |     @abstractmethod
47 |     def ptransp(self, x, y, v, ix=None, out=None):
48 |         """
49 |         Parallel transport
50 |         """
51 |         raise NotImplementedError
52 | 


--------------------------------------------------------------------------------
/hype/rsgd.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) 2018-present, Facebook, Inc.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | from torch.optim.optimizer import Optimizer, required
 9 | 
10 | 
11 | class RiemannianSGD(Optimizer):
12 |     r"""Riemannian stochastic gradient descent.
13 | 
14 |     Args:
15 |         rgrad (Function): Function to compute the Riemannian gradient
16 |            from the Euclidean gradient
17 |         retraction (Function): Function to update the retraction
18 |            of the Riemannian gradient
19 |     """
20 | 
21 |     def __init__(
22 |             self,
23 |             params,
24 |             lr=required,
25 |             rgrad=required,
26 |             expm=required,
27 |     ):
28 |         defaults = {
29 |             'lr': lr,
30 |             'rgrad': rgrad,
31 |             'expm': expm,
32 |         }
33 |         super(RiemannianSGD, self).__init__(params, defaults)
34 | 
35 |     def step(self, lr=None, counts=None, **kwargs):
36 |         """Performs a single optimization step.
37 | 
38 |         Arguments:
39 |             lr (float, optional): learning rate for the current update.
40 |         """
41 |         loss = None
42 | 
43 |         for group in self.param_groups:
44 |             for p in group['params']:
45 |                 lr = lr or group['lr']
46 |                 rgrad = group['rgrad']
47 |                 expm = group['expm']
48 | 
49 |                 if p.grad is None:
50 |                     continue
51 |                 d_p = p.grad.data
52 |                 # make sure we have no duplicates in sparse tensor
53 |                 if d_p.is_sparse:
54 |                     d_p = d_p.coalesce()
55 |                 d_p = rgrad(p.data, d_p)
56 |                 d_p.mul_(-lr)
57 |                 expm(p.data, d_p)
58 | 
59 |         return loss
60 | 


--------------------------------------------------------------------------------
/hype/sn.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) 2018-present, Facebook, Inc.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | 
 8 | import torch as th
 9 | from torch import nn
10 | from numpy.random import randint
11 | from . import graph
12 | from .graph_dataset import BatchedDataset
13 | 
14 | model_name = '%s_dim%d%com_n'
15 | 
16 | 
17 | class Embedding(graph.Embedding):
18 |     def __init__(self, size, dim, manifold, sparse=True, com_n=1):
19 |         super(Embedding, self).__init__(size, dim, manifold, sparse, com_n)
20 |         self.lossfn = nn.functional.cross_entropy
21 |         self.manifold = manifold
22 | 
23 |     def _forward(self, e, int_matrix=None):
24 |         o = e.narrow(1, 1, e.size(1) - 1)
25 |         s = e.narrow(1, 0, 1).expand_as(o)###source
26 |         if 'LTiling' in str(self.manifold):
27 |             o_int_matrix = int_matrix.narrow(1, 1, e.size(1) - 1)
28 |             s_int_matrix = int_matrix.narrow(1, 0, 1).expand_as(o_int_matrix)###source
29 |             dists = self.dist(s, s_int_matrix, o, o_int_matrix).squeeze(-1)
30 |         else:
31 |             dists = self.dist(s, o).squeeze(-1)
32 |         return -dists
33 |     
34 |     def loss(self, preds, targets, weight=None, size_average=True):
35 |         return self.lossfn(preds, targets)
36 | 
37 | 
38 | # This class is now deprecated in favor of BatchedDataset (graph_dataset.pyx)
39 | class Dataset(graph.Dataset):
40 |     def __getitem__(self, i):
41 |         t, h = self.idx[i]
42 |         negs = set()
43 |         ntries = 0
44 |         nnegs = int(self.nnegatives())
45 |         if t not in self._weights:
46 |             negs.add(t)
47 | #             print(negs)
48 |         else:
49 |             while ntries < self.max_tries and len(negs) < nnegs:
50 |                 if self.burnin:
51 |                     n = randint(0, len(self.unigram_table))
52 |                     n = int(self.unigram_table[n])
53 |                 else:
54 |                     n = randint(0, len(self.objects))
55 |                 if (n not in self._weights[t]) or \
56 |                         (self._weights[t][n] < self._weights[t][h]):
57 |                     negs.add(n)
58 |                 ntries += 1
59 |         if len(negs) == 0:
60 |             negs.add(t)
61 |         ix = [t, h] + list(negs)
62 |         while len(ix) < nnegs + 2:
63 |             ix.append(ix[randint(2, len(ix))])
64 | #         print(ix)
65 | #         assert 1==2
66 |         return th.LongTensor(ix).view(1, len(ix)), th.zeros(1).long()
67 | 
68 | 
69 | def initialize(manifold, opt, idx, objects, weights, sparse=True):
70 |     conf = []
71 |     mname = model_name % (opt.manifold, opt.dim, opt.com_n)
72 |     data = BatchedDataset(idx, objects, weights, opt.negs, opt.batchsize,
73 |         opt.ndproc, opt.burnin > 0, opt.dampening)
74 |     model = Embedding(
75 |         len(data.objects),
76 |         opt.dim,
77 |         manifold,
78 |         sparse=sparse,
79 |         com_n=opt.com_n,
80 |     )
81 |     data.objects = objects
82 |     return model, data, mname, conf
83 | 


--------------------------------------------------------------------------------
/hype/train.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright (c) 2018-present, Facebook, Inc.
  3 | # All rights reserved.
  4 | #
  5 | # This source code is licensed under the license found in the
  6 | # LICENSE file in the root directory of this source tree.
  7 | 
  8 | import torch as th
  9 | import numpy as np
 10 | import timeit
 11 | from tqdm import tqdm
 12 | from torch.utils import data as torch_data
 13 | from hype.graph import eval_reconstruction
 14 | 
 15 | from hype.Euclidean import EuclideanManifold
 16 | from hype.Poincare import PoincareManifold
 17 | from hype.Lorentz import LorentzManifold
 18 | # from hype.Halfspace import HalfspaceManifold
 19 | from hype.NLorentz import NLorentzManifold
 20 | from hype.LTiling_rsgd import LTilingRSGDManifold
 21 | from hype.NLTiling_rsgd import NLTilingRSGDManifold
 22 | from hype.LTiling_sgd import LTilingSGDManifold
 23 | from hype.HTiling_rsgd import HTilingRSGDManifold
 24 | # import matplotlib
 25 | # matplotlib.use('Agg')
 26 | # import matplotlib.pyplot as plt
 27 | 
 28 | MANIFOLDS = {
 29 |     'Euclidean': EuclideanManifold,
 30 |     'Poincare': PoincareManifold,
 31 |     'Lorentz': LorentzManifold,
 32 |     'Halfspace': HalfspaceManifold,
 33 |     'NLorentz': NLorentzManifold,
 34 |     'LTiling_rsgd': LTilingRSGDManifold,
 35 |     'NLTiling_rsgd': NLTilingRSGDManifold,
 36 |     'LTiling_sgd': LTilingSGDManifold,
 37 |     'HTiling_rsgd': HTilingRSGDManifold
 38 | }
 39 | 
 40 | 
 41 | _lr_multiplier = 0.01
 42 | 
 43 | def normalize_g(g, g_int_matrix):
 44 |     L = th.sqrt(th.Tensor([[3.0,0,0],[0,1.0,0],[0,0,1.0]]))
 45 |     R = th.sqrt(th.Tensor([[1.0/3.0,0,0],[0,1.0,0],[0,0,1.0]]))
 46 |     ga = th.LongTensor([[2,1,0],[0,0,-1],[3,2,0]])
 47 |     gb = th.LongTensor([[2,-1,0],[0,0,-1],[-3,2,0]])
 48 |     gai = th.LongTensor([[2,0,-1],[-3,0,2],[0,-1,0]])
 49 |     gbi = th.LongTensor([[2,0,1],[3,0,2],[0,-1,0]])
 50 |     RVI = th.LongTensor([[1,0,0],[0,1,0],[0,0,1]])
 51 |     RV = th.LongTensor([[1,0,0],[0,1,0],[0,0,1]])
 52 |     gmat = g_int_matrix
 53 |     x = g[:3].clone()
 54 |     x[0]=th.sqrt(1+x[1]**2+x[2]**2)
 55 |     y=x.clone()
 56 |     while ((2 * x[1] ** 2 - x[2] ** 2 - 1 > 0) or (2 * x[2] ** 2 - x[1] ** 2 - 1 > 0)):
 57 |         prex = x.clone()
 58 |         preRV = RV.clone()
 59 |         if x[1] <= -th.abs(x[2]):
 60 |             RVI = th.matmul(ga, RVI)
 61 |             RV = th.matmul(RV, gai)
 62 |         elif x[1] >= th.abs(x[2]):
 63 |             RVI = th.matmul(gb, RVI)
 64 |             RV = th.matmul(RV, gbi)
 65 |         elif x[2] < -th.abs(x[1]):
 66 |             RVI = th.matmul(gbi, RVI)
 67 |             RV = th.matmul(RV, gb)
 68 |         elif x[2] > th.abs(x[1]):
 69 |             RVI = th.matmul(gai, RVI)
 70 |             RV = th.matmul(RV, ga)
 71 | #         x = th.matmul(L, th.matmul(RVI.float(), th.matmul(R, y.unsqueeze(-1)))).squeeze(-1)
 72 |         if L.dtype == th.float64:
 73 |             x = th.matmul(L, th.matmul(RVI.double(), th.matmul(R, y.unsqueeze(-1)))).squeeze(-1)
 74 |         elif L.dtype == th.float32:
 75 |             x = th.matmul(L, th.matmul(RVI.float(), th.matmul(R, y.unsqueeze(-1)))).squeeze(-1)
 76 |         x[0] = th.sqrt(1 + x[1] ** 2 + x[2] ** 2)
 77 |         if x[0] > prex[0]:
 78 |             if L.dtype == th.float64:
 79 |                 return prex, th.matmul(gmat, preRV.double())
 80 |             elif L.dtype == th.float32:
 81 |                 return prex, th.matmul(gmat, preRV.float())
 82 |     if L.dtype == th.float64:
 83 |         return x, th.matmul(gmat, RV.double())
 84 |     elif L.dtype == th.float32:
 85 |         return x, th.matmul(gmat, RV.float())
 86 | 
 87 | 
 88 | def normalize_gmatrix(gu, gu_int_matrix):
 89 |     uu = th.zeros_like(gu)
 90 |     uu_int_matrix = th.zeros_like(gu_int_matrix)
 91 |     if len(gu_int_matrix.size())==4:
 92 |         for i in range(gu.size(0)):
 93 |             for j in range(uu_int_matrix.size(1)):
 94 |                 uu[i,3*j:3*(j+1)], uu_int_matrix[i,j] = normalize_g(gu[i,3*j:3*(j+1)], gu_int_matrix[i,j])
 95 |     else:
 96 |         for i in range(gu.size(0)):
 97 |             uu[i], uu_int_matrix[i] = normalize_g(gu[i], gu_int_matrix[i])
 98 |     return uu, uu_int_matrix
 99 | 
100 | def normalize_halfspace(g):
101 |     y = th.zeros(g.size())
102 |     n = (g.size(0)-1)//2
103 |     a = th.floor(th.log2(g[n-1]))
104 |     y[-1] = g[-1] + a
105 |     y[n:-2] = th.floor(2**(-1*a) * (g[:n-1] + g[n:-2]))
106 |     y[:n] = 2**(-1*a) * (g[:n]+g[n:-1]) - y[n:-1]
107 |     assert y[-2]==0
108 |     return y
109 | 
110 | def normalize_halfspace_matrix(g):
111 |     y = th.zeros(g.size())
112 |     d = (g.size(-1)-1)//2
113 |     a = th.floor(th.log2(g[...,d-1]))#n
114 |     y[...,-1] = g[...,-1] + a#n
115 |     y[...,d:-2] = th.floor(2**(-1*a).unsqueeze(-1).expand_as(g[...,:d-1]) * (g[...,:d-1] + g[...,d:-2]))#n*(d-1)
116 |     y[...,:d] = 2**(-1*a).unsqueeze(-1).expand_as(g[...,:d]) * g[...,d:-1] - y[...,d:-1] + 2**(-1*a).unsqueeze(-1).expand_as(g[...,:d]) * g[...,:d]#n*d
117 |     assert y[...,-2].max()==0
118 |     return y
119 | 
120 | def train(
121 |         thread_id,
122 |         device,
123 |         model,
124 |         data,
125 |         optimizer,
126 |         opt,
127 |         log,
128 |         progress=False
129 | ):
130 |     if isinstance(data, torch_data.Dataset):
131 |         loader = torch_data.DataLoader(data, batch_size=opt.batchsize,
132 |             shuffle=False, num_workers=opt.ndproc)
133 |     else:
134 |         loader = data
135 | 
136 |     epoch_loss = th.Tensor(len(loader))
137 |     counts = th.zeros(model.nobjects, 1).to(device)
138 | 
139 |     LOSS = np.zeros(opt.epochs)        
140 |     for epoch in range(opt.epoch_start, opt.epochs):
141 |         print(th.abs(model.lt.weight.data).max().item())
142 |         
143 |         epoch_loss.fill_(0)
144 |         data.burnin = False
145 |         t_start = timeit.default_timer()
146 |         lr = opt.lr        
147 |         if epoch < opt.burnin:
148 |             data.burnin = True
149 |             lr = opt.lr * _lr_multiplier
150 |             
151 |             
152 |         loader_iter = tqdm(loader) if progress else loader
153 |         for i_batch, (inputs, targets) in enumerate(loader_iter):
154 |             elapsed = timeit.default_timer() - t_start
155 | 
156 |             inputs = inputs.to(device)
157 |             targets = targets.to(device)
158 | 
159 |             # count occurrences of objects in batch
160 |             if hasattr(opt, 'asgd') and opt.asgd:
161 |                 counts = th.bincount(inputs.view(-1), minlength=model.nobjects)
162 |                 counts.clamp_(min=1).div_(inputs.size(0))
163 |                 counts = counts.double().unsqueeze(-1)
164 | 
165 |             optimizer.zero_grad()
166 |             preds = model(inputs)
167 | 
168 |             loss = model.loss(preds, targets, size_average=True)
169 |             loss.backward()
170 |             optimizer.step(lr=lr, counts=counts)
171 |             epoch_loss[i_batch] = loss.cpu().item()
172 |         LOSS[epoch] = th.mean(epoch_loss).item()
173 |         log.info('json_stats: {'
174 |                  f'"thread_id": {thread_id}, '
175 |                  f'"epoch": {epoch}, '
176 |                  f'"elapsed": {elapsed}, '
177 |                  f'"loss": {LOSS[epoch]}, '
178 |                  '}')
179 |         if opt.nor!='none' and epoch>opt.stre and (epoch-opt.stre)%opt.norevery==0:
180 |             if opt.nor=='LTiling':
181 |                 NMD, NMD_int_matrix = normalize_gmatrix(model.lt.weight.data.cpu().clone(), model.int_matrix.data.clone())
182 |                 model.int_matrix.data.copy_(NMD_int_matrix)
183 |                 model.lt.weight.data.copy_(NMD)
184 |             elif opt.nor == 'HTiling':
185 |                 NMD = normalize_halfspace_matrix(model.lt.weight.data.clone())
186 |                 model.lt.weight.data.copy_(NMD)
187 |         
188 | #         if (epoch+1)%opt.eval_each==0 and thread_id==0:
189 | #             manifold = MANIFOLDS[opt.manifold](debug=opt.debug, max_norm=opt.maxnorm)
190 | #             if 'LTiling' in opt.manifold:
191 | #                 meanrank, maprank = eval_reconstruction(opt.adj, model.lt.weight.data.clone(), manifold.distance, lt_int_matrix = model.int_matrix.data.clone(), workers = opt.ndproc)
192 | #                 sqnorms = manifold.pnorm(model.lt.weight.data.clone(), model.int_matrix.data.clone())
193 | #             else:
194 | #                 meanrank, maprank = eval_reconstruction(opt.adj, model.lt.weight.data.clone(), manifold.distance)#, workers = opt.ndproc)
195 | #                 sqnorms = manifold.pnorm(model.lt.weight.data.clone())
196 | #             log.info(
197 | #                 'json_stats during training: {'
198 | #                 f'"sqnorm_min": {sqnorms.min().item()}, '
199 | #                 f'"sqnorm_avg": {sqnorms.mean().item()}, '
200 | #                 f'"sqnorm_max": {sqnorms.max().item()}, '
201 | #                 f'"mean_rank": {meanrank}, '
202 | #                 f'"map": {maprank}, '
203 | #                 '}'
204 | #             )
205 | 
206 |             
207 | #     print(LOSS)
208 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | nltk
2 | scikit-learn
3 | pandas
4 | h5py
5 | cython
6 | tqdm
7 | numpy
8 | torch==1.0.0
9 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2018-present, Facebook, Inc.
 2 | # All rights reserved.
 3 | #
 4 | # This source code is licensed under the license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from __future__ import absolute_import, division, print_function, unicode_literals
 8 | from distutils.core import setup
 9 | from Cython.Build import cythonize
10 | import numpy
11 | from distutils.extension import Extension
12 | from subprocess import check_output
13 | from distutils import sysconfig
14 | import re
15 | 
16 | extra_compile_args = ['-std=c++11']
17 | 
18 | # Super hacky way of determining if clang or gcc is being used
19 | CC = sysconfig.get_config_vars().get('CC', 'gcc').split(' ')[0]
20 | out = check_output([CC, '--version'])
21 | if re.search('apple *llvm', str(out.lower())):
22 |     extra_compile_args.append('-stdlib=libc++')
23 | 
24 | extensions = [
25 |     Extension(
26 |         "hype.graph_dataset",
27 |         ["hype/graph_dataset.pyx"],
28 |         include_dirs=[numpy.get_include()],
29 |         extra_compile_args=extra_compile_args,
30 |         language='c++',
31 |     ),
32 |     Extension(
33 |         "hype.adjacency_matrix_dataset",
34 |         ["hype/adjacency_matrix_dataset.pyx"],
35 |         include_dirs=[numpy.get_include()],
36 |         extra_compile_args=extra_compile_args,
37 |         language='c++',
38 |     ),
39 | ]
40 | 
41 | 
42 | setup(
43 |     ext_modules=cythonize(extensions),
44 | )
45 | 


--------------------------------------------------------------------------------
/train-grqc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -J gr_gh2
 3 | #SBATCH -o gr_gh2.o%j
 4 | #SBATCH -e gr_gh2.o%j
 5 | #SBATCH -N 1
 6 | #SBATCH -n 2
 7 | #SBATCH --mem=5000
 8 | #SBATCH -t 720:00:00
 9 | #SBATCH --partition=mpi-cpus  --gres=gpu:0
10 | 
11 | python3 embed.py \
12 |        -dim 2 \
13 |        -lr 0.3 \
14 |        -epochs 1000 \
15 |        -negs 50 \
16 |        -burnin 20 \
17 |        -ndproc 4 \
18 |        -manifold group_rie \
19 |        -dset wordnet/grqc.csv \
20 |        -batchsize 10 \
21 |        -eval_each 100 \
22 |        -sparse \
23 |        -train_threads 2


--------------------------------------------------------------------------------
/train-mammals.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -J mmh2_2
 3 | #SBATCH -o mmh2_2.o%j
 4 | #SBATCH -e mmh2_2.o%j
 5 | #SBATCH -N 1
 6 | #SBATCH -n 2
 7 | #SBATCH --mem=2000
 8 | #SBATCH -t 24:00:00
 9 | #SBATCH --partition=mpi-cpus  --gres=gpu:0
10 | 
11 | python3 embed.py \
12 |        -dim 2 \
13 |        -com_n 1 \
14 |        -lr 0.3 \
15 |        -epochs 1000 \
16 |        -negs 50 \
17 |        -burnin 20 \
18 |        -ndproc 4 \
19 |        -manifold Halfspace \
20 |        -dset wordnet/mammal_closure.csv \
21 |        -batchsize 10 \
22 |        -eval_each 20 \
23 |        -sparse \
24 |        -train_threads 2


--------------------------------------------------------------------------------
/train-nouns.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -J nsh2_2
 3 | #SBATCH -o nsh2_2.o%j
 4 | #SBATCH -e nsh2_2.o%j
 5 | #SBATCH -N 1
 6 | #SBATCH -n 5
 7 | #SBATCH --mem=5000
 8 | #SBATCH -t 720:00:00
 9 | #SBATCH --partition=mpi-cpus  --gres=gpu:0
10 | 
11 | DIMS="2"
12 | MODEL="Halfspace"
13 | COMN="1"
14 | 
15 | while true; do
16 |   case "$1" in
17 |     -c | --com_n ) COMN=$2; shift; shift ;;
18 |     -d | --dim ) DIMS=$2; shift; shift ;;
19 |     -m | --model ) MODEL=$2; shift; shift ;;
20 |     -- ) shift; break ;;
21 |     * ) break ;;
22 |   esac
23 | done
24 | 
25 | USAGE="usage: ./train-nouns.sh -d <dim> -m <model>
26 |   -d: dimensions to use
27 |   -m: model to use (can be lorentz or poincare)
28 |   Example: ./train-nouns.sh -m lorentz -d 10
29 | "
30 | 
31 | case "$MODEL" in
32 |   "Lorentz" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
33 |   "NLorentz" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
34 |   "LTiling_rsgd" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
35 |   "NLTiling_rsgd" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
36 |   "LTiling_sgd" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
37 |   "HTiling_rsgd" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;  
38 |   "Halfspace" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;  
39 |   "Poincare" ) EXTRA_ARGS=("-lr" "0.5");;
40 |   * ) echo "$USAGE"; exit 1;;
41 | esac
42 | 
43 | python3 embed.py \
44 |   -dset wordnet/noun_closure.csv \
45 |   -epochs 1000 \
46 |   -negs 50 \
47 |   -burnin 20 \
48 |   -dampening 0.75 \
49 |   -ndproc 4 \
50 |   -eval_each 100 \
51 |   -sparse \
52 |   -burnin_multiplier 0.01 \
53 |   -neg_multiplier 0.1 \
54 |   -lr_type constant \
55 |   -train_threads 5 \
56 |   -dampening 1.0 \
57 |   -batchsize 50 \
58 |   -manifold "$MODEL" \
59 |   -dim "$DIMS" \
60 |   -com_n "$COMN" \
61 |   "${EXTRA_ARGS[@]}"


--------------------------------------------------------------------------------
/train-verbs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -J vsh2_5
 3 | #SBATCH -o vsh2_5.o%j
 4 | #SBATCH -e vsh2_5.o%j
 5 | #SBATCH -N 1
 6 | #SBATCH -n 5
 7 | #SBATCH --mem=5000
 8 | #SBATCH -t 720:00:00
 9 | #SBATCH --partition=mpi-cpus  --gres=gpu:0
10 | 
11 | DIMS="5"
12 | MODEL="Halfspace"
13 | COMN="1"
14 | 
15 | while true; do
16 |   case "$1" in
17 |     -c | --com_n ) COMN=$2; shift; shift ;;
18 |     -d | --dim ) DIMS=$2; shift; shift ;;
19 |     -m | --model ) MODEL=$2; shift; shift ;;
20 |     -- ) shift; break ;;
21 |     * ) break ;;
22 |   esac
23 | done
24 | 
25 | USAGE="usage: ./train-nouns.sh -d <dim> -m <model>
26 |   -d: dimensions to use
27 |   -m: model to use (can be lorentz or poincare)
28 |   Example: ./train-nouns.sh -m lorentz -d 10
29 | "
30 | 
31 | case "$MODEL" in
32 |   "Lorentz" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
33 |   "NLorentz" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
34 |   "LTiling_rsgd" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
35 |   "NLTiling_rsgd" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
36 |   "LTiling_sgd" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;
37 |   "HTiling_rsgd" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;  
38 |   "Halfspace" ) EXTRA_ARGS=("-lr" "0.5" "-no-maxnorm");;  
39 |   "Poincare" ) EXTRA_ARGS=("-lr" "0.5");;
40 |   * ) echo "$USAGE"; exit 1;;
41 | esac
42 | 
43 | python3 embed.py \
44 |   -dset wordnet/verb_closure.csv \
45 |   -epochs 1000 \
46 |   -negs 50 \
47 |   -burnin 20 \
48 |   -dampening 0.75 \
49 |   -ndproc 4 \
50 |   -eval_each 300 \
51 |   -sparse \
52 |   -burnin_multiplier 0.01 \
53 |   -neg_multiplier 0.1 \
54 |   -lr_type constant \
55 |   -train_threads 5 \
56 |   -dampening 1.0 \
57 |   -batchsize 10 \
58 |   -manifold "$MODEL" \
59 |   -dim "$DIMS" \
60 |   -com_n "$COMN" \
61 |   "${EXTRA_ARGS[@]}"


--------------------------------------------------------------------------------
/wordnet/mammals_filter.txt:
--------------------------------------------------------------------------------
 1 | \sliving_thing.n.01
 2 | \sobject.n.01
 3 | \sorganism.n.01
 4 | \sanimal.n.01
 5 | \sentity.n.01
 6 | \sphysical_entity.n.01
 7 | \swhole.n.02
 8 | \svertebrate.n.01
 9 | \schordate.n.01
10 | \sbeast_of_burden.n.01
11 | \swork_animal.n.01
12 | \sfemale.n.01
13 | \sfissipedia.n.01
14 | \spup.n.01
15 | \sabstraction.n.06
16 | \sgroup.n.01
17 | ^tusker.n.01
18 | ^female_mammal.n.01
19 | \scub.n.03
20 | \syoung.n.01
21 | \syoung_mammal.n.01
22 | \sdomestic_animal.n.01
23 | \sracer.n.03
24 | \smale.n.01
25 | 


--------------------------------------------------------------------------------
/wordnet/transitive_closure.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) 2017-present, Facebook, Inc.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # LICENSE file in the root directory of this source tree.
 7 | #
 8 | 
 9 | import re
10 | import pandas
11 | from nltk.corpus import wordnet as wn
12 | from tqdm import tqdm
13 | try:
14 |     wn.all_synsets
15 | except LookupError as e:
16 |     import nltk
17 |     nltk.download('wordnet')
18 | 
19 | # make sure each edge is included only once
20 | nouns_edges = set()
21 | for synset in tqdm(wn.all_synsets(pos='n')):
22 |     # write the transitive closure of all hypernyms of a synset to file
23 |     for hyper in synset.closure(lambda s: s.hypernyms()):
24 |         nouns_edges.add((synset.name(), hyper.name()))
25 | 
26 |     # also write transitive closure for all instances of a synset
27 |     for instance in synset.instance_hyponyms():
28 |         for hyper in instance.closure(lambda s: s.instance_hypernyms()):
29 |             nouns_edges.add((instance.name(), hyper.name()))
30 |             for h in hyper.closure(lambda s: s.hypernyms()):
31 |                 nouns_edges.add((instance.name(), h.name()))
32 | 
33 | # make sure each edge is included only once
34 | verbs_edges = set()
35 | for synset in tqdm(wn.all_synsets(pos='v')):
36 |     # write the transitive closure of all hypernyms of a synset to file
37 |     for hyper in synset.closure(lambda s: s.hypernyms()):
38 |         verbs_edges.add((synset.name(), hyper.name()))
39 | 
40 |     # also write transitive closure for all instances of a synset
41 |     for instance in synset.instance_hyponyms():
42 |         for hyper in instance.closure(lambda s: s.instance_hypernyms()):
43 |             verbs_edges.add((instance.name(), hyper.name()))
44 |             for h in hyper.closure(lambda s: s.hypernyms()):
45 |                 verbs_edges.add((instance.name(), h.name()))
46 | 
47 | nouns = pandas.DataFrame(list(nouns_edges), columns=['id1', 'id2'])
48 | nouns['weight'] = 1
49 | 
50 | verbs = pandas.DataFrame(list(verbs_edges), columns=['id1', 'id2'])
51 | verbs['weight'] = 1
52 | 
53 | # Extract the set of nouns that have "mammal.n.01" as a hypernym
54 | mammal_set = set(nouns[nouns.id2 == 'mammal.n.01'].id1.unique())
55 | mammal_set.add('mammal.n.01')
56 | 
57 | # Select relations that have a mammal as hypo and hypernym
58 | mammals = nouns[nouns.id1.isin(mammal_set) & nouns.id2.isin(mammal_set)]
59 | 
60 | with open('mammals_filter.txt', 'r') as fin:
61 |     filt = re.compile(f'({"|".join([l.strip() for l in fin.readlines()])})')
62 | 
63 | filtered_mammals = mammals[~mammals.id1.str.cat(' ' + mammals.id2).str.match(filt)]
64 | 
65 | nouns.to_csv('noun_closure.csv', index=False)
66 | verbs.to_csv('verb_closure.csv', index=False)
67 | filtered_mammals.to_csv('mammal_closure.csv', index=False)
68 | 


--------------------------------------------------------------------------------