├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── data └── oxford │ ├── README.md │ └── oxford_features.fvecs ├── docs └── GUIDE.md ├── protobuf ├── README.md └── lopq_model.proto ├── python ├── README.md ├── lopq │ ├── __init__.py │ ├── eval.py │ ├── lopq_model_pb2.py │ ├── model.py │ ├── package_metadata.json │ ├── search.py │ └── utils.py ├── requirements.txt ├── setup.py ├── test.sh ├── test │ ├── testdata │ │ ├── random_test_model.lopq │ │ ├── test_accumulate_covariance_estimators_input.pkl │ │ ├── test_accumulate_covariance_estimators_output.pkl │ │ ├── test_compute_rotations_from_accumulators_input.pkl │ │ ├── test_compute_rotations_from_accumulators_output.pkl │ │ ├── test_eigenvalue_allocation_input.pkl │ │ └── test_searcher_data.pkl │ └── tests.py └── tox.ini ├── scripts ├── example.py └── query_runtime.py └── spark ├── README.md ├── compute_codes.py ├── example_udf.py ├── pca_preparation.py ├── train_model.py └── train_pca.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | .DS_Store 60 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | before_install: 5 | - sudo apt-get update -qq 6 | - sudo apt-get install -qq libatlas-dev libatlas-base-dev liblapack-dev gfortran 7 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 8 | - chmod +x miniconda.sh 9 | - ./miniconda.sh -b -p /home/travis/miniconda 10 | - export PATH=/home/travis/miniconda/bin:$PATH 11 | - conda update --yes conda 12 | install: 13 | - travis_retry conda install --yes python=$TRAVIS_PYTHON_VERSION pip numpy scipy 14 | - travis_retry pip install nose nose-cov coveralls setuptools 15 | - cd python/ 16 | - travis_retry python setup.py install 17 | script: nosetests --exe --with-xunit --xunit-file=nosetests.xml --with-coverage --cover-xml --cover-erase --cover-package=lopq --cover-xml-file=cobertura.xml test/tests.py 18 | after_success: 19 | coveralls 20 | after_failure: 21 | - echo "pip.log\n"; cat $HOME/.pip/pip.log 22 | cache: apt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by 10 | Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting 13 | the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are 16 | controlled by, or are under common control with that entity. For the purposes of this definition, 17 | "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, 18 | whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding 19 | shares, or (iii) beneficial ownership of such entity. 20 | 21 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 22 | 23 | "Source" form shall mean the preferred form for making modifications, including but not limited to software 24 | source code, documentation source, and configuration files. 25 | 26 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, 27 | including but not limited to compiled object code, generated documentation, and conversions to other media types. 28 | 29 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, 30 | as indicated by a copyright notice that is included in or attached to the work (an example is provided in 31 | the Appendix below). 32 | 33 | "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) 34 | the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, 35 | as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not 36 | include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work 37 | and Derivative Works thereof. 38 | 39 | "Contribution" shall mean any work of authorship, including the original version of the Work and any 40 | modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to 41 | Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to 42 | submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of 43 | electronic, verbal, or written communication sent to the Licensor or its representatives, including but not 44 | limited to communication on electronic mailing lists, source code control systems, and issue tracking systems 45 | that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but 46 | excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner 47 | as "Not a Contribution." 48 | 49 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been 50 | received by Licensor and subsequently incorporated within the Work. 51 | 52 | 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby 53 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license 54 | to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute 55 | the Work and such Derivative Works in Source or Object form. 56 | 57 | 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby 58 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated 59 | in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer 60 | the Work, where such license applies only to those patent claims licensable by such Contributor that are 61 | necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work 62 | to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including 63 | a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the 64 | Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under 65 | this License for that Work shall terminate as of the date such litigation is filed. 66 | 67 | 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any 68 | medium, with or without modifications, and in Source or Object form, provided that You meet the following 69 | conditions: 70 | 71 | You must give any other recipients of the Work or Derivative Works a copy of this License; and 72 | You must cause any modified files to carry prominent notices stating that You changed the files; and 73 | You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, 74 | trademark, and attribution notices from the Source form of the Work, excluding those notices that do not 75 | pertain to any part of the Derivative Works; and 76 | If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You 77 | distribute must include a readable copy of the attribution notices contained within such NOTICE file, 78 | excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the 79 | following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source 80 | form or documentation, if provided along with the Derivative Works; or, within a display generated by the 81 | Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file 82 | are for informational purposes only and do not modify the License. You may add Your own attribution notices 83 | within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, 84 | provided that such additional attribution notices cannot be construed as modifying the License. 85 | 86 | You may add Your own copyright statement to Your modifications and may provide additional or different 87 | license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such 88 | Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise 89 | complies with the conditions stated in this License. 90 | 91 | 5. Submission of Contributions. 
Unless You explicitly state otherwise, any Contribution intentionally 92 | submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this 93 | License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall 94 | supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding 95 | such Contributions. 96 | 97 | 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or 98 | product names of the Licensor, except as required for reasonable and customary use in describing the origin 99 | of the Work and reproducing the content of the NOTICE file. 100 | 101 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the 102 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 103 | OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, 104 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for 105 | determining the appropriateness of using or redistributing the Work and assume any risks associated with Your 106 | exercise of permissions under this License. 107 | 108 | 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), 109 | contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) 110 | or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, 111 | special, incidental, or consequential damages of any character arising as a result of this License or out 112 | of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work 113 | stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such 114 | Contributor has been advised of the possibility of such damages. 115 | 116 | 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, 117 | You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other 118 | liability obligations and/or rights consistent with this License. However, in accepting such obligations, 119 | You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, 120 | and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred 121 | by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional 122 | liability. 
123 | 124 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![Build Status](https://travis-ci.org/yahoo/lopq.svg?branch=master)](https://travis-ci.org/yahoo/lopq) 3 | [![Coverage Status](https://coveralls.io/repos/yahoo/lopq/badge.svg?branch=master&service=github)](https://coveralls.io/github/yahoo/lopq?branch=master) 4 | [![PyPI version](https://badge.fury.io/py/lopq.svg)](https://badge.fury.io/py/lopq) 5 | 6 | # Locally Optimized Product Quantization 7 | 8 | This is Python training and testing code for Locally Optimized Product Quantization (LOPQ) models, as well as Spark scripts to scale training to hundreds of millions of vectors. The resulting model can be used in Python with code provided here or deployed via a Protobuf format to, e.g., search backends for high performance approximate nearest neighbor search. 9 | 10 | ### Overview 11 | 12 | Locally Optimized Product Quantization (LOPQ) [1] is a hierarchical quantization algorithm that produces codes of configurable length for data points. These codes are efficient representations of the original vector and can be used in a variety of ways depending on the application, including as hashes that preserve locality, as a compressed vector from which an approximate vector in the data space can be reconstructed, and as a representation from which to compute an approximation of the Euclidean distance between points. 13 | 14 | Conceptually, the LOPQ quantization process can be broken into 4 phases. The training process also fits these phases to the data in the same order. 15 | 16 | 1. The raw data vector is PCA'd to `D` dimensions (possibly the original dimensionality). This allows subsequent quantization to more efficiently represent the variation present in the data. 17 | 2. The PCA'd data is then product quantized [2] by two k-means quantizers. This means that each vector is split into two subvectors each of dimension `D / 2`, and each of the two subspaces is quantized independently with a vocabulary of size `V`. Since the two quantizations occur independently, the dimensions of the vectors are permuted such that the total variance in each of the two subspaces is approximately equal, which allows the two vocabularies to be equally important in terms of capturing the total variance of the data. This results in a pair of cluster ids that we refer to as "coarse codes". 18 | 3. The residuals of the data after coarse quantization are computed. The residuals are then locally projected independently for each coarse cluster. This projection is another application of PCA and dimension permutation on the residuals, and it is "local" in the sense that there is a different projection for each cluster in each of the two coarse vocabularies. These local rotations make the next and final step, another application of product quantization, very efficient in capturing the variance of the residuals. 19 | 4. The locally projected data is then product quantized a final time by `M` subquantizers, resulting in `M` "fine codes". Usually the vocabulary for each of these subquantizers will be a power of 2 for effective storage in a search index. With vocabularies of size 256, the fine codes for each indexed vector will require `M` bytes to store in the index. 20 | 21 | The final LOPQ code for a vector is a `(coarse codes, fine codes)` pair, e.g. `((3, 2), (14, 164, 83, 49, 185, 29, 196, 250))`. 
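As a concrete illustration, here is a minimal sketch of fitting a model and inspecting a code using the `lopq` Python module (see `python/README.md` for full usage). The data is a random toy matrix and the parameter values are only illustrative:

```python
import numpy as np
from lopq import LOPQModel

# Stand-in for real feature vectors (e.g. PCA'd deep features)
data = np.random.rand(10000, 128)

# V clusters per coarse quantizer, M subquantizers,
# 256 clusters per subquantizer so each fine code fits in one byte
model = LOPQModel(V=16, M=8, subquantizer_clusters=256)
model.fit(data)

# An LOPQ code is a (coarse codes, fine codes) pair
coarse, fine = model.predict(data[0])
print(coarse)  # e.g. (3, 2) - one id per coarse quantizer
print(fine)    # e.g. (14, 164, 83, 49, 185, 29, 196, 250) - one id per subquantizer

# An approximate vector can be reconstructed from the code alone
approx = model.reconstruct((coarse, fine))
```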
22 | 23 | ### Nearest Neighbor Search 24 | 25 | A nearest neighbor index can be built from these LOPQ codes by indexing each document into its corresponding coarse code bucket. That is, each pair of coarse codes (which we refer to as a "cell") will index a bucket of the vectors quantizing to that cell. 26 | 27 | At query time, an incoming query vector undergoes substantially the same process. First, the query is split into coarse subvectors and the distance to each coarse centroid is computed. These distances can be used to efficiently compute a priority-ordered sequence of cells [3] such that cells later in the sequence are less likely to contain near neighbors of the query than earlier cells. The items in cell buckets are retrieved in this order until some desired quota has been met. 28 | 29 | After this retrieval phase, the fine codes are used to rank by approximate Euclidean distance. The query is projected into each local space and the distance to each indexed item is estimated as the sum of the squared distances of the query subvectors to the corresponding subquantizer centroids indexed by the fine codes. 30 | 31 | NN search with LOPQ is highly scalable and, when implemented well, has excellent properties in terms of both index storage requirements and query-time latency. 32 | 33 | #### References 34 | 35 | More information and performance benchmarks can be found at http://image.ntua.gr/iva/research/lopq/. 36 | 37 | 1. Y. Kalantidis, Y. Avrithis. [Locally Optimized Product Quantization for Approximate Nearest Neighbor Search.](http://image.ntua.gr/iva/files/lopq.pdf) CVPR 2014. 38 | 2. H. Jegou, M. Douze, and C. Schmid. [Product quantization for nearest neighbor search.](https://lear.inrialpes.fr/pubs/2011/JDS11/jegou_searching_with_quantization.pdf) PAMI, 33(1), 2011. 39 | 3. A. Babenko and V. Lempitsky. [The inverted multi-index.](http://cache-ash04.cdn.yandex.net/download.yandex.ru/company/cvpr2012.pdf) CVPR 2012. 40 | 41 | ### Python 42 | 43 | Full LOPQ training and evaluation is implemented in the `lopq` python module. Please refer to the README in `python/` for more detail. 44 | 45 | ### Spark 46 | 47 | The training algorithm is also implemented on Spark using `pyspark` to scale parameter fitting to large datasets. Please refer to the README in `spark/` for documentation and usage information. 48 | 49 | #### Running Tests 50 | 51 | Tests can be run during development by running: 52 | 53 | ```bash 54 | cd python/ 55 | bash test.sh 56 | ``` 57 | 58 | To run tests in a virtual environment, this project uses [tox](http://tox.testrun.org/). Tox can be installed with `pip install tox` and run from the `python/` directory: 59 | 60 | ```bash 61 | cd python/ 62 | tox 63 | ``` 64 | 65 | #### License 66 | 67 | Code licensed under the Apache License, Version 2.0. See LICENSE file for terms. 68 | -------------------------------------------------------------------------------- /data/oxford/README.md: -------------------------------------------------------------------------------- 1 | ### Oxford dataset 2 | 3 | This test dataset is included for tests and the short `example.py` tutorial. The `oxford_features.fvecs` file contains 5063 128-dimensional vectors representing each of the images in the Oxford5k dataset (http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/). The data format is that described here: http://corpus-texmex.irisa.fr/.
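For reference, the features can be loaded with the fvecs reader included in `lopq.utils` (a short sketch; the path assumes you are at the repository root):

```python
from lopq.utils import load_xvecs

# Returns an N x D ndarray; for this file, N = 5063 and D = 128
features = load_xvecs('data/oxford/oxford_features.fvecs')
print(features.shape)
```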
4 | 5 | The features themselves are the rectified fc7 layer outputs of the BVLC Caffe reference model (https://github.com/BVLC/caffe/tree/master/models/bvlc_reference_caffenet) extracted from resized images and then PCA'd to 128 dimensions from their original 4096. 6 | -------------------------------------------------------------------------------- /data/oxford/oxford_features.fvecs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yahoo/lopq/0f17655b901e6dfabe5c2aa62b4c8e492f34b05a/data/oxford/oxford_features.fvecs -------------------------------------------------------------------------------- /docs/GUIDE.md: -------------------------------------------------------------------------------- 1 | ## Guide to using deep features for similarity applications 2 | 3 | This is a short guide through the design and evaluation process for using features extracted from deep neural networks for similarity and deduplication applications. While it is targeted at image similarity applications, the guidelines outlined here are relevant to other types of data as well. 4 | 5 | There are many possible similarity applications, so different use cases may require different evaluation approaches. Part of the flexibility of nearest neighbor systems is their potential for multipurpose application. Here are a handful of the most common use cases: 6 | - nearest neighbor and approximate nearest neighbor retrieval and ranking as an end goal in itself 7 | - classification with kNN, possibly utilizing approximate nearest neighbor retrieval at classification time 8 | - clustering of data in the feature space 9 | - duplicate pair detection when pairs are known explicitly 10 | - deduplication of collections when all pairs in a collection should be considered 11 | 12 | 13 | ### 1. Assess raw features 14 | 15 | For any similarity application, the result will be no better than the quality of the features used. A good first step is to assess the suitability of your features. 16 | 17 | #### Similarity search 18 | 19 | In the case of similarity search, you should obtain features for an evaluation dataset. Create a set of queries from the dataset and allow the remaining data to comprise the search index for evaluation purposes. Exhaustively compute the top nearest neighbors in the index set for each query. If you already have queries and groundtruth results for your use case, you should use those in the evaluation. Otherwise, you can get some insight by manually inspecting the ranked results. If your evaluation dataset has labels, you can compute the performance of a kNN classifier with the features. This will be particularly useful if you expect to do nearest neighbor classification in your application or if you expect to be able to cluster the features. 20 | 21 | #### Deduplication 22 | 23 | In the case of deduplication, you should construct an evaluation dataset of duplicate pairs and non-duplicate pairs in equal proportion, and then compute the feature distance for each pair. Sweeping a threshold on the distance and measuring accuracy, false positive rate, false negative rate, etc. will allow you to assess how well the features support your deduplication task. You might wish to focus on some performance metrics more than others depending on your use case. 24 | 25 | #### Dimensionality reduction 26 | 27 | Depending on the dimension of the feature in consideration, it can be beneficial to first reduce the dimensionality of the feature with PCA or PCA+whitening.
This serves, firstly, to reduce the complexity of downstream distance computations and, secondly, to potentially improve task performance. For example, in our experience with high-dimensional feature vectors (e.g. 4096 dimensions) extracted from convolutional networks, we find that PCA'ing the feature to a lower dimension (e.g. 128) actually improves the results of similarity applications, most likely by removing some noise from the representation. 28 | 29 | #### Getting new features 30 | 31 | If the features prove unsuitable for the task, the proper step is to get new features. In the "worst" case, this means training a new model that produces features that better capture the information relevant to the task. For images, you might try features extracted at different layers of a deep model. For example, usually in deep vision models lower layers capture more spatial or literal information (e.g. that a left-facing red car is in front of a tree) and deeper layers capture more abstract information (e.g. a red car appears with foliage). 32 | 33 | ### 2. Design quantization model 34 | 35 | After finding effective features, you may consider quantizing the features to save storage space and reduce runtime complexity. 36 | 37 | #### Similarity search 38 | 39 | For similarity search applications, LOPQ is a great choice because it allows fast retrieval in large databases and can produce accurate (though approximate) rankings at lower computational cost by amortizing computation across the distance evaluations for the candidate result set. 40 | 41 | There are two primary hyperparameters in an LOPQ model to consider. The first is the number of coarse clusters in the model. A large number will increase ranking accuracy, but may also result in a lower average number of index points in each cell. This, in turn, will result in many coarse cells being retrieved and ranked per query to meet a quota of candidate results. A handful of cells can be ranked efficiently, but too many cells will impose a prohibitive overhead. Secondly, the number of subquantizers can be set relatively independently - the more subquantizers, the more accurate ranking will be, but the more memory will be required for each index item. 42 | 43 | General advice is to set the number of coarse clusters as appropriate for expectations about retrieval and then set enough subquantizers for suitable ranking quality in experimentation. 44 | 45 | #### Pairwise deduplication with hashing 46 | 47 | If an application requires comparison of explicit pairs of items to determine whether they are duplicates, a simple approach is to threshold the exact feature distance or the approximate LOPQ distance between pairs. But if all that is required is determining duplicates, this is unnecessarily expensive. Instead, the LOPQ codes can be used as hashes in a scheme called Locally Optimized Hashing, or LOH. In this use case, it can be desirable to tune the LOPQ model for the best hash-collision performance. 48 | 49 | Since LOPQ codes are composed of coarse codes and fine codes, it is important to note that collisions must be computed first between the coarse codes. If any coarse code matches for the pair, next compute the number of collisions for the fine codes corresponding to the colliding coarse codes. With this two-phase comparison procedure, there tends to be a high false negative rate when, for instance, items are close but happen by chance to be assigned to different coarse codes, thus precluding a finer-grain comparison with fine codes.
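To make the two-phase comparison concrete, the sketch below counts LOH collisions for a pair of LOPQ codes. It is an illustrative helper rather than part of the library, and it assumes the first half of the fine codes corresponds to the first coarse split and the second half to the second:

```python
def loh_collisions(code_a, code_b):
    # Each code is a ((c0, c1), (f0, ..., fM-1)) pair as produced by LOPQModel.predict
    (coarse_a, fine_a), (coarse_b, fine_b) = code_a, code_b
    half = len(fine_a) // 2

    collisions = 0
    # Phase 1: a coarse code must collide before its fine codes are compared
    if coarse_a[0] == coarse_b[0]:
        # Phase 2: count fine code collisions under the colliding coarse split
        collisions += sum(a == b for a, b in zip(fine_a[:half], fine_b[:half]))
    if coarse_a[1] == coarse_b[1]:
        collisions += sum(a == b for a, b in zip(fine_a[half:], fine_b[half:]))
    return collisions
```

A pair with no coarse collision scores zero no matter how close the underlying vectors are, which is exactly the false negative behavior discussed next.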
50 | 51 | This false negative problem can be mitigated by choosing fewer coarse clusters for the LOPQ model. This allows more pairs to make it to the fine-grained phase. The next consideration is the number of subquantizers. More subquantizers will result in more gradations of collision, which could be desirable for some applications; for instance, determining a multilevel duplicate "score". 52 | 53 | Another relevant factor for this use case is the number of subquantizer clusters. Intuitively, the larger the number of subquantizer clusters, the smaller each cluster will be. Take point A and point B that fall into all the same clusters. Now consider moving B farther from A such that each fine code changes until, finally, there are no collisions in the fine codes. Setting the number of subquantizer clusters higher will on average make this distance smaller and vice versa. Thus, the number of subquantizer clusters is important in determining the total range of distances in the feature space that can be distinguished by LOH. Similarly, the number of subquantizers is important in determining the number of distinguishable distances within this range. 54 | 55 | #### Collection deduplication 56 | 57 | If the use case is to detect duplicates in a set of photos, there is an effective algorithm for clustering based on LOH collisions. This is a graph-based clustering algorithm that operates on a graph constructed such that neighboring nodes have an LOH collision count of at least some threshold. Finding connected components in this graph can discover groups of duplicates in the collection, even if some pairs in the group have no collisions. For details, please see the paper [here](https://arxiv.org/abs/1604.06480). 58 | 59 | This approach can help mitigate the false negative problem described above without special tuning of the LOPQ model. In particular, the same LOPQ model optimized for similarity search can be used effectively for collection deduplication. This is a particularly good fit for search applications, where the same codes can be used for similarity search in one context and for search result deduplication in another. 60 | -------------------------------------------------------------------------------- /protobuf/README.md: -------------------------------------------------------------------------------- 1 | # Protocol buffer schema 2 | 3 | The `.proto` file defines the data schema for LOPQ model parameters. 4 | It can be compiled into libraries for any target languages 5 | that will use this data (e.g., Java and Python). A compiled version 6 | for Python is included in the `lopq` module. 7 | 8 | See [https://developers.google.com/protocol-buffers/docs/overview](https://developers.google.com/protocol-buffers/docs/overview) 9 | for details on protocol buffers and how to update the schema. 10 | Note: to keep the schema backwards compatible, it is important not to 11 | alter the `tags` (ints) assigned to each field in the `.proto` file. 12 | 13 | 14 | ### Compiling the `.proto` file 15 | 16 | Compile for Java: 17 | 18 | ```bash 19 | # from the repository root 20 | protoc -I=protobuf \ 21 | --java_out=.
\ 22 | protobuf/lopq_model.proto 23 | ``` 24 | 25 | Compile for Python: 26 | 27 | ```bash 28 | # from the repository root 29 | protoc -I=protobuf \ 30 | --python_out=python/lopq \ 31 | protobuf/lopq_model.proto 32 | ``` 33 | -------------------------------------------------------------------------------- /protobuf/lopq_model.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2015, Yahoo Inc. 2 | // Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | 4 | // define a namespace for protobuffer 5 | package com.flickr.vision.lopq; 6 | 7 | // options for how java class should be built and packaged 8 | option java_package = "com.flickr.vision.lopq"; 9 | option java_outer_classname = "LOPQModelParameters"; 10 | option optimize_for = SPEED; 11 | 12 | 13 | ///////////////////////////////////////////////////////////////// 14 | // GENERIC TYPES 15 | 16 | message Vector { 17 | repeated float values = 1 [packed = true]; 18 | } 19 | 20 | message Matrix { 21 | repeated float values = 1 [packed = true]; 22 | repeated uint32 shape = 2; 23 | } 24 | 25 | ///////////////////////////////////////////////////////////////// 26 | // LOPQ PARAMS 27 | 28 | // lopq model params 29 | // file extension: .lopq 30 | message LOPQModelParams { 31 | 32 | optional uint32 D = 1; // dimensionality of original vectors 33 | optional uint32 V = 2; // number of coarse quantizer centroids 34 | optional uint32 M = 3; // number of subvectors 35 | optional uint32 num_subquantizers = 4; // number of subquantizer clusters 36 | 37 | repeated Matrix Cs = 5; // coarse quantizer centroids - 2 of these; size V x (D/2) 38 | repeated Matrix Rs = 6; // rotations - 2 * V of these, each size D/2 x D/2 39 | repeated Vector mus = 7; // residual means - 2 * V of these, each size D/2 40 | repeated Matrix subs = 8; // subquantizer centroids - M of these, each size num_subquantizers x (D/2)) 41 | } 42 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Python LOPQ module 2 | 3 | This module implements training and testing of LOPQ models along with a variety of other utilities useful for evaluation and data management. It includes a simple implementation of approximate nearest neighbor search with an LOPQ index. 4 | 5 | ## Installation 6 | 7 | ```python 8 | pip install lopq 9 | ``` 10 | 11 | ## Usage 12 | 13 | ```python 14 | from lopq import LOPQModel, LOPQSearcher 15 | 16 | # Define a model and fit it to data 17 | model = LOPQModel(V=8, M=4) 18 | model.fit(data) 19 | 20 | # Compute the LOPQ codes for a vector 21 | code = model.predict(x) 22 | 23 | # Create a searcher to index data with the model 24 | searcher = LOPQSearcher(model) 25 | searcher.add_data(data) 26 | 27 | # Retrieve ranked nearest neighbors 28 | nns = searcher.search(x, quota=100) 29 | ``` 30 | 31 | A more detailed usage walk-through is found in `scripts/example.py`. 32 | 33 | ## Training 34 | 35 | Refer to the documentation in the `model` submodules and, in particular, the `LOPQModel` class for more usage information. 36 | 37 | Available parameters for fitting data: 38 | 39 | | Name | Default | Description | 40 | | ------------------------- | ------- | ------------------------------------------------------------------------- | 41 | | V | 8 | The number of clusters per coarse quantizer. 
| 42 | | M | 4 | The total number of fine codes. | 43 | | kmeans_coarse_iters | 10 | The number of iterations of k-means for training coarse quantizers. | 44 | | kmeans_local_iters | 20 | The number of iterations of k-means for training subquantizers. | 45 | | subquantizer_clusters | 256 | The number of clusters to train per subquantizer. | 46 | | subquantizer_sample_ratio | 1.0 | The ratio of the data to sample for training subquantizers. | 47 | | random_state | None | A seed for seeding random operations during training. | 48 | | parameters | None | A tuple of trained model parameters to instantiate the model with. | 49 | | verbose | False | A boolean indicating whether to produce verbose output. | 50 | 51 | ## Submodules 52 | 53 | There are a handful of submodules, here is a brief description of each. 54 | 55 | | Submodule | Description | 56 | | -------------- | ----------- | 57 | | model | Core training algorithm and the `LOPQModel` class that encapsulates model parameters. 58 | | search | An implementation of the multisequence algorithm for retrieval on a multi-index as well as the `LOPQSearcher` class, a simple Python implementation of an LOPQ search index and LOPQ ranking. | 59 | | eval | Functions to aid in evaluating and benchmarking trained LOPQ models. | 60 | | utils | Miscellaneous utility functions. | 61 | | lopq_model_pb2 | Protobuf generated module. | 62 | -------------------------------------------------------------------------------- /python/lopq/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | import model 4 | import search 5 | import utils 6 | from .model import LOPQModel 7 | from .search import LOPQSearcher, multisequence 8 | 9 | __all__ = [LOPQModel, LOPQSearcher, multisequence, model, search, utils] 10 | -------------------------------------------------------------------------------- /python/lopq/eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | import time 4 | import numpy as np 5 | 6 | 7 | def compute_all_neighbors(data1, data2=None, just_nn=True): 8 | """ 9 | For each point in data1, compute a ranked list of neighbor indices from data2. 
10 | If data2 is not provided, compute neighbors relative to data1 11 | 12 | :param ndarray data1: 13 | an m1 x n dim matrix with observations on the rows 14 | :param ndarray data2: 15 | an m2 x n dim matrix with observations on the rows 16 | 17 | :returns ndarray: 18 | an m1 x m2 dim matrix with the distance-sorted indices of neighbors on the rows 19 | """ 20 | from scipy.spatial.distance import cdist 21 | 22 | if data2 is None: 23 | data2 = data1 24 | 25 | dists = cdist(data1, data2) 26 | 27 | if just_nn: 28 | nns = np.zeros((data1.shape[0]), dtype=int) 29 | else: 30 | nns = np.zeros(dists.shape, dtype=int) 31 | 32 | for i in xrange(dists.shape[0]): 33 | if just_nn: 34 | nns[i] = np.argmin(dists[i]) 35 | else: 36 | nns[i] = np.argsort(dists[i]) 37 | 38 | return nns 39 | 40 | 41 | def get_proportion_nns_with_same_coarse_codes(data, model, nns=None): 42 | """ 43 | """ 44 | N = data.shape[0] 45 | 46 | # Compute nearest neighbors if not provided 47 | if nns is None: 48 | nns = compute_all_neighbors(data) 49 | 50 | # Compute coarse codes for data 51 | coarse_codes = [] 52 | for d in data: 53 | c = model.predict_coarse(d) 54 | coarse_codes.append(c) 55 | 56 | # Count the number of NNs that share the same coarse codes 57 | count = 0 58 | for i in xrange(N): 59 | nn = nns[i] 60 | if coarse_codes[i] == coarse_codes[nn]: 61 | count += 1 62 | 63 | return float(count) / N 64 | 65 | 66 | def get_cell_histogram(data, model): 67 | # Compute cells for data 68 | cells = [] 69 | for d in data: 70 | c = model.predict_coarse(d) 71 | cell = model.get_cell_id_for_coarse_codes(c) 72 | cells.append(cell) 73 | 74 | return np.histogram(cells, bins=range(model.V ** 2))[0] 75 | 76 | 77 | def get_proportion_of_reconstructions_with_same_codes(data, model): 78 | N = data.shape[0] 79 | 80 | # Compute coarse codes for data 81 | count = 0 82 | for d in data: 83 | c1 = model.predict(d) 84 | r = model.reconstruct(c1) 85 | c2 = model.predict(r) 86 | if c1 == c2: 87 | count += 1 88 | 89 | return float(count) / N 90 | 91 | 92 | def get_recall(searcher, queries, nns, thresholds=[1, 10, 100, 1000], normalize=True, verbose=False): 93 | """ 94 | Given a LOPQSearcher object with indexed data and groundtruth nearest neighbors for a set of test 95 | query vectors, collect and return recall statistics. 
96 | 97 | :param LOPQSearcher searcher: 98 | a searcher that contains the indexed nearest neighbors 99 | :param ndarray queries: 100 | a collect of test vectors with vectors on the rows 101 | :param ndarray nns: 102 | a list of true nearest neighbor ids for each vector in queries 103 | :param list thresholds: 104 | the recall thresholds to evaluate - the last entry defines the number of 105 | results to retrieve before ranking 106 | :param bool normalize: 107 | flag to indicate whether the result should be normalized by the number of queries 108 | :param bool verbose: 109 | flag to print every 50th search to visualize progress 110 | 111 | :return list: 112 | a list of recalls for each specified threshold level 113 | :return float: 114 | the elapsed query time 115 | """ 116 | 117 | recall = np.zeros(len(thresholds)) 118 | query_time = 0.0 119 | for i, d in enumerate(queries): 120 | 121 | nn = nns[i] 122 | 123 | start = time.clock() 124 | results, cells_visited = searcher.search(d, thresholds[-1]) 125 | query_time += time.clock() - start 126 | 127 | if verbose and i % 50 == 0: 128 | print '%d cells visitied for query %d' % (cells_visited, i) 129 | 130 | for j, res in enumerate(results): 131 | rid, code = res 132 | 133 | if rid == nn: 134 | for k, t in enumerate(thresholds): 135 | if j < t: 136 | recall[k] += 1 137 | 138 | if normalize: 139 | N = queries.shape[0] 140 | return recall / N, query_time / N 141 | else: 142 | return recall, query_time 143 | 144 | 145 | def get_subquantizer_distortion(data, model): 146 | from .model import compute_residuals, project_residuals_to_local 147 | 148 | first_half, second_half = np.split(data, 2, axis=1) 149 | 150 | r1, a1 = compute_residuals(first_half, model.Cs[0]) 151 | r2, a2 = compute_residuals(second_half, model.Cs[1]) 152 | 153 | p1 = project_residuals_to_local(r1, a1, model.Rs[0], model.mus[0]) 154 | p2 = project_residuals_to_local(r2, a2, model.Rs[1], model.mus[1]) 155 | 156 | pall = np.concatenate((p1, p2), axis=1) 157 | suball = model.subquantizers[0] + model.subquantizers[1] 158 | 159 | dists = np.array([sum(np.linalg.norm(compute_residuals(d, c)[0], ord=2, axis=1) ** 2) for c, d in zip(suball, np.split(pall, 8, axis=1))]) 160 | 161 | return dists / data.shape[0] 162 | -------------------------------------------------------------------------------- /python/lopq/lopq_model_pb2.py: -------------------------------------------------------------------------------- 1 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | # source: lopq_model.proto 3 | 4 | import sys 5 | _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import message as _message 8 | from google.protobuf import reflection as _reflection 9 | from google.protobuf import symbol_database as _symbol_database 10 | from google.protobuf import descriptor_pb2 11 | # @@protoc_insertion_point(imports) 12 | 13 | _sym_db = _symbol_database.Default() 14 | 15 | 16 | 17 | 18 | DESCRIPTOR = _descriptor.FileDescriptor( 19 | name='lopq_model.proto', 20 | package='com.flickr.vision.lopq', 21 | serialized_pb=_b('\n\x10lopq_model.proto\x12\x16\x63om.flickr.vision.lopq\"\x1c\n\x06Vector\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\"+\n\x06Matrix\x12\x12\n\x06values\x18\x01 \x03(\x02\x42\x02\x10\x01\x12\r\n\x05shape\x18\x02 \x03(\r\"\x80\x02\n\x0fLOPQModelParams\x12\t\n\x01\x44\x18\x01 \x01(\r\x12\t\n\x01V\x18\x02 \x01(\r\x12\t\n\x01M\x18\x03 \x01(\r\x12\x19\n\x11num_subquantizers\x18\x04 \x01(\r\x12*\n\x02\x43s\x18\x05 \x03(\x0b\x32\x1e.com.flickr.vision.lopq.Matrix\x12*\n\x02Rs\x18\x06 \x03(\x0b\x32\x1e.com.flickr.vision.lopq.Matrix\x12+\n\x03mus\x18\x07 \x03(\x0b\x32\x1e.com.flickr.vision.lopq.Vector\x12,\n\x04subs\x18\x08 \x03(\x0b\x32\x1e.com.flickr.vision.lopq.MatrixB/\n\x16\x63om.flickr.vision.lopqB\x13LOPQModelParametersH\x01') 22 | ) 23 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 24 | 25 | 26 | 27 | 28 | _VECTOR = _descriptor.Descriptor( 29 | name='Vector', 30 | full_name='com.flickr.vision.lopq.Vector', 31 | filename=None, 32 | file=DESCRIPTOR, 33 | containing_type=None, 34 | fields=[ 35 | _descriptor.FieldDescriptor( 36 | name='values', full_name='com.flickr.vision.lopq.Vector.values', index=0, 37 | number=1, type=2, cpp_type=6, label=3, 38 | has_default_value=False, default_value=[], 39 | message_type=None, enum_type=None, containing_type=None, 40 | is_extension=False, extension_scope=None, 41 | options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), 42 | ], 43 | extensions=[ 44 | ], 45 | nested_types=[], 46 | enum_types=[ 47 | ], 48 | options=None, 49 | is_extendable=False, 50 | extension_ranges=[], 51 | oneofs=[ 52 | ], 53 | serialized_start=44, 54 | serialized_end=72, 55 | ) 56 | 57 | 58 | _MATRIX = _descriptor.Descriptor( 59 | name='Matrix', 60 | full_name='com.flickr.vision.lopq.Matrix', 61 | filename=None, 62 | file=DESCRIPTOR, 63 | containing_type=None, 64 | fields=[ 65 | _descriptor.FieldDescriptor( 66 | name='values', full_name='com.flickr.vision.lopq.Matrix.values', index=0, 67 | number=1, type=2, cpp_type=6, label=3, 68 | has_default_value=False, default_value=[], 69 | message_type=None, enum_type=None, containing_type=None, 70 | is_extension=False, extension_scope=None, 71 | options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), 72 | _descriptor.FieldDescriptor( 73 | name='shape', full_name='com.flickr.vision.lopq.Matrix.shape', index=1, 74 | number=2, type=13, cpp_type=3, label=3, 75 | has_default_value=False, default_value=[], 76 | message_type=None, enum_type=None, containing_type=None, 77 | is_extension=False, extension_scope=None, 78 | options=None), 79 | ], 80 | extensions=[ 81 | ], 82 | nested_types=[], 83 | enum_types=[ 84 | ], 85 | options=None, 86 | is_extendable=False, 87 | extension_ranges=[], 88 | oneofs=[ 89 | ], 90 | serialized_start=74, 91 | serialized_end=117, 92 | ) 93 | 94 | 95 | _LOPQMODELPARAMS = _descriptor.Descriptor( 96 | 
name='LOPQModelParams', 97 | full_name='com.flickr.vision.lopq.LOPQModelParams', 98 | filename=None, 99 | file=DESCRIPTOR, 100 | containing_type=None, 101 | fields=[ 102 | _descriptor.FieldDescriptor( 103 | name='D', full_name='com.flickr.vision.lopq.LOPQModelParams.D', index=0, 104 | number=1, type=13, cpp_type=3, label=1, 105 | has_default_value=False, default_value=0, 106 | message_type=None, enum_type=None, containing_type=None, 107 | is_extension=False, extension_scope=None, 108 | options=None), 109 | _descriptor.FieldDescriptor( 110 | name='V', full_name='com.flickr.vision.lopq.LOPQModelParams.V', index=1, 111 | number=2, type=13, cpp_type=3, label=1, 112 | has_default_value=False, default_value=0, 113 | message_type=None, enum_type=None, containing_type=None, 114 | is_extension=False, extension_scope=None, 115 | options=None), 116 | _descriptor.FieldDescriptor( 117 | name='M', full_name='com.flickr.vision.lopq.LOPQModelParams.M', index=2, 118 | number=3, type=13, cpp_type=3, label=1, 119 | has_default_value=False, default_value=0, 120 | message_type=None, enum_type=None, containing_type=None, 121 | is_extension=False, extension_scope=None, 122 | options=None), 123 | _descriptor.FieldDescriptor( 124 | name='num_subquantizers', full_name='com.flickr.vision.lopq.LOPQModelParams.num_subquantizers', index=3, 125 | number=4, type=13, cpp_type=3, label=1, 126 | has_default_value=False, default_value=0, 127 | message_type=None, enum_type=None, containing_type=None, 128 | is_extension=False, extension_scope=None, 129 | options=None), 130 | _descriptor.FieldDescriptor( 131 | name='Cs', full_name='com.flickr.vision.lopq.LOPQModelParams.Cs', index=4, 132 | number=5, type=11, cpp_type=10, label=3, 133 | has_default_value=False, default_value=[], 134 | message_type=None, enum_type=None, containing_type=None, 135 | is_extension=False, extension_scope=None, 136 | options=None), 137 | _descriptor.FieldDescriptor( 138 | name='Rs', full_name='com.flickr.vision.lopq.LOPQModelParams.Rs', index=5, 139 | number=6, type=11, cpp_type=10, label=3, 140 | has_default_value=False, default_value=[], 141 | message_type=None, enum_type=None, containing_type=None, 142 | is_extension=False, extension_scope=None, 143 | options=None), 144 | _descriptor.FieldDescriptor( 145 | name='mus', full_name='com.flickr.vision.lopq.LOPQModelParams.mus', index=6, 146 | number=7, type=11, cpp_type=10, label=3, 147 | has_default_value=False, default_value=[], 148 | message_type=None, enum_type=None, containing_type=None, 149 | is_extension=False, extension_scope=None, 150 | options=None), 151 | _descriptor.FieldDescriptor( 152 | name='subs', full_name='com.flickr.vision.lopq.LOPQModelParams.subs', index=7, 153 | number=8, type=11, cpp_type=10, label=3, 154 | has_default_value=False, default_value=[], 155 | message_type=None, enum_type=None, containing_type=None, 156 | is_extension=False, extension_scope=None, 157 | options=None), 158 | ], 159 | extensions=[ 160 | ], 161 | nested_types=[], 162 | enum_types=[ 163 | ], 164 | options=None, 165 | is_extendable=False, 166 | extension_ranges=[], 167 | oneofs=[ 168 | ], 169 | serialized_start=120, 170 | serialized_end=376, 171 | ) 172 | 173 | _LOPQMODELPARAMS.fields_by_name['Cs'].message_type = _MATRIX 174 | _LOPQMODELPARAMS.fields_by_name['Rs'].message_type = _MATRIX 175 | _LOPQMODELPARAMS.fields_by_name['mus'].message_type = _VECTOR 176 | _LOPQMODELPARAMS.fields_by_name['subs'].message_type = _MATRIX 177 | DESCRIPTOR.message_types_by_name['Vector'] = _VECTOR 178 | 
DESCRIPTOR.message_types_by_name['Matrix'] = _MATRIX 179 | DESCRIPTOR.message_types_by_name['LOPQModelParams'] = _LOPQMODELPARAMS 180 | 181 | Vector = _reflection.GeneratedProtocolMessageType('Vector', (_message.Message,), dict( 182 | DESCRIPTOR = _VECTOR, 183 | __module__ = 'lopq_model_pb2' 184 | # @@protoc_insertion_point(class_scope:com.flickr.vision.lopq.Vector) 185 | )) 186 | _sym_db.RegisterMessage(Vector) 187 | 188 | Matrix = _reflection.GeneratedProtocolMessageType('Matrix', (_message.Message,), dict( 189 | DESCRIPTOR = _MATRIX, 190 | __module__ = 'lopq_model_pb2' 191 | # @@protoc_insertion_point(class_scope:com.flickr.vision.lopq.Matrix) 192 | )) 193 | _sym_db.RegisterMessage(Matrix) 194 | 195 | LOPQModelParams = _reflection.GeneratedProtocolMessageType('LOPQModelParams', (_message.Message,), dict( 196 | DESCRIPTOR = _LOPQMODELPARAMS, 197 | __module__ = 'lopq_model_pb2' 198 | # @@protoc_insertion_point(class_scope:com.flickr.vision.lopq.LOPQModelParams) 199 | )) 200 | _sym_db.RegisterMessage(LOPQModelParams) 201 | 202 | 203 | DESCRIPTOR.has_options = True 204 | DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\026com.flickr.vision.lopqB\023LOPQModelParametersH\001')) 205 | _VECTOR.fields_by_name['values'].has_options = True 206 | _VECTOR.fields_by_name['values']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) 207 | _MATRIX.fields_by_name['values'].has_options = True 208 | _MATRIX.fields_by_name['values']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) 209 | # @@protoc_insertion_point(module_scope) 210 | -------------------------------------------------------------------------------- /python/lopq/package_metadata.json: -------------------------------------------------------------------------------- 1 | {"git_hash": "5be92898ae856b8c75fb5ec577f8cd0c95754488", "version": "1.0.36", "git_branch": "python2_parallel", "git_origin": "https://github.com/squall-1002/lopq.git"} -------------------------------------------------------------------------------- /python/lopq/search.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | import heapq 4 | from collections import defaultdict, namedtuple 5 | from itertools import count 6 | import numpy as np 7 | import array 8 | from .utils import iterate_splits, compute_codes_parallel 9 | 10 | 11 | def multisequence(x, centroids): 12 | """ 13 | Implementation of multi-sequence algorithm for traversing a multi-index. 14 | 15 | The algorithm is described in http://download.yandex.ru/company/cvpr2012.pdf. 
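Cells (pairs of coarse cluster indices) are yielded in order of non-decreasing cell distance approximation, i.e. the sum of the query subvectors' distances to the cell's coarse centroids: the per-split distances are sorted once, and a heap of candidate cells is expanded so that a cell is only pushed after its predecessor cells in each dimension have been traversed.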
16 | 17 | :param ndarray x: 18 | a query vector 19 | :param list centroids: 20 | a list of ndarrays containing cluster centroids for each subvector 21 | 22 | :yields int d: 23 | the cell distance approximation used to order cells 24 | :yields tuple cell: 25 | the cell indices 26 | """ 27 | 28 | # Infer parameters 29 | splits = len(centroids) 30 | V = centroids[0].shape[0] 31 | 32 | # Compute distances to each coarse cluster and sort 33 | cluster_dists = [] 34 | sorted_inds = [] 35 | for cx, split in iterate_splits(x, splits): 36 | 37 | dists = ((cx - centroids[split]) ** 2).sum(axis=1) 38 | inds = np.argsort(dists) 39 | 40 | cluster_dists.append(dists) 41 | sorted_inds.append(inds) 42 | 43 | # Some helper functions used below 44 | def cell_for_inds(inds): 45 | return tuple([sorted_inds[s][i] for s, i in enumerate(inds)]) 46 | 47 | def dist_for_cell(cell): 48 | return sum([cluster_dists[s][i] for s, i in enumerate(cell)]) 49 | 50 | def inds_in_range(inds): 51 | for i in inds: 52 | if i >= V: 53 | return False 54 | return True 55 | 56 | # Initialize priority queue 57 | h = [] 58 | traversed = set() 59 | start_inds = tuple(0 for _ in xrange(splits)) 60 | start_dist = dist_for_cell(cell_for_inds(start_inds)) 61 | heapq.heappush(h, (start_dist, start_inds)) 62 | 63 | # Traverse cells 64 | while len(h): 65 | d, inds = heapq.heappop(h) 66 | yield d, cell_for_inds(inds) 67 | traversed.add(inds) 68 | 69 | # Add neighboring cells to queue 70 | if inds[1] == 0 or (inds[0] + 1, inds[1] - 1) in traversed: 71 | c = (inds[0] + 1, inds[1]) 72 | if inds_in_range(c): 73 | dist = dist_for_cell(cell_for_inds(c)) 74 | heapq.heappush(h, (dist, c)) 75 | 76 | if inds[0] == 0 or (inds[0] - 1, inds[1] + 1) in traversed: 77 | c = (inds[0], inds[1] + 1) 78 | if inds_in_range(c): 79 | dist = dist_for_cell(cell_for_inds(c)) 80 | heapq.heappush(h, (dist, c)) 81 | 82 | 83 | class LOPQSearcherBase(object): 84 | 85 | def add_data(self, data, ids=None, num_procs=1): 86 | """ 87 | Add raw data into the search index. 88 | 89 | :param ndarray data: 90 | an ndarray with data points on the rows 91 | :param ndarray ids: 92 | an optional array of ids for each data point; 93 | defaults to the index of the data point if not provided 94 | :param int num_procs: 95 | an integer specifying the number of processes to use to 96 | compute codes for the data 97 | """ 98 | codes = compute_codes_parallel(data, self.model, num_procs) 99 | self.add_codes(codes, ids) 100 | 101 | def get_result_quota(self, x, quota=10): 102 | """ 103 | Given a query vector and result quota, retrieve as many cells as necessary 104 | to fill the quota. 105 | 106 | :param ndarray x: 107 | a query vector 108 | :param int quota: 109 | the desired number of items to retrieve 110 | 111 | :returns list retrieved: 112 | a list of index items 113 | :returns int visited: 114 | the number of multi-index cells visited 115 | """ 116 | retrieved = [] 117 | visited = 0 118 | for _, cell in multisequence(x, self.model.Cs): 119 | retrieved += self.get_cell(cell) 120 | visited += 1 121 | 122 | if len(retrieved) >= quota: 123 | break 124 | 125 | return retrieved, visited 126 | 127 | def compute_distances(self, x, items): 128 | """ 129 | Given a query and a list of index items, compute the approximate distance of the query 130 | to each item and return a list of tuples that contain the distance and the item. 131 | Memoize subquantizer distances per coarse cluster to save work. 
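The approximate distance for an item is the sum, over subquantizers, of the squared distance between the query's locally projected subvector and the subquantizer centroid indexed by the item's fine code; these centroid distances depend only on the item's coarse cell, so they are computed once per coarse cluster and reused.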
132 | 133 | :param ndarray x: 134 | a query vector 135 | :param list items: 136 | a list of items from the index 137 | 138 | :returns list: 139 | a list of items with distance 140 | """ 141 | memoized_subquant_dists = [{}, {}] 142 | 143 | def get_subquantizer_distances(x, coarse): 144 | 145 | d0, d1 = memoized_subquant_dists 146 | c0, c1 = coarse 147 | 148 | if c0 not in d0: 149 | d0[c0] = self.model.get_subquantizer_distances(x, coarse, coarse_split=0) 150 | 151 | if c1 not in d1: 152 | d1[c1] = self.model.get_subquantizer_distances(x, coarse, coarse_split=1) 153 | 154 | return d0[c0] + d1[c1] 155 | 156 | results = [] 157 | for item in items: 158 | 159 | codes = item[1] 160 | coarse, fine = codes 161 | 162 | subquantizer_distances = get_subquantizer_distances(x, coarse) 163 | dist = sum([subquantizer_distances[i][fc] for i, fc in enumerate(fine)]) 164 | 165 | results.append((dist, item)) 166 | 167 | return results 168 | 169 | def search(self, x, quota=10, limit=None, with_dists=False): 170 | """ 171 | Return euclidean distance ranked results, along with the number of cells 172 | traversed to fill the quota. 173 | 174 | :param ndarray x: 175 | a query vector 176 | :param int quota: 177 | the number of desired results to rank 178 | :param int limit: 179 | the number of desired results to return - defaults to quota 180 | :param bool with_dists: 181 | boolean indicating whether result items should be returned with their distance 182 | 183 | :returns list results: 184 | the list of ranked results 185 | :returns int visited: 186 | the number of cells visited in the query 187 | """ 188 | # Retrieve results with multi-index 189 | retrieved, visited = self.get_result_quota(x, quota) 190 | 191 | # Compute distance for results 192 | results = self.compute_distances(x, retrieved) 193 | 194 | # Sort by distance 195 | results = sorted(results, key=lambda d: d[0]) 196 | 197 | # Limit number returned 198 | if limit is None: 199 | limit = quota 200 | results = results[:limit] 201 | 202 | if with_dists: 203 | Result = namedtuple('Result', ['id', 'code', 'dist']) 204 | results = map(lambda d: Result(d[1][0], d[1][1], d[0]), results) 205 | else: 206 | Result = namedtuple('Result', ['id', 'code']) 207 | results = map(lambda d: Result(d[1][0], d[1]), results) 208 | 209 | return results, visited 210 | 211 | def add_codes(self, codes, ids=None): 212 | """ 213 | Add LOPQ codes into the search index. 214 | 215 | :param iterable codes: 216 | an iterable of LOPQ code tuples 217 | :param iterable ids: 218 | an optional iterable of ids for each code; 219 | defaults to the index of the code tuple if not provided 220 | """ 221 | raise NotImplementedError() 222 | 223 | def get_cell(self, cell): 224 | """ 225 | Retrieve a cell bucket from the index. 226 | 227 | :param tuple cell: 228 | a cell tuple 229 | 230 | :returns list: 231 | the list of index items in this cell bucket 232 | """ 233 | raise NotImplementedError() 234 | 235 | class LOPQSearcher(LOPQSearcherBase): 236 | def __init__(self, model): 237 | """ 238 | Create an LOPQSearcher instance that encapsulates retrieving and ranking 239 | with LOPQ. Requires an LOPQModel instance. This class uses a Python dict 240 | to implement the index. 241 | 242 | :param LOPQModel model: 243 | the model for indexing and ranking 244 | """ 245 | self.model = model 246 | self.index = defaultdict(list) 247 | 248 | def add_codes(self, codes, ids=None): 249 | """ 250 | Add LOPQ codes into the search index. 
235 | class LOPQSearcher(LOPQSearcherBase): 236 | def __init__(self, model): 237 | """ 238 | Create an LOPQSearcher instance that encapsulates retrieving and ranking 239 | with LOPQ. Requires an LOPQModel instance. This class uses a Python dict 240 | to implement the index. 241 | 242 | :param LOPQModel model: 243 | the model for indexing and ranking 244 | """ 245 | self.model = model 246 | self.index = defaultdict(list) 247 | 248 | def add_codes(self, codes, ids=None): 249 | """ 250 | Add LOPQ codes into the search index. 251 | 252 | :param iterable codes: 253 | an iterable of LOPQ code tuples 254 | :param iterable ids: 255 | an optional iterable of ids for each code; 256 | defaults to the index of the code tuple if not provided 257 | """ 258 | # If a list of ids is not provided, assume it is the index of the data 259 | if ids is None: 260 | ids = count() 261 | 262 | for item_id, code in zip(ids, codes): 263 | cell = code[0] 264 | self.index[cell].append((item_id, code)) 265 | 266 | def get_cell(self, cell): 267 | """ 268 | Retrieve a cell bucket from the index. 269 | 270 | :param tuple cell: 271 | a cell tuple 272 | 273 | :returns list: 274 | the list of index items in this cell bucket 275 | """ 276 | return self.index[cell] 277 | 278 | 279 | class LOPQSearcherLMDB(LOPQSearcherBase): 280 | def __init__(self, model, lmdb_path, id_lambda=int): 281 | """ 282 | Create an LOPQSearcherLMDB instance that encapsulates retrieving and ranking 283 | with LOPQ. Requires an LOPQModel instance. This class uses an lmdb database 284 | to implement the index. 285 | 286 | :param LOPQModel model: 287 | the model for indexing and ranking 288 | :param str lmdb_path: 289 | path for the lmdb database; if it does not exist it is created 290 | :param callable id_lambda: 291 | a lambda function to reconstruct item ids from their string representation 292 | (computed by calling `bytes`) during retrieval 293 | """ 294 | import lmdb 295 | 296 | self.model = model 297 | self.lmdb_path = lmdb_path 298 | self.id_lambda = id_lambda 299 | 300 | self.env = lmdb.open(self.lmdb_path, map_size=1024*2000000*2, writemap=False, map_async=True, max_dbs=1) 301 | self.index_db = self.env.open_db("index") 302 | 303 | def encode_cell(self, cell): 304 | return array.array("H", cell).tostring() 305 | 306 | def decode_cell(self, cell_bytes): 307 | a = array.array("H") 308 | a.fromstring(cell_bytes) 309 | return tuple(a.tolist()) 310 | 311 | def encode_fine_codes(self, fine): 312 | return array.array("B", fine).tostring() 313 | 314 | def decode_fine_codes(self, fine_bytes): 315 | a = array.array("B") 316 | a.fromstring(fine_bytes) 317 | return tuple(a.tolist()) 318 | 319 | def add_codes(self, codes, ids=None): 320 | """ 321 | Add LOPQ codes into the search index. 322 | 323 | :param iterable codes: 324 | an iterable of LOPQ code tuples 325 | :param iterable ids: 326 | an optional iterable of ids for each code; 327 | defaults to the index of the code tuple if not provided 328 | """ 329 | # If a list of ids is not provided, assume it is the index of the data 330 | if ids is None: 331 | ids = count() 332 | 333 | with self.env.begin(db=self.index_db, write=True) as txn: 334 | for item_id, code in zip(ids, codes): 335 | key_prefix = self.encode_cell(code[0]) 336 | key_suffix = bytes(item_id) 337 | key = key_prefix + key_suffix 338 | val = self.encode_fine_codes(code[1]) 339 | txn.put(key, val) 340 | self.env.sync() 341 |
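    # Key layout used by this LMDB index (illustrative note, assuming native
    # little-endian byte order): encode_cell packs the cell (c0, c1) as two
    # 2-byte unsigned shorts, e.g. cell (3, 2) -> '\x03\x00\x02\x00', and the
    # item id is appended as its byte string (bytes(17) -> '17'), so each key
    # is the 4-byte cell prefix plus the id bytes. get_cell below scans keys
    # by that 4-byte prefix to recover one cell bucket, and id_lambda inverts
    # the id's string representation back to its original type.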
342 | def get_cell(self, cell): 343 | """ 344 | Retrieve a cell bucket from the index. 345 | 346 | :param tuple cell: 347 | a cell tuple 348 | 349 | :returns list: 350 | the list of index items in this cell bucket 351 | """ 352 | prefix = self.encode_cell(cell) 353 | 354 | items = [] 355 | with self.env.begin(db=self.index_db) as txn: 356 | cursor = txn.cursor() 357 | cursor.set_range(prefix) 358 | for key, value in cursor: 359 | if not key.startswith(prefix): 360 | break 361 | else: 362 | item_id = self.id_lambda(key[4:]) 363 | cell = self.decode_cell(key[:4]) 364 | fine = self.decode_fine_codes(value) 365 | code = (cell, fine) 366 | items.append((item_id, code)) 367 | cursor.close() 368 | 369 | return items 370 | -------------------------------------------------------------------------------- /python/lopq/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | import numpy as np 4 | import multiprocessing 5 | from itertools import chain 6 | 7 | 8 | def iterate_splits(x, splits): 9 | """ 10 | A helper to iterate subvectors. 11 | 12 | :param ndarray x: 13 | a vector to iterate over 14 | :param int splits: 15 | the number of subvectors 16 | :returns (np.array, int): 17 | subvector, split index pairs 18 | """ 19 | split_size = len(x) / splits 20 | for split in xrange(splits): 21 | start = split * split_size 22 | yield x[start:start + split_size], split 23 | 24 | 25 | def concat_new_first(arrs): 26 | """ 27 | Helper to concatenate a list of ndarrays along a new first dimension. 28 | """ 29 | arrs = map(lambda x: x[np.newaxis, ...], arrs) 30 | return np.concatenate(arrs, axis=0) 31 | 32 | 33 | def predict_cluster(x, centroids): 34 | """ 35 | Given a vector of dimension D and a matrix of centroids of dimension VxD, 36 | return the id of the closest cluster. 37 | 38 | :param np.array x: 39 | the data to assign 40 | :param np.array centroids: 41 | a matrix of cluster centroids 42 | :returns int: 43 | cluster assignment 44 | """ 45 | return ((x - centroids) ** 2).sum(axis=1).argmin(axis=0) 46 | 47 | 48 | def load_xvecs(filename, base_type='f', max_num=None): 49 | """ 50 | A helper to read in sift1m binary dataset. This parses the 51 | binary format described at http://corpus-texmex.irisa.fr/. 52 | 53 | :returns ndarray: 54 | a N x D array, where N is the number of observations 55 | and D is the number of features 56 | """ 57 | import os 58 | import struct 59 | 60 | format_code, format_size, py_type = { 61 | 'f': ('f', 4, float), 62 | 'i': ('I', 4, int), 63 | 'b': ('B', 1, float) 64 | }[base_type] 65 | 66 | size = os.path.getsize(filename) 67 | 68 | f = open(filename, 'rb') 69 | D = np.uint32(struct.unpack('I', f.read(4))[0]) 70 | N = size / (4 + D * format_size) 71 | 72 | if max_num is None: 73 | max_num = N 74 | 75 | f.seek(0) 76 | A = np.zeros((max_num, D), dtype=py_type) 77 | for i in xrange(max_num): 78 | for j in xrange(D + 1): 79 | if j == 0: 80 | np.uint32(struct.unpack('I', f.read(4))) 81 | else: 82 | A[i, j - 1] = py_type(struct.unpack(format_code, f.read(format_size))[0]) 83 | f.close() 84 | return np.squeeze(A) 85 | 86 | 87 | def save_xvecs(data, filename, base_type='f'): 88 | """ 89 | A helper to save an ndarray in the binary format as is expected in 90 | load_xvecs above.
91 | """ 92 | import struct 93 | 94 | format_code, format_size, py_type = { 95 | 'f': ('f', 4, float), 96 | 'i': ('I', 4, int), 97 | 'b': ('B', 1, float) 98 | }[base_type] 99 | 100 | f = open(filename, 'wb') 101 | for d in data: 102 | 103 | if hasattr(d, "__len__"): 104 | D = len(d) 105 | 106 | f.write(struct.pack('=1.3.4 2 | numpy>=1.9 3 | protobuf>=2.6 4 | scikit-learn>=0.18 5 | scipy>=0.14 6 | lmdb>=0.87 -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015, Yahoo Inc. 4 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 5 | import os 6 | import json 7 | from setuptools import setup 8 | 9 | 10 | # Package Metadata filename 11 | METADATA_FILENAME = 'lopq/package_metadata.json' 12 | BASEPATH = os.path.dirname(os.path.abspath(__file__)) 13 | 14 | 15 | # Long description of package 16 | LONG_DESCRIPTION = """ 17 | # Locally Optimized Product Quantization 18 | 19 | This is Python training and testing code for Locally Optimized Product Quantization (LOPQ) models, as well as Spark scripts to scale training to hundreds of millions of vectors. The resulting model can be used in Python with code provided here or deployed via a Protobuf format to, e.g., search backends for high performance approximate nearest neighbor search. 20 | 21 | ### Overview 22 | 23 | Locally Optimized Product Quantization (LOPQ) [1] is a hierarchical quantization algorithm that produces codes of configurable length for data points. These codes are efficient representations of the original vector and can be used in a variety of ways depending on application, including as hashes that preserve locality, as a compressed vector from which an approximate vector in the data space can be reconstructed, and as a representation from which to compute an approximation of the Euclidean distance between points. 24 | 25 | Conceptually, the LOPQ quantization process can be broken into 4 phases. The training process also fits these phases to the data in the same order. 26 | 27 | 1. The raw data vector is PCA'd to `D` dimensions (possibly the original dimensionality). This allows subsequent quantization to more efficiently represent the variation present in the data. 28 | 2. The PCA'd data is then product quantized [2] by two k-means quantizers. This means that each vector is split into two subvectors each of dimension `D / 2`, and each of the two subspaces is quantized independently with a vocabulary of size `V`. Since the two quantizations occur independently, the dimensions of the vectors are permuted such that the total variance in each of the two subspaces is approximately equal, which allows the two vocabularies to be equally important in terms of capturing the total variance of the data. This results in a pair of cluster ids that we refer to as "coarse codes". 29 | 3. The residuals of the data after coarse quantization are computed. The residuals are then locally projected independently for each coarse cluster. This projection is another application of PCA and dimension permutation on the residuals and it is "local" in the sense that there is a different projection for each cluster in each of the two coarse vocabularies. These local rotations make the next and final step, another application of product quantization, very efficient in capturing the variance of the residuals. 30 | 4. 
The locally projected data is then product quantized a final time by `M` subquantizers, resulting in `M` "fine codes". Usually the vocabulary for each of these subquantizers will be a power of 2 for effective storage in a search index. With vocabularies of size 256, the fine codes for each indexed vector will require `M` bytes to store in the index. 31 | 32 | The final LOPQ code for a vector is a `(coarse codes, fine codes)` pair, e.g. `((3, 2), (14, 164, 83, 49, 185, 29, 196, 250))`. 33 | 34 | ### Nearest Neighbor Search 35 | 36 | A nearest neighbor index can be built from these LOPQ codes by indexing each document into its corresponding coarse code bucket. That is, each pair of coarse codes (which we refer to as a "cell") will index a bucket of the vectors quantizing to that cell. 37 | 38 | At query time, an incoming query vector undergoes substantially the same process. First, the query is split into coarse subvectors and the distance to each coarse centroid is computed. These distances can be used to efficiently compute a priority-ordered sequence of cells [3] such that cells later in the sequence are less likely to have near neighbors of the query than earlier cells. The items in cell buckets are retrieved in this order until some desired quota has been met. 39 | 40 | After this retrieval phase, the fine codes are used to rank by approximate Euclidean distance. The query is projected into each local space and the distance to each indexed item is estimated as the sum of the squared distances of the query subvectors to the corresponding subquantizer centroid indexed by the fine codes. 41 | 42 | NN search with LOPQ is highly scalable and has excellent properties in terms of both index storage requirements and query-time latencies when implemented well. 43 |
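### Example Usage

Below is a minimal sketch of the training and search flow described above, using the `LOPQModel` and `LOPQSearcher` classes from this package; the parameter values and the random data are illustrative assumptions rather than recommended settings.

```python
import numpy as np
from lopq import LOPQModel, LOPQSearcher

# Illustrative data: 5000 vectors of dimension 128; substitute real features.
data = np.random.rand(5000, 128)

# V coarse clusters per subvector half and M fine subquantizers, as in the
# notation above; defaults are assumed for all other training parameters.
model = LOPQModel(V=16, M=8)
model.fit(data)

# Index the data with the dict-backed searcher and run a ranked query.
searcher = LOPQSearcher(model)
searcher.add_data(data)
results, visited = searcher.search(data[0], quota=10)
```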
44 | #### References 45 | 46 | More information and performance benchmarks can be found at http://image.ntua.gr/iva/research/lopq/. 47 | 48 | 1. Y. Kalantidis, Y. Avrithis. [Locally Optimized Product Quantization for Approximate Nearest Neighbor Search.](http://image.ntua.gr/iva/files/lopq.pdf) CVPR 2014. 49 | 2. H. Jegou, M. Douze, and C. Schmid. [Product quantization for nearest neighbor search.](https://lear.inrialpes.fr/pubs/2011/JDS11/jegou_searching_with_quantization.pdf) PAMI, 33(1), 2011. 50 | 3. A. Babenko and V. Lempitsky. [The inverted multi-index.](http://www.computer.org/csdl/trans/tp/preprint/06915715.pdf) CVPR 2012. 51 | """ 52 | 53 | # Create a dictionary of our arguments, this way this script can be imported 54 | # without running setup() to allow external scripts to see the setup settings. 55 | setup_arguments = { 56 | 'name': 'lopq', 57 | 'version': '1.0.0', 58 | 'author': 'Clayton Mellina,Yannis Kalantidis,Huy Nguyen', 59 | 'author_email': 'clayton@yahoo-inc.com', 60 | 'url': 'http://github.com/yahoo/lopq', 61 | 'license': 'Apache-2.0', 62 | 'keywords': ['lopq', 'locally optimized product quantization', 'product quantization', 'compression', 'ann', 'approximate nearest neighbor', 'similarity search'], 63 | 'packages': ['lopq'], 64 | 'long_description': LONG_DESCRIPTION, 65 | 'description': 'Python code for training and deploying Locally Optimized Product Quantization (LOPQ) for approximate nearest neighbor search of high dimensional data.', 66 | 'classifiers': [ 67 | 'Development Status :: 5 - Production/Stable', 68 | 'License :: OSI Approved :: Apache Software License', 69 | 'Intended Audience :: Developers', 70 | 'Intended Audience :: Science/Research', 71 | 'Natural Language :: English', 72 | 'Operating System :: MacOS :: MacOS X', 73 | 'Operating System :: Microsoft :: Windows', 74 | 'Operating System :: POSIX', 75 | 'Operating System :: POSIX', 76 | 'Operating System :: Unix', 77 | 'Programming Language :: Python :: 2.7', 78 | 'Topic :: Scientific/Engineering', 79 | 'Topic :: Software Development' 80 | ], 81 | 'package_data': { 82 | 'lopq': ['package_metadata.json'] 83 | }, 84 | 'platforms': 'Windows,Linux,Solaris,Mac OS-X,Unix', 85 | 'include_package_data': True, 86 | 'install_requires': ['protobuf>=2.6', 'numpy>=1.9', 'scipy>=0.14', 'scikit-learn>=0.18', 'lmdb>=0.87'] 87 | } 88 | 89 | 90 | class Git(object): 91 | """ 92 | Simple wrapper class around the git command line tools 93 | """ 94 | version_list = ['0', '7', '0'] 95 | 96 | def __init__(self, version=None): 97 | if version: 98 | self.version_list = version.split('.') 99 | 100 | @property 101 | def version(self): 102 | """ 103 | Generate a unique version value from the git information 104 | :return: 105 | """ 106 | git_rev = len(os.popen('git rev-list HEAD').readlines()) 107 | if git_rev != 0: 108 | self.version_list[-1] = '%d' % git_rev 109 | version = '.'.join(self.version_list) 110 | return version 111 | 112 | @property 113 | def branch(self): 114 | """ 115 | Get the current git branch 116 | :return: 117 | """ 118 | return os.popen('git rev-parse --abbrev-ref HEAD').read().strip() 119 | 120 | @property 121 | def hash(self): 122 | """ 123 | Return the git hash for the current build 124 | :return: 125 | """ 126 | return os.popen('git rev-parse HEAD').read().strip() 127 | 128 | @property 129 | def origin(self): 130 | """ 131 | Return the fetch url for the git origin 132 | :return: 133 | """ 134 | for item in os.popen('git remote -v'): 135 | split_item = item.strip().split() 136 | if split_item[0] == 'origin' and split_item[-1] == '(push)': 137 | return split_item[1] 138 | 139 | 140 | def add_scripts_to_package(): 141 | """ 142 | Update the "scripts" parameter of the setup_arguments with any scripts 143 | found in the "scripts" directory. 144 | :return: 145 | """ 146 | global setup_arguments 147 | 148 | if os.path.isdir('scripts'): 149 | setup_arguments['scripts'] = [ 150 | os.path.join('scripts', f) for f in os.listdir('scripts') 151 | ] 152 | 153 | 154 | def get_and_update_package_metadata(): 155 | """ 156 | Update the package metadata for this package if we are building the package.
157 | :return:metadata - Dictionary of metadata information 158 | """ 159 | global setup_arguments 160 | global METADATA_FILENAME 161 | 162 | if not os.path.exists('.git') and os.path.exists(METADATA_FILENAME): 163 | with open(METADATA_FILENAME) as fh: 164 | metadata = json.load(fh) 165 | else: 166 | git = Git(version=setup_arguments['version']) 167 | metadata = { 168 | 'version': git.version, 169 | 'git_hash': git.hash, 170 | 'git_origin': git.origin, 171 | 'git_branch': git.branch 172 | } 173 | with open(METADATA_FILENAME, 'w') as fh: 174 | json.dump(metadata, fh) 175 | return metadata 176 | 177 | 178 | if __name__ == '__main__': 179 | # We're being run from the command line so call setup with our arguments 180 | metadata = get_and_update_package_metadata() 181 | setup_arguments['version'] = metadata['version'] 182 | setup(**setup_arguments) 183 | -------------------------------------------------------------------------------- /python/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015, Yahoo Inc. 4 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 5 | 6 | set -e 7 | set -x 8 | 9 | # run tests 10 | nosetests -v -s test/tests.py 11 | 12 | # make sure we can distribute the module 13 | python setup.py sdist 14 | 15 | -------------------------------------------------------------------------------- /python/test/testdata/random_test_model.lopq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yahoo/lopq/0f17655b901e6dfabe5c2aa62b4c8e492f34b05a/python/test/testdata/random_test_model.lopq -------------------------------------------------------------------------------- /python/test/testdata/test_accumulate_covariance_estimators_input.pkl: -------------------------------------------------------------------------------- 1 | (cnumpy.core.multiarray 2 | _reconstruct 3 | p1 4 | (cnumpy 5 | ndarray 6 | p2 7 | (I0 8 | tS'b' 9 | tRp3 10 | (I1 11 | (I36 12 | I8 13 | tcnumpy 14 | dtype 15 | p4 16 | (S'f8' 17 | I0 18 | I1 19 | tRp5 20 | (I3 21 | S'<' 22 | NNNI-1 23 | I-1 24 | I0 25 | tbI00 26 | S'.B\xcb\xee\x1d\x81\xeb?\x0c\xef\x15\x9a\x17o\xe7?0\x11X\xcca-\xdb?h1\x8d4\r\x03\xcf?8e\xc3\xc0|3\xd7?;uvJ\x1f\x8e\xe2?\xd6\xd9\xac5\x135\xe1?4\xbe=\xfe\x8f@\xe5?\x00n]\xfa\x85\x9f\xdd?\'\xf9\x1e\xb8\xa0$\xed?\x1a\xfe\x15\xdc\xfb2\xdc?\xe8\xcfg\x00\xc3\x99\xd9?6\x1a\xb6\xe1x\xd3\xea?\xb8\xb0\x01\xdad\\\xba?\xae\x81\xc3^\xcdF\xed?\x86\xbbG\xb0\x8f\xcf\xe9?\xccJ\xf3\'\x0f\xf8\xce?\x90\xffN\xc3\xe8t\xe1?0\x87J\xe1Gu\xa7?\\Z-\x81L\xd1\xc6?K\xbb}\xfb\t#\xec?\xaa\xe8\x83e\xce#\xee?\x98n\x1f=>\xc2\xb6?\x1a\x12\x0b\x8c\x85*\xef?\x80#\xc4\xac|n\xad?\xcb\xebt\xdf\xc9W\xe3?\xe8\xcd 7W\xec\xd2?\xda.+C?\x15\xdc?\x1c\x0e\xedP\xb3\xc3\xe6?\x88\x14m*\xe8\xc9\xcd?bD\xd8\xcd\x8b\xbf\xed?,\x89\xd1$\xd1#\xd3?\x8fC\x19C\x9e\x91\xe8?\x19\xa5\xa6`\xb9\x8c\xe3?\xc4\xbe\x9d^"V\xc0?4KC\x92\x0e\x1b\xc0?\xe6\xff\x14\x0e\xddD\xd3?|&&\xfc\x11\x00\xd8?\xc00\xa1\xa1S\x8d\xea?\xc8\xfbi\x96\n\x1d\xe5?\x94\xe3\xac\x9a\x06\xec\xe0?j)\x0c\x88\x06\x1c\xed?\x98\xb2I`\xd5"\xec?\x9c\xa1i\xed\x06\xd3\xe5?\x96\xb6~d\x92\x91\xde?\xe6+\xdf\xb1\xf38\xec?o19#\x9d~\xe7?\x14`\xe0\xf1?\xbe\xe3?\xd8\xba\xfc\x13\xd6\x97\xbe?\xedV\xda.QH\xe9?\xbd\xe2\x8cG]\x94\xef?\xf06\x8fW\xe6U\xb9?`\x08\xe0 
\xf7\x8f\x9d?\xeb\x81\xf6a\xcf\x1b\xd4\xec?\x97\xb6\xa4\xf5|E\xe6?\xfa\x18\xa2\xd4\x02\x99\xee?\x8e\x9c|z\xd2\\\xee?\xdaa\xfc\xdc.(\xde?\xe5q\x88\x01\x9a\x97\xe1?\x98\xab\x1c1r\x1b\xd3?\xe4\xb3\x806\xd0Q\xe8?\xfa\x7f\x9fb\xac\x83\xed?|\xbc2y\xe9\xea\xe3?\xa4\xb8f\x15Rw\xe7?\x08\xc9\xac\xc1\x0b\x93\xb9?:-\xddqUS\xed?l\xde\x92\xb4\x8e\xba\xdb?\x882\xaa^\xa2\x93\xc7?\x105.\xb1<\xa5\xdc?g\xf0Z\xe4\x95\x8b\xe4?\x0e\x18\xf7\xe3\xf0\x19\xe6?\xb0\xffH\x98\x98\xbc\xcd?\xa5\x00\t\x02\xd5\xfa\xeb?\r\x10=\xbcx{\xe9?\x86e\xf3\x1b\xf0\x84\xef?1]\x1b\x0bw\xfe\xef?\r\xdd\xa4K\r>\xec?I\xea\xe1k\xdb\xdd\xe4?~\x1bU\x9c\xfb\x87\xd6?\xc0|\x83\xc3]J\xa9?\xc0\x9d]\xf8_l\x91?=W\xd0\xde\xf6-\xef?"\xe4\xaf\x9a\x89\xa9\xd3?\x00^\xd4\xff\x1a\x11\xc6?\xce\xb0\xcfk\xd5\t\xdc?q\xc7\xa5z\xc8b\xe3?\xc2\x05\xb9\xf24\xd0\xd3?\xd2B$\x9c\n\xe2\xe0?0\xe6\xfb\x84L@\xd9?\xac6gqR}\xc3?\xe8d\xea!{\xbb\xc7?\xe2\xb0\xc7\rqy\xd0?\xca\x92\x13\x07H\xcd\xd3?\xb85\x17\xa5\xc2\x1f\xb1? 6\xed\xdf\xde\xc7\xa9?8\t\x03)\xc6x\xef?P4Hr\xa9\x16\xd0?\xe0q\x9dD\xe9$\xe0?)C3\xde\x8b0\xe2?\x9aL\x93\xfaj\xa2\xd8?\x0c#\x07\xc31Z\xce?\xf8\xb3\xb8B:2\xd7?\xbb\x8f\xc9\x92G\x8e\xee?\xf4#\x08J\xe4\x84\xc6?>\xc7\xedQf%\xed?\x00\xf0{bp\x97,?X\xcf\xfc\xb3\x11\xcc\xe8?\x85\xa0\xc9s\r\x14\xe9?k\xc0}h\xcd\xa2\xe6?\xa9\xbcI\xbf\x85N\xef?\xc2}\xd3\xf50A\xe4?\x9c\xf0\xe0\xc9i\x8f\xd1?\xac-D\x13\x82\x9b\xcc?\x8cr\xc7 7\x15\xed?\xa3S8p\xa0\xc5\xe4?\x18p\x13\x1c\xaa{\xd3?h\x05-c\xf4s\xe7?\xa8\xd9\xcf\x0fu\x04\xbc?\xcbX\rm@\x05\xea?\xd0Z\xb3\x9eqp\xca?\x02\xae^\x08\x7f`\xd3?\xb0\xa1|\xc1[\xfe\xe8?0\xdc\xf7\xec\x00\xf9\xaf?\xce\xac\xb2o\xe4\xa4\xea?\xcc\x1d\xc2"\xad,\xc0?\xb0\x14\xd9g\'\x14\xd8?q \xa4\xd5\x15\xea\xe2?\x8b\xa3p\xc6@\xd8\xe6?\xb4\xe2\xc6\xf6W\xa1\xe6?\x96B7R\x95\xf5\xd7?4?\xe4/\xb8\xa3\xce?i\x88\x9e\xe6\xcc\x8e\xe8?\x99V\xc7g\x9f\xeb\xe7?\xc4t\xabQ\x86\xd5\xce?N\xd3\xd5\x11\xb7\x07\xe0?\xf0\xec\xb5\xd1\x8e;\xce?\x1f\xcenq\x1b&\xe5?\xe4\xfd\xaag\x13\xab\xd1?\x0c5\x920;|\xc9?y\xe1\xa7}\xaf8\xee?H\x1ao\xd0\x7f\xfb\xba?\x18\x13~\x8a\xc1\xe8\xef?\x19\x8e\xc2>\xb3\xf6\xee?0fh\'\xbb\xd5\xca?\x90\\\xfb\x8f\xf1\xb0\xbb?m\x0fA\x95\x10\x1b\xec?J\xbbC\x7f\x88\x8f\xe9?M\xde\xf2\x9aL\x9d\xe0?Ng"\xe2w\xcc\xee?\xd8K\xc2tz\x06\xc6?\xa7\xe3\xc1`\xb5\xb1\xe0?.[Y\xdeV\xd2\xd4?3\xd90\x1ePE\xe1?\xfc\xbf\xac_t\xad\xcc?\xe9\xa9\x06\x18\xb0\xac\xe7?j\xed-r@\xa7\xd8?\xea\xb0E\x91\xc7u\xdd?\xd4\xd8l\x87\xa2\xf4\xe2?\xf3a0\x92"\xed\xe3?\x0e\xde\xa4\t\xf0Q\xea?M\xd58\x80 )\xe7?W\xf5!\xb6\xe2+\xe7?h^\x15\xa8\x8e\xf8\xb0?.\xc4\xfc*\xf5\xf9\xd7?\xd6\xe0k)\xb52\xd9?0\xbc\xab\xd2\x91n\xeb?\xef_yI\xe6\xcb\xe1?**\xac\xa0\xa7)\xe8?\x14\xa4\xfb\x16\x1d\xe9\xe3?\x14}!\xc3\xa5\xa3\xdd?\xb6\x8f\x1c\xc2\x96B\xe1?\xad\xa8\xd7\xcd\xa3\x90\xe9?\xc0QJ\xfb\xe50\x84?\xba\x91\x99T+_\xef?j\x84\xc2\xa9I\r\xd9?\x1bz\xdfS\xdbZ\xe5?\xe0&\x14\x08P\x1d\xa4?hI\xc53o\x87\xc5?`\xeb\xa0\x98j\x16\xe9?\x08\x17*z\xfb\xe1\xd4?M?\x91z\xd5\x11\xec?0\xa8\' s\x85\xb3?*\x19\xf4\xa2\xaea\xee?\xb0\x0e|\xb7\xbb\xf1\xac?a\xd7\x0c\x91\xafI\xed?:\xc4[\x8b\xf6}\xd7?\'\xfe\xa6\xce}\x02\xed?\x08\xd4\xa1\xdfBD\xef?\xf0\xbd\'+#\x92\xc2?\x0c2V\xee\xcf\xe1\xec?d\xc9\x88Q_\x1a\xd0?\xe3\x80\xda\xeb\xe8\xaf\xe5?\x81\xa3\xca\x84\x9a\xf9\xe9?_\xc6\xdf\xc6\x94\x8d\xe3?\x9a<\xc8\xfd\x9c\xa0\xdc?5\x89\xfe@\x95\xdf\xec?\xb41\xd96\xd5\xe3\xc8?\xd9}\xe0\xdbA|\xea?\xa0\xa0\xce\xb0V}\x9f?F@\xfa\xfe\xacw\xe6?\xdcP\xcd\xa6V\x1c\xcf?\x0c3\n% \x9f\xce?' 
27 | tbg1 28 | (g2 29 | (I0 30 | tS'b' 31 | tRp6 32 | (I1 33 | (I10 34 | I8 35 | tg5 36 | I00 37 | S'\xf6\x14\xb9\xf3\xc3\xd9\xd2?\x81\x8ccT\xe55\xea?h4\x07Z\x1d\x92\xe8?\xc8kF\x7fJ\xc0\xd0?`# L\x809\xde?\xb9266\\G\xe7?\x86\xbe#\x11\x93\xb0\xef?\x80Cvl$\xd6q?\xc0\x06I\x0bR\xca\xbf?\x15\xf67\x11\x93\x8a\xe5?\xb8\x1a\xb4\xd4\xe3\xbf\xc4?N\x89\xca\xf3\x01\xd5\xdf?\xc0\xfc\x0e%\xcc)\xbc?\xe6\xcb3Z\x91=\xe0?"G\xddKz\xb7\xe1?\xd2\xa5\xb0\x8d&\xa9\xd7?\xf8\xc3\\\x03t\xf7\xe7?\x18\xb4\xfc\xc5\xd0:\xe6?8\x1fn\x90\x12R\xb3?\x97\x9f\xe1\x023\xb1\xe6?\xb0\xb5\xa6<\x9fz\xc3?T\x9f;\xa0\x90\x85\xd1?,\x1f~\x82\xd6D\xdd?\x0bJ\xd7F\xf6\x81\xef?\xc6Q\xf5|\xbfW\xd5?\xf4\xe1\xa8z1\xc0\xca?\xdf#\xb5a\xe4\x7f\xe5?\xd8Q+\xb4\x7f4\xbd?0\xf5\xc4g+\x87\xce?\xf6\x9faU\xbb\x8b\xe4?k\xeag\xf44q\xef?\xde\xdcN\x97q\xb1\xe2?Yx,\x98<\xb2\xe3?k\xd0Q]\xd4v\xe6?\xa2.1R\xd6\xdb?^}\x0c\xbd\xa9T\xda?*\xc0\xc4d=,\xeb?\x13\xd9\x94\xba\xa2*\xea?h\xb52$\x7f\xff\xe1?\\[\xdd\x94\x9e\x8a\xe2?\x18\x93\xf9\xcc\xfb\x18\xbe?\x10w\xc6]\x00%\xd4?\xd8\x8d6\x89\xb8V\xc8?\x93\x05v\x1e\x1dx\xec?\x10\x07\x83F\xfa\xd2\xdf?\xb0\x83I\xc7\x83Z\xda?\x00]k\xc2\x0b\xe4\x97?\xb3W\rg\x92\x97\xe3?\xc3\x95\x01\xc3\xf7=\xea?\xf9JJ\x97\x9a\x8c\xe6?G\xf0\xf22#\x85\xe2?\xe4\xb34\x1d\x11\xac\xdd?\xff\xaf\x86\xbd{N\xe8?\x8b?\\Zl\xbc\xe8?\xd0\xe9\xc7 \xb4J\xdd?f\xd7\xef\xa8\x90x\xe4?\xa8W)\x858#\xc1?\xaf\x99\xb1\x04\xbc\xef\xee?' 38 | tbt. -------------------------------------------------------------------------------- /python/test/testdata/test_accumulate_covariance_estimators_output.pkl: -------------------------------------------------------------------------------- 1 | (cnumpy.core.multiarray 2 | _reconstruct 3 | p1 4 | (cnumpy 5 | ndarray 6 | p2 7 | (I0 8 | tS'b' 9 | tRp3 10 | (I1 11 | (I10 12 | I8 13 | I8 14 | tcnumpy 15 | dtype 16 | p4 17 | (S'f8' 18 | I0 19 | I1 20 | tRp5 21 | (I3 22 | S'<' 23 | NNNI-1 24 | I-1 25 | I0 26 | tbI00 27 | S'\x8c@\x08\x1a\x18[\xa1?\x9e\x10\xefH\x87\xa9\x82?\xeef\xd1\x92K\xfb\xa3\xbfM\xeb\x8fO\xfe\xa9\x9e?\xe0\xf8\x9dA\x9dx\xb3?E\xb28\xd9\x04\x82\xa1?\xe4\xd1\x9c\xc5I\xb6\xbe?R\xe8o\x10\xcb\x83\x95\xbf\x9e\x10\xefH\x87\xa9\x82?]G\x8f\xd5\xff\x82\x84?\x96l\xcas\x860\x8f\xbf\x90\xcd4l\'\x96~?\x1aL\xd3\x99\xcd\xa8\x87?\xb4\x04-\tdD\x8f\xbf\x8aNS\x92\xef&\x82?L\xab\x8cr\xabma\xbf\xeef\xd1\x92K\xfb\xa3\xbf\x96l\xcas\x860\x8f\xbf\xfeR\xa5=tC\xb0?.y\x9eY@W\xa3\xbf\n\xd3\xe5>\x1c\x14\xb9\xbf\'\r\xfe\xff\xba5\xad?bm\x9f\xae\xd7S\xb6\xbf/6q\x81\xef;\x8f\xbfM\xeb\x8fO\xfe\xa9\x9e?\x90\xcd4l\'\x96~?.y\x9eY@W\xa3\xbf1\x1c[H\x86\x03\x9c?i\xdc3\xbb\x84U\xb2?\xe6mnK\xac\xc9\x8a?\x00[W\xa5\x7fj\xb9?\xe6\x7f\xd6\xd5\x0f\xb1\x84\xbf\xe0\xf8\x9dA\x9dx\xb3?\x1aL\xd3\x99\xcd\xa8\x87?\n\xd3\xe5>\x1c\x14\xb9\xbfi\xdc3\xbb\x84U\xb2?\xbb\'q\xff E\xc9?\x00\\-\xd1\x80-\x97?\xbe\xc7 \xff\xe3\x9d\xd0?\x17\x9d\x81@\x9c \x8b\xbfE\xb28\xd9\x04\x82\xa1?\xb4\x04-\tdD\x8f\xbf\'\r\xfe\xff\xba5\xad?\xe6mnK\xac\xc9\x8a?\x00\\-\xd1\x80-\x97?.:+\xd0}\x19\xe1?\xd1\xd1#2\x9a\x9f\xd8?\x18\x03\x98\xf8\x1f\xe6\xcc\xbf\xe4\xd1\x9c\xc5I\xb6\xbe?\x8aNS\x92\xef&\x82?bm\x9f\xae\xd7S\xb6\xbf\x00[W\xa5\x7fj\xb9?\xbe\xc7 \xff\xe3\x9d\xd0?\xd1\xd1#2\x9a\x9f\xd8?\x19\xe7\xaf>L\x97\xe2?\xe4\xa2\xc68N\xf7\xc5\xbfR\xe8o\x10\xcb\x83\x95\xbfL\xab\x8cr\xabma\xbf/6q\x81\xef;\x8f\xbf\xe6\x7f\xd6\xd5\x0f\xb1\x84\xbf\x17\x9d\x81@\x9c \x8b\xbf\x18\x03\x98\xf8\x1f\xe6\xcc\xbf\xe4\xa2\xc68N\xf7\xc5\xbf\xd7+\xf1\xdf\xf4u\xba?P\x11\xe7\xfe\xf7\xbb\x9f?bj\x0f\x1c9\xf8\x91\xbfG\xb6*\xfe\x19\x8e\x87?\xe6Of\xd8\xc9 
\xb4\xbf00\x12d@\xddT\xbf\x8f\x8c\xf6\xaf\xe0S\xb0?\xfe\x03#\xf0\x8a\x04\xb2\xbfG\xd5\xd4\x9b\xe7\xfa\xa0?bj\x0f\x1c9\xf8\x91\xbf^\x1dum(\xe1\xd2?Fa\x91\x017\xab\xc3\xbf\xcd\xc8\xef\x176\xfb\xb4?"V!?\xf2\xa9\xb9\xbf\x8e\xf2t\xfa"\x99\xcd\xbfK:NA\xd1\xcb\xbb?\x9d\x9b}\xb0(\x16\xb0\xbfG\xb6*\xfe\x19\x8e\x87?Fa\x91\x017\xab\xc3\xbf\xbcS\x96O\n\xb4\xc0?\x7f\xa7\x93S\xa3\xbf\xfeG\xe8"G/s\xbf\xe8\xb5\x0eOx\xfdw?)\x1c3\xa9\xd3\xec\x99\xbf\x7f\x84h\x94\xb1\xf7\xbf\xbfN\x96\x81\x00qr\x91?\xc0\xc8\xd7\xcc\xde\x91\xb0?\xb6*\x0bp#\x93\xc3\xbf\xf0\rV3\xc8D\xc5\xbfR\x1be\xc2u\xb0\xbe?D\xd6)\xffn[\xa1?\x1c\xbdI\xb0\xdf\xd5\xb4\xbf\x02\x8b\x00\x8c\x11\xb8\xd2?\xc2\r\x1f\x06\xd7z\xb2\xbf\xb6*\x0bp#\x93\xc3\xbf\xb6\xdfb\xcaw\xe9\xdf?eL\xa5\'\xa9A\xd2?\xfc\xf3\x80\xd3/K\xbf\xbfx\x89\x97*\xa4\x12\xaf\xbf6P\x81\xfd\xa9\xe4\xc4?dX\xc8\x8c\xa5M\xe0?\x10\xa7\xdc>\x93S\xa3\xbf\xf0\rV3\xc8D\xc5\xbfeL\xa5\'\xa9A\xd2?\xbe\xe2\xaf\xa2\x1b\x95\xe2?\xb5\x11(\xa5\'\t\xdc\xbf\xc6\xcb\x80\xfc\xd8\x88\xbb\xbf\xd0\r\xad\xcb\xcbY\xd1?n\x19<\xf4\xb7\x80\xcf\xbf\xfeG\xe8"G/s\xbfR\x1be\xc2u\xb0\xbe?\xfc\xf3\x80\xd3/K\xbf\xbf\xb5\x11(\xa5\'\t\xdc\xbf\xf2\xc6$\xdc\x80O\xda?\xb36\xaf\x96\x98\xfc\xb5?"N^\xe1I\xfa\xc7\xbf\xb1\x03$N\'|\xb4\xbf\xe8\xb5\x0eOx\xfdw?D\xd6)\xffn[\xa1?x\x89\x97*\xa4\x12\xaf\xbf\xc6\xcb\x80\xfc\xd8\x88\xbb\xbf\xb36\xaf\x96\x98\xfc\xb5?\xec\xdd\n\xe8\xd5h\x95?\x907\x1a*2x\xa9\xbf~\xe1]\xdda\xcd\xd0?)\x1c3\xa9\xd3\xec\x99\xbf\x1c\xbdI\xb0\xdf\xd5\xb4\xbf6P\x81\xfd\xa9\xe4\xc4?\xd0\r\xad\xcb\xcbY\xd1?"N^\xe1I\xfa\xc7\xbf\x907\x1a*2x\xa9\xbfv\x03d[\x13\xb9\xc0?\xedw\xe7+\xb5\xf8\xa2?\x0b\xe4\xab!n\xcf\xa2\xbfW\x14_\xfc\xfd\xc4\x96?\xf2\xa2\xc9R\x8f\xc4\xa2\xbfl\x8a.\xf7\x04\x93\x96?HA\xbe0i}\xb0\xbf\xdb\xb7\x02\xdd\x15\xf5\x87?\xabJ\'\xad\xb6\xd3\x95\xbf\x0b\xe4\xab!n\xcf\xa2\xbf\x87j\xb0K\\\x94\xac?\x18$(\xb9"\x90Q\xbf]uB^\x8a[\xb7?\x00\x0cCc\xff\xfc\xa3\xbf\xdcZ\xfd_U\xb9\xbc?\xd8\xcb\xa8\xef\xb5\xc9\x9e\xbf~\x1e4\x1e\x98\xf1\x97?W\x14_\xfc\xfd\xc4\x96?\x18$(\xb9"\x90Q\xbf\xd9\x1d=\xe4\x19\xb0\xb0?S\x05\x80\xce\x95\xb5Z?\xe2\xa6r5\x9a,\xa6\xbf~\xf2\xed\xb4:\rz\xbfT\xcaRK\'\xe8\x92?$\xf55\x15\xed\xd3\x80\xbf\xf2\xa2\xc9R\x8f\xc4\xa2\xbf]uB^\x8a[\xb7?S\x05\x80\xce\x95\xb5Z??\x06\x8b\xf6i$\xce?hn\xc9\x81\x88|\x98\xbf\x1e\xb2\x13Y\x94\xce\xcc?\x91\x02\xba-\xdf4\xba\xbf cC\x9d[k\x99?l\x8a.\xf7\x04\x93\x96?\x00\x0cCc\xff\xfc\xa3\xbf\xe2\xa6r5\x9a,\xa6\xbfhn\xc9\x81\x88|\x98\xbfi\xddj\x98\x1e\xe7\xb3?\xd8V%\xe2S:\xac\xbf{\xae\x97Jn\xff\x91\xbf\xccy\x85\xf2\xd8\xe1\x91\xbfHA\xbe0i}\xb0\xbf\xdcZ\xfd_U\xb9\xbc?~\xf2\xed\xb4:\rz\xbf\x1e\xb2\x13Y\x94\xce\xcc?\xd8V%\xe2S:\xac\xbf\\\xa1\xe4\xdc\x00\x82\xcf?\xe2\x9a\xca^\xc5\x80\xb6\xbf`\xed\xd2\x0c\xc6\x16\xa5?\xdb\xb7\x02\xdd\x15\xf5\x87?\xd8\xcb\xa8\xef\xb5\xc9\x9e\xbfT\xcaRK\'\xe8\x92?\x91\x02\xba-\xdf4\xba\xbf{\xae\x97Jn\xff\x91\xbf\xe2\x9a\xca^\xc5\x80\xb6\xbf?\xe7CS\x94:\xac?\x0b\x9f0F\xfc\xdfz\xbf\xabJ\'\xad\xb6\xd3\x95\xbf~\x1e4\x1e\x98\xf1\x97?$\xf55\x15\xed\xd3\x80\xbf cC\x9d[k\x99?\xccy\x85\xf2\xd8\xe1\x91\xbf`\xed\xd2\x0c\xc6\x16\xa5?\x0b\x9f0F\xfc\xdfz\xbf\xbbiCy\xf9\xf7\x89?Q8\x18Z\x9c\x06\xc2?\xb9\xeaO\x0c4\xca\xbc?( \xa9\xf6\xeb,\xa6\xbf)8X\x9cx\xf0\xbc?9Ec\x90\x97r\xc4?\x8e\xe2K\xee)\xcd\xb8?\n\xe3l\xf0\xff\x9a\xa5\xbf\x89\x1e\x1c\xb7^\xe6\xa1\xbf\xb9\xeaO\x0c4\xca\xbc?\x03\xbe\xc0p\xa4\xfd\xb6?\x0cCs o\xb5\xa1\xbf\xce\x9d\xf2\xb43\x1c\xb7?\xaf!X\xa43T\xc0?\n\x18\xe3\xa6D\xce\xb3?>\xcd0\xa1\xe7@\xa1\xbf\x0f\x9e\xc6\xca\xb5\x96\x9c\xbf( \xa9\xf6\xeb,\xa6\xbf\x0cCs 
o\xb5\xa1\xbf!\x89Av\xc8G\x8b?\xfe\x14\x8f\x05\xf9\xcc\xa1\xbfnG\xc5]\xa0\'\xa9\xbf\xa9\x1bI}\xc7\x82\x9e\xbf\xd5e\x81\xdbD\x94\x8a?\x9f\xc6TJB\x05\x86?)8X\x9cx\xf0\xbc?\xce\x9d\xf2\xb43\x1c\xb7?\xfe\x14\x8f\x05\xf9\xcc\xa1\xbf\xb0\n\xec\x97\xeb:\xb7?T\xdf\'\x04\xe8i\xc0?\x15\xf8\xf7\x1c\x98\xe8\xb3?je\xea\xa1\xd6W\xa1\xbfb\xa7\xcb\xe8\xb5\xbc\x9c\xbf9Ec\x90\x97r\xc4?\xaf!X\xa43T\xc0?nG\xc5]\xa0\'\xa9\xbfT\xdf\'\x04\xe8i\xc0?px\xa8s\xde1\xc7?\xaf\xcf!x/"\xbc?\xd4\x1a\x16~\x19\x82\xa8\xbfe\x1dN\t\x05N\xa4\xbf\x8e\xe2K\xee)\xcd\xb8?\n\x18\xe3\xa6D\xce\xb3?\xa9\x1bI}\xc7\x82\x9e\xbf\x15\xf8\xf7\x1c\x98\xe8\xb3?\xaf\xcf!x/"\xbc?Y\x1dYP\xda\x0f\xb1?\xee\x10\xe3\xf9\x01\xba\x9d\xbf\xeaG\xca\xe4\xcd\xa0\x98\xbf\n\xe3l\xf0\xff\x9a\xa5\xbf>\xcd0\xa1\xe7@\xa1\xbf\xd5e\x81\xdbD\x94\x8a?je\xea\xa1\xd6W\xa1\xbf\xd4\x1a\x16~\x19\x82\xa8\xbf\xee\x10\xe3\xf9\x01\xba\x9d\xbf \xa0\xd8\x83^\xe5\x89?\x19l\xa0B[t\x85?\x89\x1e\x1c\xb7^\xe6\xa1\xbf\x0f\x9e\xc6\xca\xb5\x96\x9c\xbf\x9f\xc6TJB\x05\x86?b\xa7\xcb\xe8\xb5\xbc\x9c\xbfe\x1dN\t\x05N\xa4\xbf\xeaG\xca\xe4\xcd\xa0\x98\xbf\x19l\xa0B[t\x85?\xa2\xadz\xbeZ\xc6\x81?\xc6\n\xac\xa5E]\xbc?$\x97?c\xbc\xd9\xc3\xbf&B\xbd;\xce\x85\xbe?\x08\xc2\xbe_\xdc\x06\xa4\xbf\xff\xc2\xeb|\xaa\x93\xb5\xbf\xb5T\xcb\xa51\xde\xac\xbf\xf2\xec\x95\x05o\n\xb7\xbf\x98\x1f8\x8e\x85\xd6\x81?$\x97?c\xbc\xd9\xc3\xbf\xa0\xceO\x9dD\x11\xd7?@#G@\x97\xe6[?\x80\xc71\x9b\xc7\xeaS?\x81f.k\xb2\t\xb0?*9\x17\x15\xf1\xce\xc5?o6\xa7\x06\xff\xc0\xb6?\xc6\xbb f\xf4\x8f\xa1?&B\xbd;\xce\x85\xbe?@#G@\x97\xe6[?J\x98"\x86n@\xd9?\x14\x9d|V\xa9\x96\xc6\xbf\x0b\xd2\x10K\xa8\x03\xb4\xbfj\x07w\xde*p\xbd?O\xe6\x0f\xe2\x14\xd1\xb5\xbf.\x00\xd9j0\xe1\xb2\xbf\x08\xc2\xbe_\xdc\x06\xa4\xbf\x80\xc71\x9b\xc7\xeaS?\x14\x9d|V\xa9\x96\xc6\xbf\xd3\xf4\xfe\x07h\xe1\xbb?\x9e\x02\x8c\xa4w\xb4\x9e\xbf\x937\x04\xe8\xdd\xa9\xb6\xbf\xd3\xedf\xdc\xe9\x9a\x86\xbf}\x1d\x91%\xf4\x8a\xc0?\xff\xc2\xeb|\xaa\x93\xb5\xbf\x81f.k\xb2\t\xb0?\x0b\xd2\x10K\xa8\x03\xb4\xbf\x9e\x02\x8c\xa4w\xb4\x9e\xbf\xa7\xb1\x0e\x83k\xfc\xc7?]\x12\x14\x06D\xf9\x96?a\x88\xd5h\x8b\x8e\xdb?\xf7\x19\xca%\xe7\x0e\xd7\xbf\xedG!`\xd5\xc4\xd0\xbf\x10\x9f\xa4\xe3\xf7\x19\x82?\xa0q\xd2\x8f\xd47\xd2?\x9c\x11\xb6\xee\xddh\xc2?l\xdf\x87\xa4C*\xf6?\xc2S\x8a &\x13\xbd?R\x06\x8f\x08\xa1\xee\xb2\xbfp \xf0\xf9R*j?\xe8\x13RVI\x87\xbe?S\xa7\xb3.!L\xb8?\'\x80\x97\x84\x01\xa9\x90?>\x12\x14\x06D\xf9\x96?\xc2S\x8a 
&\x13\xbd?\xf0\x92\t\xe5\xb1\x1d\xc4?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00mH\xff3\xd1\xbd\xdf?\x9dt\x87-\x90\x14\xda?~V4\xe8LA\xc9?\x8b\xa5\x02\xa2\xe7O\xb4\xbf\xa4h^~\x80\x01\x9a\xbf\xe2\x96.\xcf\xe2\r\xe2\xbfR/\xde\xf6\xbd\x0f\xbd?|4\xd1%f\xe6\xb9\xbf\x9dt\x87-\x90\x14\xda?\x12U\'\x1b\x9f\x1d\xe5?\x80mM\x9e]\x9b\x94\xbft&x\xd6=\xb3\xc6\xbfJ\x16\x7f\xf9\x9f=\x9e?\tr\xd1z\x19`\xe1\xbf\x9e\xb9\xeb\x8e\xa3\xc8\xb4\xbfP\'#\xa8y\x83o\xbf~V4\xe8LA\xc9?\x80mM\x9e]\x9b\x94\xbf\xbb\xceM\x86\xcf\xfe\xe3?\xd4\xf7MjNk\xe0?.\x9c8Vp\x8d\xd9\xbf\x07\xe2\x1f/\xfe\xe4\xd1\xbf\x11\xdeK\xbd<\x15\xc6\xbf\x80\xba\xab\xcf\x7f\xd1\xb5?\x8b\xa5\x02\xa2\xe7O\xb4\xbft&x\xd6=\xb3\xc6\xbf\xd4\xf7MjNk\xe0?-zY_&q\xf2?\x01\xc5\xfa\x0e\rl\xe1\xbf`\xaaW9\x06\xa4\xd1?\xa9FB\xcb\xe4k\xc8\xbf\x90\xfda\x00(\xd4\xc8?\xa4h^~\x80\x01\x9a\xbfJ\x16\x7f\xf9\x9f=\x9e?.\x9c8Vp\x8d\xd9\xbf\x01\xc5\xfa\x0e\rl\xe1\xbf\x06\x83!\x98XI\xdc?\x1a\xba\t_\x9a4\xaf\xbfF6&\x99\xf6\xf9\xc5?T\x81\xd7z\xed[\xc2\xbf\xe2\x96.\xcf\xe2\r\xe2\xbf\tr\xd1z\x19`\xe1\xbf\x07\xe2\x1f/\xfe\xe4\xd1\xbf`\xaaW9\x06\xa4\xd1?\x1a\xba\t_\x9a4\xaf\xbfuZ\xb7\x88\xd4g\xee?\xc6\x9c\x04\xd9\xafK\xa1?\xd4\x8eboE\t\xb7?R/\xde\xf6\xbd\x0f\xbd?\x9e\xb9\xeb\x8e\xa3\xc8\xb4\xbf\x11\xdeK\xbd<\x15\xc6\xbf\xa9FB\xcb\xe4k\xc8\xbfF6&\x99\xf6\xf9\xc5?\xc6\x9c\x04\xd9\xafK\xa1?\x8a\xea\x12\xff\xea\xdd\xd9?N\xdb\xeb`.\xa4\xc7\xbf|4\xd1%f\xe6\xb9\xbfP\'#\xa8y\x83o\xbf\x80\xba\xab\xcf\x7f\xd1\xb5?\x90\xfda\x00(\xd4\xc8?T\x81\xd7z\xed[\xc2\xbf\xd4\x8eboE\t\xb7?N\xdb\xeb`.\xa4\xc7\xbf\xbc\x98\x98\x88\x9a0\xbc?' 
28 | tbg1 29 | (g2 30 | (I0 31 | tS'b' 32 | tRp6 33 | (I1 34 | (I10 35 | I8 36 | tg5 37 | I00 38 | S'\xf0_\x02\xa5Z\x95\xcb\xbf\x00\x80\xf1\x85\x94\xd8\xf1>0\xeb\'c\x06]\xb3?\x84\xd6\x10\xde/\xda\xc4\xbfR\xae\x0f\xb0\x0cg\xda\xbf\xe4\xc8\xc7\x9e\x9e\xc7\xf1\xbfr\xa4\xcd\xec;\x1f\xf4\xbf\xde{\x8c\x07\xcd\x08\xdf?0@x\xeb\x9d.\xa7?\xe64a{ \xe6\xe5\xbf\x07\x02\xc2;\xd5\xd6\xe7?\x07\x8a\xccow\x14\xeb\xbfx?t\xe6Qg\x04@ \xb8rC_\x05\xf2?\x00\xb6\xff9\xc9\xda\xc1?\xd8\x8e~\xa7\x04[\xcf?r\xfd\x85\x81\x0e\xd3\xd0\xbf\x8c\xf2&\x80\x00\x1b\xdf?r;t\xfa(\x07\xe0?\x12\x1a\xfe\xa0\xb32\xe2\xbf\xd57&I\xb8b\xf4?@\xe4\xc6\xbd^\xe8\xe7?\xa2\xb67\x12\xe5P\xdd?2LJ\x95\xcc\x00\xf2\xbfZ\xcbn\xa8u\xde\xf4?\\\x89*\xd1n(\xc3\xbf\x8e\x87o\xd0J0\xd9\xbf\xfe\xe7\x85W\xd3~\xec?\\l\x8e\xeaU\xee\xf3?`O#c>8\xe9\xbf\xb4\tm\x85Y\x18\xcd\xbfC$\xdd\xfcp\xc7\xe3?h\xa4\x9e\xe1\xe7M\xc8\xbf\xbc\xc8ce\x14\x1b\xd8?\x00VQI\x1c\xc1\x8c?}\xc1\xd8\x88q\x0c\xea?\xb8\x9c\x18\xa8\xf62\xc7\xbf\xea\xb7\xcd\x89$@\xeb?\x9az\x1b!3\x8b\xd4\xbf\xdc6m\xa2b\x10\xc0?\xaa\x1f\x8c\xd4g\x04\xd8?zx^\xbb\xf2-\xd3?\xf8\xcd\xdb(\xce\x8b\xbd\xbf\xcc\xd0\t\x18qG\xd3?\xb4Puzt>\xdb?8\xb7\xdc.\xbe\x85\xd0?\xd0\x98\x0f\xd1a\xc9\xbc\xbf8\x7f\xe7\xddr\xd9\xb7\xbf\xf6J\x8d\x19\x93\xd4\xd5?\xb3\xca}\x02$\x07\xe9\xbf\xe2SH\xbc\x02\'\xd7\xbf\xb8\xf8W\x88&\xcc\xd8?\xc2w\xce\xed\x8a\x16\xe0\xbf\x8aT\x15\xe1\x03\xad\xec\xbf(C\x8c\xb6\xf0\xf0\xda\xbf12\x96]\xd5\xcb\xee?3\xb7y5\xafT\xf0?\x8c+\x11\x91\xa1\x8b\xd7\xbf\xef1\x83f\xc3\x03\xed\xbf\x1b\xa6\x8a\xe3\xd8\xf9\xba\xbf\xa4\x90\x8dP>\x07\xc7?\x14N\xbc\xb0\xf0\xdd\xd1\xbf@\xd25\xa2MA\xcf?XkW\xbe\x97{\xba\xbf\xe2\x94=&\xefQ\xd2\xbf\xc0Eq\x02d\x87\x85\xbf\xba\x8as\xa6\x98S\xe6?(;\xec\xd1>\x8e\xd5\xbf\xf0,\xa5\xa5\x01\xf7\xb4?\x8c\x07r?\'\xd7\xe6\xbf\xf4\xe8\x10:\x19\x08\xe3\xbf\xc6\x8c3\xd6_\xfb\xda?\x88"(y{V\xd3?p\x81f\x9ae\x08\xa7\xbf\x805\xbc\xac\xc3d}?\x00\xf5\xe6\xb4\x13\x13\xb1\xbfPR\x18\x8eI\x96\xb1\xbf\x18\x81\x8d\x99\xca\x18\xc1?\xa0\xd3\xfa\x84\x15\xfe\xad\xbf\x84.K\xccy>\xe3?\x88\r1\x9f.\x96\xd1\xbf\x80\xfa\xf5\x03#\x10\xd8?\x98r|\xa3U\x15\xb2\xbf\xe0\xf2\x8f\xf7GE\x93?\xf8w\xb0*\xbbp\xb5\xbf\xa0\xbc\x9aYd\xb4\xaa?\xca\xccP^o\xaa\xe2\xbf\x1cJ\x83\xdf\x1a\x0f\xc3?\xa0\x1c\xaao\x05\xea\xb9?TB\xc4\xc0\xd0\xd5\xd7?\x86\x9c\xda`\xd7\xc9\xd4\xbf\xaa\x1f\x8c\xd4g\x04\xd8?zx^\xbb\xf2-\xd3?\xf8\xcd\xdb(\xce\x8b\xbd\xbf\xcc\xd0\t\x18qG\xd3?\xb4Puzt>\xdb?8\xb7\xdc.\xbe\x85\xd0?\xd0\x98\x0f\xd1a\xc9\xbc\xbf8\x7f\xe7\xddr\xd9\xb7\xbf\x80\xccs\xdd\x9cg\xc6\xbf\x80\xb2&\xb1\x84\xb2\x9d\xbfT\xb9\x16\xb6\xff\x08\xcc?\x18<\xc5\xd2\xa1\xd5\xc4\xbf\xda"\x12\xda\x80`\xdc\xbfp\x86\xfd\xc9\x87\xdf\xb5\xbf\xa4\x83xr`x\xe3\xbf\xc0W\x8b:(\xb5\xa8?\xfcc\xe8\xf6\t\t\xe2?\x00\xbe,2oej?\x98\x9b\xe7\xf8-\xe7\xb3\xbf\x08Oms\x92\xac\xb8\xbf\')T\xb4\xb9M\xe2?f\xb9(\xdd?\xdb\xde\xbf\x00\x0b\x08M4\x8e\xb7\xbf\xcc\x86,b\x91"\xcf?\xf5P\x8ai\xa8\xe5\xe0?\xc8\xca\x04\x92\x1cC\xc0\xbf\x0cJ\x8e\xba\x90\xd0\xc9\xbf\xf0\xb5h\xd6\x10C\xb9\xbf\x8ay\xce\x9b\xbf\x9a\xd1?\x80L\xd0\xde\x0ei}\xbfR\xf2\x0c\xdc\x08Z\xd5?*\x9b+^1\xb1\xd1\xbf\xbcY\xedz\xce\x8f\xd8?v\xc9Ph\xa2\xf0\xdc?\x80\x19\xcf\x07\x0f\xd4\xb5\xbf*[%\x10\xe5\xcc\xdf\xbfXh@\xf8}\x1e\xca?\xae\xe3\xf6Fi\x8c\xd8\xbf\xa0(\xa7\xe6\x1d1\xab?\xe8\xffl\nl\x89\xba\xbf@\xf7\xa3\x8eN\x82\xaf?`\xb6{\xfb\x8d#\xab\xbf\x10#*\x15\xde\xdc\xc1?\xc2\xf0\x12\xdc$\xd1\xdd\xbfR\x18\xb7\xc4\xed\xa5\xda?\xa0V\xd7\xb7\x92_\xc5?P\xea\xe9d\x97\xd6\xb3\xbf"\x9a\x84\x94\xe2\xdc\xd6\xbf\x80t\xf5\xff\x1e!\x8e?x,\xba#\x82\xa6\xb4?\x00\x99\x06w\t\x81\xbf\xbf\x00C\xae\x0c\x054\x95?\x00\xa6x;)\xc3a?~)\x87\xc2\xd3\x1a\xe1\xbf\xb0\xa9\
xdb\x8e?w\xd4\xbf \xd8\x83T\xda\x1c\xc5?cl&\xbey\x82\xe4?\xe8\x8c\xf4que\xc0\xbf\xe87\xe6\xff\xcco\xb7\xbff\x17N\xa8\xad\xbf\xe0\x1a+\xea\xe9\x03\xb2?\x98\x87B\x02\xce`\xc4\xbf\xe0R5K\xebP\x9b\xbfVDb\xb8\xdd;\xe5\xbf\xc6\xc7R\xb9\xcc\xbf\xd6?d\x01\x86\x82\xe8\xb8\xdb\xbf\\\x04\x94\x8b\xc5\x9d\xc5?\xfc\xdf\xd3\x0c\x81\xc5\xc7\xbfP1\x9a)\xa3\x9b\xaf\xbf\xe4\xda\xeayEi\xc4?\xb0\xf3\xc7\xc7\xdaL\xa2\xbfx\xd2\xc5/j\x03\xbf\xbf\xb2\x81\x04\xa2\xb8\xc9\xde?\x94^\xf3\xb0\xbdj\xca?Hn\xa6\xabv\xa7\xc3?\x001\xa7!\xb1\x1bs?\xe0\xaa7\xde>?\xac\xbf\x80AP\xdc\x86q\xaa\xbf\xe0\xb9\xfbG\xbe\xcf\x94\xbf`\x16\x0bguX\x95\xbf\xc03\xb8x\xdd^\x9d?\xfan\x91\x03\xf1p\xdf\xbf\xce\xe0i?\xef\x14\xd5\xbf\xd6$\xf9\xd5\xbac\xd1?\xb5687e\x98\xe1\xbf\x84\xb5\xac\xdez\xf7\xcf?@\xec\x03\x00\xb5\xa5\x9e?\x02\xe78\x0f\x1do\xd2?\xad\xe0\x98o\x0b\x18\xea?\xf0\xb0\x1d2\xccj\xb0\xbf\x08H\x7f\x1e\x9aX\xd6\xbf\xf0\xd4\xb1\x8c-7\xbb\xbf\xd0\xf1\x8e\x96)\x82\xcd?\xc4\xda"$\x12\x16\xcb\xbf\xdc\x04\xe5n\x06\x10\xcf?@\xf2C\xb1\x10\xa2\x90?\xf4G~\xf1l\xe2\xda\xbf\x88\x97\x92\xf9\xc5Z\xb0\xbf$\xdb8\xa15\xaf\xd8\xbf\x98\xf9\xb7p0{\xbd?\x88\xe6N\x943\x9e\xc4?\xe8\x19\x1b\xacB\x13\xb4\xbf\x14\xaf\xc7\xe9/\'\xd3\xbf\xdc\x9a\xbdK\'\x1f\xc7\xbf,\xb41\x07"\x1f\xc5?\xa0\x1a\xd4\x82}e\xc7?c\x7f\xee^R\xe0\xe2?`\x91>:e\x0f\xcf\xbf\xb0Pg\xc6\x86\xa3\xad\xbf\xfc\'s\xf70\x1b\xd3\xbf\xf4\xa6#~\x86\xa5\xcd?\x12\xef\x8c\xb1!\x08\xd7?\xae\x00\xaf\x89Y\x8d\xdc?\x10y\xe1\x19Y\xb0\xcf?\x90\xe7\xf2X\x16\x8d\xb1?\x08]\xc3\xd5\xfc\xe9\xb7?\x18E\x7f\xb0.}\xb3\xbf\x10\xc2.\x84{\xce\xb8? \xce\xcd\x8fs\xac\xc6\xbf,\xd1X\xad\x1e\xd5\xd4?\xdc\x9d\x91HE\x89\xc1?\xbaQ\x96Wg\xab\xd2?\x80\x15\x81 X\x03\xcb\xbf`,j\xb8 \xe0\xa1?(\xb2\x03\xfc\xb1\x9d\xcb?\xf4\x12\xcd\xcd\xa6\xe3\xd9\xbf\xd8d\xa0\xd0|%\xc4?\xf0\xf2\x0b\x9d\xfd^\xb4\xbf\x80\xc4\x88\xb9da\x7f?:\x7f\xce\xe5\xd0J\xd5\xbf\x00\xd9\x1bp\xd7\xce\x84\xbf\xa2b\xb3w\x08\x86\xd0?\xbc\xe8(-\',\xc4?\xe0?J3\xe4\x12\xd2?\xa8\\\xe1\xc53\xd2\xb1?\xd4I\xd2\xads\xc2\xc8?0\xba\xd5\xcc>t\xb9?rby7A\xda\xd9?\xd6\'\x17\x87^\xae\xd6?X\x07\xef\xff\xc2\xe8\xd7\xbf0h!e\xb8\x87\xc5\xbf(\xe3\xb2\x8e\x03\xa3\xc9?\x00\xcf\xd7\xeah\x8a~?\xd4R\xcdw\x97\x1e\xc6?\xe4\x96\xd94\xa3\xa1\xcc\xbf\x10\xde\xcb\xad\n\xe5\xd4?\x80\xcb\x19w\xaf\xca}\xbfx\xe9\xbe>r\x1f\xbb?' 79 | tbtp11 80 | . 
-------------------------------------------------------------------------------- /python/test/testdata/test_compute_rotations_from_accumulators_input.pkl: -------------------------------------------------------------------------------- 1 | (cnumpy.core.multiarray 2 | _reconstruct 3 | p1 4 | (cnumpy 5 | ndarray 6 | p2 7 | (I0 8 | tS'b' 9 | tRp3 10 | (I1 11 | (I10 12 | I8 13 | I8 14 | tcnumpy 15 | dtype 16 | p4 17 | (S'f8' 18 | I0 19 | I1 20 | tRp5 21 | (I3 22 | S'<' 23 | NNNI-1 24 | I-1 25 | I0 26 | tbI00 27 | S'\xd0\xa0\xea?\x02N\xdd\xbf\xa8\x8f\x9b8km\xd8\xbf\x14SFK\x88B\xd3\xbf\xea\xe5\xf6\xdf\xb2\xba\xd4\xbf\xb4;\xa8\xdeg\x8b\xd2\xbf$8\xf7\xc5\x98\x84\xd7\xbf\xfc\xc6\x8fw0\x1b\xd7\xbf\xa3/?\x0fA9\xd4\xbf\xb3S\x1b\xe0>x\xe5?(VU_\x83@\xa8\xbf8\xef_r\xf24\xc5?p\x17\x98\xb7\x9c\x86\xcf\xbfR\xc0\xeeU\x00\x8e\xdb\xbf\xd2\x19\x1e\xca\t6\xc5?jV\xd78\xcf\x98\xdf\xbf\xe0\x80\xc9"\xbc\xb9\xab\xbfl*\x80c\x9cd\xb7?\xbe\xb6z\xfc\xb3\x16\xd9?T/\xf0\x13\xc4@\xe1\xbf\xceTx\\\xb3T\xdf\xbf\x1e\x82(\xab\xd6+\xd5?>Z{\x9bf\x82\xd5?\x80B\xe9\x1c\xa0jy?\x04J\xe1 \xbb\xae\xd2\xbf\xd4h\xdf\xe0\xd4@\xc4\xbf\x95qEM\x94\xdb\xc3\xbf\xaa\xbc\x1c]\xa4\x12\xc8?+3\x8a\x9c8\xa8\xe2\xbf\x00p9\t\xc6\x18\xd0?d\xb4\xb7\xdaOa\xa5?$.\xc6\xb2\x81u\xc0\xbf"\xf9\x82c\x1b~\xe6?[p\xd9\x9a\xad6\xcc?\x0b\x87\xce\x81l[\xdd\xbf\x10\x1e\x93\x05\x99uq?\xe8\xe7\xab\x82\x07K\xd6\xbf\x02\xaa\xf8\xcb\x04\xc9\xcb\xbf\x8a\x13%\xb9\x9d\xc4\xae?\x18\xbdVV\x03\xc4\xe7?\x92\xad\x8aB}\xdc\xc0\xbf\x0ccJHj\x08\xbe?\xcdt\x9a\x00.\xa7\xe5\xbf\xae\x92\x8e\xcf\x02\xf4\xbd\xbfxM\xfe\x0b\xdc\xaa\xd0?Qi\x960\xc3q\xdf?\xba\xa4\xe5\x07\x16\x95\xd7?<\x02E\xadaP\xce\xbfO\xd3u\x85\xb8\xdb\xbd\xbf`\x18U\x97\xda\xe5\xbd\xbfbe\xe9s\xb1\x91\xa9\xbfm\x15\xef\x1ev@\xe2\xbfby\x8f\xd5p\xad\xce?\xec\xd6[\xa5Uh\xde\xbfK\x9f\xbat\x1a[\xd9?O\xffgu\xc8\x03\xa0?\x8d\x92\xf2"\xb3\xd7\xdd?\x8d\x1d~I\x84\xbf\xde?P\x90\xd5\xd1\xf1\x94\xae\xbf\xa4+\x83\x03s\xce\xdd\xbf\x04\xd5\x87\xf0}@\xb7?t\xd0G\xd7\xb7\xc4\xca?V\xe7\xa2\xf5\xb6\xe5\xe4\xbf\xf0\xc21\n\x98,\x9b?J\xfe\xf7\x16\x1b\xe1\xd0?Q(\x03$Z+\xda\xbf\xae~\n\xa3l\x06\xd4\xbfn\xc8sy\x1e\x84\xdd\xbf\xf9i\xd5\xc90F\xd4\xbfQ\xce1\x00\x0e\xeb\xd1\xbf\xc7Z\xb8\xdaTb\xd5\xbf\x05M\x9d\xab\xbe\x8a\xd8\xbf_\xe6h\x00\x9a\xa2\xd2\xbfm\xa6\x08D"$\xc6?@\x18\xb0\xd2\xd4\xa0\xc5\xbf8\xbe\x18wm\xb0\xd7?X\xf6\xb2\xe6\xb7\xd6\xce\xbf\xa3\x98\xaf|\x80\xed\xd6\xbf\x92[W\xa5\xc2\xd3\xc5\xbf.\xe4\xb5.`\xb7\xd8\xbfS\x06_\x13j9\xe5?Fi\x858\xa4\x9e\xe1?\xdeX*\xa7\xce\x1f\xe0?\x82(=\x92kD\xb5\xbf\xf0\\\x0c\x81*)\xb9?\xe0\xa2W\xeb\x18\xe5\xb2\xbf\x88\xdaqr\xab\x04\xe1\xbf\xc22Z\xb7\xfc\xd0\xd0\xbf\xc0\xb1(\x9f\x0f\xd1\xd0\xbfHx\xe5x\xbez\xc4?X0\xb1\xeb\xc5n\xe1\xbf\xf7\xab4W\xfe\xcb\xc5\xbf\x999\xa0\xb2\xbbw\xe5?\x0e\xeb\xb8\xe9\xe2\x1e\xbb\xbfG\xf0\x16\xb4d\xaf\xd6\xbf\xe8j%3\x86\xe9\xca?+L\x8e\xcf\x93\xee\xc0?~\x01"\x1eD\x96\xe2?}\xe8\'\x01P\xeb\xd9\xbf\xa43k\x84\x9e\x83\xa3?E\x19\xa4\xa2\x03J\xdb\xbf\x80\x9a@\x98\x84\xa3\xd0\xbf\x9fZ\x83L\xaf/\xc8?\x8e\xf8\x16\x10\x84\x81\xd3?[\xf0\x84\x9a\xa7\x17\xd6\xbf;`OS\xbf\x1f\xc5?\x84\x9e\xb8\xc8&\x06\xc0?\xa8\xd1w\xb8\xf4\xd8\xe4\xbf@\x19?\xfa\x04\xe2\xd4\xbf\x8az \xde\xcf\xd9\xcc?IL<\xd9\x08>\xbc\xbf\xfc\x9d\xfa\x86yS\xd4?\x87f7"\xf7c\xe0?2f\xb0\xe9n0w\xbf\x03\x07#\xd4\xf39\xd3\xbf\x10\xf8\x92\x85\x80H\xd5?,w!\xdb?\xf2\xd0\xbf\xc3\r\x88\xbd\xac\x13\xe7?\x18\xa2n,\xd6\xc2\xdc\xbf\x80 x\xe9*\xc2W\xbf\xa2\xebx9\xe0l\xb5\xbfER\xd3;\x9f,\xd5\xbf\x0e\xf1\xe7\xbags\xcd?F\xa5\x94\xd7\x9f\xdd\xd1?W\x05~]4\x9e\xc3\xbf\xe5Ko\x17\xb9\xc9\xd7\xbf\x06\x91\xfcO\x1b\xb1\xdc\xbf\x0eFYSR(\xe4?\xe0 
]\xd9\x15-\x90\xbf\x05du\x9b}\xa1\xdb\xbffO7u\xbdS\xd6\xbfs#\xf6\xbbG\x08\xd2\xbf-[!\xdf\xe1\xfe\xd4\xbf)\x16\x9d\x0b;>\xd6\xbf\xd5\xa8-9\x9da\xd7\xbfA\x14\x8e\xf5mv\xd4\xbfOxZ\xcd\xd2\xa7\xd8\xbf\xdb\x10{\x89\xba;\xa2\xbfH\xc9\xfd\xb4\xc0\xb5\xd3\xbf8\xe3\xcd\xb2\xf1\xa2\xbd?\xcav\x01\xdbVu\xc6\xbf\xbf\xd8\xa7\x18\xdc\x06\xe5\xbfP=l\x90(\x1b\xd1?\xd2\xf0i\xcc\x0e\xb6\xe0?\x9c(\xdciG\x86\xd2?@\xd3$\x03\xc3\x90v\xbf)\xbcB\xc8J\x9f\xb4?\xecj\x85\x99\xc8\xa4\xe9\xbfoWm\x06@\xf2\xca\xbf\x0f4\x16\x99J\xa2\xb7?G\x0c\x15\x947\xf8\xd1?\x9c\xa1xRd\xbd\xbf\xbf\x968\xb1\xcc\x88\xec\xdc?\xf7\x90\xc1t\x85\xa0\xc8?\t\x866\xc6n$\xd0?\x03\xb1n#\x0f\x7f\xc7\xbfB\xd5G\xc9\xe5j\xdf\xbf\'(\xcf\xf5/c\xca?\x12z\xce2\xc01\xd8\xbf\x00\x06\xe0v*\xd8\xe3?\x82?\xa7\x1eD\xf0\xcd\xbf\xd1\xfc\x17hq\x11\xe5\xbf\xeb\x98\xeb\x0fT\xa1\xce\xbf\x14\xbf\x9f\xd9P4\xd0?\xc0\x8az\x87|=\xce\xbf7G\x80\xb2\xe9B\xe1? )\xf4x\xb8\xc6\xbb?\x989g\x89\x7fs\xc5?@\\\x9b\xbe\xfe\xf6\xce?\xcc\xa9q\x82\xa3f\xd0?\xd9\xad\xc1\xa7E\\\xe6\xbf<\xfdJ\x8b\xe5\xa3\xd1\xbf\xcc\x8e v\xab\x90\xd2?n\xbc\xd7\xb8\xb7-\xd3?\xb4\x9f\xef\x8e\xd2y\xc6?*\xe0}:\\\x15\xce?n\x19\xbd\x8b\xb3;\xd5\xbfT6\x02\xce\x80\xaf\xd6?t\xed\x1c\xa4O3\xb8\xbf\xa4Q\xccG\xe8\xdc\xd0?\xf6o%.\x8d\xa0\xe4\xbf\xe8\xa7\xde\xad\xe6\xd1\xb5?\x85\xff]\xf5+A\xde?\x8f\xb1\xfc^\xf5\xb9\xd4\xbf\x95\xd27\xe7o\x1d\xcb\xbf+w\xdd\xb3[\xd4\xd8?\x9bx5\xe8\x82\x82\xd9\xbf\x9c\x97\xdb\x00H\xe9\xc2?p\xfd\xbe\xd7\xc1x\xc2\xbf\n\xb6\x0bJor\xb7?\x01Ua\xdf\xf9\xbc\xe1\xbfv\x04\x00\x91\x86\x97\xc8\xbf\xe5]\xc2\x9aRd\xe1?E\xb3\xfb\xef\x01\xb9\xd4\xbf\x84\x8e\x15c\xbc\xb9\xd7\xbf}\x9b\xaa~\xa6j\xcf\xbfQ7L/\xcb\x00\xd8\xbfX\xef8*\xd9C\xd8\xbfb\xb1\xe1t\x1a\x08\xd6\xbf\xe4\xff\xbdu4O\xda\xbfI\xca\xaf\x18.\xae\xd6\xbfc\xa6\x87\x8cR\xb3\xd5?\xfa\xef\x93\x83\x1a\x98\xd9?\xc4\xc6l\xc7\x04;\xc7\xbf\xb0\xadp\x08E\'\xd0\xbf\xf8"\xe5\xcd\x06\xa9\xc2?x\x80\xf7Pl\xa9\xe0\xbf\xd4\x93\x0f\xc5\x88\xae\xd8?r\xaaJH\x10\xb4\xdb\xbf&0\xc3\xcd\x8f\xc9\xcb?J\xd2\x81\xda\xb5\x06\xe0\xbfN\xc6\x8e\x96\xda\xff\xcc?\x8b\xa4\xc1\xd1\r\x8e\xd1?\xc0w\x88\xc3\xe0S\xdd?\x05\xffRDU\x91\xb7?D\xdf\xa3N\xe2\xa2\xb8\xbf\x96\x08\xbf>\xee\xde\xe2\xbf=\x9bR\x04\xf3&\xa2\xbf\x85\xa1\xf4\x8az\xfa\xdd?N\xc8\xe3\xe4\xab\x12\xb4?\xee 
v&\xe1]\xcd\xbfd1&\xe1rH\xc6\xbf\xa0[\xe1\xdc\xad\x94\xe4?\xe8\xf5\xd4\xe4\xf3\xca\xc7\xbfr\x88\xdel\xab|\xdf\xbfp\xc4\x98g_\xce\xdd?\x95%\x93)\x89\x94\xc5?G\x89\xa4\x98\xcb\x99\xd7?w\xf0\r\x81+\xb1\xd8?V\xd9<\x8b\xb6Y\xe0\xbf\xd3;I\x03kd\xd1\xbf\xe0|\x004\x88m\xd7\xbf\x18\x0c\xe5q\x07\x11\x9f\xbfQ\x05\xcf\xbb\xe7n\xdd?\xaf,\x19\xc7,Y\xb1?@$\x99\x9dU\x91\xe2\xbfy\xfc\xbc\x03\xf3\xd7\xb0\xbf\x8d\xff\xb0\xd2Za\xd3?\xcfH\xda\x82y(\xb9?NTh\xb8\xb1\x96\xe1\xbf\xea\x1c\xb37\xc8j\xc9?&(\xd0\xaeq\xfa\xc6\xbf\xf1\x0c\xe5\xc1\x12T\xa9?\xe8\xcd\xaf\n\xad\x12\xe3\xbf\xdf)\'\xf6\xf9u\xe5?\xb7\xed\x13u\xbb\x15\xcf\xbft\xd5\x8c_s:\xb4?}\xf6\x83\xc0\x03\xab\xc9?G\x97\x175\x91\xfb\xcd\xbf\x7f\xfb\x81\xdf\xfb\xa9\xe0?\x85\xf1nu\x17g\xdc\xbf\xc5T,U\x8d\xbc\xc2\xbf\x8c7\x94/\xdaJ\xd0\xbf\x82\xe4\xbbP\r`\xdb\xbf\xd6\xa2T\x1f\x8d\x16\xd4?\xb7l\xb9\xb0e\xc5\xd9?\xa4M\x8am\xd6\xf7\xa6?\xd7\x0b\x8a\xfe\t!\xcb?\x86\xd5\x8cp\xb2\x9f\xda?\xc1\xcf\x8dvMf\xd3?0\x89\x10\x04\xbe(\xd8?\x8f--\x16\xc1\x9a\xd9?\xd8\x1c\xd0\x8e\xe5`\xd5?\xc1#\xf6%\x02\xdc\xd6?\t\x13\x0b\xcaD\xa8\xd8?i\x90\xf9\xfeS\xe0\xb1?\x7f$\xb5\r~J\xe0\xbfN@\xa8\x96\r\x8f\xbd?0\xea=h\x99\x7f\xe3?a\x82\x05\x91\xf4U\xc1?\xac\x04J\xba\x8a{\xb4?$p\xee\xf4\xe5\xcf\xe1\xbf\xd5p\x81\x8e\xf0\x91\xc0?\x80\x85\xced\x0b\xce\xe1?\x04[\xda\x9a\x1dk\xdd?\xec~(\x1e\x8b\xd7\xb7\xbf\xca\x1b/\n\x94\x00\xb0\xbf3\xd4\x02\xa9\xb7\x15\xc9\xbfP\x1cz\x98mF\xd4?\xa0\x0c\xe6\xe1\xd7\x1e\xe0\xbf\xa1\x06\x17\xe4\xe6h\xd1\xbf\xa8?\xac\x06\xd1\x86\xd0\xbfp\xb8)\x00"\xa9\xac?L\x8az\xc2\x1f\xdb\xe5?\xe8\xdc\xbf\xe6\xbc\x1b\xc8?\x025N\x08\x13\x13\xe2\xbfR?1d/\xc8\xc4?\x7f\xde\r\xf8\xe1\x18\xb6? \x0b\xea\xc9Q\x92\xd1\xbf\xdf\xa0Dz\xa8\x84\xcf\xbf0\x16\xab;F\x15\xcc?\xea\x08\x1f+\x1e\x87\xbd\xbf\xd0&6^\xaf\x94\xd6?\x1d\xdcr]b\x04\xdc?u\xd1\x80\x0f\x9e\x84\xc1\xbf\xc8\xb2\x17\xcb\xf8\xd4\xa7?/\xcdM#\xe8\x88\xe7\xbf\x19L}\x11\xf1U\xd3?\xa6}_\x8d\x0c\xef\xb9\xbf\x00(\x89\xc7\x9f \xde\xbf0\xa2]\xba\xdf\n\xe0?\xbaT\xf3\xb3\xda\x05\xde\xbf\xac4\xb7eYR\xab\xbf\xa8\xbb\xdc\xf1i\x91\xdc?b\x9b]\xf2\x95\xbd\xad\xbf\xef\xa0\x1a^\xb0\xcb\xdb?\xcfT%I\x01L\xe1\xbf\x9e\x16\x15\\7\xf0\xc8?g\'d\x86\xea\xfc\xd0\xbfF\x99\xb0\xac@\xcc\xcc?\xfe\x9d\x00\x0b\x9d$\xd6?C\xc2\xe6D\xa18\xd4?\xb2x\xa3?R\x00\xd8\xbf\x9c:\x1a\x7fu\xeb\xde?,2\x11=\\\xfc\xaa?\xbf\xf4\x01\x18\xb7\x84\xd8?\xcfl\xbdn\xfa\x7f\x98?;\xe0\xf7\xaf\x19\xfc\xa5?\xb8J\x00\x15(\x13\xe9\xbf\x8a\xda\xb4vg\xc1\x85?PBy\xb9\xb6 
\x97\xbf\x99\xac\x9b\x17Pj\xd2\xbf\xf0q\xe8\xc0#\x98\xdc\xbf\xf9KG\x03l\x08\xd3\xbf.;y\x82\x96$\xd7\xbf\xf9\x89\xccujn\xd3\xbf$$\xae>\x10j\xd8\xbf\xa9\x98\xbe\xdc\xb77\xd7\xbf\xac\xf0^\x04Z\xfe\xd6\xbfl&%\xdd\xa2\xe7\xaa?\xbc\xd5d\xad\x9d\x1e\xba?\x1a\xfa&\xa7\xe0D\xb3\xbf\x06\xe1\x94i\x99"\xe1\xbf#j\x17%\xb7%\x93?\x9c\x99\xa4\xeeh[\xdf\xbf\x86\x84\xcc\x86\xaa\xac\xd7?\xbfz\x1a\x8d\xb4\x05\xe2?\x19\xa3\x93@lG\xdc\xbf\n\x81S\xbb&P\xc5\xbf\x08\x16\x0f\x04\xffo\xe2?\x8b\x0c.@n\xdd\xa2?\xf6y\x8e?\x82/\xe0\xbf\xf8`5o\x84P\xb0?\xe6\xef\xf3\xfd\xf1c\xdb?V\xddm\x08/\x0c\x9a\xbf\xda\xd6@K^\xb9\xe1?\xd2m\xadp\x92\x1c\xe6\xbf\x85t\xa1\xff\\\x8b\xc2?\xfd\xe8\x82\x07\x1ee\xd1?&\xba\xcb\x97\x7f\xb5\xb6?\x06\xc9h\xc0\x80\xd9\xcf\xbft\xbe\x15\xbd\xd7\xdc\xcc?\xa0\xa5s\xac\x82\x9f\x90\xbf^\r\x13/\x0f\xdc\xe0?\xb5H\xb3\xc3\xfd\xb0\xde?\xceN\xea\xd2\xda\xc1\xb3?\xa0\xf9"k\xa0]\xab\xbf\x05\xed\xe6\xc3\x8d\xea\xde\xbfJ`\xb7\x15e\x13\xd0\xbf&\x9f\x86\xfa\x0b\x87\xb2?\xe3\xda\x84X\xa7L\xdb\xbf\x8a\x15\xa1&a\xa5\xbc?2\xf3NKD;\x80?\xd3U/t7\x0f\xe7?\xc8\xda(\x07\xdfw\xda\xbf2\x85F\xd5P\xb6\xd5?x\xf2c|\xf2\xad\xaa\xbfL\xbc\xb2S\x90k\xda\xbf\x04bg\xe7\x8f\x1f\xb8\xbf\xc6\xd08\xa3U\xf0\xba\xbf\x1eq\xdf/F\x94e?F\x03j8\x07\x1c\xbd\xbf\x83R>\x87\x86\x9e\xd2\xbf0N\x93\x07{\x83\xdd?1\xe0\xa6"(Q\xb7?/\x86\x12\xa8\x89#\xe2?\xf6;\xf9\xe2(\xe8\xe2\xbf\x1e7\x02\xce\xa3X\xd5?\nz\x17\xa9\xc5\xf2\xcd\xbf\x91\xd2Q3\xb4\x1c\xbf\xbf\xe2C\x94]i\xeb\xdf\xbfce\xb0\xf5\x92f\xd2\xbfH\x81}B\n\n\xe6?\xf8T\n\x95c\xdd\x98?\xca\xb4\xf9d\x00\x14\xbd?r\x1f\xd2\xe9\xc7\x0b\xd6\xbfrp\xbc\x8a\xb3V\xd6\xbf_"M+\xf4\xfd\xd6\xbf\xc8\xee\x85\x0e\x18\xd2\xd7\xbfD\x1b\xce\x8bz^\xd6\xbf-K\x87\xdf\x92\xf8\xd5\xbf\x8e\x9bC\x1f\x1a1\xd7\xbf\t\x9d[\x83\x8a9\xd6\xbfD?O\x14\x18\x89\xc8\xbf\xaf\xb7uu\xd6\xab\xce\xbf\x98\x83m\xc8\x0e\x8c\x95?\x10\x8b9\xe9\x08\xd9\xd3?\x0c\xdd\x9dN|\x84\xd3\xbfz\x97\x86 \xaf\x11\xc1\xbfB\xdd:\x97Q\xfb\xd0\xbf\xafg^\xb9\t\\\xe9?\x89a\xde\x99\x18\x9f\xdd?\x1ai\xcd\x13\xa8\x12\xd3?\xe3Q\xf5\x96\xd5\x13\xc0\xbf&\xae3\xcc\x90\x92\xaf?:\x17\x12\t\xcb\xfb\xe8\xbf\xc8~`\xd6\xc9s\xce?\x03X\x87\x96\xa1)\xa8\xbfO&o\xf1iN\xb8\xbf\x81~8\xc5/+\xc8\xbfU\xdd\xfc\xc6`\xf7\xe0?@\x98o? 
\xd0\xe0?\x90\xc1\x98\xe7+\xf3\xe0\xbf\x1c\xdf\xca\xee\xdc\x11\xb3\xbf\xd2WY\xce[\xf4\xbd\xbf\xdc%j\x9e\xb8 \xd2\xbf<\xb2\xfd%\x85\x17\xc5?ef;8\xfb\x0c\xc6\xbf\xd0\xd7\xf9%~\x9f\xa3?\x8d\x8d\x80\xfc\x1a\xa1\xd1\xbfa\xca\xc0\xfa\xecy\xb7\xbf=\xef#;b\x8e\xca?\xe7\xf4\x8d\x9d\xea\xb1\xe8?z\x9f\x89\xbb6\x86\xdf\xbfh\x12\xe8/t\xa4\xad?*\x81?z1"\xd8?\xa8\xe5\xebq#R\xe4\xbf\x9eB\xda\xa0\xdd\xf3\xd5?\x9c\xc8e\xecSb\xdf\xbf\xb8\x8a\xb7\x98u(\xae\xbfin$\x00h\x1f\xd0?\xba&m[\xe5\xa6\xba?\x15E\x9e\x9f*{\xc1?GdS$\r\xcc\xdf??\xa2\xa2\xd4\xc0(\xb5\xbf\xeb\xa9\xa8!\xfe,\xcb?\x86\xdbZ\xd8h\xb0\xd1?1]C\xac\xacT\xce?\x1e7n\xd7\x8b\x94\xd0\xbfrF~o\xfc\xc0\xe5\xbf\xbeT\x8f\x97\xf4l\xc9\xbf2J\xc9P\xd4\xd9\xda?\x82\xab\xd6^\xa8]\xc9?5,\xa5<#\xa9\xe2\xbf:2[\xe8\xfa?\xd9\xbf\xac\xb6\xcf\xdeG\x93\xd0?\x14\x9e@;\xba\xae\xd0\xbf$\xf1\xf6ZE\xcb\x8e?\xb1t\xa8\xc7K\x18\xd9?\xf8\xbc\xdd\xfc\x10\x8f\xd7\xbf\x94\x93\x8a\x8b}\xba\xd5\xbfr\x95\xdf/\xe5\x1a\xda\xbfh;\xb5 _\xae\xd9\xbf\xb5\xe45\xd3&\x85\xda\xbf\x85\x85\xd6F\xc3K\xd1\xbfZ\x08\x06\x95\xaci\xd0\xbfr\x00\n\xb4\xbcZ\xd5\xbf \xcc\x13T\xa4\xf5\xde?%\x16\xf6\x0b\xd2\xe6\xe2\xbf\xa0\xa0\xe8a7\x02\x84?0W.\x87\xf5\xe5\xc2\xbf\t\xcbN\xe9\xfd\x12\xdb?@\xc7\xcd}%o\xb9?\xe3\x7fH\xd2\x0c\xe3\xb7?%a\x0e\x87At\xdc\xbf \x9b\x86G\xf6\xeb\xdf\xbf\x00[v\x820P\xb7\xbf\x1b\xadF{\xe6\xd9\xb3\xbf6D,\xd16\xa1\xd2?\xea4\xa9Q\x86\x8f\xe5?\xb0\xa1G\xd5R$\xc4\xbf\xdd\x17zk\x88\xa7\xda\xbf`\xb7\xaaR\x16nY\xbfI\xd3\xd8\xff?\xdb\xbe\xbf\xa8;F_T\x0e\xdc?\x8f\x15\xef\xd0\xe1\xa2\xbb?\x18\xed\xc3\x89y\xce\x93?\xa3\x080\x8a\xb8W\xc1?\xa0\xe1Q\xea\xb9v\xdd\xbf\xee \x87\xac\x87\xfc\xdb?7q\xba\xf6\x7f1\xe3\xbf6\x13\xb5\x86#\x17\xd9\xbf7\xcb\x0b\xcd\xe6\xb0\xde\xbf4I\x97O\x1eM\xe2?H^z\xe8m\x90\xd7?\x97\xee\x98\xd5\x13P\xd2\xbf0Q\xfeM\\\x9a\xb2?\xf7\x0b\xe8\xae\xbe\x1c\xce?D\x01)"q\x15\xbb\xbfA\xa7`\x1b\x8eM\xd9\xbf\xba7\xea\xe7\xe7\x1d\xa8?\xbcTq\xdc\xdbM\xd0?\xe6z~z{\xf7\xe6\xbf\x87i \xe5&\x0c\xd2?yn\x8f\x8e\x8f\x18\xd4?\xa2\xd5\xdcK\xec\x12\xd0?\x9d\x88\x05C\x949\xc2?\x99\x89I`\xe9\xdb\xcc\xbf\xc1d\x8e\x87S\x9c\xbc?<\x94;\x84\x91>\xdf\xbf\xbfX\xbd\xaf*p\xd1?X!\xd5\x87\xc2\xd3\x93\xbf[k\xd2\xda\xe7\xf6\xe5?0\x02\x99\xa8Ic\xce?\x90\xaa\xb14\xa2\xda\xd3\xbfr\n\xe6`\xf6e\xb4\xbf\xc9y\xbb\x0f+F\xd3\xbf\x94l!yc\x91\xdb\xbfo4cl\xb3\xab\xb5?\xed\xc5u\x0e\xcf\x9a\xbf?Km\x02\xd7\xb7\xc6\xd4\xbfW\x08,\xd4\x10\xe2\xe3?\x93\xde\xca\x92\xdd\xdf\xdc?p\xe8\x80\x18S\xe1\xd8\xbf\x1cm\xb1PF\x8d\xd5\xbf\xcbj\x1e_tw\xd7\xbfRE&\xae\xc1:\xd4\xbf\xe6?\xea\xe6\x91\xfb\x7f\xa3\xc0\xbf\xc5\xb9\xf3of\xce\xc1?`\xa9\xe3\xd3^\xf9\xe4\xbf\xf0-\\\xf8k\xfc\x9f?0]\xce\x81\xc8>\xc0\xbf\xcd.\xe0\xe4\xaf\x80\xc5?@\xa0\xdb\x0fW\x10\xa5?|\x9a\xa1\xaa\xe7\x8d\xac\xbfj\xa3\x9bH\x18\xb3\xea\xbf\xaf\x81\xd2\xc1n\xa8\xd8?J\xdekR\xb6\x9a\xc6?\x1b\xccB\x80\xad\xfb\xb8\xbfY\x90(@\x14J\xd2?Ni\xcf\xa3.\x93\xd4\xbf%\xf5\x8f\xe8\x0f\x99\x99?\x8a:j\xd1E\x1e\xa2?\xfb\x1e\xc0_=\xf1\xa8?}\xdfe\x924\x17\xcf\xbfR!\xb5\xb4\x7fZ\xd1\xbflpF\x13\xc8\xd9\xba?\xef\xcf\xdd\x04+\xb1\xeb?j!\xc1\x9b\xaa\xd0\xbf\xbf\xe5\x1d\xae\xe8K\x7f\xe2?"-\xb4\xe3\x8a\x88\xc5\xbfed@\xae\xcdm\xd4\xbf\xa6\x19SO\x9e\x88\xd6\xbf\xaa\xfd\xd1u9\x15\xbb\xbf7\x04\xf5\xaf\nR\xe2?p\x10G\x82\x07\xa4\xce\xbf\x87H1=\x91\xdd\xe1\xbfr\xfb\xcd\xc0\xc4\x86\xde\xbf\x84\x0b"y]\xd9\xd4?A\x0cL3s\x87\xc0\xbf\xd8\xb5[\xf3\x97\x1d\xc4?\xf0\xf9;\xe9\x8e\xfa\xd0?\xffB\xea\x83\xf5l\xde?D9%\x0b\xeb\xa5\xc0\xbf\x97x\x8c\xcb/\xdf\x86?/\xdbf\x13\xb5\xb8\xc3\xbf\xdb\xd1\xe52\xf1@\xe1\xbf[\xba\xae\xd00A\xc3?\xc4}w\xa0\xf3-\xe2?\xf8\x04uh\\5\xde\xbfj\xd5}\xbb\xe7\xe8\xd5?\x8c\xaf\x08z)\xbd\x7f\xbf' 28 | tbg1 29 | 
(g2 30 | (I0 31 | tS'b' 32 | tRp6 33 | (I1 34 | (I10 35 | I8 36 | tg5 37 | I00 38 | S'\x94\xc5\xa9\x05\xd5e\x98?\xbb3a\x8dW\xab\xa9?\xd6_\xf3p\x0e\x14\xac?1$\x1ea\xd2\xcc\x9f?6E]\x8cn\xa6\xb0?\x93\xc4\x96!\xd2\xdb\xb1?V\xdfg\xf8e\x1e\xa8?,\n\xb9^\xec\xe7\xa2?*\xfa\t*\xe1\xa8\xa3?f\xa7\xde+l\'u?\x15hJ6\xb0O\xa2?\xca\x19\xfari\xcc\x94?\xb8\x95\xb1L\x14+\xae?%\ns_g\xfa\xa0?\n\xcc?\xbd\xceh\x98?zx\xe4^\xb0X\xa8?\xe7\xab\xd9q$h\x82?\x1c\x1d{u\x01q\xa0?\x97\x14\x03\xa5p\xee\xa2?\x9b\xc1%\xb1\xd9\xae\xa9?\xd0z\xee\xdcv\xd5\xa3?C[\x0b{\x0e\xd4\xaa?\xb3)\xcb0\\\x88\xa8?\xad\xfb\xca\xf1qo\xa3?\x1c\xac\xa0;\x9bM\xa1?\xbe\xcdv\xf0\xe1\r\xa1?Ol\xad\x94H\xcf8?b\'\xe4e\xab\xb3n?\xbf\'c\xc4\xa0\x89\xa1?\xbc\x1a\x10\x13\xe7\xc9\xb0?\x0b\x82\xd5\x84x\xd0\xa6?\x14\x82 \x923_\xa5?t\xa1S_\xc20\xb5?:\xf9\xc0*\x94\xc3\xb0?\xb9\xf23\xc9\xb8\xa5\xab?G\x02F\xec\xae\xf7\xb2?\xd1\x08\xc5\x92\x0b\x0ec?\\\xe3\xd7\x9c\x83\xa3\xb5?dC6|\xb8\x1d\xa4?3\xcf\xc1\x8f\xe3\x7f\xa7?\xf3\x94\x8d\xad\x04\xb6w?\xdb\xc8\xfd\xe8\xe8\xa1\x90? %Q\x97\xf9`\x9e?\xca\xc7\x99fHY\x80?X\xf0>E\xc7^\x8e?\x87\xf2\x0fS\xaf\x03 ?\xf8\xab\xb8{\xe8B\xa3? \x07-\x17\x0cg\xa3?\xe2\x14"\xf2\x1e\x08\x8f?\xbd\xa0\x80\x96\x93\xa9\xa5?T+Cs\xd2\xe2\xa9?/\xf7\rd\x02\xbf\x8a?\x0e\xaa`\xa0)\x98\xa9?\xdc\'\xad\xf0\xc0J\x94?;I\xd5\xeb\x17\x9d\xa6?Z\xe3\xfdW\xdcx\xae?%\x8d\xdf?I$T?e\xa4\xc3M-,\x86?@l1sv2\x9f?w\xa7v\x07\xa6\n\x9b?_\xe4\xf8\xe1Z;\xac?U\\]\xccA`\x9f?\xb34x7v\xc9\xa6?%[c\x98\xb9\xd9\xaf?p\xbb(\x80\x9b\x7f\xa3?wY\xa6\xf2\xd6\xed]?i\xcc\xc3\xa5m\xe6\xa3?\xcd\xe5\n?<\x1b\x9a?\x12dB\x11\xc4\xa4\xad?R\xda\x97\x8f\x9ez\xad?\xc2\x7fW\x02\xba\xd1\xa3?\xc7\xa2\xdd\xd1i\xf5\xa2?S\xc2\x97\xbc\xdf\x1a\x8e?\xc8$k\xdb\xdf\xdb\xae?\x8f\x14\x9e\xdf\x15M\xb4?h\xc6\xf4\xf1\xe8L\xb4?H\x8b\x8b \xaf\xc4\xb5?`\x05\xc8R\x0b\x89\x8d?%7\x16i\x7fy\xa8?\x00\x07\xe1qt\xc1Q?' 39 | tbg1 40 | (g2 41 | (I0 42 | tS'b' 43 | tRp7 44 | (I1 45 | (I10 46 | tg4 47 | (S'i8' 48 | I0 49 | I1 50 | tRp8 51 | (I3 52 | S'<' 53 | NNNI-1 54 | I-1 55 | I0 56 | tbI00 57 | S'\r\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x12\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00' 58 | tbI2 59 | t. 
-------------------------------------------------------------------------------- /python/test/testdata/test_compute_rotations_from_accumulators_output.pkl: -------------------------------------------------------------------------------- 1 | (cnumpy.core.multiarray 2 | _reconstruct 3 | p1 4 | (cnumpy 5 | ndarray 6 | p2 7 | (I0 8 | tS'b' 9 | tRp3 10 | (I1 11 | (I10 12 | I8 13 | I8 14 | tcnumpy 15 | dtype 16 | p4 17 | (S'f8' 18 | I0 19 | I1 20 | tRp5 21 | (I3 22 | S'<' 23 | NNNI-1 24 | I-1 25 | I0 26 | tbI00 27 | S'\xb0g\xe2\x14\xe5~\xa4?\x82\x18!]#;\xcf?\x08\xe82\x1c\xb1\x81\xd6?j<\x0b\xb3\xf7v\xcf\xbf\x93\x98\xcb_\x9b\n\x9e\xbf\x8a\x96\xc1@v\xe8\xd3?\xb3@+\x1f\xbf\xff\xd8\xbf\xc7V\xffq\x0c\xbb\xe6\xbf\x07\xd2\xceX\xd4\x00\xe8\xbf(\xf0\x92J\x84\xfb\x8c?\xac\x89\xcf\xc9\xadH\xdb?\x97\xe4\xd5\x9d\xbb\xe5\xd6?\xf4\xcdo\x04\x9f\xaf\xc1?\\C\x12\x99\xc8\xfb\xd0\xbf\x03\xb2"?S\x8c\xc8\xbf\xe6\xe4\xa6\xd2\x12\x17\xa1?\x9b\xf0%:\xe0}\xbf\xbf\xaa\xf74\xb4\xe5\'\xdb?\xe0\x8e\xe9\xd6\x166\xe5\xbf\x14\x8cT<\xdfU\xd0?\xc8\xbc\x16\x1d\xf6h\xdd?\xce\xf4\xb9\x8e?\xd0"\xdf\x88\xb4b\xd3?4\xa5V*\xaa\xd0\xe6?p\xd4\xf6#\xd6\xa4\xa2?~\xcaT\x19\x1bZ\xc3\xbf\x1c9\xe21|w\xce?\x8d\xae7\x81u\xc6\xd0\xbfAc\x87\xb7\x00T\xca\xbf\xb6RPe*\xf2\xdf\xbf\xf8\xed&\xd0<\xf6\x98\xbf$\x1d>\xe9\xb3\xcd\xc0?\xd2\x97\xc1\xa88\xb3\xd8?\xd8C\x98\r\xe3\xf5\xe4?\xef\x17[\xdf\xbd\x96\xd3?\xdeq\x9c\x00\x82\xa8\xc1\xbf\xcd\xa9\x14\n\x99\xf1\xd5\xbf\xc0\xb0\x91\x8b$|\xd6?,\xe3\xf2V\x83\xe2\xba\xbfI\xec\x18\xe2\xfe\n\xb1?\xeai\xb7\xbf\xf4\xf7;{\x05\xb6\xb6?.\'\x15#?\xb5\xd9\xbf\xa7\x10\xc4\xb0(\x1e\xd8\xbf\rud\x05\xab\x04\xd7\xbf.\x0e\xe0\xb1\x1bZ\xdf?D\x89\x01\x12\xc2*\xd9?ic\xfdr\x9aL\xd1?\x89CP\xb7xb\xd3\xbf\xbc\x8c\xdb\x18#*\xd7\xbf\x00w\x96\x8e\xdcV\x8d?"\xc0\x9e\x99\xa6\xc4\xe0\xbfJZ3{e[\xca?R\xeb\x14f:\xa8\xce\xbf\t\x1f\xc8\x98H\xf6\xdb?\xf8\xcbT\x05\x1bE\xb7?\xd5\xed\x1e\x82\x99c\xe1?A\x0bl\x84v\x98\xe2\xbf\xb8\x13\xd84\x11y\xd6\xbf\xe6\x18\xf5\xf3\xa8\xf7\xc8\xbf\x1c`h\xf5 [\xd6\xbf\x7f\x1e\x11\xc7\xfc\xf8\xd6\xbfb\xa8\xc1\xf1\xa4T\xdc\xbf\x8c\xcd%&\xf41\xa0\xbf\xbe\xec\xa5\xe7?\x82\xcd\xbf\xedD\x83F\x9d\xb6\xb6\xbf\x16\xe8\xf9\xed3\xe2\xe1\xbf\x04\xe1 \x0c|e\xd4?\xfei\x12\xa3\xed\xce\xb4\xbf>\xd6\xb7\x92\x02\x86\xd2?:\xda\x87_\x0f\xdc\xa2?\xfaBp!\xffI\xe0\xbf\xd3\x1b`\xc0\xbb\x84\xde?\xd5\xc2N\x9f\xa8\xd1\xaa\xbfX\x87C\x8f^\x98\xa3\xbf\xe7\x8d}sn\x02\xc4?LN\xf9\x19\x92\xab\xbb\xbfc\x04\x0b\x8e\xd1\xca\xc6? 
\x84Y\xd8\x80u\xde\xbfL\x0f\x07\xbfL\xce\xcd\xbf\x85\x19\xb4\x7f\x90\xbe\xe9?Sr\xa0\xe4R\x97\xe0\xbf"\xafr\xb2!\xcc\xbb?\x80\x88U\x18\xfe\x14\xa5?\xcd4\xff\xdf\x1b\'\xc5?\xc2\xd3\xf2\xee\x8dA\xcf\xbf\xac\xf4\xc4M\x0b\x8e\xde?\xd7h\xca\xe9\x81\xc9\xe3\xbfr(\xbfWRQ\xc2?\xbb\x94C_\xde\x99\xd3\xbf\x08\xf6Zl?\x01\xe1\xbf\x08\xe0\xc3\xdc\xcb(\xe4?\x90\xf5\xe4n\x93\xe6\xc8?\xe8C\x07v\x8c\x15\xd4\xbf`\x96"\xaal\xae\xc4\xbf\\x*\xa1\x94\xe1\xcd?\x139\x10\x9d\xdd\x9c\xb9\xbf\xddN\xf1>u\x88\xe0?\x12\xe7\xd5\xd18\xfe\xd2?#\xa3\xdbGq\x93\xe4?\x0e\xa0$%\x7f&\xd1?P\x9d\xd0\xb7\xe21\xcb?!\x8f\x06\x8b\xa3%\xb9?z\xbc\xb1XoR\xd3\xbf\xa7;\x0ch&\x13\xbe\xbf\x0caMDU\xc6\xd1?\x1c\xfb\xd3\x99c\xcc\xd1?\xab\x89I\x18\xfc\x07\xb9\xbfzh\x83\xb2^\xab\x9d?!r<\x9e\x08\x9d\xea\xbf\xc5\x99\xbc\xcc\xa9\xbc\xd6\xbf\xa9\xa5\xd3(s\xc2\xc0\xbf\xa3\x15q\x85\xf9\x07\x84\xbf\x94z\xabR~I\xe0\xbf\x1a\xbd\xed\xe75\xc4\xe0?x\x91\xb5\xa52C\x9a?\x07\xf2\x08^\xe9N\xd1?\xec\xe5\xd2\xedq,\xcc?\xe0]_SH\xe2\xdf\xbft\xb3\x8bR\x84\xd3\xa9?\x16\x06yHj\x90\xd3\xbfS\x9f\xfa\xc5Y6\xc1?I\x88\xaa\xd1\x02n\xe0\xbf\x90T\xd2M\x96\xef\xd2\xbf\x98\x88kGd\xde\xce?\xb0u>\t\xcd\'\xc4?\xa9\xa6\x89\x88\xb17\xd6\xbfl\xd9\x12\x16N\x11\xe2\xbf\xbe\xeb\xf0\xc7\x1b\x15\xd5\xbf\xff\x7f\x17>\xc3\xad\xbd?\xb3&dE\xd6-\x9d\xbf\x03\x83\x0fKNj\xd0\xbf\x15k\x02\x0c[#\xeb?\x00\x01\xf9J\xd1e\x87\xbf\x7f\xcd\xea\xcaA?\xbf?CB6k\xd9\x05\xd2?<\xe2u\xcf\xa7\xe0\xd4?\xc6}S\xec\x80\xb0\xc5?\xdfJ*T\xdb\x1e\xe6\xbf\x96\x9aP\xdf\x91\xe2\xc1?\xd4\xb1Sg`7\xca\xbfX\xaf\x02EP\x12\x93\xbf>Z\xa21\xee\x8b\xd3?\xfe\xd3?\x03@|\xda\xbf\x96\x05WW\x0b)\xda?1\xa2L*s\xce\xa4?\xdd\xa7\x81\t\xa1\xdc\xcf?\xb5\xfa\x1aZM\x1c\xcc\xbf\x83_Jw]\xc2\xda?\x126\xaa\xd9U\xeb\xc1?\x90\xbe \xbaY#\xe9?\xff\xe0\x96\xc9\x98\xdc\xce\xbf\xdc6\xe8\x0fGh\xc1\xbf\x92\xf1\xa6\x9dr4\xe9?\xc8\x93\xa4\xfd<\xd8\xcf\xbf\xd4\xb1I\xd3\xbf\x84\xb9\xbf\x8a\xc4\xb7\x1f\xaf\x13\xc7? 
\x87\xb1\xafD\x97\xd1\xbf\x01\xdebE\xa0]\xa4\xbfLiC\x91\xcf\x07\xcb?\xbb\x16\x082\x08\x14\xd9\xbf\xf2\xd8U\x8d\x07\xba\xa5\xbf\xed\x03\xa9\xa3j\x06\xd3?\xcd\xcej\x06z\xc2\xde?Qa\xa5\x94\xc2\x90\xcc\xbf\xd9\x00\xba\xc64\xa9\xe7\xbf\xaa6\x04\xf1KP\xd2?0\x9d?\x17\x12J\x8a\xbf\x8a\xd6\x82\xcf\xb8\xaf\x90\xbf\xa5\x076\x85\xf2\xc0\xa1\xbfx\xac\xeeP\xffT\x8a?\x19\xc2\x1e9\x91\x96\xe5\xbf\x8fUc\xe8\xdc\xd5\xc2\xbf\x00;\xe1\xe9\x8e\x15\xd6\xbfG\x0e\nP\xda\x8f\xc3?\x00\x94\xf2\x87\xe6\x9c\xd9?\xdb\xf2\xba)\xf0\xf7\xdd?\xad\xa4\xc3E\xc0p\xda\xbf\x12\xf5\xd6K\xfc\xb2\xdd\xbf\xc3Og\x80V\xbc\xd0?\xc23\x9dD\xd2\xcc\xdf?\xe6&\xd0\xa7NB\xb8\xbf}u\xda\xbbz\x8d\xc2?\xe5\xe5\x8c\x9d\x9a\x87\xe0?\xca\xbaR\xa6\xe9O\xa6\xbf\xd0\xf1mh\x05\xa4\xda\xbf\x02\\@\xcfWH\xd2\xbf\xda\xe5\xa6`%(\xda\xbf\xf2\x1e\xd1\xb0:\xac\xb5\xbf\x86?\x16\xd2\x87\x98\xd6\xbf*\x86\xfdah\xc7\xba\xbf\x16\xb2\xe9\xe3\xfdr\xd8\xbf\xac\xdc\xdb(\xdd6\xe1\xbfr\x9f\xf8#\xcey\xa8\xbf\x12\xdf\x95~\xd4\xfa\xbc\xbf\xc7\x90\x90\xea8y\x9c?[\x9a\xc6\x08\xe6\xdd\xe4\xbf^Kx/7\xbf\xd4?\xe1Iaa-\xdb\xd8?eY6g\x03\x07\xd9?\xd2n#c\xd2\xca\xd8\xbf\x1e\x14\x16\xc6\xe0\xd1\xb1?\x80\xb5\x90j\xb6b\xd5\xbf\x80e+\xf1\xbd\x9a\xd1\xbf\x1a\xe8\x9f2\xc9g\xc3\xbf!\x9a\x7f\x18\x85\x18\xca?#\x0cu\xcf\xcb\xdb\xd5?9\xd7n\x17\x03\xf7\xe6?\x04P\x04Z\x9da\xd5\xbf\xdbD\x90\x7f\xc9.\xca\xbf\xbe\xb9q\xeb\x87\x11\xd4\xbf\xb1\x88A4\xee3\xc7\xbf\x9f\xc2\xad\xc9\xdf}\xcd\xbf\x07\xe3\xa6F\x9b\xde\xea\xbfy9\x15:\xa2V\xb6?fJ\x83\x02\xce_\xb0?q\x86x\xd9\x1d\x98\xce?$\x15\x9b\xee\xe5\xa2\xaf\xbf\xfa\x85\xaf\xb1\xfbG\xe6\xbf.;\xf9={m\xce?\x81\x07A\xc6\xe7\xfa\xae?\x0b\xda+7\x85\x8c\xd3?2\x1dd\xc1@\xbd\xd8?N\xaa7K\xc51\xd6\xbf8\xb8q\xcc!\x02\xd3?]\xb4w\xa04\xcd\xe0?~\xb5@\xb3\xa6\xdf\xdb\xbf\x9c\xcdO\x8f\x90p\xd0\xbf\xabE\rK\xea{\xc8?\x148d>\xcd\x93\xc2\xbf \xfd\x9e\xc0\xfc\xbb\xd8\xbfm9d\xaf(5\xd2\xbf\x0e\xfd\xf8p\xc24\xdb\xbf\'_\x9bI 2\xe3\xbf\x1c?\x90\xe0\xe2v\xc3\xbfb\x12\x00\xb8I\x8c\xd8\xbf\x7f\x81\xafx\xe9\xec\xd9\xbf\xc0\xa3\xd6bS-\xd3?ZnM\xf9[\xd2\xd8\xbf*4\x0b\x1d\x1f0\xcb\xbf/\xda\xf0f\x82S\xc2\xbf\x83\xbf\xf7\xd4E\x89\xde?\xca]\xdc+\xbf*\xb1\xbf!qM\xdb\x18\x99\xa0?\xdac\xd5\xddV\xe7\xe0\xbf{\x8b:\xe3\x88\xac\xc6?\xfa\xec\xe7\x1a.\x0c\xd7\xbf\xed\x97-\xae\x12\xe8\xc8?\xda@F\x9d\xac2\xe1?4p\xc9\x88\xf9\xca\xd0? 
\xe3?#\xe1\xd8\xfe\xae\xf3\xcc\xbf\\\xc9pC0\xab\xcf?]\xe4\xb2\xe1\x10C\xdd\xbf[\x8f\x93\xe6\x97\x8a\xd9\xbf\x15\xd7\x0e~\t\x0b\xd7\xbf\xcdv\xfa/l\xc8\xc2?\x16\xd9s\xe7i\xa4\xb4\xbf>\xfbak\xf3{?\xa4\x94\xf8J\xaf\xe4\xe4\xbf:<>dTK\xb5\xbf\xfa\x89\xf7\xe7\xad\xdb\xb0\xbf\x00\xb0\xd6T\xcf\x9e\xb6\xbf\x11#\xaf\xba\x13\xe2\xe3\xbf\x1d\xae\xa4f\xde\xe6\xd1?\x89\x14S\xd6\xe0\r\xe4?x\x89T\xe0h(\xd6?&\xb2\xa9C\xe34\xd3?>\xb9\x1b\xb3%\x8c\xd3?\xca\x1d\x8f\x8e\rB\xdc?\x0f\x18)*\xa7\xb9\xb9?8\xeaP\xe1+;\x9f?\xbb\xd38\xe2\x9d\n\xd4?\x06\xad\xd7\x86K?\xcb?\xe33|\x86"\xcc\xdb?\x82\x9a?\xb6\x06\xfa\xc2\xbf\x84]I\xbb\x84\x1e\xd1?\x91o\x17\x8e\x08\xd6\xe3\xbf\x98U\xca\x83\xa4\xd2\xdb?D*\\0\xfa\xb6\xb8?\x086V6\xe01\xd3\xbfn\x10\xdfI\x0c\x1f\xa5\xbf\xd3\x94xx\x82B\xdb?\xf0\x84\x1etV\xa3\xd0?\xdc\xa3\xf5\x89\xe3\xec\xd4?oL\xe4\xc7\xea\xe6\xe1\xbf\x8c\x12\xac\'\xc4C\xe2\xbf\x100\x12\xa0z>\xa1?\xf6W\xe2\xe3A\xa1\xa9?l\xeb\xdd?\x1a#\xa0\x16]\x0c\xe3\xbf\xb0\x8d:\x93\xbf\xe3\xb0\xbf|\xd4c\xdd\xe3\x12\xc9\xbf\x83/C\xd0\x9cp\xe6\xbf\xf3\x89\xb2\x12]}\xc1?\x89\x17\x0f\xf0\x11\x19\xc9?%L\xb8\x81d.\xd1\xbf2\xd1\x82\xebs6\xe0?p\xb0\xde\x1c\x1a\xfc\xc4\xbfr\x12\x16V\xee\x97\xce\xbf\xc5Ta=\xe3\xb7\x82\xbf\x8fO.\xc8@&\xd2\xbfp+\x87\xad\xaf\xab\x8a\xbfq\x1e\xda\xcd\xa2\xbf\xe6\xbfhr@C\x1b\xa3\x98?\x8e\x12\xd6\x8fi\x95\xb8?\x96(U\xd2\x1a\x90\xce\xbf\xf2\xa3\xac\x7f\xa4\xda\xe2?p\xc5\xed\xe3\x7fk\xe5?@u\xbfe=a\xa6\xbf0\x1b\xd8\x9f\x92\xf4\xd7?\x12\t[\xa2s\xdc\xda?C\x04\xcb\x95\xd7f\xb1\xbf\xec\x13`\xca\x1b\xad\xb8?P\xe5\xe7\x95\xa5\x00\xb1\xbfrE93\xed\xb4\xdd?\xe4\n\xa7\x03\xc92\xd7?\xbc\xe0?D\xac#\xc3?l-\xcc\xc2Q\x13\xa0\xbf\x1b\xfb\x8f\x85f\x88\xbf\xbf\xe4\x87\x06\x82\x01D\xe1\xbf8\xad\xea\xbaj\xf4\xbd?\x10\x01\x11a\xf4\xc5\xde\xbf\xbc\xa9\xb8_\x85Z\xe1\xbfqY\xd3x9L\xe5?\x14\xae>y\xe6I\xcb?\xc4\xfe(\xfc\xff2\xb0\xbf\xc8"c\xf8\xe9\x89\xcb\xbf\xd2\xb7\x08l\x12\xa4\xcd\xbfpP4\x13\xe2\xf3\xd2\xbf\x02\xc5\xa1\xa8\x8c\xe9\xde?\x05\xc6\xf6\x8f\xac\xd7\xd2?\x8a@}z\x92\xd0\x83\xbf\x85e\x9bz\xad\xe1\xe9\xbf\x15\xed\xd27P\xf9\xcf\xbf\xd7\x9a\xdb\x84\xf5\xad\xbd\xbfF\xe2r\xa93\xf7\xc0\xbf\xd13\'\xed\x85\x01\xd6\xbfh@\xfa\xc1\xafx\xc9?\xd8:\xc6e\t\xad\xd3\xbf\x07\xef\x11A\x0c\x90\xd9?\xa2\x000\xcc@\xb0\xd1\xbf`\xbbNv\xf4\x99\xb7?\xac\x04nf\xf4a\xe0\xbf)d\x08 \x88\x11\xe0?\xc8h\x85\xb0c\x8e\xde?\xa8\n\'\xaf\x04\x8e\xbd\xbf\xb0^#S\xc4\xb2~?R\xe0:\xf7\xfa\xd0\xce?L\xf5\x16\xcb\x92\x0f\xbe\xbf!\x85\xe1\xbf\x94>\xc4?>\xdbb<\x9e/\xbc?\x05c\x93\x84\xfe$\xcf?Q\x1c\xc0\xfd0\x08\xe3\xbf\x92z\x83\xc4\xaa/\xe4\xbf\xc2-\x05yT!\xd2?\x0e\x0f\xc8\x83\xa5\t\xaa\xbfz\xae\x17\xf8\x903\xa2\xbf\x14\x08\xf4\x98AB\xed?\x88\xff\xcbK*8\xb7\xbfJ\xe4h\xa0QT\x9e\xbf-\xcc\x01&,\xe9\xc4\xbf\x0c xn\xcc\r\xcc?c\xceK\xcd\xe5\xad\xd1\xbf*\xcbAML\xe3\xdc\xbfB\xa6\x89\xe0\x91\xa1\xc7?\xd3\x0f\xddX:\x8b\xb2\xbf\xb8\xaf\xb7\xb0\x1f\xa1\xe9\xbf\xa2\xf7\x02\tj]\xc2\xbf\xda\xf3\x06\t\xc2\x10\xcf\xbf\xade\x1f\xa1Pp\xc0\xbfHe\xe9$]\xdd\xc1?]\xb8\xc6y\xaaT\xa5\xbf\xfa=\xd3\x8eh\xfe\xd8\xbf7\xf0\xe9\x81|\xc8\xce?"\xc6Uj\x04\x95\xa1?~:wQM\xab\xe1\xbf\xb9\xad\x9c}b\x86\xd5?\x86r\x10w\xad\'\xc2\xbfT\xb5?9\xe5\xe4\xe2?' 
28 | tbg1 29 | (g2 30 | (I0 31 | tS'b' 32 | tRp6 33 | (I1 34 | (I10 35 | I8 36 | tg5 37 | I00 38 | S',\xf3\xd0\x90-\x07^?I\x18\xda7\xce\x97o?\xabN3YkGq?\xcf\xb3\x88\x8a\xbc\x91c?~A\xd5\xe7\x11~t?fT\x08=\xdb\xfau?\xb9\x9c\xcelB\xafm?\x85\x0c\xd0%\xd4Dg?\x10]vv\x9bto?R\xec~\x89V\xec@?"@\xaa#MLm?\x08{.\x8f\x87\xa3`?\x93D\xc1\xa3v"x?\xa2v\x1e\xff>*k?\x08p\x99\xca\x0b\x87c?b`P\xb2&zs??\xb5O,\x92\\@?\xdc\xfa\xa1B\xad:]?j\xd9\xc9Y\xf2\xd3`?\x18\xac\xe8\xb9O\xd4f?\xb9P)6M\xa1a?\x91m_m\xf0\xd8g?-\xec\t\xd6\x8a\xcee?\xd3\xa6B,\x9eFa?q\xac\x8a\xbf\xd2Ke?%\xc2/\xd9d\xfdd?:J_\xde\xf6\x88\xfe>w\xc9\xb3>\xb8\xe42?u\xce\xc8,\xb2\x95e?"\xd2\x13\xf0\xb9\xa9t?\xbf\xb3\xcb\x8fE\x14l?@\x16cQ\xc9Mj?c\x19K\x16\x8f\xd2~?T\xb0\x18\xe1Kbx?)<\xf7\x1do\x1bt?\xf3\x8eN\xe3\xcf\x96{?\xd3\x0c\xf0\xa6V\xb7+?\xb4JQ\x9eyy\x7f?\xef\x04\xac\xb4\x80Bm?k\x7f\xbb"1\x17q?\x89G\\&\x98\xf73?-\x88um7\x03L?\x00\xa6\xa2\x13\x08\x95Y?\xf6]F\xba\xe5\x88;?\x07\x87\'\x8b.\x93I?j\xb3k~\xbb\xf8\xda>J\x9ee\xfcW8`?\r\xeb%\xd0\xc6V`?#\x0b\xf0M\xdd\x8cP?t\xab\x00\x18H\x1bg?\x15\x0c\xae6\x9c\x9ck?\xcc)S\x04z\x87L?\xfeN\x9a3\xf9Lk?\x1e\xc4\xc9\x11\x12\xa5U?r\xa3\x05s\x08\x1fh?\xec#C\x84u@p?s\xea\xda\xff\xe5\x04\x17?*NM4\x0fWI?nbef\xb1\xd3a?\xf6\xe3\x19-\x99\xe7^?\xc9\x14E\xca\xea!p?z\xa2\xc7t\xdc\xeda?\x83\xced?\xd0\nj?:}\xa6{E3r?3\x0c\xc5\x99a\xccd?\xa1\x92\xd3\x02\xa1\xec\x1f?\xe7\xb7\xae\x8e\x0e:e?\x85[\xc7\xcb\xc8\xd8[?5\xe2W\x01\xaf\x9eo?\x13\x1c\xc42\xbaqo?\xcf\x10\x19\x9c\xf9#e?\xd4\xe0ch\xf98d?\xdc\x01\x13\xca\x7f\x15X?\xd3\x83"I\xe6\xafx?\xa6CK\x19\xab=\x80? \x05\xf7\'\x87=\x80?\xd3\xd5\xa2\xb3%j\x81?\x807\xd3\xdb\xd5\xa0W?Q_\xab\xede\x94s?\x9aq\xceO\xbah\x1c?' 39 | tbtp7 40 | . -------------------------------------------------------------------------------- /python/test/testdata/test_searcher_data.pkl: -------------------------------------------------------------------------------- 1 | cnumpy.core.multiarray 2 | _reconstruct 3 | p1 4 | (cnumpy 5 | ndarray 6 | p2 7 | (I0 8 | tS'b' 9 | tRp3 10 | (I1 11 | (I100 12 | I8 13 | tcnumpy 14 | dtype 15 | p4 16 | (S'f8' 17 | I0 18 | I1 19 | tRp5 20 | (I3 21 | S'<' 22 | NNNI-1 23 | I-1 24 | I0 25 | tbI00 26 | S'r y\xba\xb3\xb6\x02@\xaf\xa5Lb\t\xf9\x11@\x9d\xf7]\x17a\x13\x08\xc0+\xfd\xdb\x89\xacU\x1e@\x8a?I\xa3\xba\xc7\x18\xc0\xc4\xdb\xa0u\xa0\n\xeb?9\xe5\xf6\xf3W&\x11@\xb7\xf73Y\x00{ \xc0\xfeXK\x08R\xb5\x14\xc0\xbe\x0b2\x08\xb7\xd5\x1f\xc0\x99 r\x02\x83S!@\xedQ\x00\xca\xc1\x81\n\xc0\xf4D\x7fb\x94\x04\xf7?\x029\xff\x0b85\xf4\xbf\xd4\x0b\xe1\x07\xd7S#\xc0\xdc\xb2Y\xbc\xa27\xcb?\x0e\x16H\x18a\xc2\x06@\x1b\xc7\x91,i\x11\x10\xc0yw@\xe0\x0e\x04\x07@\xb7\xae\xcd>\x9c\xf7\x18\xc0j4\xe4\xf8AM\x11\xc0\xd4\xde\xd1\xdftB\x00\xc0\xe5\xc4B\x80\xd1@\x00@\xb0\xf8\x18\x90|L\n\xc0*\x14\xb7\xea\x01\x02\x1f\xc0U\xf4\x13\x97\x9aS\x1b\xc0\x16\x8e!\x11\x98\x81\r\xc0}\x97\x17\x1c\xa1\x82$\xc0U\x9a\xf6{\xda\xaf\x13@s7\xa4\xd9z\xbc\x17\xc0\x039\xacy\xf2\xac\x05\xc0\x07\x8f\x19\xd9\xeb\r\xf9\xbf<\x90Z\xab\x97\xac\x17@\xc6\nh\xe1\x13n\x02\xc0\xb2!\xcd\xf51\xe0\x18@\x06\x03\xfa\xb3\x86:\x17@P\x0b"\x11\x98\xbe\xe1\xbf\xb8\xa8\xbd7\x88\xa2\x14\xc0\xb3\xea#*\x14\x8b\t@=g\xca\xa2\xd8\x95\x12\xc0\x98{\xf6Fp\x00\x18\xc0F8my\x0e\xae%\xc0\xf2@\x8c\x07\xb8\x9e\xf7?\xce\x82`B\xfe\x8d\x1d\xc0NF\xf1\xcb\xc1\xf5\t\xc08N%"M\x84\x02\xc01\xd1`\x8e\xb9\x1a\xc0w\x9cn\x19\x90\xea\x1f\xc0\xf8^\xe8:&\xa8 
\xc0\xc9\xf6\xb8c\xe4\xc4\x12@\x0f\xe1\xe8\xe0T\x0b$\xc0\x8d\xd3\xf2\xf1\xe53\x1d@\xde\x031\xbf\xc7Y\x0e@\xc3\x03\xb3\xf2;F\x1e\xc0\x08\xee\xae\xa4{\xcc\xf3\xbf\x88"\x86&`\x07\x15\xc0\xa4c\xdf\xbe+\xdf\xc5?Sl^\xea\x81\x17*@\xe7\xbfr\x12\xfd\x89\x1d\xc0+A\xe07\x15\x07\x11\xc0F\x82\x95\xcfI\xac\x1a@\xe3\x86\xc6\x01\xd0p\x17@\x15\xc4\xc9}\xda+\x1b@"U\xa0Da\x88\x15@\xce\x1e!\x9b\xaf)\xfa\xbf\xd0h$\xcc\xba\x14\x10\xc0(\x12j50\xc2\x13@\xb7\xee\xbb\xf19\xbc\x15\xc0D\x1e!\xebC\xc9\'\xc0xG 7A\xe2\x16@\xd4\xbd>C\x05$\x18\xc08\xc0u\x9f:\xb3\xe2\xbf~\xa53dB\xa9 @\x00@\x8e\x90\xb7+\xa3\xbf\x86\xb1\xc1\x1f\xac\x92\x01\xc0\xce\xd3O6\xba\xf0\xf2?\xda\x1e&?=;\x14\xc0v\xb9\xec\xdeg\x02#@hD\x9e\xe51\xe7\xeb\xbf0\xec\xf4\t\x19=\x1a@\xdcXOx"Y\xe2\xbf@\xde\xb8O\x97\x18\xe2\xbfG\xb1\x81\x12\x15\xcb\x12\xc0\xec\xac\x0f\x1e\xc1\xd2\x0b\xc0\xaew\xf8\xfe,\x1d\x10@\xe4A5\x90h$\xe4\xbf\n\xa3:\xfa\x85\xdc\x18\xc0=vC\xf6\xe3a!@~\x9bTQ\xf5\x0b\x0b@@H\x92\xbe\xab\x90!@_\x0f]\x88\xf0\xc6\x12\xc0\x8c-B\xf5\xfc\xba\xff\xbfD\xd5\x02y"\x02\r\xc0\xf4YD\xbd\x89\xba\x16@\xe8EF\xcb\x1d\xe4\xe5\xbf\xd5\xae\xb6\xd5\xc1Z\x1e@\xc4\xcf\xe7ad\x96 \xc04\xa9\x8c\xef\xd8M\xf6\xbf\xe4\xc9\xddr|\xb1\x11\xc0o\x06Q\xce\xf3x\x1b@\x15\x84]\xfdL\x1a\x16\xc0\xf5r\xe9\xef\xbd&\x11@\xf92\x07 \x92:\x12\xc0~\x0e(\xc1\xe9\xbd\xfb\xbf\x10Re&"h\xd3?\xa1\xf7\'\xe0\xab|\xf4\xbf\tq&|)\x05\x19@\xccL"\x87F\xe6\x18@\xb6H\xf8\xcc\x15\xb6\x1b\xc0p\xfc}\x80\xdb\x19\x14@\x12\xab\xe0`\x19/\xf8?z8\xd9H\xfe\xae"@\xdd\xda\xf1\xb1\xfb\xa9\x04@\x106\xecq\xbbb\x01\xc0.\xdfi\xf6\x06\x9a\x00\xc0\x80\xe6\xe8tu\xba\x16@\xfb\x7f\xab\xcbb\x1a\x1f@\xde&\xb6\xfa\x90\xe1\x19@\xcct/Ko\x1d\xfd?\xe7\x9aFd\xcbS\t@l#\x89T\x0f}%\xc0\x93Z\xe8\x92\xe3\\\x12\xc0\\\xa1\xc0\xb6\x9bU\xef\xbf\xe8\xfd\x855\xd6p\x0e@ \xac\xddgw\xdd\xf4?p\xfd\xe0\xd8\xaeE*@\xb4\x03\xde\xff^?\x12\xc0\x92D\x96\x1c\xfa<\x0f@!D(\x16\xd0\x1d$@\xeaM\xf3\xc6+S\xfe?Lp\x0b\xe3V% \xc0F\x8f_)\xf6\xec\x00\xc0\x18\x83t\xef\xa1\xf3\xff?\xf0\xf4M\xce\xb4H\xfb\xbf\xec\x12w\xdb!\xcc"@h\xa3\x9a{w\xd7\x1f@\x12\x94\xf4a\xf8\x94\xfd\xbf\xe85\xd0\x10\x88\xbc\x10\xc00\xfd\xbc\xdf\xb3\x8c$\xc0\xbeTF\xe9\xcb\x9e\x0e\xc00\x8c44Q\xf2\xdc\xbf`\x03\x98\xd5\xcb\xc4\xfc\xbf\xd9\x0c\x8b|&\xf7\x1b@T\xb6\xa9\x7fKT\x0f\xc0|D\xf0\x12\x9e\xfb\xf2?K\x1aD\x83\xdd\xf3\x19@k\xa8\x8f!*!%\xc0\x8e+\xf0[V\x85"@\xc1\xc1\xe5\x10\x85\xbd\x16@0\x0c\x15b:/\xe1?\xce"Zis\x95\x05@\xa0\xa0\xd4\xf3\x1e\x13\xde\xbfV\x0f_\xea\xfe\x07\xe8\xbf \xa6\xe1\x1aX\x96\xbb?\x07\n\xc4\xec\x84E\x1a\xc0`IG1\xb4\xaa\xcf?\x0f\xb9\xa2\x85\x9c\x9f\x1e\xc0\x10#V\x04>\x04\n\xc0\x82*p\xba\x07\xe4\x0f\xc0\xf8!\tK\xff\xf8%@\xb2\xbd\x8e\xff\x02c\x0b@\x90@\\l\xcf]\x08\xc0\xd3\xb6\xbd\x10\xe0\xd8\'@\x84\x9c\xf67jN\x16@\xcc<(\xdf\r9&@>*I\xaa"\x06\x01@\x8a\xf0%\x84\xb5F\'\xc0FD\xedjHO\x06@\x89)\x8a\xd8 \x17 \xc0D\x12\xa0m\x16\x97\x18\xc0\xb8\xd1\x82\x82s\\\xd1?\x08\xc4\x86`\xe1\xe9 @0e!\x19\xa85 \xc0\x00\x15\x9b\'\xb9\xc5\xf5\xbf\x00\xfd\xec\x8eL\xcb\x01@\xa0zPT`V\xce\xbf\xa2\xcf9\x9b\xc6\xb2\xf4?\x10\xc24\x8eZ5\xfc\xbf4|U\x9e\x13\xe2!\xc0\xb4\xb8O]75\x15\xc07\x03K\xab&y%@\xe0\xb1\x02\xa0\n\x87\xf5?{\xe1(\xdf"P\x01@i\xe63m\x9b\xaf\x06@s \xe0\x93\xbc\x94%\xc0\x8dgM\x826\xd8\x16\xc0\x88\'Ri\xe4A#\xc0l\xe2\xde\xf4J_\x00\xc0\xaa\t\x13p\xdaW\x1f@\xb0-+@ 
\xc2\x03\xc0k\x86`\x06I\xd1!@\x9d?\x05\'\r\xa1\xf6\xbf\x84\xc8\xf7\xc1\x8c=&@j\x85%x\xdf\xf9\xfc?wt1\x00w~\x16\xc0\xf3\x83[\xfcv\xaf\x1a\xc0\xc25U[i\xf7\x07@pC\xe8\x9a\x94\x85\x11\xc0oJ\x90\xcd\x13)\xe8\xbf\xa8\x9e[A#\x08"@\x1e\x15\x94\xf4\x16\x18\x10@\x9d(\x8dK6\xc5\x02\xc0f\x82S`\xc1k&@K\x14\xae\x90\xe2\xed\x12\xc0\xcc\x04\xa4\xa1-\xca"@D\x91Y\x90\xf7\xe1\xe0\xbf\xaa\xc2\x19$\x84R$\xc0\xb2jh\xbe\x82\xf0\x05\xc0\xde\xb6\r=\'\xa9\xd4\xbfs\x94\xd6\xaa\x99\x11\xea?\xc38hP\xbd\xc1\x1a\xc0\xbe\x1a\x7f_:v!\xc0`\xef\x18:,*&\xc0\x00 \xc8\xfd~\xb0\x1b\xc0\xd6\x7f\xa5\x18)\xe5 \xc0T1qj\xec.\xf4\xbfb\x1b\x12\x16\xfb\' \xc0\x19(\xeaf\xa6\x8f\x1b@ 5,\x1ds\xfe\x10\xc0\x05\x0b6\xe7\xd9\xad\x16@\xca\xb5{T"\'\x13@\x98\xb4h\x99\n\x9c$\xc0UG][f\xb6\x19@\xad7g\xb82_\xf7?\xda\r\xdb\x91{\xf6\xfd\xbf\xb0\xe4\x88U9\x8b\x1b@~\x0f\'sP\xcd\x13@\xaf\x97\x1daM\xf0\x1a\xc0\xae+\x01I)1!@\x80\xeb&\x80\x1e?\xea\xbf+(,\xa82f\t@\xe6\xd0\x0f\x1a9a\x1f@\xd8\xb2\x0b\xd7\xecx\x1f@L\x0c\xfe\xf2+Z\x1e\xc0\x86\xeb\\\xc1\xd6\xf5\x1b@M\xeciM\xf6\xcf\x04\xc0\x00B\x86\xf3\x0co\xee?h{~\xa2\x1e\x1b%@@oVF*E\xc4\xbf\xdfS\x02\xefV\r\x12\xc0r\tm\xc3\x0e\xfc\x1d@\x94\xc9\xa0\xfe\xdd\x9e\x1d@\x1a6\xc3\xce\x97"\x1b@F\xaa\xa2W\xedN\x10\xc0\xff\x9cssj\x1a\x1c\xc0@\xaf\xe9\x02\xf2\xa2\x16\xc0T\x06\x11\xaa7W\x17@\xe0%>\x9f\xf4<\r\xc0\x9e\xbe\x08\x1dJA\x18\xc0,\xe9\xa5\x00\xb5n\xed?\x00=\x11%~.\x1a@b\xc8\x0b\xec\xc6\xa8$\xc0>\x0e\xc0y|\xd2\x1e@\x90\xad\x1aH\x1d\xc1\xdd?\xb4\x97X\xc1O\x91\x11\xc0\xe4\x13^\x94\xf6\x9b\x0c@\xf0\x05W\x870p"\xc0$\';\x84\xf9i\xfd?\xa4V\xd3x\xaa\x0f @\xa8\x9c\xda\xb1\x99\'\xe8\xbf\xe0\xac|\xac\xdb8\xfd?\x00e\x861\x15\x96\xf4?\x12 \x04\x05\xdf@\xfd\xbfK\xa3c\x1b\xa33\x13\xc0\xef\xff\xe3\xc3)\x10\x1f@\xee\x0b\xfe\xe0\xef<\x1a@\xbe\xfd\xd6\x11l\x18\x14@\xd3\xa8r0\xa5\xc8#\xc0\x9a\xc9\xb8\x9fR\n\x01@b&>S\r\xff\x19\xc0\xba\xc0t\xad\x7f\x05 @\xd2\xfd\xd4\xc8\x96i\x18\xc014uT\xa2?\x07@xJ\x05R\xeff\x16@\x04Tv\xce\xbd\xad\xf6?\xce\x9d9\xf8\xfdq!@\x90\xb8\'\x820\x9c\xf1?\xc8\x80H\xa8\xdc\xb2\x04@\xdcb\x99\xafYO!\xc0\x8c}S\xb6\xb8\x85\xf3\xbf\x80\xb5\xe0\x88\n1\x04\xc0\x9daE\x96R&\x1a\xc0\x80J\xcd\xe6V\xa5\xf4\xbf\xf7\xb3] \x87\xa7\x17@\xc1\xc1\xe5\x10\x85\xbd\x16@0\x0c\x15b:/\xe1?\xce"Zis\x95\x05@\xa0\xa0\xd4\xf3\x1e\x13\xde\xbf\xc7\x7f\x99\xa4\xb6\x98\x1d\xc0\x84\x92f\xc7\x0b\xde\xfe?\xf6\x11"g\x1cr\x11\xc0*bC\xd2\xf58\x01\xc0\xdf\xb3ZW.\x13\x17@B;{R\xd7T\x12@\xb9\x95\x8aFc\xb7\x18\xc0N\xd1\xa4\xe2&\x8a\x07\xc0\xf6\x19\xa5\r9t\xf1\xbf\xef\x1600\xbd\xd4\xea?H0\xcc\x80\xbd\xc1 \xc0\xd6\x9e\x03\xd9\xa8i\x16\xc0 k\xa8 
-4\xcc?\x84E(U\xa4\xe0\x1b\xc0\xa2\xe4\xa9\x8a\xfd\xbb#@s\xd7\xcdx\xb7R\x13@8"n\xf1F\xa7\x0f@\xe3\xe4bH[)\x12\xc0\xc0@\x8e\x01Y\x8b\xcc?O2\xc6A\xdf3\x03@X\xef\xf0\x0fv\x0f\x1b\xc0\x001\x00dg4\xc0?\xd4\xf8\x8c\x97dH\x04\xc0p\xeb\x1385H\xee\xbf\x03o1\x1aY\xc5\xfc?\r$^Qb\x1c\x0f@0j@\xc4=\xf5!\xc0\xb8\x00?\xb0=\x03\n\xc0t6\x04\xd6\xc0/)\xc0(\x08R\x8en^\x14@B\xb7\xf7K\xde\xa5#\xc0\x0e\x81\x9d\xb4\xca\x96\x02\xc0\xf3\x1dU^\xfb\xe5\x1d\xc0\xa1\x80\'\xbc`\x82\x13\xc0\x90@\x1c^,\x0c\x05@\x04>\xab\xdf!F"\xc0\x9f\xc0\xe8\xb2\xcfV\x12\xc0@\xc8\xae\x9fa(\x1e\x8c\xcb?R\xee\xc3\t\xdb\x0e\x16@\xbb\xcct\x06eC\x11\xc0U\x9a\xf6{\xda\xaf\x13@s7\xa4\xd9z\xbc\x17\xc0\x039\xacy\xf2\xac\x05\xc0\x07\x8f\x19\xd9\xeb\r\xf9\xbf\x18\x93\xb8w7\x1a\x0b@yrU\x01u\xc5\x1d@\x7f\xfd\xe0\xe3uS"@\x8d\xa2\x12\xd2\xf3\xed\x07\xc0?*\xaa\x03\xba\xe3#@!\x82k\x89$\x81\x16\xc0\x8c\x02\x83\x8b\x92y!\xc0L\xef\xfc_5\xc6\xb5\xbf\xa8\xf7\x8b)\xd6\xa8\x03\xc0\xe1\xe9e\xba~\xd2\x03@\xd6w\xc8\x13\x14[\t\xc0}\xfa{y\xa8w\x1d\xc0\xe0\xac|\xac\xdb8\xfd?\x00e\x861\x15\x96\xf4?\x12 \x04\x05\xdf@\xfd\xbfK\xa3c\x1b\xa33\x13\xc0 ^\x8e\x98B\x98\xc6\xbf\x90\xb1\x9e\xf3Ku\x0b@\x02,@\xb6|\xa2\x13@\xfa\x04&$\xf8\x80"@G\xf3\xd2\xd9\x8e\xd0!\xc0\x00\xea^H\x8d\xf8\x0c@A\xf2\xeei\x89\xca\x05@5\x07\xc7\xf0h\xa3\x1a@\xe3\xe5\xf5\x9e\xf3\x13\x0e@fcI\xf0\x943\x1e\xc05\xd8-]\x15\xbd"@\x9ctn\xe0\x1e@\xf8\xbfci\xec\x08)j\x18\xc0\x8b\x1f\xfe=\xd3\xa5 \xc0\xc2\xfe\xc0k\x11\xc7\x15@7\xfa\xb1\xe4\x18o\x11@O\xe8\xa1\xca\xdc\x1e#\xc0\x04\x17\x96x\x97\x83\xf0\xbf\xfc\xe2I:\xd1\xf1\x14@\xe9\xb1\x9b\xe4\x0b\xce\x18@&\x7fSk\x7f\xec\x12\xc0\x90\xce\x1e\x0f\x1ew\x06@\xd0%\xb5\x16\xf9o!@\xe0w\x04;\xcd\xd9\x1a@\x9e\x11ZD\x1b\xb9\x1e@\x00\x86\xd6\xd1%\xb1\x1f\xc0(\x1d\xebh\x0e{\x08@\x07j\x9e8\n\xa2\x15\xc0F\x82\x95\xcfI\xac\x1a@\xe3\x86\xc6\x01\xd0p\x17@\x15\xc4\xc9}\xda+\x1b@"U\xa0Da\x88\x15@\x08\xc76/S\xea\xff\xbf\r\x96\xb2\xd3\xd7\x94\x1b\xc0h\xef\x02\x1e-|\x13\xc0\xcew\xe3h\xb1\xc8\xe6\xbf\xdf\x858cw\x1f\xfe\xbf`\xfc\xef$^\xe1&@\xc9\x02\xd0H\x99g\x0b\xc0RG \x80\x16e\x0b@d\x02\xea\x1b02\x16\xc0!\xcal\xe9R#\x1c@\xe3E%\x89\xe3\xe9 @\x17\xdd\xed\x15\xd3\'\x13\xc0\x13\xfb\x8a\xac\xd6\x95!\xc0\xdc\xd7\\\xfc\xbb\xe2"@\xb1\xc0\x08Z\x19\xdf\x17@\x14\xf8\xe6\x9e\x1d\xfa\x13@Bh`C\x03\x03\xf8\xbf\x98\xf8\nN\xe86 \xc0\xea\xc4\xc0e\x8f\xa0\x14@\x8f\xe3/0[\t\x15\xc0jz\xf8L\x8f\xb1#@p[9},b\x10@\xa8\x91r\xd6h\xf2\xe2?\xf4.c\x96\xb1T\xf8\xbf\x11\x08\xb50+\xd4\x10\xc0\x00\xac\xd6\xad\x9b\xcf\xfe?\x84\xccW\xe8tn\t@P\x05\x05\x87S\xab\xde?\xe9/\x81\x98\x864\x1c\xc0\x9f\xba\xf1\x0fU\xd7\x11@h\xe3\xaa[\xb6\x8c\x1f@+\xec\x18\x118\xe2\x00\xc0\xfc\xf5\x0cm\xcc\xc0\x18\xc0H%A\xfb\xf9\x8b\x18@\xa2SI\xa1g\xfd\x19@l\r\x9aC\xe8\xc0%\xc0_!\x80dA\xd8\xf9\xbf\x80k}\xe8\x04B\x17@\xaa|\xa0\xdf\xf6A$\xc0\xd4V\x96\x8d\x15\xad\xfe?1\xc7\x84uA1\x01@\x82X\x89\xbeO\xbe\xe2\xbf6\x80\x9aG\xf1\'#@\xda\n\xd8\x18\x9e\xa5!@\xecl_V \xd6!\xc0\x88l\x89Py7\x0f\xc0\xba5\x9fE\xf0s\x0f\xc0\x18L\xdb,a\x10!@\xd7>P\x1d\xda\xb3\x1c\xc0Z\x04\x83\x85 
\x94\x05@-\xda\xc2\x14\x0c\xbc\x03\xc0\x9by\xee\xc3\xe5~\x16@\xdf\xe0:=\x02X\x11\xc0\xc0\x0f\xc1\n\xfd\x80\x08\xc0HU\xb4E\x19v\xfa?&K\xa3\xb6\xd2\x9d,@\xb6\xec\xdd\xa4gF"\xc0\x90F\xeb\xe4,W\xf9\xbf\xf6k\x9b\xe1\x8b]\xf6?\x16\x9aM\xdct\xce\xeb\xbfLF\xb6\xb3V)\x16@\x86\r\xa6\xd6\x94Q\x10@ygjo\xa8\x90\x07@9\x99\xce]\xc0?!@\xdd\xda\xf1\xb1\xfb\xa9\x04@\x106\xecq\xbbb\x01\xc0.\xdfi\xf6\x06\x9a\x00\xc0\x80\xe6\xe8tu\xba\x16@?b.A\x9aR\'@\xb1uw\x84\xa8\xc3\x07\xc0l0\x8f\xeb\xc3m$\xc0\xbf\xe3*\xbf\xcc\xc8\x15@\xc7\x7f\x99\xa4\xb6\x98\x1d\xc0\x84\x92f\xc7\x0b\xde\xfe?\xf6\x11"g\x1cr\x11\xc0*bC\xd2\xf58\x01\xc08gK\xeb\x95\x03\xe6?\xa2\xf1\x1c\xa2?\xaa\xf4\xbf\xd0\xae\xdb9\x0c\xf6\x0e\xc0\xa6\xe1\x0c\xd8\x9cS\x06\xc0\xd8\x92p\x90\xbdZ\x0e@\xc6\xc109\xe7\xfb\x04\xc0\xb8\x8c\xac\xbd\xf5y\t@=\xea\x9f\xda\xb3/\x08@\xaew\xf8\xfe,\x1d\x10@\xe4A5\x90h$\xe4\xbf\n\xa3:\xfa\x85\xdc\x18\xc0=vC\xf6\xe3a!@LBQ\x8a\xd1\xb5\x12\xc0\x88/e\x7f/\x8b"\xc0\xa2c(\x11\xf7\xf9\x13\xc0o\xb1C\xd1\rX"\xc0\x90\xa2Porn\x12@D\x05\xfa\xb47\x80(@\x82\x9b[m\xc4\x87\x15\xc0\xbf\xa3L\xd0\xcd0\x16\xc0.4\xfe\x9cS\x9b\x04@\xaf\xd9V\xcb]$\x0c@\xc04\x9f\x88\x9f\xfc\x02@o\xeb\xca^\xd8\xab\x19\xc0l\xea\xf2\xf3ov\xf4\xbf(\x8ay\x9dV\xac\xf7\xbf(36\x1cb\xef\xfb\xbfW\x01\xe9\n*\xd9$@\xe0\x9f~?g\xdc\xc2?<\xc0/Y\x08\x08\x11@\x06\x15pO\xbej"@\xb6\xc4\xbc_\x1c: @\xe4\x0e6\xd8b?\x12@\xb8\x1ce\x1c\xb1\xd3\x16@4\x0b5\xc8\x05\x9e\xf6?\xeb\xea\x1azg\x8e\x1e\xc0\x0fL\x1e\xeb&\n\x18@\xe8T/Xh\xeb\x15\xc0>3\xb4(\xc9\xb8\x0b\xc0\xa8:~\x05w\x0e!@\x01\x9bs\xd29\xef\x17@\xaeb\xf8\xa6\x8b^\'\xc0\xa4\xffC#>\xb9\x14\xc08h\xfd\xca\xd0\xae\x03\xc0\xe4g\x90\xcd3X\xfe\xbf\x1b\xa1;\x07\xe2\xa9\x17@h\xafLgM\xb0\x06@<\x8cF#\x87D\xfd\xbfA\x03\xf6\xc4\x1fh"\xc0\xd4\x00\x95\xe8\x90\xc6\x1c@g\xdc\xfa h\x12\x18@\x18\xbc\xc4\xc6Nb\x06\xc0]\xa6\x18c\xfe\xfc\x12@Z\xa2\xe9\x85\xbf@*\xc0\x08\xb7\xc9KU\xbb\x08\xc0\x07\x8b\x13j{\x18\x07@\x12]\xcbP\x88\xa0\x15\xc0\x8c\x0b\xc7\xea\t.\x17@\xfd\x07\x9a\'\xa1\x06\x11@I%\xe7n>\x1d"@\x0ee\x85}\xbc\x95$@\x00\x86\x1b\xd1\xf3\x97\x01@\xd2\xad\xbf;\x02e\x0e@\xd2\xde\xaa\xa0\xc5\\\x12@@\xc7\xf6\xb1C\xa3\xcd\xbf \xfcsI\xf9c\xfe?\xe1\x04P\xbd\x95\x03\x1d@y\xf9\xabf\xef\xba\x06@\xba}\xc4\x7f\x1b\x02\t\xc0\xe2z\t+\xe7;\x12@&<\xf0\xa4\xd7\x95\x19\xc0]\xd7I\xa7*\x8b\x1e@\xdb\x12R\xe6\xb9\xb4\x1c\xc0\x8a\x8f7O\xd6\x91\x1c\xc06\xe2=eN(\x14\xc05\x14\x1f\x12Pt\x11@8"n\xf1F\xa7\x0f@\xe3\xe4bH[)\x12\xc0\xc0@\x8e\x01Y\x8b\xcc?O2\xc6A\xdf3\x03@.+v\x1fH\xdd\x1e\xc0\x00\x17\xc3\x1aiI\xb3?\x02 T\x92\xf4\xad\xf8?\x0c\xc9\x83\xfe\x80\xd2\x1f\xc0\x85D/\x92\xc9\x92\xe5?\xae\xcex\xd5z\xbf\x1d\xc0\x8f}N\x14\xfb\x96\x1d\xc0\xf7\ru=U\xa9\x1c\xc0\xd7\xec\xb5\xc9\xd7\x19!\xc0\xd0\r\xa9\xe1t\xea @\x02\x8e\x14\x1fU\xcd\x16@p=\x8a\x908\xc1\xf0?$Kn\xfc\xda\xa0\x07\xc0\xd5\xe5\x17\xfa\x92\xe0\r@\xe3\x97Gk\x0c-\x19\xc0\n^V2\x16_"\xc0\xc6\x07;\xd2\x81p\x0e\xc0\x16\xe6 \x96F9\x10\xc0\xbeB\rd\xe3\xc1\x16@\xee_\x87\x8e}\xbc\x05@\x9e\x8a\x15s\x9d\x00\r@\x0e\xb6\xaf@\xfe\xb4\x0b\xc0\xd4~\x02\xc5\xe5M"@<\xa1\x9e\xe6\x99\x0b\xfb\xbf\xf0\xf2:\x06\x85\xdc\xbd\xbf\xb0+\x17R\xf2\xac\x05@\xd7g\xf1\x85\xc8\xad\x13\xc0\xba\xe1/J\x9d)\'\xc0\xc9\x05\xf6\x84\xc3\x89\x1a\xc0\x04~\x0cZ\xc3\x9f\xf9?\x9c\xef\xd3Ln\x88\xea?\x91\xeaV\xac\x8c\xa7\x13@\x97F\x89\x8a\xcc\x9f\x1e\xc0\x14\xa0\xb5\xabN\xfe\x13\xc0L\n\x0c\xf4h\\\xfc?nD\xe8\n\xc9\x99 @b\x1b\x12\x16\xfb\' \xc0\x19(\xeaf\xa6\x8f\x1b@ 5,\x1ds\xfe\x10\xc0\x05\x0b6\xe7\xd9\xad\x16@\x9a\xc9\xb8\x9fR\n\x01@b&>S\r\xff\x19\xc0\xba\xc0t\xad\x7f\x05 @\xd2\xfd\xd4\xc8\x96i\x18\xc0\x12\xd1=1$\xfc\x05\xc0\xa3(i\xbf4\x03\x10@=\xd3h\xfb\x06! 
\xc04v\xc3\xf2.\xa3\xf0?\x10\xe7\xba\xc9\x01t\xee?\xfd\xe0\xedd\xd5R!\xc0\x9cL\x89\xcfU\xe6\x14@\x03\xbfN\x04\xc8=\xe6?*\x14\xb7\xea\x01\x02\x1f\xc0U\xf4\x13\x97\x9aS\x1b\xc0\x16\x8e!\x11\x98\x81\r\xc0}\x97\x17\x1c\xa1\x82$\xc0\x17\xfa|\x01#\xcb\x17\xc0H/*.O\xfb\x03\xc0\xa8aN9\xc5\x93\x12\xc0\xcc\x8cfz\xca+\xf3\xbf\xce\x07\x8bnW\x00\x05@\xa3\x9b\x91\xed\x17\x7f\x1d@\xb4\x9a\x14F:\xab\x10@Q0/;}+\x16@\xda\xdb\xac>\xd3: \xc0.A\xaa\xd9\xe4\x10\x08@\x83\xc6\xea\xa8\x1f\xef @\xf5j\xfck\xa3\x85\x18\xc0\xe69\x90\xd4\xf1k\x17@\x1cY\x8b\xfb\x00\xc8\x1b\xc0\xf2\r\xe2\xbd\xcd\xa7!\xc0\xd8]\xeb\xaf3~\xe6?F\x82\x95\xcfI\xac\x1a@\xe3\x86\xc6\x01\xd0p\x17@\x15\xc4\xc9}\xda+\x1b@"U\xa0Da\x88\x15@s \xe0\x93\xbc\x94%\xc0\x8dgM\x826\xd8\x16\xc0\x88\'Ri\xe4A#\xc0l\xe2\xde\xf4J_\x00\xc0\x8f \x82]F|\x19\xc0\x1cK\xceJ\x8cw$\xc0|\x00N\xd9\xa1\x1c\x13\xc0B)\x91\x9bD\xce\x05\xc0-Np x\xf2\x0f@DQ\xb9\xc9i\xdd\xf6?\x9b\x86\x07\x91\xa4\xbf\x1f@\x17\x807\xe1\x8d\x89\x1f\xc0\xa3\xfdH\xf7\xfe"\x1c\xc0\x98\x90\xb7p\xf4\xcc\xfb\xbf\xdd\x83\xa6\'\xde\x97\x00@\xa6\xd2O!kw\r@1\xc7\x84uA1\x01@\x82X\x89\xbeO\xbe\xe2\xbf6\x80\x9aG\xf1\'#@\xda\n\xd8\x18\x9e\xa5!@\x91U\x90\xf3\xed\x7f%\xc0\x14\xdf\x9f\x94\xa5\xc7\x14@\r*=\xf1a\xb0\x04@ #\x88\xba\tK\x01\xc0\xd6a`\xf4KF\x17\xc0\xd6*\x16\xb0\xc5\xd2"\xc0L\xce+\xb0&\xd1\x17@"<\xed\'?\xc6\xe6?\xb8#\x9b&2\xf0\xe3\xbf\xbaF[\xec\xc5\x94#\xc0\x86\xed^\xcd|\xc1\x0e@\'\x13\xd8el\xfe\x17@\xa2\xcf9\x9b\xc6\xb2\xf4?\x10\xc24\x8eZ5\xfc\xbf4|U\x9e\x13\xe2!\xc0\xb4\xb8O]75\x15\xc0@\xbaA\xf0\xb2\xcc\x1f@\xc0\xf8\xf1\x96X\xdd\x12@X\xa0\xb8g\xbc\x1f\xf5\xbf^?\x85O\xc0N\x06\xc0:V\'<;\xc8\x0b@\xbe\'\xbb\xa60U%\xc0\xbdH+8\x88\xbf\x15\xc0\xb4>\xc3\xae\x0f\x89\x18@\x10\xca\xe5\xba\xcav\x0b\xc0\xd4I\x83\xef\x83\xeb"@\x95&T/\xbc\xb9\n\xc0\x06\xba\xdc-\x08\xbf\x02\xc0,a\xa29\xf7\xfc\xf6?\xce\xa5:\xc3O_!@\xd8\xc3y\x9e\x1d*\x13\xc0\x95\x8e#\xad\xa0\x84\x1e@\xb8\xcc\xd1\r\xd8\xc8\xf9\xbfx&0\xe5\xe6\x8b\x15\xc0\xb9\x80[\x05,k\x1c@\xf1W\xafb\x8d\xf1\x17\xc0|\xb5\x83\xf8M\x1c\xe3\xbf\xa4j\x898F\x1c\x1e\xc0p\x8fl\xf3\xe9y\x10@\xffc\x1dJ@\xd6#\xc0\x06]\xe5\x04\x04*\x07\xc0\xd3\xf7a\xf5\xc3a$\xc0\xeb\xbb\xae\xf4E\xea\xe9\xbf.k*\x03\xee\x81\x08@d\xf41bX\xdb\xf1?\x8e8\xaad\xa9o\xf1\xbf\x94\xa8v\x8bK\x9f%\xc0\xcc\x9f\xf6\xb2\x04:\xf9\xbf\x88\xd8UoGh\x0f@ \r\x9b\xa5A\x0e\xeb\xbf\x07\xfd&M#\x86\x06@-\'\xe7\x808\xf8$@\xb60\xd2b\xcd!\x18@|\xfb\xedEmy\x1e\xc0#\xc3\x81\xa1+\xd5\x1b\xc0\x0cJ)u\xdf-\x08\xc0P\x0b"\x11\x98\xbe\xe1\xbf\xb8\xa8\xbd7\x88\xa2\x14\xc0\xb3\xea#*\x14\x8b\t@=g\xca\xa2\xd8\x95\x12\xc0' 27 | tb. -------------------------------------------------------------------------------- /python/test/tests.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 
3 | from nose.tools import assert_true, assert_equal 4 | 5 | import pickle as pkl 6 | import sys 7 | import os 8 | import numpy as np 9 | from sklearn.model_selection import train_test_split 10 | 11 | sys.path.insert(1, os.path.abspath('..')) 12 | from lopq.model import LOPQModel, eigenvalue_allocation, accumulate_covariance_estimators, compute_rotations_from_accumulators 13 | from lopq.search import LOPQSearcher, LOPQSearcherLMDB 14 | from lopq.eval import compute_all_neighbors, get_cell_histogram, get_recall 15 | 16 | ######################################## 17 | # Helpers 18 | ######################################## 19 | 20 | 21 | relpath = lambda x: os.path.abspath(os.path.join(os.path.dirname(__file__), x)) 22 | 23 | 24 | def load_oxford_data(): 25 | from lopq.utils import load_xvecs 26 | 27 | data = load_xvecs(relpath('../../data/oxford/oxford_features.fvecs')) 28 | return data 29 | 30 | 31 | def make_random_model(): 32 | m = LOPQModel(V=5, M=4, subquantizer_clusters=10) 33 | m.fit(np.random.RandomState(42).rand(200, 8), n_init=1) 34 | return m 35 | 36 | ######################################## 37 | # Tests 38 | ######################################## 39 | 40 | 41 | def test_eigenvalue_allocation(): 42 | a = pkl.load(open(relpath('./testdata/test_eigenvalue_allocation_input.pkl'))) 43 | 44 | vals, vecs = np.linalg.eigh(a) 45 | res = eigenvalue_allocation(4, vals) 46 | 47 | expected = np.array([ 48 | 63, 56, 52, 48, 44, 40, 36, 30, 26, 22, 18, 14, 10, 6, 3, 0, 49 | 62, 57, 53, 51, 45, 41, 39, 33, 32, 31, 29, 25, 21, 17, 13, 9, 50 | 61, 58, 54, 49, 47, 42, 38, 34, 28, 24, 20, 16, 12, 8, 5, 2, 51 | 60, 59, 55, 50, 46, 43, 37, 35, 27, 23, 19, 15, 11, 7, 4, 1 52 | ]) 53 | 54 | assert_true(np.equal(res, expected).all()) 55 | 56 | 57 | def test_eigenvalue_allocation_normalized_features(): 58 | eigenvalues = np.array([ 59 | 2.02255824, 1.01940991, 0.01569471, 0.01355569, 0.01264379, 60 | 0.01137654, 0.01108961, 0.01054673, 0.01023358, 0.00989679, 61 | 0.00939045, 0.00900322, 0.00878857, 0.00870027, 0.00850136, 62 | 0.00825236, 0.00813437, 0.00800231, 0.00790201, 0.00782219, 63 | 0.00763405, 0.00752334, 0.00739174, 0.00728246, 0.00701366, 64 | 0.00697365, 0.00677283, 0.00669658, 0.00654397, 0.00647679, 65 | 0.00630645, 0.00621057 66 | ]) 67 | indices = eigenvalue_allocation(2, eigenvalues) 68 | 69 | first_half = eigenvalues[indices[:16]] 70 | second_half = eigenvalues[indices[16:]] 71 | diff = np.abs(np.sum(np.log(first_half)) - np.sum(np.log(second_half))) 72 | assert_true(diff < .1, "eigenvalue_allocation is not working correctly") 73 | 74 | 75 | def test_accumulate_covariance_estimators(): 76 | data, centroids = pkl.load(open(relpath('./testdata/test_accumulate_covariance_estimators_input.pkl'))) 77 | expected = pkl.load(open(relpath('./testdata/test_accumulate_covariance_estimators_output.pkl'))) 78 | 79 | actual = accumulate_covariance_estimators(data, centroids) 80 | 81 | # Summed residual outer products 82 | assert_true(np.allclose(expected[0], actual[0])) 83 | 84 | # Summed residuals 85 | assert_true(np.allclose(expected[1], actual[1])) 86 | 87 | # Assignment count per cluster 88 | assert_true(np.array_equal(expected[2], actual[2])) 89 | 90 | # Assignments over data 91 | assert_true(np.array_equal(expected[3], actual[3])) 92 | 93 | # Residual data 94 | assert_true(np.allclose(expected[4], actual[4])) 95 | 96 | 97 | def test_compute_rotations_from_accumulators(): 98 | 99 | A, mu, count, num_buckets = 
pkl.load(open(relpath('./testdata/test_compute_rotations_from_accumulators_input.pkl'))) 100 | expected = pkl.load(open(relpath('./testdata/test_compute_rotations_from_accumulators_output.pkl'))) 101 | 102 | actual = compute_rotations_from_accumulators(A, mu, count, num_buckets) 103 | 104 | # Rotations 105 | assert_true(np.allclose(expected[0], actual[0])) 106 | 107 | # Mean residuals 108 | assert_true(np.allclose(expected[1], actual[1])) 109 | 110 | 111 | def test_reconstruction(): 112 | m = LOPQModel.load_proto(relpath('./testdata/random_test_model.lopq')) 113 | 114 | code = ((0, 1), (0, 1, 2, 3)) 115 | r = m.reconstruct(code) 116 | expected = [-2.27444688, 6.47126941, 4.5042611, 4.76683476, 0.83671082, 9.36027283, 8.11780532, 6.34846377] 117 | 118 | assert_true(np.allclose(expected, r)) 119 | 120 | 121 | def test_oxford5k(): 122 | 123 | random_state = 40 124 | data = load_oxford_data() 125 | train, test = train_test_split(data, test_size=0.2, random_state=random_state) 126 | 127 | # Compute distance-sorted neighbors in training set for each point in test set 128 | nns = compute_all_neighbors(test, train) 129 | 130 | # Fit model 131 | m = LOPQModel(V=16, M=8) 132 | m.fit(train, n_init=1, random_state=random_state) 133 | 134 | # Assert correct code computation 135 | assert_equal(m.predict(test[0]), ((3, 2), (14, 164, 83, 49, 185, 29, 196, 250))) 136 | 137 | # Assert low number of empty cells 138 | h = get_cell_histogram(train, m) 139 | assert_equal(np.count_nonzero(h == 0), 6) 140 | 141 | # Assert true NN recall on test set 142 | searcher = LOPQSearcher(m) 143 | searcher.add_data(train) 144 | recall, _ = get_recall(searcher, test, nns) 145 | assert_true(np.all(recall > [0.51, 0.92, 0.97, 0.97])) 146 | 147 | # Test partial fitting with just coarse quantizers 148 | m2 = LOPQModel(V=16, M=8, parameters=(m.Cs, None, None, None)) 149 | m2.fit(train, n_init=1, random_state=random_state) 150 | 151 | searcher = LOPQSearcher(m2) 152 | searcher.add_data(train) 153 | recall, _ = get_recall(searcher, test, nns) 154 | assert_true(np.all(recall > [0.51, 0.92, 0.97, 0.97])) 155 | 156 | # Test partial fitting with coarse quantizers and rotations 157 | m3 = LOPQModel(V=16, M=8, parameters=(m.Cs, m.Rs, m.mus, None)) 158 | m3.fit(train, n_init=1, random_state=random_state) 159 | 160 | searcher = LOPQSearcher(m3) 161 | searcher.add_data(train) 162 | recall, _ = get_recall(searcher, test, nns) 163 | assert_true(np.all(recall > [0.51, 0.92, 0.97, 0.97])) 164 | 165 | 166 | def test_proto(): 167 | import os 168 | 169 | filename = './temp_proto.lopq' 170 | m = make_random_model() 171 | m.export_proto(filename) 172 | m2 = LOPQModel.load_proto(filename) 173 | 174 | assert_equal(m.V, m2.V) 175 | assert_equal(m.M, m2.M) 176 | assert_equal(m.subquantizer_clusters, m2.subquantizer_clusters) 177 | 178 | assert_true(np.allclose(m.Cs[0], m2.Cs[0])) 179 | assert_true(np.allclose(m.Rs[0], m2.Rs[0])) 180 | assert_true(np.allclose(m.mus[0], m2.mus[0])) 181 | assert_true(np.allclose(m.subquantizers[0][0], m.subquantizers[0][0])) 182 | 183 | os.remove(filename) 184 | 185 | 186 | def test_mat(): 187 | import os 188 | 189 | filename = './temp_mat.mat' 190 | m = make_random_model() 191 | m.export_mat(filename) 192 | m2 = LOPQModel.load_mat(filename) 193 | 194 | assert_equal(m.V, m2.V) 195 | assert_equal(m.M, m2.M) 196 | assert_equal(m.subquantizer_clusters, m2.subquantizer_clusters) 197 | 198 | assert_true(np.allclose(m.Cs[0], m2.Cs[0])) 199 | assert_true(np.allclose(m.Rs[0], m2.Rs[0])) 200 | assert_true(np.allclose(m.mus[0], 
m2.mus[0]))
 201 | assert_true(np.allclose(m.subquantizers[0][0], m2.subquantizers[0][0]))
 202 | 
 203 | os.remove(filename)
 204 | 
 205 | 
 206 | def searcher_instance_battery(searcher, q):
 207 | """
 208 | Helper to perform a battery of searcher tests.
 209 | """
 210 | retrieved, visited = searcher.get_result_quota(q)
 211 | assert_equal(len(retrieved), 12)
 212 | assert_equal(visited, 3)
 213 | 
 214 | retrieved, visited = searcher.search(q)
 215 | assert_equal(len(retrieved), 10)
 216 | assert_equal(visited, 3)
 217 | 
 218 | retrieved, visited = searcher.get_result_quota(q, quota=20)
 219 | assert_equal(len(retrieved), 28)
 220 | assert_equal(visited, 5)
 221 | 
 222 | retrieved, visited = searcher.search(q, quota=20)
 223 | assert_equal(len(retrieved), 20)
 224 | assert_equal(visited, 5)
 225 | 
 226 | retrieved, visited = searcher.search(q, quota=20, limit=10)
 227 | assert_equal(len(retrieved), 10)
 228 | assert_equal(visited, 5)
 229 | 
 230 | 
 231 | def test_searcher():
 232 | data = pkl.load(open(relpath('./testdata/test_searcher_data.pkl')))
 233 | m = LOPQModel.load_proto(relpath('./testdata/random_test_model.lopq'))
 234 | 
 235 | q = np.ones(8)
 236 | 
 237 | # Test add_data
 238 | searcher = LOPQSearcher(m)
 239 | searcher.add_data(data)
 240 | searcher_instance_battery(searcher, q)
 241 | 
 242 | # Test add_codes
 243 | searcher = LOPQSearcher(m)
 244 | codes = [m.predict(x) for x in data]
 245 | searcher.add_codes(codes)
 246 | searcher_instance_battery(searcher, q)
 247 | 
 248 | 
 249 | def test_searcher_lmdb():
 250 | import shutil
 251 | 
 252 | data = pkl.load(open(relpath('./testdata/test_searcher_data.pkl')))
 253 | m = LOPQModel.load_proto(relpath('./testdata/random_test_model.lopq'))
 254 | 
 255 | lmbd_test_path = './test_lopq_lmbd'
 256 | q = np.ones(8)
 257 | 
 258 | # Test add_data
 259 | searcher = LOPQSearcherLMDB(m, lmbd_test_path)
 260 | searcher.add_data(data)
 261 | searcher_instance_battery(searcher, q)
 262 | 
 263 | # Clean up
 264 | shutil.rmtree(lmbd_test_path)
 265 | 
 266 | # Test add_codes
 267 | searcher = LOPQSearcherLMDB(m, lmbd_test_path)
 268 | codes = [m.predict(x) for x in data]
 269 | searcher.add_codes(codes)
 270 | searcher_instance_battery(searcher, q)
 271 | 
 272 | # Clean up
 273 | shutil.rmtree(lmbd_test_path)
 274 | 
 275 | 
 276 | def test_proto_partial():
 277 | import os
 278 | 
 279 | filename = './temp_proto_partial.lopq'
 280 | c = (np.random.rand(8, 8), np.random.rand(8, 8))
 281 | m = LOPQModel(parameters=(c, None, None, None))
 282 | m.export_proto(filename)
 283 | m2 = LOPQModel.load_proto(filename)
 284 | 
 285 | assert_equal(m.V, m2.V)
 286 | assert_equal(m.M, m2.M)
 287 | assert_equal(m.subquantizer_clusters, m2.subquantizer_clusters)
 288 | 
 289 | assert_true(np.allclose(m.Cs[0], m2.Cs[0]))
 290 | assert_true(m.Rs == m2.Rs)
 291 | assert_true(m.mus == m2.mus)
 292 | assert_true(m.subquantizers == m2.subquantizers)
 293 | 
 294 | os.remove(filename)
 295 | 
--------------------------------------------------------------------------------
/python/tox.ini:
--------------------------------------------------------------------------------
 1 | # Tox (http://tox.testrun.org/) is a tool for running tests
 2 | # in multiple virtualenvs. This configuration file will run the
 3 | # test suite on all supported python versions. To use it, "pip install tox"
 4 | # and then run "tox" from this directory.
5 | 6 | [tox] 7 | envlist = py27 8 | 9 | [testenv] 10 | deps= 11 | nose 12 | nose-cov 13 | coveralls 14 | 15 | commands= 16 | nosetests --exe --with-xunit --xunit-file=nosetests.xml --with-coverage --cover-xml --cover-erase --cover-package=lopq --cover-xml-file=cobertura.xml test/tests.py 17 | 18 | [flake8] 19 | filename= *.py 20 | show-source = False 21 | 22 | # H104 File contains nothing but comments 23 | # H405 multi line docstring summary not separated with an empty line 24 | # H803 Commit message should not end with a period (do not remove per list discussion) 25 | # H904 Wrap long lines in parentheses instead of a backslash 26 | ignore = H104,H405,H803,H904 27 | 28 | builtins = _ 29 | exclude=.venv,.git,.tox,build,dist,docs,*lib/python*,*egg,tools,vendor,.update-venv,*.ini,*.po,*.pot,lopq_model_pb2.py 30 | max-line-length = 160 31 | 32 | [testenv:pep8] 33 | deps= 34 | flake8 35 | commands = 36 | flake8 {posargs} 37 | 38 | -------------------------------------------------------------------------------- /scripts/example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | import sys 4 | import os 5 | 6 | # Add the lopq module - not needed if they are available in the python environment 7 | sys.path.append(os.path.abspath('../python')) 8 | 9 | import numpy as np 10 | from sklearn.cross_validation import train_test_split 11 | 12 | from lopq import LOPQModel, LOPQSearcher 13 | from lopq.eval import compute_all_neighbors, get_recall 14 | from lopq.model import eigenvalue_allocation 15 | 16 | 17 | def load_oxford_data(): 18 | from lopq.utils import load_xvecs 19 | 20 | data = load_xvecs('../data/oxford/oxford_features.fvecs') 21 | return data 22 | 23 | 24 | def pca(data): 25 | """ 26 | A simple PCA implementation that demonstrates how eigenvalue allocation 27 | is used to permute dimensions in order to balance the variance across 28 | subvectors. There are plenty of PCA implementations elsewhere. What is 29 | important is that the eigenvalues can be used to compute a variance-balancing 30 | dimension permutation. 31 | """ 32 | 33 | # Compute mean 34 | count, D = data.shape 35 | mu = data.sum(axis=0) / float(count) 36 | 37 | # Compute covariance 38 | summed_covar = reduce(lambda acc, x: acc + np.outer(x, x), data, np.zeros((D, D))) 39 | A = summed_covar / (count - 1) - np.outer(mu, mu) 40 | 41 | # Compute eigen decomposition 42 | eigenvalues, P = np.linalg.eigh(A) 43 | 44 | # Compute a permutation of dimensions to balance variance among 2 subvectors 45 | permuted_inds = eigenvalue_allocation(2, eigenvalues) 46 | 47 | # Build the permutation into the rotation matrix. One can alternately keep 48 | # these steps separate, rotating and then permuting, if desired. 49 | P = P[:, permuted_inds] 50 | 51 | return P, mu 52 | 53 | 54 | def main(): 55 | """ 56 | A brief demo script showing how to train various LOPQ models with brief 57 | discussion of trade offs. 58 | """ 59 | 60 | # Get the oxford dataset 61 | data = load_oxford_data() 62 | 63 | # Compute PCA of oxford dataset. See README in data/oxford for details 64 | # about this dataset. 65 | P, mu = pca(data) 66 | 67 | # Mean center and rotate the data; includes dimension permutation. 68 | # It is worthwhile see how this affects recall performance. 
On this
 69 | # dataset, which is already PCA'd from higher dimensional features,
 70 | # this additional step to variance balance the dimensions typically
 71 | # improves recall@1 by 3-5%. The benefit can be much greater depending
 72 | # on the dataset.
 73 | data = data - mu
 74 | data = np.dot(data, P)
 75 | 
 76 | # Create a train and test split. The test split will become
 77 | # a set of queries for which we will compute the true nearest neighbors.
 78 | train, test = train_test_split(data, test_size=0.2)
 79 | 
 80 | # Compute distance-sorted neighbors in training set for each point in test set.
 81 | # These will be our groundtruth for recall evaluation.
 82 | nns = compute_all_neighbors(test, train)
 83 | 
 84 | # Fit model
 85 | m = LOPQModel(V=16, M=8)
 86 | m.fit(train, n_init=1)
 87 | 
 88 | # Note that we didn't specify a random seed for fitting the model, so different
 89 | # runs will be different. You may also see a warning that some local projections
 90 | # can't be estimated because too few points fall in a cluster. This is ok for the
 91 | # purposes of this demo, but you might want to avoid this by increasing the amount
 92 | # of training data or decreasing the number of clusters (the V hyperparameter).
 93 | 
 94 | # With a model in hand, we can test its recall. We populate a LOPQSearcher
 95 | # instance with data and get recall stats. By default, we will retrieve 1000
 96 | # ranked results for each query vector for recall evaluation.
 97 | searcher = LOPQSearcher(m)
 98 | searcher.add_data(train)
 99 | recall, _ = get_recall(searcher, test, nns)
 100 | print 'Recall (V=%d, M=%d, subquants=%d): %s' % (m.V, m.M, m.subquantizer_clusters, str(recall))
 101 | 
 102 | # We can experiment with other hyperparameters without discarding all
 103 | # parameters every time. Here we train a new model that uses the same coarse
 104 | # quantizers but a higher number of subquantizers, i.e. we increase M.
 105 | m2 = LOPQModel(V=16, M=16, parameters=(m.Cs, None, None, None))
 106 | m2.fit(train, n_init=1)
 107 | 
 108 | # Let's evaluate again.
 109 | searcher = LOPQSearcher(m2)
 110 | searcher.add_data(train)
 111 | recall, _ = get_recall(searcher, test, nns)
 112 | print 'Recall (V=%d, M=%d, subquants=%d): %s' % (m2.V, m2.M, m2.subquantizer_clusters, str(recall))
 113 | 
 114 | # The recall is probably higher. We got better recall with a finer quantization
 115 | # at the expense of more data required for index items.
 116 | 
 117 | # We can also hold both coarse quantizers and rotations fixed and see what
 118 | # increasing the number of subquantizer clusters does to performance.
 119 | m3 = LOPQModel(V=16, M=8, subquantizer_clusters=512, parameters=(m.Cs, m.Rs, m.mus, None))
 120 | m3.fit(train, n_init=1)
 121 | 
 122 | searcher = LOPQSearcher(m3)
 123 | searcher.add_data(train)
 124 | recall, _ = get_recall(searcher, test, nns)
 125 | print 'Recall (V=%d, M=%d, subquants=%d): %s' % (m3.V, m3.M, m3.subquantizer_clusters, str(recall))
 126 | 
 127 | # The recall is probably better than the first but worse than the second. We increased recall
 128 | # only a little by increasing the number of model parameters (double the subquantizer centroids),
 129 | # the index storage requirement (another bit for each fine code), and distance computation time
 130 | # (double the subquantizer centroids).
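    # As a final aside (a sketch, not part of the original demo), individual queries
    # can also be issued directly against a searcher rather than only through
    # get_recall. `search` returns the retrieved items along with the number of
    # multi-index cells that were visited while filling the result quota.
    results, visited = searcher.search(test[0])
    print 'Retrieved %d results after visiting %d cells' % (len(results), visited)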
131 | 132 | 133 | if __name__ == '__main__': 134 | main() 135 | -------------------------------------------------------------------------------- /scripts/query_runtime.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | from math import sqrt, ceil 4 | 5 | 6 | def multiseq_flops(V, D): 7 | """ 8 | Given the number of coarse clusters and the dimension of the data, 9 | compute the number of flops to required to rank each coarse vocabulary 10 | to the query. 11 | """ 12 | # (total coarse vocabulary) * (dims per coarse split) * (flops per squared distance) 13 | return (2 * V) * (D / 2) * 2 14 | 15 | 16 | def cluster_rotation_flops(D): 17 | """ 18 | Given the dimension of the data, compute the number of flops for a 19 | single local projection. 20 | """ 21 | D2 = D / 2 22 | return D2 ** 2 + D2 23 | 24 | 25 | def subquantizer_flops(D, M, clusters=256): 26 | """ 27 | Given the dimension of the data, the number of subquantizers and the 28 | subquantizer vocabulary size, compute the number of flops to compute 29 | a projected query's LOPQ distance for a single half of the query. 30 | """ 31 | # (subquants per half) * (dims per subquant) * (cluster per subquant) * (flops per squared distance) 32 | return (M / 2) * (D / M) * clusters * 2 33 | 34 | 35 | def total_rank_flops(D, M, N, cells, badness=0.5): 36 | """ 37 | Given the dimension of the data, the number of subquantizers, the number 38 | of results to rank, the number of multi-index cells retrieved by the query, 39 | and a badness measure that interpolates between best case and worst case 40 | in terms of reusable (cacheable) subquantizer distance computations, compute 41 | the number of flops to rank the N results. 42 | 43 | The 'badness' will vary query to query and is determined by the data distribution. 44 | """ 45 | # Corresponds to traversing a row or column of the multi-index grid 46 | worst_case = cells + 1 47 | 48 | # Corresponds to traversing a square in the multi-index grid 49 | best_case = 2 * sqrt(cells) 50 | 51 | # Interpolated number of clusters 52 | num_clusters = ceil(worst_case * badness + best_case * (1 - badness)) 53 | 54 | # (total local projections required) + (total number of sums to compute distance) 55 | return num_clusters * (cluster_rotation_flops(D) + subquantizer_flops(D, M)) + N * (M - 1) 56 | 57 | 58 | def brute_force_flops(D, N): 59 | """ 60 | Given the data dimension and the number of results to rank, compute 61 | the number of flops for brute force exact distance computation. 62 | """ 63 | return N * (3 * D) 64 | 65 | 66 | def ratio(D, M, N, cells, badness=0.5): 67 | return total_rank_flops(D, M, N, cells, badness) / brute_force_flops(D, N) 68 | -------------------------------------------------------------------------------- /spark/README.md: -------------------------------------------------------------------------------- 1 | # Spark 2 | 3 | This is an implementation of LOPQ training for [Apache Spark](https://spark.apache.org/). Spark's in-memory execution model is well-suited to LOPQ training since there are multiple steps of clustering that involve repeated access to the same data. The scripts provided here run with pyspark and use core functionality implemented in the `lopq` python module. 
4 | 5 | #### A note about Spark environments 6 | 7 | The following usage examples assume that you have a well configured Spark environment suited to the available hardware. Additionally, we assume that the python environment available on both the Spark driver and executors contains all the necessary dependencies, namely the modules listed in `python/requirements.txt` as well as the `lopq` module itself. The [Anaconda](https://www.continuum.io/why-anaconda) environment is a good starting point. At the time of writing, it contains all required dependencies by default except the `protobuf` module, which can be easily installed. To distribute the `lopq` module itself, you could either install it into the environment running on your Spark cluster, or submit it with the Spark job. For example, you can zip the module from the `python/` directory (`zip -r lopq.zip lopq/`) and then submit this zip file with the `--py-files` argument. More information about submitting jobs to Spark is available [here](https://spark.apache.org/docs/latest/submitting-applications.html). 8 | 9 | ## PCA Training 10 | 11 | A recommended preprocessing step for training is to PCA and variance balance the raw data vectors to produce the LOPQ data vectors, i.e. the vectors that LOPQ will quantize. The PCA step is important because it axis-aligns the data and optionally reduces the dimensionality, resulting in better quantization. The variance balancing step permutes the dimensions of the PCA'd vectors so that the first half and second half of the data vectors have roughly the same total variance, which makes the LOPQ coarse codes much better at quantizing the data since each half will be equally "important". The benefit of PCA, dimensionality reduction, and variance balancing in terms of retrieval performance of the downstream LOPQ model will vary based on the data, but it has been seen to provide considerable improvements in many contexts. 12 | 13 | The `train_pca.py` script is provided to compute PCA parameters on Spark. It will output a pickled dict of PCA parameters - refer to `train_pca.py` for the contents of this dict. See discussion of data handling in the LOPQ Training section below to learn about loading custom data formats. 14 | 15 | After the PCA parameters are computed, the PCA matrix must be truncated to the desired final dimension and the two halves must be variance balanced by permuting the PCA matrix. The `pca_preparation.py` script is provided to do these two preparation steps. Afterwards the training data can be transformed before LOPQ training, perhaps via a data UDF (discussed below). 16 | 17 | #### Available parameters 18 | 19 | | Command line arg | Default | Description | 20 | | ----------------------------- | ------- | ------------------------------------------------------------------------------ | 21 | | --data | None | hdfs path to input data | 22 | | --data_udf | None | optional module name contained a `udf` function to load training data | 23 | | --seed | None | optional random seed | 24 | | --sampling_ratio | 1.0 | proportion of data to sample for training | 25 | | --agg_depth | 4 | depth of tree aggregation for computing covariance - increase if you have driver memory issues | 26 | | --output | None | hdfs output path | 27 | 28 | 29 | ## LOPQ Training 30 | 31 | The `train_model.py` script can be configured to run full or partial training of LOPQ models on Spark. The script can resume training from an existing model, using some parameters from the existing model. 
An existing model can be provided to the script as a pickle file. The `--steps` parameter indicates which steps of training to perform; `0` indicates coarse clustering, `1` indicates rotation fitting, and `2` indicates subquantizer clustering. The default is for all training steps to be performed.
 32 | 
 33 | #### Available parameters
 34 | 
 35 | | Command line arg | Default | Description |
 36 | | ----------------------------- | ------- | ------------------------------------------------------------------------------ |
 37 | | --data | None | hdfs path to input data |
 38 | | --data_udf | None | optional module name containing a `udf` function to load training data |
 39 | | --seed | None | optional random seed |
 40 | | --sampling_ratio | 1.0 | proportion of data to sample for training |
 41 | | --subquantizer_sampling_ratio | 1.0 | proportion of data to subsample for subquantizer training |
 42 | | --existing_model_pkl | None | a pickled LOPQModel from which to extract existing parameters |
 43 | | --existing_model_proto | None | a protobuf of existing parameters |
 44 | | --V | None | number of coarse clusters |
 45 | | --M | None | total number of subquantizers |
 46 | | --subquantizer_clusters | 256 | number of subquantizer clusters |
 47 | | --steps | 0,1,2 | comma-separated list of integers indicating which steps of training to perform |
 48 | | --model_pkl | None | hdfs path to save pickle file of resulting LOPQModel |
 49 | | --model_proto | None | hdfs path to save protobuf file of resulting model parameters |
 50 | 
 51 | #### Usage
 52 | 
 53 | Here is an example of training a full model from scratch and saving the model parameters as both a pickle file and a protobuf file:
 54 | 
 55 | ```bash
 56 | spark-submit train_model.py \
 57 |     --data /hdfs/path/to/data \
 58 |     --V 16 \
 59 |     --M 8 \
 60 |     --model_pkl /hdfs/output/path/model.pkl \
 61 |     --model_proto /hdfs/output/path/model.lopq
 62 | ```
 63 | 
 64 | By providing an existing model, the script can reuse existing parameters and run only the training steps needed for the remaining parameters. This is useful when you want to explore different hyperparameters without retraining everything from scratch. Here is an example of using the coarse quantizers in an existing model and training only rotations and subquantizers. Note that the existing model must be provided to Spark via the `--files` argument. The model can also be provided in protobuf format with `--existing_model_proto`.
 65 | 
 66 | ```bash
 67 | spark-submit \
 68 |     --files /path/to/name_of_existing_model.pkl \
 69 |     train_model.py \
 70 |     --data /hdfs/path/to/data \
 71 |     --model_pkl /hdfs/output/path/model.pkl \
 72 |     --existing_model_pkl name_of_existing_model.pkl \
 73 |     --M 8 \
 74 |     --steps 1,2
 75 | ```
 76 | 
 77 | #### Data handling
 78 | 
 79 | By default, the training script assumes that your training data is in a text file of tab-delimited `(id, data)` pairs, where the data vector is a base64-encoded pickled numpy array. If your data is not in this format, you can provide the training script with a UDF to load the data from your format. This UDF has the following signature:
 80 | 
 81 | ```python
 82 | def udf(sc, data_path, sampling_ratio, seed):
 83 |     pass
 84 | ```
 85 | 
 86 | where `sc` is the SparkContext instance, `data_path` is the path provided to the `--data` argument, and `sampling_ratio` and `seed` are the values provided to the arguments of the same name. This UDF must return an RDD of numpy arrays representing the training data and must be named `udf`. An example is provided in `example_udf.py`.
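For reference, a minimal sketch of such a UDF is shown here, assuming the simplest case where each input line is just a base64-encoded pickled numpy array (essentially a condensed version of `example_udf.py`; adapt the deserialization to your own format):

```python
import cPickle as pkl
import base64


def udf(sc, data_path, sampling_ratio, seed):
    # Sample the input down and deserialize each line into a numpy array;
    # the returned RDD of numpy arrays is what the training script expects.
    d = sc.textFile(data_path).sample(False, sampling_ratio, seed)
    return d.map(lambda s: pkl.loads(base64.decodestring(s)))
```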
The UDF is provided to the script by submitting its module via `--py-files` and passing the module name to the script via `--data-udf`, e.g.: 87 | 88 | ```bash 89 | spark-submit \ 90 | --py-files example_udf.py \ 91 | train_model.py \ 92 | --data /hdfs/path/to/data \ 93 | --data_udf example_udf \ 94 | --V 16 \ 95 | --M 8 \ 96 | --model_proto /hdfs/output/path/model.lopq 97 | ``` 98 | 99 | ## Code Computation 100 | 101 | The `compute_codes.py` script takes a fully trained model, an input file of features on hdfs, and an output path on hdfs, and computes LOH codes for all points. The model must be distributed with the job using the `--files` option. The script consumes `(id , data)` pairs and produces a text file of tab-delimited `(id, json-formatted LOPQ code)` pairs, e.g.: 102 | 103 | ``` 104 | 33 [[15, 13], [0, 165, 1, 72, 178, 147, 170, 69]] 105 | 34 [[5, 9], [104, 227, 160, 185, 248, 152, 170, 126]] 106 | 35 [[14, 10], [221, 144, 4, 186, 172, 40, 32, 228]] 107 | 36 [[3, 5], [76, 216, 141, 161, 247, 2, 34, 219]] 108 | 37 [[0, 5], [205, 140, 214, 194, 39, 229, 131, 0]] 109 | 38 [[12, 3], [149, 48, 249, 224, 98, 255, 210, 131]] 110 | ``` 111 | 112 | #### Available parameters 113 | 114 | | Command line arg | Default | Description | 115 | | ----------------------------- | ------- | ------------------------------------------------------------------------------ | 116 | | --data | None | hdfs path to input data | 117 | | --data_udf | None | optional module name contained a `udf` function to load training data | 118 | | --seed | None | optional random seed | 119 | | --sampling_ratio | 1.0 | proportion of data to sample | 120 | | --output | None | hdfs output path | 121 | | --model_pkl | None | file name of the model pickle | 122 | | --model_proto | None | file name of the model protobuf file | 123 | 124 | #### Usage 125 | 126 | ```bash 127 | spark-submit \ 128 | --files /path/to/name_of_existing_model.pkl \ 129 | compute_codes.py \ 130 | --data /hdfs/path/to/data \ 131 | --output /hdfs/output/path \ 132 | --model_pkl name_of_existing_model.pkl 133 | ``` 134 | 135 | #### Data handling 136 | 137 | This script also provides a way for the user to load data from other formats via a UDF. It differs from the training script only in that the output of the UDF must be an RDD of `(id, data)` pairs. 138 | -------------------------------------------------------------------------------- /spark/compute_codes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | from pyspark.context import SparkContext 4 | 5 | import cPickle as pkl 6 | import base64 7 | import json 8 | 9 | from lopq.model import LOPQModel 10 | 11 | 12 | def default_data_loading(sc, data_path, sampling_ratio, seed): 13 | """ 14 | This function loads data from a text file, sampling it by the provided 15 | ratio and random seed, and interprets each line as a tab-separated (id, data) pair 16 | where 'data' is assumed to be a base64-encoded pickled numpy array. 17 | The data is returned as an RDD of (id, numpy array) tuples. 
18 | """ 19 | # Compute the number of cores in our cluster - used below to heuristically set the number of partitions 20 | total_cores = int(sc._conf.get('spark.executor.instances')) * int(sc._conf.get('spark.executor.cores')) 21 | 22 | # Load and sample down the dataset 23 | d = sc.textFile(data_path, total_cores * 3).sample(False, sampling_ratio, seed) 24 | 25 | # The data is (id, vector) tab-delimited pairs where each vector is 26 | # a base64-encoded pickled numpy array 27 | d = d.map(lambda x: x.split('\t')).map(lambda x: (x[0], pkl.loads(base64.decodestring(x[1])))) 28 | 29 | return d 30 | 31 | 32 | def main(sc, args, data_load_fn=default_data_loading): 33 | 34 | # Load model 35 | model = None 36 | if args.model_pkl: 37 | model = pkl.load(open(args.model_pkl)) 38 | elif args.model_proto: 39 | model = LOPQModel.load_proto(args.model_proto) 40 | 41 | # Load data 42 | d = data_load_fn(sc, args.data, args.sampling_ratio, args.seed) 43 | 44 | # Distribute model instance 45 | m = sc.broadcast(model) 46 | 47 | # Compute codes and convert to string 48 | codes = d.map(lambda x: (x[0], m.value.predict(x[1]))).map(lambda x: '%s\t%s' % (x[0], json.dumps(x[1]))) 49 | 50 | codes.saveAsTextFile(args.output) 51 | 52 | if __name__ == "__main__": 53 | from argparse import ArgumentParser 54 | parser = ArgumentParser() 55 | 56 | # Data handling parameters 57 | parser.add_argument('--data', dest='data', type=str, default=None, required=True, help='hdfs path to input data') 58 | parser.add_argument('--data_udf', dest='data_udf', type=str, default=None, help='module name from which to load a data loading UDF') 59 | parser.add_argument('--seed', dest='seed', type=int, default=None, help='optional random seed for sampling') 60 | parser.add_argument('--sampling_ratio', dest='sampling_ratio', type=float, default=1.0, help='proportion of data to sample for model application') 61 | parser.add_argument('--output', dest='output', type=str, default=None, required=True, help='hdfs path to output data') 62 | 63 | existing_model_group = parser.add_mutually_exclusive_group(required=True) 64 | existing_model_group.add_argument('--model_pkl', dest='model_pkl', type=str, default=None, help='a pickled LOPQModel to evaluate on the data') 65 | existing_model_group.add_argument('--model_proto', dest='model_proto', type=str, default=None, help='a protobuf LOPQModel to evaluate on the data') 66 | 67 | args = parser.parse_args() 68 | 69 | sc = SparkContext(appName='LOPQ code computation') 70 | 71 | # Load UDF module if provided 72 | if args.data_udf: 73 | udf_module = __import__(args.data_udf, fromlist=['udf']) 74 | load_udf = udf_module.udf 75 | main(sc, args, data_load_fn=load_udf) 76 | else: 77 | main(sc, args) 78 | 79 | sc.stop() 80 | -------------------------------------------------------------------------------- /spark/example_udf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | import cPickle as pkl 4 | import base64 5 | 6 | 7 | def udf(sc, data_path, sampling_ratio, seed): 8 | """ 9 | This is an example UDF function to load training data. It loads data from a text file 10 | with base64-encoded pickled numpy arrays on each line. 
11 | """ 12 | 13 | # Compute the number of cores in our cluster - used below to heuristically set the number of partitions 14 | total_cores = int(sc._conf.get('spark.executor.instances')) * int(sc._conf.get('spark.executor.cores')) 15 | 16 | # Load and sample down the dataset 17 | d = sc.textFile(data_path, total_cores * 3).sample(False, sampling_ratio, seed) 18 | 19 | deserialize_vec = lambda s: pkl.loads(base64.decodestring(s)) 20 | vecs = d.map(deserialize_vec) 21 | 22 | return vecs 23 | -------------------------------------------------------------------------------- /spark/pca_preparation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | """ 4 | This script illustrates how to prepare PCA parameters before using them in the LOPQ pipeline. 5 | 6 | The `pca_params` argument is the path to a pickle file containing PCA parameters like that 7 | produced as a result of `train_pca.py`, and the `D` argument is the desired dimension of the 8 | final feature. This script truncates then permutes the dimensions of the PCA matrix to balance 9 | the variance across the the two halves of the final vector. 10 | """ 11 | import cPickle as pkl 12 | import base64 13 | import numpy as np 14 | from lopq.model import eigenvalue_allocation 15 | 16 | 17 | def main(args): 18 | 19 | params = pkl.load(open(args.pca_params)) 20 | P = params['P'] 21 | E = params['E'] 22 | mu = params['mu'] 23 | 24 | # Reduce dimension - eigenvalues assumed in ascending order 25 | E = E[-args.D:] 26 | P = P[:,-args.D:] 27 | 28 | # Balance variance across halves 29 | permuted_inds = eigenvalue_allocation(2, E) 30 | P = P[:, permuted_inds] 31 | 32 | # Save new params 33 | pkl.dump({ 'P': P, 'mu': mu }, open(args.output, 'w')) 34 | 35 | 36 | def apply_PCA(x, mu, P): 37 | """ 38 | Example of applying PCA. 39 | """ 40 | return np.dot(x - mu, P) 41 | 42 | 43 | if __name__ == '__main__': 44 | from argparse import ArgumentParser 45 | parser = ArgumentParser() 46 | 47 | parser.add_argument('--pca_params', dest='pca_params', type=str, required=True, help='path to pickle file of PCA parameters') 48 | parser.add_argument('--D', dest='D', type=int, default=128, help='desired final feature dimension') 49 | parser.add_argument('--output', dest='output', type=str, required=True, help='path to pickle file of new PCA parameters') 50 | args = parser.parse_args() 51 | 52 | main(args) 53 | -------------------------------------------------------------------------------- /spark/train_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 
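# This script trains an LOPQ model on Spark in up to three steps, selected via --steps:
# coarse k-means quantizers (step 0), per-cluster local rotations (step 1), and
# subquantizer codebooks (step 2). Parameters from an existing model can be reused via
# --existing_model_pkl / --existing_model_proto, and the resulting model is written to
# HDFS as a pickle and/or protobuf file. See the spark/README.md section above for argument details.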
3 | from pyspark.context import SparkContext 4 | 5 | import numpy as np 6 | import cPickle as pkl 7 | import base64 8 | import os 9 | import subprocess 10 | import sys 11 | from tempfile import NamedTemporaryFile 12 | from operator import add 13 | 14 | from pyspark.mllib.clustering import KMeans, KMeansModel 15 | from lopq.model import LOPQModel, compute_rotations_from_accumulators 16 | 17 | 18 | STEP_COARSE = 0 19 | STEP_ROTATION = 1 20 | STEP_SUBQUANT = 2 21 | 22 | 23 | def default_data_loading(sc, data_path, sampling_ratio, seed): 24 | """ 25 | This function loads training data from a text file, sampling it by the provided 26 | ratio and random seed, and interprets each line as a tab-separated (id, data) pair 27 | where 'data' is assumed to be a base64-encoded pickled numpy array. The ids are discarded. 28 | The data is returned as an RDD of numpy arrays. 29 | """ 30 | # Compute the number of cores in our cluster - used below to heuristically set the number of partitions 31 | total_cores = int(sc._conf.get('spark.executor.instances')) * int(sc._conf.get('spark.executor.cores')) 32 | 33 | # Load and sample down the dataset 34 | d = sc.textFile(data_path, total_cores * 3).sample(False, sampling_ratio, seed) 35 | 36 | # The data is (id, vector) tab-delimited pairs where each vector is 37 | # a base64-encoded pickled numpy array 38 | deserialize_vec = lambda s: pkl.loads(base64.decodestring(s.split('\t')[1])) 39 | vecs = d.map(deserialize_vec) 40 | 41 | return vecs 42 | 43 | 44 | def load_data(sc, args, data_load_fn=default_data_loading): 45 | """ 46 | Load training data as an RDD. 47 | """ 48 | # Load data 49 | vecs = data_load_fn(sc, args.data, args.sampling_ratio, args.seed) 50 | 51 | # Split the vectors 52 | split_vecs = vecs.map(lambda x: np.split(x, 2)) 53 | 54 | return split_vecs 55 | 56 | 57 | def train_coarse(sc, split_vecs, V, seed=None): 58 | """ 59 | Perform KMeans on each split of the data with V clusters each. 60 | """ 61 | 62 | # Cluster first split 63 | first = split_vecs.map(lambda x: x[0]) 64 | first.cache() 65 | print 'Total training set size: %d' % first.count() 66 | print 'Starting training coarse quantizer...' 67 | C0 = KMeans.train(first, V, initializationMode='random', maxIterations=10, seed=seed) 68 | print '... done training coarse quantizer.' 69 | first.unpersist() 70 | 71 | # Cluster second split 72 | second = split_vecs.map(lambda x: x[1]) 73 | second.cache() 74 | print 'Starting training coarse quantizer...' 75 | C1 = KMeans.train(second, V, initializationMode='random', maxIterations=10, seed=seed) 76 | print '... done training coarse quantizer.' 77 | second.unpersist() 78 | 79 | return np.vstack(C0.clusterCenters), np.vstack(C1.clusterCenters) 80 | 81 | 82 | def train_rotations(sc, split_vecs, M, Cs): 83 | """ 84 | For compute rotations for each split of the data using given coarse quantizers. 85 | """ 86 | 87 | Rs = [] 88 | mus = [] 89 | counts = [] 90 | for split in xrange(2): 91 | 92 | print 'Starting rotation fitting for split %d' % split 93 | 94 | # Get the data for this split 95 | data = split_vecs.map(lambda x: x[split]) 96 | 97 | # Get kmeans model 98 | model = KMeansModel(Cs[split]) 99 | 100 | R, mu, count = compute_local_rotations(sc, data, model, M / 2) 101 | Rs.append(R) 102 | mus.append(mu) 103 | counts.append(count) 104 | 105 | return Rs, mus, counts 106 | 107 | 108 | def accumulate_covariance_estimators(sc, data, model): 109 | """ 110 | Analogous function to function of the same name in lopq.model. 
111 | 112 | :param SparkContext sc: 113 | a SparkContext 114 | :param RDD data: 115 | an RDD of numpy arrays 116 | :param KMeansModel model: 117 | a KMeansModel instance for which to fit local rotations 118 | """ 119 | 120 | def get_residual(x): 121 | cluster = model.predict(x) 122 | centroid = model.clusterCenters[cluster] 123 | residual = x - centroid 124 | return (cluster, residual) 125 | 126 | def seq_op(acc, x): 127 | acc += np.outer(x, x) 128 | return acc 129 | 130 | # Compute (assignment, residual) k/v pairs 131 | residuals = data.map(get_residual) 132 | residuals.cache() 133 | 134 | # Collect counts and mean residuals 135 | count = residuals.countByKey() 136 | mu = residuals.reduceByKey(add).collectAsMap() 137 | 138 | # Extract the dimension of the data 139 | D = len(mu.values()[0]) 140 | 141 | # Collect accumulated outer products 142 | A = residuals.aggregateByKey(np.zeros((D, D)), seq_op, add).collectAsMap() 143 | 144 | residuals.unpersist() 145 | 146 | return A, mu, count 147 | 148 | 149 | def dict_to_ndarray(d, N): 150 | """ 151 | Helper for collating a dict with int keys into an ndarray. The value for a key 152 | becomes the value at the corresponding index in the ndarray and indices missing 153 | from the dict become zero ndarrays of the same dimension. 154 | 155 | :param dict d: 156 | a dict of (int, ndarray) or (int, number) key/values 157 | :param int N: 158 | the size of the first dimension of the new ndarray (the rest of the dimensions 159 | are determined by the shape of elements in d) 160 | """ 161 | 162 | el = d.values()[0] 163 | if type(el) == np.ndarray: 164 | value_shape = el.shape 165 | arr = np.zeros((N,) + value_shape) 166 | else: 167 | arr = np.zeros(N) 168 | 169 | for i in d: 170 | arr[i] = d[i] 171 | return arr 172 | 173 | 174 | def compute_local_rotations(sc, data, model, num_buckets): 175 | """ 176 | Analogous to the function of the same name in lopq.model. 177 | 178 | :param SparkContext sc: 179 | a SparkContext 180 | :param RDD data: 181 | an RDD of numpy arrays 182 | :param KMeansModel model: 183 | a KMeansModel instance for which to fit local rotations 184 | :param int num_buckets: 185 | the number of subvectors over which to balance residual variance 186 | """ 187 | # Get estimators 188 | A, mu, count = accumulate_covariance_estimators(sc, data, model) 189 | 190 | # Format as ndarrays 191 | V = len(model.centers) 192 | A = dict_to_ndarray(A, V) 193 | mu = dict_to_ndarray(mu, V) 194 | count = dict_to_ndarray(count, V) 195 | 196 | # Compute params 197 | R, mu = compute_rotations_from_accumulators(A, mu, count, num_buckets) 198 | 199 | return R, mu, count 200 | 201 | 202 | def train_subquantizers(sc, split_vecs, M, subquantizer_clusters, model, seed=None): 203 | """ 204 | Project each data point into it's local space and compute subquantizers by clustering 205 | each fine split of the locally projected data. 
206 | """ 207 | b = sc.broadcast(model) 208 | 209 | def project_local(x): 210 | x = np.concatenate(x) 211 | coarse = b.value.predict_coarse(x) 212 | return b.value.project(x, coarse) 213 | 214 | projected = split_vecs.map(project_local) 215 | 216 | # Split the vectors into the subvectors 217 | split_vecs = projected.map(lambda x: np.split(x, M)) 218 | split_vecs.cache() 219 | 220 | subquantizers = [] 221 | for split in xrange(M): 222 | data = split_vecs.map(lambda x: x[split]) 223 | data.cache() 224 | sub = KMeans.train(data, subquantizer_clusters, initializationMode='random', maxIterations=10, seed=seed) 225 | data.unpersist() 226 | subquantizers.append(np.vstack(sub.clusterCenters)) 227 | 228 | return (subquantizers[:len(subquantizers) / 2], subquantizers[len(subquantizers) / 2:]) 229 | 230 | 231 | def save_hdfs_pickle(m, pkl_path): 232 | """ 233 | Given a python object and a path on hdfs, save the object as a pickle file locally and copy the file 234 | to the hdfs path. 235 | """ 236 | print 'Saving pickle to temp file...' 237 | f = NamedTemporaryFile(delete=False) 238 | pkl.dump(m, f, -1) 239 | f.close() 240 | 241 | print 'Copying pickle file to hdfs...' 242 | copy_to_hdfs(f, pkl_path) 243 | os.remove(f.name) 244 | 245 | 246 | def save_hdfs_proto(m, proto_path): 247 | """ 248 | Given an LOPQModel object and a path on hdfs, save the model parameters as a protobuf file locally and 249 | copy the file to the hdfs path. 250 | """ 251 | print 'Saving protobuf to temp file...' 252 | f = NamedTemporaryFile(delete=False) 253 | m.export_proto(f) 254 | f.close() 255 | 256 | print 'Copying proto file to hdfs...' 257 | copy_to_hdfs(f, proto_path) 258 | os.remove(f.name) 259 | 260 | 261 | def copy_to_hdfs(f, hdfs_path): 262 | subprocess.call(['hadoop', 'fs', '-copyFromLocal', f.name, hdfs_path]) 263 | 264 | 265 | def validate_arguments(args, model): 266 | """ 267 | Check provided command line arguments to ensure they are coherent. Provide feedback for potential errors. 268 | """ 269 | 270 | # Parse steps 271 | args.steps = set(map(int, args.steps.split(','))) 272 | 273 | # Check that the steps make sense 274 | if STEP_ROTATION not in args.steps and len(args.steps) == 2: 275 | print 'Training steps invalid' 276 | sys.exit(1) 277 | 278 | # Find parameters and warn of possibly unintentional discrepancies 279 | if args.V is None: 280 | if model is not None: 281 | args.V = model.V 282 | print 'Parameter V not specified: using V=%d from provided model.' % model.V 283 | else: 284 | print 'Parameter V not specified and no existing model provided. Exiting.' 285 | sys.exit(1) 286 | else: 287 | if model is not None and model.V != args.V: 288 | if STEP_COARSE in args.steps: 289 | print 'Parameter V differs between command line argument and provided model: ' + \ 290 | 'coarse quantizers will be trained with V=%d' % args.V 291 | else: 292 | print 'Parameter V differs between command line argument and provided model: ' + \ 293 | 'coarse quantizers must be retrained or this discrepancy corrected. Exiting.' 294 | sys.exit(1) 295 | 296 | if STEP_ROTATION in args.steps or STEP_SUBQUANT in args.steps: 297 | if args.M is None: 298 | if model is not None: 299 | args.M = model.M 300 | print 'Parameter M not specified: using M=%d from provided model.' % model.M 301 | else: 302 | print 'Parameter M not specified and no existing model provided. Exiting.' 
303 | sys.exit(1) 304 | else: 305 | if model is not None and model.M != args.M: 306 | if STEP_ROTATION in args.steps: 307 | print 'Parameter M differs between command line argument and provided model: ' + \ 308 | 'model will be trained with M=%d' % args.M 309 | else: 310 | print 'Parameter M differs between command line argument and provided model: ' + \ 311 | 'rotations must be retrained or this discrepancy corrected. Exiting.' 312 | sys.exit(1) 313 | 314 | if STEP_ROTATION in args.steps: 315 | if STEP_COARSE not in args.steps and (model is None or model.Cs is None): 316 | print 'Cannot train rotations without coarse quantizers. Either train coarse quantizers or provide an existing model. Exiting.' 317 | sys.exit(1) 318 | 319 | if STEP_SUBQUANT in args.steps: 320 | if STEP_COARSE not in args.steps and (model is None or model.Cs is None): 321 | print 'Cannot train subquantizers without coarse quantizers. Either train coarse quantizers or provide an existing model. Exiting.' 322 | sys.exit(1) 323 | if STEP_ROTATION not in args.steps and (model is None or model.Rs is None or model.mus is None): 324 | print 'Cannot train subquantizers without rotations. Either train rotations or provide an existing model. Exiting.' 325 | sys.exit(1) 326 | 327 | return args 328 | 329 | if __name__ == "__main__": 330 | from argparse import ArgumentParser 331 | parser = ArgumentParser() 332 | 333 | # Data handling parameters 334 | parser.add_argument('--data', dest='data', type=str, required=True, help='hdfs path to input data') 335 | parser.add_argument('--data_udf', dest='data_udf', type=str, default=None, help='module name from which to load a data loading UDF') 336 | parser.add_argument('--seed', dest='seed', type=int, default=None, help='optional random seed') 337 | parser.add_argument('--sampling_ratio', dest='sampling_ratio', type=float, default=1.0, help='proportion of data to sample for training') 338 | parser.add_argument('--subquantizer_sampling_ratio', dest='subquantizer_sampling_ratio', type=float, default=1.0, 339 | help='proportion of data to subsample for subquantizer training') 340 | 341 | # Model parameters 342 | existing_model_group = parser.add_mutually_exclusive_group() 343 | existing_model_group.add_argument('--existing_model_pkl', dest='existing_model_pkl', type=str, default=None, 344 | help='a pickled LOPQModel from which to extract existing parameters') 345 | existing_model_group.add_argument('--existing_model_proto', dest='existing_model_proto', type=str, default=None, 346 | help='a protobuf of existing model parameters') 347 | 348 | # Model hyperparameters 349 | parser.add_argument('--V', dest='V', type=int, default=None, help='number of coarse clusters') 350 | parser.add_argument('--M', dest='M', type=int, default=None, help='total number of subquantizers') 351 | parser.add_argument('--subquantizer_clusters', dest='subquantizer_clusters', type=int, default=256, help='number of subquantizer clusters') 352 | 353 | # Training and output directives 354 | parser.add_argument('--steps', dest='steps', type=str, default='0,1,2', 355 | help='comma-separated list of integers indicating which steps of training to perform') 356 | parser.add_argument('--model_pkl', dest='model_pkl', type=str, default=None, help='hdfs path to save pickle file of resulting LOPQModel') 357 | parser.add_argument('--model_proto', dest='model_proto', type=str, default=None, help='hdfs path to save protobuf file of resulting model parameters') 358 | 359 | args = parser.parse_args() 360 | 361 | # Check that some output 
format was provided 362 | if args.model_pkl is None and args.model_proto is None: 363 | parser.error('at least one of --model_pkl and --model_proto is required') 364 | 365 | # Load existing model if provided 366 | model = None 367 | if args.existing_model_pkl: 368 | model = pkl.load(open(args.existing_model_pkl)) 369 | elif args.existing_model_proto: 370 | model = LOPQModel.load_proto(args.existing_model_proto) 371 | 372 | args = validate_arguments(args, model) 373 | 374 | # Build descriptive app name 375 | get_step_name = lambda x: {STEP_COARSE: 'coarse', STEP_ROTATION: 'rotations', STEP_SUBQUANT: 'subquantizers'}.get(x, None) 376 | steps_str = ', '.join(filter(lambda x: x is not None, map(get_step_name, sorted(args.steps)))) 377 | APP_NAME = 'LOPQ{V=%d,M=%d}; training %s' % (args.V, args.M, steps_str) 378 | 379 | sc = SparkContext(appName=APP_NAME) 380 | 381 | # Load UDF module if provided and load training data RDD 382 | if args.data_udf: 383 | udf_module = __import__(args.data_udf, fromlist=['udf']) 384 | load_udf = udf_module.udf 385 | data = load_data(sc, args, data_load_fn=load_udf) 386 | else: 387 | data = load_data(sc, args) 388 | 389 | # Initialize parameters 390 | Cs = Rs = mus = subs = None 391 | 392 | # Get coarse quantizers 393 | if STEP_COARSE in args.steps: 394 | Cs = train_coarse(sc, data, args.V, seed=args.seed) 395 | else: 396 | Cs = model.Cs 397 | 398 | # Get rotations 399 | if STEP_ROTATION in args.steps: 400 | Rs, mus, counts = train_rotations(sc, data, args.M, Cs) 401 | else: 402 | Rs = model.Rs 403 | mus = model.mus 404 | 405 | # Get subquantizers 406 | if STEP_SUBQUANT in args.steps: 407 | model = LOPQModel(V=args.V, M=args.M, subquantizer_clusters=args.subquantizer_clusters, parameters=(Cs, Rs, mus, None)) 408 | 409 | if args.subquantizer_sampling_ratio != 1.0: 410 | data = data.sample(False, args.subquantizer_sampling_ratio, args.seed) 411 | 412 | subs = train_subquantizers(sc, data, args.M, args.subquantizer_clusters, model, seed=args.seed) 413 | 414 | # Final output model 415 | model = LOPQModel(V=args.V, M=args.M, subquantizer_clusters=args.subquantizer_clusters, parameters=(Cs, Rs, mus, subs)) 416 | 417 | if args.model_pkl: 418 | save_hdfs_pickle(model, args.model_pkl) 419 | if args.model_proto: 420 | save_hdfs_proto(model, args.model_proto) 421 | 422 | sc.stop() 423 | -------------------------------------------------------------------------------- /spark/train_pca.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015, Yahoo Inc. 2 | # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 3 | from pyspark.context import SparkContext 4 | 5 | import numpy as np 6 | import base64 7 | import cPickle as pkl 8 | from tempfile import NamedTemporaryFile 9 | import os 10 | import subprocess 11 | from operator import add 12 | 13 | 14 | def default_data_loading(sc, data_path, sampling_ratio, seed): 15 | """ 16 | This function loads training data from a text file, sampling it by the provided 17 | ratio and random seed, and interprets each line as a tab-separated (id, data) pair 18 | where 'data' is assumed to be a base64-encoded pickled numpy array. The ids are discarded. 19 | The data is returned as an RDD of numpy arrays. 
20 | """ 21 | # Compute the number of cores in our cluster - used below to heuristically set the number of partitions 22 | total_cores = int(sc._conf.get('spark.executor.instances')) * int(sc._conf.get('spark.executor.cores')) 23 | 24 | # Load and sample down the dataset 25 | d = sc.textFile(data_path, total_cores * 3).sample(False, sampling_ratio, seed) 26 | 27 | # The data is (id, vector) tab-delimited pairs where each vector is 28 | # a base64-encoded pickled numpy array 29 | deserialize_vec = lambda s: pkl.loads(base64.decodestring(s.split('\t')[1])) 30 | vecs = d.map(deserialize_vec) 31 | 32 | return vecs 33 | 34 | 35 | def main(sc, args, data_load_fn=default_data_loading): 36 | 37 | def seqOp(a, b): 38 | a += np.outer(b, b) 39 | return a 40 | 41 | def combOp(a, b): 42 | a += b 43 | return a 44 | 45 | # Load data 46 | d = data_load_fn(sc, args.data, args.sampling_ratio, args.seed) 47 | d.cache() 48 | 49 | # Determine the data dimension 50 | D = len(d.first()) 51 | 52 | # Count data points 53 | count = d.count() 54 | mu = d.aggregate(np.zeros(D), add, add) 55 | mu = mu / float(count) 56 | 57 | # Compute covariance estimator 58 | summed_covar = d.treeAggregate(np.zeros((D, D)), seqOp, combOp, depth=args.agg_depth) 59 | 60 | A = summed_covar / (count - 1) - np.outer(mu, mu) 61 | E, P = np.linalg.eigh(A) 62 | 63 | params = { 64 | 'mu': mu, # mean 65 | 'P': P, # PCA matrix 66 | 'E': E, # eigenvalues 67 | 'A': A, # covariance matrix 68 | 'c': count # sample size 69 | } 70 | 71 | save_hdfs_pickle(params, args.output) 72 | 73 | 74 | def save_hdfs_pickle(m, pkl_path): 75 | """ 76 | Given a python object and a path on hdfs, save the object as a pickle file locally and copy the file 77 | to the hdfs path. 78 | """ 79 | print 'Saving pickle to temp file...' 80 | f = NamedTemporaryFile(delete=False) 81 | pkl.dump(m, f, -1) 82 | f.close() 83 | 84 | print 'Copying pickle file to hdfs...' 85 | copy_to_hdfs(f, pkl_path) 86 | os.remove(f.name) 87 | 88 | 89 | def copy_to_hdfs(f, hdfs_path): 90 | subprocess.call(['hadoop', 'fs', '-copyFromLocal', f.name, hdfs_path]) 91 | 92 | 93 | if __name__ == '__main__': 94 | from argparse import ArgumentParser 95 | parser = ArgumentParser() 96 | 97 | # Data handling parameters 98 | parser.add_argument('--data', dest='data', type=str, required=True, help='hdfs path to input data') 99 | parser.add_argument('--data_udf', dest='data_udf', type=str, default=None, help='module name from which to load a data loading UDF') 100 | parser.add_argument('--seed', dest='seed', type=int, default=None, help='optional random seed') 101 | parser.add_argument('--sampling_ratio', dest='sampling_ratio', type=float, default=1.0, help='proportion of data to sample for training') 102 | parser.add_argument('--agg_depth', dest='agg_depth', type=int, default=4, help='depth of tree aggregation to compute covariance estimator') 103 | 104 | parser.add_argument('--output', dest='output', type=str, default=None, help='hdfs path to output pickle file of parameters') 105 | 106 | args = parser.parse_args() 107 | 108 | sc = SparkContext(appName='PCA') 109 | 110 | # Load UDF module if provided 111 | if args.data_udf: 112 | udf_module = __import__(args.data_udf, fromlist=['udf']) 113 | load_udf = udf_module.udf 114 | main(sc, args, data_load_fn=load_udf) 115 | else: 116 | main(sc, args) 117 | 118 | sc.stop() 119 | --------------------------------------------------------------------------------