├── .bumpversion.cfg ├── .coveragerc ├── .gitignore ├── .venv ├── LICENSE ├── README.rst ├── build.py ├── docs ├── conf.py ├── index.rst ├── installation.rst ├── readme.rst ├── reference │ ├── index.rst │ └── shift_detect.rst ├── requirements.txt ├── spelling_wordlist.txt └── usage.rst ├── src ├── main │ └── python │ │ └── shift_detect │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── driver.py │ │ ├── kernels.py │ │ └── rulsif.py └── test │ ├── integration │ └── python │ │ └── shift_detect_integration_tests.py │ └── unit │ └── python │ └── shift_detect_unit_tests.py └── support ├── build ├── __init__.py ├── clean_project.py ├── copy_files.py ├── devpi.py └── distribute.py ├── deps ├── pip-compile-deps.sh ├── requirements-dev-build.in ├── requirements-dev-runtime.in ├── requirements-prod-build.in └── requirements-prod-runtime.in └── dist ├── setup.cfg ├── setup.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.0 3 | files = build.py src/main/python/shift_detect/__init__.py support/dist/setup.py 4 | commit = True 5 | tag = False 6 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [paths] 2 | source = src 3 | 4 | [run] 5 | branch = True 6 | source = src 7 | parallel = true 8 | 9 | [report] 10 | show_missing = true 11 | precision = 2 12 | omit = *migrations* 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ######################################################################################## 2 | # Python artifacts 3 | ######################################################################################## 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | 9 | # C 
extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .cache 45 | .coverage 46 | .coverage.* 47 | nosetests.xml 48 | coverage.xml 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | 60 | # PyBuilder 61 | requirements-build.txt 62 | requirements.txt 63 | target/ 64 | -------------------------------------------------------------------------------- /.venv: -------------------------------------------------------------------------------- 1 | pyenv-shift-detect 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. 
"Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. 
"Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. 
Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. 
Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. 
If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. 
Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. 
If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. 
Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. 
Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 
361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 374 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =============================== 2 | Covariate shift estimator 3 | =============================== 4 | 5 | Learns a covariate shift estimator for a given dataset via a kernel method using 6 | the Relative Unconstrained Least-Squares Importance Fitting algorithm [1]. 7 | 8 | The RULSIF kernel method estimates the relative ratio of probability densities 9 | 10 | P(X_reference) / (alpha * P(X_reference) + (1 - alpha) * P(X_test)) 11 | 12 | from samples: 13 | 14 | X_test[i] | X_test[i] in R^{d}, with i=1 to X_test{N} 15 | 16 | drawn independently from P(X_test) 17 | 18 | and samples 19 | 20 | X_reference[i] | X_reference[i] in R^{d}, with i=1 to X_reference{N} 21 | 22 | drawn independently from P(X_reference) 23 | 24 | Using relative density ratios allows the RULSIF method to calculate a divergence 25 | score between a reference and test sample. 
26 | 27 | 28 | Usage 29 | ===== 30 | 31 | :: 32 | 33 | $ python 34 | >>> import numpy 35 | >>> from shift_detect import rulsif 36 | >>> estimator = rulsif.RULSIF() 37 | 38 | # Acquire training data 39 | >>> X_reference_train = numpy.array([[-327.538995,1060.88410,-5135.11159], \ 40 | [-6079.76383,4540.07072, 4683.89186], \ 41 | [-519.485848,-65.427245,-460.108594], \ 42 | [-102.050993,-486.05520,-373.829956]]) 43 | 44 | >>> X_test_train = numpy.array([[4968.97172, 3051.50683,-102.050991], \ 45 | [-5501.4825,-1951.72530,-44.1323003], \ 46 | [2872.91368,-555.026187, 1582.54918], \ 47 | [-715.46199,-544.196344,-61.4378131]]) 48 | 49 | # Train the model 50 | >>> estimator.train(X_reference_train, X_test_train) 51 | 52 | # Compare real data using the trained estimator 53 | >>> for (X_reference, X_test) in real_dataset : 54 | >>> divergence_score = estimator.apply(X_reference, X_test) 55 | 56 | 57 | Installation 58 | ============ 59 | 60 | :: 61 | 62 | $ pip install shift-detect 63 | 64 | Development 65 | =========== 66 | 67 | To run all the tests, run 68 | :: 69 | 70 | $ pyb run_unit_tests / $ pyb run_integration_tests 71 | 72 | or 73 | 74 | $ tox 75 | 76 | 77 | References 78 | ========== 79 | 80 | 1. Relative Density-Ratio Estimation for Robust Distribution Comparison. Makoto Yamada, 81 | Taiji Suzuki, Takafumi Kanamori, Hirotaka Hachiya, and Masashi Sugiyama. NIPS, 82 | pages 594-602. (2011) 83 | -------------------------------------------------------------------------------- /build.py: -------------------------------------------------------------------------------- 1 | # -*- c-file-style: "sourcery" -*- 2 | # 3 | # Use and distribution of this software and its source code is governed 4 | # by the terms and conditions defined in the "LICENSE" file that is part 5 | # of this source code package. 
6 | # 7 | from pybuilder.core import use_bldsup 8 | from pybuilder.core import use_plugin 9 | from pybuilder.core import init 10 | from pybuilder.core import task 11 | from pybuilder.core import Author 12 | from pybuilder.utils import assert_can_execute 13 | 14 | import glob 15 | import os 16 | import shutil 17 | 18 | use_plugin("python.core") 19 | use_plugin("python.flake8") 20 | use_plugin("python.unittest") 21 | use_plugin("python.integrationtest") 22 | use_plugin("python.install_dependencies") 23 | 24 | # Import local build support plugins 25 | use_bldsup(build_support_dir="support/build") 26 | use_plugin("copy_files") 27 | use_plugin("clean_project") 28 | use_plugin("distribute") 29 | use_plugin("devpi") 30 | use_plugin("exec") 31 | 32 | # Declare default build phase tasks to execute 33 | default_task = [ "clean_project", "analyze", "install_dependencies", "publish" ] 34 | 35 | # Declare top level project properties 36 | authors = [Author("Paolo de Dios", "paolodedios@gmail.com")] 37 | name = "shift-detect" 38 | url = "http://paolodedios.com" 39 | summary = "Covariate shift detector." 
40 | version = "0.1.0" 41 | license = "MPL" 42 | 43 | 44 | @init 45 | def set_properties(project) : 46 | 47 | # Generate build and runtime dependency specs 48 | project.set_property("dir_deps_requirements", "support/deps") 49 | project.set_property("analyze_command", "support/deps/pip-compile-deps.sh {}".format(project.get_property("dir_deps_requirements"))) 50 | project.set_property("analyze_propagate_stdout", True) 51 | project.set_property("analyze_propagate_stderr", True) 52 | 53 | # Declare project build dependencies 54 | project.build_depends_on_requirements("{}/requirements-build.txt".format(project.get_property("dir_deps_requirements"))) 55 | 56 | # Declare project runtime dependencies 57 | project.depends_on_requirements("{}/requirements.txt".format(project.get_property("dir_deps_requirements"))) 58 | 59 | # Declare the location of all unit tests 60 | project.set_property("dir_source_unittest_python", "src/test/unit/python") 61 | project.set_property("unittest_module_glob", "*_tests") 62 | project.set_property("unittest_test_method_prefix", "test") 63 | 64 | # Declare the location of all integration tests 65 | project.set_property("dir_source_integrationtest_python", "src/test/integration/python") 66 | project.set_property("integrationtest_module_glob", "*_tests") 67 | project.set_property("integrationtest_test_method_prefix", "test") 68 | 69 | # Disable Teamcity output during normal builds. 
When the TEAMCITY_VERSION 70 | # environment variable is set (by either Teamcity or a user), teamcity 71 | # output will be generated automatically 72 | project.set_property("teamcity_output", False) 73 | 74 | # Specify unit and integration test artifacts that can be removed with the 75 | # "clean_project" task 76 | project.get_property("clean_project_files_glob").extend([ 77 | "{}/requirements-build.txt".format(project.get_property("dir_deps_requirements")), 78 | "{}/requirements.txt".format(project.get_property("dir_deps_requirements")), 79 | "{}/__pycache__".format(project.get_property("dir_source_unittest_python")), 80 | "{}/*.pyc".format(project.get_property("dir_source_unittest_python")), 81 | "{}/__pycache__".format(project.get_property("dir_source_integrationtest_python")), 82 | "{}/*.pyc".format(project.get_property("dir_source_integrationtest_python")) 83 | ]) 84 | 85 | # Check sources during the analyze phase, but ignore certain PEP8 error codes. 86 | # @see http://pep8.readthedocs.org/en/latest/intro.html#error-codes 87 | project.set_property("flake8_ignore", "E201,E202,E203,E221,E272,E302,E303,E501") 88 | project.set_property("flake8_verbose_output", True) 89 | project.set_property("flake8_include_test_sources", True) 90 | project.set_property("flake8_break_build", False) 91 | 92 | # Copy files to the top level of the distribution staging directory 93 | project.set_property("copy_root_files_target", "$dir_dist") 94 | project.get_property("copy_root_files_glob").extend([ 95 | "LICENSE", 96 | "README.rst", 97 | "support/deps/requirements.txt", 98 | "support/dist/setup.cfg", 99 | "support/dist/tox.ini" 100 | ]) 101 | 102 | # Declare which copied resources will be packaged for installation via 103 | # MAINIFEST.in 104 | project.install_file(".", "LICENSE") 105 | project.install_file(".", "README.rst") 106 | project.install_file(".", "requirements.txt") 107 | project.install_file(".", "tox.ini") 108 | 109 | # Package all scripts in the bin directory 110 | 
project.set_property("dir_dist_scripts", "bin") 111 | 112 | # Add PyPi package metdata data classifiers. 113 | # 114 | # Note: Invoking "setup.py release" will typically release all code to the 115 | # wild. In order to ensure that this doesn't accidentally happen during the 116 | # publish phase of the build, the "Private" classifier property is specified 117 | # by default. As a result the public PyPI service will reject this package 118 | # but a private PyPi or DevPI server will accept it. 119 | # 120 | # For a complete classifier list, @see http://pypi.python.org/pypi?%3Aaction=list_classifiers 121 | project.set_property("distutils_classifiers", [ 122 | "Private :: Do Not Upload", 123 | "Development Status :: 4 - Beta", 124 | "Intended Audience :: Developers", 125 | "License :: Other/Proprietary License" 126 | "Operating System :: Unix", 127 | "Operating System :: POSIX", 128 | "Programming Language :: Python", 129 | "Programming Language :: Python :: 2.6", 130 | "Programming Language :: Python :: 2.7", 131 | "Programming Language :: Python :: 3", 132 | "Programming Language :: Python :: 3.3", 133 | "Programming Language :: Python :: 3.4", 134 | "Programming Language :: Python :: 3.5", 135 | "Programming Language :: Python :: Implementation :: CPython", 136 | "Programming Language :: Python :: Implementation :: PyPy", 137 | "Topic :: Utilities" 138 | ]) 139 | 140 | # Force setup.py to generate and install a shell script for the entry point 141 | project.set_property("distutils_console_scripts", [ 142 | "shift_detect = shift_detect.__main__:main" 143 | ]) 144 | 145 | # Extend the list of setup.py commands to be executed from sdist, bdist_dumb 146 | project.get_property("distutils_commands").extend([ "bdist_egg", "bdist_wheel" ]) 147 | 148 | # Set user name and destination index for local devpi/PyPi central 149 | # repository 150 | project.set_property("devpi_user", "root") 151 | project.set_property("devpi_developer_index", "dev") 152 | 
project.set_property("devpi_staging_index" , "staging") 153 | project.set_property("devpi_release_index" , "release") 154 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Documentation generator configuration 4 | # 5 | import re 6 | import os 7 | import sphinx_py3doc_enhanced_theme 8 | 9 | extensions = [ 10 | 'sphinx.ext.autodoc', 11 | 'sphinx.ext.autosummary', 12 | 'sphinx.ext.todo', 13 | 'sphinx.ext.coverage', 14 | 'sphinx.ext.ifconfig', 15 | 'sphinx.ext.viewcode', 16 | 'sphinxcontrib.napoleon' 17 | ] 18 | 19 | if os.getenv('SPELLCHECK') : 20 | extensions += 'sphinxcontrib.spelling', 21 | spelling_show_suggestions = True 22 | spelling_lang = 'en_US' 23 | 24 | source_suffix = '.rst' 25 | master_doc = 'index' 26 | project = u'Change Detector' 27 | copyright = u'2016, Paolo de Dios' 28 | version = re.findall('version="(.*)"', open(os.path.join(os.path.dirname(__file__), '../setup.py')).read())[0] 29 | release = version 30 | html_theme = "sphinx_py3doc_enhanced_theme" 31 | html_theme_path = [sphinx_py3doc_enhanced_theme.get_html_theme_path()] 32 | 33 | pygments_style = 'trac' 34 | templates_path = ['.'] 35 | html_use_smartypants = True 36 | html_last_updated_fmt = '%b %d, %Y' 37 | html_split_index = True 38 | html_sidebars = { 39 | '**': ['searchbox.html', 'globaltoc.html', 'sourcelink.html'], 40 | } 41 | html_short_title = '%s-%s' % (project, version) 42 | html_theme_options = { 43 | } 44 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Change Detector's documentation! 2 | =========================================== 3 | 4 | Contents: 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | 9 | readme 10 | installation 11 | usage 12 | reference/index 13 | 14 | Indices and tables 15 | ================== 16 | 17 | * :ref:`genindex` 18 | * :ref:`modindex` 19 | * :ref:`search` 20 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | At the command line:: 6 | 7 | pip install change-detect 8 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | ######## 2 | Overview 3 | ######## 4 | 5 | .. include:: ../README.rst 6 | -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | shift_detect* 8 | -------------------------------------------------------------------------------- /docs/reference/shift_detect.rst: -------------------------------------------------------------------------------- 1 | shift_detect 2 | ============================= 3 | 4 | .. automodule:: shift_detect 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinxcontrib-napoleon 3 | sphinx-py3doc-enhanced-theme 4 | -e . 
5 | -------------------------------------------------------------------------------- /docs/spelling_wordlist.txt: -------------------------------------------------------------------------------- 1 | builtin 2 | builtins 3 | classmethod 4 | staticmethod 5 | classmethods 6 | staticmethods 7 | args 8 | kwargs 9 | callstack 10 | Changelog 11 | Indices 12 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | To use the Covariate Shift Detector in a project:: 6 | 7 | import shift_detect 8 | -------------------------------------------------------------------------------- /src/main/python/shift_detect/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- c-file-style: "sourcery" -*- 3 | # 4 | # Use and distribution of this software and its source code is governed 5 | # by the terms and conditions defined in the "LICENSE" file that is part 6 | # of this source code package. 
# Oldest interpreter, as (major, minor), that the package agrees to run on.
__python_version__ = (2, 7)
# Human readable product name; also used in the start-up error message below
# so that the package never reports itself under two different names.
__app_name__       = "Covariate Shift Detector"
__log_module__     = "shift_detect"
__version__        = "0.1.0"

# Refuse to load on an interpreter that is too old: report the minimum
# version on stderr and terminate the process with exit status 1.
if sys.version_info[:2] < __python_version__ :
    print("{} requires Python version {} or later".format(__app_name__, ".".join(map(str, __python_version__))), file=sys.stderr)
    sys.exit(1)
def main(argv=()) :
    """
    Build a Driver, run it, and hand its result back to the caller.

    Args:
        argv (list): List of arguments.  Accepted so the function can serve
            as a console script entry point; the body does not read it.

    Returns:
        Whatever Driver.start() returns, intended to be used as the
        program exit status code
    """
    driver = Driver()
    status = driver.start()
    return status
10 | 11 | Usage: 12 | shift_detect [options] DATASETS PREPROCESS 13 | 14 | Arguments: 15 | 16 | DATASETS comma separated list of datasets in importance order 17 | PREPROCESS comma separated list of preprocessing directives for each dataset specified 18 | 19 | Options: 20 | -h, --help show this help message and exit 21 | -v, --version show version and exit 22 | 23 | -g, --debug operate in debug mode 24 | -b, --benchmark operate in benchmark mode 25 | 26 | -m, --host M database host name 27 | -p, --port P database port 28 | -d, --database D database name 29 | -c, --collection C run detector on named database collection 30 | 31 | -o, --test use fixed gaussian sample data against covariate shift detection algorithm 32 | 33 | -a, --alpha A use A as the alpha relative parameter (ULSIF {a = 0}, RULSIF {a > 0} ) 34 | -s, --sigma S use S as the gaussian width parameter override, sigma 35 | -l, --lambda L use L as the regularization parameter override, lambda 36 | -k, --kernels K use K number of kernels basis functions as an override 37 | -f, --folds F use F number of cross validation folds, where leave one out CV is used if {f = 0} 38 | 39 | --dataset1 1 run detector on named data set field name from the specified collection; treated as R^d=1 40 | --dataset2 2 run detector on named data set field name from the specified collection; treated as R^d=2 41 | --dataset3 3 run detector on named data set field name from the specified collection; treated as R^d=3 42 | --dataset4 4 run detector on named data set field name from the specified collection; treated as R^d=4 43 | --dataset5 5 run detector on named data set field name from the specified collection; treated as R^d=5 44 | --dataset6 6 run detector on named data set field name from the specified collection; treated as R^d=6 45 | --dataset7 7 run detector on named data set field name from the specified collection; treated as R^d=7 46 | --dataset8 8 run detector on named data set field name from the specified collection; treated
class Driver(object) :
    """
    Command line front end for the covariate shift detector.

    start() installs the exit signal handlers, parses the command line with
    docopt against the module usage text and prints the banner.  The tool is
    still under construction, so nothing else is run yet.
    """

    # Signals that end the program quietly.  They are looked up by name
    # because SIGHUP and SIGUSR2 are not defined on every platform and a
    # direct attribute reference would raise AttributeError there.
    _EXIT_SIGNAL_NAMES = ("SIGINT", "SIGHUP", "SIGUSR2")


    def _front_matter(self, options) :
        """
        Prints the banner, followed by the parsed options when debugging.
        """
        print("Change Detector")
        print("------------------------------------------------------------\n")
        print("Command line tool still under construction\n")
        self._show_options(options)


    def _show_options(self, options, message=None) :
        """
        Dumps the option mapping as sorted, indented JSON.  Nothing is printed
        unless the "--debug" entry of the mapping is truthy.

        Args:
            options (dict): Parsed command line options; must hold "--debug"
            message (str): Optional heading, defaults to "[ Compute Options ]"
        """
        if not options["--debug"] :
            return

        print(message if message is not None else "[ Compute Options ]")
        print(json.dumps(options, sort_keys=True, indent=4, separators=(",", " : ")))
        print("------------------------------------------------------------\n")


    def _exit_signal_handler(self, signum, frame) :
        """
        Default signal handler that exits gracefully on an interrupt.

        The first parameter is named signum so that it does not shadow the
        "signal" module inside the handler.
        """
        sys.exit(0)


    def _install_signal_handlers(self) :
        """
        Installs signal handlers to trap user or system initiated interrupts.
        Signals the running platform does not define are skipped.
        """
        for name in self._EXIT_SIGNAL_NAMES :
            signum = getattr(signal, name, None)
            if signum is not None :
                signal.signal(signum, self._exit_signal_handler)


    def start(self, override_docopt=None) :
        """
        Runs the command line tool.

        Args:
            override_docopt: Currently unused; kept for interface
                compatibility

        Returns:
            int: 0 on success.  Any Exception is reported on stderr and the
            process is terminated with exit status 1 instead of returning.
        """
        try :
            self._install_signal_handlers()

            options = docopt(__doc__, version=shift_detect.__version__)

            self._front_matter(options)

        except Exception as e :
            print("ERROR: Caught {} : {}".format(type(e), e), file=sys.stderr)
            sys.exit(1)

        return 0
def _debug_enabled(options) :
    """
    Tells whether debug output was requested.

    Returns True only when options is a mapping with a truthy "--debug"
    entry; None, an empty mapping, a missing key, None and False all turn
    debug output off.
    """
    return bool(options) and bool(options.get("--debug"))


class Estimator(object) :
    """
    Marker interface for all Estimators.  Provides basic apply() style
    interface and a show() method for debugging
    """
    def apply(self, samples=None) :
        """
        Placeholder for the estimation step; concrete estimators override it.

        Raises:
            NotImplementedError: always, the base class carries no model
        """
        raise NotImplementedError("{} does not implement apply()".format(type(self).__name__))


    def show(self, displayName=None, estimate=None, title=None) :
        """
        Prints a labelled estimate for debugging.

        Args:
            displayName: Label printed in front of the estimate
            estimate: Value to print, rendered with str()
            title: Section heading; the class name is used when omitted so
                that the default call no longer fails on a None heading
        """
        if title is None :
            title = type(self).__name__

        print("[" + str(title) + "]")
        print(str(displayName) + " : " + str(estimate) + "\n")
        print("---------------")


class Vector(object) :
    @classmethod
    def show(cls, displayName=None, vector=None, options=None) :
        """
        Prints the element count and contents of a vector, but only when the
        "--debug" option is switched on.
        """
        if not _debug_enabled(options) :
            return

        print("[" + str(displayName) + "]")
        print("elements : " + str(numpy.size(vector)) + "\n")
        print(str(vector) + "\n")
        print("---------------")


class Matrix(Vector) :
    @classmethod
    def show(cls, displayName=None, matrix=None, options=None) :
        """
        Prints the shape and contents of a matrix, but only when the
        "--debug" option is switched on.
        """
        if not _debug_enabled(options) :
            return

        print("[" + str(displayName) + "]")
        print("(rows, cols) : " + str(matrix.shape) + "\n")
        print(str(matrix) + "\n")
        print("---------------")


class Kernel(Matrix) :
    def apply(self, samples=None) :
        """
        Placeholder for the kernel evaluation; concrete kernels override it.

        Raises:
            NotImplementedError: always, the base class defines no kernel
        """
        raise NotImplementedError("{} does not implement apply()".format(type(self).__name__))

    def show(self, results=None, displayName=None, options=None) :
        """
        Prints a kernel matrix through Matrix.show().  Note the argument
        order (results first); options is forwarded so that the "--debug"
        switch is honoured.
        """
        Matrix.show(displayName, results, options)


class GaussianKernel(Kernel) :

    sigmaWidth = None


    def __init__(self, sigma=1.0) :
        """
        Args:
            sigma (float): Width of the gaussian function
        """
        self.sigmaWidth = sigma


    def computeDistance(self, samples=None, sampleMeans=None) :
        """
        Compute the distances between points in the sample's feature space
        to points along the center of the distribution

        Both arguments are 2-d arrays holding one point per column.  The
        result has one row per sample and one column per center and holds
        squared euclidean distances, expanded as |x|^2 + |c|^2 - 2 * x.c
        """
        squaredSamples = numpy.sum(samples ** 2, axis=0)
        squaredMeans   = numpy.sum(sampleMeans ** 2, axis=0)

        # Broadcasting a row of center norms against a column of sample norms
        # yields the same matrix the explicit tiling would build
        return squaredMeans[None, :] + squaredSamples[:, None] - 2 * numpy.dot(samples.T, sampleMeans)


    def apply(self, samples=None, sampleMeans=None) :
        """
        Computes an n-dimensional Gaussian/RBF kernel matrix by taking points
        in the sample's feature space and maps them to kernel coordinates in
        Hilbert space by calculating the distance to each point in the sample
        space and taking the Gaussian function of the distances.

           K(X,Y) = exp( -(|| X - Y ||^2) / (2 * sigma^2) )

        where X is the matrix of data points in the sample space,
              Y is the matrix of gaussian centers in the sample space
              sigma is the width of the gaussian function being used
        """
        squaredDistance = self.computeDistance(samples, sampleMeans)
        return numpy.exp(-squaredDistance / ( 2 * (self.sigmaWidth**2) ))
class AlphaRelativeDensityRatioEstimator(Estimator) :
    """
    Computes the alpha-relative density ratio estimate of P(X_ref) and P(X_test)

    The alpha-relative density ratio estimator, r_alpha(X), is given by the
    following kernel model:

      g(X; theta) = SUM( (theta_l * K(X, X_centers_l)), l=0, n )

    where theta is a vector of parameters [theta_1, theta_2, ..., theta_l]^T
    to be learned from the data samples. The parameters theta in the model
    g(X; theta) are calculated by solving the following optimization problem:

      theta_hat = argmin [ ( (1/2) * theta^T * H_hat * theta) - (h_hat^T * theta) + ( lambda/2 * theta^T * theta) ]

    where the expression (lambda/2 * theta^T * theta), with lambda >= 0, is
    a regularization term used to penalize against overfitting

    Reference:
    Relative Density-Ratio Estimation for Robust Distribution Comparison. Makoto Yamada,
    Taiji Suzuki, Takafumi Kanamori, Hirotaka Hachiya, and Masashi Sugiyama. NIPS,
    page 594-602. (2011)
    """
    alphaConstraint   = None
    sigmaWidth        = None
    lambdaRegularizer = None
    kernelBasis       = None

    def __init__(self, alphaConstraint=0.0, sigmaWidth=1.0, lambdaRegularizer=0.0, kernelBasis=1) :
        """
        Args:
            alphaConstraint (float): The alpha relative parameter
            sigmaWidth (float): Width of the gaussian kernel
            lambdaRegularizer (float): Regularization parameter, lambda
            kernelBasis (int): Number of kernel basis functions
        """
        self.alphaConstraint   = alphaConstraint
        self.sigmaWidth        = sigmaWidth
        self.lambdaRegularizer = lambdaRegularizer
        self.kernelBasis       = kernelBasis


    def apply(self, referenceSamples=None, testSamples=None, gaussianCenters=None) :
        """
        Computes the alpha-relative density ratio, r_alpha(X), of P(X_ref) and P(X_test)

          r_alpha(X) = P(Xref) / (alpha * P(Xref) + (1 - alpha) * P(X_test))

        Returns density ratio estimate at X_ref, r_alpha_ref, and at X_test, r_alpha_test
        """
        # Apply the kernel function to the reference and test samples
        kernel = GaussianKernel(self.sigmaWidth)
        K_ref  = kernel.apply(referenceSamples, gaussianCenters).T
        K_test = kernel.apply(testSamples, gaussianCenters).T

        # Compute the parameters, theta_hat, of the density ratio estimator
        H_hat     = AlphaRelativeDensityRatioEstimator.H_hat(self.alphaConstraint, K_ref, K_test)
        h_hat     = AlphaRelativeDensityRatioEstimator.h_hat(K_ref)
        theta_hat = AlphaRelativeDensityRatioEstimator.theta_hat(H_hat, h_hat, self.lambdaRegularizer, self.kernelBasis)

        # Estimate the density ratio, r_alpha_ref = r_alpha(X_ref)
        r_alpha_ref  = AlphaRelativeDensityRatioEstimator.g_of_X_theta(K_ref, theta_hat).T
        # Estimate the density ratio, r_alpha_test = r_alpha(X_test)
        r_alpha_test = AlphaRelativeDensityRatioEstimator.g_of_X_theta(K_test, theta_hat).T

        return (r_alpha_ref, r_alpha_test)

    @staticmethod
    def H_hat(alpha=0.0, KernelMatrix_refSamples=None, KernelMatrix_testSamples=None) :
        """
        Calculates the H_hat term of the theta_hat optimization problem

        Both kernel matrices hold one column per sample; each is averaged
        over its own sample count.
        """
        # An integer alpha would be truncated by Python 2 integer division
        alpha  = float(alpha)
        N_ref  = KernelMatrix_refSamples.shape[1]
        N_test = KernelMatrix_testSamples.shape[1]

        H_hat = (alpha / N_ref) * numpy.dot(KernelMatrix_refSamples, KernelMatrix_refSamples.T) + \
                ( (1.0 - alpha) / N_test ) * numpy.dot(KernelMatrix_testSamples, KernelMatrix_testSamples.T)

        return H_hat

    @staticmethod
    def h_hat(KernelMatrix_refSamples) :
        """
        Calculates the h_hat term of the theta_hat optimization problem
        """
        h_hat = numpy.mean(KernelMatrix_refSamples, 1)

        return h_hat

    @staticmethod
    def theta_hat(H_hat=None, h_hat=None, lambdaRegularizer=0.0, kernelBasis=None) :
        """
        Calculates theta_hat given H_hat, h_hat, lambda, and the kernel basis function
        Treat as a system of linear equations and find the exact, optimal
        solution

        The identity matrix of the regularization term is always sized from
        H_hat.  A kernelBasis that disagreed with H_hat used to be broadcast,
        which added lambda to every entry of H_hat rather than to its
        diagonal.  The kernelBasis argument is kept for interface
        compatibility only.
        """
        basisCount = H_hat.shape[0]
        theta_hat  = linalg.solve(H_hat + (lambdaRegularizer * numpy.eye(basisCount)), h_hat)

        return theta_hat

    @staticmethod
    def J_of_theta(alpha=0.0, g_Xref_theta=None, g_Xtest_theta=None) :
        """
        Calculates the squared error criterion, J
        """
        return ( (alpha / 2.0) * (numpy.mean(g_Xref_theta ** 2) ) +
                 ((1 - alpha) / 2.0) * (numpy.mean(g_Xtest_theta ** 2) ) -
                 numpy.mean(g_Xref_theta) )

    @staticmethod
    def g_of_X_theta(KernelMatrix_samples=None, theta_hat=None) :
        """
        Calculate the alpha-relative density ratio kernel model
        """
        return numpy.dot(KernelMatrix_samples.T, theta_hat)
class PearsonRelativeDivergenceEstimator(Estimator) :
    """
    Scores how far two sample sets are apart with the alpha-relative
    Pearson divergence.

    The divergence is evaluated from the alpha-relative density ratio,
    r_alpha(X), as:

      PE_alpha = -(alpha/2(n_ref)) * SUM(r_alpha(X_ref_i)^2, i=0, n_ref) -
                 ((1-alpha)/2(n_test)) * SUM(r_alpha(X_test_j)^2, j=0, n_test) +
                 (1/n_ref) * SUM(r_alpha(X_ref_i), i=0, n_ref) -
                 1/2

    with r_alpha(X) modelled by the kernel expansion:

      g(X; theta) = SUM( (theta_l * K(X, X_centers_l)), l=0, n )

    Reference:
    Relative Density-Ratio Estimation for Robust Distribution Comparison. Makoto
    Yamada, Taiji Suzuki, Takafumi Kanamori, Hirotaka Hachiya, and Masashi Sugiyama.
    NIPS, page 594-602. (2011)
    """
    alphaConstraint   = None
    sigmaWidth        = None
    lambdaRegularizer = None
    kernelBasis       = None

    def __init__(self, alphaConstraint=0.0, sigmaWidth=1.0, lambdaRegularizer=0.0, kernelBasis=1) :
        """
        Stores the four model parameters that are later handed, unchanged,
        to the density ratio estimator.
        """
        self.alphaConstraint   = alphaConstraint
        self.sigmaWidth        = sigmaWidth
        self.lambdaRegularizer = lambdaRegularizer
        self.kernelBasis       = kernelBasis


    def apply(self, referenceSamples=None, testSamples=None, gaussianCenters=None) :
        """
        Estimates the density ratio on both sample sets and folds it into
        the divergence score.

        Returns the pair (PE_divergence, r_alpha_Xtest)
        """
        ratioModel = AlphaRelativeDensityRatioEstimator(self.alphaConstraint,
                                                        self.sigmaWidth,
                                                        self.lambdaRegularizer,
                                                        self.kernelBasis)

        ratios        = ratioModel.apply(referenceSamples, testSamples, gaussianCenters)
        r_alpha_Xref  = ratios[0]
        r_alpha_Xtest = ratios[1]

        # Linear term first, then half of the alpha weighted second moments
        linearTerm    = numpy.mean(r_alpha_Xref)
        quadraticTerm = 0.5 * ( self.alphaConstraint * numpy.mean(r_alpha_Xref ** 2) +
                                (1.0 - self.alphaConstraint) * numpy.mean(r_alpha_Xtest ** 2) )

        PE_divergence = linearTerm - quadraticTerm - 0.5

        return (PE_divergence, r_alpha_Xtest)
class RULSIF(Estimator) :
    """
    Estimates the alpha-relative Pearson Divergence via Least Squares Relative
    Density Ratio Approximation

    Reference:
    Relative Density-Ratio Estimation for Robust Distribution Comparison. Makoto
    Yamada, Taiji Suzuki, Takafumi Kanamori, Hirotaka Hachiya, and Masashi Sugiyama.
    NIPS, page 594-602. (2011)
    """

    alphaConstraint   = None
    sigmaWidth        = None
    lambdaRegularizer = None
    kernelBasis       = None
    crossFolds        = None
    gaussianCenters   = None
    settings          = None

    def __init__(self, settings=None) :
        """
        Args:
            settings (dict): docopt style option mapping.  The entries
                "--alpha", "--sigma", "--lambda", "--kernels" and "--folds"
                are read here; a missing or None entry selects the default
                (alpha 0.0, sigma None, lambda None, 100 kernels, 5 folds).
                The mapping itself is kept and handed to the debug printers,
                which look up "--debug".  None stands for "no options".
        """
        if settings is None :
            settings = {"--debug" : None}

        self.settings = settings

        self.alphaConstraint   = RULSIF._setting(settings, "--alpha"  , float, 0.0 )
        self.sigmaWidth        = RULSIF._setting(settings, "--sigma"  , float, None)
        self.lambdaRegularizer = RULSIF._setting(settings, "--lambda" , float, None)
        self.kernelBasis       = RULSIF._setting(settings, "--kernels", int  , 100 )
        self.crossFolds        = RULSIF._setting(settings, "--folds"  , int  , 5   )


    @staticmethod
    def _setting(settings, name, convert, default) :
        """
        Reads one option, converting it when present and not None.
        """
        value = settings.get(name)
        return convert(value) if value is not None else default


    def getMedianDistanceBetweenSamples(self, sampleSet=None) :
        """
        Jaakkola's heuristic method for setting the width parameter of the Gaussian
        radial basis function kernel is to pick a quantile (usually the median) of
        the distribution of Euclidean distances between points having different
        labels.

        sampleSet holds one sample per row.  Returns the square root of half the
        median of the strictly positive pairwise squared distances.

        Reference:
        Jaakkola, M. Diekhaus, and D. Haussler. Using the Fisher kernel method to detect
        remote protein homologies. In T. Lengauer, R. Schneider, P. Bork, D. Brutlad, J.
        Glasgow, H.- W. Mewes, and R. Zimmer, editors, Proceedings of the Seventh
        International Conference on Intelligent Systems for Molecular Biology.
        """
        samples      = sampleSet
        squaredNorms = numpy.sum((samples * samples), axis=1)

        distances = squaredNorms[:, None] + squaredNorms[None, :] - 2 * numpy.dot(samples, samples.T)

        # Every pair is counted once: keep the part above the diagonal only
        distances = numpy.triu(distances, 1)

        return numpy.sqrt(0.5 * numpy.median(distances[distances > 0]))


    def computeGaussianWidthCandidates(self, referenceSamples=None, testSamples=None) :
        """
        Compute a candidate list of Gaussian kernel widths. The best width will be
        selected via cross-validation
        """
        allSamples     = numpy.c_[referenceSamples, testSamples]
        medianDistance = self.getMedianDistanceBetweenSamples(allSamples.T)

        return medianDistance * numpy.array([0.6, 0.8, 1, 1.2, 1.4])


    def generateRegularizationParams(self) :
        """
        Generates a candidate list of regularization parameters to be used
        with the L2 (theta^T * theta) regularizer term of the RULSIF
        optimization problem. The best regularizer parameter will be chosen
        via cross-validation
        """
        return 10.0 ** numpy.array([-3, -2, -1, 0, 1])


    def generateAllGaussianCenters(self, referenceSamples=None) :
        """
        Generates kernels in the region where the P(X_reference) takes large values

        Every reference sample (one per column) becomes a center, and
        kernelBasis is overwritten with the number of reference samples.
        """
        self.kernelBasis = referenceSamples.shape[1]
        return referenceSamples[:, numpy.r_[0:self.kernelBasis]]


    def generateRandomGaussianCenters(self, referenceSamples=None) :
        """
        Randomly chooses Gaussian centers as an optimization
        """
        numcols             = referenceSamples.shape[1]
        referenceSampleIdxs = numpy.random.permutation(numcols)

        self.kernelBasis = min(self.kernelBasis, numcols)
        return referenceSamples[:, referenceSampleIdxs[0:self.kernelBasis]]


    def generateFirstNGaussianCenters(self, referenceSamples=None) :
        """
        Chooses the first N samples as Gaussian centers as an optimization
        """
        numcols          = referenceSamples.shape[1]
        self.kernelBasis = min(self.kernelBasis, numcols)
        return referenceSamples[:, numpy.r_[0:self.kernelBasis]]


    def generateGaussianCenters(self, referenceSamples=None) :
        """
        Choose Gaussian centers based on a strategy

        The strategy currently in use turns every reference sample into a center.
        """
        gaussianCenters = self.generateAllGaussianCenters(referenceSamples)

        Matrix.show("Gaussian Centers", gaussianCenters, self.settings)

        return gaussianCenters


    def computeModelParameters(self, referenceSamples=None, testSamples=None, gaussianCenters=None) :
        """
        Computes model parameters via k-fold cross validation process

        Returns the pair (optimalSigma, optimalLambda) with the lowest mean
        value of the objective J over the folds.

        NOTE(review): a fold count of 0 leaves no folds to average over, so
        the scores become NaN; leave-one-out CV is not implemented here.
        """
        refCols  = referenceSamples.shape[1]
        testCols = testSamples.shape[1]

        sigmaWidths      = self.computeGaussianWidthCandidates(referenceSamples, testSamples)
        lambdaCandidates = self.generateRegularizationParams()

        Vector.show("Sigma Candidates", sigmaWidths, self.settings)
        Vector.show("Lambda Candidates", lambdaCandidates, self.settings)

        # Initialize cross validation scoring matrix
        crossValidationScores = numpy.zeros( (numpy.size(sigmaWidths), numpy.size(lambdaCandidates)) )

        # Initialize a cross validation index assignment list
        referenceSamplesCVIdxs  = numpy.random.permutation(refCols)
        referenceSamplesCVSplit = numpy.floor(numpy.r_[0:refCols] * self.crossFolds / refCols)
        testSamplesCVIdxs       = numpy.random.permutation(testCols)
        testSamplesCVSplit      = numpy.floor(numpy.r_[0:testCols] * self.crossFolds / testCols)

        # Initiate k-fold cross-validation procedure. Using variable
        # notation similar to the RULSIF formulas.
        for sigmaIdx, sigma in enumerate(sigmaWidths) :

            # (re-)Calculate the kernel matrix using the candidate sigma width
            K_ref  = GaussianKernel(sigma).apply(referenceSamples, gaussianCenters).T
            K_test = GaussianKernel(sigma).apply(testSamples, gaussianCenters).T

            # The regularizer must match the kernel matrix, which has one row
            # per gaussian center, whatever kernelBasis currently says
            basisCount = K_ref.shape[0]

            # Initialize a new result matrix for the current sigma candidate
            foldResult = numpy.zeros( (self.crossFolds, numpy.size(lambdaCandidates)) )

            for foldIdx in range(self.crossFolds) :

                K_ref_trainingSet  = K_ref[:, referenceSamplesCVIdxs[referenceSamplesCVSplit != foldIdx]]
                K_test_trainingSet = K_test[:, testSamplesCVIdxs[testSamplesCVSplit != foldIdx]]

                # Select the subset of the kernel matrix not used in the training set
                # for use as the test set to validate against.  It does not depend on
                # lambda, so it is sliced once per fold.
                K_ref_testSet  = K_ref[:, referenceSamplesCVIdxs[referenceSamplesCVSplit == foldIdx]]
                K_test_testSet = K_test[:, testSamplesCVIdxs[testSamplesCVSplit == foldIdx]]

                H_h_KthFold = AlphaRelativeDensityRatioEstimator.H_hat(self.alphaConstraint, K_ref_trainingSet, K_test_trainingSet)
                h_h_KthFold = AlphaRelativeDensityRatioEstimator.h_hat(K_ref_trainingSet)

                for lambdaIdx, lambdaCandidate in enumerate(lambdaCandidates) :

                    theta_h_KthFold = AlphaRelativeDensityRatioEstimator.theta_hat(H_h_KthFold, h_h_KthFold, lambdaCandidate, basisCount)

                    r_alpha_Xref  = AlphaRelativeDensityRatioEstimator.g_of_X_theta(K_ref_testSet , theta_h_KthFold)
                    r_alpha_Xtest = AlphaRelativeDensityRatioEstimator.g_of_X_theta(K_test_testSet, theta_h_KthFold)

                    # Calculate the objective function J(theta) under the current parameters
                    J = AlphaRelativeDensityRatioEstimator.J_of_theta(self.alphaConstraint, r_alpha_Xref, r_alpha_Xtest)

                    foldResult[foldIdx, lambdaIdx] = J

            crossValidationScores[sigmaIdx, :] = numpy.mean(foldResult, 0)

        Matrix.show("Cross-Validation Scores", crossValidationScores, self.settings)

        crossValidationMinScores       = crossValidationScores.min(1)
        crossValidationMinIdxForLambda = crossValidationScores.argmin(1)
        crossValidationMinIdxForSigma  = crossValidationMinScores.argmin()

        optimalSigma  = sigmaWidths[crossValidationMinIdxForSigma]
        optimalLambda = lambdaCandidates[crossValidationMinIdxForLambda[crossValidationMinIdxForSigma]]

        return (optimalSigma, optimalLambda)


    def train(self, referenceSamples=None, testSamples=None) :
        """
        Learn the proper model parameters

        Chooses the gaussian centers, selects sigma and lambda by cross
        validation and stores all three on the instance for apply().
        """

        # Reset RNG to ensure consistency of experimental results. In a production
        # environment, the RNG should use a truly random seed and hyper-parameters
        numpy.random.seed(0)

        self.gaussianCenters = self.generateGaussianCenters(referenceSamples)

        (optimalSigma, optimalLambda) = self.computeModelParameters(referenceSamples, testSamples, self.gaussianCenters)

        self.sigmaWidth        = optimalSigma
        self.lambdaRegularizer = optimalLambda


    def apply(self, referenceSamples=None, testSamples=None) :
        """
        Estimates the alpha-relative Pearson divergence as determined by the relative
        ratio of probability densities:

          P(ReferenceSamples[x]) / (alpha * P(ReferenceSamples[x]) + (1 - alpha) * P(TestSamples[x]))

        from samples:
          ReferenceSamples[x_i] | ReferenceSamples[x_i] in R^{d}, with i=1 to ReferenceSamples{N}

        drawn independently from P(ReferenceSamples[x])

        and from samples:
          TestSamples[x_j] | TestSamples[x_j] in R^{d}, with j=1 to TestSamples{N}

        drawn independently from P(TestSamples[x])

        After the model hyper-parameters have been learned and chosen by the train()
        method, the RULSIF algorithm can be applied repeatedly on both in-sample and out
        of sample data

        Raises:
            RuntimeError: when the gaussian centers, sigma or lambda have not
                been set yet, or when sigma or lambda is zero
        """

        if self.gaussianCenters is None or self.kernelBasis is None :
            raise RuntimeError("Missing kernel basis function parameters")

        # Untrained parameters are None, not 0.0, so both must be checked
        for parameter in (self.sigmaWidth, self.lambdaRegularizer) :
            if parameter is None or parameter == 0.0 :
                raise RuntimeError("Missing model selection parameters")

        divergenceEstimator       = PearsonRelativeDivergenceEstimator(self.alphaConstraint, self.sigmaWidth, self.lambdaRegularizer, self.kernelBasis)
        (PE_alpha, r_alpha_Xtest) = divergenceEstimator.apply(referenceSamples, testSamples, self.gaussianCenters)

        self.show("RULSIF Results", self.sigmaWidth, self.lambdaRegularizer, PE_alpha, self.settings)

        return PE_alpha


    def show(self, displayName=None, optimalSigma=0.0, optimalLambda=0.0, PE_alpha=0.0, options=None) :
        """
        Prints the model parameters and the divergence score, but only when
        options holds a truthy "--debug" entry.
        """
        if not options or not options.get("--debug") :
            return

        print("[" + str(displayName) + "]\n")
        print("Alpha Constraint         : " + str(self.alphaConstraint))
        print("Kernel Basis Functions   : " + str(self.kernelBasis))
        print("Basis Function Width     : " + str(optimalSigma))
        print("Regularization Parameter : " + str(optimalLambda))
        print("Pearson Divergence Score : " + str(PE_alpha))
        print("\n")
        print("---------------")
7 | # 8 | """ 9 | Initialization code for the PyBuilder build support package 10 | 11 | For more information, read: https://docs.python.org/2/tutorial/modules.html 12 | """ 13 | from __future__ import print_function 14 | 15 | import sys 16 | 17 | __python_version__ = (2, 7) 18 | 19 | if sys.version_info[:2] < __python_version__ : 20 | print("PyBuilder build support requires Python version {}".format(".".join(map(str, __python_version__))), file=sys.stderr) 21 | sys.exit(1) 22 | -------------------------------------------------------------------------------- /support/build/clean_project.py: -------------------------------------------------------------------------------- 1 | # -*- c-file-style: "sourcery" -*- 2 | # 3 | # Use and distribution of this software and its source code is governed 4 | # by the terms and conditions defined in the "LICENSE" file that is part 5 | # of this source code package. 6 | # 7 | """ 8 | Remove build artifacts not normally managed by the default "clean" phase 9 | """ 10 | import os 11 | import glob 12 | import shutil 13 | 14 | from pybuilder.core import use_plugin 15 | from pybuilder.core import init 16 | from pybuilder.core import task 17 | from pybuilder.core import depends 18 | from pybuilder.core import description 19 | from pybuilder.utils import apply_on_files 20 | 21 | use_plugin("core") 22 | 23 | 24 | @init 25 | def init_clean_project_plugin(project) : 26 | project.set_property_if_unset("clean_project_files_glob", []) 27 | 28 | 29 | @task 30 | @depends("clean") 31 | @description("Cleans build artifacts from project directories.") 32 | def clean_project(project, logger) : 33 | file_globs = project.get_mandatory_property("clean_project_files_glob") 34 | if not file_globs : 35 | logger.warn("No files to clean configured. 
Consider removing plugin.") 36 | return 37 | 38 | for file_glob in file_globs : 39 | clean_project_files(file_glob, logger) 40 | 41 | 42 | def clean_project_files(path_or_glob, logger) : 43 | """ 44 | Resolve file name references and ensure they are properly deleted 45 | """ 46 | if "*" in path_or_glob : 47 | files_to_clean = glob.glob(path_or_glob) 48 | else : 49 | files_to_clean = [os.path.expanduser(path_or_glob)] 50 | 51 | for file_to_clean in files_to_clean : 52 | if not os.path.exists(file_to_clean) : 53 | continue 54 | 55 | if os.path.isdir(file_to_clean) : 56 | logger.info("Removing directory {}".format(file_to_clean)) 57 | shutil.rmtree(file_to_clean) 58 | else : 59 | logger.info("Removing file {}".format(file_to_clean)) 60 | os.remove(file_to_clean) 61 | -------------------------------------------------------------------------------- /support/build/copy_files.py: -------------------------------------------------------------------------------- 1 | # -*- c-file-style: "sourcery" -*- 2 | # 3 | # Use and distribution of this software and its source code is governed 4 | # by the terms and conditions defined in the "LICENSE" file that is part 5 | # of this source code package. 6 | # 7 | """ 8 | Copies files specified in the copy_files_glob list to the top level 9 | of the copy_files_target 10 | """ 11 | import os 12 | import shutil 13 | 14 | from pybuilder.core import use_plugin 15 | from pybuilder.core import init 16 | from pybuilder.core import task 17 | from pybuilder.utils import apply_on_files 18 | 19 | use_plugin("core") 20 | 21 | 22 | @init 23 | def init_copy_files_plugin(project) : 24 | project.set_property_if_unset("copy_root_files_target", "$dir_target") 25 | project.set_property_if_unset("copy_root_files_glob", []) 26 | 27 | 28 | @task 29 | def package(project, logger) : 30 | globs = project.get_mandatory_property("copy_root_files_glob") 31 | if not globs : 32 | logger.warn("No files to copy configured. 
Consider removing plugin.") 33 | return 34 | 35 | source = project.basedir 36 | target = project.expand_path("$copy_root_files_target") 37 | logger.info("Copying files matching '%s' from %s to %s", " ".join(globs), source, target) 38 | 39 | apply_on_files(source, copy_files, globs, target, logger) 40 | 41 | 42 | def copy_files(absolute_file_name, relative_file_name, target, logger) : 43 | logger.debug("Copying files %s", relative_file_name) 44 | 45 | parent = os.path.dirname(target) 46 | if not os.path.exists(parent) : 47 | os.makedirs(parent) 48 | shutil.copy(absolute_file_name, target) 49 | -------------------------------------------------------------------------------- /support/build/devpi.py: -------------------------------------------------------------------------------- 1 | # -*- c-file-style: "sourcery" -*- 2 | # 3 | # Use and distribution of this software and its source code is governed 4 | # by the terms and conditions defined in the "LICENSE" file that is part 5 | # of this source code package. 
6 | """ 7 | PyBuilder devpi plugin 8 | """ 9 | import os 10 | import subprocess 11 | 12 | from pybuilder.core import use_plugin 13 | from pybuilder.core import init 14 | from pybuilder.core import task 15 | from pybuilder.core import description 16 | from pybuilder.core import depends 17 | from pybuilder.errors import BuildFailedException 18 | from pybuilder.utils import assert_can_execute 19 | 20 | use_plugin("python.core") 21 | 22 | @init 23 | def initialize_devpi_plugin(project) : 24 | project.set_property_if_unset("devpi_user" , "root") 25 | project.set_property_if_unset("devpi_staging_index", "root") 26 | project.set_property_if_unset("devpi_release_index", "root") 27 | 28 | 29 | @task("stage") 30 | @depends("publish") 31 | @description("Upload packaged distribution to devpi server's staging index") 32 | def upload_distribution(project, logger) : 33 | project.build_depends_on("devpi") 34 | assert_can_execute(["devpi", "--version"], prerequisite="devpi PyPi Server", caller="devpi_plugin") 35 | 36 | index_name = "{}/{}".format(project.get_property("devpi_user"), project.get_property("devpi_staging_index")) 37 | 38 | logger.info("Uploading binary distribution in %s to staging index %s", project.expand_path("$dir_dist"), index_name) 39 | 40 | run_devpi_command(project, logger, ["login", project.get_property("devpi_user"), "--password="]) 41 | run_devpi_command(project, logger, ["use", project.get_property("devpi_staging_index")]) 42 | run_devpi_command(project, logger, ["upload", "--no-vcs", "--formats=bdist_wheel,bdist_egg"]) 43 | 44 | 45 | @task("release") 46 | @depends("stage") 47 | @description("Push uploaded distribution to devpi server's release index from its staging index") 48 | def push_distribution(project, logger) : 49 | project.build_depends_on("devpi") 50 | assert_can_execute(["devpi", "--version"], prerequisite="devpi PyPi Server", caller="devpi_plugin") 51 | 52 | distribution_name = "{}-{}".format(project.name, project.version) 53 | index_name = 
"{}/{}".format(project.get_property("devpi_user"), project.get_property("devpi_release_index")) 54 | 55 | logger.info("Pushing binary distribution %s to release index %s", distribution_name, index_name) 56 | 57 | run_devpi_command(project, logger, ["login", project.get_property("devpi_user"), "--password="]) 58 | run_devpi_command(project, logger, ["use", project.get_property("devpi_release_index")]) 59 | run_devpi_command(project, logger, ["push", distribution_name, index_name]) 60 | 61 | 62 | def run_devpi_command(project, logger, params) : 63 | reports_dir = project.expand_path("$dir_reports/devpi") 64 | if not os.path.exists(reports_dir) : 65 | os.mkdir(reports_dir) 66 | 67 | logger.debug("Executing devpi command %s", params) 68 | 69 | output_file_path = os.path.join(reports_dir, params[0].replace("/", "")) 70 | 71 | with open(output_file_path, "w") as output_file : 72 | commandexec = ["devpi"] 73 | commandexec.extend(params) 74 | working_dir = project.expand_path("$dir_dist") 75 | process = subprocess.Popen(commandexec, cwd=working_dir, stdout=output_file, stderr=output_file, shell=False) 76 | return_code = process.wait() 77 | if return_code != 0 : 78 | raise BuildFailedException("Error while executing devpi command %s, see %s for details" % (params, output_file_path)) 79 | -------------------------------------------------------------------------------- /support/build/distribute.py: -------------------------------------------------------------------------------- 1 | # -*- c-file-style: "sourcery" -*- 2 | # 3 | # Use and distribution of this software and its source code is governed 4 | # by the terms and conditions defined in the "LICENSE" file that is part 5 | # of this source code package. 
6 | # 7 | """ 8 | Fork of the PyBuilder distutils_plugin 9 | """ 10 | import os 11 | import string 12 | import subprocess 13 | import sys 14 | 15 | try: 16 | from StringIO import StringIO 17 | except ImportError as e: 18 | from io import StringIO 19 | 20 | from pybuilder.core import after 21 | from pybuilder.core import before 22 | from pybuilder.core import use_plugin 23 | from pybuilder.core import init 24 | from pybuilder.core import task 25 | from pybuilder.core import description 26 | from pybuilder.core import depends 27 | from pybuilder.core import RequirementsFile 28 | from pybuilder.core import Dependency 29 | from pybuilder.errors import BuildFailedException 30 | from pybuilder.utils import as_list 31 | 32 | from pybuilder.pip_utils import build_dependency_version_string 33 | 34 | use_plugin("python.core") 35 | 36 | DATA_FILES_PROPERTY = "distutils_data_files" 37 | SETUP_TEMPLATE = string.Template("""#!/usr/bin/env python 38 | $remove_hardlink_capabilities_for_shared_filesystems 39 | from $module import setup 40 | 41 | if __name__ == '__main__': 42 | setup( 43 | name = '$name', 44 | version = '$version', 45 | description = '''$summary''', 46 | long_description = '''$description''', 47 | author = "$author", 48 | author_email = "$author_email", 49 | license = '$license', 50 | url = '$url', 51 | scripts = $scripts, 52 | packages = $packages, 53 | py_modules = $modules, 54 | classifiers = $classifiers, 55 | entry_points={ 56 | 'console_scripts': 57 | [$console_scripts] 58 | }, 59 | $data_files 60 | $package_data 61 | $dependencies 62 | $dependency_links 63 | zip_safe=True 64 | ) 65 | """) 66 | 67 | 68 | def default(value, default="") : 69 | if value is None: 70 | return default 71 | return value 72 | 73 | 74 | @init 75 | def initialize_distutils_plugin(project) : 76 | project.set_property_if_unset("distutils_commands", ["sdist", "bdist_dumb"]) 77 | 78 | # Workaround for http://bugs.python.org/issue8876 , unable to build a bdist 79 | # on a filesystem that 
does not support hardlinks 80 | project.set_property_if_unset("distutils_issue8876_workaround_enabled", False) 81 | 82 | project.set_property_if_unset("distutils_classifiers", [ 83 | "Private :: Do Not Upload", 84 | "Development Status :: 3 - Alpha", 85 | "Programming Language :: Python" 86 | ]) 87 | 88 | project.set_property_if_unset("distutils_use_setuptools", True) 89 | 90 | project.set_property_if_unset("distutils_devpi_user" , "root") 91 | project.set_property_if_unset("distutils_devpi_index", "dev") 92 | 93 | 94 | @after("package") 95 | def write_setup_script(project, logger) : 96 | setup_script = project.expand_path("$dir_dist/setup.py") 97 | logger.info("Writing setup.py as %s", setup_script) 98 | 99 | with open(setup_script, "w") as setup_file : 100 | setup_file.write(render_setup_script(project)) 101 | 102 | os.chmod(setup_script, 0o755) 103 | 104 | 105 | def render_setup_script(project) : 106 | author = ", ".join(map(lambda a: a.name, project.authors)) 107 | author_email = ", ".join(map(lambda a: a.email, project.authors)) 108 | console_scripts = project.get_property("distutils_console_scripts", []) 109 | 110 | template_values = { 111 | "module" : "setuptools" if project.get_property("distutils_use_setuptools") else "distutils.core", 112 | "name" : project.name, 113 | "version" : project.version, 114 | "summary" : default(project.summary), 115 | "description" : default(project.description), 116 | "author" : author, 117 | "author_email" : author_email, 118 | "license" : default(project.license), 119 | "url" : default(project.url), 120 | "scripts" : build_scripts_string(project), 121 | "packages" : str([package for package in project.list_packages()]), 122 | "modules" : str([module for module in project.list_modules()]), 123 | "classifiers" : project.get_property("distutils_classifiers"), 124 | "console_scripts" : ",".join(["'%s'" % mapping for mapping in console_scripts]), 125 | "data_files" : build_data_files_string(project), 126 | "package_data" : 
build_package_data_string(project), 127 | "dependencies" : build_install_dependencies_string(project), 128 | "dependency_links" : build_dependency_links_string(project), 129 | "remove_hardlink_capabilities_for_shared_filesystems": ( 130 | "import os\ndel os.link" if project.get_property("distutils_issue8876_workaround_enabled") else "" 131 | ) 132 | } 133 | 134 | return SETUP_TEMPLATE.substitute(template_values) 135 | 136 | 137 | @after("package") 138 | def write_manifest_file(project, logger) : 139 | if len(project.manifest_included_files) == 0 : 140 | logger.debug("No data to write into MANIFEST.in") 141 | return 142 | 143 | logger.debug("Files included in MANIFEST.in: %s" % project.manifest_included_files) 144 | 145 | manifest_filename = project.expand_path("$dir_dist/MANIFEST.in") 146 | logger.info("Writing MANIFEST.in as %s", manifest_filename) 147 | 148 | with open(manifest_filename, "w") as manifest_file: 149 | manifest_file.write(render_manifest_file(project)) 150 | 151 | os.chmod(manifest_filename, 0o664) 152 | 153 | 154 | def render_manifest_file(project) : 155 | manifest_content = StringIO() 156 | 157 | for included_file in project.manifest_included_files : 158 | manifest_content.write("include %s\n" % included_file) 159 | 160 | return manifest_content.getvalue() 161 | 162 | 163 | @before("publish") 164 | def build_binary_distribution(project, logger) : 165 | logger.info("Building binary distribution in %s", project.expand_path("$dir_dist")) 166 | commands = as_list(project.get_property("distutils_commands")) 167 | run_setup_commands(project, logger, commands) 168 | 169 | 170 | @task("develop") 171 | @depends("package") 172 | @description("Create an .egg-link in site-packages back to the packaged source") 173 | def build_developer_distribution(project, logger) : 174 | logger.info("Creating .egg-link in site packages to %s", project.expand_path("$dir_dist")) 175 | run_setup_commands(project, logger, ["develop"]) 176 | 177 | 178 | def 
run_setup_commands(project, logger, commands) : 179 | reports_dir = project.expand_path("$dir_reports/distutils") 180 | if not os.path.exists(reports_dir) : 181 | os.mkdir(reports_dir) 182 | 183 | setup_script = project.expand_path("$dir_dist/setup.py") 184 | 185 | for command in commands : 186 | logger.debug("Executing distutils command %s", command) 187 | 188 | output_file_path = os.path.join(reports_dir, command.replace("/", "")) 189 | 190 | with open(output_file_path, "w") as output_file : 191 | commandexec = [sys.executable, setup_script] 192 | commandexec.extend(command.split()) 193 | working_dir = project.expand_path("$dir_dist") 194 | process = subprocess.Popen(commandexec, cwd=working_dir, stdout=output_file, stderr=output_file, shell=False) 195 | return_code = process.wait() 196 | if return_code != 0 : 197 | raise BuildFailedException("Error while executing setup command %s, see %s for details" % (command, output_file_path)) 198 | 199 | 200 | def strip_comments(requirements) : 201 | return [requirement for requirement in requirements if not requirement.strip().startswith("#")] 202 | 203 | 204 | def quote(requirements) : 205 | return ['"%s"' % requirement for requirement in requirements] 206 | 207 | 208 | def flatten_and_quote(requirements_file) : 209 | with open(requirements_file.name, 'r') as requirements_file : 210 | requirements = [requirement.strip("\n") for requirement in requirements_file.readlines()] 211 | requirements = [requirement for requirement in requirements if requirement] 212 | return quote(strip_comments(requirements)) 213 | 214 | 215 | def format_single_dependency(dependency) : 216 | return '"%s%s"' % (dependency.name, build_dependency_version_string(dependency)) 217 | 218 | 219 | def build_install_dependencies_string(project) : 220 | dependencies = [ 221 | dependency for dependency in project.dependencies if isinstance(dependency, Dependency) and not dependency.url 222 | ] 223 | 224 | requirements = [ 225 | requirements for requirements 
in project.dependencies if isinstance(requirements, RequirementsFile) 226 | ] 227 | 228 | if not dependencies and not requirements : 229 | return "" 230 | 231 | dependencies = [format_single_dependency(dependency) for dependency in dependencies] 232 | requirements = [strip_comments(flatten_and_quote(requirement)) for requirement in requirements] 233 | flattened_requirements = [dependency for dependency_list in requirements for dependency in dependency_list] 234 | 235 | dependencies.extend(flattened_requirements) 236 | 237 | result = "install_requires = [ " 238 | result += ", ".join(dependencies) 239 | result += " ]," 240 | 241 | return result 242 | 243 | 244 | def build_dependency_links_string(project) : 245 | dependency_links = [ 246 | dependency for dependency in project.dependencies if isinstance(dependency, Dependency) and dependency.url 247 | ] 248 | 249 | if not dependency_links : 250 | return "" 251 | 252 | def format_single_dependency(dependency) : 253 | return '"%s"' % dependency.url 254 | 255 | result = "dependency_links = [ " 256 | result += ", ".join(map(format_single_dependency, dependency_links)) 257 | result += " ]," 258 | 259 | return result 260 | 261 | 262 | def build_scripts_string(project) : 263 | scripts = [script for script in project.list_scripts()] 264 | 265 | scripts_dir = project.get_property("dir_dist_scripts") 266 | if scripts_dir : 267 | scripts = list(map(lambda s : os.path.join(scripts_dir, s), scripts)) 268 | 269 | return str(scripts) 270 | 271 | 272 | def build_data_files_string(project) : 273 | data_files = project.files_to_install 274 | 275 | if not len(data_files) : 276 | return "" 277 | 278 | return "data_files = %s," % str(data_files) 279 | 280 | 281 | def build_package_data_string(project) : 282 | package_data = project.package_data 283 | if package_data == {} : 284 | return "" 285 | 286 | package_data_string = "package_data = {" 287 | 288 | sorted_keys = sorted(package_data.keys()) 289 | last_element = sorted_keys[-1] 290 | 
291 | for key in sorted_keys : 292 | package_data_string += "'%s': %s" % (key, str(package_data[key])) 293 | 294 | if key is not last_element : 295 | package_data_string += ", " 296 | 297 | package_data_string += "}," 298 | 299 | return package_data_string 300 | -------------------------------------------------------------------------------- /support/deps/pip-compile-deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # 3 | # Generate pip requirements.txt specification for build and runtime dependencies 4 | # 5 | ################################################################################ 6 | 7 | ################################################################################ 8 | # 9 | # Output variables 10 | # 11 | ################################################################################ 12 | 13 | PIP_COMPILER_BUILD_DEPS_OUTFILE=requirements-build.txt 14 | PIP_COMPILER_RUNTIME_DEPS_OUTFILE=requirements.txt 15 | 16 | ################################################################################ 17 | # 18 | # Input variables 19 | # 20 | ################################################################################ 21 | 22 | PIP_COMPILER_BUILD_DEPS_INFILE=requirements-build.in 23 | PIP_COMPILER_RUNTIME_DEPS_INFILE=requirements-runtime.in 24 | 25 | 26 | function compile_requirements() 27 | { 28 | echo "Generating build dependencies list from [$PIP_COMPILER_BUILD_DEPS_INFILE]" 29 | pip-compile -v --no-index -o $PIP_COMPILER_BUILD_DEPS_OUTFILE $PIP_COMPILER_BUILD_DEPS_INFILE 30 | 31 | echo "Generating runtime dependencies list from [$PIP_COMPILER_RUNTIME_DEPS_INFILE]" 32 | pip-compile -v --no-index -o $PIP_COMPILER_RUNTIME_DEPS_OUTFILE $PIP_COMPILER_RUNTIME_DEPS_INFILE 33 | } 34 | 35 | 36 | function sync_requirements() 37 | { 38 | echo "Synchronizing environment with dependency specifications" 39 | 40 | pip-sync $PIP_COMPILER_BUILD_DEPS_OUTFILE $PIP_COMPILER_RUNTIME_DEPS_OUTFILE 41 | } 42 | 43 | 
44 | if [ $# -eq 1 ]; then 45 | PIP_COMPILER_BUILD_DIRECTORY="$1" 46 | else 47 | echo "Missing source directory argument." 48 | PIP_COMPILER_BUILD_DIRECTORY= 49 | exit 1 50 | fi 51 | 52 | case "$PIP_BUILD_ENV" in 53 | 54 | dev|devel|debug) 55 | cd "$PIP_COMPILER_BUILD_DIRECTORY" 56 | 57 | PIP_COMPILER_BUILD_DEPS_INFILE=requirements-dev-build.in 58 | PIP_COMPILER_RUNTIME_DEPS_INFILE=requirements-dev-runtime.in 59 | 60 | compile_requirements 61 | ;; 62 | 63 | sync) 64 | cd "$PIP_COMPILER_BUILD_DIRECTORY" 65 | 66 | sync_requirements 67 | ;; 68 | *) 69 | cd "$PIP_COMPILER_BUILD_DIRECTORY" 70 | 71 | PIP_COMPILER_BUILD_DEPS_INFILE=requirements-prod-build.in 72 | PIP_COMPILER_RUNTIME_DEPS_INFILE=requirements-prod-runtime.in 73 | 74 | compile_requirements 75 | ;; 76 | 77 | esac 78 | -------------------------------------------------------------------------------- /support/deps/requirements-dev-build.in: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | # 3 | # Top-level packages required for dev builds 4 | # 5 | ###################################################################################### 6 | 7 | devpi-client 8 | pip-tools 9 | cookiecutter 10 | bumpversion 11 | mockito 12 | pytest 13 | coverage 14 | flake8 15 | sphinx 16 | tox 17 | wheel 18 | pybuilder 19 | -------------------------------------------------------------------------------- /support/deps/requirements-dev-runtime.in: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | # 3 | # Top-level packages required for dev runtime 4 | # 5 | ###################################################################################### 6 | 7 | six 8 | docopt 9 | numpy 10 | scipy 11 | scikit-learn 12 | -------------------------------------------------------------------------------- 
/support/deps/requirements-prod-build.in: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | # 3 | # Top-level packages required for prod builds 4 | # 5 | ###################################################################################### 6 | 7 | devpi-client==2.7.0 8 | pip-tools==1.7.0 9 | cookiecutter==1.4.0 10 | bumpversion==0.5.3 11 | mockito==0.7.0 12 | pytest==3.0.3 13 | coverage==4.2 14 | flake8==3.0.4 15 | sphinx==1.4.8 16 | tox==2.4.1 17 | wheel==0.29.0 18 | pybuilder==0.11.9 19 | -------------------------------------------------------------------------------- /support/deps/requirements-prod-runtime.in: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | # 3 | # Top-level packages required for prod runtime 4 | # 5 | ###################################################################################### 6 | 7 | six>=1.10 8 | docopt>=0.6.2 9 | numpy>=1.11.2 10 | scipy>=0.18.1 11 | scikit-learn>=0.18 12 | -------------------------------------------------------------------------------- /support/dist/setup.cfg: -------------------------------------------------------------------------------- 1 | ######################################################################################## 2 | # 3 | # Setup configuration file 4 | # 5 | ######################################################################################## 6 | 7 | 8 | [bdist_wheel] 9 | universal = 1 10 | 11 | [aliases] 12 | release = register clean --all sdist bdist_wheel upload 13 | 14 | [flake8] 15 | # pep8 configuration http://pep8.readthedocs.org/en/latest/intro.html#configuration 16 | max-line-length = 140 17 | exclude = tests/*, */migrations/*, */south_migrations/* 18 | ignore = E201,E202,E203,E221,E272,E302,E303,E501 19 | 20 | [pytest] 21 | 
norecursedirs = 22 | .git 23 | .tox 24 | dist 25 | build 26 | south_migrations 27 | migrations 28 | 29 | python_files = 30 | test_*.py 31 | *_test.py 32 | tests.py 33 | 34 | addopts = 35 | -rxEfs 36 | --strict 37 | --ignore docs/conf.py 38 | --ignore setup.py 39 | --ignore bootstrap.py 40 | --doctest-modules 41 | --doctest-glob \*.rst 42 | --tb short 43 | 44 | [isort] 45 | force_single_line = True 46 | line_length = 120 47 | known_first_party = shift_detect 48 | default_section = THIRDPARTY 49 | forced_separate = test_shift_detect 50 | -------------------------------------------------------------------------------- /support/dist/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Main entry point for package setup 4 | # 5 | import io 6 | import os 7 | import re 8 | 9 | from glob import glob 10 | from os.path import basename 11 | from os.path import dirname 12 | from os.path import join 13 | from os.path import splitext 14 | 15 | from setuptools import find_packages 16 | from setuptools import setup 17 | from setuptools import Command 18 | 19 | # 20 | # Function to read a file. Used for populating setup.py configuration 21 | # options. 22 | # 23 | def read(*args, **kwargs) : 24 | return io.open(join(dirname(__file__), *args), encoding=kwargs.get("encoding", "utf8")).read() 25 | 26 | 27 | # 28 | # Custom clean command specified as a setup cmdclass extension. 
29 | # 30 | class RealClean(Command) : 31 | """Custom clean command to tidy up the project root.""" 32 | 33 | user_options = [] 34 | 35 | def initialize_options(self) : 36 | pass 37 | 38 | def finalize_options(self) : 39 | pass 40 | 41 | def run(self) : 42 | os.system('rm -vrf ./build ./dist ./*.pyc ./*.tgz ./*.egg-info ./src/*.egg-info') 43 | 44 | 45 | 46 | 47 | kwargs = { 48 | "name" : "change-detect", 49 | "version" : "0.1.0", 50 | "license" : "MPL", 51 | "description" : "Covariate shift detector.", 52 | "long_description" : "{0}\n".format(read("README.rst")), 53 | "author" : "Paolo de Dios", 54 | "author_email" : "paolodedios@gmail.com", 55 | "url" : "http://appliedtheory.io/", 56 | "packages" : find_packages("src"), 57 | "package_dir" : {"" : "src"}, 58 | "py_modules" : [splitext(basename(path))[0] for path in glob("src/*.py")], 59 | "include_package_data" : True, 60 | "zip_safe" : False, 61 | "classifiers" : [ 62 | # For a complete classifier list, @see http://pypi.python.org/pypi?%3Aaction=list_classifiers 63 | "Development Status :: 5 - Production/Stable", 64 | "Intended Audience :: Developers", 65 | "License :: Other/Proprietary License" 66 | "Operating System :: Unix", 67 | "Operating System :: POSIX", 68 | "Programming Language :: Python", 69 | "Programming Language :: Python :: 2.6", 70 | "Programming Language :: Python :: 2.7", 71 | "Programming Language :: Python :: 3", 72 | "Programming Language :: Python :: 3.3", 73 | "Programming Language :: Python :: 3.4", 74 | "Programming Language :: Python :: Implementation :: CPython", 75 | "Programming Language :: Python :: Implementation :: PyPy", 76 | "Topic :: Utilities", 77 | ], 78 | "keywords" : [ 79 | # eg: "keyword1", "keyword2", "keyword3", 80 | ], 81 | "install_requires" : [ 82 | # eg: "aspectlib==1.1.1", "six>=1.7", 83 | ], 84 | "extras_require" : { 85 | # eg: "rst": ["docutils>=0.11"], 86 | }, 87 | "entry_points" : { 88 | "console_scripts": [ 89 | "shift_detect = shift_detect.__main__:main" 90 | ] 
91 | }, 92 | "cmdclass" : { 93 | "realclean" : RealClean 94 | }, 95 | } 96 | 97 | setup(**kwargs) 98 | -------------------------------------------------------------------------------- /support/dist/tox.ini: -------------------------------------------------------------------------------- 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ;; 3 | ;; Generative settings for tox 4 | ;; @see: https://testrun.org/tox/latest/config.html#generative-envlist 5 | ;; 6 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 7 | 8 | [tox] 9 | envlist = 10 | clean, 11 | check, 12 | lint, 13 | {py26, py27, py33, py34, pypy}, 14 | {py26, py27, py33, py34, pypy}-nocover, 15 | report, 16 | docs 17 | 18 | [testenv] 19 | basepython = 20 | pypy: pypy 21 | py26: python2.6 22 | {py27,docs}: python2.7 23 | py33: python3.3 24 | py34: python3.4 25 | {clean,check,report,coveralls}: python3.4 26 | setenv = 27 | PYTHONPATH={toxinidir}/tests 28 | PYTHONUNBUFFERED=yes 29 | usedevelop = true 30 | deps = 31 | pytest 32 | pytest-capturelog 33 | pytest-cov 34 | commands = 35 | {posargs:py.test --cov=src --cov-report=term-missing -vv} 36 | 37 | 38 | [testenv:spell] 39 | setenv = 40 | SPELLCHECK=1 41 | usedevelop = true 42 | deps = 43 | -r{toxinidir}/docs/requirements.txt 44 | sphinxcontrib-spelling 45 | pyenchant 46 | commands = 47 | sphinx-build -b spelling docs dist/docs 48 | 49 | [testenv:lint] 50 | deps = 51 | -r{toxinidir}/requirements.txt 52 | flake8 53 | commands = 54 | flake8 55 | 56 | [testenv:docs] 57 | whitelist_externals = 58 | rm 59 | usedevelop = true 60 | deps = 61 | -r{toxinidir}/docs/requirements.txt 62 | commands = 63 | rm -rf dist/docs || rmdir /S /Q dist\docs 64 | sphinx-build -b html docs dist/docs 65 | sphinx-build -b linkcheck docs dist/docs 66 | 67 | [testenv:check] 68 | basepython = python3.4 69 | deps = 70 | docutils 71 | check-manifest 72 | flake8 73 | collective.checkdocs 74 | pygments 75 | 
usedevelop = true 76 | commands = 77 | python setup.py checkdocs 78 | python setup.py check --strict --metadata 79 | check-manifest {toxinidir} 80 | flake8 src 81 | 82 | [testenv:coveralls] 83 | deps = 84 | coveralls 85 | usedevelop = true 86 | commands = 87 | coverage combine 88 | coverage report 89 | coveralls 90 | 91 | [testenv:report] 92 | basepython = python3.4 93 | usedevelop = true 94 | deps = coverage 95 | commands = 96 | coverage combine 97 | coverage report 98 | 99 | [testenv:clean] 100 | usedevelop = true 101 | deps = coverage 102 | commands = 103 | coverage erase 104 | python setup.py clean --all 105 | python setup.py realclean 106 | 107 | [testenv:py26-nocover] 108 | usedevelop = false 109 | commands ={posargs:py.test -vv --ignore=src} 110 | 111 | [testenv:py27-nocover] 112 | usedevelop = false 113 | commands ={posargs:py.test -vv --ignore=src} 114 | 115 | [testenv:py33-nocover] 116 | usedevelop = false 117 | commands = 118 | {posargs:py.test -vv --ignore=src} 119 | 120 | [testenv:py34-nocover] 121 | usedevelop = false 122 | commands = 123 | {posargs:py.test -vv --ignore=src} 124 | 125 | 126 | [testenv:pypy-nocover] 127 | usedevelop = false 128 | commands = 129 | {posargs:py.test -vv --ignore=src} 130 | --------------------------------------------------------------------------------