├── .gitignore ├── LICENSE.txt ├── README.md ├── Vagrantfile ├── doc └── main.pdf ├── puppet └── manifests │ ├── classes │ ├── db.pp │ ├── init.pp │ ├── python.pp │ └── run.pp │ └── vagrant.pp └── refinery ├── bnpy ├── bnpy-dev │ ├── .gitignore │ ├── README.md │ ├── bnpy │ │ ├── ContinueRun.py │ │ ├── HModel.py │ │ ├── Run.py │ │ ├── __init__.py │ │ ├── allocmodel │ │ │ ├── AllocModel.py │ │ │ ├── __init__.py │ │ │ ├── admix │ │ │ │ ├── AdmixModel.py │ │ │ │ ├── BurstyVariationalOptimizer.py │ │ │ │ ├── HDPBetaOptimizer.py │ │ │ │ ├── HDPFullHard.py │ │ │ │ ├── HDPHardMult.py │ │ │ │ ├── HDPModel.py │ │ │ │ ├── HDPModelLP2.py │ │ │ │ ├── HDPPE.py │ │ │ │ ├── HDPRelModel.py │ │ │ │ ├── HDPSoft2Hard.py │ │ │ │ ├── OptimizerForHDPFullVarModel.py │ │ │ │ ├── OptimizerForHDPPE.py │ │ │ │ └── __init__.py │ │ │ ├── mix │ │ │ │ ├── DPMixModel.py │ │ │ │ ├── HardDPMixModel.py │ │ │ │ ├── MixModel.py │ │ │ │ └── __init__.py │ │ │ └── seq │ │ │ │ ├── HMMUtil.py │ │ │ │ └── __init__.py │ │ ├── config │ │ │ ├── allocmodel.conf │ │ │ ├── init.conf │ │ │ ├── learnalg.conf │ │ │ ├── numeric.platform-config │ │ │ ├── obsmodel.conf │ │ │ ├── onlinedata.conf │ │ │ └── output.conf │ │ ├── data │ │ │ ├── AdmixMinibatchIterator.py │ │ │ ├── AdmixMinibatchIteratorDB.py │ │ │ ├── DataObj.py │ │ │ ├── GraphData.py │ │ │ ├── MinibatchIterator.py │ │ │ ├── MinibatchIteratorFromDisk.py │ │ │ ├── WordsData.py │ │ │ ├── XData.py │ │ │ └── __init__.py │ │ ├── distr │ │ │ ├── BetaDistr.py │ │ │ ├── DirichletDistr.py │ │ │ ├── Distr.py │ │ │ ├── GaussDistr.py │ │ │ ├── GaussGammaDistr.py │ │ │ ├── GaussWishDistr.py │ │ │ ├── WishartDistr.py │ │ │ ├── ZMGaussDistr.py │ │ │ └── __init__.py │ │ ├── init │ │ │ ├── FromSaved.py │ │ │ ├── FromScratchBernRel.py │ │ │ ├── FromScratchGauss.py │ │ │ ├── FromScratchMult.py │ │ │ ├── FromTruth.py │ │ │ └── __init__.py │ │ ├── ioutil │ │ │ ├── BNPYArgParser.py │ │ │ ├── ModelReader.py │ │ │ ├── ModelWriter.py │ │ │ └── __init__.py │ │ ├── learnalg │ │ │ ├── 
BirthMove.py │ │ │ ├── BirthMoveTopicModel.py │ │ │ ├── LearnAlg.py │ │ │ ├── MemoizedOnlineVBLearnAlg.py │ │ │ ├── MergeMove.py │ │ │ ├── MergePairSelector.py │ │ │ ├── MergeTracker.py │ │ │ ├── OldMergeMove.py │ │ │ ├── StochasticOnlineVBLearnAlg.py │ │ │ ├── VBLearnAlg.py │ │ │ └── __init__.py │ │ ├── obsmodel │ │ │ ├── BagOfWordsObsModel.py │ │ │ ├── BernRelObsModel.py │ │ │ ├── DiagGaussObsModel.py │ │ │ ├── GaussObsModel.py │ │ │ ├── MultObsModel.py │ │ │ ├── ObsModel.py │ │ │ ├── ZMGaussObsModel.py │ │ │ └── __init__.py │ │ ├── suffstats │ │ │ ├── ParamBag.py │ │ │ ├── SuffStatBag.py │ │ │ └── __init__.py │ │ ├── util │ │ │ ├── IOUtil.py │ │ │ ├── LibRlogR.py │ │ │ ├── LinAlgUtil.py │ │ │ ├── NumericHardUtil.py │ │ │ ├── NumericUtil.py │ │ │ ├── RandUtil.py │ │ │ ├── SpecialFuncUtil.py │ │ │ ├── VerificationUtil.py │ │ │ ├── __init__.py │ │ │ └── lib │ │ │ │ └── RlogRCore.cpp │ │ └── viz │ │ │ ├── BarsViz.py │ │ │ ├── GaussViz.py │ │ │ ├── PlotComps.py │ │ │ ├── PlotELBO.py │ │ │ ├── PlotK.py │ │ │ ├── PrintTopics.py │ │ │ └── __init__.py │ ├── demodata │ │ ├── AsteriskK8.py │ │ ├── Bars2D.py │ │ ├── BarsK10V900.py │ │ ├── BarsK50V2500.py │ │ ├── BarsK6V9.py │ │ ├── BarsK8.py │ │ ├── BinaryGraphK5.py │ │ ├── DeadLeaves.py │ │ ├── DeadLeavesD25.py │ │ ├── HashtagK9.py │ │ ├── NIPS.py │ │ ├── NYTimes.py │ │ └── StarCovarK5.py │ └── tests │ │ ├── allocmodel │ │ ├── TestMixModel.py │ │ └── admix │ │ │ ├── TestGlobalStickbreakOptimizer.py │ │ │ ├── TestHDPBetaOptimizer.py │ │ │ └── TestHDPVariationalOptimizer.py │ │ ├── data │ │ └── TestMinibatchIterator.py │ │ ├── distr │ │ ├── TestGaussDistr.py │ │ ├── TestGaussWishDistr.py │ │ ├── TestWishartDistr.py │ │ └── TestZMGaussDistr.py │ │ ├── end-to-end │ │ ├── AbstractEndToEndTest.py │ │ ├── TestAdmixTopicModel.py │ │ ├── TestDPMixGauss.py │ │ ├── TestHDPModel.py │ │ ├── TestMixDiagGauss.py │ │ ├── TestMixGauss.py │ │ ├── TestMixZMGauss.py │ │ ├── TestProxFunc.py │ │ └── Util.py │ │ ├── init │ │ ├── TestFromSaved.py 
│ │ └── TestFromScratchGauss.py │ │ ├── ioutil │ │ └── TestModelReader.py │ │ ├── learnalg │ │ ├── TestBirthMove.py │ │ └── TestMemoizedVBWithBirth.py │ │ ├── merge │ │ ├── AbstractBaseTestForHDP.py │ │ ├── TestMathForHDPMerges.py │ │ ├── TestMergeDPMixModel.py │ │ ├── TestMergeHDPTopicModel.py │ │ ├── TestMergePairSelector.py │ │ └── TestMergeTracker.py │ │ ├── obsmodel │ │ ├── TestGaussObsModel.py │ │ └── TestZMGaussObsModel.py │ │ ├── paramestimation │ │ ├── Test-MixModel-Gauss-VB.py │ │ ├── Test-MixModel-ZMGauss-EM.py │ │ └── Test-MixModel-ZMGauss-VB.py │ │ ├── suffstats │ │ ├── TestParamBag.py │ │ └── TestSuffStatBag.py │ │ └── util │ │ └── TestEqualAtMSigFigs.py ├── results │ └── .gitignore └── scripts │ ├── AsteriskK8.py │ ├── BarsBurstyK20.py │ ├── BarsBurstyK6.py │ ├── BarsK8.py │ ├── DeadLeaves.py │ ├── DeadLeavesD25.py │ ├── HuffPost.py │ ├── NIPS.py │ ├── NYTimes.py │ ├── NYTimesDB.py │ ├── Science.py │ ├── StarCovarK5.py │ ├── ToyBarsK6.py │ ├── Wikipedia.py │ └── customFunc.py ├── config.py ├── data ├── nips0-12.zip ├── nyt_2013_obama.tar.gz ├── nyt_2013_obama.zip └── reuters.zip ├── fact_classifier ├── classify_ex.py ├── factfeat └── factsvm ├── lib ├── __init__.py ├── model_svm │ ├── feats │ ├── lower_words │ ├── non_abbrs │ └── svm_model ├── sbd.py ├── sbd_util.py ├── svmlite │ ├── LICENSE.txt │ ├── Makefile │ ├── kernel.h │ ├── svm_classify │ ├── svm_classify.c │ ├── svm_classify.o │ ├── svm_common.c │ ├── svm_common.h │ ├── svm_common.o │ ├── svm_hideo.c │ ├── svm_hideo.o │ ├── svm_learn │ ├── svm_learn.c │ ├── svm_learn.h │ ├── svm_learn.o │ ├── svm_learn_main.c │ ├── svm_learn_main.o │ ├── svm_light.tar.gz │ └── svm_loqo.c └── word_tokenize.py ├── refinery ├── __init__.py ├── data │ ├── __init__.py │ └── models.py ├── static │ ├── .gitignore │ ├── assets │ │ ├── fonts │ │ │ ├── bpicons │ │ │ │ ├── bpicons.eot │ │ │ │ ├── bpicons.svg │ │ │ │ ├── bpicons.ttf │ │ │ │ ├── bpicons.woff │ │ │ │ └── license.txt │ │ │ ├── fontawesome.eot │ │ │ ├── 
fontawesome.svg │ │ │ ├── fontawesome.ttf │ │ │ ├── fontawesome.woff │ │ │ ├── fontawesome │ │ │ │ ├── Read Me.txt │ │ │ │ ├── fontawesome.dev.svg │ │ │ │ ├── fontawesome.eot │ │ │ │ ├── fontawesome.svg │ │ │ │ ├── fontawesome.ttf │ │ │ │ ├── fontawesome.woff │ │ │ │ └── license.txt │ │ │ ├── icomoon │ │ │ │ ├── icomoon.dev.svg │ │ │ │ ├── icomoon.eot │ │ │ │ ├── icomoon.svg │ │ │ │ ├── icomoon.ttf │ │ │ │ ├── icomoon.woff │ │ │ │ └── license.txt │ │ │ └── icomoon_arrows │ │ │ │ ├── icomoon.dev.svg │ │ │ │ ├── icomoon.eot │ │ │ │ ├── icomoon.svg │ │ │ │ ├── icomoon.ttf │ │ │ │ └── icomoon.woff │ │ ├── images │ │ │ ├── Logo.png │ │ │ ├── arrows │ │ │ │ ├── arrows.dev.svg │ │ │ │ ├── arrows.eot │ │ │ │ ├── arrows.svg │ │ │ │ ├── arrows.ttf │ │ │ │ ├── arrows.woff │ │ │ │ └── license.txt │ │ │ ├── elephants │ │ │ │ ├── 1.jpg │ │ │ │ ├── 2.jpg │ │ │ │ ├── 3.jpg │ │ │ │ ├── 4.jpg │ │ │ │ └── 5.jpg │ │ │ ├── glyphicons_020_home.png │ │ │ ├── glyphicons_063_power.png │ │ │ ├── glyphicons_144_folder_open.png │ │ │ ├── glyphicons_145_folder_plus.png │ │ │ ├── glyphicons_220_play_button.png │ │ │ ├── glyphicons_232_cloud.png │ │ │ ├── glyphicons_341_briefcase.png │ │ │ ├── glyphicons_357_suitcase.png │ │ │ ├── glyphicons_363_cloud_upload.png │ │ │ ├── glyphicons_364_cloud_download.png │ │ │ ├── glyphicons_370_globe_af.png │ │ │ ├── glyphicons_371_global.png │ │ │ ├── hopper-chop-suey.jpg │ │ │ ├── hopper-early-sunday-morning.jpg │ │ │ ├── hopper-gas.jpg │ │ │ ├── hopper-morning-sun.jpg │ │ │ ├── hopper-nighthawks.jpg │ │ │ ├── icons │ │ │ │ ├── arrow_down.png │ │ │ │ ├── arrow_up.png │ │ │ │ ├── iconmonstr-add-folder-icon-256.png │ │ │ │ ├── iconmonstr-arrow-4-icon.png │ │ │ │ ├── iconmonstr-book-17-icon.png │ │ │ │ ├── iconmonstr-cloud-3-icon.png │ │ │ │ ├── iconmonstr-cloud-9-icon-48.png │ │ │ │ ├── iconmonstr-edit-8-icon.png │ │ │ │ ├── iconmonstr-flask-7-icon-256.png │ │ │ │ ├── iconmonstr-gear-icon-48.png │ │ │ │ ├── iconmonstr-info-6-icon-128.png │ │ │ │ ├── 
iconmonstr-info-6-icon-256.png │ │ │ │ ├── iconmonstr-line-chart-4-icon-64.png │ │ │ │ ├── iconmonstr-line-chart-icon-64.png │ │ │ │ ├── iconmonstr-lock-13-icon.png │ │ │ │ ├── iconmonstr-lock-15-icon.png │ │ │ │ ├── iconmonstr-lock-3-icon.png │ │ │ │ ├── iconmonstr-lock-9-icon.png │ │ │ │ ├── iconmonstr-magnifier-4-icon.png │ │ │ │ ├── iconmonstr-magnifier-6-icon.png │ │ │ │ ├── iconmonstr-newspaper-12-icon.png │ │ │ │ ├── iconmonstr-newspaper-3-icon.png │ │ │ │ ├── iconmonstr-newspaper-4-icon.png │ │ │ │ ├── iconmonstr-newspaper-7-icon.png │ │ │ │ ├── iconmonstr-note-25-icon.png │ │ │ │ ├── iconmonstr-photo-camera-6-icon.png │ │ │ │ ├── iconmonstr-photo-camera-8-icon.png │ │ │ │ ├── iconmonstr-plus-5-icon-24.png │ │ │ │ ├── iconmonstr-star-5-icon.png │ │ │ │ ├── iconmonstr-star-7-icon.png │ │ │ │ ├── iconmonstr-text-file-4-icon.png │ │ │ │ ├── iconmonstr-x-mark-4-icon-24.png │ │ │ │ ├── iconmonstr-x-mark-4-icon-256.png │ │ │ │ ├── iconmonstr-x-mark-4-icon.png │ │ │ │ ├── pixel-arrow-right.png │ │ │ │ ├── pixel-arrow-right_.png │ │ │ │ ├── pixel-arrow-right__.png │ │ │ │ └── running-icon.png │ │ │ ├── paintings │ │ │ │ ├── dali-rose.jpg │ │ │ │ ├── hopper-chop-suey.jpg │ │ │ │ ├── hopper-early-sunday-morning.jpg │ │ │ │ ├── hopper-gas.jpg │ │ │ │ ├── hopper-morning-sun.jpg │ │ │ │ ├── hopper-nighthawks.jpg │ │ │ │ ├── kandinsky-trans.jpg │ │ │ │ ├── mondrian-comp-a.jpg │ │ │ │ ├── monet-fog.jpg │ │ │ │ ├── monet-gal.jpg │ │ │ │ └── monet-poppies.jpg │ │ │ ├── progress.png │ │ │ ├── progressbar.gif │ │ │ ├── projects │ │ │ │ ├── img05.jpg │ │ │ │ ├── img06.jpg │ │ │ │ ├── img07.jpg │ │ │ │ ├── img08.jpg │ │ │ │ └── latest │ │ │ │ │ ├── img004.jpg │ │ │ │ │ ├── img01.jpg │ │ │ │ │ ├── img02.jpg │ │ │ │ │ ├── img03.jpg │ │ │ │ │ └── img04.jpg │ │ │ ├── random │ │ │ │ ├── 1.jpg │ │ │ │ ├── 2.jpg │ │ │ │ ├── 3.jpg │ │ │ │ ├── 4.jpg │ │ │ │ └── 5.jpg │ │ │ ├── sliders │ │ │ │ └── camera │ │ │ │ │ ├── slide00.jpg │ │ │ │ │ ├── slide01.jpg │ │ │ │ │ ├── slide02.jpg │ │ │ 
│ │ └── slide03.jpg │ │ │ ├── ui │ │ │ │ ├── ui-bg_flat_0_aaaaaa_40x100.png │ │ │ │ ├── ui-bg_flat_75_ffffff_40x100.png │ │ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ │ │ ├── ui-icons_222222_256x240.png │ │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ │ ├── ui-icons_454545_256x240.png │ │ │ │ ├── ui-icons_454545_256x240___________.png │ │ │ │ ├── ui-icons_888888_256x240.png │ │ │ │ └── ui-icons_cd0a0a_256x240.png │ │ │ └── view_mode │ │ │ │ ├── 1.png │ │ │ │ ├── 10.png │ │ │ │ ├── 2.png │ │ │ │ ├── 3.png │ │ │ │ ├── 4.png │ │ │ │ ├── 5.png │ │ │ │ ├── 6.png │ │ │ │ ├── 7.png │ │ │ │ ├── 8.png │ │ │ │ └── 9.png │ │ ├── lib │ │ │ ├── colorbox │ │ │ │ ├── colorbox.css │ │ │ │ ├── images │ │ │ │ │ ├── border.png │ │ │ │ │ ├── controls.png │ │ │ │ │ ├── loading.gif │ │ │ │ │ ├── loading_background.png │ │ │ │ │ └── overlay.png │ │ │ │ └── jquery.colorbox-min.js │ │ │ ├── column12.css │ │ │ ├── d3 │ │ │ │ ├── d3.layout.cloud.js │ │ │ │ └── d3.v3.min.js │ │ │ ├── jquery.knob.js │ │ │ ├── jquery.mixitup.min.js │ │ │ ├── jquery │ │ │ │ ├── jquery-1.10.2.js │ │ │ │ ├── jquery-1.11.1.js │ │ │ │ ├── jquery-1.9.1.min.js │ │ │ │ ├── jquery-ui-1.10.4.js │ │ │ │ ├── jquery-ui-1.10.4.min.js │ │ │ │ ├── jquery-ui-1.9.2.custom.min.js │ │ │ │ ├── jquery-ui.css │ │ │ │ ├── jquery.knob.js │ │ │ │ └── jquery.min.map │ │ │ └── slider │ │ │ │ └── simple-slider.js │ │ └── misc │ │ │ └── stopwords.txt │ ├── css │ │ ├── experiment.css │ │ ├── listing.css │ │ ├── login.css │ │ ├── style.css │ │ └── upload.css │ └── js │ │ ├── annotate.js │ │ ├── main_menu.js │ │ ├── summarize.js │ │ ├── topicmodel.js │ │ └── upload.js ├── templates │ ├── about.html │ ├── base.html │ ├── browse_list.html │ ├── data_list.html │ ├── docview.html │ ├── ex_list.html │ ├── folder_list.html │ 
├── info_page.html │ ├── login.html │ ├── profile.html │ ├── summarize.html │ ├── topicmodel.html │ └── upload.html └── webapp │ ├── __init__.py │ ├── admin.py │ ├── customFunc.py │ ├── main_menu.py │ ├── pubsub.py │ ├── summarize.py │ ├── topicmodel.py │ └── upload.py ├── requirements.txt ├── reset_db.py ├── reset_db_files └── default.jpg ├── setup_env.sh ├── start_celery.sh └── start_refinery.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.idea 3 | .pyc 4 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) <2014> 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
9 | -------------------------------------------------------------------------------- /doc/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/doc/main.pdf -------------------------------------------------------------------------------- /puppet/manifests/classes/db.pp: -------------------------------------------------------------------------------- 1 | class db { 2 | # postgresql-dev required for Python's psycopg2 3 | /* 4 | package { [ 'postgresql', 'postgresql-server-dev-all' ]: 5 | ensure => 'installed', 6 | } 7 | 8 | service { 'postgresql': 9 | ensure => running, 10 | require => Package[postgresql], 11 | } 12 | 13 | package {'redis-server': 14 | ensure => 'installed', 15 | } 16 | 17 | service { 'redis-server': 18 | ensure => running, 19 | require => Package[redis-server], 20 | } 21 | */ 22 | 23 | class { 'postgresql::server': } 24 | 25 | postgresql::server::db { 'refinery': 26 | user => 'vagrant', 27 | password => postgresql_password('vagrant', ''), 28 | } 29 | 30 | package { [ 'postgresql-server-dev-all' ]: 31 | ensure => 'installed', 32 | require => Class['postgresql::server'], 33 | } 34 | 35 | # Redis - Server 36 | package {'redis-server': 37 | ensure => 'installed', 38 | } 39 | 40 | service { 'redis-server': 41 | ensure => running, 42 | require => Package[redis-server], 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /puppet/manifests/classes/init.pp: -------------------------------------------------------------------------------- 1 | # stage {"pre": before => Stage["main"]} class {'apt': stage => 'pre'} 2 | 3 | # Commands to run before all others in puppet. 
4 | class init { 5 | group { "puppet": 6 | ensure => "present", 7 | } 8 | case $operatingsystem { 9 | ubuntu: { 10 | exec { "update_apt": 11 | command => "sudo apt-get update", 12 | } 13 | # Provides "add-apt-repository" command, useful if you need 14 | # to install software from other apt repositories. 15 | package { "python-software-properties": 16 | ensure => present, 17 | require => [ 18 | Exec['update_apt'], 19 | ]; 20 | } 21 | $misc_packages = ["make", "curl", "git-core"] 22 | package { $misc_packages: 23 | ensure => present, 24 | require => [ 25 | Exec['update_apt'], 26 | ]; 27 | } 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /puppet/manifests/classes/python.pp: -------------------------------------------------------------------------------- 1 | # Install python and compiled modules for project 2 | class python { 3 | case $operatingsystem { 4 | ubuntu: { 5 | package { "python-pip": 6 | ensure => installed 7 | } 8 | package { ["python-scipy"]: 9 | ensure => installed, 10 | require => Package['python-pip'] 11 | } 12 | package { ["numpy"]: 13 | ensure => installed, 14 | provider => pip, 15 | require => Package['python-pip'] 16 | } 17 | 18 | 19 | /* 20 | package { ['libfreetype6-dev', 'pkg-config']: 21 | ensure => installed 22 | } 23 | package { ['pyparsing']: 24 | ensure => installed, 25 | provider => pip, 26 | require => Package['python-pip'] 27 | } 28 | package { ["matplotlib"]: 29 | ensure => installed, 30 | provider => pip, 31 | require => Package['numpy', 'pyparsing', 'libfreetype6-dev'] 32 | } 33 | */ 34 | 35 | 36 | package { 'virtualenv': 37 | ensure => installed, 38 | provider => pip, 39 | require => Package['python-pip'] 40 | } 41 | package { 'gunicorn': 42 | ensure => installed, 43 | provider => pip, 44 | require => Package['python-pip'] 45 | } 46 | package { 'flask': 47 | ensure => installed, 48 | provider => pip, 49 | require => Package['python-pip'] 50 | } 51 | package { 
['joblib','redis','celery']: 52 | ensure => installed, 53 | provider => pip, 54 | require => Package['python-pip'] 55 | } 56 | package { ['flask-wtf','flask-login','wtforms']: 57 | ensure => installed, 58 | provider => pip, 59 | require => Package['python-pip','flask'] 60 | } 61 | package { 'scikit-learn': 62 | provider => pip, 63 | require => Package['python-pip'] 64 | } 65 | package { 'kombu': 66 | ensure => installed, 67 | provider => pip, 68 | require => Package['numpy'] 69 | } 70 | package { ['flask-sqlalchemy','psycopg2']: 71 | ensure => installed, 72 | provider => pip, 73 | require => Class['postgresql::server'] 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /puppet/manifests/classes/run.pp: -------------------------------------------------------------------------------- 1 | class run { 2 | include supervisor 3 | 4 | exec { "reset_db": 5 | command => "/usr/bin/python reset_db.py", 6 | user => "vagrant", 7 | cwd => "/vagrant/refinery/", 8 | } 9 | 10 | supervisor::app {'celery': 11 | command => '/usr/local/bin/celery --concurrency=4 -A refinery.celery worker', 12 | directory => '/vagrant/refinery/', 13 | user => 'vagrant', 14 | } 15 | 16 | supervisor::app {'refinery': 17 | command => '/usr/local/bin/gunicorn --timeout 120 -w 4 -b 0.0.0.0:8080 refinery.webapp.main_menu:app', 18 | directory => '/vagrant/refinery/', 19 | user => 'vagrant', 20 | } 21 | 22 | /* 23 | exec { "start_celery": 24 | command => "./start_celery.sh", 25 | cwd => "/vagrant/refinery/", 26 | provider => 'shell', 27 | user => 'vagrant', 28 | returns => 1, 29 | require => Exec['reset_db'], 30 | } 31 | 32 | exec { "start_refinery": 33 | command => "./start_refinery.sh", 34 | cwd => "/vagrant/refinery/", 35 | user => 'vagrant', 36 | provider => 'shell', 37 | returns => 1, 38 | require => Exec['reset_db'], 39 | } 40 | */ 41 | } -------------------------------------------------------------------------------- 
/puppet/manifests/vagrant.pp: -------------------------------------------------------------------------------- 1 | # This vagrant.pp represents the base script to begin installation of Refinery 2 | 3 | import "classes/*.pp" 4 | 5 | $PROJ_DIR = "/vagrant" 6 | $HOME_DIR = "/home/vagrant" 7 | 8 | Exec { 9 | path => "/usr/local/bin:/usr/bin:/usr/sbin:/sbin:/bin", 10 | } 11 | 12 | class dev { 13 | class { 14 | init: ; 15 | db: require => Class[init]; 16 | python: require => Class["init","db"]; 17 | run: require => Class["init", "db", "python"]; 18 | } 19 | } 20 | 21 | include dev 22 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/.gitignore: -------------------------------------------------------------------------------- 1 | # IGNORE THESE FILES FROM THE GITHUB REPOSITORY 2 | 3 | # ====================================================== EXCLUDE DIRECTORIES 4 | # Exclude everything in the bin directory, 5 | # which holds compiled binaries for executing on grid 6 | bin/ 7 | 8 | # Exclude everything in these directories 9 | # These aren't of interest for tracking in a repository because 10 | # (1) file sizes can be HUGE 11 | # (2) these files rarely mean anything to others except the person who ran the test 12 | logs/ 13 | local/ 14 | results/ 15 | profile/ 16 | .idea/* 17 | .ipynb_checkpoints/ 18 | 19 | # ====================================================== EXCLUDE FILE PATTERNS 20 | # Exclude script files 21 | *.sh 22 | .* 23 | !/.gitignore 24 | 25 | # Exclude user-specific config options for where to read and write data 26 | *.path 27 | 28 | # Exclude temporary files created by text editors 29 | *~ 30 | *~lock* 31 | *.DS_Store 32 | .*.swp 33 | 34 | # Exclude weird NFS file system dumps 35 | .nfs* 36 | 37 | # Exclude mocap files 38 | *.amc 39 | *.asf 40 | *.key 41 | 42 | 43 | # Exclude videos 44 | *.avi 45 | *.swf 46 | *.mpg 47 | 48 | # Exclude compiled python 49 | *.pyc 50 | 51 | # Exclude executable files 
generated by Matlab's mex 52 | *.mat 53 | *.mexglx 54 | *.mexa64 55 | *.mexmaci64 56 | *.so 57 | 58 | # Exclude auxiliary files generated by LaTeX 59 | *.pdf 60 | *-eps-converted-to.pdf 61 | *.dvi 62 | *.log 63 | *.blg 64 | *.bbl 65 | *.aux 66 | *.backup 67 | *.tmp 68 | *.synctex.gz 69 | *.out 70 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/README.md: -------------------------------------------------------------------------------- 1 | **bnpy** is Bayesian nonparametric unsupervised machine learning for python. 2 | 3 | Contact: Mike Hughes. mike AT michaelchughes.com 4 | 5 | # About 6 | This python module provides code for training popular Bayesian nonparametric models on massive datasets. **bnpy** supports the latest online learning algorithms as well as standard offline methods. 7 | 8 | Supported probabilistic models include 9 | 10 | * Gaussian mixture models 11 | * standard parametric 12 | * nonparametric (Dirichlet Process) 13 | 14 | Supported learning algorithms include: 15 | 16 | * EM: expectation-maximization (offline) 17 | * VB: variational Bayes (offline) 18 | * moVB: memoized online VB 19 | * soVB: stochastic online VB 20 | 21 | These are all variants of *variational inference*, a family of optimization algorithms that perform coordinate ascent to learn parameters. 22 | 23 | # Quick Start 24 | 25 | **bnpy** provides an easy command-line interface for launching experiments. 26 | 27 | Train 8-component Gaussian mixture model via EM. 28 | ``` 29 | python -m bnpy.Run AsteriskK8 MixModel ZMGauss EM --K 8 30 | ``` 31 | 32 | Train Dirichlet-process Gaussian mixture model (DP-GMM) via variational bayes. 
33 | ``` 34 | python -m bnpy.Run AsteriskK8 DPMixModel Gauss VB --K 8 35 | ``` 36 | 37 | Train DP-GMM via memoized online VB, with birth and merge moves 38 | ``` 39 | python -m bnpy.Run AsteriskK8 DPMixModel Gauss moVB --moves birth,merge 40 | ``` 41 | 42 | ### Quick help 43 | ``` 44 | # print help message for required arguments 45 | python -m bnpy.Run --help 46 | # print help message for specific keyword options for Gaussian mixture models 47 | python -m bnpy.Run AsteriskK8 MixModel Gauss EM --kwhelp 48 | ``` 49 | 50 | # Installation 51 | 52 | Follow the [installation instructions](https://bitbucket.org/michaelchughes/bnpy/wiki/Installation.md) on our project wiki. 53 | 54 | # Documentation 55 | 56 | All documentation can be found on the [project wiki](https://bitbucket.org/michaelchughes/bnpy/wiki/Home.md). 57 | 58 | Especially check out the [quick start demos](https://bitbucket.org/michaelchughes/bnpy/wiki/QuickStart/QuickStart.md) 59 | 60 | # Target Audience 61 | 62 | Primarly, we intend bnpy to be a platform for researchers. By gathering many learning algorithms and popular models in one convenient, modular repository, we hope to make it easier to compare and contrast approaches. 63 | 64 | # Repository Organization 65 | bnpy/ module-specific code 66 | 67 | demodata/ example dataset scripts 68 | 69 | tests/ unit-tests for assuring code correctness. using nose package. 
70 | 71 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/__init__.py: -------------------------------------------------------------------------------- 1 | ''' bnpy module __init__ file 2 | ''' 3 | import data 4 | import distr 5 | import util 6 | import suffstats 7 | 8 | import allocmodel 9 | import obsmodel 10 | from HModel import HModel 11 | 12 | import ioutil 13 | load_model = ioutil.ModelReader.load_model 14 | save_model = ioutil.ModelWriter.save_model 15 | 16 | import init 17 | 18 | import learnalg 19 | import Run 20 | from Run import run 21 | 22 | import os 23 | import sys 24 | ''' 25 | ########################################################### Configure save 26 | ########################################################### location 27 | hasWriteableOutdir = False 28 | if 'BNPYOUTDIR' in os.environ: 29 | outdir = os.environ['BNPYOUTDIR'] 30 | if os.path.exists(outdir): 31 | try: 32 | with open(os.path.join(outdir, 'bnpytest'), 'w') as f: 33 | pass 34 | except IOError: 35 | sys.exit('BNPYOUTDIR not writeable: %s' % (outdir)) 36 | hasWriteableOutdir = True 37 | if not hasWriteableOutdir: 38 | raise ValueError('Environment variable BNPYOUTDIR not specified. 
Cannot save results to disk') 39 | ''' 40 | ########################################################### Configure data 41 | ########################################################### location 42 | root = os.path.sep.join(os.path.abspath(__file__).split(os.path.sep)[:-2]) 43 | sys.path.append(os.path.join(root, 'demodata/')) 44 | if 'BNPYDATADIR' in os.environ: 45 | if os.path.exists(os.environ['BNPYDATADIR']): 46 | sys.path.append(os.environ['BNPYDATADIR']) 47 | else: 48 | print "Warning: Environment variable BNPYDATADIR not a valid directory" 49 | 50 | ########################################################### Optional: viz 51 | ########################################################### package for plots 52 | canPlot = False 53 | ''' 54 | try: 55 | from matplotlib import pylab 56 | canPlot = True 57 | except ImportError: 58 | print "Error importing matplotlib. Plotting disabled." 59 | print "Fix by making sure this produces a figure window on your system" 60 | print " >>> from matplotlib import pylab; pylab.figure(); pylab.show();" 61 | if canPlot: 62 | import viz 63 | __all__ = ['run', 'Run', 'learn', 'allocmodel','obsmodel', 'suffstats', 64 | 'HModel', 'init', 'util','ioutil','viz','distr', 'mergeutil'] 65 | ''' 66 | __all__ = ['run', 'Run', 'learn', 'allocmodel','obsmodel', 'suffstats', 67 | 'HModel', 'init', 'util','ioutil','distr', 'mergeutil'] 68 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/allocmodel/__init__.py: -------------------------------------------------------------------------------- 1 | from AllocModel import AllocModel 2 | 3 | from mix.MixModel import MixModel 4 | from mix.DPMixModel import DPMixModel 5 | from mix.HardDPMixModel import HardDPMixModel 6 | 7 | from admix.AdmixModel import AdmixModel 8 | from admix.HDPModel import HDPModel 9 | from admix.HDPPE import HDPPE 10 | from admix.HDPFullHard import HDPFullHard 11 | from admix.HDPSoft2Hard import HDPSoft2Hard 12 | from 
admix.HDPHardMult import HDPHardMult 13 | from admix.HDPRelModel import HDPRelAssortModel 14 | 15 | AllocModelConstructorsByName = { \ 16 | 'MixModel':MixModel, 17 | 'DPMixModel':DPMixModel, 18 | 'HardDPMixModel':HardDPMixModel, 19 | 'AdmixModel':AdmixModel, 20 | 'HDPModel':HDPModel, 21 | 'HDPPE':HDPPE, 22 | 'HDPFullHard':HDPFullHard, 23 | 'HDPSoft2Hard':HDPSoft2Hard, 24 | 'HDPHardMult':HDPHardMult, 25 | 'HDPRelAssortModel':HDPRelAssortModel, 26 | } 27 | 28 | AllocModelNameSet = set(AllocModelConstructorsByName.keys()) 29 | 30 | __all__ = list() 31 | for name in AllocModelConstructorsByName: 32 | __all__.append(name) 33 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/allocmodel/admix/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`admix` module gathers point-estimate and variational approximations 3 | for Bayesian admixture modeling 4 | """ 5 | 6 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/allocmodel/mix/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`mix` module gathers point-estimate and variational approximations 3 | for Bayesian mixture modeling, including 4 | finite parametric mixture models 5 | nonparametric Dirichlet Process and Pitman-Yor mixture models 6 | """ 7 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/allocmodel/seq/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`seq` module gathers routines for Bayesian sequence modeling, including 3 | finite hidden Markov models 4 | nonparametric hidden Markov models 5 | """ 6 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/allocmodel.conf: 
-------------------------------------------------------------------------------- 1 | # Keyword options for bnpy allocation models 2 | # specify priors on parameters that allocate/assign data to clusters 3 | 4 | [MixModel] 5 | alpha0=1.0 6 | 7 | [MixModelHelp] 8 | alpha0="Scalar positive parameter for symmetric Dirichlet prior on mixture weights (component appearance probabilities). Set to 1.0 for a uniform prior (ML estimation)." 9 | 10 | [DPMixModel] 11 | alpha0=1.0 12 | truncType=z 13 | 14 | [DPMixModelHelp] 15 | alpha0="Scalar positive concentration parameter for Dirichlet Process. Set large 16 | to give all components nearly-same probability mass. Set small (near zero) to make only a few components probable." 17 | truncType="Truncation scheme for Dirichlet Process, determines how unassigned components in infinite tail are represented. Strongly recommended: 'z'." 18 | 19 | [HardDPMixModel] 20 | alpha0=1.0 21 | truncType=z 22 | 23 | [AdmixModel] 24 | alpha0 = 1.0 25 | 26 | [AdmixModelHelp] 27 | alpha0="Scalar positive parameter for symmetric Dirichlet prior on mixture weights (component appearance probabilities). Set to 1.0 for a uniform prior (ML estimation)." 
28 | 29 | 30 | [HDPModel] 31 | alpha0 = 5 32 | gamma = 0.5 33 | 34 | [HDPPE] 35 | alpha0 = 5 36 | gamma = 0.5 37 | 38 | [HDPFullHard] 39 | alpha0 = 5 40 | gamma = 0.5 41 | 42 | [HDPSoft2Hard] 43 | alpha0 = 5 44 | gamma = 0.5 45 | 46 | [HDPHardMult] 47 | alpha0 = 5 48 | gamma = 0.5 49 | 50 | [HDPRelAssortModel] 51 | alpha0 = 5 52 | gamma = 0.5 53 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/init.conf: -------------------------------------------------------------------------------- 1 | # Keyword options for initialization of global parameters for bnpy models 2 | # specify 3 | 4 | [Initialization] 5 | initname=randfromprior 6 | initarg=1.0 7 | K=6 8 | nRepeatTrue=2 9 | 10 | [InitializationHelp] 11 | initname=Name of routine for initialization. Options: {'randexamples','randexamplesbydist','truelabels','repeattruelabels'}. 12 | initarg=Numeric argument for initialization of key parameters. See details of init/FromScratch modules. 13 | K=Integer number of components. 14 | nRepeatTrue=[when initname='repeattruelabels'] number of duplicates of each true component to include when initializing. 
15 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/numeric.platform-config: -------------------------------------------------------------------------------- 1 | [RememberThisFileIsAutogenerated] 2 | 3 | [LibraryPrefs] 4 | calcRlogR = numpy 5 | inplaceExpAndNormalizeRows = numpy 6 | calcRlogRdotv = numpy 7 | 8 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/obsmodel.conf: -------------------------------------------------------------------------------- 1 | # Keyword options for observation models 2 | # specify how to set prior on data-generating parameters 3 | 4 | [BernRel] 5 | lamA=0.1 6 | lamB=0.1 7 | 8 | [BernRelHelp] 9 | lamA=hyperparameter representing pseudo head count 10 | lamB=hyperparameter representing pseudo tail count 11 | 12 | [ZMGauss] 13 | min_covar=1e-8 14 | dF=0 15 | sF=1.0 16 | ECovMat=eye 17 | 18 | [ZMGaussHelp] 19 | min_covar=Minimum value for diagonal entries of covariance matrix, to ensure invertibility (EM only). Set very small to approach maximum likelihood estimates. 20 | dF=Number of degrees of freedom for Wishart prior. Must be >= dimension of Data. 21 | sF=Scale factor for expected covariance matrix under Wishart prior. Set very small to approach maximum likelihood estimates. 22 | ECovMat=Name of routine for setting expected covariance matrix under Wishart prior. Options: {'eye', 'covdata'}. 23 | 24 | [Gauss] 25 | min_covar=1e-8 26 | dF=0 27 | sF=1.0 28 | ECovMat=eye 29 | kappa=1e-4 30 | 31 | [GaussHelp] 32 | min_covar=Minimum value for diagonal entries of covariance matrix, to ensure invertibility [algName='EM']. Set very small to approach maximum likelihood estimates. 33 | dF=Number of degrees of freedom for Wishart prior. Must be >= dimension of Data. 34 | sF=Scale factor for expected covariance matrix under Wishart prior. Set very small to approach maximum likelihood estimates. 
35 | ECovMat=Name of routine for setting expected covariance matrix under Wishart prior. Options: {'eye', 'covdata'}. 36 | kappa=Scalar that controls the precision (inverse variance) of Gaussian prior on means: \mu[k] ~ Normal( 0, 1/kappa * ECovMat). Set very small to allow means to approach maximum likelihood estimates. 37 | 38 | [DiagGauss] 39 | min_covar=1e-8 40 | kappa=1e-4 41 | m0=0.0 42 | a0=1.0 43 | b0=2.0 44 | 45 | [DiagGaussHelp] 46 | kappa=Precision factor for the Gauss-Gamma prior distribution. Set very small to let means be learned from data. 47 | m0=Mean for Gauss-Gamma prior distribution 48 | 49 | [Mult] 50 | lambda=0.01 51 | 52 | [MultHelp] 53 | lambda=parameter for symmetric Dirichlet prior over each topic's word distribution 54 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/onlinedata.conf: -------------------------------------------------------------------------------- 1 | # Keyword options for online (minibatch) traversal of dataset 2 | # specify how many batches to divide data into 3 | 4 | [OnlineDataPrefs] 5 | nBatch=10 6 | nLap=1 7 | 8 | [OnlineDataPrefsHelp] 9 | nBatch=Number of batches (aka minibatches) to split up dataset into. 10 | nLap=Number of times to cycle thru all batches in dataset. -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/output.conf: -------------------------------------------------------------------------------- 1 | # Keyword options for how to name, save, and display experimental progress. 2 | 3 | [OutputPrefs] 4 | jobname=defaultjob 5 | taskid=1 6 | nTask=1 7 | 8 | traceEvery=1. 9 | printEvery=1. 10 | saveEvery=5. 11 | 12 | [OutputPrefsHelp] 13 | jobname=String name of current experiment. This name is hashed to create a unique random seed, which controls initialization and algorithm execution. 14 | taskid=Integer ID of current run/trial/initialization. Must be >= 1. 
15 | nTask=Number of runs/trials/initializations to perform for single experiment. 16 | 17 | traceEvery=Number of laps (passes thru entire dataset) between saving ELBO values. Can be a fraction (like 0.5) to report partial-lap progress. 18 | printEvery=Number of laps (passes thru entire dataset) between printing status updates to log. Can be a fraction (like 0.5) to report partial-lap progress. 19 | saveEvery=Number of laps (passes thru entire dataset) between saving model's global parameters to disk. Can be a fraction (like 0.5) to report partial-lap progress. 20 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/data/DataObj.py: -------------------------------------------------------------------------------- 1 | ''' 2 | DataObj.py 3 | 4 | General abstract base class for all data objects, 5 | whether they are full datasets or iterators over small batches of data 6 | 7 | Attributes 8 | ------- 9 | nObs : 10 | ''' 11 | 12 | class DataObj(object): 13 | @classmethod 14 | def read_from_mat(self, matfilepath): 15 | ''' Constructor for building data object from disk 16 | ''' 17 | pass 18 | 19 | def __init__(self, *args, **kwargs): 20 | ''' Constructor for building data object from scratch in memory 21 | ''' 22 | pass 23 | 24 | def get_short_name(self): 25 | ''' Returns string with short name (at most 10 char) of this data object, 26 | with no spaces and only alpha-numeric characters. 27 | Useful for creating filepaths specific for this data object. 28 | ''' 29 | if hasattr(self, 'shortname'): 30 | return self.shortname 31 | return "MyData%d" % (self.nObs) 32 | 33 | def get_text_summary(self, **kwargs): 34 | ''' Returns string with human-readable description of this dataset 35 | e.g. source, author/creator, etc. 36 | ''' 37 | if hasattr(self, 'summary'): 38 | return self.summary 39 | s = '%s. 
    def add_data(self, DataObj):
        ''' Updates (in-place) the dataset to include provided data.

        NOTE(review): the parameter name `DataObj` shadows the DataObj class
        itself; presumably callers pass another data object of the same
        concrete type -- confirm before implementing subclass overrides.
        '''
        pass
class MinibatchIteratorFromDisk( MinibatchIterator):
    # Stub subclass: intended to read data that has been pre-split into
    # batches stored as .mat files on disk (see module docstring).

    def __init__(self):
        # Not implemented yet. Per the module docstring, construction is
        # expected to take a list of valid filepaths to .mat files.
        raise NotImplementedError("TODO")
23 | ######################################################### E-step 24 | def log_pdf( self ): 25 | ''' Returns log p( x | theta ) 26 | ''' 27 | pass 28 | 29 | def E_log_pdf( self ): 30 | ''' Returns E[ log p( x | theta ) ] under q(theta) <- this distr 31 | ''' 32 | pass 33 | 34 | ######################################################### Global updates 35 | ######################################################### M-step 36 | def get_post_distr( self, SS ): 37 | ''' Create new Distr object with posterior params 38 | ''' 39 | pass 40 | 41 | def post_update_soVB( self, rho, *args ): 42 | ''' Stochastic online update of internal params 43 | ''' 44 | pass 45 | 46 | 47 | ######################################################### ELBO terms 48 | ######################################################### 49 | def get_log_norm_const(self): 50 | ''' Returns log( Z ), where 51 | PDF(x) := 1/Z(theta) f( x | theta ) 52 | ''' 53 | pass 54 | 55 | def get_entropy( self ): 56 | ''' Returns entropy of this distribution 57 | H[ p(x) ] = -1*\int p(x|theta) log p(x|theta) dx 58 | ''' 59 | pass 60 | 61 | ######################################################### Accessors 62 | ######################################################### 63 | 64 | ######################################################### I/O Utils 65 | ######################################################### 66 | def to_dict(self): 67 | pass 68 | 69 | def from_dict(self, pDict): 70 | pass -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/distr/__init__.py: -------------------------------------------------------------------------------- 1 | from Distr import Distr 2 | 3 | from BetaDistr import BetaDistr 4 | from GaussDistr import GaussDistr 5 | from ZMGaussDistr import ZMGaussDistr 6 | from WishartDistr import WishartDistr 7 | from GaussWishDistr import GaussWishDistr 8 | from DirichletDistr import DirichletDistr 9 | from GaussGammaDistr import GaussGammaDistr 10 | 
def init_global_params(hmodel, Data, initname=None, prefix='Best', **kwargs):
    ''' Initialize (in-place) the global params of the given hmodel
    by copying the global parameters of a previously saved hmodel.

    Only global parameters are modified.
    This does NOT alter settings of hmodel's prior distribution.

    Args
    -------
    hmodel : bnpy model object to initialize
    Data : bnpy Data object whose dimensions must match resulting hmodel
    initname : valid filesystem path to stored result
    prefix : filename prefix selecting which saved snapshot to load

    Returns
    -------
    None. hmodel modified in-place.

    Raises
    -------
    ValueError if initname is None, or is neither a directory of saved
    bnpy results nor a .mat file.
    '''
    if initname is None:
        # Fail fast with a clear message instead of a TypeError below.
        raise ValueError('initname must be a valid filesystem path, got None')
    if os.path.isdir(initname):
        init_global_params_from_bnpy_format(hmodel, Data, initname, prefix)
    elif initname.endswith('.mat'):
        # Handle external formats (not bnpy models) saved as a MAT file.
        # endswith is deliberate: the old substring test (count('.mat') > 0)
        # would also match paths like 'foo.mat.bak', which was accidental.
        MatDict = scipy.io.loadmat(initname)
        hmodel.set_global_params(**MatDict)
    else:
        raise ValueError('Unrecognized init file: %s' % (initname))


def init_global_params_from_bnpy_format(hmodel, Data, initname, prefix):
    ''' Initialize hmodel in-place from a saved bnpy model directory.

    When the stored model matches hmodel's allocation, observation, and
    inference types, global parameters are copied over directly.
    Otherwise, local params computed by the stored model on Data are used
    to run one global parameter update of hmodel.
    '''
    storedModel = ModelReader.load_model(initname, prefix)
    # TODO check if dimension matches
    # 'is' is the proper comparison for exact class identity.
    aTypesMatch = type(storedModel.allocModel) is type(hmodel.allocModel)
    oTypesMatch = type(storedModel.obsModel) is type(hmodel.obsModel)
    inferTypesMatch = storedModel.inferType == hmodel.inferType

    if aTypesMatch and oTypesMatch and inferTypesMatch:
        hmodel.set_global_params(hmodel=storedModel)
    else:
        # Fall back: compute local params under the stored model, then use
        # the resulting suff stats to update the target model's globals.
        LP = storedModel.calc_local_params(Data)
        SS = hmodel.get_global_suff_stats(Data, LP)
        hmodel.update_global_params(SS)
23 | Global Paramters are: 24 | lamA, lamB = K x K stochastic block matrix 25 | theta = N x K matrix of community membership probabilities 26 | ''' 27 | PRNG = np.random.RandomState(seed) 28 | N = Data.nNodeTotal 29 | if initname == 'randexamples': 30 | # Generate a sparse matrix given observed positive edges 31 | #Data.to_sparse_matrix() 32 | # Create assortative stochastic block matrix 33 | lamA = np.zeros( K ) + (Data.nPosEdges / K) # assortative ( K x 1 ) vs. (K x K) 34 | lamB = np.zeros( K ) + (Data.nAbsEdges / (K*K)) # assortative 35 | # Create theta used for 36 | theta = np.zeros( (N,K) ) 37 | alpha = np.ones(K) / K 38 | for ii in xrange(N): 39 | theta[ii, :] = PRNG.dirichlet(alpha) 40 | 41 | # Initialize global stick-breaking weights beta to be 1/K (uniform) 42 | beta = np.ones(K) / K 43 | # Set the global parameters for the hmodel 44 | hmodel.set_global_params(K=K, beta=beta, lamA=lamA, lamB=lamB, theta=theta) 45 | return 46 | else: 47 | raise NotImplementedError('Unrecognized initname ' + initname) 48 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/init/FromScratchGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | FromScratchGauss.py 3 | 4 | Initialize params of a mixture model with gaussian observations from scratch. 
def init_global_params(hmodel, Data, initname='randexamples', seed=0, K=0, **kwargs):
    ''' Initialize hmodel's global parameters in-place, from scratch.

    Builds a responsibility matrix `resp` according to `initname`, then
    runs one global summary + update step of hmodel.

    Args
    -------
    hmodel : bnpy model object to initialize (modified in-place)
    Data : bnpy XData-like object with fields X (nObs x dim), nObs, dim
    initname : name of initialization routine, one of
        {'randexamples', 'randexamplesbydist', 'randsoftpartition',
         'randomnaive'}
    seed : integer seed for the pseudo-random generator
    K : integer number of components to create

    Raises
    -------
    ValueError if initname is not recognized. (Previously an unknown
    initname fell through and crashed with a NameError on 'resp'.)
    '''
    PRNG = np.random.RandomState(seed)
    if initname == 'randexamples':
        # Choose K items uniformly at random from the Data;
        # each chosen item becomes the sole member of one component.
        resp = np.zeros((Data.nObs, K))
        permIDs = PRNG.permutation(Data.nObs).tolist()
        for k in xrange(K):
            resp[permIDs[k], k] = 1.0
    elif initname == 'randexamplesbydist':
        # kmeans++-style selection: pick the first item at random, then
        # each subsequent item with probability proportional to squared
        # euclidean distance from the closest already-chosen item.
        objID = discrete_single_draw(np.ones(Data.nObs), PRNG)
        chosenObjIDs = list([objID])
        minDistVec = np.inf * np.ones(Data.nObs)
        for k in range(1, K):
            curDistVec = np.sum((Data.X - Data.X[objID])**2, axis=1)
            minDistVec = np.minimum(minDistVec, curDistVec)
            objID = discrete_single_draw(minDistVec, PRNG)
            chosenObjIDs.append(objID)
        resp = np.zeros((Data.nObs, K))
        for k in xrange(K):
            resp[chosenObjIDs[k], k] = 1.0
    elif initname == 'randsoftpartition':
        # Randomly assign all data items some mass in each of K components,
        # normalized so each row of resp sums to one.
        resp = PRNG.rand(Data.nObs, K)
        resp = resp / np.sum(resp, axis=1)[:, np.newaxis]
    elif initname == 'randomnaive':
        # Generate K "fake" examples from the diagonalized data covariance,
        # assigning each fake example to its own component.
        Sig = np.sqrt(np.diag(np.cov(Data.X.T)))
        Xfake = Sig * PRNG.randn(K, Data.dim)
        Data = XData(Xfake)
        resp = np.eye(K)
    else:
        # Explicit error instead of the old silent fall-through, which
        # raised an uninformative NameError on the undefined 'resp'.
        raise ValueError('Unrecognized initname: %s' % (initname))

    LP = dict(resp=resp)
    SS = hmodel.get_global_suff_stats(Data, LP)
    hmodel.update_global_params(SS)
def calc_resp_from_true_labels(Data):
    ''' Build a hard-assignment responsibility matrix from Data.TrueLabels.

    Returns
    -------
    resp : 2D array, size Data.nObs x Ktrue
        resp[n, k] = 1.0 exactly when item n carries the k-th distinct
        true label (distinct labels taken in sorted order via np.unique).
    '''
    labels = Data.TrueLabels
    distinctLabels = np.unique(labels)
    resp = np.zeros((Data.nObs, len(distinctLabels)))
    for col, labelVal in enumerate(distinctLabels):
        resp[labels == labelVal, col] = 1.0
    return resp
def save_obs_model(obsmodel, fpath, prefix, doLinkBest=False):
    ''' Save observation model to a MAT file named <prefix>ObsModel.mat.

    Per-component parameter dicts are stacked along the last axis so that
    each field becomes a single array covering all K components. When
    doLinkBest is set (and prefix is not already 'Best'), the
    BestObsModel.mat symlink is refreshed to point at the new file.
    '''
    outmatfile = os.path.join(fpath, prefix + 'ObsModel.mat')
    compDicts = [obsmodel.comp[k].to_dict() for k in xrange(obsmodel.K)]
    saveDict = obsmodel.to_dict_essential()
    for fieldName in compDicts[0].keys():
        if fieldName in saveDict:
            continue
        # Stack this field across all K components into one array.
        saveDict[fieldName] = np.squeeze(
            np.dstack([cd[fieldName] for cd in compDicts]))
    scipy.io.savemat(outmatfile, saveDict, oned_as='row')
    if doLinkBest and prefix != 'Best':
        create_best_link(outmatfile, os.path.join(fpath, 'BestObsModel.mat'))
'''
ioutil : input/output utilities for bnpy
(model serialization via ModelWriter/ModelReader, argument parsing).
'''
import ModelWriter
import ModelReader
import BNPYArgParser

__all__ = ['BNPYArgParser', 'ModelWriter', 'ModelReader']
14 | ''' 15 | super(type(self),self).__init__(**kwargs) 16 | self.rhodelay = self.algParams['rhodelay'] 17 | self.rhoexp = self.algParams['rhoexp'] 18 | 19 | def fit(self, hmodel, DataIterator, SS=None): 20 | ''' Run soVB learning algorithm, fit global parameters of hmodel to Data 21 | Returns 22 | -------- 23 | LP : local params from final pass of Data 24 | Info : dict of run information, with fields 25 | evBound : final ELBO evidence bound 26 | status : str message indicating reason for termination 27 | {'all data processed'} 28 | ''' 29 | LP = None 30 | rho = 1.0 # Learning rate 31 | nBatch = float(DataIterator.nBatch) 32 | 33 | # Set-up progress-tracking variables 34 | iterid = -1 35 | lapFrac = np.maximum(0, self.algParams['startLap'] - 1.0/nBatch) 36 | if lapFrac > 0: 37 | # When restarting an existing run, 38 | # need to start with last update for final batch from previous lap 39 | DataIterator.lapID = int(np.ceil(lapFrac)) - 1 40 | DataIterator.curLapPos = nBatch - 2 41 | iterid = int(nBatch * lapFrac) - 1 42 | 43 | self.set_start_time_now() 44 | while DataIterator.has_next_batch(): 45 | 46 | # Grab new data 47 | Dchunk = DataIterator.get_next_batch() 48 | 49 | # Update progress-tracking variables 50 | iterid += 1 51 | lapFrac += 1.0/nBatch 52 | self.set_random_seed_at_lap(lapFrac) 53 | 54 | # M step with learning rate 55 | if SS is not None: 56 | rho = (iterid + self.rhodelay) ** (-1.0 * self.rhoexp) 57 | hmodel.update_global_params(SS, rho) 58 | 59 | # E step 60 | LP = hmodel.calc_local_params(Dchunk) 61 | SS = hmodel.get_global_suff_stats(Dchunk, LP, doAmplify=True) 62 | 63 | # ELBO calculation 64 | evBound = hmodel.calc_evidence(Dchunk, SS, LP) 65 | 66 | # Save and display progress 67 | self.add_nObs(Dchunk.nObs) 68 | self.save_state(hmodel, iterid, lapFrac, evBound) 69 | self.print_state(hmodel, iterid, lapFrac, evBound) 70 | 71 | #Finally, save, print and exit 72 | status = "all data processed." 
"""
The :mod:`learnalg` module provides standard learning algorithms,
such as EM and VB (Variational Bayes).
"""
# Docstring fixed: the original read ``The:mod:`learnalg'`` -- the missing
# space and mismatched closing quote break the Sphinx :mod: role.
from .LearnAlg import LearnAlg
from .VBLearnAlg import VBLearnAlg
from .StochasticOnlineVBLearnAlg import StochasticOnlineVBLearnAlg
from .MemoizedOnlineVBLearnAlg import MemoizedOnlineVBLearnAlg
import MergeMove

from .MergePairSelector import MergePairSelector
from .MergeTracker import MergeTracker

__all__ = ['LearnAlg', 'VBLearnAlg', 'StochasticOnlineVBLearnAlg',
           'MemoizedOnlineVBLearnAlg', 'MergeMove',
           'MergeTracker', 'MergePairSelector']
def np2flatstr( X, fmt="% .6f" ):
    ''' Render array X as a single whitespace-separated string.

    Each entry of X (flattened, row-major) is formatted with `fmt`.
    '''
    flatVals = np.asarray(X).flatten()
    return ' '.join(fmt % (v) for v in flatVals)
def dotATB(A, B):
    ''' Compute matrix product A.T * B
    using efficient BLAS routines (low-level machine code)

    Dispatches to fblas.dgemm (with trans_a) when A has more columns
    than B; otherwise plain np.dot is used.
    '''
    useBlas = A.shape[1] > B.shape[1]
    if useBlas:
        return fblas.dgemm(1.0, A, B, trans_a=True)
    return np.dot(A.T, B)
def discrete_single_draw_vectorized( Pmat, randstate=np.random):
    ''' Draw one categorical sample per row of Pmat.

    Each row of Pmat holds positive (unnormalized) weights. Returns a
    vector giving, for each row, the index of the sampled category.
    Consumes exactly one rand() value per row of Pmat.
    '''
    cumWeights = np.cumsum(Pmat, axis=1)
    draws = randstate.rand(Pmat.shape[0]) * cumWeights[:, -1]
    # First column whose cumulative weight strictly exceeds the row's draw;
    # argmax on a boolean matrix returns the first True in each row.
    return np.argmax(cumWeights > draws[:, np.newaxis], axis=1)
K 48 | ''' 49 | totals = np.cumsum(ps) 50 | return np.searchsorted(totals, randstate.rand()*totals[-1]) 51 | 52 | def mvnrand(mu, Sigma, N=1, PRNG=np.random.RandomState()): 53 | if type(PRNG) == int: 54 | PRNG = np.random.RandomState(PRNG) 55 | return PRNG.multivariate_normal(mu, Sigma, (N)) 56 | 57 | def rotateCovMat( Sigma, theta=np.pi/4): 58 | ''' Returns valid covariance matrix with same eigen structure, rotated by theta radians 59 | ''' 60 | RotMat = [[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]] 61 | RotMat = np.asarray( RotMat) 62 | Lam,V = np.linalg.eig( Sigma ) 63 | Lam = np.diag(Lam) 64 | Vrot = np.dot( V, RotMat ) 65 | return np.dot( Vrot, np.dot( Lam, Vrot.T) ) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/util/SpecialFuncUtil.py: -------------------------------------------------------------------------------- 1 | ''' 2 | SpecialFuncUtil.py 3 | 4 | Special mathematical functions, like multivariate gammaln. 5 | ''' 6 | 7 | from collections import defaultdict 8 | import numpy as np 9 | from scipy.special import gammaln, digamma 10 | 11 | LOGPI = np.log(np.pi) 12 | LOGTWO = np.log(2.) 13 | LOGTWOPI = np.log( 2.*np.pi ) 14 | EPS = 10*np.finfo(float).eps 15 | 16 | MVgCache = defaultdict( lambda: dict()) 17 | def MVgammaln(x, D): 18 | ''' Compute log of the D-dimensional multivariate Gamma func. for input x 19 | 20 | Notes: Caching gives big speedup! 21 | ------- 22 | caching : 208 sec for 5 iters of CGS on K=50, D=2 problem with N=10000 23 | no cache : 300 sec 24 | ''' 25 | try: 26 | return MVgCache[D][x] 27 | except KeyError: 28 | result = gammaln(x+ 0.5*(1 - np.arange(1,D+1)) ).sum() + 0.25*D*(D-1)*LOGPI 29 | MVgCache[D][x] = result 30 | return result 31 | 32 | def MVdigamma(x, D): 33 | ''' Compute the first-derivative of the log of the D-dim. 
Gamma function 34 | ''' 35 | return digamma(x + 0.5 * (1 - np.arange(1,D+1))).sum() 36 | 37 | def logsumexp(logA, axis=None): 38 | ''' Efficiently compute log(sum(exp(...))) for input matrix "logA" 39 | Computation is both vectorized and numerically stable. 40 | ''' 41 | logA = np.asarray(logA) 42 | logAmax = logA.max(axis=axis) 43 | if axis is None: 44 | logA = logA - logAmax 45 | elif axis==1: 46 | logA = logA - logAmax[:,np.newaxis] 47 | elif axis==0: 48 | logA = logA - logAmax[np.newaxis,:] 49 | assert np.allclose( logA.max(), 0.0 ) 50 | logA = np.log( np.sum( np.exp(logA), axis=axis ) ) 51 | return logA + logAmax 52 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/util/VerificationUtil.py: -------------------------------------------------------------------------------- 1 | ''' 2 | VerificationUtil.py 3 | 4 | Verification utilities, for checking whether numerical variables are "equal". 5 | ''' 6 | import numpy as np 7 | 8 | def isEvenlyDivisibleFloat(a, b, margin=1e-6): 9 | ''' Returns true/false for whether a is evenly divisible by b 10 | within a (small) numerical tolerance 11 | Examples 12 | -------- 13 | >>> isEvenlyDivisibleFloat( 1.5, 0.5) 14 | True 15 | >>> isEvenlyDivisibleFloat( 1.0, 1./3) 16 | True 17 | ''' 18 | cexact = np.asarray(a)/float(b) 19 | cround = np.round(cexact) 20 | return abs(cexact - cround) < margin 21 | 22 | def closeAtMSigFigs(A, B, M=10, tol=5): 23 | ''' Returns true/false for whether A and B are numerically "close" 24 | aka roughly equal at M significant figures 25 | 26 | Only makes sense for numbers on scale of abs. value 1.0 or larger. 27 | Log evidences will usually always be at this scale. 
28 | 29 | Examples 30 | -------- 31 | >>> closeAtMSigFigs(1234, 1000, M=1) # margin is 500 32 | True 33 | >>> closeAtMSigFigs(1234, 1000, M=2) # margin is 50 34 | False 35 | >>> closeAtMSigFigs(1034, 1000, M=2) # margin is 50 36 | True 37 | >>> closeAtMSigFigs(1005, 1000, M=3) # margin is 5 38 | True 39 | 40 | >>> closeAtMSigFigs(44.5, 49.5, M=1) # margin is 5 41 | True 42 | >>> closeAtMSigFigs(44.5, 49.501, M=1) # just over the margin 43 | False 44 | >>> closeAtMSigFigs(44.499, 49.5, M=1) 45 | False 46 | ''' 47 | A = float(A) 48 | B = float(B) 49 | # Enforce abs(A) >= abs(B) 50 | if abs(A) < abs(B): 51 | tmp = A 52 | A = B 53 | B = tmp 54 | assert abs(A) >= abs(B) 55 | 56 | # Find the scale that A (the larger of the two) possesses 57 | # A ~= 10 ** (P10) 58 | P10 = int(np.floor(np.log10(abs(A)))) 59 | 60 | # Compare the difference between A and B 61 | # to the allowed margin THR 62 | diff = abs(A - B) 63 | if P10 >= 0: 64 | THR = tol * 10.0**(P10 - M) 65 | THR = (1 + 1e-11) * THR 66 | # make THR just a little bigger to avoid issues where 2.0 and 1.95 67 | # aren't equal at 0.05 margin due to rounding errors 68 | return np.sign(A) == np.sign(B) and diff <= THR 69 | else: 70 | THR = tol * 10.0**(-M) 71 | THR = (1 + 1e-11) * THR 72 | return diff <= THR 73 | 74 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/util/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`util` module gathers utility functions 3 | for IO, special functions like "logsumexp", 4 | and various random sampling functions 5 | """ 6 | 7 | import RandUtil 8 | 9 | from .IOUtil import np2flatstr, flatstr2np 10 | from .LinAlgUtil import dotATA, dotATB, dotABT 11 | from .RandUtil import discrete_single_draw, discrete_single_draw_vectorized 12 | from .RandUtil import choice 13 | from .SpecialFuncUtil import MVgammaln, MVdigamma, digamma, gammaln 14 | from .SpecialFuncUtil import 
LOGTWO, LOGPI, LOGTWOPI, EPS 15 | from .SpecialFuncUtil import logsumexp 16 | from .VerificationUtil import closeAtMSigFigs, isEvenlyDivisibleFloat 17 | 18 | __all__ = ['RandUtil', 19 | 'np2flatstr', 'flatstr2np', 20 | 'dotATA', 'dotATB', 'dotABT', 21 | 'discrete_single_draw', 22 | 'MVgammaln', 'MVdigamma', 'logsumexp', 'digamma', 'gammaln', 23 | 'closeAtMSigFigs', 'isEvenlyDivisibleFloat', 24 | 'LOGTWO', 'LOGTWOPI', 'LOGPI', 'EPS'] 25 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/viz/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`viz` module provides visualization capability 3 | """ 4 | # TODO: do a check for wxpython issues 5 | 6 | import GaussViz 7 | import BarsViz 8 | import PlotELBO 9 | import PlotComps 10 | 11 | __all__ = ['GaussViz', 'BarsViz', 'PlotELBO', 'PlotComps'] -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/Bars2D.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Bars2D.py 3 | 4 | Generic functions for creating toy bars data 5 | ''' 6 | import numpy as np 7 | 8 | def Create2DBarsTopicWordParams(V, K, fracMassOnTopic=0.95, PRNG=np.random): 9 | ''' Create parameters of each topics distribution over words 10 | 11 | Args 12 | --------- 13 | V : int vocab size 14 | K : int number of topics 15 | fracMassOnTopic : fraction of total probability mass for "on-topic" words 16 | PRNG : random number generator (for reproducibility) 17 | 18 | Returns 19 | --------- 20 | topics : K x V matrix, real positive numbers whose rows sum to one 21 | ''' 22 | sqrtV = int(np.sqrt(V)) 23 | BarWidth = sqrtV/ (K/2) # number of consecutive words in each bar 24 | B = V/ (K/2) # total number of "on topic" words in each bar 25 | 26 | topics = np.zeros((K,V)) 27 | # Make horizontal bars 28 | for k in range(K/2): 29 | wordIDs = range(B*k, 
B*(k+1)) 30 | topics[k, wordIDs] = 1.0 31 | 32 | # Make vertical bars 33 | for k in range(K/2): 34 | wordIDs = list() 35 | for b in range(sqrtV): 36 | start = b * sqrtV + k*BarWidth 37 | wordIDs.extend( range(start, start+BarWidth)) 38 | topics[K/2 + k, wordIDs] = 1.0 39 | 40 | # Add smoothing mass to all entries in "topics" 41 | # instead of picking this value out of thin air, instead, 42 | # set it so that 95% of the mass of each topic is on the "on-topic" bar words 43 | # if s is the smoothing mass added, and B is num "on topic" words, then 44 | # fracMassOnTopic = (1 + s) * B / ( (1+s)*B + s*(V-B) ), and we solve for s 45 | smoothMass = (1 - fracMassOnTopic)/(fracMassOnTopic*V - B)*B 46 | topics += (2 * smoothMass) * PRNG.rand(K,V) 47 | 48 | # Ensure each row of topics is a probability vector 49 | for k in xrange(K): 50 | topics[k,:] /= np.sum(topics[k,:]) 51 | 52 | assert np.sum(topics[0, :B]) > fracMassOnTopic - 0.05 53 | assert np.sum(topics[1, B:2*B]) > fracMassOnTopic - 0.05 54 | assert np.sum(topics[-1, wordIDs]) > fracMassOnTopic - 0.05 55 | return topics -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BarsK10V900.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK10V900.py 3 | 4 | Toy Bars data, with K=10 topics and vocabulary size 900. 5 | 5 horizontal bars, and 5 vertical bars. 6 | 7 | Generated via the standard LDA generative model 8 | see WordsData.CreateToyDataFromLDAModel for details. 
9 | ''' 10 | import numpy as np 11 | from bnpy.data import WordsData, AdmixMinibatchIterator 12 | import Bars2D 13 | 14 | SEED = 8675309 15 | PRNG = np.random.RandomState(SEED) 16 | 17 | # FIXED DATA GENERATION PARAMS 18 | K = 10 # Number of topics 19 | V = 900 # Vocabulary Size 20 | gamma = 0.5 # hyperparameter over doc-topic distribution 21 | 22 | Defaults = dict() 23 | Defaults['nDocTotal'] = 2000 24 | Defaults['nWordsPerDoc'] = 2 * V / (K/2) 25 | 26 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 27 | trueBeta = np.ones(K) 28 | trueBeta /= trueBeta.sum() 29 | Defaults['topic_prior'] = gamma * trueBeta 30 | 31 | # TOPIC by WORD distribution 32 | Defaults['topics'] = Bars2D.Create2DBarsTopicWordParams(V, K, PRNG=PRNG) 33 | 34 | def get_data_info(**kwargs): 35 | if 'nDocTotal' in kwargs: 36 | nDocTotal = kwargs['nDocTotal'] 37 | else: 38 | nDocTotal = Defaults['nDocTotal'] 39 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d. Typically 1-3 bars per doc.' % (K, nDocTotal) 40 | 41 | def get_data(**kwargs): 42 | ''' 43 | Args 44 | ------- 45 | seed 46 | nDocTotal 47 | nWordsPerDoc 48 | ''' 49 | Data = CreateToyDataFromLDAModel(seed=SEED, **kwargs) 50 | Data.summary = get_data_info(**kwargs) 51 | return Data 52 | 53 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 54 | dataorderseed=0, **kwargs): 55 | ''' 56 | Args 57 | ------- 58 | seed 59 | nDocTotal 60 | nWordsPerDoc 61 | ''' 62 | Data = CreateToyDataFromLDAModel(seed=seed, **kwargs) 63 | DataIterator = AdmixMinibatchIterator(Data, 64 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 65 | DataIterator.summary = get_data_info(**kwargs) 66 | return DataIterator 67 | 68 | def CreateToyDataFromLDAModel(**kwargs): 69 | for key in Defaults: 70 | if key not in kwargs: 71 | kwargs[key] = Defaults[key] 72 | return WordsData.CreateToyDataFromLDAModel(**kwargs) 73 | 74 | if __name__ == '__main__': 75 | import bnpy.viz.BarsViz 76 | WData = CreateToyDataFromLDAModel(seed=SEED) 77 | 
bnpy.viz.BarsViz.plotExampleBarsDocs(WData) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BarsK50V2500.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK50V2500.py 3 | 4 | Toy Bars data, with K=50 topics and V=2500 vocabulary size. 5 | 25 horizontal bars, and 25 vertical vertical ones. 6 | 7 | Generated via the standard LDA generative model 8 | see WordsData.CreateToyDataFromLDAModel for details. 9 | 10 | Usage 11 | --------- 12 | To visualize example documents, execute this file as a script 13 | >> python BarsK50V2500.py 14 | 15 | To visualize document "1" from within Python 16 | >> Data = BarsK50V2500.get_data(nDocTotal=5) 17 | >> wid1 = Data.word_id[ Data.doc_range[0,0]:Data.doc_range[0,1] ] 18 | >> wct1 = Data.word_count[ Data.doc_range[0,0]:Data.doc_range[0,1] ] 19 | Make histogram with counts for each of the vocab word types 20 | >> whist = np.zeros(Data.vocab_size) 21 | >> whist[wid1] = wct1 22 | # Plot it as a 2D image 23 | >> whist2D = np.reshape( whist, (50, 50) ) 24 | >> pylab.imshow(whist2D, interpolation='nearest') 25 | 26 | ''' 27 | import numpy as np 28 | from bnpy.data import WordsData, AdmixMinibatchIterator 29 | import Bars2D 30 | 31 | SEED = 8675309 32 | PRNG = np.random.RandomState(SEED) 33 | 34 | # FIXED DATA GENERATION PARAMS 35 | K = 50 # Number of topics 36 | V = 2500 # Vocabulary Size 37 | gamma = 0.75 # hyperparameter over doc-topic distribution 38 | 39 | Defaults = dict() 40 | Defaults['nDocTotal'] = 2000 41 | Defaults['nWordsPerDoc'] = 5 * V / (K/2) 42 | 43 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 44 | trueBeta = np.ones(K) 45 | trueBeta /= trueBeta.sum() 46 | Defaults['topic_prior'] = gamma * trueBeta 47 | 48 | # TOPIC by WORD distribution 49 | Defaults['topics'] = Bars2D.Create2DBarsTopicWordParams(V, K, PRNG=PRNG) 50 | 51 | def get_data_info(**kwargs): 52 | if 'nDocTotal' in kwargs: 53 | nDocTotal = 
kwargs['nDocTotal'] 54 | else: 55 | nDocTotal = Defaults['nDocTotal'] 56 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d. Typically 2-4 bars per doc' % (K, nDocTotal) 57 | 58 | def get_data(**kwargs): 59 | ''' 60 | Args 61 | ------- 62 | seed 63 | nDocTotal 64 | nWordsPerDoc 65 | ''' 66 | Data = CreateToyDataFromLDAModel(seed=SEED, **kwargs) 67 | Data.summary = get_data_info(**kwargs) 68 | return Data 69 | 70 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 71 | dataorderseed=0, **kwargs): 72 | ''' 73 | Args 74 | ------- 75 | seed 76 | nDocTotal 77 | nWordsPerDoc 78 | ''' 79 | Data = CreateToyDataFromLDAModel(seed=seed, **kwargs) 80 | DataIterator = AdmixMinibatchIterator(Data, 81 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 82 | DataIterator.summary = get_data_info(**kwargs) 83 | return DataIterator 84 | 85 | def CreateToyDataFromLDAModel(**kwargs): 86 | for key in Defaults: 87 | if key not in kwargs: 88 | kwargs[key] = Defaults[key] 89 | return WordsData.CreateToyDataFromLDAModel(**kwargs) 90 | 91 | if __name__ == '__main__': 92 | import bnpy.viz.BarsViz 93 | WData = CreateToyDataFromLDAModel(seed=SEED) 94 | bnpy.viz.BarsViz.plotExampleBarsDocs(WData) 95 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BarsK6V9.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK6V9.py 3 | 4 | Toy Bars data, with K=6 topics and vocabulary size 9. 5 | 3 horizontal bars, and 3 vertical bars. 6 | 7 | Generated via the standard LDA generative model 8 | see WordsData.CreateToyDataFromLDAModel for details. 
9 | ''' 10 | import numpy as np 11 | from bnpy.data import WordsData, AdmixMinibatchIterator 12 | import Bars2D 13 | 14 | SEED = 8675309 15 | PRNG = np.random.RandomState(SEED) 16 | 17 | # FIXED DATA GENERATION PARAMS 18 | K = 6 # Number of topics 19 | V = 9 # Vocabulary Size 20 | gamma = 0.5 # hyperparameter over doc-topic distribution 21 | 22 | Defaults = dict() 23 | Defaults['nDocTotal'] = 200 24 | Defaults['nWordsPerDoc'] = 25 25 | 26 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 27 | trueBeta = np.ones(K) 28 | trueBeta /= trueBeta.sum() 29 | Defaults['topic_prior'] = gamma * trueBeta 30 | 31 | # TOPIC by WORD distribution 32 | Defaults['topics'] = Bars2D.Create2DBarsTopicWordParams(V, K, PRNG=PRNG) 33 | 34 | def get_data_info(**kwargs): 35 | if 'nDocTotal' in kwargs: 36 | nDocTotal = kwargs['nDocTotal'] 37 | else: 38 | nDocTotal = Defaults['nDocTotal'] 39 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d. Typically 1-3 bars per doc.' % (K, nDocTotal) 40 | 41 | def get_data(**kwargs): 42 | ''' 43 | Args 44 | ------- 45 | seed 46 | nDocTotal 47 | nWordsPerDoc 48 | ''' 49 | Data = CreateToyDataFromLDAModel(seed=SEED, **kwargs) 50 | Data.summary = get_data_info(**kwargs) 51 | return Data 52 | 53 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 54 | dataorderseed=0, **kwargs): 55 | ''' 56 | Args 57 | ------- 58 | seed 59 | nDocTotal 60 | nWordsPerDoc 61 | ''' 62 | Data = CreateToyDataFromLDAModel(seed=seed, **kwargs) 63 | DataIterator = AdmixMinibatchIterator(Data, 64 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 65 | DataIterator.summary = get_data_info(**kwargs) 66 | return DataIterator 67 | 68 | def CreateToyDataFromLDAModel(**kwargs): 69 | for key in Defaults: 70 | if key not in kwargs: 71 | kwargs[key] = Defaults[key] 72 | return WordsData.CreateToyDataFromLDAModel(**kwargs) 73 | 74 | if __name__ == '__main__': 75 | import bnpy.viz.BarsViz 76 | WData = CreateToyDataFromLDAModel(seed=SEED) 77 | bnpy.viz.BarsViz.plotExampleBarsDocs(WData) 
-------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BarsK8.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK8.py 3 | 4 | Toy Bars data, with K=8 topics 5 | 4 horizontal, and 4 vertical. 6 | ''' 7 | import numpy as np 8 | from bnpy.data import WordsData, AdmixMinibatchIterator 9 | 10 | Defaults = dict() 11 | Defaults['nDocTotal'] = 2000 12 | Defaults['nWordsPerDoc'] = 100 13 | 14 | SEED = 8675309 15 | 16 | # FIXED DATA GENERATION PARAMS 17 | K = 8 # Number of topics 18 | V = 16 # Vocabulary Size 19 | gamma = 0.5 # hyperparameter over doc-topic distribution 20 | 21 | # TOPIC by WORD distribution 22 | topics = np.zeros( (K,V) ) 23 | topics[0,:] = [ 9, 9, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 24 | topics[1,:] = [ 0, 0, 0, 0, 9, 9, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0] 25 | topics[2,:] = [ 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 9, 9, 0, 0, 0, 0] 26 | topics[3,:] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 9, 9] 27 | topics[4,:] = [ 8, 0, 0, 0, 8, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0] 28 | topics[5,:] = [ 0, 8, 0, 0, 0, 8, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0] 29 | topics[6,:] = [ 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 8, 0, 0, 0, 8, 0] 30 | topics[7,:] = [ 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 8, 0, 0, 0, 8] 31 | 32 | # Add "smoothing" term to each entry of the topic-word matrix 33 | # With V = 16 and 8 sets of bars, 34 | # smoothMass=0.02 yields 0.944 probability of drawing "on topic" word 35 | smoothMass = 0.02 * 8 36 | topics += smoothMass 37 | # Ensure each row of topics is a probability vector 38 | for k in xrange(K): 39 | topics[k,:] /= np.sum(topics[k,:]) 40 | Defaults['topics'] = topics 41 | 42 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 43 | trueBeta = np.hstack([1.1*np.ones(K/2), np.ones(K/2)]) 44 | trueBeta /= trueBeta.sum() 45 | Defaults['topic_prior'] = gamma * trueBeta 46 | 47 | def get_data_info(**kwargs): 48 | if 'nDocTotal' in kwargs: 49 | nDocTotal = kwargs['nDocTotal'] 50 | 
else: 51 | nDocTotal = Defaults['nDocTotal'] 52 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d.' % (K, nDocTotal) 53 | 54 | def get_data(**kwargs): 55 | ''' 56 | Args 57 | ------- 58 | seed 59 | nDocTotal 60 | nWordsPerDoc 61 | ''' 62 | Data = CreateToyDataFromLDAModel(seed=SEED, **kwargs) 63 | Data.summary = get_data_info(**kwargs) 64 | return Data 65 | 66 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 67 | dataorderseed=0, **kwargs): 68 | ''' 69 | Args 70 | ------- 71 | seed 72 | nDocTotal 73 | nWordsPerDoc 74 | ''' 75 | Data = CreateToyDataFromLDAModel(seed=seed, **kwargs) 76 | DataIterator = AdmixMinibatchIterator(Data, 77 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 78 | DataIterator.summary = get_data_info(**kwargs) 79 | return DataIterator 80 | 81 | def CreateToyDataFromLDAModel(**kwargs): 82 | for key in Defaults: 83 | if key not in kwargs: 84 | kwargs[key] = Defaults[key] 85 | return WordsData.CreateToyDataFromLDAModel(**kwargs) 86 | 87 | if __name__ == '__main__': 88 | import bnpy.viz.BarsViz 89 | WData = CreateToyDataFromLDAModel(seed=SEED) 90 | bnpy.viz.BarsViz.plotExampleBarsDocs(WData) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BinaryGraphK5.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BinaryGraphK5.py 3 | 4 | Binary Toy Graph with K=5 communities. 
5 | ''' 6 | import numpy as np 7 | import random 8 | from bnpy.data import GraphData 9 | 10 | SEED = 8675309 11 | PRNG = np.random.RandomState(SEED) 12 | 13 | # FIXED DATA GENERATION PARAMS 14 | K = 5 # Number of communities 15 | N = 50 # Number of nodes 16 | beta_a = 0.1 # hyperparameter over block matrix entries 17 | beta_b = 0.1 # hyperparameter over block matrix entries 18 | 19 | Defaults = dict() 20 | Defaults['nNodeTotal'] = 50 21 | 22 | # Initialize adjacency matrix and stochastic block matrix 23 | sb = np.zeros( (K,K) ) + 0.01 24 | sb[0,0] = .9 25 | sb[1,1] = .9 26 | sb[2,2] = .9 27 | sb[3,3] = .9 28 | sb[4,4] = .9 29 | 30 | # function to generate adjacency matrix 31 | def gen_graph(K, N, sb): 32 | 33 | # define the edge indices and edge values 34 | edge_val = list() 35 | edge_exclude = list() # edges to exclude (10%) 36 | exclusion_thresh = 0.9 # 1 = no excluded edges 37 | 38 | # generate community memberships 39 | pi = np.zeros( (N,K) ) 40 | alpha = np.zeros(K) + .1 41 | for ii in xrange(N): 42 | pi[ii,:] = PRNG.dirichlet(alpha) 43 | 44 | for ii in xrange(N): 45 | for jj in xrange(ii+1,N): 46 | if ii != jj and ii < jj: 47 | s = PRNG.choice(5, 1, p=pi[ii,:]) 48 | r = PRNG.choice(5, 1, p=pi[jj,:]) 49 | # If this edge is not being exlcuded, just add to edge_id 50 | if PRNG.rand() <= exclusion_thresh: 51 | if PRNG.rand() < sb[s,r]: 52 | edge_val.append([ii,jj,1]) 53 | else: # include this as an edge that needs to be excluded 54 | if PRNG.rand() < sb[s,r]: 55 | edge_exclude.append([ii,jj,1]) 56 | else: 57 | edge_exclude.append([ii,jj,0]) 58 | 59 | edge_val = np.asarray(np.squeeze(edge_val), dtype=np.int32) 60 | edge_exclude = np.asarray(np.squeeze(edge_exclude), dtype=np.int32) 61 | 62 | return (edge_val, edge_exclude) 63 | 64 | # template function to wrap data in bnpy format 65 | def get_data(**kwargs): 66 | ''' Grab data from matfile specified by matfilepath 67 | ''' 68 | edge_val, edge_exclude = gen_graph(K,N,sb) 69 | Data = GraphData(edge_val = edge_val, 
nNodeTotal=N, edge_exclude=edge_exclude) 70 | Data.summary = get_data_info(K, Data.nNodeTotal, Data.nEdgeTotal) 71 | Data.get_edges_all() # Grab the full set of edges for inference 72 | return Data 73 | 74 | def get_minibatch_iterator(nBatch=10, nLap=1, dataorderseed=0, **kwargs): 75 | pass 76 | 77 | def get_data_info(K,N,E): 78 | return 'Toy Binary Graph Dataset where K=%d . N=%d. E=%d' % (K,N,E) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/DeadLeavesD25.py: -------------------------------------------------------------------------------- 1 | import DeadLeaves as DL 2 | 3 | DL.makeTrueParams(25) 4 | 5 | def get_data(**kwargs): 6 | return DL.get_data(**kwargs) 7 | 8 | def get_minibatch_iterator(**kwargs): 9 | return DL.get_minibatch_iterator(**kwargs) 10 | 11 | def get_short_name(): 12 | return DL.get_short_name() 13 | 14 | def get_data_info(): 15 | return DL.get_data_info() 16 | 17 | 18 | if __name__ == '__main__': 19 | DL.plotTrueCovMats(doShowNow=False) 20 | DL.plotImgPatchPrototypes() 21 | 22 | 23 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/NIPS.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NIPSCorpus.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | datadir = '/Users/daeil/Dropbox/research/bnpy/data/' 9 | NIPSmatfile = 'nips_bnpy.mat' 10 | matfilepath = os.environ['BNPYDATADIR'] + NIPSmatfile 11 | 12 | if not os.path.exists(matfilepath): 13 | matfilepath = datadir + NIPSmatfile 14 | 15 | def get_data(**kwargs): 16 | ''' Grab data from matfile specified by matfilepath 17 | ''' 18 | Data = WordsData.read_from_mat(matfilepath) 19 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 20 | return Data 21 | 22 | def get_minibatch_iterator(nBatch=10, nLap=1, 23 | dataorderseed=0, **kwargs): 24 | Data = 
WordsData.read_from_mat(matfilepath) 25 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, 26 | nLap=nLap, dataorderseed=dataorderseed) 27 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 28 | return DataIterator 29 | 30 | def get_data_info(D, V): 31 | return 'NIPS bag-of-words data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/NYTimes.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NYTimes.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = '/Users/daeil/Dropbox/research/liv-test/topic_models/data/nytimes_small/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'nyt_small_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'nyt_small_bnpy.mat' 13 | 14 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat( matfilepath ) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 22 | Data = WordsData.read_from_mat( matfilepath ) 23 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 24 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 25 | return DataIterator 26 | 27 | def get_data_info(D, V): 28 | return 'NYTimes (Small) Data. D=%d. 
VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/allocmodel/TestMixModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for MixModel.py 3 | ''' 4 | import numpy as np 5 | import bnpy 6 | from bnpy.allocmodel import MixModel 7 | from bnpy.suffstats import SuffStatBag 8 | 9 | class TestMixModelEMUnifAlpha(object): 10 | def shortDescription(self): 11 | return None 12 | 13 | def setUp(self): 14 | ''' 15 | Create a stupid simple case for making sure we're calculating things correctly 16 | ''' 17 | self.alpha0 = 1.0 18 | self.allocM = MixModel('EM', dict(alpha0=self.alpha0)) 19 | self.N = np.asarray([1.,2.,3,4,5.]) 20 | self.SS = SuffStatBag(K=5, D=1) 21 | self.SS.setField('N', self.N, dims='K') 22 | self.resp = np.random.rand(100,3) 23 | self.precompEntropy = np.sum(self.resp * np.log(self.resp), axis=0) 24 | 25 | def test_update_global_params_EM(self): 26 | self.allocM.update_global_params_EM(self.SS) 27 | wTrue = (self.N + self.alpha0 - 1.0) 28 | wTrue = wTrue / np.sum(wTrue) 29 | wEst = self.allocM.w 30 | print wTrue 31 | print wEst 32 | assert np.allclose(wTrue, wEst) 33 | 34 | def test_get_global_suff_stats(self): 35 | Data = bnpy.data.XData(np.random.randn(10,1)) 36 | SS = self.allocM.get_global_suff_stats(Data, dict(resp=self.resp), doPrecompEntropy=True) 37 | assert np.allclose(self.precompEntropy, SS.getELBOTerm('ElogqZ')) 38 | assert np.allclose( np.sum(self.resp, axis=0), SS.N) 39 | 40 | class TestMixModelEMNonunifAlpha(TestMixModelEMUnifAlpha): 41 | def setUp(self): 42 | self.alpha0 = 2.0 43 | self.allocM = MixModel('EM', dict(alpha0=self.alpha0)) 44 | self.N = np.asarray([1.,2.,3,4,5.]) 45 | self.SS = SuffStatBag(K=5, D=1) 46 | self.SS.setField('N', self.N, dims='K') 47 | self.resp = np.random.rand(100,3) 48 | self.precompEntropy = np.sum(self.resp * np.log(self.resp), axis=0) 49 | 
-------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/data/TestMinibatchIterator.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for MinibatchIterator.py 3 | ''' 4 | import numpy as np 5 | import unittest 6 | import copy 7 | import bnpy.data.XData as XData 8 | import bnpy.data.MinibatchIterator as MinibatchIterator 9 | 10 | class TestMinibatchIterator(unittest.TestCase): 11 | def shortDescription(self): 12 | return None 13 | 14 | def setUp(self): 15 | X = np.random.randn(100, 3) 16 | self.Data = XData(X=X) 17 | self.DataIterator = MinibatchIterator(self.Data, nBatch=10, nLap=10) 18 | 19 | def test_first_batch(self): 20 | assert self.DataIterator.has_next_batch() 21 | bData = self.DataIterator.get_next_batch() 22 | assert self.DataIterator.curLapPos == 0 23 | self.verify_batch(bData) 24 | 25 | def test_num_laps(self): 26 | ''' Make sure we raise the expected exception after exhausting all the data 27 | ''' 28 | nLap = self.DataIterator.nLap 29 | nBatch = self.DataIterator.nBatch 30 | for lapID in range(nLap): 31 | for batchCount in range(nBatch): 32 | bData = self.DataIterator.get_next_batch() 33 | assert self.DataIterator.curLapPos == batchCount 34 | assert self.DataIterator.lapID == lapID 35 | self.verify_batch(bData) 36 | try: 37 | bData = self.DataIterator.get_next_batch() 38 | raise Exception('should not make it to this line!') 39 | except StopIteration: 40 | assert 1==1 41 | 42 | def test_batchIDs_traversal_order(self): 43 | ''' Make sure batchIDs from consecutive laps are not the same 44 | ''' 45 | self.DataIterator.lapID = 0 46 | self.DataIterator.curLapPos = -1 47 | bData1 = self.DataIterator.get_next_batch() 48 | batchOrder = copy.copy(self.DataIterator.batchOrderCurLap) 49 | 50 | self.DataIterator.lapID = 1 51 | self.DataIterator.curLapPos = -1 52 | bData2 = self.DataIterator.get_next_batch() 53 | batchOrder2 = 
self.DataIterator.batchOrderCurLap 54 | print batchOrder, batchOrder2 55 | assert not np.allclose(batchOrder, batchOrder2) 56 | assert np.allclose(np.unique(batchOrder),np.unique(batchOrder2)) 57 | 58 | 59 | def test_obs_full_coverage(self): 60 | ''' Make sure all data items are covered every lap 61 | ''' 62 | coveredIDs = list() 63 | nBatch = self.DataIterator.nBatch 64 | for bID in range(nBatch): 65 | bData = self.DataIterator.get_next_batch() 66 | obsIDs = self.DataIterator.getObsIDsForCurrentBatch() 67 | coveredIDs.extend(obsIDs) 68 | assert len(np.unique(coveredIDs)) == self.Data.nObsTotal 69 | 70 | def verify_batch(self, bData): 71 | assert bData.nObs == self.Data.nObs / self.DataIterator.nBatch 72 | assert bData.nObsTotal == self.Data.nObsTotal 73 | # Check that the data is as expected! 74 | batchX = bData.X 75 | trueMask = self.DataIterator.getObsIDsForCurrentBatch() 76 | trueX = self.Data.X[trueMask] 77 | assert np.allclose(batchX, trueX) 78 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/distr/TestGaussDistr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for GaussDistr.py 3 | ''' 4 | from bnpy.distr import GaussDistr 5 | import numpy as np 6 | 7 | class TestGaussD2(object): 8 | def setUp(self): 9 | self.m = np.ones(2) 10 | self.invSigma = np.eye(2) 11 | self.distr = GaussDistr(m=self.m, L=self.invSigma) 12 | 13 | def test_dimension(self): 14 | assert self.distr.D == self.invSigma.shape[0] 15 | 16 | def test_cholL(self): 17 | chol = self.distr.cholL() 18 | assert np.allclose(np.dot(chol, chol.T), self.distr.L) 19 | 20 | def test_logdetL(self): 21 | logdetL = self.distr.logdetL() 22 | assert np.allclose( np.log(np.linalg.det(self.invSigma)), logdetL) 23 | 24 | def test_dist_mahalanobis(self, N=10): 25 | X = np.random.randn(N, self.distr.D) 26 | Dist = self.distr.dist_mahalanobis(X) 27 | invSigma = self.invSigma 28 | MyDist = 
np.zeros(N) 29 | for ii in range(N): 30 | x = X[ii] - self.m 31 | MyDist[ii] = np.dot(x.T, np.dot(invSigma, x)) 32 | #if error, we print it out 33 | print MyDist[ii], Dist[ii] 34 | assert np.allclose(MyDist, Dist) 35 | 36 | class TestGaussD1(TestGaussD2): 37 | def setUp(self): 38 | self.m = np.ones(1) 39 | self.invSigma = np.eye(1) 40 | self.distr = GaussDistr(m=self.m, L=self.invSigma) 41 | 42 | 43 | class TestGaussD10(TestGaussD2): 44 | def setUp(self): 45 | PRNG = np.random.RandomState(867) 46 | R = PRNG.rand(10,10) 47 | 48 | self.m = np.ones(10) 49 | self.invSigma = 1e-4*np.eye(10) 50 | self.distr = GaussDistr(m=self.m, L=self.invSigma) 51 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/distr/TestGaussWishDistr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for GaussWishDistr.py 3 | ''' 4 | from bnpy.distr import GaussWishDistr, WishartDistr 5 | from bnpy.suffstats import SuffStatBag 6 | import numpy as np 7 | import copy 8 | 9 | class TestGaussWishDistr(object): 10 | def setUp(self): 11 | self.invW = np.eye(2) 12 | self.m = np.zeros(2) 13 | self.distr = GaussWishDistr(m=self.m, invW=self.invW, kappa=1.0, dF=4) 14 | 15 | def test_dimension(self): 16 | assert self.distr.D == self.invW.shape[0] 17 | 18 | def test_entropyWish(self): 19 | ''' Verify that (wishart) entropy is same for this object and Wishart object 20 | ''' 21 | Hself = self.distr.entropyWish() 22 | wishDistr = WishartDistr(v=self.distr.dF, invW=self.distr.invW) 23 | Hwish = wishDistr.get_entropy() 24 | assert np.allclose( Hself, Hwish) 25 | 26 | def test_dist_mahalanobis(self, N=10): 27 | ''' Verify that distance computation is largest at mean and decays further away 28 | ''' 29 | Xlist = list() 30 | for r in [0, 0.01, 0.1, 1, 2, 3, 4, 5]: 31 | Xlist.append(self.distr.m + r) 32 | X = np.asarray(Xlist) 33 | Dist = self.distr.dist_mahalanobis(X) 34 | print Dist 35 | assert 
np.all( Dist[:-1] < Dist[1:]) 36 | 37 | def test_update_soVB(self, rho=0.25): 38 | ''' Verify the blend update for stochastic variational is correct 39 | ''' 40 | distrB = copy.deepcopy(self.distr) 41 | distrB.invW *= 3 42 | distrB.m += 2 43 | distrB.kappa *= 10 44 | distrB2 = copy.deepcopy(distrB) 45 | # Make sure things are different! 46 | assert not np.allclose(distrB.invW, self.distr.invW) 47 | assert not np.allclose(distrB.m, self.distr.m) 48 | 49 | distrB.post_update_soVB(rho, self.distr) 50 | assert distrB.dF == distrB2.dF * (1-rho) + self.distr.dF * rho 51 | assert np.allclose(distrB.kappa, distrB2.kappa * (1-rho) + self.distr.kappa * rho) 52 | 53 | # these dont work because the parameterization is a bit trickier here. 54 | #assert np.allclose(distrB.invW, distrB2.invW * (1-rho) + self.distr.invW * rho) 55 | #assert np.allclose(distrB.m, distrB2.m * (1-rho) + self.distr.m * rho) 56 | 57 | 58 | def test_entropy_posterior_gets_smaller(self, N=10): 59 | PRNG = np.random.RandomState(seed=8675309) 60 | for trial in range(3): 61 | X = PRNG.randn(N, self.distr.D) + self.distr.m 62 | x = np.sum(X,axis=0) 63 | xxT = np.dot(X.T,X) 64 | SS = SuffStatBag(K=1, D=self.distr.D) 65 | SS.setField('N', [N], dims='K') 66 | SS.setField('x', [x], dims=('K','D')) 67 | SS.setField('xxT', [xxT], dims=('K','D','D')) 68 | postD = self.distr.get_post_distr(SS, 0) 69 | assert postD.D == self.distr.D 70 | Hpost = postD.entropyWish() 71 | Hprior = self.distr.entropyWish() 72 | print 'Prior %.3g, Post %.3g' % (Hprior, Hpost) 73 | assert Hpost < Hprior -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/distr/TestWishartDistr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ''' 3 | from bnpy.distr import WishartDistr 4 | from bnpy.suffstats import SuffStatBag 5 | import numpy as np 6 | import copy 7 | 8 | class TestWishart(object): 9 | def setUp(self): 10 | self.v = 4 11 | self.invW 
= np.eye(2) 12 | self.distr = WishartDistr(v=self.v, invW=self.invW) 13 | 14 | def test_dimension(self): 15 | assert self.distr.D == self.invW.shape[0] 16 | 17 | def test_cholinvW(self): 18 | cholinvW = self.distr.cholinvW() 19 | assert np.allclose(np.dot(cholinvW, cholinvW.T), self.distr.invW) 20 | 21 | def test_expected_covariance_matrix(self): 22 | CovMat = self.distr.ECovMat() 23 | MyCovMat = self.invW / (self.v - self.distr.D - 1) 24 | print MyCovMat, CovMat 25 | assert np.allclose(MyCovMat, CovMat) 26 | 27 | def test_post_update_soVB(self, rho=0.375): 28 | distrA = copy.deepcopy(self.distr) 29 | distrB = WishartDistr(distrA.v, invW=np.eye(distrA.D) ) 30 | self.distr.post_update_soVB(rho, distrB) 31 | assert self.distr.v == rho*distrA.v + (1-rho)*distrB.v 32 | assert np.allclose(self.distr.invW, rho*distrA.invW + (1-rho)*distrB.invW) 33 | 34 | def test_entropy_posterior_gets_smaller(self, N=1): 35 | PRNG = np.random.RandomState(seed=8675309) 36 | for trial in range(3): 37 | X = PRNG.randn(N, self.distr.D) 38 | xxT = np.dot(X.T, X) 39 | 40 | SS = SuffStatBag(K=1, D=self.distr.D) 41 | SS.setField('N', [N], dims='K') 42 | SS.setField('xxT', [xxT], dims=('K','D','D')) 43 | 44 | postD = self.distr.get_post_distr(SS, 0) 45 | assert postD.D == self.distr.D 46 | Hpost = postD.get_entropy() 47 | Hprior = self.distr.get_entropy() 48 | print 'Prior %.3g, Post %.3g' % (Hprior, Hpost) 49 | print self.distr.invW 50 | print postD.invW 51 | assert Hpost < Hprior -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/distr/TestZMGaussDistr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ''' 3 | from bnpy.distr import ZMGaussDistr 4 | import numpy as np 5 | 6 | class TestZMGauss(object): 7 | def setUp(self): 8 | self.Sigma = np.eye(4) 9 | self.distr = ZMGaussDistr(Sigma=self.Sigma.copy()) 10 | 11 | def test_dimension(self): 12 | assert self.distr.D == self.Sigma.shape[0] 13 
| 14 | def test_cholSigma(self): 15 | chol = self.distr.cholSigma() 16 | assert np.allclose(np.dot(chol, chol.T), self.distr.Sigma) 17 | 18 | def test_logdetSigma(self): 19 | logdetSigma = self.distr.logdetSigma() 20 | assert np.allclose( np.log(np.linalg.det(self.Sigma)), logdetSigma) 21 | 22 | def test_get_log_norm_const(self): 23 | logZ = self.distr.get_log_norm_const() 24 | logdetSigma = np.log(np.linalg.det(self.Sigma)) 25 | mylogZ = 0.5*self.Sigma.shape[0]*np.log(2*np.pi) + 0.5 * logdetSigma 26 | 27 | def test_dist_mahalanobis(self, N=10): 28 | X = np.random.randn(N, self.distr.D) 29 | Dist = self.distr.dist_mahalanobis(X) 30 | invSigma = np.linalg.inv(self.Sigma) 31 | MyDist = np.zeros(N) 32 | for ii in range(N): 33 | x = X[ii] 34 | MyDist[ii] = np.dot(x.T, np.dot(invSigma, x)) 35 | #if error, we print it out 36 | print MyDist[ii], Dist[ii] 37 | assert np.allclose(MyDist, Dist) 38 | 39 | class TestZMGaussRand1Dim(TestZMGauss): 40 | def setUp(self): 41 | self.Sigma = np.asarray([[42.0]]) 42 | self.distr = ZMGaussDistr(Sigma=self.Sigma) 43 | 44 | class TestZMGaussRand5Dim(TestZMGauss): 45 | def setUp(self): 46 | R = np.random.rand(5,5) 47 | self.Sigma = np.dot(R, R.T) + 0.02*np.eye(5) 48 | self.distr = ZMGaussDistr(Sigma=self.Sigma) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestAdmixTopicModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for topic models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestAdmixTopicModel(AbstractEndToEndTest): 12 | __test__ = True 13 | 14 | def setUp(self): 15 | self.Data = bnpy.data.WordsData.CreateToyDataSimple(nDoc=25, nWordsPerDoc=50, vocab_size=100) 16 | self.allocModelName = 'AdmixModel' 17 | self.obsModelName = 'Mult' 18 | self.kwargs = dict(nLap=30, 
K=5, alpha0=1) 19 | self.kwargs['lambda'] = 1 20 | self.kwargs['doMemoizeLocalParams'] = 1 21 | 22 | self.mustRetainLPAcrossLapsForGuarantees = True 23 | self.learnAlgs = ['VB', 'moVB', 'soVB'] 24 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestDPMixGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for full-mean, full-covariance Gaussian models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestDPMixGaussModel(AbstractEndToEndTest): 12 | __test__ = True 13 | 14 | def setUp(self): 15 | PRNG = np.random.RandomState(333) 16 | X = PRNG.randn(1000, 3) 17 | X = np.vstack([X, 5 + PRNG.randn(100, 3)]) 18 | self.Data = bnpy.data.XData(X) 19 | self.allocModelName = 'DPMixModel' 20 | self.obsModelName = 'Gauss' 21 | self.kwargs = dict(nLap=30, K=5, alpha0=1) 22 | self.kwargs['smatname'] = 'eye' 23 | 24 | self.learnAlgs = ['VB', 'soVB', 'moVB'] 25 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestHDPModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for topic models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestHDPModel(AbstractEndToEndTest): 12 | __test__ = True 13 | 14 | def setUp(self): 15 | self.Data = bnpy.data.WordsData.CreateToyDataSimple(nDoc=25, nWordsPerDoc=50, vocab_size=100) 16 | self.allocModelName = 'HDPModel' 17 | self.obsModelName = 'Mult' 18 | self.kwargs = dict(nLap=30, K=5, alpha0=1) 19 | self.kwargs['lambda'] = 1 20 | self.kwargs['doMemoizeLocalParams'] = 1 21 | self.kwargs['doFullPassBeforeMstep'] = 1 22 | 23 | 
self.mustRetainLPAcrossLapsForGuarantees = True 24 | self.learnAlgs = ['VB', 'moVB', 'soVB'] 25 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestMixDiagGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ''' 3 | import numpy as np 4 | import unittest 5 | 6 | import bnpy 7 | from AbstractEndToEndTest import AbstractEndToEndTest 8 | import Util 9 | 10 | class TestSimple(AbstractEndToEndTest): 11 | __test__ = True 12 | 13 | def setUp(self): 14 | PRNG = np.random.RandomState(333) 15 | X = PRNG.randn(1000, 3) 16 | self.Data = bnpy.data.XData(X) 17 | self.allocModelName = 'MixModel' 18 | self.obsModelName = 'DiagGauss' 19 | self.kwargs = dict(nLap=30, K=3, alpha0=1) 20 | self.learnAlgs = ['EM', 'VB', 'moVB', 'soVB'] 21 | 22 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestMixGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for full-mean, full-covariance Gaussian models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestSimple(AbstractEndToEndTest): 12 | ''' Test basic functionality (run without crashing?) on very simple dataset. 13 | ''' 14 | __test__ = True 15 | 16 | def setUp(self): 17 | PRNG = np.random.RandomState(333) 18 | X = PRNG.randn(1000, 3) 19 | self.Data = bnpy.data.XData(X) 20 | self.allocModelName = 'MixModel' 21 | self.obsModelName = 'Gauss' 22 | self.kwargs = dict(nLap=30, K=3, alpha0=1) 23 | self.kwargs['smatname'] = 'eye' 24 | 25 | self.learnAlgs = ['EM', 'VB', 'moVB', 'soVB'] 26 | 27 | 28 | class TestEasyK2_EM(AbstractEndToEndTest): 29 | ''' Test basic EM parameter estimation on well-separable K=2 toy dataset. 
30 | 31 | Verify runs from fromTruth and fromScratch inits reach ideal params. 32 | ''' 33 | __test__ = True 34 | 35 | def setUp(self): 36 | # Define true parameters (mean, prec matrix) for 2 well-separated clusters 37 | self.K = 2 38 | B = 20 39 | Mu = np.eye(2) 40 | Sigma = np.zeros((2,2,2)) 41 | Sigma[0] = np.asarray([[B,0], [0,1./B]]) 42 | Sigma[1] = np.asarray([[1./B,0], [0,B]]) 43 | L = np.zeros_like(Sigma) 44 | for k in xrange(self.K): 45 | L[k] = np.linalg.inv(Sigma[k]) 46 | self.TrueParams = dict(w=0.5*np.ones(self.K), K=self.K, m=Mu, L=L) 47 | self.ProxFunc = dict(L=Util.CovMatProxFunc, 48 | m=Util.VectorProxFunc, 49 | w=Util.ProbVectorProxFunc) 50 | 51 | # Generate data 52 | Nk = 1000 53 | X = Util.MakeGaussData(Mu, Sigma, Nk) 54 | self.Data = bnpy.data.XData(X) 55 | 56 | self.learnAlgs = ['EM'] 57 | 58 | # Basic configuration 59 | self.allocModelName = 'MixModel' 60 | self.obsModelName = 'Gauss' 61 | self.kwargs = dict(nLap=30, K=self.K, alpha0=1.0) 62 | 63 | # Substitute config used for "from-scratch" tests only 64 | # anything in here overrides defaults in self.kwargs 65 | self.fromScratchArgs = dict(nLap=50, K=self.K, initname='randexamples') 66 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestMixZMGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for zero-mean, full-covariance Gaussian models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestSimple(AbstractEndToEndTest): 12 | ''' Test basic functionality (run without crashing?) on very simple dataset. 
13 | ''' 14 | __test__ = True 15 | 16 | def setUp(self): 17 | PRNG = np.random.RandomState(333) 18 | X = PRNG.randn(1000, 3) 19 | self.Data = bnpy.data.XData(X) 20 | self.allocModelName = 'MixModel' 21 | self.obsModelName = 'ZMGauss' 22 | self.kwargs = dict(nLap=30, K=4, alpha0=1.0) 23 | self.learnAlgs = ['EM', 'VB', 'moVB', 'soVB'] 24 | 25 | class TestEasyK2_EM(AbstractEndToEndTest): 26 | ''' Test basic EM parameter estimation on well-separable K=2 toy dataset. 27 | 28 | Verify runs from fromTruth and fromScratch inits reach ideal params. 29 | ''' 30 | __test__ = True 31 | 32 | def setUp(self): 33 | # Define true parameters: two very-different covariance matrices 34 | self.K = 2 35 | B = 20 36 | Sigma = np.zeros((2,2,2)) 37 | Sigma[0] = np.asarray([[B,0], [0,1./B]]) 38 | Sigma[1] = np.asarray([[1./B,0], [0,B]]) 39 | self.TrueParams = dict(Sigma=Sigma, w=0.5*np.ones(self.K)) 40 | 41 | # Functions used by tests to decide if estimated params are "close enough" 42 | # Must have same keys as self.TrueParams 43 | self.ProxFunc = dict(Sigma=Util.CovMatProxFunc, 44 | w=Util.ProbVectorProxFunc) 45 | 46 | # Generate toy dataset 47 | Nk = 1000 48 | X = Util.MakeZMGaussData(Sigma, Nk, seed=34567) 49 | self.Data = bnpy.data.XData(X) 50 | 51 | # Only run EM tests 52 | self.learnAlgs = ['EM'] 53 | 54 | # Basic model configuration 55 | self.allocModelName = 'MixModel' 56 | self.obsModelName = 'ZMGauss' 57 | self.kwargs = dict(nLap=30, K=self.K, alpha0=1.0) 58 | 59 | # Substitute config used for "from-scratch" tests only 60 | # anything in here overrides defaults in self.kwargs 61 | self.fromScratchArgs = dict(nLap=50, K=self.K, initname='randexamples') 62 | self.fromScratchTrials = 5 63 | self.fromScratchSuccessRate = 0.5 64 | 65 | 66 | class TestStarCovarK5_EM(AbstractEndToEndTest): 67 | ''' Test basic EM parameter estimation on StarCovarK5 toy dataset. 68 | 69 | Verify runs from fromTruth and fromScratch inits estimate ideal params. 
70 | ''' 71 | __test__ = True 72 | 73 | def setUp(self): 74 | self.K = 5 75 | import StarCovarK5 76 | self.Data = StarCovarK5.get_data(nObsTotal=10000) 77 | 78 | self.TrueParams = dict(Sigma=StarCovarK5.Sigma, 79 | w=StarCovarK5.w) 80 | self.ProxFunc = dict(Sigma=Util.CovMatProxFunc, 81 | w=Util.ProbVectorProxFunc) 82 | 83 | self.learnAlgs = ['EM'] 84 | 85 | self.allocModelName = 'MixModel' 86 | self.obsModelName = 'ZMGauss' 87 | self.kwargs = dict(nLap=30, K=self.K, alpha0=1.0) 88 | 89 | self.fromScratchArgs = dict(nLap=50, K=self.K, initname='randexamples') 90 | self.fromScratchTrials = 10 91 | self.fromScratchSuccessRate = 0.5 -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestProxFunc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests to verify that our proposed proximity functions work as expected. 3 | 4 | Proximity function (defined in Util) are used to determine if two estimated parameters are "close enough" within some numerical tolerance to be treated as equivalent. We eventually use these functions to assess whether learning algorithms like EM are able to estimate "true"/"ideal" parameters for toy data. 5 | ''' 6 | 7 | import unittest 8 | import numpy as np 9 | 10 | import bnpy 11 | import Util 12 | 13 | class TestVectorProxFunc(unittest.TestCase): 14 | def test_vectorproxfunc(self): 15 | avec = np.asarray([1, 0, 0]) 16 | bvec = np.asarray([0.91, 0.03, 0.08]) 17 | assert np.all( Util.VectorProxFunc(avec, bvec)) 18 | 19 | bvec = np.asarray([0.91, -0.03, -0.08]) 20 | assert np.all( Util.VectorProxFunc(avec, bvec)) 21 | 22 | bvec = np.asarray([0.99, 0.11, 0.12]) 23 | assert not np.all( Util.VectorProxFunc(avec, bvec)) 24 | 25 | 26 | class TestStarCovarK5(unittest.TestCase): 27 | ''' Verify CovMatProxFunc discriminates between all StarCovarK5 cov matrices. 
28 | ''' 29 | 30 | def setUp(self): 31 | import StarCovarK5 32 | self.Sigma = StarCovarK5.Sigma.copy() 33 | self.SigmaHat = np.zeros_like(self.Sigma) 34 | for k in range(5): 35 | Xk = Util.MakeZMGaussData(self.Sigma[k], 10000, seed=k) 36 | self.SigmaHat[k] = np.cov(Xk.T, bias=1) 37 | 38 | def test_CovMatProxFunc(self): 39 | print '' 40 | K = self.Sigma.shape[0] 41 | for k in xrange(K): 42 | isG = Util.CovMatProxFunc(self.Sigma[k], self.SigmaHat[k]) 43 | if not np.all(isG): 44 | Util.pprint( self.Sigma[k], 'true') 45 | Util.pprint( self.SigmaHat[k], 'est') 46 | Util.pprint( np.diag(isG).min()) 47 | from IPython import embed; embed() 48 | assert np.all(isG) 49 | for k in xrange(K): 50 | for j in xrange(k+1, K): 51 | print k,j 52 | isG = Util.CovMatProxFunc(self.Sigma[k], self.SigmaHat[j]) 53 | if np.all(isG): 54 | print self.Sigma[k] 55 | print self.SigmaHat[j] 56 | assert not np.all(isG) 57 | 58 | 59 | 60 | class TestDeadLeavesD25(TestStarCovarK5): 61 | ''' Verify CovMatProxFunc discriminates between DeadLeavesD25 cov matrices. 
62 | ''' 63 | def setUp(self): 64 | import DeadLeavesD25 65 | self.Sigma = DeadLeavesD25.DL.Sigma.copy() 66 | self.SigmaHat = np.zeros_like(self.Sigma) 67 | for k in range(8): 68 | Xk = Util.MakeZMGaussData(self.Sigma[k], 10000, seed=k) 69 | self.SigmaHat[k] = np.cov(Xk.T, bias=1) 70 | 71 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/init/TestFromSaved.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for FromScratchGauss.py 3 | ''' 4 | import unittest 5 | import numpy as np 6 | from bnpy.data import XData 7 | from bnpy import HModel 8 | from bnpy.ioutil import ModelWriter, ModelReader 9 | 10 | class TestFromScratchGauss(unittest.TestCase): 11 | def shortDescription(self): 12 | return None 13 | 14 | def setUp(self, K=7): 15 | ''' Create random data, and a K component MixModel to go with it 16 | Call this original model "hmodel". 17 | We copy hmodel into "modelB", and then save to file via save_model() 18 | ''' 19 | self.K = K 20 | PRNG = np.random.RandomState(867) 21 | X = PRNG.randn(100,2) 22 | self.Data = XData(X=X) 23 | 24 | aPDict = dict(alpha0=1.0) 25 | oPDict = dict(min_covar=1e-9) 26 | self.hmodel = HModel.CreateEntireModel('EM','MixModel','ZMGauss', 27 | aPDict, oPDict, self.Data) 28 | modelB = self.hmodel.copy() 29 | initParams = dict(initname='randexamples', seed=0, K=self.K) 30 | modelB.init_global_params(self.Data, **initParams) 31 | ModelWriter.save_model(modelB, '/tmp/', 'Test') 32 | self.modelB = modelB 33 | 34 | def test_viable_init(self): 35 | ''' Verify hmodel after init can be used to perform E-step 36 | ''' 37 | initSavedParams = dict(initname='/tmp/', prefix='Test') 38 | self.hmodel.init_global_params(self.Data, **initSavedParams) 39 | assert self.hmodel.allocModel.K == self.K 40 | keysA = self.hmodel.allocModel.to_dict() 41 | keysB = self.modelB.allocModel.to_dict() 42 | assert len(keysA) == len(keysB) 43 | 44 | aLP = 
self.hmodel.calc_local_params(self.Data) 45 | assert np.all(np.logical_and(aLP['resp']>=0,aLP['resp']<=1.0)) 46 | assert np.allclose(1.0, np.sum(aLP['resp'],axis=1)) 47 | 48 | 49 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/learnalg/TestMemoizedVBWithBirth.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for moVB with birth moves. 3 | 4 | Coverage 5 | -------- 6 | * do_birth_at_lap 7 | * verify births occur at the expected times (when lap < fracLapsBirth*nLap) 8 | ''' 9 | import bnpy 10 | import unittest 11 | 12 | class TestMOVBWithBirth(unittest.TestCase): 13 | 14 | def setUp(self): 15 | birthP = dict(fracLapsBirth=0.8) 16 | algP = dict(nLap=10, birth=birthP) 17 | self.learnAlg = bnpy.learnalg.MemoizedOnlineVBLearnAlg(savedir=None, seed=0, 18 | algParams=algP, outputParams=dict()) 19 | 20 | def test_do_birth_at_lap(self): 21 | assert self.learnAlg.do_birth_at_lap(0) 22 | assert self.learnAlg.do_birth_at_lap(0.5) 23 | assert self.learnAlg.do_birth_at_lap(1) 24 | assert self.learnAlg.do_birth_at_lap(2) 25 | assert self.learnAlg.do_birth_at_lap(8) 26 | assert not self.learnAlg.do_birth_at_lap(8.05) 27 | assert not self.learnAlg.do_birth_at_lap(8.2) 28 | assert not self.learnAlg.do_birth_at_lap(9) 29 | assert not self.learnAlg.do_birth_at_lap(10) 30 | assert not self.learnAlg.do_birth_at_lap(11111) 31 | 32 | 33 | class TestMOVBWithBirthFracThatNeedsRounding(TestMOVBWithBirth): 34 | ''' Now check it with a fraction that will need to be rounded. 
35 | ''' 36 | 37 | def setUp(self): 38 | birthP = dict(fracLapsBirth=0.7777) 39 | algP = dict(nLap=10, birth=birthP) 40 | self.learnAlg = bnpy.learnalg.MemoizedOnlineVBLearnAlg(savedir=None, seed=0, 41 | algParams=algP, outputParams=dict()) 42 | 43 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/merge/TestMergePairSelector.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for MergePairSelector.py 3 | 4 | Verifies that we can successfully select components to merge 5 | ''' 6 | import numpy as np 7 | import unittest 8 | 9 | from bnpy.learnalg import MergeTracker 10 | from bnpy.learnalg import MergePairSelector 11 | 12 | class TestMergePairSelector(unittest.TestCase): 13 | def shortDescription(self): 14 | return None 15 | 16 | def setUp(self): 17 | pass 18 | 19 | def test_reindexAfterMerge(self): 20 | MSelector = MergePairSelector() 21 | MSelector.MScores[0] = 5 22 | MSelector.MScores[3] = 5 23 | MSelector.MScores[4] = 5 24 | MSelector.PairMScores[(0,1)] = 5 25 | MSelector.PairMScores[(3,4)] = 5 26 | MSelector.PairMScores[(5,6)] = 5 27 | 28 | MSelector.reindexAfterMerge(2,3) 29 | 30 | assert MSelector.MScores[0] == 5 31 | assert MSelector.MScores[3] == 5 32 | assert 2 not in MSelector.MScores 33 | 34 | assert len(MSelector.PairMScores.keys()) == 2 35 | assert (0,1) in MSelector.PairMScores 36 | assert (4,5) in MSelector.PairMScores 37 | assert (2,3) not in MSelector.PairMScores 38 | 39 | def test_select_merge_components_random(self): 40 | ''' Verify that under random choices, we select among 3 components 41 | equally often 42 | ''' 43 | MT = MergeTracker(3) 44 | MSelector = MergePairSelector() 45 | counts = np.zeros(3) 46 | for trial in range(1000): 47 | kA, kB = MSelector.select_merge_components(None, None, MT, mergename='random') 48 | counts[kA] += 1 49 | counts[kB] += 1 50 | counts /= np.sum(counts) 51 | minFrac = 0.25 52 | maxFrac = 0.4 53 | # 
Uniform at random means fraction of choice should be ~1/3 for each 54 | assert np.all(counts > minFrac) 55 | assert np.all(counts < maxFrac) 56 | 57 | def test_select_merge_components_random_raisesError(self): 58 | ''' Verify that when comp 0 is excluded with K=3 59 | we cannot provide comp 0 as kA, [error is raised] 60 | AND 61 | in free choice, we only choose kA=1, kB=2 62 | ''' 63 | MT = MergeTracker(3) 64 | MSelector = MergePairSelector() 65 | 66 | MT.excludeList = set([0]) 67 | MT._synchronize_and_verify() 68 | for trial in range(10): 69 | kA, kB = MSelector.select_merge_components(None, None, MT, kA=1, mergename='random') 70 | assert kA == 1 71 | assert kB == 2 72 | for trial in range(10): 73 | kA, kB = MSelector.select_merge_components(None, None, MT, kA=2, mergename='random') 74 | assert kA == 1 75 | assert kB == 2 76 | with self.assertRaises(AssertionError): 77 | kA, kB = MSelector.select_merge_components(None, None, MT, mergename='random', kA=0) 78 | 79 | def test_select_merge_components_random_raisesErrorAllButOneExcluded(self): 80 | ''' Verify that when comps 0,1 are excluded with K=3 81 | we cannot provide comp 2 as kA, [error is raised] 82 | ''' 83 | MT = MergeTracker(3) 84 | MSelector = MergePairSelector() 85 | 86 | MT.excludeList = set([1, 0]) 87 | MT._synchronize_and_verify() 88 | with self.assertRaises(AssertionError): 89 | kA, kB = MSelector.select_merge_components(None, None, MT, mergename='random', kA=2) 90 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/obsmodel/TestGaussObsModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for GaussObsModel 3 | ''' 4 | from bnpy.data import XData 5 | from bnpy.obsmodel import GaussObsModel 6 | from bnpy.distr import GaussWishDistr 7 | from bnpy.util.RandUtil import mvnrand 8 | import unittest 9 | import numpy as np 10 | 11 | class TestGaussObsModel(unittest.TestCase): 12 | def 
shortDescription(self): 13 | pass 14 | 15 | def setUp(self): 16 | self.MakeModel() 17 | self.MakeData() 18 | 19 | def MakeModel(self): 20 | self.obsM = None 21 | 22 | def MakeData(self, nObsC=200): 23 | if self.obsM is None: 24 | return 25 | XList = list() 26 | np.random.seed(505) 27 | for k in range(self.obsM.K): 28 | Sigma = self.obsM.get_covar_mat_for_comp(k) 29 | mu = self.obsM.get_mean_for_comp(k) 30 | Xcur = mvnrand(mu, Sigma, nObsC) 31 | XList.append(Xcur) 32 | X = np.vstack(XList) 33 | self.nObsC = nObsC 34 | self.Data = XData(X=X) 35 | 36 | def test_dimension(self): 37 | ''' Verify dimensions match btw model and data 38 | ''' 39 | if self.obsM is None: 40 | return 41 | assert self.obsM.D == self.Data.dim 42 | 43 | def test_calc_local_params(self): 44 | ''' Calc soft assign responsibilities for all data items 45 | Verify that the items generated by each component are (usually) associated with that component. 46 | ''' 47 | if self.obsM is None: 48 | return 49 | LP = self.obsM.calc_local_params(self.Data) 50 | lpr = LP['E_log_soft_ev'] 51 | maxIDs = np.argmax(lpr, axis=1) 52 | for k in range(self.obsM.K): 53 | currange = range(k*self.nObsC, (k+1)*self.nObsC) 54 | nMatch = np.sum( maxIDs[currange]==k ) 55 | assert nMatch > 0.95 * self.nObsC 56 | 57 | class TestGaussObsModelVB(TestGaussObsModel): 58 | def MakeModel(self): 59 | oDict = dict(inferType='VB') 60 | compA = dict(m=[100,100], kappa=1.0e-4, invW=np.eye(2), dF=4) 61 | compB = dict(m=[-100,-100], kappa=1.0e-4, invW=np.eye(2), dF=4) 62 | compC = dict(m=[0,0], kappa=1.0e-4, invW=np.eye(2), dF=4) 63 | compDictList = [compA, compB, compC] 64 | obsPrior = GaussWishDistr(m=[0,0], kappa=1e-4, invW=np.eye(2), dF=4) 65 | self.obsM = GaussObsModel.CreateWithAllComps(oDict, obsPrior, compDictList) 66 | 67 | class TestGaussObsModelEM(TestGaussObsModel): 68 | def MakeModel(self): 69 | oDict = dict(inferType='EM', min_covar=0.0) 70 | compDictList = [ dict(m=[100,100], L=np.eye(2)), dict(m=[0,0], L=100*np.eye(2))] 71 | 
obsPrior = None 72 | self.obsM = GaussObsModel.CreateWithAllComps(oDict, obsPrior, compDictList) 73 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/obsmodel/TestZMGaussObsModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for ZMGaussObsModel 3 | ''' 4 | from bnpy.data import XData 5 | from bnpy.obsmodel import ZMGaussObsModel 6 | import numpy as np 7 | 8 | class TestZMGaussObsModelEM(object): 9 | def setUp(self): 10 | oDict = dict(inferType='EM', min_covar=0.0) 11 | compDictList = [ dict(Sigma=np.eye(2)), dict(Sigma=100*np.eye(2))] 12 | obsPrior = None 13 | self.obsM = ZMGaussObsModel.CreateWithAllComps(oDict, obsPrior, compDictList) 14 | self.C = 10 15 | XList = list() 16 | for k in range(self.obsM.K): 17 | Xcur = np.random.randn(self.C,2) 18 | sig = np.sqrt(self.obsM.comp[k].Sigma[0,0]) 19 | XList.append(sig*Xcur) 20 | self.Data = XData(X=np.vstack(XList)) 21 | print self.Data.X 22 | 23 | def test_dimension(self): 24 | assert self.obsM.D == 2 25 | 26 | def test_calc_local_params(self): 27 | # calculate the soft assignment probabilities for all data items 28 | # make sure that the items generated by each component 29 | # are (statistically) associated with that component 30 | LP = self.obsM.calc_local_params(self.Data) 31 | lpr = LP['E_log_soft_ev'] 32 | maxIDs = np.argmax(lpr, axis=1) 33 | for k in range(self.obsM.K): 34 | currange = range(k*self.C, (k+1)*self.C) 35 | matchMask = maxIDs[currange]==k 36 | nMatch = np.sum(matchMask) 37 | assert nMatch > 0.8*self.C -------------------------------------------------------------------------------- /refinery/bnpy/results/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- 
/refinery/bnpy/scripts/BarsBurstyK20.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsBursty.py 3 | 4 | Loads Mike Bryant's old bursty topics dataset. 5 | TODO: Need to write code to generate a bursty topic dataset 6 | 7 | ''' 8 | from bnpy.data import WordsData, AdmixMinibatchIterator 9 | import os 10 | 11 | data_dir = '/data/liv/liv-x/topic_models/data/bars/' 12 | matfilepath = os.environ['BNPYDATADIR'] + 'bars_bnpy_burstyK20_train.mat' 13 | 14 | if not os.path.exists(matfilepath): 15 | matfilepath = data_dir + 'bars_bnpy_burstyK20_train.mat' 16 | 17 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 18 | ''' Grab data from matfile specified by matfilepath 19 | ''' 20 | Data = WordsData.read_from_mat( matfilepath ) 21 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 22 | return Data 23 | 24 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 25 | Data = WordsData.read_from_mat( matfilepath ) 26 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 27 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 28 | return DataIterator 29 | 30 | def get_data_info(D, V): 31 | return 'Bars Bursty K20 Data. D=%d. VocabSize=%d' % (D,V) 32 | -------------------------------------------------------------------------------- /refinery/bnpy/scripts/BarsBurstyK6.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsBursty.py 3 | 4 | Loads Mike Bryant's old bursty topics dataset. 
5 | TODO: Need to write code to generate a bursty topic dataset 6 | 7 | ''' 8 | from bnpy.data import WordsData, AdmixMinibatchIterator 9 | import os 10 | 11 | data_dir = '/data/liv/liv-x/topic_models/data/bars/' 12 | matfilepath = os.environ['BNPYDATADIR'] + 'bars_bnpy_burstyK6_train.mat' 13 | 14 | if not os.path.exists(matfilepath): 15 | matfilepath = data_dir + 'bars_bnpy_burstyK6_train.mat' 16 | 17 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 18 | ''' Grab data from matfile specified by matfilepath 19 | ''' 20 | Data = WordsData.read_from_mat( matfilepath ) 21 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 22 | return Data 23 | 24 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 25 | Data = WordsData.read_from_mat( matfilepath ) 26 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 27 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 28 | return DataIterator 29 | 30 | def get_data_info(D, V): 31 | return 'Bars Bursty Data. D=%d. VocabSize=%d' % (D,V) 32 | -------------------------------------------------------------------------------- /refinery/bnpy/scripts/BarsK8.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK8.py 3 | 4 | Toy Bars data, with K=8 topics 5 | 4 horizontal, and 4 vertical. 
6 | ''' 7 | import numpy as np 8 | 9 | from bnpy.data import WordsData, AdmixMinibatchIterator 10 | 11 | Defaults = dict() 12 | Defaults['nDocTotal'] = 2000 13 | Defaults['nWordsPerDoc'] = 100 14 | 15 | SEED = 8675309 16 | 17 | # FIXED DATA GENERATION PARAMS 18 | K = 8 # Number of topics 19 | V = 16 # Vocabulary Size 20 | gamma = 0.5 # hyperparameter over doc-topic distribution 21 | 22 | # TOPIC by WORD distribution 23 | true_tw = np.zeros( (K,V) ) 24 | true_tw[0,:] = [ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 25 | true_tw[1,:] = [ 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0] 26 | true_tw[2,:] = [ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0] 27 | true_tw[3,:] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1] 28 | true_tw[4,:] = [ 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0] 29 | true_tw[5,:] = [ 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0] 30 | true_tw[6,:] = [ 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0] 31 | true_tw[7,:] = [ 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1] 32 | # Add "smoothing" term to each entry of the topic-word matrix 33 | # With V = 16 and 8 sets of bars, 34 | # smoothMass=0.02 yields 0.944 probability of drawing "on topic" word 35 | smoothMass = 0.02 36 | true_tw += smoothMass 37 | # Ensure each row of true_tw is a probability vector 38 | for k in xrange(K): 39 | true_tw[k,:] /= np.sum( true_tw[k,:] ) 40 | Defaults['TopicWordProbs'] = true_tw 41 | 42 | 43 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 44 | trueBeta = np.hstack([1.1*np.ones(K/2), np.ones(K/2)]) 45 | trueBeta /= trueBeta.sum() 46 | Defaults['docTopicParamVec'] = gamma * trueBeta 47 | 48 | def get_data_info(**kwargs): 49 | if 'nDocTotal' in kwargs: 50 | nDocTotal = kwargs['nDocTotal'] 51 | else: 52 | nDocTotal = Defaults['nDocTotal'] 53 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d.' 
% (K, nDocTotal) 54 | 55 | def get_data(**kwargs): 56 | ''' 57 | Args 58 | ------- 59 | seed 60 | nDocTotal 61 | nWordsPerDoc 62 | ''' 63 | Data = genWordsData(seed=SEED, **kwargs) 64 | Data.summary = get_data_info(**kwargs) 65 | return Data 66 | 67 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 68 | dataorderseed=0, **kwargs): 69 | ''' 70 | Args 71 | ------- 72 | seed 73 | nDocTotal 74 | nWordsPerDoc 75 | ''' 76 | Data = genWordsData(seed=seed, **kwargs) 77 | DataIterator = AdmixMinibatchIterator(Data, 78 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 79 | DataIterator.summary = get_data_info(**kwargs) 80 | return DataIterator 81 | 82 | def genWordsData(**kwargs): 83 | for key in Defaults: 84 | if key not in kwargs: 85 | kwargs[key] = Defaults[key] 86 | return WordsData.genToyData(**kwargs) 87 | 88 | if __name__ == '__main__': 89 | import bnpy.viz.BarsViz 90 | WData = genWordsData(seed=SEED) 91 | bnpy.viz.BarsViz.plotExampleBarsDocs(WData) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/DeadLeavesD25.py: -------------------------------------------------------------------------------- 1 | import DeadLeaves as DL 2 | 3 | DL.makeTrueParams(25) 4 | 5 | def get_data(**kwargs): 6 | return DL.get_data(**kwargs) 7 | 8 | def get_minibatch_iterator(**kwargs): 9 | return DL.get_minibatch_iterator(**kwargs) 10 | 11 | def get_short_name(): 12 | return DL.get_short_name() 13 | 14 | def get_data_info(): 15 | return DL.get_data_info() 16 | 17 | 18 | if __name__ == '__main__': 19 | DL.plotTrueCovMats(doShowNow=False) 20 | DL.plotImgPatchPrototypes() 21 | 22 | 23 | -------------------------------------------------------------------------------- /refinery/bnpy/scripts/HuffPost.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HuffPost.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = 
'/Users/daeil/Dropbox/research/bnpy/data/huffpost/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'huffpost_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'huffpost_bnpy.mat' 13 | 14 | def get_data(seed=8675309, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat(matfilepath) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nLap=1, 22 | dataorderseed=0, **kwargs): 23 | Data = WordsData.read_from_mat(matfilepath) 24 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=300, nLap=nLap, dataorderseed=dataorderseed) 25 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 26 | return DataIterator 27 | 28 | def get_data_info(D, V): 29 | return 'Huffington Post Data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/NIPS.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NIPSCorpus.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | datadir = '/Users/daeil/Dropbox/research/bnpy/data/nips/' 9 | NIPSmatfile = 'nips_bnpy.mat' 10 | matfilepath = os.environ['BNPYDATADIR'] + NIPSmatfile 11 | 12 | if not os.path.exists(matfilepath): 13 | matfilepath = datadir + NIPSmatfile 14 | 15 | def get_data(**kwargs): 16 | ''' Grab data from matfile specified by matfilepath 17 | ''' 18 | Data = WordsData.read_from_mat(matfilepath) 19 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 20 | return Data 21 | 22 | def get_minibatch_iterator(nBatch=10, nLap=1, 23 | dataorderseed=0, **kwargs): 24 | Data = WordsData.read_from_mat(matfilepath) 25 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, 26 | nLap=nLap, dataorderseed=dataorderseed) 27 | DataIterator.summary = 
get_data_info(Data.nDocTotal, Data.vocab_size) 28 | return DataIterator 29 | 30 | def get_data_info(D, V): 31 | return 'NIPS bag-of-words data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/NYTimes.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NYTimes.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = '/Users/daeil/Dropbox/research/liv-test/topic_models/data/nytimes_small/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'nyt_small_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'nyt_small_bnpy.mat' 13 | 14 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat( matfilepath ) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 22 | Data = WordsData.read_from_mat( matfilepath ) 23 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 24 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 25 | return DataIterator 26 | 27 | def get_data_info(D, V): 28 | return 'NYTimes (Small) Data. D=%d. 
VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/NYTimesDB.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NYTimes.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIteratorDB 6 | 7 | dbpath = '/Users/daeil/Dropbox/research/local/nytimes_ldc' 8 | ''' Use the dbpath below in order to connect to the nytimes database at Brown 9 | ''' 10 | #dbpath='/data/liv/nytimes/liv/nytimes_ldc' 11 | 12 | D = 1816909 13 | V = 8000 14 | 15 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 16 | ''' Grab data from database to initialize (used only once really) 17 | ''' 18 | doc_id_select = range(1,500) # grab the first 500 documents to initialize 19 | nDoc = len(doc_id_select) 20 | query = 'select * from data where rowid in (' + ','.join(map(str, doc_id_select)) + ')' 21 | Data = WordsData.read_from_db( dbpath, query, nDoc=nDoc, nDocTotal = nDoc, vocab_size = V ) 22 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 23 | return Data 24 | 25 | def get_minibatch_iterator(seed=8675309, nBatch=10000, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 26 | ''' Data is primarily loaded through AdmixMinibatchIteratorDB. 27 | If creating from database, put in true number of documents and vocabulary size for the entire corpus 28 | Initialize with only a handful of documents however, specified by doc_id_select 29 | ''' 30 | #Data object isn't passed in, is this bottom part necessary again? 
31 | #doc_id_select = range(1,500) # grab the first 500 documents 32 | #query = 'select * from data where rowid in (' + ','.join(map(str, doc_id_select)) + ')' 33 | #Data = WordsData.read_from_db( dbpath, query, nDoc=len(doc_id_select), nDocTotal = D, vocab_size = V ) 34 | Data = get_data(nDocTotal = D, vocab_size = V) 35 | 36 | #Create iterator that grabs documents from the sqlite3 database 37 | DataIterator = AdmixMinibatchIteratorDB(Data, dbpath=dbpath, nDocTotal=D, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 38 | DataIterator.summary = get_data_info(D, V) 39 | return DataIterator 40 | 41 | def get_data_info(D, V): 42 | return 'NYTimes (Very Large) Data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/Science.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Science.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = '/Users/daeil/Dropbox/research/liv-test/topic_models/data/science/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'science_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'science_bnpy.mat' 13 | 14 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat( matfilepath ) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 22 | Data = WordsData.read_from_mat( matfilepath ) 23 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 24 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 25 | return DataIterator 26 | 27 | def 
get_data_info(D, V): 28 | return 'Science Abstracts Data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/Wikipedia.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Science.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = '/Users/daeil/Dropbox/research/liv-test/topic_models/data/wikipedia/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'wiki_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'wiki_bnpy.mat' 13 | 14 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat( matfilepath ) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 22 | Data = WordsData.read_from_mat( matfilepath ) 23 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 24 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 25 | return DataIterator 26 | 27 | def get_data_info(D, V): 28 | return 'Wikipedia Data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/customFunc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | customFunc.py 3 | A custom function that we can use to hook into the BNPy analysis. Runs at every minibatch or lap depending on the 4 | the type of analysis. 
5 | 6 | onLapComplete() run after every complete lap through all B batches 7 | onBatchComplete() run after every complete visit (Mstep, Estep, Sstep, ELBOstep) to a single batch 8 | onAlgorithmComplete() run after the algorithm converges/reaches maximum number of laps 9 | 10 | ''' 11 | import redis 12 | msgServer = redis.StrictRedis() 13 | 14 | def onLapComplete(hModel, percentDone, customFuncArgs): 15 | update = str(percentDone) + "% Done" 16 | msgServer.publish('analysis', "%s" % (update)) 17 | print "onLapComplete" 18 | 19 | def onBatchComplete(hModel, percentDone, customFuncArgs): 20 | update = str(percentDone) + "% Done" 21 | msgServer.publish('analysis', "%s" % (update)) 22 | print "onBatchComplete" 23 | 24 | def onAlgorithmComplete(hModel, percentDone, customFuncArgs): 25 | msgServer.publish('analysis', "%s" % ('status:Analysis Finished')) 26 | print "onAlgorithmComplete" -------------------------------------------------------------------------------- /refinery/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | import os 4 | import sys 5 | basedir = os.path.abspath(os.path.dirname(__file__)) 6 | 7 | # Must turn this off when in devlopment 8 | DEBUG = True 9 | 10 | # Flask WTF module requires these two settings 11 | CSRF_ENABLED = True 12 | SECRET_KEY = 'bulgogi' 13 | # Path of our database file, required by flask-SQLAlchemy 14 | #SQLALCHEMY_DATABASE_URI = 'sqlite:///' + os.path.join(basedir, 'app.db') 15 | SQLALCHEMY_DATABASE_URI = "postgresql:///refinery" 16 | 17 | #SQLALCHEMY_DATABASE_URI = 'postgresql://refinery_admin@localhost/refinery' 18 | # Folder that stores our SQLAlchemy-migrate data files 19 | SQLALCHEMY_MIGRATE_REPO = os.path.join(basedir, 'db_repository') 20 | 21 | UPLOAD_FOLDER = 'refinery/static/datasets/' 22 | USER_DIRECTORY = 'refinery/static/users/' 23 | RANDOM_IMG_DIRECTORY = 'refinery/static/assets/images/random/' 24 | 25 | 26 | 
-------------------------------------------------------------------------------- /refinery/data/nips0-12.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/data/nips0-12.zip -------------------------------------------------------------------------------- /refinery/data/nyt_2013_obama.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/data/nyt_2013_obama.tar.gz -------------------------------------------------------------------------------- /refinery/data/nyt_2013_obama.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/data/nyt_2013_obama.zip -------------------------------------------------------------------------------- /refinery/data/reuters.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/data/reuters.zip -------------------------------------------------------------------------------- /refinery/fact_classifier/classify_ex.py: -------------------------------------------------------------------------------- 1 | from sklearn import svm 2 | from sklearn.feature_extraction import DictVectorizer 3 | from collections import defaultdict 4 | import pickle 5 | v = DictVectorizer() 6 | 7 | #TODO : need to tokenize the words before using them as features! 
8 | 9 | def main(): 10 | 11 | def munge(s): 12 | ps = s.split() 13 | label = int(ps[0]) 14 | ws = defaultdict(int) 15 | for w in ps[1:]: 16 | ws[w] += 1 17 | return [label,ws] 18 | 19 | data = [munge(l.strip()) for l in open("/home/chonger/Downloads/annotations.txt")] 20 | 21 | labels = [x[0] for x in data] 22 | dicts = [x[1] for x in data] 23 | 24 | feats = v.fit_transform(dicts) 25 | 26 | ttsplit = int(len(labels) * .8) 27 | clf = svm.SVC(kernel='linear', class_weight={1: 10}) 28 | #clf = svm.SVC() 29 | clf.fit(feats[:ttsplit],labels[:ttsplit]) 30 | 31 | print clf.score(feats[ttsplit:],labels[ttsplit:]) 32 | 33 | tot = defaultdict(int) 34 | tr = defaultdict(int) 35 | for ex in labels[ttsplit:]: 36 | tr[ex] += 1 37 | 38 | for ex in feats[ttsplit:]: 39 | tot[(clf.predict(ex).tolist())[0]] += 1 40 | 41 | print tr 42 | print tot 43 | 44 | print feats[0] 45 | print feats[1] 46 | 47 | f = open("/home/chonger/factsvm",'w') 48 | pickle.dump(clf,f) 49 | f.close() 50 | 51 | f = open("/home/chonger/factfeat",'w') 52 | pickle.dump(v,f) 53 | f.close() 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /refinery/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/__init__.py -------------------------------------------------------------------------------- /refinery/lib/model_svm/feats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/model_svm/feats -------------------------------------------------------------------------------- /refinery/lib/model_svm/lower_words: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/model_svm/lower_words -------------------------------------------------------------------------------- /refinery/lib/model_svm/non_abbrs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/model_svm/non_abbrs -------------------------------------------------------------------------------- /refinery/lib/svmlite/LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/LICENSE.txt -------------------------------------------------------------------------------- /refinery/lib/svmlite/kernel.h: -------------------------------------------------------------------------------- 1 | /************************************************************************/ 2 | /* */ 3 | /* kernel.h */ 4 | /* */ 5 | /* User defined kernel function. Feel free to plug in your own. */ 6 | /* */ 7 | /* Copyright: Thorsten Joachims */ 8 | /* Date: 16.12.97 */ 9 | /* */ 10 | /************************************************************************/ 11 | 12 | /* KERNEL_PARM is defined in svm_common.h The field 'custom' is reserved for */ 13 | /* parameters of the user defined kernel. You can also access and use */ 14 | /* the parameters of the other kernels. Just replace the line 15 | return((double)(1.0)); 16 | with your own kernel. */ 17 | 18 | /* Example: The following computes the polynomial kernel. sprod_ss 19 | computes the inner product between two sparse vectors. 
20 | 21 | return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words) 22 | +kernel_parm->coef_const,(double)kernel_parm->poly_degree)); 23 | */ 24 | 25 | /* If you are implementing a kernel that is not based on a 26 | feature/value representation, you might want to make use of the 27 | field "userdefined" in SVECTOR. By default, this field will contain 28 | whatever string you put behind a # sign in the example file. So, if 29 | a line in your training file looks like 30 | 31 | -1 1:3 5:6 #abcdefg 32 | 33 | then the SVECTOR field "words" will contain the vector 1:3 5:6, and 34 | "userdefined" will contain the string "abcdefg". */ 35 | 36 | double custom_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b) 37 | /* plug in you favorite kernel */ 38 | { 39 | return((double)(1.0)); 40 | } 41 | -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_classify: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_classify -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_classify.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_classify.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_common.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_common.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_hideo.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_hideo.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_learn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_learn -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_learn.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_learn.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_learn_main.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_learn_main.c -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_learn_main.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_learn_main.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_light.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_light.tar.gz -------------------------------------------------------------------------------- /refinery/lib/word_tokenize.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | """ 4 | A list 
"""
An ordered list of (regexp, repl) rewrite rules applied in sequence.
The resulting string is split on whitespace by callers.
(Adapted from the Punkt Word Tokenizer)
"""

_tokenize_regexps = [

    # uniform quotes
    (re.compile(r'\'\''), r'"'),
    (re.compile(r'\`\`'), r'"'),

    # Separate punctuation (except period) from words:
    (re.compile(r'(^|\s)(\')'), r'\1\2 '),
    (re.compile(r'(?=[\(\"\`{\[:;&\#\*@])(.)'), r'\1 '),

    (re.compile(r'(.)(?=[?!)\";}\]\*:@\'])'), r'\1 '),
    (re.compile(r'(?=[\)}\]])(.)'), r'\1 '),
    (re.compile(r'(.)(?=[({\[])'), r'\1 '),
    (re.compile(r'((^|\s)\-)(?=[^\-])'), r'\1 '),

    # Treat double-hyphen as one token:
    (re.compile(r'([^-])(\-\-+)([^-])'), r'\1 \2 \3'),
    (re.compile(r'(\s|^)(,)(?=(\S))'), r'\1\2 '),

    # Only separate comma if space follows:
    (re.compile(r'(.)(,)(\s|$)'), r'\1 \2\3'),

    # Combine dots separated by whitespace to be a single token:
    (re.compile(r'\.\s\.\s\.'), r'...'),

    # Separate "No.6"
    (re.compile(r'([A-Za-z]\.)(\d+)'), r'\1 \2'),

    # Separate words from ellipses
    (re.compile(r'([^\.]|^)(\.{2,})(.?)'), r'\1 \2 \3'),
    (re.compile(r'(^|\s)(\.{2,})([^\.\s])'), r'\1\2 \3'),
    (re.compile(r'([^\.\s])(\.{2,})($|\s)'), r'\1 \2\3'),

    ## adding a few things here:

    # fix %, $, &
    (re.compile(r'(\d)%'), r'\1 %'),
    (re.compile(r'\$(\.?\d)'), r'$ \1'),
    (re.compile(r'(\w)& (\w)'), r'\1&\2'),
    (re.compile(r'(\w\w+)&(\w\w+)'), r'\1 & \2'),

    # fix (n 't) --> ( n't)
    (re.compile(r'n \'t( |$)'), r" n't\1"),
    (re.compile(r'N \'T( |$)'), r" N'T\1"),

    # treebank tokenizer special words
    (re.compile(r'([Cc])annot'), r'\1an not'),

    (re.compile(r'\s+'), r' '),

]

def tokenize(s):
    """
    Apply every (regexp, repl) rule above, in order, and return the result.
    """
    text = s
    for pattern, replacement in _tokenize_regexps:
        text = pattern.sub(replacement, text)
    return text
-------------------------------------------------------------------------------- /refinery/refinery/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask.ext.sqlalchemy import SQLAlchemy 3 | from flask.ext.login import LoginManager 4 | from celery import Celery 5 | 6 | print "Opening a Refinery" 7 | 8 | app = Flask(__name__) 9 | app.config.from_object('config') 10 | db = SQLAlchemy(app) 11 | 12 | lm = LoginManager() 13 | lm.init_app(app) 14 | lm.login_view = 'login' 15 | 16 | def make_celery(app): 17 | celery = Celery(app.import_name, broker=app.config['CELERY_BROKER_URL']) 18 | celery.conf.update(app.config) 19 | TaskBase = celery.Task 20 | class ContextTask(TaskBase): 21 | abstract = True 22 | def __call__(self, *args, **kwargs): 23 | with app.app_context(): 24 | return TaskBase.__call__(self, *args, **kwargs) 25 | celery.Task = ContextTask 26 | return celery 27 | 28 | app.config.update( 29 | CELERY_BROKER_URL='redis://localhost:6379', 30 | CELERY_RESULT_BACKEND='redis://localhost:6379', 31 | CELERY_IMPORTS=['refinery.webapp.topicmodel','refinery.webapp.main_menu'], 32 | CELERY_REDIS_MAX_CONNECTIONS=4 33 | ) 34 | 35 | celery = make_celery(app) 36 | -------------------------------------------------------------------------------- /refinery/refinery/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/data/__init__.py -------------------------------------------------------------------------------- /refinery/refinery/static/.gitignore: -------------------------------------------------------------------------------- 1 | users/ 2 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/bpicons/bpicons.eot: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/bpicons/bpicons.eot -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/bpicons/bpicons.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/bpicons/bpicons.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/bpicons/bpicons.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/bpicons/bpicons.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/bpicons/license.txt: -------------------------------------------------------------------------------- 1 | Icon Set: Font Awesome -- http://fortawesome.github.com/Font-Awesome/ 2 | License: SIL -- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=OFL 3 | 4 | 5 | Icon Set: Eco Ico -- http://dribbble.com/shots/665585-Eco-Ico 6 | License: CC0 -- http://creativecommons.org/publicdomain/zero/1.0/ -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome.eot -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/fonts/fontawesome.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | This is a custom SVG font generated by IcoMoon. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/Read Me.txt: -------------------------------------------------------------------------------- 1 | To modify your generated font, use the *dev.svg* file, located in the *fonts* folder in this package. You can import this dev.svg file to the IcoMoon app. All the tags (class names) and the Unicode points of your glyphs are saved in this file. 
2 | 3 | See the documentation for more info on how to use this package: http://icomoon.io/#docs/font-face -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/fontawesome.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome/fontawesome.eot -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/fontawesome.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome/fontawesome.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/fontawesome.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome/fontawesome.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/license.txt: -------------------------------------------------------------------------------- 1 | Icon Set: Font Awesome -- http://fortawesome.github.com/Font-Awesome/ 2 | License: SIL -- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=OFL -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/icomoon.dev.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | This is a custom SVG font generated by IcoMoon. 
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 17 | 18 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/icomoon.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon/icomoon.eot -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/icomoon.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon/icomoon.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/icomoon.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon/icomoon.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/license.txt: -------------------------------------------------------------------------------- 1 | Icon Set: IcoMoon - Free -- http://keyamoon.com/icomoon/ 2 | License: CC BY 3.0 -- http://creativecommons.org/licenses/by/3.0/ -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.eot -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/Logo.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.dev.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | This is a custom SVG font generated by IcoMoon. 
6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/arrows/arrows.eot -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | This is a custom SVG font generated by IcoMoon. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/arrows/arrows.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/arrows/arrows.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/license.txt: -------------------------------------------------------------------------------- 1 | Icon Set: Entypo -- http://www.entypo.com/ 2 | License: CC BY-SA 3.0 -- http://creativecommons.org/licenses/by-sa/3.0/ -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/1.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/1.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/2.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/3.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/4.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/5.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_020_home.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_020_home.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_063_power.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_063_power.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_144_folder_open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_144_folder_open.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_145_folder_plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_145_folder_plus.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_220_play_button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_220_play_button.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_232_cloud.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_232_cloud.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_341_briefcase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_341_briefcase.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_357_suitcase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_357_suitcase.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_363_cloud_upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_363_cloud_upload.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_364_cloud_download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_364_cloud_download.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_370_globe_af.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_370_globe_af.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_371_global.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_371_global.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-chop-suey.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-chop-suey.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-early-sunday-morning.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-early-sunday-morning.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-gas.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-gas.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-morning-sun.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-morning-sun.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-nighthawks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-nighthawks.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/arrow_down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/arrow_down.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/arrow_up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/arrow_up.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-add-folder-icon-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-add-folder-icon-256.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-arrow-4-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-arrow-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-book-17-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-book-17-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-cloud-3-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-cloud-3-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-cloud-9-icon-48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-cloud-9-icon-48.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-edit-8-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-edit-8-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-flask-7-icon-256.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-flask-7-icon-256.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-gear-icon-48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-gear-icon-48.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-info-6-icon-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-info-6-icon-128.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-info-6-icon-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-info-6-icon-256.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-line-chart-4-icon-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-line-chart-4-icon-64.png -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/icons/iconmonstr-line-chart-icon-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-line-chart-icon-64.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-lock-13-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-lock-13-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-lock-15-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-lock-15-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-lock-3-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-lock-3-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-lock-9-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-lock-9-icon.png 
-------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-magnifier-4-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-magnifier-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-magnifier-6-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-magnifier-6-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-12-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-12-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-3-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-3-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-4-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-7-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-7-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-note-25-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-note-25-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-photo-camera-6-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-photo-camera-6-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-photo-camera-8-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-photo-camera-8-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-plus-5-icon-24.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-plus-5-icon-24.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-star-5-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-star-5-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-star-7-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-star-7-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-text-file-4-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-text-file-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon-24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon-24.png -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon-256.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/pixel-arrow-right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/pixel-arrow-right.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/pixel-arrow-right_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/pixel-arrow-right_.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/pixel-arrow-right__.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/pixel-arrow-right__.png -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/icons/running-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/running-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/dali-rose.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/dali-rose.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/hopper-chop-suey.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-chop-suey.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/hopper-early-sunday-morning.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-early-sunday-morning.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/hopper-gas.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-gas.jpg -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/paintings/hopper-morning-sun.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-morning-sun.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/hopper-nighthawks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-nighthawks.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/kandinsky-trans.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/kandinsky-trans.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/mondrian-comp-a.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/mondrian-comp-a.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/monet-fog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/monet-fog.jpg -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/paintings/monet-gal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/monet-gal.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/monet-poppies.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/monet-poppies.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/progress.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/progressbar.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/progressbar.gif -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/img05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/img05.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/img06.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/img06.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/img07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/img07.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/img08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/img08.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img004.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img01.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img02.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img02.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img03.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img04.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/1.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/2.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/3.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/3.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/4.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/5.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/sliders/camera/slide00.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/sliders/camera/slide00.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/sliders/camera/slide01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/sliders/camera/slide01.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/sliders/camera/slide02.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/sliders/camera/slide02.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/sliders/camera/slide03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/sliders/camera/slide03.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_flat_0_aaaaaa_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_flat_0_aaaaaa_40x100.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_flat_75_ffffff_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_flat_75_ffffff_40x100.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_222222_256x240.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_454545_256x240___________.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_454545_256x240___________.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/ui/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/1.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/10.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/2.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/3.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/4.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/5.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/6.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/7.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/8.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/9.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/9.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/border.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/border.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/controls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/controls.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/loading.gif -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/loading_background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/loading_background.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/overlay.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/overlay.png -------------------------------------------------------------------------------- /refinery/refinery/static/css/upload.css: -------------------------------------------------------------------------------- 1 | #upload_dropzone 2 | { 3 | position:aboslute; 4 | margin-top:30px; 5 | } 6 | 7 | .filedrag_box 8 | { 9 | display: block; 10 | position: relative; 11 | height: 125px; 12 | padding: 10px; 13 | text-align: center; 14 | color: #111; 15 | border: 2px dashed #555; 16 | border-radius: 10px; 17 | cursor: default; 18 | } 19 | 20 | 21 | .filedrag_box_hover 22 | { 23 | display: block; 24 | position: relative; 25 | height: 125px; 26 | padding: 10px; 27 | text-align: center; 28 | color: #111; 29 | border: 2px dashed #555; 30 | border-radius: 10px; 31 | cursor: default; 32 | color: #f00; 33 | border-color: #f00; 34 | border-style: solid; 35 | box-shadow: inset 0 3px 4px #888; 36 | } 37 | 38 | /** the img after it gets dropped but before submit */ 39 | #img_frame 40 | { 41 | height: 200px; 42 | width: 200px; 43 | margin-left: auto; 44 | margin-right: auto; 45 | } 46 | 47 | /** 48 | #upload_progress 49 | { 50 | border: 1px solid black; 51 | margin: 0 auto; 52 | position: relative; 53 | border-radius: 10px; 54 | padding: 5px; 55 | width: 95%; 56 | height: 19px; 57 | background: #eee url(/static/assets/images/progress.png) 100% 0 repeat-y; 58 | background-position: 0% 0; 59 | } 60 | 61 | #progress_text 62 | { 63 | margin-top:0px; 64 | padding: 5px; 65 | position: absolute; 66 | color: #444; 67 | width: 100%; 68 | height: 100%; 69 | background: #eee url(/static/assets/images/progress.png) 100% 0 repeat-y; 70 | } 71 | 72 | #upload_progress.success 73 | { 74 | background: #00cc33 none 0 0 no-repeat; 75 | } 76 | 77 | #upload_progress.failed 78 | { 79 | background: #c00 none 0 0 no-repeat; 80 | } 81 | */ 82 | #uploadlist 83 | { 
84 | height:100%; 85 | border: 2px solid #222; 86 | border-radius: 10px; 87 | } 88 | 89 | #uploadlist li 90 | { 91 | text-align:left; 92 | 93 | } 94 | -------------------------------------------------------------------------------- /refinery/refinery/static/js/annotate.js: -------------------------------------------------------------------------------- 1 | 2 | var selected = {}; 3 | 4 | console.log(lines); 5 | 6 | var ls = d3.select("#survey").selectAll("div").data(lines); 7 | 8 | ls.enter().append("div").attr("class","sentex").on("click",function(d,i) { 9 | d3.select(this).style("background",function(d2,i2) { 10 | console.log("clicked " + i); 11 | if(i in selected) { 12 | delete selected[i]; 13 | return "#ffffff"; 14 | } else { 15 | selected[i] = 0; 16 | return "#aaeeff"; 17 | } 18 | 19 | }); 20 | }).html(function(d) { 21 | return d; 22 | }); 23 | 24 | 25 | 26 | function submitSurvey() { 27 | 28 | var linse = d3.select("#survey").selectAll("div").data(); 29 | 30 | var labels = []; 31 | 32 | for (i in selected){ 33 | console.log("!" + i); 34 | labels.push(i); 35 | } 36 | 37 | $.post(ann_url,{'labels[]' : labels, 'lines[]' : lines},function(d) { 38 | 39 | window.location = "/annotate"; 40 | 41 | }); 42 | 43 | } 44 | 45 | -------------------------------------------------------------------------------- /refinery/refinery/templates/about.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block js_head %} 3 | 4 | 5 | 6 | 7 | 8 | {% endblock %} 9 | 10 | {% block content %} 11 |
12 |

M L RUUUUN

13 |
14 | {% endblock %} 15 | 16 | -------------------------------------------------------------------------------- /refinery/refinery/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | {% block title %} 12 | {% endblock %} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | {% block js_head %} 23 | {% endblock %} 24 | 25 | 26 | 27 | 28 | 29 |
30 |
31 | 47 |
48 |
49 |
50 |
51 | 52 | {% block content %} 53 | {% endblock %} 54 | 55 |
56 | 57 | {% block js %} 58 | {% endblock %} 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /refinery/refinery/templates/browse_list.html: -------------------------------------------------------------------------------- 1 | {% for doc in dox %} 2 | 3 |
  • 4 | 5 | 6 | 7 |
    {{doc.name}}
    8 | 9 |
  • 10 |
    11 | {% endfor %} -------------------------------------------------------------------------------- /refinery/refinery/templates/docview.html: -------------------------------------------------------------------------------- 1 |
    2 | 3 | {{doctext}} 4 | 5 |
    -------------------------------------------------------------------------------- /refinery/refinery/templates/info_page.html: -------------------------------------------------------------------------------- 1 |
    2 |

    3 | Welcome to the Refinery 4 |

    5 | 6 |

    7 | Refinery identifies topics in your data and allows you to refine it into folders that focus on the topics you're interested in. Also, it helps you summarize your folders into important sentences with links back to the documents that they appear in. 8 |

    9 | 10 |

    11 | Use buttons to change settings 12 |

    13 |
    14 |

    15 | Use buttons to run machine learning 16 |

    17 |
    18 |

    19 | Use buttons to experiment with Refinery's tools 20 |

    21 |
    22 |

    23 | Use buttons to browse the documents in a folder 24 |

    25 |
    26 |

    27 | Use buttons to delete things 28 |

    29 |
    30 |

    31 | Click the button to see this information page again 32 |

    33 |
    34 |

    35 | To get started with Refinery, upload a dataset with the button at the bottom of the page 36 |

    37 | 38 | 39 |
    40 | -------------------------------------------------------------------------------- /refinery/refinery/templates/login.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
    8 | {{ form.hidden_tag() }} 9 |

    refinery (beta: invitation only)

    10 |
    11 | {{ form.username(id_='login_username', class_='field required') }} 12 |
    13 |
    14 | {{ form.password(id_='login_password', class_='field required') }} 15 |
    16 | 17 |

    Forgot your password?

    18 |

    Not a member? Sign Up

    19 |
    20 | 21 | 22 |
    23 | 24 |
    25 | 26 | -------------------------------------------------------------------------------- /refinery/refinery/templates/profile.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block js_head %} 3 | 4 | {% endblock %} 5 | 6 | {% block content %} 7 | 8 |
    9 |
    10 | 11 | 12 |
    13 | 14 |
    15 | 21 | 22 |
    23 |
    24 |
    25 | {% endblock %} 26 | {% block js %} 27 | 28 | {% endblock %} 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /refinery/refinery/templates/summarize.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block js_head %} 3 | 4 | 5 | 6 | 7 | {% endblock %} 8 | 9 | {% block content %} 10 | 11 |
    12 |
    13 | INLINE STUFF! 14 |
    15 |
    16 | 17 |
    18 |
    19 | 20 |
    21 |
    Select notes by clicking the circles
    22 |
    When you're finished, click the "Keep" Button. Selecting nothing selects all results
    23 | 24 |
    25 |
    26 | 27 |
    28 |
    29 | 30 |
    31 |
    32 | 33 |
    34 |
    35 | 36 | 47 | 48 |
    49 |
    50 | 51 | 52 | 53 |
    54 |
    55 |
    56 |
    57 | {% endblock %} 58 | 59 | 60 | {% block js %} 61 | 73 | 74 | 77 | {% endblock %} -------------------------------------------------------------------------------- /refinery/refinery/webapp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/webapp/__init__.py -------------------------------------------------------------------------------- /refinery/refinery/webapp/customFunc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | customFunc.py 3 | 4 | A custom function that we can use to hook into the BNPY analysis. 5 | 6 | onLapComplete() run after every complete lap through all B batches 7 | onBatchComplete() run after every complete visit (Mstep, Estep, Sstep, ELBOstep) to a single batch 8 | onAlgorithmComplete() run after the algorithm converges/reaches maximum number of laps 9 | 10 | ''' 11 | import redis 12 | import numpy as np 13 | import json 14 | 15 | msgServer = redis.StrictRedis(socket_timeout=20) 16 | #from pubsub import msgServer 17 | 18 | def getModelState(hmodel,LP,nTopW): 19 | 20 | num_topics = hmodel.allocModel.K 21 | 22 | def lm_info(): 23 | for k in xrange(num_topics): 24 | lamvec = hmodel.obsModel.comp[k].lamvec #bag of words weights 25 | elamvec = lamvec / lamvec.sum() #renormalized weights 26 | inds = np.argsort(elamvec)[-nTopW:].tolist() #get the top indices 27 | inds.reverse() 28 | probs = [elamvec[idx] for idx in inds] #get their weights 29 | yield [elamvec,zip(inds,probs)] 30 | 31 | topW = [] 32 | lms = [] 33 | for lm,tops in lm_info(): 34 | topW.append(tops) 35 | lms.append(lm) 36 | 37 | topic_props = hmodel.allocModel.Ebeta 38 | 39 | def renormalize(vec): 40 | tot = sum(vec) 41 | return [x/tot for x in vec] 42 | 43 | doc_tops = [renormalize(x[:-1]) for x in LP['alphaPi']] #topic posteriors for each document, drop the last value because bnpy 
44 | 45 | return [topW,topic_props,doc_tops,lms] 46 | 47 | def onLapComplete(hmodel, percentDone, customFuncArgs): 48 | 49 | update = str(percentDone * 100) 50 | customArgs = json.loads(customFuncArgs) 51 | tm_id = customArgs["tm_id"] 52 | username = customArgs["username"] 53 | 54 | msgServer.publish(username + "Xmenus",'tm_prog,' + tm_id + "," + update) 55 | 56 | ''' 57 | 58 | For now we dont use these hooks, but bnpy allows them 59 | 60 | ''' 61 | 62 | def onBatchComplete(hModel, percentDone, customFuncArgs): 63 | print "onBatchComplete!" 64 | 65 | def onAlgorithmComplete(hModel, percentDone, customFuncArgs): 66 | print "onAlgorithmComplete!" 67 | -------------------------------------------------------------------------------- /refinery/refinery/webapp/pubsub.py: -------------------------------------------------------------------------------- 1 | import redis 2 | from refinery import app 3 | from flask import Response 4 | 5 | ''' 6 | 7 | Handles pubsub stuff 8 | 9 | ''' 10 | 11 | # START REDIS for pubsub system, should only happen once 12 | msgServer = redis.StrictRedis(socket_timeout=20) 13 | 14 | #Generic function to call redis and start pub/sub messaging service 15 | def event_stream(channel=None): 16 | 17 | pubsub = msgServer.pubsub() 18 | pubsub.subscribe(channel) 19 | 20 | # handle client disconnection in the client side by calling the exit keyword 21 | 22 | try: 23 | for msg in pubsub.listen(): 24 | yield 'data: %s\n\n' % msg['data'] 25 | except Exception: 26 | yield 'data: NONE\n\n' #if a timeout happens on the listen, we need to return something 27 | 28 | 29 | ''' 30 | 31 | These are the pubsub channels that serve the information 32 | 33 | ''' 34 | 35 | @app.route("//stream_upload") 36 | def stream_upload(username=None): 37 | mimetype = "text/event-stream" 38 | channel = username + "Xupload" 39 | return Response(event_stream(channel=channel), mimetype=mimetype) 40 | 41 | @app.route("//stream_menus") 42 | def stream_menus(username=None): 43 | mimetype = 
"text/event-stream" 44 | channel = username + "Xmenus" 45 | return Response(event_stream(channel=channel), mimetype=mimetype) 46 | 47 | @app.route('//stream_sum//') 48 | def stream_sum(username=None, data_id=None,ex_id=None): 49 | mimetype = "text/event-stream" 50 | ch = username +"_summary_" + str(data_id) + "_" + str(ex_id) 51 | return Response(event_stream(channel=ch), mimetype=mimetype) 52 | 53 | -------------------------------------------------------------------------------- /refinery/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==0.10.1 2 | flask-login==0.2.11 3 | flask-sqlalchemy==2.0 4 | gunicorn==19.1.1 5 | numpy==1.9.0 6 | scipy==0.9.0 7 | wtforms==2.0.1 8 | redis==2.10.3 9 | joblib==0.8.3-r1 10 | celery==3.1.15 11 | flask-wtf==0.10.2 12 | psycopg2==2.5.4 13 | kombu==3.0.23 14 | 15 | -------------------------------------------------------------------------------- /refinery/reset_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | import os 4 | 5 | from config import basedir 6 | from refinery import app,db 7 | from refinery.data.models import * 8 | import shutil 9 | 10 | def create_db_entries(): 11 | 12 | print "Creating new DB" 13 | 14 | userdir = app.config['USER_DIRECTORY'] 15 | 16 | try: 17 | os.stat(userdir) 18 | except: 19 | os.mkdir(userdir) 20 | 21 | # remove all folders within the users directory 22 | remove_dir(userdir) 23 | 24 | # recreate database structure 25 | db.drop_all() 26 | db.create_all() 27 | 28 | # username and passwords for mock db fill 29 | usernames = ['doc'] 30 | passwords = ['refinery'] 31 | 32 | # Create a bunch of users 33 | for i in xrange(len(usernames)): 34 | add_user(usernames[i],passwords[i]) 35 | 36 | # Create a bunch of datasets 37 | 38 | # Create a bunch of experiments 39 | 40 | # create a bunch of reports 41 | 42 | check_db() 43 | 44 | def check_db(): 45 | query_users = 
User.query.all() 46 | query_exp = Experiment.query.all() 47 | query_data = Dataset.query.all() 48 | 49 | #print query_users 50 | #print query_exp 51 | #print query_data 52 | 53 | def remove_dir(folder): 54 | for the_file in os.listdir(folder): 55 | file_path = os.path.join(folder, the_file) 56 | if os.path.isdir(file_path): 57 | print "Deleting: " + file_path 58 | shutil.rmtree(file_path) 59 | 60 | def create_user_dir(username): 61 | newdir = app.config['USER_DIRECTORY'] + username 62 | if os.path.exists(newdir): 63 | print "Directory already exists for " + username 64 | else: 65 | datadir = newdir + "/documents" 66 | imdir = newdir + "/images" 67 | procdir = newdir + "/processed" 68 | os.makedirs(newdir) 69 | os.makedirs(procdir) 70 | os.makedirs(datadir) 71 | os.makedirs(imdir) 72 | 73 | print "Creating directory structure for: " + newdir 74 | 75 | def add_user(username, password): 76 | ''' When we add a new user, we first check if this user exists. If not, 77 | we create this users directory structure. 
78 | ''' 79 | 80 | create_user_dir(username) 81 | email = username + "@refinery.com" 82 | u = User(username = username, password=password, email=email) 83 | 84 | if(username == "doc"): 85 | u.email = "refinery@docrefinery.com" 86 | shutil.copyfile("reset_db_files/default.jpg","refinery/static/users/doc/images/default.jpg") 87 | u.image = "default.jpg" 88 | 89 | db.session.add(u) 90 | db.session.commit() 91 | 92 | if __name__ == '__main__': 93 | create_db_entries() 94 | -------------------------------------------------------------------------------- /refinery/reset_db_files/default.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/reset_db_files/default.jpg -------------------------------------------------------------------------------- /refinery/setup_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | refineryBASE=./ 3 | export refineryBASE=$refineryBASE 4 | export BNPYOUTDIR=$refineryBASE/refinery/static/users/ 5 | export BNPYDATADIR=$refineryBASE/bnpy/data/ 6 | #export BNPYROOT=/Users/daeil/Dropbox/research/bnpy/bnpy-dev/ 7 | echo $refineryBASE 8 | -------------------------------------------------------------------------------- /refinery/start_celery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | celery --loglevel=debug --concurrency=2 -A refinery.celery worker 3 | -------------------------------------------------------------------------------- /refinery/start_refinery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # redis-server & 3 | #. venv/bin/activate 4 | gunicorn --log-level=debug --timeout 1200 -w 4 -b 0.0.0.0:8080 refinery.webapp.main_menu:app 5 | --------------------------------------------------------------------------------