├── .gitignore ├── LICENSE.txt ├── README.md ├── Vagrantfile ├── doc └── main.pdf ├── puppet └── manifests │ ├── classes │ ├── db.pp │ ├── init.pp │ ├── python.pp │ └── run.pp │ └── vagrant.pp └── refinery ├── bnpy ├── bnpy-dev │ ├── .gitignore │ ├── README.md │ ├── bnpy │ │ ├── ContinueRun.py │ │ ├── HModel.py │ │ ├── Run.py │ │ ├── __init__.py │ │ ├── allocmodel │ │ │ ├── AllocModel.py │ │ │ ├── __init__.py │ │ │ ├── admix │ │ │ │ ├── AdmixModel.py │ │ │ │ ├── BurstyVariationalOptimizer.py │ │ │ │ ├── HDPBetaOptimizer.py │ │ │ │ ├── HDPFullHard.py │ │ │ │ ├── HDPHardMult.py │ │ │ │ ├── HDPModel.py │ │ │ │ ├── HDPModelLP2.py │ │ │ │ ├── HDPPE.py │ │ │ │ ├── HDPRelModel.py │ │ │ │ ├── HDPSoft2Hard.py │ │ │ │ ├── OptimizerForHDPFullVarModel.py │ │ │ │ ├── OptimizerForHDPPE.py │ │ │ │ └── __init__.py │ │ │ ├── mix │ │ │ │ ├── DPMixModel.py │ │ │ │ ├── HardDPMixModel.py │ │ │ │ ├── MixModel.py │ │ │ │ └── __init__.py │ │ │ └── seq │ │ │ │ ├── HMMUtil.py │ │ │ │ └── __init__.py │ │ ├── config │ │ │ ├── allocmodel.conf │ │ │ ├── init.conf │ │ │ ├── learnalg.conf │ │ │ ├── numeric.platform-config │ │ │ ├── obsmodel.conf │ │ │ ├── onlinedata.conf │ │ │ └── output.conf │ │ ├── data │ │ │ ├── AdmixMinibatchIterator.py │ │ │ ├── AdmixMinibatchIteratorDB.py │ │ │ ├── DataObj.py │ │ │ ├── GraphData.py │ │ │ ├── MinibatchIterator.py │ │ │ ├── MinibatchIteratorFromDisk.py │ │ │ ├── WordsData.py │ │ │ ├── XData.py │ │ │ └── __init__.py │ │ ├── distr │ │ │ ├── BetaDistr.py │ │ │ ├── DirichletDistr.py │ │ │ ├── Distr.py │ │ │ ├── GaussDistr.py │ │ │ ├── GaussGammaDistr.py │ │ │ ├── GaussWishDistr.py │ │ │ ├── WishartDistr.py │ │ │ ├── ZMGaussDistr.py │ │ │ └── __init__.py │ │ ├── init │ │ │ ├── FromSaved.py │ │ │ ├── FromScratchBernRel.py │ │ │ ├── FromScratchGauss.py │ │ │ ├── FromScratchMult.py │ │ │ ├── FromTruth.py │ │ │ └── __init__.py │ │ ├── ioutil │ │ │ ├── BNPYArgParser.py │ │ │ ├── ModelReader.py │ │ │ ├── ModelWriter.py │ │ │ └── __init__.py │ │ ├── learnalg │ │ │ ├── 
BirthMove.py │ │ │ ├── BirthMoveTopicModel.py │ │ │ ├── LearnAlg.py │ │ │ ├── MemoizedOnlineVBLearnAlg.py │ │ │ ├── MergeMove.py │ │ │ ├── MergePairSelector.py │ │ │ ├── MergeTracker.py │ │ │ ├── OldMergeMove.py │ │ │ ├── StochasticOnlineVBLearnAlg.py │ │ │ ├── VBLearnAlg.py │ │ │ └── __init__.py │ │ ├── obsmodel │ │ │ ├── BagOfWordsObsModel.py │ │ │ ├── BernRelObsModel.py │ │ │ ├── DiagGaussObsModel.py │ │ │ ├── GaussObsModel.py │ │ │ ├── MultObsModel.py │ │ │ ├── ObsModel.py │ │ │ ├── ZMGaussObsModel.py │ │ │ └── __init__.py │ │ ├── suffstats │ │ │ ├── ParamBag.py │ │ │ ├── SuffStatBag.py │ │ │ └── __init__.py │ │ ├── util │ │ │ ├── IOUtil.py │ │ │ ├── LibRlogR.py │ │ │ ├── LinAlgUtil.py │ │ │ ├── NumericHardUtil.py │ │ │ ├── NumericUtil.py │ │ │ ├── RandUtil.py │ │ │ ├── SpecialFuncUtil.py │ │ │ ├── VerificationUtil.py │ │ │ ├── __init__.py │ │ │ └── lib │ │ │ │ └── RlogRCore.cpp │ │ └── viz │ │ │ ├── BarsViz.py │ │ │ ├── GaussViz.py │ │ │ ├── PlotComps.py │ │ │ ├── PlotELBO.py │ │ │ ├── PlotK.py │ │ │ ├── PrintTopics.py │ │ │ └── __init__.py │ ├── demodata │ │ ├── AsteriskK8.py │ │ ├── Bars2D.py │ │ ├── BarsK10V900.py │ │ ├── BarsK50V2500.py │ │ ├── BarsK6V9.py │ │ ├── BarsK8.py │ │ ├── BinaryGraphK5.py │ │ ├── DeadLeaves.py │ │ ├── DeadLeavesD25.py │ │ ├── HashtagK9.py │ │ ├── NIPS.py │ │ ├── NYTimes.py │ │ └── StarCovarK5.py │ └── tests │ │ ├── allocmodel │ │ ├── TestMixModel.py │ │ └── admix │ │ │ ├── TestGlobalStickbreakOptimizer.py │ │ │ ├── TestHDPBetaOptimizer.py │ │ │ └── TestHDPVariationalOptimizer.py │ │ ├── data │ │ └── TestMinibatchIterator.py │ │ ├── distr │ │ ├── TestGaussDistr.py │ │ ├── TestGaussWishDistr.py │ │ ├── TestWishartDistr.py │ │ └── TestZMGaussDistr.py │ │ ├── end-to-end │ │ ├── AbstractEndToEndTest.py │ │ ├── TestAdmixTopicModel.py │ │ ├── TestDPMixGauss.py │ │ ├── TestHDPModel.py │ │ ├── TestMixDiagGauss.py │ │ ├── TestMixGauss.py │ │ ├── TestMixZMGauss.py │ │ ├── TestProxFunc.py │ │ └── Util.py │ │ ├── init │ │ ├── TestFromSaved.py 
│ │ └── TestFromScratchGauss.py │ │ ├── ioutil │ │ └── TestModelReader.py │ │ ├── learnalg │ │ ├── TestBirthMove.py │ │ └── TestMemoizedVBWithBirth.py │ │ ├── merge │ │ ├── AbstractBaseTestForHDP.py │ │ ├── TestMathForHDPMerges.py │ │ ├── TestMergeDPMixModel.py │ │ ├── TestMergeHDPTopicModel.py │ │ ├── TestMergePairSelector.py │ │ └── TestMergeTracker.py │ │ ├── obsmodel │ │ ├── TestGaussObsModel.py │ │ └── TestZMGaussObsModel.py │ │ ├── paramestimation │ │ ├── Test-MixModel-Gauss-VB.py │ │ ├── Test-MixModel-ZMGauss-EM.py │ │ └── Test-MixModel-ZMGauss-VB.py │ │ ├── suffstats │ │ ├── TestParamBag.py │ │ └── TestSuffStatBag.py │ │ └── util │ │ └── TestEqualAtMSigFigs.py ├── results │ └── .gitignore └── scripts │ ├── AsteriskK8.py │ ├── BarsBurstyK20.py │ ├── BarsBurstyK6.py │ ├── BarsK8.py │ ├── DeadLeaves.py │ ├── DeadLeavesD25.py │ ├── HuffPost.py │ ├── NIPS.py │ ├── NYTimes.py │ ├── NYTimesDB.py │ ├── Science.py │ ├── StarCovarK5.py │ ├── ToyBarsK6.py │ ├── Wikipedia.py │ └── customFunc.py ├── config.py ├── data ├── nips0-12.zip ├── nyt_2013_obama.tar.gz ├── nyt_2013_obama.zip └── reuters.zip ├── fact_classifier ├── classify_ex.py ├── factfeat └── factsvm ├── lib ├── __init__.py ├── model_svm │ ├── feats │ ├── lower_words │ ├── non_abbrs │ └── svm_model ├── sbd.py ├── sbd_util.py ├── svmlite │ ├── LICENSE.txt │ ├── Makefile │ ├── kernel.h │ ├── svm_classify │ ├── svm_classify.c │ ├── svm_classify.o │ ├── svm_common.c │ ├── svm_common.h │ ├── svm_common.o │ ├── svm_hideo.c │ ├── svm_hideo.o │ ├── svm_learn │ ├── svm_learn.c │ ├── svm_learn.h │ ├── svm_learn.o │ ├── svm_learn_main.c │ ├── svm_learn_main.o │ ├── svm_light.tar.gz │ └── svm_loqo.c └── word_tokenize.py ├── refinery ├── __init__.py ├── data │ ├── __init__.py │ └── models.py ├── static │ ├── .gitignore │ ├── assets │ │ ├── fonts │ │ │ ├── bpicons │ │ │ │ ├── bpicons.eot │ │ │ │ ├── bpicons.svg │ │ │ │ ├── bpicons.ttf │ │ │ │ ├── bpicons.woff │ │ │ │ └── license.txt │ │ │ ├── fontawesome.eot │ │ │ ├── 
fontawesome.svg │ │ │ ├── fontawesome.ttf │ │ │ ├── fontawesome.woff │ │ │ ├── fontawesome │ │ │ │ ├── Read Me.txt │ │ │ │ ├── fontawesome.dev.svg │ │ │ │ ├── fontawesome.eot │ │ │ │ ├── fontawesome.svg │ │ │ │ ├── fontawesome.ttf │ │ │ │ ├── fontawesome.woff │ │ │ │ └── license.txt │ │ │ ├── icomoon │ │ │ │ ├── icomoon.dev.svg │ │ │ │ ├── icomoon.eot │ │ │ │ ├── icomoon.svg │ │ │ │ ├── icomoon.ttf │ │ │ │ ├── icomoon.woff │ │ │ │ └── license.txt │ │ │ └── icomoon_arrows │ │ │ │ ├── icomoon.dev.svg │ │ │ │ ├── icomoon.eot │ │ │ │ ├── icomoon.svg │ │ │ │ ├── icomoon.ttf │ │ │ │ └── icomoon.woff │ │ ├── images │ │ │ ├── Logo.png │ │ │ ├── arrows │ │ │ │ ├── arrows.dev.svg │ │ │ │ ├── arrows.eot │ │ │ │ ├── arrows.svg │ │ │ │ ├── arrows.ttf │ │ │ │ ├── arrows.woff │ │ │ │ └── license.txt │ │ │ ├── elephants │ │ │ │ ├── 1.jpg │ │ │ │ ├── 2.jpg │ │ │ │ ├── 3.jpg │ │ │ │ ├── 4.jpg │ │ │ │ └── 5.jpg │ │ │ ├── glyphicons_020_home.png │ │ │ ├── glyphicons_063_power.png │ │ │ ├── glyphicons_144_folder_open.png │ │ │ ├── glyphicons_145_folder_plus.png │ │ │ ├── glyphicons_220_play_button.png │ │ │ ├── glyphicons_232_cloud.png │ │ │ ├── glyphicons_341_briefcase.png │ │ │ ├── glyphicons_357_suitcase.png │ │ │ ├── glyphicons_363_cloud_upload.png │ │ │ ├── glyphicons_364_cloud_download.png │ │ │ ├── glyphicons_370_globe_af.png │ │ │ ├── glyphicons_371_global.png │ │ │ ├── hopper-chop-suey.jpg │ │ │ ├── hopper-early-sunday-morning.jpg │ │ │ ├── hopper-gas.jpg │ │ │ ├── hopper-morning-sun.jpg │ │ │ ├── hopper-nighthawks.jpg │ │ │ ├── icons │ │ │ │ ├── arrow_down.png │ │ │ │ ├── arrow_up.png │ │ │ │ ├── iconmonstr-add-folder-icon-256.png │ │ │ │ ├── iconmonstr-arrow-4-icon.png │ │ │ │ ├── iconmonstr-book-17-icon.png │ │ │ │ ├── iconmonstr-cloud-3-icon.png │ │ │ │ ├── iconmonstr-cloud-9-icon-48.png │ │ │ │ ├── iconmonstr-edit-8-icon.png │ │ │ │ ├── iconmonstr-flask-7-icon-256.png │ │ │ │ ├── iconmonstr-gear-icon-48.png │ │ │ │ ├── iconmonstr-info-6-icon-128.png │ │ │ │ ├── 
iconmonstr-info-6-icon-256.png │ │ │ │ ├── iconmonstr-line-chart-4-icon-64.png │ │ │ │ ├── iconmonstr-line-chart-icon-64.png │ │ │ │ ├── iconmonstr-lock-13-icon.png │ │ │ │ ├── iconmonstr-lock-15-icon.png │ │ │ │ ├── iconmonstr-lock-3-icon.png │ │ │ │ ├── iconmonstr-lock-9-icon.png │ │ │ │ ├── iconmonstr-magnifier-4-icon.png │ │ │ │ ├── iconmonstr-magnifier-6-icon.png │ │ │ │ ├── iconmonstr-newspaper-12-icon.png │ │ │ │ ├── iconmonstr-newspaper-3-icon.png │ │ │ │ ├── iconmonstr-newspaper-4-icon.png │ │ │ │ ├── iconmonstr-newspaper-7-icon.png │ │ │ │ ├── iconmonstr-note-25-icon.png │ │ │ │ ├── iconmonstr-photo-camera-6-icon.png │ │ │ │ ├── iconmonstr-photo-camera-8-icon.png │ │ │ │ ├── iconmonstr-plus-5-icon-24.png │ │ │ │ ├── iconmonstr-star-5-icon.png │ │ │ │ ├── iconmonstr-star-7-icon.png │ │ │ │ ├── iconmonstr-text-file-4-icon.png │ │ │ │ ├── iconmonstr-x-mark-4-icon-24.png │ │ │ │ ├── iconmonstr-x-mark-4-icon-256.png │ │ │ │ ├── iconmonstr-x-mark-4-icon.png │ │ │ │ ├── pixel-arrow-right.png │ │ │ │ ├── pixel-arrow-right_.png │ │ │ │ ├── pixel-arrow-right__.png │ │ │ │ └── running-icon.png │ │ │ ├── paintings │ │ │ │ ├── dali-rose.jpg │ │ │ │ ├── hopper-chop-suey.jpg │ │ │ │ ├── hopper-early-sunday-morning.jpg │ │ │ │ ├── hopper-gas.jpg │ │ │ │ ├── hopper-morning-sun.jpg │ │ │ │ ├── hopper-nighthawks.jpg │ │ │ │ ├── kandinsky-trans.jpg │ │ │ │ ├── mondrian-comp-a.jpg │ │ │ │ ├── monet-fog.jpg │ │ │ │ ├── monet-gal.jpg │ │ │ │ └── monet-poppies.jpg │ │ │ ├── progress.png │ │ │ ├── progressbar.gif │ │ │ ├── projects │ │ │ │ ├── img05.jpg │ │ │ │ ├── img06.jpg │ │ │ │ ├── img07.jpg │ │ │ │ ├── img08.jpg │ │ │ │ └── latest │ │ │ │ │ ├── img004.jpg │ │ │ │ │ ├── img01.jpg │ │ │ │ │ ├── img02.jpg │ │ │ │ │ ├── img03.jpg │ │ │ │ │ └── img04.jpg │ │ │ ├── random │ │ │ │ ├── 1.jpg │ │ │ │ ├── 2.jpg │ │ │ │ ├── 3.jpg │ │ │ │ ├── 4.jpg │ │ │ │ └── 5.jpg │ │ │ ├── sliders │ │ │ │ └── camera │ │ │ │ │ ├── slide00.jpg │ │ │ │ │ ├── slide01.jpg │ │ │ │ │ ├── slide02.jpg │ │ │ 
│ │ └── slide03.jpg │ │ │ ├── ui │ │ │ │ ├── ui-bg_flat_0_aaaaaa_40x100.png │ │ │ │ ├── ui-bg_flat_75_ffffff_40x100.png │ │ │ │ ├── ui-bg_glass_55_fbf9ee_1x400.png │ │ │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ │ │ ├── ui-bg_glass_75_dadada_1x400.png │ │ │ │ ├── ui-bg_glass_75_e6e6e6_1x400.png │ │ │ │ ├── ui-bg_glass_95_fef1ec_1x400.png │ │ │ │ ├── ui-bg_highlight-soft_75_cccccc_1x100.png │ │ │ │ ├── ui-icons_222222_256x240.png │ │ │ │ ├── ui-icons_2e83ff_256x240.png │ │ │ │ ├── ui-icons_454545_256x240.png │ │ │ │ ├── ui-icons_454545_256x240___________.png │ │ │ │ ├── ui-icons_888888_256x240.png │ │ │ │ └── ui-icons_cd0a0a_256x240.png │ │ │ └── view_mode │ │ │ │ ├── 1.png │ │ │ │ ├── 10.png │ │ │ │ ├── 2.png │ │ │ │ ├── 3.png │ │ │ │ ├── 4.png │ │ │ │ ├── 5.png │ │ │ │ ├── 6.png │ │ │ │ ├── 7.png │ │ │ │ ├── 8.png │ │ │ │ └── 9.png │ │ ├── lib │ │ │ ├── colorbox │ │ │ │ ├── colorbox.css │ │ │ │ ├── images │ │ │ │ │ ├── border.png │ │ │ │ │ ├── controls.png │ │ │ │ │ ├── loading.gif │ │ │ │ │ ├── loading_background.png │ │ │ │ │ └── overlay.png │ │ │ │ └── jquery.colorbox-min.js │ │ │ ├── column12.css │ │ │ ├── d3 │ │ │ │ ├── d3.layout.cloud.js │ │ │ │ └── d3.v3.min.js │ │ │ ├── jquery.knob.js │ │ │ ├── jquery.mixitup.min.js │ │ │ ├── jquery │ │ │ │ ├── jquery-1.10.2.js │ │ │ │ ├── jquery-1.11.1.js │ │ │ │ ├── jquery-1.9.1.min.js │ │ │ │ ├── jquery-ui-1.10.4.js │ │ │ │ ├── jquery-ui-1.10.4.min.js │ │ │ │ ├── jquery-ui-1.9.2.custom.min.js │ │ │ │ ├── jquery-ui.css │ │ │ │ ├── jquery.knob.js │ │ │ │ └── jquery.min.map │ │ │ └── slider │ │ │ │ └── simple-slider.js │ │ └── misc │ │ │ └── stopwords.txt │ ├── css │ │ ├── experiment.css │ │ ├── listing.css │ │ ├── login.css │ │ ├── style.css │ │ └── upload.css │ └── js │ │ ├── annotate.js │ │ ├── main_menu.js │ │ ├── summarize.js │ │ ├── topicmodel.js │ │ └── upload.js ├── templates │ ├── about.html │ ├── base.html │ ├── browse_list.html │ ├── data_list.html │ ├── docview.html │ ├── ex_list.html │ ├── folder_list.html │ 
├── info_page.html │ ├── login.html │ ├── profile.html │ ├── summarize.html │ ├── topicmodel.html │ └── upload.html └── webapp │ ├── __init__.py │ ├── admin.py │ ├── customFunc.py │ ├── main_menu.py │ ├── pubsub.py │ ├── summarize.py │ ├── topicmodel.py │ └── upload.py ├── requirements.txt ├── reset_db.py ├── reset_db_files └── default.jpg ├── setup_env.sh ├── start_celery.sh └── start_refinery.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.idea 3 | .pyc 4 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) <2014> 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
9 | -------------------------------------------------------------------------------- /doc/main.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/doc/main.pdf -------------------------------------------------------------------------------- /puppet/manifests/classes/db.pp: -------------------------------------------------------------------------------- 1 | class db { 2 | # postgresql-dev required for Python's psycopg2 3 | /* 4 | package { [ 'postgresql', 'postgresql-server-dev-all' ]: 5 | ensure => 'installed', 6 | } 7 | 8 | service { 'postgresql': 9 | ensure => running, 10 | require => Package[postgresql], 11 | } 12 | 13 | package {'redis-server': 14 | ensure => 'installed', 15 | } 16 | 17 | service { 'redis-server': 18 | ensure => running, 19 | require => Package[redis-server], 20 | } 21 | */ 22 | 23 | class { 'postgresql::server': } 24 | 25 | postgresql::server::db { 'refinery': 26 | user => 'vagrant', 27 | password => postgresql_password('vagrant', ''), 28 | } 29 | 30 | package { [ 'postgresql-server-dev-all' ]: 31 | ensure => 'installed', 32 | require => Class['postgresql::server'], 33 | } 34 | 35 | # Redis - Server 36 | package {'redis-server': 37 | ensure => 'installed', 38 | } 39 | 40 | service { 'redis-server': 41 | ensure => running, 42 | require => Package[redis-server], 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /puppet/manifests/classes/init.pp: -------------------------------------------------------------------------------- 1 | # stage {"pre": before => Stage["main"]} class {'apt': stage => 'pre'} 2 | 3 | # Commands to run before all others in puppet. 
4 | class init { 5 | group { "puppet": 6 | ensure => "present", 7 | } 8 | case $operatingsystem { 9 | ubuntu: { 10 | exec { "update_apt": 11 | command => "sudo apt-get update", 12 | } 13 | # Provides "add-apt-repository" command, useful if you need 14 | # to install software from other apt repositories. 15 | package { "python-software-properties": 16 | ensure => present, 17 | require => [ 18 | Exec['update_apt'], 19 | ]; 20 | } 21 | $misc_packages = ["make", "curl", "git-core"] 22 | package { $misc_packages: 23 | ensure => present, 24 | require => [ 25 | Exec['update_apt'], 26 | ]; 27 | } 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /puppet/manifests/classes/python.pp: -------------------------------------------------------------------------------- 1 | # Install python and compiled modules for project 2 | class python { 3 | case $operatingsystem { 4 | ubuntu: { 5 | package { "python-pip": 6 | ensure => installed 7 | } 8 | package { ["python-scipy"]: 9 | ensure => installed, 10 | require => Package['python-pip'] 11 | } 12 | package { ["numpy"]: 13 | ensure => installed, 14 | provider => pip, 15 | require => Package['python-pip'] 16 | } 17 | 18 | 19 | /* 20 | package { ['libfreetype6-dev', 'pkg-config']: 21 | ensure => installed 22 | } 23 | package { ['pyparsing']: 24 | ensure => installed, 25 | provider => pip, 26 | require => Package['python-pip'] 27 | } 28 | package { ["matplotlib"]: 29 | ensure => installed, 30 | provider => pip, 31 | require => Package['numpy', 'pyparsing', 'libfreetype6-dev'] 32 | } 33 | */ 34 | 35 | 36 | package { 'virtualenv': 37 | ensure => installed, 38 | provider => pip, 39 | require => Package['python-pip'] 40 | } 41 | package { 'gunicorn': 42 | ensure => installed, 43 | provider => pip, 44 | require => Package['python-pip'] 45 | } 46 | package { 'flask': 47 | ensure => installed, 48 | provider => pip, 49 | require => Package['python-pip'] 50 | } 51 | package { 
['joblib','redis','celery']: 52 | ensure => installed, 53 | provider => pip, 54 | require => Package['python-pip'] 55 | } 56 | package { ['flask-wtf','flask-login','wtforms']: 57 | ensure => installed, 58 | provider => pip, 59 | require => Package['python-pip','flask'] 60 | } 61 | package { 'scikit-learn': 62 | provider => pip, 63 | require => Package['python-pip'] 64 | } 65 | package { 'kombu': 66 | ensure => installed, 67 | provider => pip, 68 | require => Package['numpy'] 69 | } 70 | package { ['flask-sqlalchemy','psycopg2']: 71 | ensure => installed, 72 | provider => pip, 73 | require => Class['postgresql::server'] 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /puppet/manifests/classes/run.pp: -------------------------------------------------------------------------------- 1 | class run { 2 | include supervisor 3 | 4 | exec { "reset_db": 5 | command => "/usr/bin/python reset_db.py", 6 | user => "vagrant", 7 | cwd => "/vagrant/refinery/", 8 | } 9 | 10 | supervisor::app {'celery': 11 | command => '/usr/local/bin/celery --concurrency=4 -A refinery.celery worker', 12 | directory => '/vagrant/refinery/', 13 | user => 'vagrant', 14 | } 15 | 16 | supervisor::app {'refinery': 17 | command => '/usr/local/bin/gunicorn --timeout 120 -w 4 -b 0.0.0.0:8080 refinery.webapp.main_menu:app', 18 | directory => '/vagrant/refinery/', 19 | user => 'vagrant', 20 | } 21 | 22 | /* 23 | exec { "start_celery": 24 | command => "./start_celery.sh", 25 | cwd => "/vagrant/refinery/", 26 | provider => 'shell', 27 | user => 'vagrant', 28 | returns => 1, 29 | require => Exec['reset_db'], 30 | } 31 | 32 | exec { "start_refinery": 33 | command => "./start_refinery.sh", 34 | cwd => "/vagrant/refinery/", 35 | user => 'vagrant', 36 | provider => 'shell', 37 | returns => 1, 38 | require => Exec['reset_db'], 39 | } 40 | */ 41 | } -------------------------------------------------------------------------------- 
/puppet/manifests/vagrant.pp: -------------------------------------------------------------------------------- 1 | # This vagrant.pp represents the base script to begin installation of Refinery 2 | 3 | import "classes/*.pp" 4 | 5 | $PROJ_DIR = "/vagrant" 6 | $HOME_DIR = "/home/vagrant" 7 | 8 | Exec { 9 | path => "/usr/local/bin:/usr/bin:/usr/sbin:/sbin:/bin", 10 | } 11 | 12 | class dev { 13 | class { 14 | init: ; 15 | db: require => Class[init]; 16 | python: require => Class["init","db"]; 17 | run: require => Class["init", "db", "python"]; 18 | } 19 | } 20 | 21 | include dev 22 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/.gitignore: -------------------------------------------------------------------------------- 1 | # IGNORE THESE FILES FROM THE GITHUB REPOSITORY 2 | 3 | # ====================================================== EXCLUDE DIRECTORIES 4 | # Exclude everything in the bin directory, 5 | # which holds compiled binaries for executing on grid 6 | bin/ 7 | 8 | # Exclude everything in these directories 9 | # These aren't of interest for tracking in a repository because 10 | # (1) file sizes can be HUGE 11 | # (2) these files rarely mean anything to others except the person who ran the test 12 | logs/ 13 | local/ 14 | results/ 15 | profile/ 16 | .idea/* 17 | .ipynb_checkpoints/ 18 | 19 | # ====================================================== EXCLUDE FILE PATTERNS 20 | # Exclude script files 21 | *.sh 22 | .* 23 | !/.gitignore 24 | 25 | # Exclude user-specific config options for where to read and write data 26 | *.path 27 | 28 | # Exclude temporary files created by text editors 29 | *~ 30 | *~lock* 31 | *.DS_Store 32 | .*.swp 33 | 34 | # Exclude weird NFS file system dumps 35 | .nfs* 36 | 37 | # Exclude mocap files 38 | *.amc 39 | *.asf 40 | *.key 41 | 42 | 43 | # Exclude videos 44 | *.avi 45 | *.swf 46 | *.mpg 47 | 48 | # Exclude compiled python 49 | *.pyc 50 | 51 | # Exclude executable files 
generated by Matlab's mex 52 | *.mat 53 | *.mexglx 54 | *.mexa64 55 | *.mexmaci64 56 | *.so 57 | 58 | # Exclude auxiliary files generated by LaTeX 59 | *.pdf 60 | *-eps-converted-to.pdf 61 | *.dvi 62 | *.log 63 | *.blg 64 | *.bbl 65 | *.aux 66 | *.backup 67 | *.tmp 68 | *.synctex.gz 69 | *.out 70 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/README.md: -------------------------------------------------------------------------------- 1 | **bnpy** is Bayesian nonparametric unsupervised machine learning for python. 2 | 3 | Contact: Mike Hughes. mike AT michaelchughes.com 4 | 5 | # About 6 | This python module provides code for training popular Bayesian nonparametric models on massive datasets. **bnpy** supports the latest online learning algorithms as well as standard offline methods. 7 | 8 | Supported probabilistic models include 9 | 10 | * Gaussian mixture models 11 | * standard parametric 12 | * nonparametric (Dirichlet Process) 13 | 14 | Supported learning algorithms include: 15 | 16 | * EM: expectation-maximization (offline) 17 | * VB: variational Bayes (offline) 18 | * moVB: memoized online VB 19 | * soVB: stochastic online VB 20 | 21 | These are all variants of *variational inference*, a family of optimization algorithms that perform coordinate ascent to learn parameters. 22 | 23 | # Quick Start 24 | 25 | **bnpy** provides an easy command-line interface for launching experiments. 26 | 27 | Train 8-component Gaussian mixture model via EM. 28 | ``` 29 | python -m bnpy.Run AsteriskK8 MixModel ZMGauss EM --K 8 30 | ``` 31 | 32 | Train Dirichlet-process Gaussian mixture model (DP-GMM) via variational bayes. 
33 | ``` 34 | python -m bnpy.Run AsteriskK8 DPMixModel Gauss VB --K 8 35 | ``` 36 | 37 | Train DP-GMM via memoized online VB, with birth and merge moves 38 | ``` 39 | python -m bnpy.Run AsteriskK8 DPMixModel Gauss moVB --moves birth,merge 40 | ``` 41 | 42 | ### Quick help 43 | ``` 44 | # print help message for required arguments 45 | python -m bnpy.Run --help 46 | # print help message for specific keyword options for Gaussian mixture models 47 | python -m bnpy.Run AsteriskK8 MixModel Gauss EM --kwhelp 48 | ``` 49 | 50 | # Installation 51 | 52 | Follow the [installation instructions](https://bitbucket.org/michaelchughes/bnpy/wiki/Installation.md) on our project wiki. 53 | 54 | # Documentation 55 | 56 | All documentation can be found on the [project wiki](https://bitbucket.org/michaelchughes/bnpy/wiki/Home.md). 57 | 58 | Especially check out the [quick start demos](https://bitbucket.org/michaelchughes/bnpy/wiki/QuickStart/QuickStart.md) 59 | 60 | # Target Audience 61 | 62 | Primarly, we intend bnpy to be a platform for researchers. By gathering many learning algorithms and popular models in one convenient, modular repository, we hope to make it easier to compare and contrast approaches. 63 | 64 | # Repository Organization 65 | bnpy/ module-specific code 66 | 67 | demodata/ example dataset scripts 68 | 69 | tests/ unit-tests for assuring code correctness. using nose package. 
70 | 71 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/__init__.py: -------------------------------------------------------------------------------- 1 | ''' bnpy module __init__ file 2 | ''' 3 | import data 4 | import distr 5 | import util 6 | import suffstats 7 | 8 | import allocmodel 9 | import obsmodel 10 | from HModel import HModel 11 | 12 | import ioutil 13 | load_model = ioutil.ModelReader.load_model 14 | save_model = ioutil.ModelWriter.save_model 15 | 16 | import init 17 | 18 | import learnalg 19 | import Run 20 | from Run import run 21 | 22 | import os 23 | import sys 24 | ''' 25 | ########################################################### Configure save 26 | ########################################################### location 27 | hasWriteableOutdir = False 28 | if 'BNPYOUTDIR' in os.environ: 29 | outdir = os.environ['BNPYOUTDIR'] 30 | if os.path.exists(outdir): 31 | try: 32 | with open(os.path.join(outdir, 'bnpytest'), 'w') as f: 33 | pass 34 | except IOError: 35 | sys.exit('BNPYOUTDIR not writeable: %s' % (outdir)) 36 | hasWriteableOutdir = True 37 | if not hasWriteableOutdir: 38 | raise ValueError('Environment variable BNPYOUTDIR not specified. 
Cannot save results to disk') 39 | ''' 40 | ########################################################### Configure data 41 | ########################################################### location 42 | root = os.path.sep.join(os.path.abspath(__file__).split(os.path.sep)[:-2]) 43 | sys.path.append(os.path.join(root, 'demodata/')) 44 | if 'BNPYDATADIR' in os.environ: 45 | if os.path.exists(os.environ['BNPYDATADIR']): 46 | sys.path.append(os.environ['BNPYDATADIR']) 47 | else: 48 | print "Warning: Environment variable BNPYDATADIR not a valid directory" 49 | 50 | ########################################################### Optional: viz 51 | ########################################################### package for plots 52 | canPlot = False 53 | ''' 54 | try: 55 | from matplotlib import pylab 56 | canPlot = True 57 | except ImportError: 58 | print "Error importing matplotlib. Plotting disabled." 59 | print "Fix by making sure this produces a figure window on your system" 60 | print " >>> from matplotlib import pylab; pylab.figure(); pylab.show();" 61 | if canPlot: 62 | import viz 63 | __all__ = ['run', 'Run', 'learn', 'allocmodel','obsmodel', 'suffstats', 64 | 'HModel', 'init', 'util','ioutil','viz','distr', 'mergeutil'] 65 | ''' 66 | __all__ = ['run', 'Run', 'learn', 'allocmodel','obsmodel', 'suffstats', 67 | 'HModel', 'init', 'util','ioutil','distr', 'mergeutil'] 68 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/allocmodel/__init__.py: -------------------------------------------------------------------------------- 1 | from AllocModel import AllocModel 2 | 3 | from mix.MixModel import MixModel 4 | from mix.DPMixModel import DPMixModel 5 | from mix.HardDPMixModel import HardDPMixModel 6 | 7 | from admix.AdmixModel import AdmixModel 8 | from admix.HDPModel import HDPModel 9 | from admix.HDPPE import HDPPE 10 | from admix.HDPFullHard import HDPFullHard 11 | from admix.HDPSoft2Hard import HDPSoft2Hard 12 | from 
admix.HDPHardMult import HDPHardMult 13 | from admix.HDPRelModel import HDPRelAssortModel 14 | 15 | AllocModelConstructorsByName = { \ 16 | 'MixModel':MixModel, 17 | 'DPMixModel':DPMixModel, 18 | 'HardDPMixModel':HardDPMixModel, 19 | 'AdmixModel':AdmixModel, 20 | 'HDPModel':HDPModel, 21 | 'HDPPE':HDPPE, 22 | 'HDPFullHard':HDPFullHard, 23 | 'HDPSoft2Hard':HDPSoft2Hard, 24 | 'HDPHardMult':HDPHardMult, 25 | 'HDPRelAssortModel':HDPRelAssortModel, 26 | } 27 | 28 | AllocModelNameSet = set(AllocModelConstructorsByName.keys()) 29 | 30 | __all__ = list() 31 | for name in AllocModelConstructorsByName: 32 | __all__.append(name) 33 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/allocmodel/admix/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`admix` module gathers point-estimate and variational approximations 3 | for Bayesian admixture modeling 4 | """ 5 | 6 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/allocmodel/mix/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`mix` module gathers point-estimate and variational approximations 3 | for Bayesian mixture modeling, including 4 | finite parametric mixture models 5 | nonparametric Dirichlet Process and Pitman-Yor mixture models 6 | """ 7 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/allocmodel/seq/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`seq` module gathers routines for Bayesian sequence modeling, including 3 | finite hidden Markov models 4 | nonparametric hidden Markov models 5 | """ 6 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/allocmodel.conf: 
-------------------------------------------------------------------------------- 1 | # Keyword options for bnpy allocation models 2 | # specify priors on parameters that allocate/assign data to clusters 3 | 4 | [MixModel] 5 | alpha0=1.0 6 | 7 | [MixModelHelp] 8 | alpha0="Scalar positive parameter for symmetric Dirichlet prior on mixture weights (component appearance probabilities). Set to 1.0 for a uniform prior (ML estimation)." 9 | 10 | [DPMixModel] 11 | alpha0=1.0 12 | truncType=z 13 | 14 | [DPMixModelHelp] 15 | alpha0="Scalar positive concentration parameter for Dirichlet Process. Set large 16 | to give all components nearly-same probability mass. Set small (near zero) to make only a few components probable." 17 | truncType="Truncation scheme for Dirichlet Process, determines how unassigned components in infinite tail are represented. Strongly recommended: 'z'." 18 | 19 | [HardDPMixModel] 20 | alpha0=1.0 21 | truncType=z 22 | 23 | [AdmixModel] 24 | alpha0 = 1.0 25 | 26 | [AdmixModelHelp] 27 | alpha0="Scalar positive parameter for symmetric Dirichlet prior on mixture weights (component appearance probabilities). Set to 1.0 for a uniform prior (ML estimation)." 
28 | 29 | 30 | [HDPModel] 31 | alpha0 = 5 32 | gamma = 0.5 33 | 34 | [HDPPE] 35 | alpha0 = 5 36 | gamma = 0.5 37 | 38 | [HDPFullHard] 39 | alpha0 = 5 40 | gamma = 0.5 41 | 42 | [HDPSoft2Hard] 43 | alpha0 = 5 44 | gamma = 0.5 45 | 46 | [HDPHardMult] 47 | alpha0 = 5 48 | gamma = 0.5 49 | 50 | [HDPRelAssortModel] 51 | alpha0 = 5 52 | gamma = 0.5 53 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/init.conf: -------------------------------------------------------------------------------- 1 | # Keyword options for initialization of global parameters for bnpy models 2 | # specify 3 | 4 | [Initialization] 5 | initname=randfromprior 6 | initarg=1.0 7 | K=6 8 | nRepeatTrue=2 9 | 10 | [InitializationHelp] 11 | initname=Name of routine for initialization. Options: {'randexamples','randexamplesbydist','truelabels','repeattruelabels'}. 12 | initarg=Numeric argument for initialization of key parameters. See details of init/FromScratch modules. 13 | K=Integer number of components. 14 | nRepeatTrue=[when initname='repeattruelabels'] number of duplicates of each true component to include when initializing. 
15 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/numeric.platform-config: -------------------------------------------------------------------------------- 1 | [RememberThisFileIsAutogenerated] 2 | 3 | [LibraryPrefs] 4 | calcRlogR = numpy 5 | inplaceExpAndNormalizeRows = numpy 6 | calcRlogRdotv = numpy 7 | 8 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/obsmodel.conf: -------------------------------------------------------------------------------- 1 | # Keyword options for observation models 2 | # specify how to set prior on data-generating parameters 3 | 4 | [BernRel] 5 | lamA=0.1 6 | lamB=0.1 7 | 8 | [BernRelHelp] 9 | lamA=hyperparameter representing pseudo head count 10 | lamB=hyperparameter representing pseudo tail count 11 | 12 | [ZMGauss] 13 | min_covar=1e-8 14 | dF=0 15 | sF=1.0 16 | ECovMat=eye 17 | 18 | [ZMGaussHelp] 19 | min_covar=Minimum value for diagonal entries of covariance matrix, to ensure invertibility (EM only). Set very small to approach maximum likelihood estimates. 20 | dF=Number of degrees of freedom for Wishart prior. Must be >= dimension of Data. 21 | sF=Scale factor for expected covariance matrix under Wishart prior. Set very small to approach maximum likelihood estimates. 22 | ECovMat=Name of routine for setting expected covariance matrix under Wishart prior. Options: {'eye', 'covdata'}. 23 | 24 | [Gauss] 25 | min_covar=1e-8 26 | dF=0 27 | sF=1.0 28 | ECovMat=eye 29 | kappa=1e-4 30 | 31 | [GaussHelp] 32 | min_covar=Minimum value for diagonal entries of covariance matrix, to ensure invertibility [algName='EM']. Set very small to approach maximum likelihood estimates. 33 | dF=Number of degrees of freedom for Wishart prior. Must be >= dimension of Data. 34 | sF=Scale factor for expected covariance matrix under Wishart prior. Set very small to approach maximum likelihood estimates. 
35 | ECovMat=Name of routine for setting expected covariance matrix under Wishart prior. Options: {'eye', 'covdata'}. 36 | kappa=Scalar that controls the precision (inverse variance) of Gaussian prior on means: \mu[k] ~ Normal( 0, 1/kappa * ECovMat). Set very small to allow means to approach maximum likelihood estimates. 37 | 38 | [DiagGauss] 39 | min_covar=1e-8 40 | kappa=1e-4 41 | m0=0.0 42 | a0=1.0 43 | b0=2.0 44 | 45 | [DiagGaussHelp] 46 | kappa=Precision factor for the Gauss-Gamma prior distribution. Set very small to let means be learned from data. 47 | m0=Mean for Gauss-Gamma prior distribution 48 | 49 | [Mult] 50 | lambda=0.01 51 | 52 | [MultHelp] 53 | lambda=parameter for symmetric Dirichlet prior over each topic's word distribution 54 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/onlinedata.conf: -------------------------------------------------------------------------------- 1 | # Keyword options for online (minibatch) traversal of dataset 2 | # specify how many batches to divide data into 3 | 4 | [OnlineDataPrefs] 5 | nBatch=10 6 | nLap=1 7 | 8 | [OnlineDataPrefsHelp] 9 | nBatch=Number of batches (aka minibatches) to split up dataset into. 10 | nLap=Number of times to cycle thru all batches in dataset. -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/config/output.conf: -------------------------------------------------------------------------------- 1 | # Keyword options for how to name, save, and display experimental progress. 2 | 3 | [OutputPrefs] 4 | jobname=defaultjob 5 | taskid=1 6 | nTask=1 7 | 8 | traceEvery=1. 9 | printEvery=1. 10 | saveEvery=5. 11 | 12 | [OutputPrefsHelp] 13 | jobname=String name of current experiment. This name is hashed to create a unique random seed, which controls initialization and algorithm execution. 14 | taskid=Integer ID of current run/trial/initialization. Must be >= 1. 
15 | nTask=Number of runs/trials/initializations to perform for single experiment. 16 | 17 | traceEvery=Number of laps (passes thru entire dataset) between saving ELBO values. Can be a fraction (like 0.5) to report partial-lap progress. 18 | printEvery=Number of laps (passes thru entire dataset) between printing status updates to log. Can be a fraction (like 0.5) to report partial-lap progress. 19 | saveEvery=Number of laps (passes thru entire dataset) between saving model's global parameters to disk. Can be a fraction (like 0.5) to report partial-lap progress. 20 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/data/DataObj.py: -------------------------------------------------------------------------------- 1 | ''' 2 | DataObj.py 3 | 4 | General abstract base class for all data objects, 5 | whether they are full datasets or iterators over small batches of data 6 | 7 | Attributes 8 | ------- 9 | nObs : 10 | ''' 11 | 12 | class DataObj(object): 13 | @classmethod 14 | def read_from_mat(self, matfilepath): 15 | ''' Constructor for building data object from disk 16 | ''' 17 | pass 18 | 19 | def __init__(self, *args, **kwargs): 20 | ''' Constructor for building data object from scratch in memory 21 | ''' 22 | pass 23 | 24 | def get_short_name(self): 25 | ''' Returns string with short name (at most 10 char) of this data object, 26 | with no spaces and only alpha-numeric characters. 27 | Useful for creating filepaths specific for this data object. 28 | ''' 29 | if hasattr(self, 'shortname'): 30 | return self.shortname 31 | return "MyData%d" % (self.nObs) 32 | 33 | def get_text_summary(self, **kwargs): 34 | ''' Returns string with human-readable description of this dataset 35 | e.g. source, author/creator, etc. 36 | ''' 37 | if hasattr(self, 'summary'): 38 | return self.summary 39 | s = '%s. 
    def add_data(self, DataObj):
        ''' Updates (in-place) the dataset to include provided data.

        NOTE(review): the parameter name `DataObj` shadows the DataObj class
        itself; presumably callers pass another data object of the same
        concrete type -- confirm before implementing subclass overrides.
        '''
        pass
class MinibatchIteratorFromDisk( MinibatchIterator):
    # Stub subclass: intended to read data that has been pre-split into
    # batches stored as .mat files on disk (see module docstring).

    def __init__(self):
        # Not implemented yet. Per the module docstring, construction is
        # expected to take a list of valid filepaths to .mat files.
        raise NotImplementedError("TODO")
23 | ######################################################### E-step 24 | def log_pdf( self ): 25 | ''' Returns log p( x | theta ) 26 | ''' 27 | pass 28 | 29 | def E_log_pdf( self ): 30 | ''' Returns E[ log p( x | theta ) ] under q(theta) <- this distr 31 | ''' 32 | pass 33 | 34 | ######################################################### Global updates 35 | ######################################################### M-step 36 | def get_post_distr( self, SS ): 37 | ''' Create new Distr object with posterior params 38 | ''' 39 | pass 40 | 41 | def post_update_soVB( self, rho, *args ): 42 | ''' Stochastic online update of internal params 43 | ''' 44 | pass 45 | 46 | 47 | ######################################################### ELBO terms 48 | ######################################################### 49 | def get_log_norm_const(self): 50 | ''' Returns log( Z ), where 51 | PDF(x) := 1/Z(theta) f( x | theta ) 52 | ''' 53 | pass 54 | 55 | def get_entropy( self ): 56 | ''' Returns entropy of this distribution 57 | H[ p(x) ] = -1*\int p(x|theta) log p(x|theta) dx 58 | ''' 59 | pass 60 | 61 | ######################################################### Accessors 62 | ######################################################### 63 | 64 | ######################################################### I/O Utils 65 | ######################################################### 66 | def to_dict(self): 67 | pass 68 | 69 | def from_dict(self, pDict): 70 | pass -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/distr/__init__.py: -------------------------------------------------------------------------------- 1 | from Distr import Distr 2 | 3 | from BetaDistr import BetaDistr 4 | from GaussDistr import GaussDistr 5 | from ZMGaussDistr import ZMGaussDistr 6 | from WishartDistr import WishartDistr 7 | from GaussWishDistr import GaussWishDistr 8 | from DirichletDistr import DirichletDistr 9 | from GaussGammaDistr import GaussGammaDistr 10 | 
def init_global_params(hmodel, Data, initname=None, prefix='Best', **kwargs):
    ''' Initialize (in-place) the global params of the given hmodel
    by copying the global parameters of a previously saved hmodel.

    Only global parameters are modified.
    This does NOT alter settings of hmodel's prior distribution.

    Args
    -------
    hmodel : bnpy model object to initialize
    Data : bnpy Data object whose dimensions must match resulting hmodel
    initname : valid filesystem path to stored result
    prefix : filename prefix selecting which saved snapshot to load

    Returns
    -------
    None. hmodel modified in-place.

    Raises
    -------
    ValueError if initname is None, or is neither a directory of saved
    bnpy results nor a .mat file.
    '''
    if initname is None:
        # Fail fast with a clear message instead of a TypeError below.
        raise ValueError('initname must be a valid filesystem path, got None')
    if os.path.isdir(initname):
        init_global_params_from_bnpy_format(hmodel, Data, initname, prefix)
    elif initname.endswith('.mat'):
        # Handle external formats (not bnpy models) saved as a MAT file.
        # endswith is deliberate: the old substring test (count('.mat') > 0)
        # would also match paths like 'foo.mat.bak', which was accidental.
        MatDict = scipy.io.loadmat(initname)
        hmodel.set_global_params(**MatDict)
    else:
        raise ValueError('Unrecognized init file: %s' % (initname))


def init_global_params_from_bnpy_format(hmodel, Data, initname, prefix):
    ''' Initialize hmodel in-place from a saved bnpy model directory.

    When the stored model matches hmodel's allocation, observation, and
    inference types, global parameters are copied over directly.
    Otherwise, local params computed by the stored model on Data are used
    to run one global parameter update of hmodel.
    '''
    storedModel = ModelReader.load_model(initname, prefix)
    # TODO check if dimension matches
    # 'is' is the proper comparison for exact class identity.
    aTypesMatch = type(storedModel.allocModel) is type(hmodel.allocModel)
    oTypesMatch = type(storedModel.obsModel) is type(hmodel.obsModel)
    inferTypesMatch = storedModel.inferType == hmodel.inferType

    if aTypesMatch and oTypesMatch and inferTypesMatch:
        hmodel.set_global_params(hmodel=storedModel)
    else:
        # Fall back: compute local params under the stored model, then use
        # the resulting suff stats to update the target model's globals.
        LP = storedModel.calc_local_params(Data)
        SS = hmodel.get_global_suff_stats(Data, LP)
        hmodel.update_global_params(SS)
23 | Global Paramters are: 24 | lamA, lamB = K x K stochastic block matrix 25 | theta = N x K matrix of community membership probabilities 26 | ''' 27 | PRNG = np.random.RandomState(seed) 28 | N = Data.nNodeTotal 29 | if initname == 'randexamples': 30 | # Generate a sparse matrix given observed positive edges 31 | #Data.to_sparse_matrix() 32 | # Create assortative stochastic block matrix 33 | lamA = np.zeros( K ) + (Data.nPosEdges / K) # assortative ( K x 1 ) vs. (K x K) 34 | lamB = np.zeros( K ) + (Data.nAbsEdges / (K*K)) # assortative 35 | # Create theta used for 36 | theta = np.zeros( (N,K) ) 37 | alpha = np.ones(K) / K 38 | for ii in xrange(N): 39 | theta[ii, :] = PRNG.dirichlet(alpha) 40 | 41 | # Initialize global stick-breaking weights beta to be 1/K (uniform) 42 | beta = np.ones(K) / K 43 | # Set the global parameters for the hmodel 44 | hmodel.set_global_params(K=K, beta=beta, lamA=lamA, lamB=lamB, theta=theta) 45 | return 46 | else: 47 | raise NotImplementedError('Unrecognized initname ' + initname) 48 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/init/FromScratchGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | FromScratchGauss.py 3 | 4 | Initialize params of a mixture model with gaussian observations from scratch. 
def init_global_params(hmodel, Data, initname='randexamples', seed=0, K=0, **kwargs):
    ''' Initialize hmodel's global parameters in-place, from scratch.

    Builds a responsibility matrix `resp` according to `initname`, then
    runs one global summary + update step of hmodel.

    Args
    -------
    hmodel : bnpy model object to initialize (modified in-place)
    Data : bnpy XData-like object with fields X (nObs x dim), nObs, dim
    initname : name of initialization routine, one of
        {'randexamples', 'randexamplesbydist', 'randsoftpartition',
         'randomnaive'}
    seed : integer seed for the pseudo-random generator
    K : integer number of components to create

    Raises
    -------
    ValueError if initname is not recognized. (Previously an unknown
    initname fell through and crashed with a NameError on 'resp'.)
    '''
    PRNG = np.random.RandomState(seed)
    if initname == 'randexamples':
        # Choose K items uniformly at random from the Data;
        # each chosen item becomes the sole member of one component.
        resp = np.zeros((Data.nObs, K))
        permIDs = PRNG.permutation(Data.nObs).tolist()
        for k in xrange(K):
            resp[permIDs[k], k] = 1.0
    elif initname == 'randexamplesbydist':
        # kmeans++-style selection: pick the first item at random, then
        # each subsequent item with probability proportional to squared
        # euclidean distance from the closest already-chosen item.
        objID = discrete_single_draw(np.ones(Data.nObs), PRNG)
        chosenObjIDs = list([objID])
        minDistVec = np.inf * np.ones(Data.nObs)
        for k in range(1, K):
            curDistVec = np.sum((Data.X - Data.X[objID])**2, axis=1)
            minDistVec = np.minimum(minDistVec, curDistVec)
            objID = discrete_single_draw(minDistVec, PRNG)
            chosenObjIDs.append(objID)
        resp = np.zeros((Data.nObs, K))
        for k in xrange(K):
            resp[chosenObjIDs[k], k] = 1.0
    elif initname == 'randsoftpartition':
        # Randomly assign all data items some mass in each of K components,
        # normalized so each row of resp sums to one.
        resp = PRNG.rand(Data.nObs, K)
        resp = resp / np.sum(resp, axis=1)[:, np.newaxis]
    elif initname == 'randomnaive':
        # Generate K "fake" examples from the diagonalized data covariance,
        # assigning each fake example to its own component.
        Sig = np.sqrt(np.diag(np.cov(Data.X.T)))
        Xfake = Sig * PRNG.randn(K, Data.dim)
        Data = XData(Xfake)
        resp = np.eye(K)
    else:
        # Explicit error instead of the old silent fall-through, which
        # raised an uninformative NameError on the undefined 'resp'.
        raise ValueError('Unrecognized initname: %s' % (initname))

    LP = dict(resp=resp)
    SS = hmodel.get_global_suff_stats(Data, LP)
    hmodel.update_global_params(SS)
def calc_resp_from_true_labels(Data):
    ''' Build a hard-assignment responsibility matrix from Data.TrueLabels.

    Returns
    -------
    resp : 2D array, size Data.nObs x Ktrue
        resp[n, k] = 1.0 exactly when item n carries the k-th distinct
        true label (distinct labels taken in sorted order via np.unique).
    '''
    labels = Data.TrueLabels
    distinctLabels = np.unique(labels)
    resp = np.zeros((Data.nObs, len(distinctLabels)))
    for col, labelVal in enumerate(distinctLabels):
        resp[labels == labelVal, col] = 1.0
    return resp
def save_obs_model(obsmodel, fpath, prefix, doLinkBest=False):
    ''' Save observation model to a MAT file named <prefix>ObsModel.mat.

    Per-component parameter dicts are stacked along the last axis so that
    each field becomes a single array covering all K components. When
    doLinkBest is set (and prefix is not already 'Best'), the
    BestObsModel.mat symlink is refreshed to point at the new file.
    '''
    outmatfile = os.path.join(fpath, prefix + 'ObsModel.mat')
    compDicts = [obsmodel.comp[k].to_dict() for k in xrange(obsmodel.K)]
    saveDict = obsmodel.to_dict_essential()
    for fieldName in compDicts[0].keys():
        if fieldName in saveDict:
            continue
        # Stack this field across all K components into one array.
        saveDict[fieldName] = np.squeeze(
            np.dstack([cd[fieldName] for cd in compDicts]))
    scipy.io.savemat(outmatfile, saveDict, oned_as='row')
    if doLinkBest and prefix != 'Best':
        create_best_link(outmatfile, os.path.join(fpath, 'BestObsModel.mat'))
'''
ioutil : input/output utilities for bnpy
(model serialization via ModelWriter/ModelReader, argument parsing).
'''
import ModelWriter
import ModelReader
import BNPYArgParser

__all__ = ['BNPYArgParser', 'ModelWriter', 'ModelReader']
14 | ''' 15 | super(type(self),self).__init__(**kwargs) 16 | self.rhodelay = self.algParams['rhodelay'] 17 | self.rhoexp = self.algParams['rhoexp'] 18 | 19 | def fit(self, hmodel, DataIterator, SS=None): 20 | ''' Run soVB learning algorithm, fit global parameters of hmodel to Data 21 | Returns 22 | -------- 23 | LP : local params from final pass of Data 24 | Info : dict of run information, with fields 25 | evBound : final ELBO evidence bound 26 | status : str message indicating reason for termination 27 | {'all data processed'} 28 | ''' 29 | LP = None 30 | rho = 1.0 # Learning rate 31 | nBatch = float(DataIterator.nBatch) 32 | 33 | # Set-up progress-tracking variables 34 | iterid = -1 35 | lapFrac = np.maximum(0, self.algParams['startLap'] - 1.0/nBatch) 36 | if lapFrac > 0: 37 | # When restarting an existing run, 38 | # need to start with last update for final batch from previous lap 39 | DataIterator.lapID = int(np.ceil(lapFrac)) - 1 40 | DataIterator.curLapPos = nBatch - 2 41 | iterid = int(nBatch * lapFrac) - 1 42 | 43 | self.set_start_time_now() 44 | while DataIterator.has_next_batch(): 45 | 46 | # Grab new data 47 | Dchunk = DataIterator.get_next_batch() 48 | 49 | # Update progress-tracking variables 50 | iterid += 1 51 | lapFrac += 1.0/nBatch 52 | self.set_random_seed_at_lap(lapFrac) 53 | 54 | # M step with learning rate 55 | if SS is not None: 56 | rho = (iterid + self.rhodelay) ** (-1.0 * self.rhoexp) 57 | hmodel.update_global_params(SS, rho) 58 | 59 | # E step 60 | LP = hmodel.calc_local_params(Dchunk) 61 | SS = hmodel.get_global_suff_stats(Dchunk, LP, doAmplify=True) 62 | 63 | # ELBO calculation 64 | evBound = hmodel.calc_evidence(Dchunk, SS, LP) 65 | 66 | # Save and display progress 67 | self.add_nObs(Dchunk.nObs) 68 | self.save_state(hmodel, iterid, lapFrac, evBound) 69 | self.print_state(hmodel, iterid, lapFrac, evBound) 70 | 71 | #Finally, save, print and exit 72 | status = "all data processed." 
"""
The :mod:`learnalg` module provides standard learning algorithms,
such as EM and VB (Variational Bayes).
"""
# Docstring fixed: the original read ``The:mod:`learnalg'`` -- the missing
# space and mismatched closing quote break the Sphinx :mod: role.
from .LearnAlg import LearnAlg
from .VBLearnAlg import VBLearnAlg
from .StochasticOnlineVBLearnAlg import StochasticOnlineVBLearnAlg
from .MemoizedOnlineVBLearnAlg import MemoizedOnlineVBLearnAlg
import MergeMove

from .MergePairSelector import MergePairSelector
from .MergeTracker import MergeTracker

__all__ = ['LearnAlg', 'VBLearnAlg', 'StochasticOnlineVBLearnAlg',
           'MemoizedOnlineVBLearnAlg', 'MergeMove',
           'MergeTracker', 'MergePairSelector']
def np2flatstr( X, fmt="% .6f" ):
    ''' Render array X as a single whitespace-separated string.

    Each entry of X (flattened, row-major) is formatted with `fmt`.
    '''
    flatVals = np.asarray(X).flatten()
    return ' '.join(fmt % (v) for v in flatVals)
def dotATB(A, B):
    ''' Compute matrix product A.T * B
    using efficient BLAS routines (low-level machine code)

    Dispatches to fblas.dgemm (with trans_a) when A has more columns
    than B; otherwise plain np.dot is used.
    '''
    useBlas = A.shape[1] > B.shape[1]
    if useBlas:
        return fblas.dgemm(1.0, A, B, trans_a=True)
    return np.dot(A.T, B)
def discrete_single_draw_vectorized( Pmat, randstate=np.random):
    ''' Draw one categorical sample per row of Pmat.

    Each row of Pmat holds positive (unnormalized) weights. Returns a
    vector giving, for each row, the index of the sampled category.
    Consumes exactly one rand() value per row of Pmat.
    '''
    cumWeights = np.cumsum(Pmat, axis=1)
    draws = randstate.rand(Pmat.shape[0]) * cumWeights[:, -1]
    # First column whose cumulative weight strictly exceeds the row's draw;
    # argmax on a boolean matrix returns the first True in each row.
    return np.argmax(cumWeights > draws[:, np.newaxis], axis=1)
K 48 | ''' 49 | totals = np.cumsum(ps) 50 | return np.searchsorted(totals, randstate.rand()*totals[-1]) 51 | 52 | def mvnrand(mu, Sigma, N=1, PRNG=np.random.RandomState()): 53 | if type(PRNG) == int: 54 | PRNG = np.random.RandomState(PRNG) 55 | return PRNG.multivariate_normal(mu, Sigma, (N)) 56 | 57 | def rotateCovMat( Sigma, theta=np.pi/4): 58 | ''' Returns valid covariance matrix with same eigen structure, rotated by theta radians 59 | ''' 60 | RotMat = [[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]] 61 | RotMat = np.asarray( RotMat) 62 | Lam,V = np.linalg.eig( Sigma ) 63 | Lam = np.diag(Lam) 64 | Vrot = np.dot( V, RotMat ) 65 | return np.dot( Vrot, np.dot( Lam, Vrot.T) ) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/util/SpecialFuncUtil.py: -------------------------------------------------------------------------------- 1 | ''' 2 | SpecialFuncUtil.py 3 | 4 | Special mathematical functions, like multivariate gammaln. 5 | ''' 6 | 7 | from collections import defaultdict 8 | import numpy as np 9 | from scipy.special import gammaln, digamma 10 | 11 | LOGPI = np.log(np.pi) 12 | LOGTWO = np.log(2.) 13 | LOGTWOPI = np.log( 2.*np.pi ) 14 | EPS = 10*np.finfo(float).eps 15 | 16 | MVgCache = defaultdict( lambda: dict()) 17 | def MVgammaln(x, D): 18 | ''' Compute log of the D-dimensional multivariate Gamma func. for input x 19 | 20 | Notes: Caching gives big speedup! 21 | ------- 22 | caching : 208 sec for 5 iters of CGS on K=50, D=2 problem with N=10000 23 | no cache : 300 sec 24 | ''' 25 | try: 26 | return MVgCache[D][x] 27 | except KeyError: 28 | result = gammaln(x+ 0.5*(1 - np.arange(1,D+1)) ).sum() + 0.25*D*(D-1)*LOGPI 29 | MVgCache[D][x] = result 30 | return result 31 | 32 | def MVdigamma(x, D): 33 | ''' Compute the first-derivative of the log of the D-dim. 
Gamma function 34 | ''' 35 | return digamma(x + 0.5 * (1 - np.arange(1,D+1))).sum() 36 | 37 | def logsumexp(logA, axis=None): 38 | ''' Efficiently compute log(sum(exp(...))) for input matrix "logA" 39 | Computation is both vectorized and numerically stable. 40 | ''' 41 | logA = np.asarray(logA) 42 | logAmax = logA.max(axis=axis) 43 | if axis is None: 44 | logA = logA - logAmax 45 | elif axis==1: 46 | logA = logA - logAmax[:,np.newaxis] 47 | elif axis==0: 48 | logA = logA - logAmax[np.newaxis,:] 49 | assert np.allclose( logA.max(), 0.0 ) 50 | logA = np.log( np.sum( np.exp(logA), axis=axis ) ) 51 | return logA + logAmax 52 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/util/VerificationUtil.py: -------------------------------------------------------------------------------- 1 | ''' 2 | VerificationUtil.py 3 | 4 | Verification utilities, for checking whether numerical variables are "equal". 5 | ''' 6 | import numpy as np 7 | 8 | def isEvenlyDivisibleFloat(a, b, margin=1e-6): 9 | ''' Returns true/false for whether a is evenly divisible by b 10 | within a (small) numerical tolerance 11 | Examples 12 | -------- 13 | >>> isEvenlyDivisibleFloat( 1.5, 0.5) 14 | True 15 | >>> isEvenlyDivisibleFloat( 1.0, 1./3) 16 | True 17 | ''' 18 | cexact = np.asarray(a)/float(b) 19 | cround = np.round(cexact) 20 | return abs(cexact - cround) < margin 21 | 22 | def closeAtMSigFigs(A, B, M=10, tol=5): 23 | ''' Returns true/false for whether A and B are numerically "close" 24 | aka roughly equal at M significant figures 25 | 26 | Only makes sense for numbers on scale of abs. value 1.0 or larger. 27 | Log evidences will usually always be at this scale. 
28 | 29 | Examples 30 | -------- 31 | >>> closeAtMSigFigs(1234, 1000, M=1) # margin is 500 32 | True 33 | >>> closeAtMSigFigs(1234, 1000, M=2) # margin is 50 34 | False 35 | >>> closeAtMSigFigs(1034, 1000, M=2) # margin is 50 36 | True 37 | >>> closeAtMSigFigs(1005, 1000, M=3) # margin is 5 38 | True 39 | 40 | >>> closeAtMSigFigs(44.5, 49.5, M=1) # margin is 5 41 | True 42 | >>> closeAtMSigFigs(44.5, 49.501, M=1) # just over the margin 43 | False 44 | >>> closeAtMSigFigs(44.499, 49.5, M=1) 45 | False 46 | ''' 47 | A = float(A) 48 | B = float(B) 49 | # Enforce abs(A) >= abs(B) 50 | if abs(A) < abs(B): 51 | tmp = A 52 | A = B 53 | B = tmp 54 | assert abs(A) >= abs(B) 55 | 56 | # Find the scale that A (the larger of the two) possesses 57 | # A ~= 10 ** (P10) 58 | P10 = int(np.floor(np.log10(abs(A)))) 59 | 60 | # Compare the difference between A and B 61 | # to the allowed margin THR 62 | diff = abs(A - B) 63 | if P10 >= 0: 64 | THR = tol * 10.0**(P10 - M) 65 | THR = (1 + 1e-11) * THR 66 | # make THR just a little bigger to avoid issues where 2.0 and 1.95 67 | # aren't equal at 0.05 margin due to rounding errors 68 | return np.sign(A) == np.sign(B) and diff <= THR 69 | else: 70 | THR = tol * 10.0**(-M) 71 | THR = (1 + 1e-11) * THR 72 | return diff <= THR 73 | 74 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/util/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`util` module gathers utility functions 3 | for IO, special functions like "logsumexp", 4 | and various random sampling functions 5 | """ 6 | 7 | import RandUtil 8 | 9 | from .IOUtil import np2flatstr, flatstr2np 10 | from .LinAlgUtil import dotATA, dotATB, dotABT 11 | from .RandUtil import discrete_single_draw, discrete_single_draw_vectorized 12 | from .RandUtil import choice 13 | from .SpecialFuncUtil import MVgammaln, MVdigamma, digamma, gammaln 14 | from .SpecialFuncUtil import 
LOGTWO, LOGPI, LOGTWOPI, EPS 15 | from .SpecialFuncUtil import logsumexp 16 | from .VerificationUtil import closeAtMSigFigs, isEvenlyDivisibleFloat 17 | 18 | __all__ = ['RandUtil', 19 | 'np2flatstr', 'flatstr2np', 20 | 'dotATA', 'dotATB', 'dotABT', 21 | 'discrete_single_draw', 22 | 'MVgammaln', 'MVdigamma', 'logsumexp', 'digamma', 'gammaln', 23 | 'closeAtMSigFigs', 'isEvenlyDivisibleFloat', 24 | 'LOGTWO', 'LOGTWOPI', 'LOGPI', 'EPS'] 25 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/bnpy/viz/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`viz` module provides visualization capability 3 | """ 4 | # TODO: do a check for wxpython issues 5 | 6 | import GaussViz 7 | import BarsViz 8 | import PlotELBO 9 | import PlotComps 10 | 11 | __all__ = ['GaussViz', 'BarsViz', 'PlotELBO', 'PlotComps'] -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/Bars2D.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Bars2D.py 3 | 4 | Generic functions for creating toy bars data 5 | ''' 6 | import numpy as np 7 | 8 | def Create2DBarsTopicWordParams(V, K, fracMassOnTopic=0.95, PRNG=np.random): 9 | ''' Create parameters of each topics distribution over words 10 | 11 | Args 12 | --------- 13 | V : int vocab size 14 | K : int number of topics 15 | fracMassOnTopic : fraction of total probability mass for "on-topic" words 16 | PRNG : random number generator (for reproducibility) 17 | 18 | Returns 19 | --------- 20 | topics : K x V matrix, real positive numbers whose rows sum to one 21 | ''' 22 | sqrtV = int(np.sqrt(V)) 23 | BarWidth = sqrtV/ (K/2) # number of consecutive words in each bar 24 | B = V/ (K/2) # total number of "on topic" words in each bar 25 | 26 | topics = np.zeros((K,V)) 27 | # Make horizontal bars 28 | for k in range(K/2): 29 | wordIDs = range(B*k, 
B*(k+1)) 30 | topics[k, wordIDs] = 1.0 31 | 32 | # Make vertical bars 33 | for k in range(K/2): 34 | wordIDs = list() 35 | for b in range(sqrtV): 36 | start = b * sqrtV + k*BarWidth 37 | wordIDs.extend( range(start, start+BarWidth)) 38 | topics[K/2 + k, wordIDs] = 1.0 39 | 40 | # Add smoothing mass to all entries in "topics" 41 | # instead of picking this value out of thin air, instead, 42 | # set it so that 95% of the mass of each topic is on the "on-topic" bar words 43 | # if s is the smoothing mass added, and B is num "on topic" words, then 44 | # fracMassOnTopic = (1 + s) * B / ( (1+s)*B + s*(V-B) ), and we solve for s 45 | smoothMass = (1 - fracMassOnTopic)/(fracMassOnTopic*V - B)*B 46 | topics += (2 * smoothMass) * PRNG.rand(K,V) 47 | 48 | # Ensure each row of topics is a probability vector 49 | for k in xrange(K): 50 | topics[k,:] /= np.sum(topics[k,:]) 51 | 52 | assert np.sum(topics[0, :B]) > fracMassOnTopic - 0.05 53 | assert np.sum(topics[1, B:2*B]) > fracMassOnTopic - 0.05 54 | assert np.sum(topics[-1, wordIDs]) > fracMassOnTopic - 0.05 55 | return topics -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BarsK10V900.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK10V900.py 3 | 4 | Toy Bars data, with K=10 topics and vocabulary size 900. 5 | 5 horizontal bars, and 5 vertical bars. 6 | 7 | Generated via the standard LDA generative model 8 | see WordsData.CreateToyDataFromLDAModel for details. 
9 | ''' 10 | import numpy as np 11 | from bnpy.data import WordsData, AdmixMinibatchIterator 12 | import Bars2D 13 | 14 | SEED = 8675309 15 | PRNG = np.random.RandomState(SEED) 16 | 17 | # FIXED DATA GENERATION PARAMS 18 | K = 10 # Number of topics 19 | V = 900 # Vocabulary Size 20 | gamma = 0.5 # hyperparameter over doc-topic distribution 21 | 22 | Defaults = dict() 23 | Defaults['nDocTotal'] = 2000 24 | Defaults['nWordsPerDoc'] = 2 * V / (K/2) 25 | 26 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 27 | trueBeta = np.ones(K) 28 | trueBeta /= trueBeta.sum() 29 | Defaults['topic_prior'] = gamma * trueBeta 30 | 31 | # TOPIC by WORD distribution 32 | Defaults['topics'] = Bars2D.Create2DBarsTopicWordParams(V, K, PRNG=PRNG) 33 | 34 | def get_data_info(**kwargs): 35 | if 'nDocTotal' in kwargs: 36 | nDocTotal = kwargs['nDocTotal'] 37 | else: 38 | nDocTotal = Defaults['nDocTotal'] 39 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d. Typically 1-3 bars per doc.' % (K, nDocTotal) 40 | 41 | def get_data(**kwargs): 42 | ''' 43 | Args 44 | ------- 45 | seed 46 | nDocTotal 47 | nWordsPerDoc 48 | ''' 49 | Data = CreateToyDataFromLDAModel(seed=SEED, **kwargs) 50 | Data.summary = get_data_info(**kwargs) 51 | return Data 52 | 53 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 54 | dataorderseed=0, **kwargs): 55 | ''' 56 | Args 57 | ------- 58 | seed 59 | nDocTotal 60 | nWordsPerDoc 61 | ''' 62 | Data = CreateToyDataFromLDAModel(seed=seed, **kwargs) 63 | DataIterator = AdmixMinibatchIterator(Data, 64 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 65 | DataIterator.summary = get_data_info(**kwargs) 66 | return DataIterator 67 | 68 | def CreateToyDataFromLDAModel(**kwargs): 69 | for key in Defaults: 70 | if key not in kwargs: 71 | kwargs[key] = Defaults[key] 72 | return WordsData.CreateToyDataFromLDAModel(**kwargs) 73 | 74 | if __name__ == '__main__': 75 | import bnpy.viz.BarsViz 76 | WData = CreateToyDataFromLDAModel(seed=SEED) 77 | 
bnpy.viz.BarsViz.plotExampleBarsDocs(WData) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BarsK50V2500.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK50V2500.py 3 | 4 | Toy Bars data, with K=50 topics and V=2500 vocabulary size. 5 | 25 horizontal bars, and 25 vertical vertical ones. 6 | 7 | Generated via the standard LDA generative model 8 | see WordsData.CreateToyDataFromLDAModel for details. 9 | 10 | Usage 11 | --------- 12 | To visualize example documents, execute this file as a script 13 | >> python BarsK50V2500.py 14 | 15 | To visualize document "1" from within Python 16 | >> Data = BarsK50V2500.get_data(nDocTotal=5) 17 | >> wid1 = Data.word_id[ Data.doc_range[0,0]:Data.doc_range[0,1] ] 18 | >> wct1 = Data.word_count[ Data.doc_range[0,0]:Data.doc_range[0,1] ] 19 | Make histogram with counts for each of the vocab word types 20 | >> whist = np.zeros(Data.vocab_size) 21 | >> whist[wid1] = wct1 22 | # Plot it as a 2D image 23 | >> whist2D = np.reshape( whist, (50, 50) ) 24 | >> pylab.imshow(whist2D, interpolation='nearest') 25 | 26 | ''' 27 | import numpy as np 28 | from bnpy.data import WordsData, AdmixMinibatchIterator 29 | import Bars2D 30 | 31 | SEED = 8675309 32 | PRNG = np.random.RandomState(SEED) 33 | 34 | # FIXED DATA GENERATION PARAMS 35 | K = 50 # Number of topics 36 | V = 2500 # Vocabulary Size 37 | gamma = 0.75 # hyperparameter over doc-topic distribution 38 | 39 | Defaults = dict() 40 | Defaults['nDocTotal'] = 2000 41 | Defaults['nWordsPerDoc'] = 5 * V / (K/2) 42 | 43 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 44 | trueBeta = np.ones(K) 45 | trueBeta /= trueBeta.sum() 46 | Defaults['topic_prior'] = gamma * trueBeta 47 | 48 | # TOPIC by WORD distribution 49 | Defaults['topics'] = Bars2D.Create2DBarsTopicWordParams(V, K, PRNG=PRNG) 50 | 51 | def get_data_info(**kwargs): 52 | if 'nDocTotal' in kwargs: 53 | nDocTotal = 
kwargs['nDocTotal'] 54 | else: 55 | nDocTotal = Defaults['nDocTotal'] 56 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d. Typically 2-4 bars per doc' % (K, nDocTotal) 57 | 58 | def get_data(**kwargs): 59 | ''' 60 | Args 61 | ------- 62 | seed 63 | nDocTotal 64 | nWordsPerDoc 65 | ''' 66 | Data = CreateToyDataFromLDAModel(seed=SEED, **kwargs) 67 | Data.summary = get_data_info(**kwargs) 68 | return Data 69 | 70 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 71 | dataorderseed=0, **kwargs): 72 | ''' 73 | Args 74 | ------- 75 | seed 76 | nDocTotal 77 | nWordsPerDoc 78 | ''' 79 | Data = CreateToyDataFromLDAModel(seed=seed, **kwargs) 80 | DataIterator = AdmixMinibatchIterator(Data, 81 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 82 | DataIterator.summary = get_data_info(**kwargs) 83 | return DataIterator 84 | 85 | def CreateToyDataFromLDAModel(**kwargs): 86 | for key in Defaults: 87 | if key not in kwargs: 88 | kwargs[key] = Defaults[key] 89 | return WordsData.CreateToyDataFromLDAModel(**kwargs) 90 | 91 | if __name__ == '__main__': 92 | import bnpy.viz.BarsViz 93 | WData = CreateToyDataFromLDAModel(seed=SEED) 94 | bnpy.viz.BarsViz.plotExampleBarsDocs(WData) 95 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BarsK6V9.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK6V9.py 3 | 4 | Toy Bars data, with K=6 topics and vocabulary size 9. 5 | 3 horizontal bars, and 3 vertical bars. 6 | 7 | Generated via the standard LDA generative model 8 | see WordsData.CreateToyDataFromLDAModel for details. 
9 | ''' 10 | import numpy as np 11 | from bnpy.data import WordsData, AdmixMinibatchIterator 12 | import Bars2D 13 | 14 | SEED = 8675309 15 | PRNG = np.random.RandomState(SEED) 16 | 17 | # FIXED DATA GENERATION PARAMS 18 | K = 6 # Number of topics 19 | V = 9 # Vocabulary Size 20 | gamma = 0.5 # hyperparameter over doc-topic distribution 21 | 22 | Defaults = dict() 23 | Defaults['nDocTotal'] = 200 24 | Defaults['nWordsPerDoc'] = 25 25 | 26 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 27 | trueBeta = np.ones(K) 28 | trueBeta /= trueBeta.sum() 29 | Defaults['topic_prior'] = gamma * trueBeta 30 | 31 | # TOPIC by WORD distribution 32 | Defaults['topics'] = Bars2D.Create2DBarsTopicWordParams(V, K, PRNG=PRNG) 33 | 34 | def get_data_info(**kwargs): 35 | if 'nDocTotal' in kwargs: 36 | nDocTotal = kwargs['nDocTotal'] 37 | else: 38 | nDocTotal = Defaults['nDocTotal'] 39 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d. Typically 1-3 bars per doc.' % (K, nDocTotal) 40 | 41 | def get_data(**kwargs): 42 | ''' 43 | Args 44 | ------- 45 | seed 46 | nDocTotal 47 | nWordsPerDoc 48 | ''' 49 | Data = CreateToyDataFromLDAModel(seed=SEED, **kwargs) 50 | Data.summary = get_data_info(**kwargs) 51 | return Data 52 | 53 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 54 | dataorderseed=0, **kwargs): 55 | ''' 56 | Args 57 | ------- 58 | seed 59 | nDocTotal 60 | nWordsPerDoc 61 | ''' 62 | Data = CreateToyDataFromLDAModel(seed=seed, **kwargs) 63 | DataIterator = AdmixMinibatchIterator(Data, 64 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 65 | DataIterator.summary = get_data_info(**kwargs) 66 | return DataIterator 67 | 68 | def CreateToyDataFromLDAModel(**kwargs): 69 | for key in Defaults: 70 | if key not in kwargs: 71 | kwargs[key] = Defaults[key] 72 | return WordsData.CreateToyDataFromLDAModel(**kwargs) 73 | 74 | if __name__ == '__main__': 75 | import bnpy.viz.BarsViz 76 | WData = CreateToyDataFromLDAModel(seed=SEED) 77 | bnpy.viz.BarsViz.plotExampleBarsDocs(WData) 
-------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BarsK8.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK8.py 3 | 4 | Toy Bars data, with K=8 topics 5 | 4 horizontal, and 4 vertical. 6 | ''' 7 | import numpy as np 8 | from bnpy.data import WordsData, AdmixMinibatchIterator 9 | 10 | Defaults = dict() 11 | Defaults['nDocTotal'] = 2000 12 | Defaults['nWordsPerDoc'] = 100 13 | 14 | SEED = 8675309 15 | 16 | # FIXED DATA GENERATION PARAMS 17 | K = 8 # Number of topics 18 | V = 16 # Vocabulary Size 19 | gamma = 0.5 # hyperparameter over doc-topic distribution 20 | 21 | # TOPIC by WORD distribution 22 | topics = np.zeros( (K,V) ) 23 | topics[0,:] = [ 9, 9, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 24 | topics[1,:] = [ 0, 0, 0, 0, 9, 9, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0] 25 | topics[2,:] = [ 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 9, 9, 0, 0, 0, 0] 26 | topics[3,:] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 9, 9] 27 | topics[4,:] = [ 8, 0, 0, 0, 8, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0] 28 | topics[5,:] = [ 0, 8, 0, 0, 0, 8, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0] 29 | topics[6,:] = [ 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 8, 0, 0, 0, 8, 0] 30 | topics[7,:] = [ 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 8, 0, 0, 0, 8] 31 | 32 | # Add "smoothing" term to each entry of the topic-word matrix 33 | # With V = 16 and 8 sets of bars, 34 | # smoothMass=0.02 yields 0.944 probability of drawing "on topic" word 35 | smoothMass = 0.02 * 8 36 | topics += smoothMass 37 | # Ensure each row of topics is a probability vector 38 | for k in xrange(K): 39 | topics[k,:] /= np.sum(topics[k,:]) 40 | Defaults['topics'] = topics 41 | 42 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 43 | trueBeta = np.hstack([1.1*np.ones(K/2), np.ones(K/2)]) 44 | trueBeta /= trueBeta.sum() 45 | Defaults['topic_prior'] = gamma * trueBeta 46 | 47 | def get_data_info(**kwargs): 48 | if 'nDocTotal' in kwargs: 49 | nDocTotal = kwargs['nDocTotal'] 50 | 
else: 51 | nDocTotal = Defaults['nDocTotal'] 52 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d.' % (K, nDocTotal) 53 | 54 | def get_data(**kwargs): 55 | ''' 56 | Args 57 | ------- 58 | seed 59 | nDocTotal 60 | nWordsPerDoc 61 | ''' 62 | Data = CreateToyDataFromLDAModel(seed=SEED, **kwargs) 63 | Data.summary = get_data_info(**kwargs) 64 | return Data 65 | 66 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 67 | dataorderseed=0, **kwargs): 68 | ''' 69 | Args 70 | ------- 71 | seed 72 | nDocTotal 73 | nWordsPerDoc 74 | ''' 75 | Data = CreateToyDataFromLDAModel(seed=seed, **kwargs) 76 | DataIterator = AdmixMinibatchIterator(Data, 77 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 78 | DataIterator.summary = get_data_info(**kwargs) 79 | return DataIterator 80 | 81 | def CreateToyDataFromLDAModel(**kwargs): 82 | for key in Defaults: 83 | if key not in kwargs: 84 | kwargs[key] = Defaults[key] 85 | return WordsData.CreateToyDataFromLDAModel(**kwargs) 86 | 87 | if __name__ == '__main__': 88 | import bnpy.viz.BarsViz 89 | WData = CreateToyDataFromLDAModel(seed=SEED) 90 | bnpy.viz.BarsViz.plotExampleBarsDocs(WData) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/BinaryGraphK5.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BinaryGraphK5.py 3 | 4 | Binary Toy Graph with K=5 communities. 
5 | ''' 6 | import numpy as np 7 | import random 8 | from bnpy.data import GraphData 9 | 10 | SEED = 8675309 11 | PRNG = np.random.RandomState(SEED) 12 | 13 | # FIXED DATA GENERATION PARAMS 14 | K = 5 # Number of communities 15 | N = 50 # Number of nodes 16 | beta_a = 0.1 # hyperparameter over block matrix entries 17 | beta_b = 0.1 # hyperparameter over block matrix entries 18 | 19 | Defaults = dict() 20 | Defaults['nNodeTotal'] = 50 21 | 22 | # Initialize adjacency matrix and stochastic block matrix 23 | sb = np.zeros( (K,K) ) + 0.01 24 | sb[0,0] = .9 25 | sb[1,1] = .9 26 | sb[2,2] = .9 27 | sb[3,3] = .9 28 | sb[4,4] = .9 29 | 30 | # function to generate adjacency matrix 31 | def gen_graph(K, N, sb): 32 | 33 | # define the edge indices and edge values 34 | edge_val = list() 35 | edge_exclude = list() # edges to exclude (10%) 36 | exclusion_thresh = 0.9 # 1 = no excluded edges 37 | 38 | # generate community memberships 39 | pi = np.zeros( (N,K) ) 40 | alpha = np.zeros(K) + .1 41 | for ii in xrange(N): 42 | pi[ii,:] = PRNG.dirichlet(alpha) 43 | 44 | for ii in xrange(N): 45 | for jj in xrange(ii+1,N): 46 | if ii != jj and ii < jj: 47 | s = PRNG.choice(5, 1, p=pi[ii,:]) 48 | r = PRNG.choice(5, 1, p=pi[jj,:]) 49 | # If this edge is not being exlcuded, just add to edge_id 50 | if PRNG.rand() <= exclusion_thresh: 51 | if PRNG.rand() < sb[s,r]: 52 | edge_val.append([ii,jj,1]) 53 | else: # include this as an edge that needs to be excluded 54 | if PRNG.rand() < sb[s,r]: 55 | edge_exclude.append([ii,jj,1]) 56 | else: 57 | edge_exclude.append([ii,jj,0]) 58 | 59 | edge_val = np.asarray(np.squeeze(edge_val), dtype=np.int32) 60 | edge_exclude = np.asarray(np.squeeze(edge_exclude), dtype=np.int32) 61 | 62 | return (edge_val, edge_exclude) 63 | 64 | # template function to wrap data in bnpy format 65 | def get_data(**kwargs): 66 | ''' Grab data from matfile specified by matfilepath 67 | ''' 68 | edge_val, edge_exclude = gen_graph(K,N,sb) 69 | Data = GraphData(edge_val = edge_val, 
nNodeTotal=N, edge_exclude=edge_exclude) 70 | Data.summary = get_data_info(K, Data.nNodeTotal, Data.nEdgeTotal) 71 | Data.get_edges_all() # Grab the full set of edges for inference 72 | return Data 73 | 74 | def get_minibatch_iterator(nBatch=10, nLap=1, dataorderseed=0, **kwargs): 75 | pass 76 | 77 | def get_data_info(K,N,E): 78 | return 'Toy Binary Graph Dataset where K=%d . N=%d. E=%d' % (K,N,E) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/DeadLeavesD25.py: -------------------------------------------------------------------------------- 1 | import DeadLeaves as DL 2 | 3 | DL.makeTrueParams(25) 4 | 5 | def get_data(**kwargs): 6 | return DL.get_data(**kwargs) 7 | 8 | def get_minibatch_iterator(**kwargs): 9 | return DL.get_minibatch_iterator(**kwargs) 10 | 11 | def get_short_name(): 12 | return DL.get_short_name() 13 | 14 | def get_data_info(): 15 | return DL.get_data_info() 16 | 17 | 18 | if __name__ == '__main__': 19 | DL.plotTrueCovMats(doShowNow=False) 20 | DL.plotImgPatchPrototypes() 21 | 22 | 23 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/NIPS.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NIPSCorpus.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | datadir = '/Users/daeil/Dropbox/research/bnpy/data/' 9 | NIPSmatfile = 'nips_bnpy.mat' 10 | matfilepath = os.environ['BNPYDATADIR'] + NIPSmatfile 11 | 12 | if not os.path.exists(matfilepath): 13 | matfilepath = datadir + NIPSmatfile 14 | 15 | def get_data(**kwargs): 16 | ''' Grab data from matfile specified by matfilepath 17 | ''' 18 | Data = WordsData.read_from_mat(matfilepath) 19 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 20 | return Data 21 | 22 | def get_minibatch_iterator(nBatch=10, nLap=1, 23 | dataorderseed=0, **kwargs): 24 | Data = 
WordsData.read_from_mat(matfilepath) 25 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, 26 | nLap=nLap, dataorderseed=dataorderseed) 27 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 28 | return DataIterator 29 | 30 | def get_data_info(D, V): 31 | return 'NIPS bag-of-words data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/demodata/NYTimes.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NYTimes.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = '/Users/daeil/Dropbox/research/liv-test/topic_models/data/nytimes_small/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'nyt_small_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'nyt_small_bnpy.mat' 13 | 14 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat( matfilepath ) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 22 | Data = WordsData.read_from_mat( matfilepath ) 23 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 24 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 25 | return DataIterator 26 | 27 | def get_data_info(D, V): 28 | return 'NYTimes (Small) Data. D=%d. 
VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/allocmodel/TestMixModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for MixModel.py 3 | ''' 4 | import numpy as np 5 | import bnpy 6 | from bnpy.allocmodel import MixModel 7 | from bnpy.suffstats import SuffStatBag 8 | 9 | class TestMixModelEMUnifAlpha(object): 10 | def shortDescription(self): 11 | return None 12 | 13 | def setUp(self): 14 | ''' 15 | Create a stupid simple case for making sure we're calculating things correctly 16 | ''' 17 | self.alpha0 = 1.0 18 | self.allocM = MixModel('EM', dict(alpha0=self.alpha0)) 19 | self.N = np.asarray([1.,2.,3,4,5.]) 20 | self.SS = SuffStatBag(K=5, D=1) 21 | self.SS.setField('N', self.N, dims='K') 22 | self.resp = np.random.rand(100,3) 23 | self.precompEntropy = np.sum(self.resp * np.log(self.resp), axis=0) 24 | 25 | def test_update_global_params_EM(self): 26 | self.allocM.update_global_params_EM(self.SS) 27 | wTrue = (self.N + self.alpha0 - 1.0) 28 | wTrue = wTrue / np.sum(wTrue) 29 | wEst = self.allocM.w 30 | print wTrue 31 | print wEst 32 | assert np.allclose(wTrue, wEst) 33 | 34 | def test_get_global_suff_stats(self): 35 | Data = bnpy.data.XData(np.random.randn(10,1)) 36 | SS = self.allocM.get_global_suff_stats(Data, dict(resp=self.resp), doPrecompEntropy=True) 37 | assert np.allclose(self.precompEntropy, SS.getELBOTerm('ElogqZ')) 38 | assert np.allclose( np.sum(self.resp, axis=0), SS.N) 39 | 40 | class TestMixModelEMNonunifAlpha(TestMixModelEMUnifAlpha): 41 | def setUp(self): 42 | self.alpha0 = 2.0 43 | self.allocM = MixModel('EM', dict(alpha0=self.alpha0)) 44 | self.N = np.asarray([1.,2.,3,4,5.]) 45 | self.SS = SuffStatBag(K=5, D=1) 46 | self.SS.setField('N', self.N, dims='K') 47 | self.resp = np.random.rand(100,3) 48 | self.precompEntropy = np.sum(self.resp * np.log(self.resp), axis=0) 49 | 
-------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/data/TestMinibatchIterator.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for MinibatchIterator.py 3 | ''' 4 | import numpy as np 5 | import unittest 6 | import copy 7 | import bnpy.data.XData as XData 8 | import bnpy.data.MinibatchIterator as MinibatchIterator 9 | 10 | class TestMinibatchIterator(unittest.TestCase): 11 | def shortDescription(self): 12 | return None 13 | 14 | def setUp(self): 15 | X = np.random.randn(100, 3) 16 | self.Data = XData(X=X) 17 | self.DataIterator = MinibatchIterator(self.Data, nBatch=10, nLap=10) 18 | 19 | def test_first_batch(self): 20 | assert self.DataIterator.has_next_batch() 21 | bData = self.DataIterator.get_next_batch() 22 | assert self.DataIterator.curLapPos == 0 23 | self.verify_batch(bData) 24 | 25 | def test_num_laps(self): 26 | ''' Make sure we raise the expected exception after exhausting all the data 27 | ''' 28 | nLap = self.DataIterator.nLap 29 | nBatch = self.DataIterator.nBatch 30 | for lapID in range(nLap): 31 | for batchCount in range(nBatch): 32 | bData = self.DataIterator.get_next_batch() 33 | assert self.DataIterator.curLapPos == batchCount 34 | assert self.DataIterator.lapID == lapID 35 | self.verify_batch(bData) 36 | try: 37 | bData = self.DataIterator.get_next_batch() 38 | raise Exception('should not make it to this line!') 39 | except StopIteration: 40 | assert 1==1 41 | 42 | def test_batchIDs_traversal_order(self): 43 | ''' Make sure batchIDs from consecutive laps are not the same 44 | ''' 45 | self.DataIterator.lapID = 0 46 | self.DataIterator.curLapPos = -1 47 | bData1 = self.DataIterator.get_next_batch() 48 | batchOrder = copy.copy(self.DataIterator.batchOrderCurLap) 49 | 50 | self.DataIterator.lapID = 1 51 | self.DataIterator.curLapPos = -1 52 | bData2 = self.DataIterator.get_next_batch() 53 | batchOrder2 = 
self.DataIterator.batchOrderCurLap 54 | print batchOrder, batchOrder2 55 | assert not np.allclose(batchOrder, batchOrder2) 56 | assert np.allclose(np.unique(batchOrder),np.unique(batchOrder2)) 57 | 58 | 59 | def test_obs_full_coverage(self): 60 | ''' Make sure all data items are covered every lap 61 | ''' 62 | coveredIDs = list() 63 | nBatch = self.DataIterator.nBatch 64 | for bID in range(nBatch): 65 | bData = self.DataIterator.get_next_batch() 66 | obsIDs = self.DataIterator.getObsIDsForCurrentBatch() 67 | coveredIDs.extend(obsIDs) 68 | assert len(np.unique(coveredIDs)) == self.Data.nObsTotal 69 | 70 | def verify_batch(self, bData): 71 | assert bData.nObs == self.Data.nObs / self.DataIterator.nBatch 72 | assert bData.nObsTotal == self.Data.nObsTotal 73 | # Check that the data is as expected! 74 | batchX = bData.X 75 | trueMask = self.DataIterator.getObsIDsForCurrentBatch() 76 | trueX = self.Data.X[trueMask] 77 | assert np.allclose(batchX, trueX) 78 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/distr/TestGaussDistr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for GaussDistr.py 3 | ''' 4 | from bnpy.distr import GaussDistr 5 | import numpy as np 6 | 7 | class TestGaussD2(object): 8 | def setUp(self): 9 | self.m = np.ones(2) 10 | self.invSigma = np.eye(2) 11 | self.distr = GaussDistr(m=self.m, L=self.invSigma) 12 | 13 | def test_dimension(self): 14 | assert self.distr.D == self.invSigma.shape[0] 15 | 16 | def test_cholL(self): 17 | chol = self.distr.cholL() 18 | assert np.allclose(np.dot(chol, chol.T), self.distr.L) 19 | 20 | def test_logdetL(self): 21 | logdetL = self.distr.logdetL() 22 | assert np.allclose( np.log(np.linalg.det(self.invSigma)), logdetL) 23 | 24 | def test_dist_mahalanobis(self, N=10): 25 | X = np.random.randn(N, self.distr.D) 26 | Dist = self.distr.dist_mahalanobis(X) 27 | invSigma = self.invSigma 28 | MyDist = 
np.zeros(N) 29 | for ii in range(N): 30 | x = X[ii] - self.m 31 | MyDist[ii] = np.dot(x.T, np.dot(invSigma, x)) 32 | #if error, we print it out 33 | print MyDist[ii], Dist[ii] 34 | assert np.allclose(MyDist, Dist) 35 | 36 | class TestGaussD1(TestGaussD2): 37 | def setUp(self): 38 | self.m = np.ones(1) 39 | self.invSigma = np.eye(1) 40 | self.distr = GaussDistr(m=self.m, L=self.invSigma) 41 | 42 | 43 | class TestGaussD10(TestGaussD2): 44 | def setUp(self): 45 | PRNG = np.random.RandomState(867) 46 | R = PRNG.rand(10,10) 47 | 48 | self.m = np.ones(10) 49 | self.invSigma = 1e-4*np.eye(10) 50 | self.distr = GaussDistr(m=self.m, L=self.invSigma) 51 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/distr/TestGaussWishDistr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for GaussWishDistr.py 3 | ''' 4 | from bnpy.distr import GaussWishDistr, WishartDistr 5 | from bnpy.suffstats import SuffStatBag 6 | import numpy as np 7 | import copy 8 | 9 | class TestGaussWishDistr(object): 10 | def setUp(self): 11 | self.invW = np.eye(2) 12 | self.m = np.zeros(2) 13 | self.distr = GaussWishDistr(m=self.m, invW=self.invW, kappa=1.0, dF=4) 14 | 15 | def test_dimension(self): 16 | assert self.distr.D == self.invW.shape[0] 17 | 18 | def test_entropyWish(self): 19 | ''' Verify that (wishart) entropy is same for this object and Wishart object 20 | ''' 21 | Hself = self.distr.entropyWish() 22 | wishDistr = WishartDistr(v=self.distr.dF, invW=self.distr.invW) 23 | Hwish = wishDistr.get_entropy() 24 | assert np.allclose( Hself, Hwish) 25 | 26 | def test_dist_mahalanobis(self, N=10): 27 | ''' Verify that distance computation is largest at mean and decays further away 28 | ''' 29 | Xlist = list() 30 | for r in [0, 0.01, 0.1, 1, 2, 3, 4, 5]: 31 | Xlist.append(self.distr.m + r) 32 | X = np.asarray(Xlist) 33 | Dist = self.distr.dist_mahalanobis(X) 34 | print Dist 35 | assert 
np.all( Dist[:-1] < Dist[1:]) 36 | 37 | def test_update_soVB(self, rho=0.25): 38 | ''' Verify the blend update for stochastic variational is correct 39 | ''' 40 | distrB = copy.deepcopy(self.distr) 41 | distrB.invW *= 3 42 | distrB.m += 2 43 | distrB.kappa *= 10 44 | distrB2 = copy.deepcopy(distrB) 45 | # Make sure things are different! 46 | assert not np.allclose(distrB.invW, self.distr.invW) 47 | assert not np.allclose(distrB.m, self.distr.m) 48 | 49 | distrB.post_update_soVB(rho, self.distr) 50 | assert distrB.dF == distrB2.dF * (1-rho) + self.distr.dF * rho 51 | assert np.allclose(distrB.kappa, distrB2.kappa * (1-rho) + self.distr.kappa * rho) 52 | 53 | # these dont work because the parameterization is a bit trickier here. 54 | #assert np.allclose(distrB.invW, distrB2.invW * (1-rho) + self.distr.invW * rho) 55 | #assert np.allclose(distrB.m, distrB2.m * (1-rho) + self.distr.m * rho) 56 | 57 | 58 | def test_entropy_posterior_gets_smaller(self, N=10): 59 | PRNG = np.random.RandomState(seed=8675309) 60 | for trial in range(3): 61 | X = PRNG.randn(N, self.distr.D) + self.distr.m 62 | x = np.sum(X,axis=0) 63 | xxT = np.dot(X.T,X) 64 | SS = SuffStatBag(K=1, D=self.distr.D) 65 | SS.setField('N', [N], dims='K') 66 | SS.setField('x', [x], dims=('K','D')) 67 | SS.setField('xxT', [xxT], dims=('K','D','D')) 68 | postD = self.distr.get_post_distr(SS, 0) 69 | assert postD.D == self.distr.D 70 | Hpost = postD.entropyWish() 71 | Hprior = self.distr.entropyWish() 72 | print 'Prior %.3g, Post %.3g' % (Hprior, Hpost) 73 | assert Hpost < Hprior -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/distr/TestWishartDistr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ''' 3 | from bnpy.distr import WishartDistr 4 | from bnpy.suffstats import SuffStatBag 5 | import numpy as np 6 | import copy 7 | 8 | class TestWishart(object): 9 | def setUp(self): 10 | self.v = 4 11 | self.invW 
= np.eye(2) 12 | self.distr = WishartDistr(v=self.v, invW=self.invW) 13 | 14 | def test_dimension(self): 15 | assert self.distr.D == self.invW.shape[0] 16 | 17 | def test_cholinvW(self): 18 | cholinvW = self.distr.cholinvW() 19 | assert np.allclose(np.dot(cholinvW, cholinvW.T), self.distr.invW) 20 | 21 | def test_expected_covariance_matrix(self): 22 | CovMat = self.distr.ECovMat() 23 | MyCovMat = self.invW / (self.v - self.distr.D - 1) 24 | print MyCovMat, CovMat 25 | assert np.allclose(MyCovMat, CovMat) 26 | 27 | def test_post_update_soVB(self, rho=0.375): 28 | distrA = copy.deepcopy(self.distr) 29 | distrB = WishartDistr(distrA.v, invW=np.eye(distrA.D) ) 30 | self.distr.post_update_soVB(rho, distrB) 31 | assert self.distr.v == rho*distrA.v + (1-rho)*distrB.v 32 | assert np.allclose(self.distr.invW, rho*distrA.invW + (1-rho)*distrB.invW) 33 | 34 | def test_entropy_posterior_gets_smaller(self, N=1): 35 | PRNG = np.random.RandomState(seed=8675309) 36 | for trial in range(3): 37 | X = PRNG.randn(N, self.distr.D) 38 | xxT = np.dot(X.T, X) 39 | 40 | SS = SuffStatBag(K=1, D=self.distr.D) 41 | SS.setField('N', [N], dims='K') 42 | SS.setField('xxT', [xxT], dims=('K','D','D')) 43 | 44 | postD = self.distr.get_post_distr(SS, 0) 45 | assert postD.D == self.distr.D 46 | Hpost = postD.get_entropy() 47 | Hprior = self.distr.get_entropy() 48 | print 'Prior %.3g, Post %.3g' % (Hprior, Hpost) 49 | print self.distr.invW 50 | print postD.invW 51 | assert Hpost < Hprior -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/distr/TestZMGaussDistr.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ''' 3 | from bnpy.distr import ZMGaussDistr 4 | import numpy as np 5 | 6 | class TestZMGauss(object): 7 | def setUp(self): 8 | self.Sigma = np.eye(4) 9 | self.distr = ZMGaussDistr(Sigma=self.Sigma.copy()) 10 | 11 | def test_dimension(self): 12 | assert self.distr.D == self.Sigma.shape[0] 13 
| 14 | def test_cholSigma(self): 15 | chol = self.distr.cholSigma() 16 | assert np.allclose(np.dot(chol, chol.T), self.distr.Sigma) 17 | 18 | def test_logdetSigma(self): 19 | logdetSigma = self.distr.logdetSigma() 20 | assert np.allclose( np.log(np.linalg.det(self.Sigma)), logdetSigma) 21 | 22 | def test_get_log_norm_const(self): 23 | logZ = self.distr.get_log_norm_const() 24 | logdetSigma = np.log(np.linalg.det(self.Sigma)) 25 | mylogZ = 0.5*self.Sigma.shape[0]*np.log(2*np.pi) + 0.5 * logdetSigma 26 | 27 | def test_dist_mahalanobis(self, N=10): 28 | X = np.random.randn(N, self.distr.D) 29 | Dist = self.distr.dist_mahalanobis(X) 30 | invSigma = np.linalg.inv(self.Sigma) 31 | MyDist = np.zeros(N) 32 | for ii in range(N): 33 | x = X[ii] 34 | MyDist[ii] = np.dot(x.T, np.dot(invSigma, x)) 35 | #if error, we print it out 36 | print MyDist[ii], Dist[ii] 37 | assert np.allclose(MyDist, Dist) 38 | 39 | class TestZMGaussRand1Dim(TestZMGauss): 40 | def setUp(self): 41 | self.Sigma = np.asarray([[42.0]]) 42 | self.distr = ZMGaussDistr(Sigma=self.Sigma) 43 | 44 | class TestZMGaussRand5Dim(TestZMGauss): 45 | def setUp(self): 46 | R = np.random.rand(5,5) 47 | self.Sigma = np.dot(R, R.T) + 0.02*np.eye(5) 48 | self.distr = ZMGaussDistr(Sigma=self.Sigma) -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestAdmixTopicModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for topic models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestAdmixTopicModel(AbstractEndToEndTest): 12 | __test__ = True 13 | 14 | def setUp(self): 15 | self.Data = bnpy.data.WordsData.CreateToyDataSimple(nDoc=25, nWordsPerDoc=50, vocab_size=100) 16 | self.allocModelName = 'AdmixModel' 17 | self.obsModelName = 'Mult' 18 | self.kwargs = dict(nLap=30, 
K=5, alpha0=1) 19 | self.kwargs['lambda'] = 1 20 | self.kwargs['doMemoizeLocalParams'] = 1 21 | 22 | self.mustRetainLPAcrossLapsForGuarantees = True 23 | self.learnAlgs = ['VB', 'moVB', 'soVB'] 24 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestDPMixGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for full-mean, full-covariance Gaussian models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestDPMixGaussModel(AbstractEndToEndTest): 12 | __test__ = True 13 | 14 | def setUp(self): 15 | PRNG = np.random.RandomState(333) 16 | X = PRNG.randn(1000, 3) 17 | X = np.vstack([X, 5 + PRNG.randn(100, 3)]) 18 | self.Data = bnpy.data.XData(X) 19 | self.allocModelName = 'DPMixModel' 20 | self.obsModelName = 'Gauss' 21 | self.kwargs = dict(nLap=30, K=5, alpha0=1) 22 | self.kwargs['smatname'] = 'eye' 23 | 24 | self.learnAlgs = ['VB', 'soVB', 'moVB'] 25 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestHDPModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for topic models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestHDPModel(AbstractEndToEndTest): 12 | __test__ = True 13 | 14 | def setUp(self): 15 | self.Data = bnpy.data.WordsData.CreateToyDataSimple(nDoc=25, nWordsPerDoc=50, vocab_size=100) 16 | self.allocModelName = 'HDPModel' 17 | self.obsModelName = 'Mult' 18 | self.kwargs = dict(nLap=30, K=5, alpha0=1) 19 | self.kwargs['lambda'] = 1 20 | self.kwargs['doMemoizeLocalParams'] = 1 21 | self.kwargs['doFullPassBeforeMstep'] = 1 22 | 23 | 
self.mustRetainLPAcrossLapsForGuarantees = True 24 | self.learnAlgs = ['VB', 'moVB', 'soVB'] 25 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestMixDiagGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ''' 3 | import numpy as np 4 | import unittest 5 | 6 | import bnpy 7 | from AbstractEndToEndTest import AbstractEndToEndTest 8 | import Util 9 | 10 | class TestSimple(AbstractEndToEndTest): 11 | __test__ = True 12 | 13 | def setUp(self): 14 | PRNG = np.random.RandomState(333) 15 | X = PRNG.randn(1000, 3) 16 | self.Data = bnpy.data.XData(X) 17 | self.allocModelName = 'MixModel' 18 | self.obsModelName = 'DiagGauss' 19 | self.kwargs = dict(nLap=30, K=3, alpha0=1) 20 | self.learnAlgs = ['EM', 'VB', 'moVB', 'soVB'] 21 | 22 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestMixGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for full-mean, full-covariance Gaussian models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestSimple(AbstractEndToEndTest): 12 | ''' Test basic functionality (run without crashing?) on very simple dataset. 13 | ''' 14 | __test__ = True 15 | 16 | def setUp(self): 17 | PRNG = np.random.RandomState(333) 18 | X = PRNG.randn(1000, 3) 19 | self.Data = bnpy.data.XData(X) 20 | self.allocModelName = 'MixModel' 21 | self.obsModelName = 'Gauss' 22 | self.kwargs = dict(nLap=30, K=3, alpha0=1) 23 | self.kwargs['smatname'] = 'eye' 24 | 25 | self.learnAlgs = ['EM', 'VB', 'moVB', 'soVB'] 26 | 27 | 28 | class TestEasyK2_EM(AbstractEndToEndTest): 29 | ''' Test basic EM parameter estimation on well-separable K=2 toy dataset. 
30 | 31 | Verify runs from fromTruth and fromScratch inits reach ideal params. 32 | ''' 33 | __test__ = True 34 | 35 | def setUp(self): 36 | # Define true parameters (mean, prec matrix) for 2 well-separated clusters 37 | self.K = 2 38 | B = 20 39 | Mu = np.eye(2) 40 | Sigma = np.zeros((2,2,2)) 41 | Sigma[0] = np.asarray([[B,0], [0,1./B]]) 42 | Sigma[1] = np.asarray([[1./B,0], [0,B]]) 43 | L = np.zeros_like(Sigma) 44 | for k in xrange(self.K): 45 | L[k] = np.linalg.inv(Sigma[k]) 46 | self.TrueParams = dict(w=0.5*np.ones(self.K), K=self.K, m=Mu, L=L) 47 | self.ProxFunc = dict(L=Util.CovMatProxFunc, 48 | m=Util.VectorProxFunc, 49 | w=Util.ProbVectorProxFunc) 50 | 51 | # Generate data 52 | Nk = 1000 53 | X = Util.MakeGaussData(Mu, Sigma, Nk) 54 | self.Data = bnpy.data.XData(X) 55 | 56 | self.learnAlgs = ['EM'] 57 | 58 | # Basic configuration 59 | self.allocModelName = 'MixModel' 60 | self.obsModelName = 'Gauss' 61 | self.kwargs = dict(nLap=30, K=self.K, alpha0=1.0) 62 | 63 | # Substitute config used for "from-scratch" tests only 64 | # anything in here overrides defaults in self.kwargs 65 | self.fromScratchArgs = dict(nLap=50, K=self.K, initname='randexamples') 66 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestMixZMGauss.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit-tests for full learning for zero-mean, full-covariance Gaussian models 3 | ''' 4 | import numpy as np 5 | import unittest 6 | 7 | import bnpy 8 | from AbstractEndToEndTest import AbstractEndToEndTest 9 | import Util 10 | 11 | class TestSimple(AbstractEndToEndTest): 12 | ''' Test basic functionality (run without crashing?) on very simple dataset. 
13 | ''' 14 | __test__ = True 15 | 16 | def setUp(self): 17 | PRNG = np.random.RandomState(333) 18 | X = PRNG.randn(1000, 3) 19 | self.Data = bnpy.data.XData(X) 20 | self.allocModelName = 'MixModel' 21 | self.obsModelName = 'ZMGauss' 22 | self.kwargs = dict(nLap=30, K=4, alpha0=1.0) 23 | self.learnAlgs = ['EM', 'VB', 'moVB', 'soVB'] 24 | 25 | class TestEasyK2_EM(AbstractEndToEndTest): 26 | ''' Test basic EM parameter estimation on well-separable K=2 toy dataset. 27 | 28 | Verify runs from fromTruth and fromScratch inits reach ideal params. 29 | ''' 30 | __test__ = True 31 | 32 | def setUp(self): 33 | # Define true parameters: two very-different covariance matrices 34 | self.K = 2 35 | B = 20 36 | Sigma = np.zeros((2,2,2)) 37 | Sigma[0] = np.asarray([[B,0], [0,1./B]]) 38 | Sigma[1] = np.asarray([[1./B,0], [0,B]]) 39 | self.TrueParams = dict(Sigma=Sigma, w=0.5*np.ones(self.K)) 40 | 41 | # Functions used by tests to decide if estimated params are "close enough" 42 | # Must have same keys as self.TrueParams 43 | self.ProxFunc = dict(Sigma=Util.CovMatProxFunc, 44 | w=Util.ProbVectorProxFunc) 45 | 46 | # Generate toy dataset 47 | Nk = 1000 48 | X = Util.MakeZMGaussData(Sigma, Nk, seed=34567) 49 | self.Data = bnpy.data.XData(X) 50 | 51 | # Only run EM tests 52 | self.learnAlgs = ['EM'] 53 | 54 | # Basic model configuration 55 | self.allocModelName = 'MixModel' 56 | self.obsModelName = 'ZMGauss' 57 | self.kwargs = dict(nLap=30, K=self.K, alpha0=1.0) 58 | 59 | # Substitute config used for "from-scratch" tests only 60 | # anything in here overrides defaults in self.kwargs 61 | self.fromScratchArgs = dict(nLap=50, K=self.K, initname='randexamples') 62 | self.fromScratchTrials = 5 63 | self.fromScratchSuccessRate = 0.5 64 | 65 | 66 | class TestStarCovarK5_EM(AbstractEndToEndTest): 67 | ''' Test basic EM parameter estimation on StarCovarK5 toy dataset. 68 | 69 | Verify runs from fromTruth and fromScratch inits estimate ideal params. 
70 | ''' 71 | __test__ = True 72 | 73 | def setUp(self): 74 | self.K = 5 75 | import StarCovarK5 76 | self.Data = StarCovarK5.get_data(nObsTotal=10000) 77 | 78 | self.TrueParams = dict(Sigma=StarCovarK5.Sigma, 79 | w=StarCovarK5.w) 80 | self.ProxFunc = dict(Sigma=Util.CovMatProxFunc, 81 | w=Util.ProbVectorProxFunc) 82 | 83 | self.learnAlgs = ['EM'] 84 | 85 | self.allocModelName = 'MixModel' 86 | self.obsModelName = 'ZMGauss' 87 | self.kwargs = dict(nLap=30, K=self.K, alpha0=1.0) 88 | 89 | self.fromScratchArgs = dict(nLap=50, K=self.K, initname='randexamples') 90 | self.fromScratchTrials = 10 91 | self.fromScratchSuccessRate = 0.5 -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/end-to-end/TestProxFunc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests to verify that our proposed proximity functions work as expected. 3 | 4 | Proximity function (defined in Util) are used to determine if two estimated parameters are "close enough" within some numerical tolerance to be treated as equivalent. We eventually use these functions to assess whether learning algorithms like EM are able to estimate "true"/"ideal" parameters for toy data. 5 | ''' 6 | 7 | import unittest 8 | import numpy as np 9 | 10 | import bnpy 11 | import Util 12 | 13 | class TestVectorProxFunc(unittest.TestCase): 14 | def test_vectorproxfunc(self): 15 | avec = np.asarray([1, 0, 0]) 16 | bvec = np.asarray([0.91, 0.03, 0.08]) 17 | assert np.all( Util.VectorProxFunc(avec, bvec)) 18 | 19 | bvec = np.asarray([0.91, -0.03, -0.08]) 20 | assert np.all( Util.VectorProxFunc(avec, bvec)) 21 | 22 | bvec = np.asarray([0.99, 0.11, 0.12]) 23 | assert not np.all( Util.VectorProxFunc(avec, bvec)) 24 | 25 | 26 | class TestStarCovarK5(unittest.TestCase): 27 | ''' Verify CovMatProxFunc discriminates between all StarCovarK5 cov matrices. 
28 | ''' 29 | 30 | def setUp(self): 31 | import StarCovarK5 32 | self.Sigma = StarCovarK5.Sigma.copy() 33 | self.SigmaHat = np.zeros_like(self.Sigma) 34 | for k in range(5): 35 | Xk = Util.MakeZMGaussData(self.Sigma[k], 10000, seed=k) 36 | self.SigmaHat[k] = np.cov(Xk.T, bias=1) 37 | 38 | def test_CovMatProxFunc(self): 39 | print '' 40 | K = self.Sigma.shape[0] 41 | for k in xrange(K): 42 | isG = Util.CovMatProxFunc(self.Sigma[k], self.SigmaHat[k]) 43 | if not np.all(isG): 44 | Util.pprint( self.Sigma[k], 'true') 45 | Util.pprint( self.SigmaHat[k], 'est') 46 | Util.pprint( np.diag(isG).min()) 47 | from IPython import embed; embed() 48 | assert np.all(isG) 49 | for k in xrange(K): 50 | for j in xrange(k+1, K): 51 | print k,j 52 | isG = Util.CovMatProxFunc(self.Sigma[k], self.SigmaHat[j]) 53 | if np.all(isG): 54 | print self.Sigma[k] 55 | print self.SigmaHat[j] 56 | assert not np.all(isG) 57 | 58 | 59 | 60 | class TestDeadLeavesD25(TestStarCovarK5): 61 | ''' Verify CovMatProxFunc discriminates between DeadLeavesD25 cov matrices. 
62 | ''' 63 | def setUp(self): 64 | import DeadLeavesD25 65 | self.Sigma = DeadLeavesD25.DL.Sigma.copy() 66 | self.SigmaHat = np.zeros_like(self.Sigma) 67 | for k in range(8): 68 | Xk = Util.MakeZMGaussData(self.Sigma[k], 10000, seed=k) 69 | self.SigmaHat[k] = np.cov(Xk.T, bias=1) 70 | 71 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/init/TestFromSaved.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for FromScratchGauss.py 3 | ''' 4 | import unittest 5 | import numpy as np 6 | from bnpy.data import XData 7 | from bnpy import HModel 8 | from bnpy.ioutil import ModelWriter, ModelReader 9 | 10 | class TestFromScratchGauss(unittest.TestCase): 11 | def shortDescription(self): 12 | return None 13 | 14 | def setUp(self, K=7): 15 | ''' Create random data, and a K component MixModel to go with it 16 | Call this original model "hmodel". 17 | We copy hmodel into "modelB", and then save to file via save_model() 18 | ''' 19 | self.K = K 20 | PRNG = np.random.RandomState(867) 21 | X = PRNG.randn(100,2) 22 | self.Data = XData(X=X) 23 | 24 | aPDict = dict(alpha0=1.0) 25 | oPDict = dict(min_covar=1e-9) 26 | self.hmodel = HModel.CreateEntireModel('EM','MixModel','ZMGauss', 27 | aPDict, oPDict, self.Data) 28 | modelB = self.hmodel.copy() 29 | initParams = dict(initname='randexamples', seed=0, K=self.K) 30 | modelB.init_global_params(self.Data, **initParams) 31 | ModelWriter.save_model(modelB, '/tmp/', 'Test') 32 | self.modelB = modelB 33 | 34 | def test_viable_init(self): 35 | ''' Verify hmodel after init can be used to perform E-step 36 | ''' 37 | initSavedParams = dict(initname='/tmp/', prefix='Test') 38 | self.hmodel.init_global_params(self.Data, **initSavedParams) 39 | assert self.hmodel.allocModel.K == self.K 40 | keysA = self.hmodel.allocModel.to_dict() 41 | keysB = self.modelB.allocModel.to_dict() 42 | assert len(keysA) == len(keysB) 43 | 44 | aLP = 
self.hmodel.calc_local_params(self.Data) 45 | assert np.all(np.logical_and(aLP['resp']>=0,aLP['resp']<=1.0)) 46 | assert np.allclose(1.0, np.sum(aLP['resp'],axis=1)) 47 | 48 | 49 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/learnalg/TestMemoizedVBWithBirth.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for moVB with birth moves. 3 | 4 | Coverage 5 | -------- 6 | * do_birth_at_lap 7 | * verify births occur at the expected times (when lap < fracLapsBirth*nLap) 8 | ''' 9 | import bnpy 10 | import unittest 11 | 12 | class TestMOVBWithBirth(unittest.TestCase): 13 | 14 | def setUp(self): 15 | birthP = dict(fracLapsBirth=0.8) 16 | algP = dict(nLap=10, birth=birthP) 17 | self.learnAlg = bnpy.learnalg.MemoizedOnlineVBLearnAlg(savedir=None, seed=0, 18 | algParams=algP, outputParams=dict()) 19 | 20 | def test_do_birth_at_lap(self): 21 | assert self.learnAlg.do_birth_at_lap(0) 22 | assert self.learnAlg.do_birth_at_lap(0.5) 23 | assert self.learnAlg.do_birth_at_lap(1) 24 | assert self.learnAlg.do_birth_at_lap(2) 25 | assert self.learnAlg.do_birth_at_lap(8) 26 | assert not self.learnAlg.do_birth_at_lap(8.05) 27 | assert not self.learnAlg.do_birth_at_lap(8.2) 28 | assert not self.learnAlg.do_birth_at_lap(9) 29 | assert not self.learnAlg.do_birth_at_lap(10) 30 | assert not self.learnAlg.do_birth_at_lap(11111) 31 | 32 | 33 | class TestMOVBWithBirthFracThatNeedsRounding(TestMOVBWithBirth): 34 | ''' Now check it with a fraction that will need to be rounded. 
35 | ''' 36 | 37 | def setUp(self): 38 | birthP = dict(fracLapsBirth=0.7777) 39 | algP = dict(nLap=10, birth=birthP) 40 | self.learnAlg = bnpy.learnalg.MemoizedOnlineVBLearnAlg(savedir=None, seed=0, 41 | algParams=algP, outputParams=dict()) 42 | 43 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/merge/TestMergePairSelector.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for MergePairSelector.py 3 | 4 | Verifies that we can successfully select components to merge 5 | ''' 6 | import numpy as np 7 | import unittest 8 | 9 | from bnpy.learnalg import MergeTracker 10 | from bnpy.learnalg import MergePairSelector 11 | 12 | class TestMergePairSelector(unittest.TestCase): 13 | def shortDescription(self): 14 | return None 15 | 16 | def setUp(self): 17 | pass 18 | 19 | def test_reindexAfterMerge(self): 20 | MSelector = MergePairSelector() 21 | MSelector.MScores[0] = 5 22 | MSelector.MScores[3] = 5 23 | MSelector.MScores[4] = 5 24 | MSelector.PairMScores[(0,1)] = 5 25 | MSelector.PairMScores[(3,4)] = 5 26 | MSelector.PairMScores[(5,6)] = 5 27 | 28 | MSelector.reindexAfterMerge(2,3) 29 | 30 | assert MSelector.MScores[0] == 5 31 | assert MSelector.MScores[3] == 5 32 | assert 2 not in MSelector.MScores 33 | 34 | assert len(MSelector.PairMScores.keys()) == 2 35 | assert (0,1) in MSelector.PairMScores 36 | assert (4,5) in MSelector.PairMScores 37 | assert (2,3) not in MSelector.PairMScores 38 | 39 | def test_select_merge_components_random(self): 40 | ''' Verify that under random choices, we select among 3 components 41 | equally often 42 | ''' 43 | MT = MergeTracker(3) 44 | MSelector = MergePairSelector() 45 | counts = np.zeros(3) 46 | for trial in range(1000): 47 | kA, kB = MSelector.select_merge_components(None, None, MT, mergename='random') 48 | counts[kA] += 1 49 | counts[kB] += 1 50 | counts /= np.sum(counts) 51 | minFrac = 0.25 52 | maxFrac = 0.4 53 | # 
Uniform at random means fraction of choice should be ~1/3 for each 54 | assert np.all(counts > minFrac) 55 | assert np.all(counts < maxFrac) 56 | 57 | def test_select_merge_components_random_raisesError(self): 58 | ''' Verify that when comp 0 is excluded with K=3 59 | we cannot provide comp 0 as kA, [error is raised] 60 | AND 61 | in free choice, we only choose kA=1, kB=2 62 | ''' 63 | MT = MergeTracker(3) 64 | MSelector = MergePairSelector() 65 | 66 | MT.excludeList = set([0]) 67 | MT._synchronize_and_verify() 68 | for trial in range(10): 69 | kA, kB = MSelector.select_merge_components(None, None, MT, kA=1, mergename='random') 70 | assert kA == 1 71 | assert kB == 2 72 | for trial in range(10): 73 | kA, kB = MSelector.select_merge_components(None, None, MT, kA=2, mergename='random') 74 | assert kA == 1 75 | assert kB == 2 76 | with self.assertRaises(AssertionError): 77 | kA, kB = MSelector.select_merge_components(None, None, MT, mergename='random', kA=0) 78 | 79 | def test_select_merge_components_random_raisesErrorAllButOneExcluded(self): 80 | ''' Verify that when comps 0,1 are excluded with K=3 81 | we cannot provide comp 2 as kA, [error is raised] 82 | ''' 83 | MT = MergeTracker(3) 84 | MSelector = MergePairSelector() 85 | 86 | MT.excludeList = set([1, 0]) 87 | MT._synchronize_and_verify() 88 | with self.assertRaises(AssertionError): 89 | kA, kB = MSelector.select_merge_components(None, None, MT, mergename='random', kA=2) 90 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/obsmodel/TestGaussObsModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for GaussObsModel 3 | ''' 4 | from bnpy.data import XData 5 | from bnpy.obsmodel import GaussObsModel 6 | from bnpy.distr import GaussWishDistr 7 | from bnpy.util.RandUtil import mvnrand 8 | import unittest 9 | import numpy as np 10 | 11 | class TestGaussObsModel(unittest.TestCase): 12 | def 
shortDescription(self): 13 | pass 14 | 15 | def setUp(self): 16 | self.MakeModel() 17 | self.MakeData() 18 | 19 | def MakeModel(self): 20 | self.obsM = None 21 | 22 | def MakeData(self, nObsC=200): 23 | if self.obsM is None: 24 | return 25 | XList = list() 26 | np.random.seed(505) 27 | for k in range(self.obsM.K): 28 | Sigma = self.obsM.get_covar_mat_for_comp(k) 29 | mu = self.obsM.get_mean_for_comp(k) 30 | Xcur = mvnrand(mu, Sigma, nObsC) 31 | XList.append(Xcur) 32 | X = np.vstack(XList) 33 | self.nObsC = nObsC 34 | self.Data = XData(X=X) 35 | 36 | def test_dimension(self): 37 | ''' Verify dimensions match btw model and data 38 | ''' 39 | if self.obsM is None: 40 | return 41 | assert self.obsM.D == self.Data.dim 42 | 43 | def test_calc_local_params(self): 44 | ''' Calc soft assign responsibilities for all data items 45 | Verify that the items generated by each component are (usually) associated with that component. 46 | ''' 47 | if self.obsM is None: 48 | return 49 | LP = self.obsM.calc_local_params(self.Data) 50 | lpr = LP['E_log_soft_ev'] 51 | maxIDs = np.argmax(lpr, axis=1) 52 | for k in range(self.obsM.K): 53 | currange = range(k*self.nObsC, (k+1)*self.nObsC) 54 | nMatch = np.sum( maxIDs[currange]==k ) 55 | assert nMatch > 0.95 * self.nObsC 56 | 57 | class TestGaussObsModelVB(TestGaussObsModel): 58 | def MakeModel(self): 59 | oDict = dict(inferType='VB') 60 | compA = dict(m=[100,100], kappa=1.0e-4, invW=np.eye(2), dF=4) 61 | compB = dict(m=[-100,-100], kappa=1.0e-4, invW=np.eye(2), dF=4) 62 | compC = dict(m=[0,0], kappa=1.0e-4, invW=np.eye(2), dF=4) 63 | compDictList = [compA, compB, compC] 64 | obsPrior = GaussWishDistr(m=[0,0], kappa=1e-4, invW=np.eye(2), dF=4) 65 | self.obsM = GaussObsModel.CreateWithAllComps(oDict, obsPrior, compDictList) 66 | 67 | class TestGaussObsModelEM(TestGaussObsModel): 68 | def MakeModel(self): 69 | oDict = dict(inferType='EM', min_covar=0.0) 70 | compDictList = [ dict(m=[100,100], L=np.eye(2)), dict(m=[0,0], L=100*np.eye(2))] 71 | 
obsPrior = None 72 | self.obsM = GaussObsModel.CreateWithAllComps(oDict, obsPrior, compDictList) 73 | -------------------------------------------------------------------------------- /refinery/bnpy/bnpy-dev/tests/obsmodel/TestZMGaussObsModel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Unit tests for ZMGaussObsModel 3 | ''' 4 | from bnpy.data import XData 5 | from bnpy.obsmodel import ZMGaussObsModel 6 | import numpy as np 7 | 8 | class TestZMGaussObsModelEM(object): 9 | def setUp(self): 10 | oDict = dict(inferType='EM', min_covar=0.0) 11 | compDictList = [ dict(Sigma=np.eye(2)), dict(Sigma=100*np.eye(2))] 12 | obsPrior = None 13 | self.obsM = ZMGaussObsModel.CreateWithAllComps(oDict, obsPrior, compDictList) 14 | self.C = 10 15 | XList = list() 16 | for k in range(self.obsM.K): 17 | Xcur = np.random.randn(self.C,2) 18 | sig = np.sqrt(self.obsM.comp[k].Sigma[0,0]) 19 | XList.append(sig*Xcur) 20 | self.Data = XData(X=np.vstack(XList)) 21 | print self.Data.X 22 | 23 | def test_dimension(self): 24 | assert self.obsM.D == 2 25 | 26 | def test_calc_local_params(self): 27 | # calculate the soft assignment probabilities for all data items 28 | # make sure that the items generated by each component 29 | # are (statistically) associated with that component 30 | LP = self.obsM.calc_local_params(self.Data) 31 | lpr = LP['E_log_soft_ev'] 32 | maxIDs = np.argmax(lpr, axis=1) 33 | for k in range(self.obsM.K): 34 | currange = range(k*self.C, (k+1)*self.C) 35 | matchMask = maxIDs[currange]==k 36 | nMatch = np.sum(matchMask) 37 | assert nMatch > 0.8*self.C -------------------------------------------------------------------------------- /refinery/bnpy/results/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- 
/refinery/bnpy/scripts/BarsBurstyK20.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsBursty.py 3 | 4 | Loads Mike Bryant's old bursty topics dataset. 5 | TODO: Need to write code to generate a bursty topic dataset 6 | 7 | ''' 8 | from bnpy.data import WordsData, AdmixMinibatchIterator 9 | import os 10 | 11 | data_dir = '/data/liv/liv-x/topic_models/data/bars/' 12 | matfilepath = os.environ['BNPYDATADIR'] + 'bars_bnpy_burstyK20_train.mat' 13 | 14 | if not os.path.exists(matfilepath): 15 | matfilepath = data_dir + 'bars_bnpy_burstyK20_train.mat' 16 | 17 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 18 | ''' Grab data from matfile specified by matfilepath 19 | ''' 20 | Data = WordsData.read_from_mat( matfilepath ) 21 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 22 | return Data 23 | 24 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 25 | Data = WordsData.read_from_mat( matfilepath ) 26 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 27 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 28 | return DataIterator 29 | 30 | def get_data_info(D, V): 31 | return 'Bars Bursty K20 Data. D=%d. VocabSize=%d' % (D,V) 32 | -------------------------------------------------------------------------------- /refinery/bnpy/scripts/BarsBurstyK6.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsBursty.py 3 | 4 | Loads Mike Bryant's old bursty topics dataset. 
5 | TODO: Need to write code to generate a bursty topic dataset 6 | 7 | ''' 8 | from bnpy.data import WordsData, AdmixMinibatchIterator 9 | import os 10 | 11 | data_dir = '/data/liv/liv-x/topic_models/data/bars/' 12 | matfilepath = os.environ['BNPYDATADIR'] + 'bars_bnpy_burstyK6_train.mat' 13 | 14 | if not os.path.exists(matfilepath): 15 | matfilepath = data_dir + 'bars_bnpy_burstyK6_train.mat' 16 | 17 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 18 | ''' Grab data from matfile specified by matfilepath 19 | ''' 20 | Data = WordsData.read_from_mat( matfilepath ) 21 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 22 | return Data 23 | 24 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 25 | Data = WordsData.read_from_mat( matfilepath ) 26 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 27 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 28 | return DataIterator 29 | 30 | def get_data_info(D, V): 31 | return 'Bars Bursty Data. D=%d. VocabSize=%d' % (D,V) 32 | -------------------------------------------------------------------------------- /refinery/bnpy/scripts/BarsK8.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BarsK8.py 3 | 4 | Toy Bars data, with K=8 topics 5 | 4 horizontal, and 4 vertical. 
6 | ''' 7 | import numpy as np 8 | 9 | from bnpy.data import WordsData, AdmixMinibatchIterator 10 | 11 | Defaults = dict() 12 | Defaults['nDocTotal'] = 2000 13 | Defaults['nWordsPerDoc'] = 100 14 | 15 | SEED = 8675309 16 | 17 | # FIXED DATA GENERATION PARAMS 18 | K = 8 # Number of topics 19 | V = 16 # Vocabulary Size 20 | gamma = 0.5 # hyperparameter over doc-topic distribution 21 | 22 | # TOPIC by WORD distribution 23 | true_tw = np.zeros( (K,V) ) 24 | true_tw[0,:] = [ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 25 | true_tw[1,:] = [ 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0] 26 | true_tw[2,:] = [ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0] 27 | true_tw[3,:] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1] 28 | true_tw[4,:] = [ 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0] 29 | true_tw[5,:] = [ 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0] 30 | true_tw[6,:] = [ 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0] 31 | true_tw[7,:] = [ 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1] 32 | # Add "smoothing" term to each entry of the topic-word matrix 33 | # With V = 16 and 8 sets of bars, 34 | # smoothMass=0.02 yields 0.944 probability of drawing "on topic" word 35 | smoothMass = 0.02 36 | true_tw += smoothMass 37 | # Ensure each row of true_tw is a probability vector 38 | for k in xrange(K): 39 | true_tw[k,:] /= np.sum( true_tw[k,:] ) 40 | Defaults['TopicWordProbs'] = true_tw 41 | 42 | 43 | # GLOBAL PROB DISTRIBUTION OVER TOPICS 44 | trueBeta = np.hstack([1.1*np.ones(K/2), np.ones(K/2)]) 45 | trueBeta /= trueBeta.sum() 46 | Defaults['docTopicParamVec'] = gamma * trueBeta 47 | 48 | def get_data_info(**kwargs): 49 | if 'nDocTotal' in kwargs: 50 | nDocTotal = kwargs['nDocTotal'] 51 | else: 52 | nDocTotal = Defaults['nDocTotal'] 53 | return 'Toy Bars Data. Ktrue=%d. nDocTotal=%d.' 
% (K, nDocTotal) 54 | 55 | def get_data(**kwargs): 56 | ''' 57 | Args 58 | ------- 59 | seed 60 | nDocTotal 61 | nWordsPerDoc 62 | ''' 63 | Data = genWordsData(seed=SEED, **kwargs) 64 | Data.summary = get_data_info(**kwargs) 65 | return Data 66 | 67 | def get_minibatch_iterator(seed=SEED, nBatch=10, nLap=1, 68 | dataorderseed=0, **kwargs): 69 | ''' 70 | Args 71 | ------- 72 | seed 73 | nDocTotal 74 | nWordsPerDoc 75 | ''' 76 | Data = genWordsData(seed=seed, **kwargs) 77 | DataIterator = AdmixMinibatchIterator(Data, 78 | nBatch=nBatch, nLap=nLap, dataorderseed=dataorderseed) 79 | DataIterator.summary = get_data_info(**kwargs) 80 | return DataIterator 81 | 82 | def genWordsData(**kwargs): 83 | for key in Defaults: 84 | if key not in kwargs: 85 | kwargs[key] = Defaults[key] 86 | return WordsData.genToyData(**kwargs) 87 | 88 | if __name__ == '__main__': 89 | import bnpy.viz.BarsViz 90 | WData = genWordsData(seed=SEED) 91 | bnpy.viz.BarsViz.plotExampleBarsDocs(WData) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/DeadLeavesD25.py: -------------------------------------------------------------------------------- 1 | import DeadLeaves as DL 2 | 3 | DL.makeTrueParams(25) 4 | 5 | def get_data(**kwargs): 6 | return DL.get_data(**kwargs) 7 | 8 | def get_minibatch_iterator(**kwargs): 9 | return DL.get_minibatch_iterator(**kwargs) 10 | 11 | def get_short_name(): 12 | return DL.get_short_name() 13 | 14 | def get_data_info(): 15 | return DL.get_data_info() 16 | 17 | 18 | if __name__ == '__main__': 19 | DL.plotTrueCovMats(doShowNow=False) 20 | DL.plotImgPatchPrototypes() 21 | 22 | 23 | -------------------------------------------------------------------------------- /refinery/bnpy/scripts/HuffPost.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HuffPost.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = 
'/Users/daeil/Dropbox/research/bnpy/data/huffpost/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'huffpost_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'huffpost_bnpy.mat' 13 | 14 | def get_data(seed=8675309, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat(matfilepath) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nLap=1, 22 | dataorderseed=0, **kwargs): 23 | Data = WordsData.read_from_mat(matfilepath) 24 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=300, nLap=nLap, dataorderseed=dataorderseed) 25 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 26 | return DataIterator 27 | 28 | def get_data_info(D, V): 29 | return 'Huffington Post Data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/NIPS.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NIPSCorpus.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | datadir = '/Users/daeil/Dropbox/research/bnpy/data/nips/' 9 | NIPSmatfile = 'nips_bnpy.mat' 10 | matfilepath = os.environ['BNPYDATADIR'] + NIPSmatfile 11 | 12 | if not os.path.exists(matfilepath): 13 | matfilepath = datadir + NIPSmatfile 14 | 15 | def get_data(**kwargs): 16 | ''' Grab data from matfile specified by matfilepath 17 | ''' 18 | Data = WordsData.read_from_mat(matfilepath) 19 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 20 | return Data 21 | 22 | def get_minibatch_iterator(nBatch=10, nLap=1, 23 | dataorderseed=0, **kwargs): 24 | Data = WordsData.read_from_mat(matfilepath) 25 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, 26 | nLap=nLap, dataorderseed=dataorderseed) 27 | DataIterator.summary = 
get_data_info(Data.nDocTotal, Data.vocab_size) 28 | return DataIterator 29 | 30 | def get_data_info(D, V): 31 | return 'NIPS bag-of-words data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/NYTimes.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NYTimes.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = '/Users/daeil/Dropbox/research/liv-test/topic_models/data/nytimes_small/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'nyt_small_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'nyt_small_bnpy.mat' 13 | 14 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat( matfilepath ) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 22 | Data = WordsData.read_from_mat( matfilepath ) 23 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 24 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 25 | return DataIterator 26 | 27 | def get_data_info(D, V): 28 | return 'NYTimes (Small) Data. D=%d. 
VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/NYTimesDB.py: -------------------------------------------------------------------------------- 1 | ''' 2 | NYTimes.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIteratorDB 6 | 7 | dbpath = '/Users/daeil/Dropbox/research/local/nytimes_ldc' 8 | ''' Use the dbpath below in order to connect to the nytimes database at Brown 9 | ''' 10 | #dbpath='/data/liv/nytimes/liv/nytimes_ldc' 11 | 12 | D = 1816909 13 | V = 8000 14 | 15 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 16 | ''' Grab data from database to initialize (used only once really) 17 | ''' 18 | doc_id_select = range(1,500) # grab the first 500 documents to initialize 19 | nDoc = len(doc_id_select) 20 | query = 'select * from data where rowid in (' + ','.join(map(str, doc_id_select)) + ')' 21 | Data = WordsData.read_from_db( dbpath, query, nDoc=nDoc, nDocTotal = nDoc, vocab_size = V ) 22 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 23 | return Data 24 | 25 | def get_minibatch_iterator(seed=8675309, nBatch=10000, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 26 | ''' Data is primarily loaded through AdmixMinibatchIteratorDB. 27 | If creating from database, put in true number of documents and vocabulary size for the entire corpus 28 | Initialize with only a handful of documents however, specified by doc_id_select 29 | ''' 30 | #Data object isn't passed in, is this bottom part necessary again? 
31 | #doc_id_select = range(1,500) # grab the first 500 documents 32 | #query = 'select * from data where rowid in (' + ','.join(map(str, doc_id_select)) + ')' 33 | #Data = WordsData.read_from_db( dbpath, query, nDoc=len(doc_id_select), nDocTotal = D, vocab_size = V ) 34 | Data = get_data(nDocTotal = D, vocab_size = V) 35 | 36 | #Create iterator that grabs documents from the sqlite3 database 37 | DataIterator = AdmixMinibatchIteratorDB(Data, dbpath=dbpath, nDocTotal=D, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 38 | DataIterator.summary = get_data_info(D, V) 39 | return DataIterator 40 | 41 | def get_data_info(D, V): 42 | return 'NYTimes (Very Large) Data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/Science.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Science.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = '/Users/daeil/Dropbox/research/liv-test/topic_models/data/science/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'science_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'science_bnpy.mat' 13 | 14 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat( matfilepath ) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 22 | Data = WordsData.read_from_mat( matfilepath ) 23 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 24 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 25 | return DataIterator 26 | 27 | def 
get_data_info(D, V): 28 | return 'Science Abstracts Data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/Wikipedia.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Science.py 3 | 4 | ''' 5 | from bnpy.data import WordsData, AdmixMinibatchIterator 6 | import os 7 | 8 | data_dir = '/Users/daeil/Dropbox/research/liv-test/topic_models/data/wikipedia/' 9 | matfilepath = os.environ['BNPYDATADIR'] + 'wiki_bnpy.mat' 10 | 11 | if not os.path.exists(matfilepath): 12 | matfilepath = data_dir + 'wiki_bnpy.mat' 13 | 14 | def get_data(seed=8675309, nObsTotal=25000, **kwargs): 15 | ''' Grab data from matfile specified by matfilepath 16 | ''' 17 | Data = WordsData.read_from_mat( matfilepath ) 18 | Data.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 19 | return Data 20 | 21 | def get_minibatch_iterator(seed=8675309, nBatch=10, nObsBatch=None, nObsTotal=25000, nLap=1, allocModelName=None, dataorderseed=0, **kwargs): 22 | Data = WordsData.read_from_mat( matfilepath ) 23 | DataIterator = AdmixMinibatchIterator(Data, nBatch=nBatch, nObsBatch=nObsBatch, nLap=nLap, dataorderseed=dataorderseed) 24 | DataIterator.summary = get_data_info(Data.nDocTotal, Data.vocab_size) 25 | return DataIterator 26 | 27 | def get_data_info(D, V): 28 | return 'Wikipedia Data. D=%d. VocabSize=%d' % (D,V) -------------------------------------------------------------------------------- /refinery/bnpy/scripts/customFunc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | customFunc.py 3 | A custom function that we can use to hook into the BNPy analysis. Runs at every minibatch or lap depending on the 4 | the type of analysis. 
5 | 6 | onLapComplete() run after every complete lap through all B batches 7 | onBatchComplete() run after every complete visit (Mstep, Estep, Sstep, ELBOstep) to a single batch 8 | onAlgorithmComplete() run after the algorithm converges/reaches maximum number of laps 9 | 10 | ''' 11 | import redis 12 | msgServer = redis.StrictRedis() 13 | 14 | def onLapComplete(hModel, percentDone, customFuncArgs): 15 | update = str(percentDone) + "% Done" 16 | msgServer.publish('analysis', "%s" % (update)) 17 | print "onLapComplete" 18 | 19 | def onBatchComplete(hModel, percentDone, customFuncArgs): 20 | update = str(percentDone) + "% Done" 21 | msgServer.publish('analysis', "%s" % (update)) 22 | print "onBatchComplete" 23 | 24 | def onAlgorithmComplete(hModel, percentDone, customFuncArgs): 25 | msgServer.publish('analysis', "%s" % ('status:Analysis Finished')) 26 | print "onAlgorithmComplete" -------------------------------------------------------------------------------- /refinery/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | import os 4 | import sys 5 | basedir = os.path.abspath(os.path.dirname(__file__)) 6 | 7 | # Must turn this off when in devlopment 8 | DEBUG = True 9 | 10 | # Flask WTF module requires these two settings 11 | CSRF_ENABLED = True 12 | SECRET_KEY = 'bulgogi' 13 | # Path of our database file, required by flask-SQLAlchemy 14 | #SQLALCHEMY_DATABASE_URI = 'sqlite:///' + os.path.join(basedir, 'app.db') 15 | SQLALCHEMY_DATABASE_URI = "postgresql:///refinery" 16 | 17 | #SQLALCHEMY_DATABASE_URI = 'postgresql://refinery_admin@localhost/refinery' 18 | # Folder that stores our SQLAlchemy-migrate data files 19 | SQLALCHEMY_MIGRATE_REPO = os.path.join(basedir, 'db_repository') 20 | 21 | UPLOAD_FOLDER = 'refinery/static/datasets/' 22 | USER_DIRECTORY = 'refinery/static/users/' 23 | RANDOM_IMG_DIRECTORY = 'refinery/static/assets/images/random/' 24 | 25 | 26 | 
-------------------------------------------------------------------------------- /refinery/data/nips0-12.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/data/nips0-12.zip -------------------------------------------------------------------------------- /refinery/data/nyt_2013_obama.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/data/nyt_2013_obama.tar.gz -------------------------------------------------------------------------------- /refinery/data/nyt_2013_obama.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/data/nyt_2013_obama.zip -------------------------------------------------------------------------------- /refinery/data/reuters.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/data/reuters.zip -------------------------------------------------------------------------------- /refinery/fact_classifier/classify_ex.py: -------------------------------------------------------------------------------- 1 | from sklearn import svm 2 | from sklearn.feature_extraction import DictVectorizer 3 | from collections import defaultdict 4 | import pickle 5 | v = DictVectorizer() 6 | 7 | #TODO : need to tokenize the words before using them as features! 
8 | 9 | def main(): 10 | 11 | def munge(s): 12 | ps = s.split() 13 | label = int(ps[0]) 14 | ws = defaultdict(int) 15 | for w in ps[1:]: 16 | ws[w] += 1 17 | return [label,ws] 18 | 19 | data = [munge(l.strip()) for l in open("/home/chonger/Downloads/annotations.txt")] 20 | 21 | labels = [x[0] for x in data] 22 | dicts = [x[1] for x in data] 23 | 24 | feats = v.fit_transform(dicts) 25 | 26 | ttsplit = int(len(labels) * .8) 27 | clf = svm.SVC(kernel='linear', class_weight={1: 10}) 28 | #clf = svm.SVC() 29 | clf.fit(feats[:ttsplit],labels[:ttsplit]) 30 | 31 | print clf.score(feats[ttsplit:],labels[ttsplit:]) 32 | 33 | tot = defaultdict(int) 34 | tr = defaultdict(int) 35 | for ex in labels[ttsplit:]: 36 | tr[ex] += 1 37 | 38 | for ex in feats[ttsplit:]: 39 | tot[(clf.predict(ex).tolist())[0]] += 1 40 | 41 | print tr 42 | print tot 43 | 44 | print feats[0] 45 | print feats[1] 46 | 47 | f = open("/home/chonger/factsvm",'w') 48 | pickle.dump(clf,f) 49 | f.close() 50 | 51 | f = open("/home/chonger/factfeat",'w') 52 | pickle.dump(v,f) 53 | f.close() 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /refinery/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/__init__.py -------------------------------------------------------------------------------- /refinery/lib/model_svm/feats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/model_svm/feats -------------------------------------------------------------------------------- /refinery/lib/model_svm/lower_words: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/model_svm/lower_words -------------------------------------------------------------------------------- /refinery/lib/model_svm/non_abbrs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/model_svm/non_abbrs -------------------------------------------------------------------------------- /refinery/lib/svmlite/LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/LICENSE.txt -------------------------------------------------------------------------------- /refinery/lib/svmlite/kernel.h: -------------------------------------------------------------------------------- 1 | /************************************************************************/ 2 | /* */ 3 | /* kernel.h */ 4 | /* */ 5 | /* User defined kernel function. Feel free to plug in your own. */ 6 | /* */ 7 | /* Copyright: Thorsten Joachims */ 8 | /* Date: 16.12.97 */ 9 | /* */ 10 | /************************************************************************/ 11 | 12 | /* KERNEL_PARM is defined in svm_common.h The field 'custom' is reserved for */ 13 | /* parameters of the user defined kernel. You can also access and use */ 14 | /* the parameters of the other kernels. Just replace the line 15 | return((double)(1.0)); 16 | with your own kernel. */ 17 | 18 | /* Example: The following computes the polynomial kernel. sprod_ss 19 | computes the inner product between two sparse vectors. 
20 | 21 | return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words) 22 | +kernel_parm->coef_const,(double)kernel_parm->poly_degree)); 23 | */ 24 | 25 | /* If you are implementing a kernel that is not based on a 26 | feature/value representation, you might want to make use of the 27 | field "userdefined" in SVECTOR. By default, this field will contain 28 | whatever string you put behind a # sign in the example file. So, if 29 | a line in your training file looks like 30 | 31 | -1 1:3 5:6 #abcdefg 32 | 33 | then the SVECTOR field "words" will contain the vector 1:3 5:6, and 34 | "userdefined" will contain the string "abcdefg". */ 35 | 36 | double custom_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b) 37 | /* plug in you favorite kernel */ 38 | { 39 | return((double)(1.0)); 40 | } 41 | -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_classify: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_classify -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_classify.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_classify.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_common.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_common.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_hideo.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_hideo.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_learn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_learn -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_learn.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_learn.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_learn_main.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_learn_main.c -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_learn_main.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_learn_main.o -------------------------------------------------------------------------------- /refinery/lib/svmlite/svm_light.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/lib/svmlite/svm_light.tar.gz -------------------------------------------------------------------------------- /refinery/lib/word_tokenize.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | """ 4 | A list 
"""
An ordered list of (regexp, repl) rewrite rules applied in sequence.
The resulting string is split on whitespace by callers.
(Adapted from the Punkt Word Tokenizer)
"""

_tokenize_regexps = [

    # uniform quotes
    (re.compile(r'\'\''), r'"'),
    (re.compile(r'\`\`'), r'"'),

    # Separate punctuation (except period) from words:
    (re.compile(r'(^|\s)(\')'), r'\1\2 '),
    (re.compile(r'(?=[\(\"\`{\[:;&\#\*@])(.)'), r'\1 '),

    (re.compile(r'(.)(?=[?!)\";}\]\*:@\'])'), r'\1 '),
    (re.compile(r'(?=[\)}\]])(.)'), r'\1 '),
    (re.compile(r'(.)(?=[({\[])'), r'\1 '),
    (re.compile(r'((^|\s)\-)(?=[^\-])'), r'\1 '),

    # Treat double-hyphen as one token:
    (re.compile(r'([^-])(\-\-+)([^-])'), r'\1 \2 \3'),
    (re.compile(r'(\s|^)(,)(?=(\S))'), r'\1\2 '),

    # Only separate comma if space follows:
    (re.compile(r'(.)(,)(\s|$)'), r'\1 \2\3'),

    # Combine dots separated by whitespace to be a single token:
    (re.compile(r'\.\s\.\s\.'), r'...'),

    # Separate "No.6"
    (re.compile(r'([A-Za-z]\.)(\d+)'), r'\1 \2'),

    # Separate words from ellipses
    (re.compile(r'([^\.]|^)(\.{2,})(.?)'), r'\1 \2 \3'),
    (re.compile(r'(^|\s)(\.{2,})([^\.\s])'), r'\1\2 \3'),
    (re.compile(r'([^\.\s])(\.{2,})($|\s)'), r'\1 \2\3'),

    ## adding a few things here:

    # fix %, $, &
    (re.compile(r'(\d)%'), r'\1 %'),
    (re.compile(r'\$(\.?\d)'), r'$ \1'),
    (re.compile(r'(\w)& (\w)'), r'\1&\2'),
    (re.compile(r'(\w\w+)&(\w\w+)'), r'\1 & \2'),

    # fix (n 't) --> ( n't)
    (re.compile(r'n \'t( |$)'), r" n't\1"),
    (re.compile(r'N \'T( |$)'), r" N'T\1"),

    # treebank tokenizer special words
    (re.compile(r'([Cc])annot'), r'\1an not'),

    (re.compile(r'\s+'), r' '),

]

def tokenize(s):
    """
    Apply every (regexp, repl) rule above, in order, and return the result.
    """
    text = s
    for pattern, replacement in _tokenize_regexps:
        text = pattern.sub(replacement, text)
    return text
-------------------------------------------------------------------------------- /refinery/refinery/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask.ext.sqlalchemy import SQLAlchemy 3 | from flask.ext.login import LoginManager 4 | from celery import Celery 5 | 6 | print "Opening a Refinery" 7 | 8 | app = Flask(__name__) 9 | app.config.from_object('config') 10 | db = SQLAlchemy(app) 11 | 12 | lm = LoginManager() 13 | lm.init_app(app) 14 | lm.login_view = 'login' 15 | 16 | def make_celery(app): 17 | celery = Celery(app.import_name, broker=app.config['CELERY_BROKER_URL']) 18 | celery.conf.update(app.config) 19 | TaskBase = celery.Task 20 | class ContextTask(TaskBase): 21 | abstract = True 22 | def __call__(self, *args, **kwargs): 23 | with app.app_context(): 24 | return TaskBase.__call__(self, *args, **kwargs) 25 | celery.Task = ContextTask 26 | return celery 27 | 28 | app.config.update( 29 | CELERY_BROKER_URL='redis://localhost:6379', 30 | CELERY_RESULT_BACKEND='redis://localhost:6379', 31 | CELERY_IMPORTS=['refinery.webapp.topicmodel','refinery.webapp.main_menu'], 32 | CELERY_REDIS_MAX_CONNECTIONS=4 33 | ) 34 | 35 | celery = make_celery(app) 36 | -------------------------------------------------------------------------------- /refinery/refinery/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/data/__init__.py -------------------------------------------------------------------------------- /refinery/refinery/static/.gitignore: -------------------------------------------------------------------------------- 1 | users/ 2 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/bpicons/bpicons.eot: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/bpicons/bpicons.eot -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/bpicons/bpicons.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/bpicons/bpicons.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/bpicons/bpicons.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/bpicons/bpicons.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/bpicons/license.txt: -------------------------------------------------------------------------------- 1 | Icon Set: Font Awesome -- http://fortawesome.github.com/Font-Awesome/ 2 | License: SIL -- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=OFL 3 | 4 | 5 | Icon Set: Eco Ico -- http://dribbble.com/shots/665585-Eco-Ico 6 | License: CC0 -- http://creativecommons.org/publicdomain/zero/1.0/ -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome.eot -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/fonts/fontawesome.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | This is a custom SVG font generated by IcoMoon. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/Read Me.txt: -------------------------------------------------------------------------------- 1 | To modify your generated font, use the *dev.svg* file, located in the *fonts* folder in this package. You can import this dev.svg file to the IcoMoon app. All the tags (class names) and the Unicode points of your glyphs are saved in this file. 
2 | 3 | See the documentation for more info on how to use this package: http://icomoon.io/#docs/font-face -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/fontawesome.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome/fontawesome.eot -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/fontawesome.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome/fontawesome.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/fontawesome.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/fontawesome/fontawesome.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/fontawesome/license.txt: -------------------------------------------------------------------------------- 1 | Icon Set: Font Awesome -- http://fortawesome.github.com/Font-Awesome/ 2 | License: SIL -- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=OFL -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/icomoon.dev.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | This is a custom SVG font generated by IcoMoon. 
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 17 | 18 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/icomoon.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon/icomoon.eot -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/icomoon.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon/icomoon.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/icomoon.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon/icomoon.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon/license.txt: -------------------------------------------------------------------------------- 1 | Icon Set: IcoMoon - Free -- http://keyamoon.com/icomoon/ 2 | License: CC BY 3.0 -- http://creativecommons.org/licenses/by/3.0/ -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.eot -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/fonts/icomoon_arrows/icomoon.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/Logo.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.dev.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | This is a custom SVG font generated by IcoMoon. 
6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/arrows/arrows.eot -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | This is a custom SVG font generated by IcoMoon. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/arrows/arrows.ttf -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/arrows.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/arrows/arrows.woff -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/arrows/license.txt: -------------------------------------------------------------------------------- 1 | Icon Set: Entypo -- http://www.entypo.com/ 2 | License: CC BY-SA 3.0 -- http://creativecommons.org/licenses/by-sa/3.0/ -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/1.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/1.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/2.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/3.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/4.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/elephants/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/elephants/5.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_020_home.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_020_home.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_063_power.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_063_power.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_144_folder_open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_144_folder_open.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_145_folder_plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_145_folder_plus.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_220_play_button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_220_play_button.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_232_cloud.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_232_cloud.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_341_briefcase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_341_briefcase.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_357_suitcase.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_357_suitcase.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_363_cloud_upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_363_cloud_upload.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_364_cloud_download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_364_cloud_download.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_370_globe_af.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_370_globe_af.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/glyphicons_371_global.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/glyphicons_371_global.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-chop-suey.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-chop-suey.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-early-sunday-morning.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-early-sunday-morning.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-gas.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-gas.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-morning-sun.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-morning-sun.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/hopper-nighthawks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/hopper-nighthawks.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/arrow_down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/arrow_down.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/arrow_up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/arrow_up.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-add-folder-icon-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-add-folder-icon-256.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-arrow-4-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-arrow-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-book-17-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-book-17-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-cloud-3-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-cloud-3-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-cloud-9-icon-48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-cloud-9-icon-48.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-edit-8-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-edit-8-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-flask-7-icon-256.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-flask-7-icon-256.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-gear-icon-48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-gear-icon-48.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-info-6-icon-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-info-6-icon-128.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-info-6-icon-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-info-6-icon-256.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-line-chart-4-icon-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-line-chart-4-icon-64.png -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/icons/iconmonstr-line-chart-icon-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-line-chart-icon-64.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-lock-13-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-lock-13-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-lock-15-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-lock-15-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-lock-3-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-lock-3-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-lock-9-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-lock-9-icon.png 
-------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-magnifier-4-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-magnifier-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-magnifier-6-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-magnifier-6-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-12-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-12-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-3-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-3-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-4-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-7-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-newspaper-7-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-note-25-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-note-25-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-photo-camera-6-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-photo-camera-6-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-photo-camera-8-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-photo-camera-8-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-plus-5-icon-24.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-plus-5-icon-24.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-star-5-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-star-5-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-star-7-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-star-7-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-text-file-4-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-text-file-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon-24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon-24.png -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon-256.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/iconmonstr-x-mark-4-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/pixel-arrow-right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/pixel-arrow-right.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/pixel-arrow-right_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/pixel-arrow-right_.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/icons/pixel-arrow-right__.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/pixel-arrow-right__.png -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/icons/running-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/icons/running-icon.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/dali-rose.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/dali-rose.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/hopper-chop-suey.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-chop-suey.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/hopper-early-sunday-morning.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-early-sunday-morning.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/hopper-gas.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-gas.jpg -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/paintings/hopper-morning-sun.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-morning-sun.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/hopper-nighthawks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/hopper-nighthawks.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/kandinsky-trans.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/kandinsky-trans.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/mondrian-comp-a.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/mondrian-comp-a.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/monet-fog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/monet-fog.jpg -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/paintings/monet-gal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/monet-gal.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/paintings/monet-poppies.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/paintings/monet-poppies.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/progress.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/progressbar.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/progressbar.gif -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/img05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/img05.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/img06.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/img06.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/img07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/img07.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/img08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/img08.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img004.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img01.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img02.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img02.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img03.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/projects/latest/img04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/projects/latest/img04.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/1.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/2.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/3.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/3.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/4.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/random/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/random/5.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/sliders/camera/slide00.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/sliders/camera/slide00.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/sliders/camera/slide01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/sliders/camera/slide01.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/sliders/camera/slide02.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/sliders/camera/slide02.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/sliders/camera/slide03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/sliders/camera/slide03.jpg -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_flat_0_aaaaaa_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_flat_0_aaaaaa_40x100.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_flat_75_ffffff_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_flat_75_ffffff_40x100.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_55_fbf9ee_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_55_fbf9ee_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_75_dadada_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_75_dadada_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_75_e6e6e6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_75_e6e6e6_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_glass_95_fef1ec_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_glass_95_fef1ec_1x400.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-bg_highlight-soft_75_cccccc_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-bg_highlight-soft_75_cccccc_1x100.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_222222_256x240.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_2e83ff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_2e83ff_256x240.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_454545_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_454545_256x240.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_454545_256x240___________.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_454545_256x240___________.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/ui/ui-icons_888888_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_888888_256x240.png -------------------------------------------------------------------------------- 
/refinery/refinery/static/assets/images/ui/ui-icons_cd0a0a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/ui/ui-icons_cd0a0a_256x240.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/1.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/10.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/2.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/3.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/4.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/5.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/6.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/7.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/8.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/images/view_mode/9.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/images/view_mode/9.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/border.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/border.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/controls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/controls.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/loading.gif -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/loading_background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/loading_background.png -------------------------------------------------------------------------------- /refinery/refinery/static/assets/lib/colorbox/images/overlay.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/static/assets/lib/colorbox/images/overlay.png -------------------------------------------------------------------------------- /refinery/refinery/static/css/upload.css: -------------------------------------------------------------------------------- 1 | #upload_dropzone 2 | { 3 | position:aboslute; 4 | margin-top:30px; 5 | } 6 | 7 | .filedrag_box 8 | { 9 | display: block; 10 | position: relative; 11 | height: 125px; 12 | padding: 10px; 13 | text-align: center; 14 | color: #111; 15 | border: 2px dashed #555; 16 | border-radius: 10px; 17 | cursor: default; 18 | } 19 | 20 | 21 | .filedrag_box_hover 22 | { 23 | display: block; 24 | position: relative; 25 | height: 125px; 26 | padding: 10px; 27 | text-align: center; 28 | color: #111; 29 | border: 2px dashed #555; 30 | border-radius: 10px; 31 | cursor: default; 32 | color: #f00; 33 | border-color: #f00; 34 | border-style: solid; 35 | box-shadow: inset 0 3px 4px #888; 36 | } 37 | 38 | /** the img after it gets dropped but before submit */ 39 | #img_frame 40 | { 41 | height: 200px; 42 | width: 200px; 43 | margin-left: auto; 44 | margin-right: auto; 45 | } 46 | 47 | /** 48 | #upload_progress 49 | { 50 | border: 1px solid black; 51 | margin: 0 auto; 52 | position: relative; 53 | border-radius: 10px; 54 | padding: 5px; 55 | width: 95%; 56 | height: 19px; 57 | background: #eee url(/static/assets/images/progress.png) 100% 0 repeat-y; 58 | background-position: 0% 0; 59 | } 60 | 61 | #progress_text 62 | { 63 | margin-top:0px; 64 | padding: 5px; 65 | position: absolute; 66 | color: #444; 67 | width: 100%; 68 | height: 100%; 69 | background: #eee url(/static/assets/images/progress.png) 100% 0 repeat-y; 70 | } 71 | 72 | #upload_progress.success 73 | { 74 | background: #00cc33 none 0 0 no-repeat; 75 | } 76 | 77 | #upload_progress.failed 78 | { 79 | background: #c00 none 0 0 no-repeat; 80 | } 81 | */ 82 | #uploadlist 83 | { 
84 | height:100%; 85 | border: 2px solid #222; 86 | border-radius: 10px; 87 | } 88 | 89 | #uploadlist li 90 | { 91 | text-align:left; 92 | 93 | } 94 | -------------------------------------------------------------------------------- /refinery/refinery/static/js/annotate.js: -------------------------------------------------------------------------------- 1 | 2 | var selected = {}; 3 | 4 | console.log(lines); 5 | 6 | var ls = d3.select("#survey").selectAll("div").data(lines); 7 | 8 | ls.enter().append("div").attr("class","sentex").on("click",function(d,i) { 9 | d3.select(this).style("background",function(d2,i2) { 10 | console.log("clicked " + i); 11 | if(i in selected) { 12 | delete selected[i]; 13 | return "#ffffff"; 14 | } else { 15 | selected[i] = 0; 16 | return "#aaeeff"; 17 | } 18 | 19 | }); 20 | }).html(function(d) { 21 | return d; 22 | }); 23 | 24 | 25 | 26 | function submitSurvey() { 27 | 28 | var linse = d3.select("#survey").selectAll("div").data(); 29 | 30 | var labels = []; 31 | 32 | for (i in selected){ 33 | console.log("!" + i); 34 | labels.push(i); 35 | } 36 | 37 | $.post(ann_url,{'labels[]' : labels, 'lines[]' : lines},function(d) { 38 | 39 | window.location = "/annotate"; 40 | 41 | }); 42 | 43 | } 44 | 45 | -------------------------------------------------------------------------------- /refinery/refinery/templates/about.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block js_head %} 3 | 4 | 5 | 6 | 7 | 8 | {% endblock %} 9 | 10 | {% block content %} 11 |
12 |

M L RUUUUN

13 |
14 | {% endblock %} 15 | 16 | -------------------------------------------------------------------------------- /refinery/refinery/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | {% block title %} 12 | {% endblock %} 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | {% block js_head %} 23 | {% endblock %} 24 | 25 | 26 | 27 | 28 | 29 |
30 |
31 | 47 |
48 |
49 |
50 |
51 | 52 | {% block content %} 53 | {% endblock %} 54 | 55 |
56 | 57 | {% block js %} 58 | {% endblock %} 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /refinery/refinery/templates/browse_list.html: -------------------------------------------------------------------------------- 1 | {% for doc in dox %} 2 | 3 |
  • 4 | 5 | 6 | 7 |
    {{doc.name}}
    8 | 9 |
  • 10 |
    11 | {% endfor %} -------------------------------------------------------------------------------- /refinery/refinery/templates/docview.html: -------------------------------------------------------------------------------- 1 |
    2 | 3 | {{doctext}} 4 | 5 |
    -------------------------------------------------------------------------------- /refinery/refinery/templates/info_page.html: -------------------------------------------------------------------------------- 1 |
    2 |

    3 | Welcome to the Refinery 4 |

    5 | 6 |

    7 | Refinery identifies topics in your data and allows you to refine it into folders that focus on the topics you're interested in. Also, it helps you summarize your folders into important sentences with links back to the documents that they appear in. 8 |

    9 | 10 |

    11 | Use buttons to change settings 12 |

    13 |
    14 |

    15 | Use buttons to run machine learning 16 |

    17 |
    18 |

    19 | Use buttons to experiment with Refinery's tools 20 |

    21 |
    22 |

    23 | Use buttons to browse the documents in a folder 24 |

    25 |
    26 |

    27 | Use buttons to delete things 28 |

    29 |
    30 |

    31 | Click the button to see this information page again 32 |

    33 |
    34 |

    35 | To get started with Refinery, upload a dataset with the button at the bottom of the page 36 |

    37 | 38 | 39 |
    40 | -------------------------------------------------------------------------------- /refinery/refinery/templates/login.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
    8 | {{ form.hidden_tag() }} 9 |

    refinery (beta: invitation only)

    10 |
    11 | {{ form.username(id_='login_username', class_='field required') }} 12 |
    13 |
    14 | {{ form.password(id_='login_password', class_='field required') }} 15 |
    16 | 17 |

    Forgot your password?

    18 |

    Not a member? Sign Up

    19 |
    20 | 21 | 22 |
    23 | 24 |
    25 | 26 | -------------------------------------------------------------------------------- /refinery/refinery/templates/profile.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block js_head %} 3 | 4 | {% endblock %} 5 | 6 | {% block content %} 7 | 8 |
    9 |
    10 | 11 | 12 |
    13 | 14 |
    15 | 21 | 22 |
    23 |
    24 |
    25 | {% endblock %} 26 | {% block js %} 27 | 28 | {% endblock %} 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /refinery/refinery/templates/summarize.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block js_head %} 3 | 4 | 5 | 6 | 7 | {% endblock %} 8 | 9 | {% block content %} 10 | 11 |
    12 |
    13 | INLINE STUFF! 14 |
    15 |
    16 | 17 |
    18 |
    19 | 20 |
    21 |
    Select notes by clicking the circles
    22 |
    When you're finished, click the "Keep" Button. Selecting nothing selects all results
    23 | 24 |
    25 |
    26 | 27 |
    28 |
    29 | 30 |
    31 |
    32 | 33 |
    34 |
    35 | 36 | 47 | 48 |
    49 |
    50 | 51 | 52 | 53 |
    54 |
    55 |
    56 |
    57 | {% endblock %} 58 | 59 | 60 | {% block js %} 61 | 73 | 74 | 77 | {% endblock %} -------------------------------------------------------------------------------- /refinery/refinery/webapp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/refinery/webapp/__init__.py -------------------------------------------------------------------------------- /refinery/refinery/webapp/customFunc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | customFunc.py 3 | 4 | A custom function that we can use to hook into the BNPY analysis. 5 | 6 | onLapComplete() run after every complete lap through all B batches 7 | onBatchComplete() run after every complete visit (Mstep, Estep, Sstep, ELBOstep) to a single batch 8 | onAlgorithmComplete() run after the algorithm converges/reaches maximum number of laps 9 | 10 | ''' 11 | import redis 12 | import numpy as np 13 | import json 14 | 15 | msgServer = redis.StrictRedis(socket_timeout=20) 16 | #from pubsub import msgServer 17 | 18 | def getModelState(hmodel,LP,nTopW): 19 | 20 | num_topics = hmodel.allocModel.K 21 | 22 | def lm_info(): 23 | for k in xrange(num_topics): 24 | lamvec = hmodel.obsModel.comp[k].lamvec #bag of words weights 25 | elamvec = lamvec / lamvec.sum() #renormalized weights 26 | inds = np.argsort(elamvec)[-nTopW:].tolist() #get the top indices 27 | inds.reverse() 28 | probs = [elamvec[idx] for idx in inds] #get their weights 29 | yield [elamvec,zip(inds,probs)] 30 | 31 | topW = [] 32 | lms = [] 33 | for lm,tops in lm_info(): 34 | topW.append(tops) 35 | lms.append(lm) 36 | 37 | topic_props = hmodel.allocModel.Ebeta 38 | 39 | def renormalize(vec): 40 | tot = sum(vec) 41 | return [x/tot for x in vec] 42 | 43 | doc_tops = [renormalize(x[:-1]) for x in LP['alphaPi']] #topic posteriors for each document, drop the last value because bnpy 
44 | 45 | return [topW,topic_props,doc_tops,lms] 46 | 47 | def onLapComplete(hmodel, percentDone, customFuncArgs): 48 | 49 | update = str(percentDone * 100) 50 | customArgs = json.loads(customFuncArgs) 51 | tm_id = customArgs["tm_id"] 52 | username = customArgs["username"] 53 | 54 | msgServer.publish(username + "Xmenus",'tm_prog,' + tm_id + "," + update) 55 | 56 | ''' 57 | 58 | For now we dont use these hooks, but bnpy allows them 59 | 60 | ''' 61 | 62 | def onBatchComplete(hModel, percentDone, customFuncArgs): 63 | print "onBatchComplete!" 64 | 65 | def onAlgorithmComplete(hModel, percentDone, customFuncArgs): 66 | print "onAlgorithmComplete!" 67 | -------------------------------------------------------------------------------- /refinery/refinery/webapp/pubsub.py: -------------------------------------------------------------------------------- 1 | import redis 2 | from refinery import app 3 | from flask import Response 4 | 5 | ''' 6 | 7 | Handles pubsub stuff 8 | 9 | ''' 10 | 11 | # START REDIS for pubsub system, should only happen once 12 | msgServer = redis.StrictRedis(socket_timeout=20) 13 | 14 | #Generic function to call redis and start pub/sub messaging service 15 | def event_stream(channel=None): 16 | 17 | pubsub = msgServer.pubsub() 18 | pubsub.subscribe(channel) 19 | 20 | # handle client disconnection in the client side by calling the exit keyword 21 | 22 | try: 23 | for msg in pubsub.listen(): 24 | yield 'data: %s\n\n' % msg['data'] 25 | except Exception: 26 | yield 'data: NONE\n\n' #if a timeout happens on the listen, we need to return something 27 | 28 | 29 | ''' 30 | 31 | These are the pubsub channels that serve the information 32 | 33 | ''' 34 | 35 | @app.route("//stream_upload") 36 | def stream_upload(username=None): 37 | mimetype = "text/event-stream" 38 | channel = username + "Xupload" 39 | return Response(event_stream(channel=channel), mimetype=mimetype) 40 | 41 | @app.route("//stream_menus") 42 | def stream_menus(username=None): 43 | mimetype = 
"text/event-stream" 44 | channel = username + "Xmenus" 45 | return Response(event_stream(channel=channel), mimetype=mimetype) 46 | 47 | @app.route('//stream_sum//') 48 | def stream_sum(username=None, data_id=None,ex_id=None): 49 | mimetype = "text/event-stream" 50 | ch = username +"_summary_" + str(data_id) + "_" + str(ex_id) 51 | return Response(event_stream(channel=ch), mimetype=mimetype) 52 | 53 | -------------------------------------------------------------------------------- /refinery/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==0.10.1 2 | flask-login==0.2.11 3 | flask-sqlalchemy==2.0 4 | gunicorn==19.1.1 5 | numpy==1.9.0 6 | scipy==0.9.0 7 | wtforms==2.0.1 8 | redis==2.10.3 9 | joblib==0.8.3-r1 10 | celery==3.1.15 11 | flask-wtf==0.10.2 12 | psycopg2==2.5.4 13 | kombu==3.0.23 14 | 15 | -------------------------------------------------------------------------------- /refinery/reset_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | import os 4 | 5 | from config import basedir 6 | from refinery import app,db 7 | from refinery.data.models import * 8 | import shutil 9 | 10 | def create_db_entries(): 11 | 12 | print "Creating new DB" 13 | 14 | userdir = app.config['USER_DIRECTORY'] 15 | 16 | try: 17 | os.stat(userdir) 18 | except: 19 | os.mkdir(userdir) 20 | 21 | # remove all folders within the users directory 22 | remove_dir(userdir) 23 | 24 | # recreate database structure 25 | db.drop_all() 26 | db.create_all() 27 | 28 | # username and passwords for mock db fill 29 | usernames = ['doc'] 30 | passwords = ['refinery'] 31 | 32 | # Create a bunch of users 33 | for i in xrange(len(usernames)): 34 | add_user(usernames[i],passwords[i]) 35 | 36 | # Create a bunch of datasets 37 | 38 | # Create a bunch of experiments 39 | 40 | # create a bunch of reports 41 | 42 | check_db() 43 | 44 | def check_db(): 45 | query_users = 
User.query.all() 46 | query_exp = Experiment.query.all() 47 | query_data = Dataset.query.all() 48 | 49 | #print query_users 50 | #print query_exp 51 | #print query_data 52 | 53 | def remove_dir(folder): 54 | for the_file in os.listdir(folder): 55 | file_path = os.path.join(folder, the_file) 56 | if os.path.isdir(file_path): 57 | print "Deleting: " + file_path 58 | shutil.rmtree(file_path) 59 | 60 | def create_user_dir(username): 61 | newdir = app.config['USER_DIRECTORY'] + username 62 | if os.path.exists(newdir): 63 | print "Directory already exists for " + username 64 | else: 65 | datadir = newdir + "/documents" 66 | imdir = newdir + "/images" 67 | procdir = newdir + "/processed" 68 | os.makedirs(newdir) 69 | os.makedirs(procdir) 70 | os.makedirs(datadir) 71 | os.makedirs(imdir) 72 | 73 | print "Creating directory structure for: " + newdir 74 | 75 | def add_user(username, password): 76 | ''' When we add a new user, we first check if this user exists. If not, 77 | we create this users directory structure. 
78 | ''' 79 | 80 | create_user_dir(username) 81 | email = username + "@refinery.com" 82 | u = User(username = username, password=password, email=email) 83 | 84 | if(username == "doc"): 85 | u.email = "refinery@docrefinery.com" 86 | shutil.copyfile("reset_db_files/default.jpg","refinery/static/users/doc/images/default.jpg") 87 | u.image = "default.jpg" 88 | 89 | db.session.add(u) 90 | db.session.commit() 91 | 92 | if __name__ == '__main__': 93 | create_db_entries() 94 | -------------------------------------------------------------------------------- /refinery/reset_db_files/default.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daeilkim/refinery/0d5de8fc3d680a2c79bd0e9384b506229787c74f/refinery/reset_db_files/default.jpg -------------------------------------------------------------------------------- /refinery/setup_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | refineryBASE=./ 3 | export refineryBASE=$refineryBASE 4 | export BNPYOUTDIR=$refineryBASE/refinery/static/users/ 5 | export BNPYDATADIR=$refineryBASE/bnpy/data/ 6 | #export BNPYROOT=/Users/daeil/Dropbox/research/bnpy/bnpy-dev/ 7 | echo $refineryBASE 8 | -------------------------------------------------------------------------------- /refinery/start_celery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | celery --loglevel=debug --concurrency=2 -A refinery.celery worker 3 | -------------------------------------------------------------------------------- /refinery/start_refinery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # redis-server & 3 | #. venv/bin/activate 4 | gunicorn --log-level=debug --timeout 1200 -w 4 -b 0.0.0.0:8080 refinery.webapp.main_menu:app 5 | --------------------------------------------------------------------------------